13 #include "caffe2/core/logging.h" 14 #include "caffe2/utils/fixed_divisor.h" 15 #include "caffe2/utils/threadpool/pthreadpool.h" 18 static inline size_t divide_round_up(
size_t dividend,
size_t divisor) {
// divide_round_up: integer division of dividend by divisor, rounded up.
// divisor is used as the right operand of % and / with no check before its
// first use, so callers must pass a non-zero divisor.
// Exact multiple: the plain quotient is already the answer.
19 if (dividend % divisor == 0) {
20 return dividend / divisor;
// Reached only when the remainder is non-zero: one extra unit covers the
// partial remainder.
22 return dividend / divisor + 1;
// Two-argument helper over size_t values. Its body is not part of this
// excerpt. Call sites in this file pass a nominal tile size and the number of
// elements remaining in a range, so it presumably returns the smaller of
// a and b -- confirm against the full file.
26 static inline size_t min(
size_t a,
size_t b) {
// Callback member of a context struct whose declaration is not fully visible
// in this excerpt. NOTE(review): presumably this is the struct read through
// `context` in the 1D tiled worker code that follows, which calls
// context->function(context->argument, index, tile) -- confirm.
31 pthreadpool_function_1d_tiled_t
function;
// Per-tile worker body for a tiled 1D range. The enclosing function header is
// not visible in this excerpt; presumably this is compute_1d_tiled, the
// function handed to pthreadpool_compute_1d further down -- confirm.
// Each linear index identifies one tile.
38 const size_t tile_index = linear_index;
// Offset of the tile's first element: tile number times the nominal tile size.
39 const size_t index = tile_index * context->tile;
// Tile length: min of the nominal tile size and the elements remaining between
// index and context->range.
40 const size_t tile = min(context->tile, context->range - index);
// Hand the (start offset, length) pair to the user callback.
41 context->function(context->argument, index, tile);
// Invokes `function` over a 1D range in steps of `tile`. Only the `function`
// parameter is visible in this excerpt; the body also uses `threadpool`,
// `argument`, `range` and `tile`, which are declared on lines not shown.
44 void pthreadpool_compute_1d_tiled(
46 pthreadpool_function_1d_tiled_t
function,
// No thread pool supplied: call `function` directly, one tile per iteration.
// NOTE(review): no guard against tile == 0 is visible here; with a zero tile
// this loop would never advance and divide_round_up(range, tile) below would
// divide by zero -- confirm callers always pass tile > 0.
51 if (threadpool == NULL) {
53 for (
size_t i = 0; i < range; i += tile) {
// Third argument is min(remaining elements, tile).
54 function(argument, i, min(range - i, tile));
// Number of tiles, rounded up; used below as the size of the 1D index space.
58 const size_t tile_range = divide_round_up(range, tile);
// Dispatch compute_1d_tiled (cast to the generic 1D callback type) over
// tile_range linear indices. `context` is set up on lines not shown here.
65 pthreadpool_compute_1d(threadpool, (pthreadpool_function_1d_t) compute_1d_tiled, &context, tile_range);
// Callback member of a context struct whose declaration is not fully visible
// in this excerpt. NOTE(review): presumably this belongs to
// compute_2d_context, through which compute_2d calls
// context->function(context->argument, q, r) -- confirm.
70 pthreadpool_function_2d_t
function;
// Worker for one linear index of a flattened 2D range: obtains a (q, r) pair
// from context->range_j.divMod and calls the user callback with it.
75 static void compute_2d(
const struct compute_2d_context* context,
size_t linear_index) {
// The index is cast to int for divMod below, so check that it fits.
// NOTE(review): this compares a size_t with an int without a cast, whereas the
// equivalent check in pthreadpool_compute_2d casts the limit to size_t.
76 DCHECK_LE(linear_index, std::numeric_limits<int>::max());
// q and r are declared on lines not shown in this excerpt. Presumably
// q = linear_index / range_j and r = linear_index % range_j -- confirm against
// the divisor type declared in the context struct.
80 context->range_j.divMod((
int) linear_index, q, r);
// q and r are passed as the two index arguments of the callback.
81 context->function(context->argument, q, r);
// Invokes `function` for every (i, j) with i < range_i and j < range_j. Only
// the `function` parameter is visible in this excerpt; the body also uses
// `threadpool`, `argument`, `range_i` and `range_j`, declared on lines not
// shown.
84 void pthreadpool_compute_2d(
86 pthreadpool_function_2d_t
function,
// No thread pool supplied: call `function` directly in nested loops, with j
// as the inner (fastest-varying) index.
91 if (threadpool == NULL) {
93 for (
size_t i = 0; i < range_i; i++) {
94 for (
size_t j = 0; j < range_j; j++) {
95 function(argument, i, j);
// The flattened index count must fit in an int, because compute_2d casts each
// linear index to int.
// NOTE(review): the product is formed before the comparison; presumably both
// operands are size_t, in which case it could wrap for very large ranges --
// confirm.
99 DCHECK_LE(range_i * range_j, (
size_t) std::numeric_limits<int>::max());
// Designated initializers for the context passed below; the remaining members
// of the initializer are not visible in this excerpt.
102 .function =
function,
103 .argument = argument,
// Dispatch compute_2d (cast to the generic 1D callback type) over
// range_i * range_j linear indices.
106 pthreadpool_compute_1d(threadpool, (pthreadpool_function_1d_t) compute_2d, &context, range_i * range_j);
// Callback member of a context struct whose declaration is not fully visible
// in this excerpt. NOTE(review): presumably this is the struct read through
// `context` in the 2D tiled worker code that follows, which calls
// context->function(context->argument, index_i, index_j, tile_i, tile_j) --
// confirm.
111 pthreadpool_function_2d_tiled_t
function;
// Per-tile worker body for a tiled 2D range. The enclosing function header and
// the declarations of q and r are not visible in this excerpt; presumably this
// is compute_2d_tiled, the function handed to pthreadpool_compute_1d further
// down -- confirm.
// NOTE(review): unlike compute_2d, linear_index is passed to divMod without an
// (int) cast and no range check is visible here -- it may sit on lines not
// shown; confirm.
// Presumably q is the tile row and r the tile column of this linear index.
124 context->tile_range_j.divMod(linear_index, q, r);
// Nominal tile sizes along each dimension.
125 const size_t max_tile_i = context->tile_i;
126 const size_t max_tile_j = context->tile_j;
// Offsets of the tile's first element along each dimension.
127 const size_t index_i = q * max_tile_i;
128 const size_t index_j = r * max_tile_j;
// Tile extents: min of the nominal size and the elements remaining in the
// range along that dimension.
129 const size_t tile_i = min(max_tile_i, context->range_i - index_i);
130 const size_t tile_j = min(max_tile_j, context->range_j - index_j);
// Hand (start_i, start_j, extent_i, extent_j) to the user callback.
131 context->function(context->argument, index_i, index_j, tile_i, tile_j);
// Invokes `function` over a 2D range in steps of tile_i by tile_j. Only the
// `function` parameter is visible in this excerpt; the body also uses
// `threadpool`, `argument`, `range_i`, `range_j`, `tile_i` and `tile_j`,
// declared on lines not shown.
134 void pthreadpool_compute_2d_tiled(
136 pthreadpool_function_2d_tiled_t
function,
// No thread pool supplied: call `function` directly, one tile per inner
// iteration.
// NOTE(review): no guard against tile_i == 0 or tile_j == 0 is visible here; a
// zero tile size would stall these loops and make divide_round_up below divide
// by zero -- confirm callers always pass positive tile sizes.
143 if (threadpool == NULL) {
145 for (
size_t i = 0; i < range_i; i += tile_i) {
146 for (
size_t j = 0; j < range_j; j += tile_j) {
// Extents are min(remaining elements, tile size) along each dimension.
147 function(argument, i, j, min(range_i - i, tile_i), min(range_j - j, tile_j));
// Tile counts along each dimension, rounded up.
152 const size_t tile_range_i = divide_round_up(range_i, tile_i);
153 const size_t tile_range_j = divide_round_up(range_j, tile_j);
// Checks that the total number of tiles does not exceed the int maximum.
154 DCHECK_LE(tile_range_i * tile_range_j, (
size_t) std::numeric_limits<int>::max());
// Designated initializers for the context passed below; the remaining members
// of the initializer are not visible in this excerpt.
156 .function =
function,
157 .argument = argument,
// Dispatch compute_2d_tiled (cast to the generic 1D callback type) over
// tile_range_i * tile_range_j linear indices.
164 pthreadpool_compute_1d(threadpool, (pthreadpool_function_1d_t) compute_2d_tiled, &context, tile_range_i * tile_range_j);