// Caffe2 - C++ API
// A deep learning, cross platform ML framework.
// reduction_front_back_ops.cc
1 #include "caffe2/operators/reduction_front_back_ops.h"
2 #include "caffe2/core/operator_gradient.h"
3 
4 namespace caffe2 {
5 
6 /***
7  Sum Ops
8 ***/
9 
10 // ReduceFrontSum: columnwise sum
11 template <>
12 template <typename T>
13 void SumReduceDimsOp<CPUContext, true, false>::Compute(
14  int rows,
15  int cols,
16  const T* in_data,
17  const int32_t* lengths_data,
18  T* out_data) {
19  for (int j = 0; j < cols; j++) {
20  T sum = in_data[j];
21  int length = lengths_data == nullptr ? rows : lengths_data[j];
22  for (int i = 1; i < length; i++) {
23  sum += in_data[i * cols + j];
24  }
25  out_data[j] = sum;
26  }
27 }
28 
29 // ReduceBackSum: rowwise sum
30 template <>
31 template <typename T>
32 void SumReduceDimsOp<CPUContext, false, false>::Compute(
33  int rows,
34  int cols,
35  const T* in_data,
36  const int32_t* lengths_data,
37  T* out_data) {
38  for (int i = 0; i < rows; i++) {
39  int offset = i * cols;
40  T sum = in_data[offset];
41  int length = lengths_data == nullptr ? cols : lengths_data[i];
42  for (int j = 1; j < length; j++) {
43  sum += in_data[offset + j];
44  }
45  out_data[i] = sum;
46  }
47 }
48 
49 // ReduceFrontSumGradient
50 template <>
51 template <typename T>
52 void SumReduceDimsGradientOp<CPUContext, true, false>::Compute(
53  int rows,
54  int cols,
55  const T* dYdata,
56  const int* lengths_data,
57  T* dXdata) {
58  for (int i = 0; i < rows * cols; i++) {
59  int row = i / cols;
60  int col = i % cols;
61  if (lengths_data == nullptr || row < lengths_data[col]) {
62  dXdata[i] = dYdata[col];
63  } else {
64  dXdata[i] = 0;
65  }
66  }
67 }
68 
69 // ReduceBackSumGradient
70 template <>
71 template <typename T>
72 void SumReduceDimsGradientOp<CPUContext, false, false>::Compute(
73  int rows,
74  int cols,
75  const T* dYdata,
76  const int* lengths_data,
77  T* dXdata) {
78  for (int i = 0; i < rows * cols; i++) {
79  int row = i / cols;
80  int col = i % cols;
81  if (lengths_data == nullptr || col < lengths_data[row]) {
82  dXdata[i] = dYdata[row];
83  } else {
84  dXdata[i] = 0;
85  }
86  }
87 }
88 
// CPU registrations for ReduceFrontSum and its gradient operator.
REGISTER_CPU_OPERATOR(ReduceFrontSum, SumReduceDimsOp<CPUContext, true, false>);
REGISTER_CPU_OPERATOR(
    ReduceFrontSumGradient,
    SumReduceDimsGradientOp<CPUContext, true, false>);
93 
95  using GradientMakerBase::GradientMakerBase;
96  vector<OperatorDef> GetGradientDefs() override {
97  vector<string> grad_in = {GO(0), I(0)};
98  if (def_.input_size() == 2) {
99  grad_in.push_back(I(1));
100  }
101  return SingleGradientDef(
102  "ReduceFrontSumGradient", "", grad_in, vector<string>{GI(0)});
103  }
104 };
105 
106 REGISTER_GRADIENT(ReduceFrontSum, GetReduceFrontSumGradient);
107 
108 REGISTER_CPU_OPERATOR(ReduceBackSum, SumReduceDimsOp<CPUContext, false, false>);
109 REGISTER_CPU_OPERATOR(
110  ReduceBackSumGradient,
112 
114  using GradientMakerBase::GradientMakerBase;
115  vector<OperatorDef> GetGradientDefs() override {
116  vector<string> grad_in = {GO(0), I(0)};
117  if (def_.input_size() == 2) {
118  grad_in.push_back(I(1));
119  }
120  return SingleGradientDef(
121  "ReduceBackSumGradient", "", grad_in, vector<string>{GI(0)});
122  }
123 };
124 
125 REGISTER_GRADIENT(ReduceBackSum, GetReduceBackSumGradient);
126 
// Shape inference shared by all front/back reducers: enforces 1 or 2 inputs,
// reads the "num_reduce_dim" argument (default 1), and keeps the trailing
// (front reducer) or leading (back reducer) non-reduced dimensions of in[0].
// Intended for use inside a TensorInferenceFunction lambda, where `def` and
// `in` are in scope.
#define REDUCTION_OP_SHAPE_INFERENCE(is_front_reducer)                      \
  CAFFE_ENFORCE_LE(1, in.size());                                           \
  CAFFE_ENFORCE_GE(2, in.size());                                           \
  ArgumentHelper helper(def);                                               \
  int num_reduce_dims = helper.GetSingleArgument<int>("num_reduce_dim", 1); \
  int start_index = is_front_reducer ? num_reduce_dims : 0;                 \
  int end_index = is_front_reducer ? in[0].dims_size()                      \
                                   : in[0].dims_size() - num_reduce_dims;   \
  vector<int> output_shape;                                                 \
  for (int i = start_index; i < end_index; ++i) {                           \
    output_shape.push_back(in[0].dims(i));                                  \
  }                                                                         \
  return vector<TensorShape>{                                               \
      CreateTensorShape(output_shape, in[0].data_type())};
141 
// Schema for ReduceFrontSum: 1 required input (data), 1 optional (lengths).
OPERATOR_SCHEMA(ReduceFrontSum)
    .NumInputs(1, 2)
    .NumOutputs(1)
    .Arg("num_reduce_dims", "Number of dimensions to reduce.")
    .SetDoc(R"DOC(
Reduces the input tensor along the first dimension of the input
tensor by applying 'Sum'. When lengths is given, sum is only computed
with subsets of elements correspondingly.
)DOC")
    .Input(0, "data_in", "(T<D1..., Dn>) Input data.")
    .Input(
        1,
        "lengths",
        "Num of elements in each sample, should have size D2 x D3 x ... x Dn.")
    .TensorInferenceFunction([](const OperatorDef& def,
                                const vector<TensorShape>& in) {
      REDUCTION_OP_SHAPE_INFERENCE(true)
    });
// Gradient takes (dY, X[, lengths]) and produces dX.
OPERATOR_SCHEMA(ReduceFrontSumGradient).NumInputs(2, 3).NumOutputs(1);
161 
// Schema for ReduceBackSum: 1 required input (data), 1 optional (lengths).
OPERATOR_SCHEMA(ReduceBackSum)
    .NumInputs(1, 2)
    .NumOutputs(1)
    .Arg("num_reduce_dims", "Number of dimensions to reduce.")
    .SetDoc(R"DOC(
Reduces the input tensor along the last dimension of the
input tensor by applying 'Sum'. When lengths is given, sum is only computed
with subsets of elements correspondingly.
)DOC")
    .Input(0, "data_in", "(T<D1..., Dn>) Input data.")
    .Input(
        1,
        "lengths",
        "Num of elements in each sample, should have size D1 x D2 x ... x D(n-1).")
    .TensorInferenceFunction([](const OperatorDef& def,
                                const vector<TensorShape>& in) {
      REDUCTION_OP_SHAPE_INFERENCE(false)
    });
// Gradient takes (dY, X[, lengths]) and produces dX.
OPERATOR_SCHEMA(ReduceBackSumGradient).NumInputs(2, 3).NumOutputs(1);
181 
182 /***
183  Mean Ops
184 ***/
185 
186 // ReduceFrontMean: columnwise mean
187 template <>
188 template <typename T>
190  int rows,
191  int cols,
192  const T* in_data,
193  const int32_t* lengths_data,
194  T* out_data) {
195  for (int j = 0; j < cols; j++) {
196  T sum = in_data[j];
197  int length = lengths_data == nullptr ? rows : lengths_data[j];
198  for (int i = 1; i < length; i++) {
199  sum += in_data[i * cols + j];
200  }
201  out_data[j] = sum / length;
202  }
203 }
204 
205 // ReduceBackMean: rowwise mean
206 template <>
207 template <typename T>
209  int rows,
210  int cols,
211  const T* in_data,
212  const int32_t* lengths_data,
213  T* out_data) {
214  for (int i = 0; i < rows; i++) {
215  int offset = i * cols;
216  T sum = in_data[offset];
217  int length = lengths_data == nullptr ? cols : lengths_data[i];
218  for (int j = 1; j < length; j++) {
219  sum += in_data[offset + j];
220  }
221  out_data[i] = sum / length;
222  }
223 }
224 
225 // ReduceFrontMeanGradient
226 template <>
227 template <typename T>
229  int rows,
230  int cols,
231  const T* dYdata,
232  const int* lengths_data,
233  T* dXdata) {
234  for (int i = 0; i < rows * cols; i++) {
235  int row = i / cols;
236  int col = i % cols;
237  if (lengths_data == nullptr) {
238  dXdata[i] = dYdata[col] / rows;
239  } else if (row < lengths_data[col]) {
240  dXdata[i] = dYdata[col] / lengths_data[col];
241  } else {
242  dXdata[i] = 0;
243  }
244  }
245 }
246 
247 // ReduceBackMeanGradient
248 template <>
249 template <typename T>
251  int rows,
252  int cols,
253  const T* dYdata,
254  const int* lengths_data,
255  T* dXdata) {
256  for (int i = 0; i < rows * cols; i++) {
257  int row = i / cols;
258  int col = i % cols;
259  if (lengths_data == nullptr) {
260  dXdata[i] = dYdata[row] / cols;
261  } else if (col < lengths_data[row]) {
262  dXdata[i] = dYdata[row] / lengths_data[row];
263  } else {
264  dXdata[i] = 0;
265  }
266  }
267 }
268 
269 REGISTER_CPU_OPERATOR(ReduceFrontMean, SumReduceDimsOp<CPUContext, true, true>);
270 REGISTER_CPU_OPERATOR(
271  ReduceFrontMeanGradient,
273 
275  using GradientMakerBase::GradientMakerBase;
276  vector<OperatorDef> GetGradientDefs() override {
277  vector<string> grad_in = {GO(0), I(0)};
278  if (def_.input_size() == 2) {
279  grad_in.push_back(I(1));
280  }
281  return SingleGradientDef(
282  "ReduceFrontMeanGradient", "", grad_in, vector<string>{GI(0)});
283  }
284 };
285 
286 REGISTER_GRADIENT(ReduceFrontMean, GetReduceFrontMeanGradient);
287 
// Schema for ReduceFrontMean: 1 required input (data), 1 optional (lengths).
OPERATOR_SCHEMA(ReduceFrontMean)
    .NumInputs(1, 2)
    .NumOutputs(1)
    .Arg("num_reduce_dims", "Number of dimensions to reduce.")
    .SetDoc(R"DOC(
Reduces the input tensor along the first dimension of the input
tensor by applying 'Mean'. When lengths is given, mean is only computed
with subsets of elements correspondingly.
)DOC")
    .Input(0, "data_in", "(T<D1..., Dn>) Input data.")
    .Input(
        1,
        "lengths",
        "Num of elements in each sample, should have size D2 x D3 x ... x Dn.")
    .TensorInferenceFunction([](const OperatorDef& def,
                                const vector<TensorShape>& in) {
      REDUCTION_OP_SHAPE_INFERENCE(true)
    });
// Gradient takes (dY, X[, lengths]) and produces dX.
OPERATOR_SCHEMA(ReduceFrontMeanGradient).NumInputs(2, 3).NumOutputs(1);
307 
308 REGISTER_CPU_OPERATOR(ReduceBackMean, SumReduceDimsOp<CPUContext, false, true>);
309 REGISTER_CPU_OPERATOR(
310  ReduceBackMeanGradient,
312 
314  using GradientMakerBase::GradientMakerBase;
315  vector<OperatorDef> GetGradientDefs() override {
316  vector<string> grad_in = {GO(0), I(0)};
317  if (def_.input_size() == 2) {
318  grad_in.push_back(I(1));
319  }
320  return SingleGradientDef(
321  "ReduceBackMeanGradient", "", grad_in, vector<string>{GI(0)});
322  }
323 };
324 
325 REGISTER_GRADIENT(ReduceBackMean, GetReduceBackMeanGradient);
326 
// Schema for ReduceBackMean: 1 required input (data), 1 optional (lengths).
OPERATOR_SCHEMA(ReduceBackMean)
    .NumInputs(1, 2)
    .NumOutputs(1)
    .Arg("num_reduce_dims", "Number of dimensions to reduce.")
    .SetDoc(R"DOC(
Reduces the input tensor along the last dimension of the input
tensor by applying 'Mean'. When lengths is given, mean is only computed
with subsets of elements correspondingly.
)DOC")
    .Input(0, "data_in", "(T<D1..., Dn>) Input data.")
    .Input(
        1,
        "lengths",
        "Num of elements in each sample, should have size D1 x D2 x ... x D(n-1).")
    .TensorInferenceFunction([](const OperatorDef& def,
                                const vector<TensorShape>& in) {
      REDUCTION_OP_SHAPE_INFERENCE(false)
    });
// Gradient takes (dY, X[, lengths]) and produces dX.
OPERATOR_SCHEMA(ReduceBackMeanGradient).NumInputs(2, 3).NumOutputs(1);
346 
347 /***
348  Max Ops
349 ***/
350 
351 // ReduceFrontMax
352 template <>
354  int rows,
355  int cols,
356  const float* data,
357  const int32_t* lengths_data,
358  float* out_data) {
359  for (int i = 0; i < cols; i++) {
360  float mx = data[i];
361  int length = lengths_data == nullptr ? rows : lengths_data[i];
362  for (int j = 1; j < length; j++) {
363  mx = std::max(mx, data[j * cols + i]);
364  }
365  out_data[i] = mx;
366  }
367 }
368 
369 // ReduceBackMax
370 template <>
372  int rows,
373  int cols,
374  const float* data,
375  const int32_t* lengths_data,
376  float* out_data) {
377  for (int i = 0; i < rows; i++) {
378  float mx = data[i * cols];
379  int length = lengths_data == nullptr ? cols : lengths_data[i];
380  for (int j = 1; j < length; j++) {
381  mx = std::max(mx, data[i * cols + j]);
382  }
383  out_data[i] = mx;
384  }
385 }
386 
387 // ReduceFrontMaxGradient
388 template <>
390  int rows,
391  int cols,
392  const float* dYdata,
393  const float* Xdata,
394  const float* Ydata,
395  const int32_t* lengths_data,
396  float* dXdata) {
397  int len = cols * rows;
398  for (int i = 0; i < len; i++) {
399  int col = i % cols;
400  int row = i / cols;
401  if (lengths_data != nullptr && row >= lengths_data[col]) {
402  dXdata[i] = 0.0f;
403  } else {
404  dXdata[i] = Xdata[i] == Ydata[col] ? dYdata[col] : 0.0f;
405  }
406  }
407 }
408 
409 // ReduceBackMaxGradient
410 template <>
412  int rows,
413  int cols,
414  const float* dYdata,
415  const float* Xdata,
416  const float* Ydata,
417  const int32_t* lengths_data,
418  float* dXdata) {
419  int len = cols * rows;
420  for (int i = 0; i < len; i++) {
421  int row = i / cols;
422  int col = i % cols;
423  if (lengths_data == nullptr || col < lengths_data[row]) {
424  dXdata[i] = Xdata[i] == Ydata[row] ? dYdata[row] : 0.0f;
425  } else {
426  dXdata[i] = 0.0f;
427  }
428  }
429 }
430 
431 REGISTER_CPU_OPERATOR(ReduceFrontMax, MaxReduceDimsOp<float, CPUContext, true>);
432 REGISTER_CPU_OPERATOR(
433  ReduceFrontMaxGradient,
435 
436 REGISTER_CPU_OPERATOR(ReduceBackMax, MaxReduceDimsOp<float, CPUContext, false>);
437 REGISTER_CPU_OPERATOR(
438  ReduceBackMaxGradient,
440 
442  using GradientMakerBase::GradientMakerBase;
443  vector<OperatorDef> GetGradientDefs() override {
444  vector<string> grad_in = {GO(0), I(0), O(0)};
445  if (def_.input_size() == 2) {
446  grad_in.push_back(I(1));
447  }
448  return SingleGradientDef(
449  "ReduceFrontMaxGradient", "", grad_in, vector<string>{GI(0)});
450  }
451 };
452 
453 REGISTER_GRADIENT(ReduceFrontMax, GetReduceFrontMaxGradient);
454 
456  using GradientMakerBase::GradientMakerBase;
457  vector<OperatorDef> GetGradientDefs() override {
458  vector<string> grad_in = {GO(0), I(0), O(0)};
459  if (def_.input_size() == 2) {
460  grad_in.push_back(I(1));
461  }
462  return SingleGradientDef(
463  "ReduceBackMaxGradient", "", grad_in, vector<string>{GI(0)});
464  }
465 };
466 
467 REGISTER_GRADIENT(ReduceBackMax, GetReduceBackMaxGradient);
468 
// Schema for ReduceFrontMax: 1 required input (data), 1 optional (lengths).
OPERATOR_SCHEMA(ReduceFrontMax)
    .NumInputs(1, 2)
    .NumOutputs(1)
    .Arg("num_reduce_dims", "Number of dimensions to reduce")
    .SetDoc(R"DOC(
Reduces the input tensor along the first dimension of the input
tensor by applying 'Max'. When lengths is given, max is only computed
with subsets of elements correspondingly.
)DOC")
    .Input(0, "data_in", "(T<D1..., Dn>) Input data.")
    .Input(
        1,
        "lengths",
        "Num of elements in each sample, should have size D2 x D3 ... x Dn.")
    .TensorInferenceFunction([](const OperatorDef& def,
                                const vector<TensorShape>& in) {
      REDUCTION_OP_SHAPE_INFERENCE(true)
    });
// Gradient takes (dY, X, Y[, lengths]) and produces dX.
OPERATOR_SCHEMA(ReduceFrontMaxGradient).NumInputs(3, 4).NumOutputs(1);
488 
// Schema for ReduceBackMax: 1 required input (data), 1 optional (lengths).
OPERATOR_SCHEMA(ReduceBackMax)
    .NumInputs(1, 2)
    .NumOutputs(1)
    .Arg("num_reduce_dims", "Number of dimensions to reduce")
    .SetDoc(R"DOC(
Reduces the input tensor along the last dimension of the
input tensor by applying 'Max'. When lengths is given, max is only computed
with subsets of elements correspondingly.
)DOC")
    .Input(0, "data_in", "(T<D1..., Dn>) Input data.")
    .Input(
        1,
        "lengths",
        "Num of elements in each sample, should have size D1 x D2 x ... x D(n-1).")
    .TensorInferenceFunction([](const OperatorDef& def,
                                const vector<TensorShape>& in) {
      REDUCTION_OP_SHAPE_INFERENCE(false)
    });
// Gradient takes (dY, X, Y[, lengths]) and produces dX.
OPERATOR_SCHEMA(ReduceBackMaxGradient).NumInputs(3, 4).NumOutputs(1);
508 
509 #undef REDUCTION_OP_SHAPE_INFERENCE
510 
511 } // namespace caffe2
// (Documentation-extraction residue, preserved as comments:)
// A global dictionary that holds information about what Caffe2 modules have
// been loaded in the current runtime.
// static vector<OperatorDef> SingleGradientDef(const Args&... args):
// a helper function to allow one to create one single operator def, which is
// usually the case for many simple gradient makers.