Caffe2 - C++ API
A deep learning, cross platform ML framework
elementwise_op.h
1 #ifndef CAFFE2_OPERATORS_ELEMENTWISE_OP_H_
2 #define CAFFE2_OPERATORS_ELEMENTWISE_OP_H_
3 
4 #include "caffe2/core/common_omp.h"
5 #include "caffe2/core/context.h"
6 #include "caffe2/core/logging.h"
7 #include "caffe2/core/operator.h"
8 #include "caffe2/core/tensor.h"
9 #include "caffe2/utils/math.h"
10 
11 #include <tuple>
12 
13 namespace caffe2 {
14 
// Type lists consumed by DispatchHelper to instantiate elementwise kernels
// for a fixed set of runtime element types.
using NumericTypes = TensorTypes<int32_t, int64_t, float, double>;
using IntTypes = TensorTypes<int32_t, int64_t>;
using BoolTypes = TensorTypes<bool>;
using IntBoolTypes = TensorTypes<int32_t, int64_t, bool>; // discrete types
19 
21  template <typename T>
22  using type = T;
23 };
24 
// Type map that ignores the input element type T and always yields R as the
// output element type (used e.g. by ops whose output type is fixed,
// regardless of input type).
template <typename R>
struct FixedType {
  template <typename T>
  using type = R;
};
30 
31 template <
32  typename InputTypes,
33  class Context,
34  class Functor,
35  class TypeMap = SameTypeAsInput>
36 class UnaryElementwiseWithArgsOp : public Operator<Context> {
37  public:
38  USE_OPERATOR_CONTEXT_FUNCTIONS;
39  UnaryElementwiseWithArgsOp(const OperatorDef& operator_def, Workspace* ws)
40  : Operator<Context>(operator_def, ws), functor_(*this) {}
41 
42  bool RunOnDevice() override {
43  return DispatchHelper<InputTypes>::call(this, Input(0));
44  }
45 
46  template <typename T>
47  bool DoRunWithType() {
48  auto& input = Input(0);
49  auto* output = Output(0);
50  output->ResizeLike(input);
51  using R = typename TypeMap::template type<T>;
52  functor_(
53  input.size(),
54  input.template data<T>(),
55  output->template mutable_data<R>(),
56  &context_);
57  return true;
58  }
59 
60  private:
61  Functor functor_;
62 };
63 
69 template <typename Functor>
71  explicit WithDefaultConstructor(OperatorBase& /*op*/) {}
72 
73  template <typename In, typename Out, typename Context>
74  void operator()(int n, const In* in, Out* out, Context* c) {
75  Functor()(n, in, out, c);
76  }
77 };
78 
84 template <
85  typename InputTypes,
86  class Context,
87  class Functor,
88  class OutputType = SameTypeAsInput>
90  InputTypes,
91  Context,
93  OutputType>;
94 
95 template <typename Context>
96 std::tuple<size_t, size_t, size_t> calculate_broadcast_sizes(
97  const Tensor<Context>& A,
98  const Tensor<Context>& B,
99  int axis) {
100  CAFFE_ENFORCE_GE(
101  A.ndim(),
102  B.ndim(),
103  "If you are doing broadcasting, input1 should have "
104  "a smaller or equal number of dimensions.");
105  if (axis == -1) {
106  axis = A.ndim() - B.ndim();
107  }
108  CAFFE_ENFORCE(
109  axis >= 0 && axis <= A.ndim() - B.ndim(),
110  "Broadcast axis should be in the range of"
111  "[0, A.ndim() - B.ndim()], but axis = ",
112  axis);
113 
114  int b_dim_start = 0;
115  while (b_dim_start < B.ndim() && B.dim(b_dim_start) == 1) {
116  ++b_dim_start;
117  }
118  int b_dim_end = B.ndim() - 1;
119  while (b_dim_end >= b_dim_start && B.dim(b_dim_end) == 1) {
120  --b_dim_end;
121  }
122  size_t pre = 1, n = 1, post = 1;
123  for (int i = 0; i < axis + b_dim_start; ++i) {
124  pre *= A.dim(i);
125  }
126  for (int i = b_dim_start; i <= b_dim_end; ++i) {
127  CAFFE_ENFORCE_EQ(
128  A.dim(i + axis), B.dim(i), "Broadcast dimension mismatch.");
129  n *= B.dim(i);
130  }
131  for (int i = axis + b_dim_end + 1; i < A.ndim(); ++i) {
132  post *= A.dim(i);
133  }
134  return std::make_tuple(pre, n, post);
135 }
136 
// Performs a binary elementwise operation C = functor(A, B), optionally
// with broadcasting of B against A.
//
// Operator arguments:
//   broadcast: if nonzero, B may have fewer dimensions than A and is
//              broadcast along `axis`.
//   axis:      explicit broadcast axis into A; -1 aligns B with A's
//              trailing dimensions. Mutually exclusive with axis_str.
//   axis_str:  single-character semantic axis name, resolved to an index
//              by looking it up in `order`.
//   order:     dimension-order string (default "NCHW") used to resolve
//              axis_str.
template <
    typename InputTypes,
    class Context,
    class Functor,
    class TypeMap = SameTypeAsInput>
class BinaryElementwiseOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  BinaryElementwiseOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        OP_SINGLE_ARG(bool, "broadcast", enable_broadcast_, 0),
        OP_SINGLE_ARG(int, "axis", axis_, -1),
        OP_SINGLE_ARG(string, "axis_str", axis_str_, ""),
        OP_SINGLE_ARG(string, "order", order_, "NCHW"),
        functor_() {
    // Figure out the correct axis to use.
    if (enable_broadcast_) {
      if (axis_ != -1) {
        // Get axis from an explicit axis argument.
        CAFFE_ENFORCE_EQ(
            axis_str_.size(),
            0,
            "Args axis and axis_str cannot be used simultaneously.");
      } else if (axis_str_.size()) {
        // Get the axis index semantically.
        CAFFE_ENFORCE_EQ(
            axis_str_.size(), 1, "Unsupported axis string", axis_str_);
        size_t semantic_axis_ = order_.find(axis_str_);
        CAFFE_ENFORCE_NE(
            semantic_axis_,
            string::npos,
            "Unrecognizable axis string ",
            axis_str_,
            " from order string ",
            order_);
        axis_ = semantic_axis_;
      }
    } else {
      // Without broadcasting, neither axis selector may be specified.
      CAFFE_ENFORCE(
          axis_ == -1 && axis_str_.size() == 0,
          "Do not specify axis or axis_str if broadcast is not enabled.");
    }
  }

  bool RunOnDevice() override {
    // Dispatch on the element type of the first input.
    return DispatchHelper<InputTypes>::call(this, Input(0));
  }

  template <typename T>
  bool DoRunWithType() {
    const auto& A = Input(0);
    const auto& B = Input(1);
    auto* C = Output(0);
    // Broadcasting reads B while writing C, so C may alias A but not B.
    CAFFE_ENFORCE(
        &B != C || !enable_broadcast_,
        "In-place is allowed only with the first tensor when broadcasting");
    C->ResizeLike(A);
    const T* Adata = A.template data<T>();
    const T* Bdata = B.template data<T>();
    auto* Cdata =
        C->template mutable_data<typename TypeMap::template type<T>>();
    if (!enable_broadcast_) {
      // Same-shape fast path: dims must match exactly.
      CAFFE_ENFORCE_EQ(
          A.dims(),
          B.dims(),
          "Dimension mismatch - did you forget to set broadcast=1?");
      functor_.template Run<false>(A.size(), Adata, Bdata, Cdata, &context_);
    } else if (B.size() == 1) {
      // B is a scalar; let the functor handle the scalar case directly.
      functor_.template Run<true>(A.size(), Adata, Bdata, Cdata, &context_);
    } else {
      // General broadcast: collapse A into [pre, n, post] around B.
      size_t pre, n, post;
      std::tie(pre, n, post) = calculate_broadcast_sizes(A, B, axis_);
      if (post == 1) {
        functor_.RunWithBroadcast(Adata, Bdata, Cdata, pre, n, &context_);
      } else {
        functor_.RunWithBroadcast2(
            Adata, Bdata, Cdata, pre, n, post, &context_);
      }
    }
    return true;
  }

 private:
  bool enable_broadcast_;
  int axis_;
  string axis_str_;
  string order_;
  Functor functor_;
};
244 
245 template <typename Functor>
247  template <bool b_is_scalar, typename T, typename R, typename Context>
248  inline void Run(size_t n, const T* a, const T* b, R* out, Context* c) {
249  if (b_is_scalar) {
250  CAFFE_THROW("Broadcast not supported.");
251  } else {
252  Functor().Run(n, a, b, out, c);
253  }
254  }
255  template <typename T, typename R, typename Context>
256  inline void RunWithBroadcast(
257  const T* /*a*/,
258  const T* /*b*/,
259  R* /*out*/,
260  size_t /*pre*/,
261  size_t /*n*/,
262  Context*) {
263  CAFFE_NOT_IMPLEMENTED;
264  }
265  template <typename T, typename R, typename Context>
266  inline void RunWithBroadcast2(
267  const T* /*a*/,
268  const T* /*b*/,
269  R* /*out*/,
270  size_t /*pre*/,
271  size_t /*n*/,
272  size_t /*post*/,
273  Context*) {
274  CAFFE_NOT_IMPLEMENTED;
275  }
276 };
277 
// Gradient operator for elementwise division.
// The CPU specialization (later in this file) takes inputs Y, Z, dZ and
// produces dX, dY via ElementWiseDivide.
template <class Context>
class DivGradientOp final : public Operator<Context> {
 public:
  USE_SIMPLE_CTOR_DTOR(DivGradientOp);
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  // Implemented per-context in the corresponding .cc/.cu files.
  bool RunOnDevice() override;
};
287 
// CPU reduction helpers used by SumReduceLikeOp<CPUContext>. Definitions
// live in the .cc file; the parameter names suggest `a` is reduced into
// `y` over the leading (`pre`) and/or trailing (`post`) extents.
// NOTE(review): semantics inferred from signatures — confirm against the
// implementations before relying on the exact reduction layout.
namespace SRLHelper {

// Sum all n elements of a into a single output value.
template <typename T>
void sum2one(const T* a, T* y, size_t n);

// Reduce over the leading `pre` extent of a [pre, n] view of a.
template <typename T>
void RunWithBroadcastFront(const T* a, T* y, size_t pre, size_t n, CPUContext*);

// Reduce over the trailing `post` extent of an [n, post] view of a.
template <typename T>
void RunWithBroadcastBack(const T* a, T* y, size_t post, size_t n, CPUContext*);

// Reduce over both the leading `pre` and trailing `post` extents of a
// [pre, n, post] view of a.
template <typename T>
void RunWithBroadcast2(
    const T* a,
    T* y,
    size_t pre,
    size_t n,
    size_t post,
    CPUContext*);

} // namespace SRLHelper
309 
// Sum reduction operator that is used for computing the gradient in cases
// where the forward op is in broadcast mode: it reduces Input(0) down to
// the shape implied by the broadcast axis arguments.
//
// Arguments mirror BinaryElementwiseOp: `axis` (explicit index, -1 for
// default) and `axis_str` (single character resolved against `order`,
// default "NCHW") are mutually exclusive.
template <class Context>
class SumReduceLikeOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  SumReduceLikeOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        OP_SINGLE_ARG(int, "axis", axis_, -1),
        OP_SINGLE_ARG(string, "axis_str", axis_str_, ""),
        OP_SINGLE_ARG(string, "order", order_, "NCHW") {
    if (axis_ != -1) {
      // Get axis from an explicit axis argument.
      CAFFE_ENFORCE_EQ(
          axis_str_.size(),
          0,
          "Args axis and axis_str cannot be used simultaneously.");
    } else if (axis_str_.size()) {
      // Get the axis index semantically.
      CAFFE_ENFORCE_EQ(
          axis_str_.size(), 1, "Unsupported axis string", axis_str_);
      size_t semantic_axis = order_.find(axis_str_);
      CAFFE_ENFORCE_NE(
          semantic_axis,
          string::npos,
          "Unrecognizable axis string ",
          axis_str_,
          " from order string ",
          order_);
      axis_ = semantic_axis;
    }
  }

  bool RunOnDevice() override {
    // Only float and double element types are dispatched.
    return DispatchHelper<TensorTypes<float, double>>::call(this, Input(0));
  }

  // Defined per-context outside this header.
  template <typename T>
  bool DoRunWithType();

 private:
  int axis_;
  string axis_str_;
  string order_;
  // Scratch tensors reused across runs by DoRunWithType (implementation
  // not in this header).
  Tensor<Context> ones_;
  Tensor<Context> sum_buffer_;
};
357 
358 template <class Context>
360  auto& Y = Input(0);
361  auto& Z = Input(1);
362  auto& dZ = Input(2);
363  auto* dX = Output(0);
364  auto* dY = Output(1);
365 
366  dX->ResizeLike(Y);
367  dY->ResizeLike(Y);
368 
369  const float* Ydata = Y.template data<float>();
370  const float* Zdata = Z.template data<float>();
371  const float* dZdata = dZ.template data<float>();
372  float* dXdata = dX->template mutable_data<float>();
373  float* dYdata = dY->template mutable_data<float>();
374 
375  ElementWiseDivide(context_, Y.size(), dXdata, dYdata, dZdata, Ydata, Zdata);
376  return true;
377 }
378 
// For arithmetic operators, Eigen provides a good way to vectorize even
// when broadcasting.
//
// EIGEN_FUNCTOR(name, eigen_op, input_type, output_type) defines
// Eigen##name##Functor with three entry points used by
// BinaryElementwiseOp:
//   Run             - elementwise over n values; if b_is_scalar, b[0] is
//                     applied against every element of a.
//   RunWithBroadcast  - b (length n) applied column-wise across a viewed
//                     as an [n, pre] Eigen array.
//   RunWithBroadcast2 - b (length n) applied row-wise within each of the
//                     `pre` [post, n] slices of a.
// It then registers the functor as CPU operator `name`.
// (No comments inside the macro body: line splicing happens before
// comment removal, so a // comment would swallow the trailing backslash.)
#define EIGEN_FUNCTOR(name, eigen_op, input_type, output_type) \
  struct Eigen##name##Functor { \
    template <int b_is_scalar, typename T, typename R> \
    inline void Run(size_t n, const T* a, const T* b, R* out, CPUContext*) { \
      if (b_is_scalar) { \
        EigenVectorArrayMap<R>(out, n) = \
            eigen_op((ConstEigenVectorArrayMap<T>(a, n)), (b[0])); \
      } else { \
        EigenVectorArrayMap<R>(out, n) = eigen_op( \
            (ConstEigenVectorArrayMap<T>(a, n)), \
            (ConstEigenVectorArrayMap<T>(b, n))); \
      } \
    } \
    template <typename T, typename R> \
    void RunWithBroadcast( \
        const T* a, \
        const T* b, \
        R* out, \
        size_t pre, \
        size_t n, \
        CPUContext*) { \
      EigenArrayMap<R>(out, n, pre) = eigen_op( \
          (ConstEigenArrayMap<T>(a, n, pre).colwise()), \
          (ConstEigenVectorArrayMap<T>(b, n))); \
    } \
    template <typename T, typename R> \
    void RunWithBroadcast2( \
        const T* a, \
        const T* b, \
        R* out, \
        size_t pre, \
        size_t n, \
        size_t post, \
        CPUContext*) { \
      for (int i = 0; i < pre; ++i) { \
        EigenArrayMap<R>(out + i * n * post, post, n) = eigen_op( \
            (ConstEigenArrayMap<T>(a + i * n * post, post, n).rowwise()), \
            (Eigen::Map<const Eigen::Array<T, 1, Eigen::Dynamic>>(b, n))); \
      } \
    } \
  }; \
  REGISTER_CPU_OPERATOR( \
      name, \
      BinaryElementwiseOp< \
          input_type, \
          CPUContext, \
          Eigen##name##Functor, \
          output_type>)
429 
430 } // namespace caffe2
431 
432 #endif // CAFFE2_OPERATORS_ELEMENTWISE_OP_H_
TIndex dim(const int i) const
Returns the i-th dimension of the tensor.
Definition: tensor.h:671
Tensor is the basic class in Caffe2 that stores a contiguous memory with its shape information...
Definition: tensor.h:93
The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement...
Definition: context.h:66
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
A global dictionary that holds information about what Caffe2 modules have been loaded in the current runtime.
Performs a binary elementwise operation (e.g. addition), optionally with broadcasting.
int ndim() const
Returns the number of dimensions of the data.
Definition: tensor.h:589
WithDefaultConstructor is a functor that can be used as the functor of an UnaryElementwiseWithArgsOp...