Caffe2 - C++ API
A deep learning, cross-platform ML framework
elementwise_op.cc
1 #include "caffe2/operators/elementwise_op.h"
2 
3 namespace caffe2 {
4 
// For some comparison and logical operators, Eigen does not have vectorized
// math, so we fall back to plain element-by-element loops.
//
// NAIVE_FUNCTOR(name, op, input_type, output_type) defines the struct
// Naive<name>Functor and then invokes REGISTER_CPU_OPERATOR for `name` with
// BinaryElementwiseOp<input_type, CPUContext, Naive<name>Functor, output_type>.
//
// The generated functor provides three member templates; none of them uses
// its trailing CPUContext* argument:
//   Run<b_is_scalar>   out[i] = op(a[i], b[i]) for i in [0, n). When
//                      b_is_scalar is non-zero, b[0] is used for every i.
//   RunWithBroadcast   a and out are indexed as [i * n + j] with i in
//                      [0, pre) and j in [0, n); b is read at index j.
//   RunWithBroadcast2  a and out are indexed as [(i * n + j) * post + k] with
//                      i in [0, pre), j in [0, n), k in [0, post); b is read
//                      at index j.
//
// Loop counters are size_t so that they have the same type as the extents
// they are compared against; int counters would mix signed and unsigned
// operands and overflow for extents above INT_MAX.
#define NAIVE_FUNCTOR(name, op, input_type, output_type) \
  struct Naive##name##Functor { \
    template <int b_is_scalar, typename T, typename R> \
    inline void Run(size_t n, const T* a, const T* b, R* out, CPUContext*) { \
      for (size_t i = 0; i < n; ++i) { \
        out[i] = op(a[i], b[b_is_scalar ? 0 : i]); \
      } \
    } \
    template <typename T, typename R> \
    void RunWithBroadcast( \
        const T* a, \
        const T* b, \
        R* out, \
        size_t pre, \
        size_t n, \
        CPUContext*) { \
      for (size_t i = 0; i < pre; ++i) { \
        for (size_t j = 0; j < n; ++j) { \
          out[i * n + j] = op(a[i * n + j], b[j]); \
        } \
      } \
    } \
    template <typename T, typename R> \
    void RunWithBroadcast2( \
        const T* a, \
        const T* b, \
        R* out, \
        size_t pre, \
        size_t n, \
        size_t post, \
        CPUContext*) { \
      for (size_t i = 0; i < pre; ++i) { \
        for (size_t j = 0; j < n; ++j) { \
          for (size_t k = 0; k < post; ++k) { \
            out[(i * n + j) * post + k] = op(a[(i * n + j) * post + k], b[j]); \
          } \
        } \
      } \
    } \
  }; \
  REGISTER_CPU_OPERATOR( \
      name, \
      BinaryElementwiseOp< \
          input_type, \
          CPUContext, \
          Naive##name##Functor, \
          output_type>)
54 
55 #define NAIVE_LT(x, y) ((x) < (y))
56 NAIVE_FUNCTOR(LT, NAIVE_LT, NumericTypes, FixedType<bool>);
57 #undef NAIVE_LT
58 #define NAIVE_LE(x, y) ((x) <= (y))
59 NAIVE_FUNCTOR(LE, NAIVE_LE, NumericTypes, FixedType<bool>);
60 #undef NAIVE_LE
61 #define NAIVE_GT(x, y) ((x) > (y))
62 NAIVE_FUNCTOR(GT, NAIVE_GT, NumericTypes, FixedType<bool>);
63 #undef NAIVE_GT
64 #define NAIVE_GE(x, y) ((x) >= (y))
65 NAIVE_FUNCTOR(GE, NAIVE_GE, NumericTypes, FixedType<bool>);
66 #undef NAIVE_GE
67 #define NAIVE_EQ(x, y) ((x) == (y))
68 NAIVE_FUNCTOR(EQ, NAIVE_EQ, IntBoolTypes, FixedType<bool>);
69 #undef NAIVE_EQ
70 #define NAIVE_AND(x, y) ((x) & (y))
71 NAIVE_FUNCTOR(And, NAIVE_AND, BoolTypes, FixedType<bool>);
72 #undef NAIVE_AND
73 #define NAIVE_OR(x, y) ((x) | (y))
74 NAIVE_FUNCTOR(Or, NAIVE_OR, BoolTypes, FixedType<bool>);
75 #undef NAIVE_OR
76 #define NAIVE_XOR(x, y) ((x) ^ (y))
77 NAIVE_FUNCTOR(Xor, NAIVE_XOR, BoolTypes, FixedType<bool>);
78 #undef NAIVE_XOR
79 
80 struct NotFunctor {
81  inline void operator()(const int n, const bool* x, bool* y, CPUContext*) {
82  for (int i = 0; i < n; ++i) {
83  y[i] = !x[i];
84  }
85  }
86 };
87 REGISTER_CPU_OPERATOR(
88  Not,
90 
91 template <typename T>
92 void SRLHelper::sum2one(const T* x, T* y, size_t n) {
93  *y = ConstEigenArrayMap<T>(x, n, 1).sum();
94 }
95 
96 template <typename T>
97 void SRLHelper::RunWithBroadcastFront(
98  const T* x,
99  T* y,
100  size_t pre,
101  size_t n,
102  CPUContext*) {
103  EigenArrayMap<T>(y, n, 1) = ConstEigenArrayMap<T>(x, n, pre).rowwise().sum();
104 }
105 
106 template <typename T>
107 void SRLHelper::RunWithBroadcastBack(
108  const T* x,
109  T* y,
110  size_t post,
111  size_t n,
112  CPUContext*) {
113  EigenArrayMap<T>(y, 1, n) = ConstEigenArrayMap<T>(x, post, n).colwise().sum();
114 }
115 
116 template <typename T>
117 void SRLHelper::RunWithBroadcast2(
118  const T* a,
119  T* y,
120  size_t pre,
121  size_t n,
122  size_t post,
123  CPUContext*) {
124  for (int i = 0; i < n; ++i) {
125  y[i] = 0;
126  for (int j = 0; j < pre; ++j) {
127  for (int k = 0; k < post; ++k) {
128  y[i] += a[(j * n + i) * post + k];
129  }
130  }
131  }
132 }
133 
134 template <>
135 template <typename T>
137  const auto& A = Input(0);
138  const auto& B = Input(1);
139  auto* C = Output(0);
140  CAFFE_ENFORCE(&B != C, "In-place is not allowed.");
141  C->ResizeLike(B);
142  const T* Adata = A.template data<T>();
143  auto* Cdata = C->template mutable_data<T>();
144  if (B.size() == 1) {
145  auto count = A.size();
146  SRLHelper::sum2one<T>(Adata, Cdata, count);
147  } else {
148  size_t pre, n, post;
149  std::tie(pre, n, post) = calculate_broadcast_sizes(A, B, axis_);
150  if (post == 1) {
151  SRLHelper::RunWithBroadcastFront<T>(Adata, Cdata, pre, n, &context_);
152  } else if (pre == 1) {
153  SRLHelper::RunWithBroadcastBack<T>(Adata, Cdata, post, n, &context_);
154  } else {
155  SRLHelper::RunWithBroadcast2<T>(Adata, Cdata, pre, n, post, &context_);
156  }
157  }
158  return true;
159 }
160 REGISTER_CPU_OPERATOR(SumReduceLike, SumReduceLikeOp<CPUContext>);
161 
162 } // namespace caffe2
The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement...
Definition: context.h:66
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...