#ifndef CAFFE2_OPERATORS_ELEMENTWISE_OP_H_
#define CAFFE2_OPERATORS_ELEMENTWISE_OP_H_

#include "caffe2/core/common_omp.h"
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"
#include "caffe2/utils/math.h"

namespace caffe2 {

using NumericTypes = TensorTypes<int32_t, int64_t, float, double>;
using IntTypes = TensorTypes<int32_t, int64_t>;
using BoolTypes = TensorTypes<bool>;
using IntBoolTypes = TensorTypes<int32_t, int64_t, bool>;
// ... (UnaryElementwiseWithArgsOp: class template declaration and constructor
// elided in this listing; it applies a unary functor to every element of the
// input)
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override {
    // ... (dispatches on the input data type to DoRunWithType<T>)
  }

  template <typename T>
  bool DoRunWithType() {
    auto& input = Input(0);
    auto* output = Output(0);
    output->ResizeLike(input);
    using R = typename TypeMap::template type<T>;
    functor_(
        input.size(),
        input.template data<T>(),
        output->template mutable_data<R>(),
        &context_);
    return true;
  }
// WithDefaultConstructor is a functor that can be used as the functor of an
// UnaryElementwiseWithArgsOp: it default-constructs the wrapped Functor on
// each call, so the wrapped functor needs no access to the operator's
// arguments.
template <typename Functor>
struct WithDefaultConstructor {
  // ... (constructor elided in this listing)

  template <typename In, typename Out, typename Context>
  void operator()(int n, const In* in, Out* out, Context* c) {
    Functor()(n, in, out, c);
  }
};
template <typename Context>
std::tuple<size_t, size_t, size_t> calculate_broadcast_sizes(
    const Tensor<Context>& A,
    const Tensor<Context>& B,
    int axis) {
  CAFFE_ENFORCE_GE(
      A.ndim(),
      B.ndim(),
      "If you are doing broadcasting, input1 should have "
      "a smaller or equal number of dimensions.");
  if (axis == -1) {
    axis = A.ndim() - B.ndim();
  }
  CAFFE_ENFORCE(
      axis >= 0 && axis <= A.ndim() - B.ndim(),
      "Broadcast axis should be in the range of "
      "[0, A.ndim() - B.ndim()], but axis = ",
      axis);

  // Strip leading and trailing size-1 dimensions of B; only the remaining
  // "core" dimensions have to match A starting at `axis`.
  int b_dim_start = 0;
  while (b_dim_start < B.ndim() && B.dim(b_dim_start) == 1) {
    ++b_dim_start;
  }
  int b_dim_end = B.ndim() - 1;
  while (b_dim_end >= b_dim_start && B.dim(b_dim_end) == 1) {
    --b_dim_end;
  }
  size_t pre = 1, n = 1, post = 1;
  for (int i = 0; i < axis + b_dim_start; ++i) {
    pre *= A.dim(i);
  }
  for (int i = b_dim_start; i <= b_dim_end; ++i) {
    CAFFE_ENFORCE_EQ(
        A.dim(i + axis), B.dim(i), "Broadcast dimension mismatch.");
    n *= B.dim(i);
  }
  for (int i = axis + b_dim_end + 1; i < A.ndim(); ++i) {
    post *= A.dim(i);
  }
  return std::make_tuple(pre, n, post);
}
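// Worked example (illustration only, not part of the original header): for
// A with dims [2, 3, 4, 5], B with dims [3, 4] and axis = 1, no size-1
// dimensions of B are stripped (b_dim_start = 0, b_dim_end = 1), so
//   pre  = A.dim(0)            = 2
//   n    = B.dim(0) * B.dim(1) = 12
//   post = A.dim(3)            = 5
// i.e. B is reused once for every (pre, post) combination of A's outer and
// inner dimensions.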
// ... (BinaryElementwiseOp: class template declaration elided in this
// listing; it performs a binary operation between its two inputs, with
// optional broadcasting of the second input onto the first)
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  BinaryElementwiseOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        OP_SINGLE_ARG(bool, "broadcast", enable_broadcast_, 0),
        OP_SINGLE_ARG(int, "axis", axis_, -1),
        OP_SINGLE_ARG(string, "axis_str", axis_str_, ""),
        OP_SINGLE_ARG(string, "order", order_, "NCHW") {
    if (enable_broadcast_) {
      if (axis_ != -1) {
        // An explicit axis argument was given.
        CAFFE_ENFORCE_EQ(
            axis_str_.size(),
            0,
            "Args axis and axis_str cannot be used simultaneously.");
      } else if (axis_str_.size()) {
        // Resolve the axis semantically from the order string.
        CAFFE_ENFORCE_EQ(
            axis_str_.size(), 1, "Unsupported axis string", axis_str_);
        size_t semantic_axis_ = order_.find(axis_str_);
        CAFFE_ENFORCE_NE(
            semantic_axis_,
            string::npos,
            "Unrecognizable axis string ",
            axis_str_,
            " from order string ",
            order_);
        axis_ = semantic_axis_;
      }
    } else {
      CAFFE_ENFORCE(
          axis_ == -1 && axis_str_.size() == 0,
          "Do not specify axis or axis_str if broadcast is not enabled.");
    }
  }
  bool RunOnDevice() override {
    // ... (dispatches on the input data type to DoRunWithType<T>)
  }

  template <typename T>
  bool DoRunWithType() {
    const auto& A = Input(0);
    const auto& B = Input(1);
    auto* C = Output(0);
    CAFFE_ENFORCE(
        &B != C || !enable_broadcast_,
        "In-place is allowed only with the first tensor when broadcasting");
    C->ResizeLike(A);
    const T* Adata = A.template data<T>();
    const T* Bdata = B.template data<T>();
    auto* Cdata =
        C->template mutable_data<typename TypeMap::template type<T>>();
    if (!enable_broadcast_) {
      CAFFE_ENFORCE_EQ(
          A.dims(),
          B.dims(),
          "Dimension mismatch - did you forget to set broadcast=1?");
      functor_.template Run<false>(A.size(), Adata, Bdata, Cdata, &context_);
    } else if (B.size() == 1) {
      functor_.template Run<true>(A.size(), Adata, Bdata, Cdata, &context_);
    } else {
      size_t pre, n, post;
      std::tie(pre, n, post) = calculate_broadcast_sizes(A, B, axis_);
      if (post == 1) {
        functor_.RunWithBroadcast(Adata, Bdata, Cdata, pre, n, &context_);
      } else {
        functor_.RunWithBroadcast2(
            Adata, Bdata, Cdata, pre, n, post, &context_);
      }
    }
    return true;
  }

 private:
  bool enable_broadcast_;
  int axis_;
  string axis_str_;
  string order_;
  Functor functor_;
};
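// Dispatch summary (illustration only): for A with dims [2, 3, 4, 5] and
// broadcast = 1,
//   - B with dims [1]              -> Run<true>  (scalar broadcast);
//   - B with dims [5]              -> RunWithBroadcast  (pre = 24, n = 5, post = 1);
//   - B with dims [3, 4], axis = 1 -> RunWithBroadcast2 (pre = 2, n = 12, post = 5);
// with broadcast = 0 both inputs must have identical dims.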
// Wraps a functor that only supports the non-broadcast case; the broadcast
// entry points abort at run time.
template <typename Functor>
struct WithoutBroadcast {
  template <bool b_is_scalar, typename T, typename R, typename Context>
  inline void Run(size_t n, const T* a, const T* b, R* out, Context* c) {
    if (b_is_scalar) {
      CAFFE_THROW("Broadcast not supported.");
    } else {
      Functor().Run(n, a, b, out, c);
    }
  }

  template <typename T, typename R, typename Context>
  inline void RunWithBroadcast(
      const T* /*a*/,
      const T* /*b*/,
      R* /*out*/,
      size_t /*pre*/,
      size_t /*n*/,
      Context* /*c*/) {
    CAFFE_NOT_IMPLEMENTED;
  }

  template <typename T, typename R, typename Context>
  inline void RunWithBroadcast2(
      const T* /*a*/,
      const T* /*b*/,
      R* /*out*/,
      size_t /*pre*/,
      size_t /*n*/,
      size_t /*post*/,
      Context* /*c*/) {
    CAFFE_NOT_IMPLEMENTED;
  }
};
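// Hypothetical example (illustration only; not part of the original header)
// of a functor that WithoutBroadcast could wrap. It only needs the
// non-broadcast Run entry point forwarded above:
//
//   struct NaiveMulFunctor {
//     template <typename T, typename R, typename Context>
//     void Run(size_t n, const T* a, const T* b, R* out, Context* /*c*/) {
//       for (size_t i = 0; i < n; ++i) {
//         out[i] = a[i] * b[i];
//       }
//     }
//   };
//
// WithoutBroadcast<NaiveMulFunctor> then exposes the Run / RunWithBroadcast /
// RunWithBroadcast2 interface that BinaryElementwiseOp expects from functor_.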
// Gradient of elementwise division (RunOnDevice is defined further below).
template <class Context>
class DivGradientOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  // ... (constructor elided in this listing)

  bool RunOnDevice() override;
};
namespace SRLHelper {

template <typename T>
void sum2one(const T* a, T* y, size_t n);

template <typename T>
void RunWithBroadcastFront(const T* a, T* y, size_t pre, size_t n, CPUContext*);

template <typename T>
void RunWithBroadcastBack(const T* a, T* y, size_t post, size_t n, CPUContext*);

template <typename T>
void RunWithBroadcast2(
    const T* a,
    T* y,
    size_t pre,
    size_t n,
    size_t post,
    CPUContext*);

} // namespace SRLHelper
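// A naive sketch (illustration only; the actual definitions live in the
// corresponding .cc file) of what sum2one declares: reduce n contiguous
// values into a single output element.
//
//   template <typename T>
//   void sum2one(const T* a, T* y, size_t n) {
//     T sum = T(0);
//     for (size_t i = 0; i < n; ++i) {
//       sum += a[i];
//     }
//     *y = sum;
//   }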
// SumReduceLikeOp sums input 0 down to the shape of input 1; it is used to
// compute gradients of broadcast binary operators.
template <class Context>
class SumReduceLikeOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  SumReduceLikeOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        OP_SINGLE_ARG(int, "axis", axis_, -1),
        OP_SINGLE_ARG(string, "axis_str", axis_str_, ""),
        OP_SINGLE_ARG(string, "order", order_, "NCHW") {
    if (axis_ != -1) {
      // An explicit axis argument was given.
      CAFFE_ENFORCE_EQ(
          axis_str_.size(),
          0,
          "Args axis and axis_str cannot be used simultaneously.");
    } else if (axis_str_.size()) {
      // Resolve the axis semantically from the order string.
      CAFFE_ENFORCE_EQ(
          axis_str_.size(), 1, "Unsupported axis string", axis_str_);
      size_t semantic_axis = order_.find(axis_str_);
      CAFFE_ENFORCE_NE(
          semantic_axis,
          string::npos,
          "Unrecognizable axis string ",
          axis_str_,
          " from order string ",
          order_);
      axis_ = semantic_axis;
    }
  }

  bool RunOnDevice() override {
    // ... (dispatches on the input data type to DoRunWithType<T>)
  }

  template <typename T>
  bool DoRunWithType();

 private:
  int axis_;
  string axis_str_;
  string order_;
  // ... (scratch tensors elided in this listing)
};
template <class Context>
bool DivGradientOp<Context>::RunOnDevice() {
  auto& Y = Input(0);
  auto& Z = Input(1);
  auto& dZ = Input(2);
  auto* dX = Output(0);
  auto* dY = Output(1);
  dX->ResizeLike(Y);
  dY->ResizeLike(Y);

  const float* Ydata = Y.template data<float>();
  const float* Zdata = Z.template data<float>();
  const float* dZdata = dZ.template data<float>();
  float* dXdata = dX->template mutable_data<float>();
  float* dYdata = dY->template mutable_data<float>();

  ElementWiseDivide(context_, Y.size(), dXdata, dYdata, dZdata, Ydata, Zdata);
  return true;
}
// For arithmetic operators, Eigen provides a good way to vectorize even when
// broadcasting.
#define EIGEN_FUNCTOR(name, eigen_op, input_type, output_type)               \
  struct Eigen##name##Functor {                                              \
    template <int b_is_scalar, typename T, typename R>                       \
    inline void Run(size_t n, const T* a, const T* b, R* out, CPUContext*) { \
      if (b_is_scalar) {                                                     \
        EigenVectorArrayMap<R>(out, n) =                                     \
            eigen_op((ConstEigenVectorArrayMap<T>(a, n)), (b[0]));           \
      } else {                                                               \
        EigenVectorArrayMap<R>(out, n) = eigen_op(                           \
            (ConstEigenVectorArrayMap<T>(a, n)),                             \
            (ConstEigenVectorArrayMap<T>(b, n)));                            \
      }                                                                      \
    }                                                                        \
    template <typename T, typename R>                                        \
    void RunWithBroadcast(                                                   \
        const T* a, const T* b, R* out, size_t pre, size_t n, CPUContext*) { \
      EigenArrayMap<R>(out, n, pre) = eigen_op(                              \
          (ConstEigenArrayMap<T>(a, n, pre).colwise()),                      \
          (ConstEigenVectorArrayMap<T>(b, n)));                              \
    }                                                                        \
    template <typename T, typename R>                                        \
    void RunWithBroadcast2(                                                  \
        const T* a,                                                          \
        const T* b,                                                          \
        R* out,                                                              \
        size_t pre,                                                          \
        size_t n,                                                            \
        size_t post,                                                         \
        CPUContext*) {                                                       \
      for (int i = 0; i < pre; ++i) {                                        \
        EigenArrayMap<R>(out + i * n * post, post, n) = eigen_op(            \
            (ConstEigenArrayMap<T>(a + i * n * post, post, n).rowwise()),    \
            (Eigen::Map<const Eigen::Array<T, 1, Eigen::Dynamic>>(b, n)));   \
      }                                                                      \
    }                                                                        \
  };                                                                         \
  REGISTER_CPU_OPERATOR(                                                     \
      name,                                                                  \
      BinaryElementwiseOp<                                                   \
          input_type,                                                        \
          CPUContext,                                                        \
          Eigen##name##Functor,                                              \
          output_type>)

} // namespace caffe2

#endif // CAFFE2_OPERATORS_ELEMENTWISE_OP_H_
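// Hypothetical instantiation of EIGEN_FUNCTOR (illustration only; the macro
// NAIVE_SUB and the output type map KeepInputType below are assumptions, not
// definitions from this header):
//
//   #define NAIVE_SUB(x, y) ((x) - (y))
//   struct KeepInputType {
//     template <typename T>
//     using type = T;
//   };
//   EIGEN_FUNCTOR(Sub, NAIVE_SUB, NumericTypes, KeepInputType);
//
// This would define EigenSubFunctor and register a CPU operator "Sub" as
// BinaryElementwiseOp<NumericTypes, CPUContext, EigenSubFunctor, KeepInputType>.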