#include "caffe2/operators/reduction_ops.h"

namespace caffe2 {

REGISTER_CPU_OPERATOR(SumElements, SumElementsOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(SumElementsInt, SumElementsIntOp<int, CPUContext>);
REGISTER_CPU_OPERATOR(SumSqrElements, SumSqrElementsOp<CPUContext>);

REGISTER_CPU_OPERATOR(
    SumElementsGradient,
    SumElementsGradientOp<float, CPUContext>);

REGISTER_CPU_OPERATOR(RowwiseMax, MaxReductionOp<float, CPUContext, true>);
REGISTER_CPU_OPERATOR(
    RowwiseMaxGradient,
    MaxReductionGradientOp<float, CPUContext, true>);
REGISTER_CPU_OPERATOR(
    ColwiseMaxGradient,
    MaxReductionGradientOp<float, CPUContext, false>);
REGISTER_CPU_OPERATOR(ColwiseMax, MaxReductionOp<float, CPUContext, false>);
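// Note: the trailing bool template parameter of MaxReductionOp /
// MaxReductionGradientOp selects the reduction axis: true = row-wise
// (reduce over the N columns, one max per row), false = column-wise
// (reduce over the M rows, one max per column).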
OPERATOR_SCHEMA(SumElements)
    .NumInputs(1)
    .NumOutputs(1)
    .ScalarType(TensorProto::FLOAT)
    .SetDoc("Sums the elements of the input tensor.")
    .Arg("average", "whether to average the elements instead of summing them")
    .Input(0, "X", "Tensor to sum up")
    .Output(0, "sum", "Scalar sum");
OPERATOR_SCHEMA(SumElementsInt)
    .NumInputs(1)
    .NumOutputs(1)
    .ScalarType(TensorProto::INT32)
    .SetDoc("Sums the integer elements of the input tensor.")
    .Input(0, "X", "Tensor to sum up")
    .Output(0, "sum", "Scalar sum");
SHOULD_NOT_DO_GRADIENT(SumElementsInt);
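// Integer tensors are not differentiable, so no gradient is registered
// for SumElementsInt.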
OPERATOR_SCHEMA(SumSqrElements)
    .NumInputs(1)
    .NumOutputs(1)
    .ScalarType(TensorProto::FLOAT)
    .SetDoc("Sums the squares of the elements of the input tensor.")
    .Arg("average", "whether to average the squared elements instead of summing them")
    .Input(0, "X", "Tensor to sum up")
    .Output(0, "sum", "Scalar sum of squares");
OPERATOR_SCHEMA(SumElementsGradient).NumInputs(2).NumOutputs(1);
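// SumElementsGradient takes the forward input X and the scalar upstream
// gradient d(sum), and produces dX with the same shape as X.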
class GetSumElementsGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "SumElementsGradient",
        "",
        vector<string>{I(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(SumElements, GetSumElementsGradient);
OPERATOR_SCHEMA(RowwiseMax)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc("Compute row-wise max reduction of the input tensor.")
    .Input(
        0,
        "X",
        "A tensor of dimensions batch_size x M x N to compute rowwise-max.")
    .Output(0, "Y", "batch_size x M rowwise-max results matrix.");
OPERATOR_SCHEMA(RowwiseMaxGradient).NumInputs(3).NumOutputs(1);
class GetRowwiseMaxGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "RowwiseMaxGradient",
        "",
        vector<string>{I(0), O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(RowwiseMax, GetRowwiseMaxGradient);
OPERATOR_SCHEMA(ColwiseMax)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc("Compute column-wise max reduction of the input tensor.")
    .Input(
        0,
        "X",
        "A tensor of dimensions batch_size x M x N to compute colwise-max.")
    .Output(0, "Y", "batch_size x N column-max results matrix.");
OPERATOR_SCHEMA(ColwiseMaxGradient).NumInputs(3).NumOutputs(1);
class GetColwiseMaxGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "ColwiseMaxGradient",
        "",
        vector<string>{I(0), O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(ColwiseMax, GetColwiseMaxGradient);
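// SumElementsGradient kernel: the upstream gradient is a single scalar,
// so dX is simply that scalar broadcast to X's shape (scaled by
// 1 / X.size() when averaging).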
template <typename T, class Context>
bool SumElementsGradientOp<T, Context>::RunOnDevice()
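// The averaging path divides by X.size(), which can be zero for an empty
// tensor; under the sanitizer the float-divide-by-zero check is disabled
// here rather than guarded.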
#if defined(__has_feature)
#if __has_feature(__address_sanitizer__)
__attribute__((__no_sanitize__("float-divide-by-zero")))
#endif
#endif
{
  auto& X = Input(0);
  auto& sum_grad = Input(1);
  auto* dX = Output(0);
  dX->ResizeLike(X);
  DCHECK_EQ(sum_grad.size(), 1);
  math::Set<T, Context>(
      dX->size(),
      static_cast<T>(
          sum_grad.data<T>()[0] * (average_ ? 1.0 / X.size() : 1)),
      dX->template mutable_data<T>(),
      &context_);
  return true;
}
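// MaxReductionGradient kernel: the gradient of a max reduction flows only
// to the positions whose value equals the reduced maximum. Note that on
// ties, every tied element receives the full upstream gradient rather
// than a share of it.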
template <typename T, class Context, bool ROWWISE>
bool MaxReductionGradientOp<T, Context, ROWWISE>::RunOnDevice() {
  auto& X = Input(0);
  auto& Y = Input(1);
  auto& dY = Input(2);

  auto* dX = Output(0);
  dX->ResizeLike(X);

  CAFFE_ENFORCE_EQ(X.ndim(), 3);

  const int batch_size = X.dim32(0);
  const int M = X.dim32(1);
  const int N = X.dim32(2);

  const T* Xdata = X.template data<T>();
  const T* Ydata = Y.template data<T>();
  const T* dYdata = dY.template data<T>();
  T* dXdata = dX->template mutable_data<T>();

  const int input_size = M * N;
  for (int i = 0; i < batch_size; ++i) {
    const T* Xdata_i = Xdata + i * input_size;
    T* dXdata_i = dXdata + i * input_size;
    if (ROWWISE) {
      const T* Ydata_i = Ydata + i * M;
      const T* dYdata_i = dYdata + i * M;
      for (int m = 0; m < M; ++m) {
        const T* Xdata_m = Xdata_i + m * N;
        T* dXdata_m = dXdata_i + m * N;
        for (int n = 0; n < N; ++n) {
          if (Xdata_m[n] == Ydata_i[m]) {
            dXdata_m[n] = dYdata_i[m];
          } else {
            dXdata_m[n] = static_cast<T>(0);
          }
        }
      }
    } else {
      const T* Ydata_i = Ydata + i * N;
      const T* dYdata_i = dYdata + i * N;
      for (int n = 0; n < N; ++n) {
        for (int m = 0; m < M; ++m) {
          const T* Xdata_m = Xdata_i + m * N;
          T* dXdata_m = dXdata_i + m * N;
          if (Xdata_m[n] == Ydata_i[n]) {
            dXdata_m[n] = dYdata_i[n];
          } else {
            dXdata_m[n] = static_cast<T>(0);
          }
        }
      }
    }
  }

  return true;
}

} // namespace caffe2
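// Usage sketch (not compiled into this file): running SumElements from
// C++ through a Workspace. This assumes the classic protobuf-driven
// Caffe2 API; exact tensor accessors vary across Caffe2 versions.
//
//   caffe2::Workspace ws;
//   auto* x = ws.CreateBlob("X")->GetMutable<caffe2::TensorCPU>();
//   x->Resize(2, 3);
//   // ... fill x->mutable_data<float>() with 6 values ...
//
//   caffe2::OperatorDef def;
//   def.set_type("SumElements");
//   def.add_input("X");
//   def.add_output("sum");
//   auto* avg = def.add_arg();
//   avg->set_name("average");
//   avg->set_i(1);  // average instead of sum
//
//   auto op = caffe2::CreateOperator(def, &ws);
//   op->Run();
//   const auto& sum = ws.GetBlob("sum")->Get<caffe2::TensorCPU>();
//   // sum.data<float>()[0] now holds mean(X).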