#include "caffe2/operators/reduction_ops.h"

namespace caffe2 {

REGISTER_CPU_OPERATOR(SumElements, SumElementsOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(SumElementsInt, SumElementsIntOp<int, CPUContext>);
REGISTER_CPU_OPERATOR(SumSqrElements, SumSqrElementsOp<CPUContext>);

REGISTER_CPU_OPERATOR(
    SumElementsGradient,
    SumElementsGradientOp<float, CPUContext>);

REGISTER_CPU_OPERATOR(RowwiseMax, MaxReductionOp<float, CPUContext, true>);
REGISTER_CPU_OPERATOR(
    RowwiseMaxGradient,
    MaxReductionGradientOp<float, CPUContext, true>);
REGISTER_CPU_OPERATOR(
    ColwiseMaxGradient,
    MaxReductionGradientOp<float, CPUContext, false>);
REGISTER_CPU_OPERATOR(ColwiseMax, MaxReductionOp<float, CPUContext, false>);
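// Note: the trailing bool template parameter of MaxReductionOp /
// MaxReductionGradientOp selects the reduction axis: true = row-wise
// (reduce over the N columns, one max per row), false = column-wise
// (reduce over the M rows, one max per column).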
OPERATOR_SCHEMA(SumElements)
    .NumInputs(1)
    .NumOutputs(1)
    .ScalarType(TensorProto::FLOAT)
    .SetDoc("Sums the elements of the input tensor.")
    .Arg("average", "whether to average the elements instead of summing them")
    .Input(0, "X", "Tensor to sum up")
    .Output(0, "sum", "Scalar sum");
OPERATOR_SCHEMA(SumElementsInt)
    .NumInputs(1)
    .NumOutputs(1)
    .ScalarType(TensorProto::INT32)
    .SetDoc("Sums the integer elements of the input tensor.")
    .Input(0, "X", "Tensor to sum up")
    .Output(0, "sum", "Scalar sum");
SHOULD_NOT_DO_GRADIENT(SumElementsInt);
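// Integer tensors are not differentiable, so no gradient is registered
// for SumElementsInt.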
OPERATOR_SCHEMA(SumSqrElements)
    .NumInputs(1)
    .NumOutputs(1)
    .ScalarType(TensorProto::FLOAT)
    .SetDoc("Sums the squares of the elements of the input tensor.")
    .Arg("average", "whether to average the squared elements instead of summing them")
    .Input(0, "X", "Tensor to sum up")
    .Output(0, "sum", "Scalar sum of squares");
OPERATOR_SCHEMA(SumElementsGradient).NumInputs(2).NumOutputs(1);
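// SumElementsGradient takes the forward input X and the scalar upstream
// gradient d(sum), and produces dX with the same shape as X.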
class GetSumElementsGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "SumElementsGradient",
        "",
        vector<string>{I(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(SumElements, GetSumElementsGradient);
OPERATOR_SCHEMA(RowwiseMax)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc("Compute row-wise max reduction of the input tensor.")
    .Input(
        0,
        "X",
        "A tensor of dimensions batch_size x M x N to compute rowwise-max.")
    .Output(0, "Y", "batch_size x M rowwise-max results matrix.");
OPERATOR_SCHEMA(RowwiseMaxGradient).NumInputs(3).NumOutputs(1);
class GetRowwiseMaxGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "RowwiseMaxGradient",
        "",
        vector<string>{I(0), O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(RowwiseMax, GetRowwiseMaxGradient);
OPERATOR_SCHEMA(ColwiseMax)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc("Compute column-wise max reduction of the input tensor.")
    .Input(
        0,
        "X",
        "A tensor of dimensions batch_size x M x N to compute colwise-max.")
    .Output(0, "Y", "batch_size x N column-max results matrix.");
OPERATOR_SCHEMA(ColwiseMaxGradient).NumInputs(3).NumOutputs(1);
class GetColwiseMaxGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "ColwiseMaxGradient",
        "",
        vector<string>{I(0), O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(ColwiseMax, GetColwiseMaxGradient);
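// SumElementsGradient kernel: the upstream gradient is a single scalar,
// so dX is simply that scalar broadcast to X's shape (scaled by
// 1 / X.size() when averaging).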
template <typename T, class Context>
bool SumElementsGradientOp<T, Context>::RunOnDevice()
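// The averaging path divides by X.size(), which can be zero for an empty
// tensor; under the sanitizer the float-divide-by-zero check is disabled
// here rather than guarded.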
#if defined(__has_feature)
#if __has_feature(__address_sanitizer__)
__attribute__((__no_sanitize__("float-divide-by-zero")))
#endif
#endif
{
  auto& X = Input(0);
  auto& sum_grad = Input(1);
  auto* dX = Output(0);
  dX->ResizeLike(X);
  DCHECK_EQ(sum_grad.size(), 1);
  math::Set<T, Context>(
      dX->size(),
      static_cast<T>(
          sum_grad.data<T>()[0] * (average_ ? 1.0 / X.size() : 1)),
      dX->template mutable_data<T>(),
      &context_);
  return true;
}
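// MaxReductionGradient kernel: the gradient of a max reduction flows only
// to the positions whose value equals the reduced maximum. Note that on
// ties, every tied element receives the full upstream gradient rather
// than a share of it.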
template <typename T, class Context, bool ROWWISE>
bool MaxReductionGradientOp<T, Context, ROWWISE>::RunOnDevice() {
  auto& X = Input(0);
  auto& Y = Input(1);
  auto& dY = Input(2);

  auto* dX = Output(0);
  dX->ResizeLike(X);

  CAFFE_ENFORCE_EQ(X.ndim(), 3);

  const int batch_size = X.dim32(0);
  const int M = X.dim32(1);
  const int N = X.dim32(2);

  const T* Xdata = X.template data<T>();
  const T* Ydata = Y.template data<T>();
  const T* dYdata = dY.template data<T>();
  T* dXdata = dX->template mutable_data<T>();

  const int input_size = M * N;
  for (int i = 0; i < batch_size; ++i) {
    const T* Xdata_i = Xdata + i * input_size;
    T* dXdata_i = dXdata + i * input_size;
    if (ROWWISE) {
      const T* Ydata_i = Ydata + i * M;
      const T* dYdata_i = dYdata + i * M;
      for (int m = 0; m < M; ++m) {
        const T* Xdata_m = Xdata_i + m * N;
        T* dXdata_m = dXdata_i + m * N;
        for (int n = 0; n < N; ++n) {
          if (Xdata_m[n] == Ydata_i[m]) {
            dXdata_m[n] = dYdata_i[m];
          } else {
            dXdata_m[n] = static_cast<T>(0);
          }
        }
      }
    } else {
      const T* Ydata_i = Ydata + i * N;
      const T* dYdata_i = dYdata + i * N;
      for (int n = 0; n < N; ++n) {
        for (int m = 0; m < M; ++m) {
          const T* Xdata_m = Xdata_i + m * N;
          T* dXdata_m = dXdata_i + m * N;
          if (Xdata_m[n] == Ydata_i[n]) {
            dXdata_m[n] = dYdata_i[n];
          } else {
            dXdata_m[n] = static_cast<T>(0);
          }
        }
      }
    }
  }

  return true;
}

} // namespace caffe2
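// Usage sketch (not compiled into this file): running SumElements from
// C++ through a Workspace. This assumes the classic protobuf-driven
// Caffe2 API; exact tensor accessors vary across Caffe2 versions.
//
//   caffe2::Workspace ws;
//   auto* x = ws.CreateBlob("X")->GetMutable<caffe2::TensorCPU>();
//   x->Resize(2, 3);
//   // ... fill x->mutable_data<float>() with 6 values ...
//
//   caffe2::OperatorDef def;
//   def.set_type("SumElements");
//   def.add_input("X");
//   def.add_output("sum");
//   auto* avg = def.add_arg();
//   avg->set_name("average");
//   avg->set_i(1);  // average instead of sum
//
//   auto op = caffe2::CreateOperator(def, &ws);
//   op->Run();
//   const auto& sum = ws.GetBlob("sum")->Get<caffe2::TensorCPU>();
//   // sum.data<float>()[0] now holds mean(X).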