2 #ifndef CAFFE2_OPERATORS_RECUDER_FUNCTORS_H_ 3 #define CAFFE2_OPERATORS_RECUDER_FUNCTORS_H_ 7 #include "caffe2/core/context.h" 8 #include "caffe2/core/tensor.h" 9 #include "caffe2/utils/math.h" 10 #include "caffe2/utils/proto_utils.h" 20 template <
typename T,
class Context>
22 template <
typename T,
class Context>
29 const TIndex block_size,
35 EigenVectorMap<T> out_vec(out, block_size);
36 out_vec = ConstEigenMatrixMap<T>(in, block_size, blocks).rowwise().sum();
40 template <
typename T,
class Context>
44 const TIndex block_size,
46 const T* segment_grad,
52 for (TIndex i = 0; i < blocks; ++i) {
53 context->template Copy<T, Context, Context>(
54 block_size, segment_grad, data_grad + block_size * i);
60 template <
typename T,
class Context>
62 template <
typename T,
class Context>
// Identifier string for this reducer definition ("Sum").
// NOTE(review): the enclosing struct and the code that reads `name`/`doc`
// are not visible in this excerpt.
64 static constexpr
const char* name =
"Sum";
// Human-readable description of the reduction. The adjacent string literals
// are concatenated by the compiler into a single string.
65 static constexpr
const char* doc =
66 "Summation is done element-wise across slices of the input tensor and " 67 "doesn't change the shape of the individual blocks.";
71 template <
typename T,
class Context>
73 template <
typename T,
class Context>
80 const TIndex block_size,
85 for (
int j = 0; j < block_size; ++j) {
86 T max_value = std::numeric_limits<T>::lowest();
87 for (
int i = 0; i < blocks; ++i) {
88 max_value = std::max(max_value, in[i * block_size + j]);
91 for (
int i = 0; i < blocks; ++i) {
92 scaled_exp_sum += std::exp(in[i * block_size + j] - max_value);
94 *(out++) = std::log(scaled_exp_sum) + max_value;
100 template <
typename T,
class Context>
104 const TIndex block_size,
106 const T* segment_grad,
111 for (
int j = 0; j < block_size; ++j) {
112 const T out_grad = *(segment_grad++);
113 const T offset = *(data_out++);
114 for (
int i = 0; i < blocks; ++i) {
115 auto idx = i * block_size + j;
116 data_grad[idx] = out_grad * std::exp(data_in[idx] - offset);
123 template <
typename T,
class Context>
125 template <
typename T,
class Context>
// Identifier string for this reducer definition ("LogSumExp").
// NOTE(review): the enclosing struct and the code that reads `name`/`doc`
// are not visible in this excerpt.
127 static constexpr
const char* name =
"LogSumExp";
// Human-readable description of the reduction. The adjacent string literals
// are concatenated by the compiler into a single string.
128 static constexpr
const char* doc =
129 "LogSumExp computes the element-wise log of the sum of exponentials of " 130 "input slices. Operation doesn't change the shape of individual blocks.";
133 template <
typename T,
class Context>
135 template <
typename T,
class Context>
138 template <
typename T>
142 const TIndex block_size,
147 for (
int j = 0; j < block_size; ++j) {
148 T max_value = std::numeric_limits<T>::lowest();
149 for (
int i = 0; i < blocks; ++i) {
150 max_value = std::max(max_value, in[i * block_size + j]);
152 T scaled_exp_sum = 0;
153 for (
int i = 0; i < blocks; ++i) {
154 scaled_exp_sum += std::exp(in[i * block_size + j] - max_value);
156 scaled_exp_sum /= blocks;
157 *(out++) = std::log(scaled_exp_sum) + max_value;
162 template <
typename T,
class Context>
166 const TIndex block_size,
168 const T* segment_grad,
173 for (
int j = 0; j < block_size; ++j) {
174 const T out_grad = *(segment_grad++);
175 const T offset = *(data_out++);
176 for (
int i = 0; i < blocks; ++i) {
177 auto idx = i * block_size + j;
178 data_grad[idx] = out_grad * std::exp(data_in[idx] - offset) / blocks;
185 template <
typename T,
class Context>
187 template <
typename T,
class Context>
// Identifier string for this reducer definition ("LogMeanExp").
// NOTE(review): the enclosing struct and the code that reads `name`/`doc`
// are not visible in this excerpt.
189 static constexpr
const char* name =
"LogMeanExp";
// Human-readable description of the reduction. The adjacent string literals
// are concatenated by the compiler into a single string.
190 static constexpr
const char* doc =
191 "LogMeanExp computes the element-wise log of the mean of exponentials of " 192 "input slices. Operation doesn't change the shape of individual blocks.";
195 template <
typename T,
class Context>
197 template <
typename T,
class Context>
200 template <
typename T>
204 const TIndex block_size,
209 for (
int j = 0; j < block_size; ++j) {
211 for (
int i = 0; i < blocks; ++i) {
212 avg_value += in[i * block_size + j] / blocks;
214 *(out++) = avg_value;
219 template <
typename T,
class Context>
223 const TIndex block_size,
225 const T* segment_grad,
230 const auto in_grad = 1.0 / blocks;
231 for (
int j = 0; j < block_size; ++j) {
232 const T out_grad = *(segment_grad++);
233 for (
int i = 0; i < blocks; ++i) {
234 auto idx = i * block_size + j;
235 data_grad[idx] = out_grad * in_grad;
242 template <
typename T,
class Context>
244 template <
typename T,
class Context>
// Identifier string for this reducer definition ("Mean").
// NOTE(review): the enclosing struct and the code that reads `name`/`doc`
// are not visible in this excerpt.
246 static constexpr
const char* name =
"Mean";
// Human-readable description of the reduction. The adjacent string literals
// are concatenated by the compiler into a single string.
247 static constexpr
const char* doc =
248 "Mean computation is done element-wise, so that each element of the " 249 "output slice corresponds to the average value of the respective " 250 "elements in the input slices. Operation doesn't change the shape of " 251 "individual blocks.";
254 template <
typename T,
class Context>
256 template <
typename T,
class Context>
259 template <
typename T>
263 const TIndex block_size,
268 for (
int j = 0; j < block_size; ++j) {
269 T max_value = std::numeric_limits<T>::lowest();
270 for (
int i = 0; i < blocks; ++i) {
271 max_value = std::max(max_value, in[i * block_size + j]);
273 *(out++) = max_value;
278 template <
typename T,
class Context>
282 const TIndex block_size,
284 const T* segment_grad,
290 static_cast<void*>(data_grad), 0, blocks * block_size *
sizeof(T));
291 for (
int j = 0; j < block_size; ++j) {
292 const T out_grad = *(segment_grad++);
293 const T out = data_out[j];
294 for (
int i = 0; i < blocks; ++i) {
295 auto idx = i * block_size + j;
296 if (out == data_in[idx]) {
297 data_grad[idx] = out_grad;
305 template <
typename T,
class Context>
307 template <
typename T,
class Context>
// Identifier string for this reducer definition ("Max").
// NOTE(review): the enclosing struct and the code that reads `name`/`doc`
// are not visible in this excerpt.
309 static constexpr
const char* name =
"Max";
// Human-readable description of the reduction. The adjacent string literals
// are concatenated by the compiler into a single string.
// NOTE(review): "occurence" is misspelled twice inside the literal; it is
// runtime text, so it is left untouched here and should be fixed separately.
// NOTE(review): the text promises that only the first occurrence of the
// maximum receives the gradient -- confirm the gradient loop actually stops
// after the first match.
310 static constexpr
const char* doc =
311 "Max computation is done element-wise, so that each element of the " 312 "output slice corresponds to the max value of the respective " 313 "elements in the input slices. Operation doesn't change the shape of " 314 "individual blocks. This implementation imitates torch nn.Max operator. " 315 "If the maximum value occurs more than once, the operator will return " 316 "the first occurence of value. When computing the gradient using the " 317 "backward propagation, the gradient input corresponding to the first " 318 "occurence of the maximum value will be used.";
328 static constexpr
int kInputCount = 1;
332 vector<TIndex> block_shape;
// Constructs the metadata holder, storing `first` in `first_dim` (defaults to
// true). `explicit` prevents an implicit bool -> Meta conversion.
// computeMeta() consults `first_dim` to decide whether the block shape is
// taken from the dims after the first `skip_dims` entries (true) or from the
// dims before the last `skip_dims` entries (false).
335 explicit Meta(
bool first =
true) : first_dim(first) {}
337 void computeMeta(
const std::vector<TIndex>& dims,
int skip_dims) {
338 first_dim ? block_shape.assign(dims.begin() + skip_dims, dims.end())
339 : block_shape.assign(dims.begin(), dims.end() - skip_dims);
347 auto& dims = value.
dims();
348 computeMeta(dims, skip_dims);
351 void appendOutputShape(vector<TIndex>* output_shape) {
352 output_shape->insert(
353 output_shape->end(), block_shape.begin(), block_shape.end());
356 vector<TIndex> getOutputShape(
const TensorShape& in,
int skip_dims) {
357 vector<TIndex> dims(in.dims().begin(), in.dims().end());
358 computeMeta(dims, skip_dims);
363 template <
int FixedSize>
370 static constexpr std::array<int, 0> originalInputs() {
371 return std::array<int, 0>();
374 static constexpr
bool computeLength() {
378 static int numAuxInputsWithGrads(
const OperatorDef& ) {
382 static bool requiresDataInput(
const OperatorDef& ) {
387 static bool requiresForwardOutput() {
393 vector<TIndex> block_shape;
399 bool first_dim =
true)
400 : first_dim(first_dim) {
401 auto& dims = out_grad.
dims();
402 first_dim ? block_shape.assign(dims.begin() + skip_dims, dims.end())
403 : block_shape.assign(dims.begin(), dims.end() - skip_dims);
404 block_size = first_dim
405 ? out_grad.size_from_dim(skip_dims)
406 : out_grad.size_from_dim(out_grad.
ndim() - skip_dims);
409 void observeOriginalInput(
415 void appendGradShape(vector<TIndex>* output_shape) {
416 output_shape->insert(
417 output_shape->end(), block_shape.begin(), block_shape.end());
423 template <
typename T,
class Context>
425 template <
typename T,
class Context>
428 template <
typename T>
434 : current_size_(0), out_(out) {
436 if (meta.first_dim) {
437 memset(out, 0,
sizeof(T) * meta.block_size);
440 template <
int FixedSize>
446 if (meta.first_dim) {
447 math::AxpyFixedSize<T, CPUContext, FixedSize>(
448 meta.block_size, 1, in, out_, context);
450 math::Sum<T, CPUContext>(
451 meta.block_size, in, out_ + current_size_++, context);
460 template <
typename T,
class Context>
471 template <
int FixedSize>
478 if (FixedSize == 1) {
479 *data_grad = *s_grad_;
480 }
else if (meta.first_dim) {
481 context->template Copy<T, Context, Context>(
482 meta.block_size, s_grad_, data_grad);
484 math::Set<T, Context>(length, s_grad_[offset], data_grad, context);
493 template <
typename T,
class Context>
495 template <
typename T,
class Context>
// Identifier string for this reducer definition ("Sum").
// NOTE(review): the enclosing struct and the code that reads `name`/`doc`
// are not visible in this excerpt.
497 static constexpr
const char* name =
"Sum";
// Human-readable description of the reduction. The adjacent string literals
// are concatenated by the compiler into a single string.
498 static constexpr
const char* doc =
499 "Summation is done element-wise across slices of the input tensor and " 500 "doesn't change the shape of the individual blocks.";
// No-op schema hook: the OpSchema parameter is unnamed and unused, so this
// definition adds nothing to the schema it is given.
501 static void PopulateSchema(
OpSchema& ) {}
505 template <
typename T,
class Context>
507 template <
typename T,
class Context>
510 template <
typename T>
513 static constexpr
int kInputCount = 2;
// Constructs the metadata holder, storing `first` in `first_dim` (defaults to
// true). `explicit` prevents an implicit bool -> Meta conversion.
// NOTE(review): presumably `first_dim` selects front- vs. back-dimension
// reduction as in the base reducer metadata -- confirm against the base class.
522 explicit Meta(
bool first =
true) : first_dim(first) {}
528 skip_dims, value.
ndim(),
"SCALARS mustn't have extra dimensions");
529 scalars = value.
data<T>();
532 BaseReducer::Meta::observeInput(input, value, skip_dims);
539 memset(out, 0,
sizeof(T) * meta.block_size);
541 template <
int FixedSize>
543 process(
const Meta& meta,
const T* in, TIndex offset,
CPUContext* context) {
546 "WeightedSumReducer implemented only for " 547 "front dimensions reduction");
548 math::AxpyFixedSize<T, CPUContext, FixedSize>(
549 meta.block_size, meta.scalars[offset], in, out_, context);
556 template <
typename T,
class Context>
560 static constexpr std::array<int, 1> originalInputs() {
564 static int numAuxInputsWithGrads(
const OperatorDef& def) {
565 return GetFlagArgument(def,
"grad_on_weights");
568 static bool requiresDataInput(
const OperatorDef& def) {
569 return numAuxInputsWithGrads(def) > 0;
// Inherit the constructors of BaseReducerGradient::Meta so this derived
// metadata type can be constructed with the same arguments as its base.
578 using BaseReducerGradient::Meta::Meta;
580 void observeOriginalInput(
585 CAFFE_ENFORCE_EQ(1, original_input);
586 scalars = value.
data<T>();
600 template <
int FixedSize>
607 math::ScaleFixedSize<T, CPUContext, FixedSize>(
608 meta.block_size, meta.scalars[offset], s_grad_, data_grad, context);
613 template <
int FixedSize>
614 void fillGradWithMainInput(
621 math::ScaleFixedSize<T, CPUContext, FixedSize>(
622 meta.block_size, meta.scalars[offset], s_grad_, data_grad, context);
624 meta.block_size, s_grad_, data, meta.scalars_grad + offset, context);
632 template <
typename T,
class Context>
634 template <
typename T,
class Context>
// Identifier string for this reducer definition ("WeightedSum").
// NOTE(review): the enclosing struct and the code that reads `name`/`doc`
// are not visible in this excerpt.
636 static constexpr
const char* name =
"WeightedSum";
// Human-readable description of the reduction. The adjacent string literals
// are concatenated by the compiler into a single string.
637 static constexpr
const char* doc =
638 "Input slices are first scaled by SCALARS and then summed element-wise. " 639 "It doesn't change the shape of the individual blocks.";
640 static void PopulateSchema(
OpSchema& schema) {
641 schema.Input(0,
"DATA",
"Input tensor for the summation");
645 "Scalar multipliers for the input slices. Must be a vector with the " 646 "length matching the number of slices");
649 "Produce also gradient for `weights`. For now it's only supported in " 650 "`Lengths`-based operators");
654 template <
typename T,
class Context>
656 template <
typename T,
class Context>
659 template <
typename T>
665 : out_(out), current_size_(0) {
666 if (meta.first_dim) {
667 memset(out, 0,
sizeof(T) * meta.block_size);
671 template <
int FixedSize>
677 if (meta.first_dim) {
678 math::AxpyFixedSize<T, CPUContext, FixedSize>(
679 meta.block_size, 1, in, out_, context);
681 math::Sum<T, CPUContext>(
682 meta.block_size, in, out_ + current_size_, context);
687 template <
int FixedSize>
689 if (meta.first_dim) {
690 if (current_size_ > 0) {
691 math::ScaleFixedSize<T, CPUContext, FixedSize>(
692 meta.block_size, 1.0 / current_size_, out_, out_, context);
695 math::ScaleFixedSize<T, CPUContext, FixedSize>(
696 current_size_, 1.0 / meta.block_size, out_, out_, context);
705 template <
typename T,
class Context>
708 static constexpr
bool computeLength() {
720 template <
int FixedSize>
727 CAFFE_ENFORCE_GT(length, 0,
"Segment length must be > 0");
728 if (meta.first_dim) {
729 math::ScaleFixedSize<T, CPUContext, FixedSize>(
730 meta.block_size, 1.0 / length, s_grad_, data_grad, context);
732 math::Set<T, CPUContext>(
733 length, s_grad_[offset] * 1.0f / length, data_grad, context);
742 template <
typename T,
class Context>
744 template <
typename T,
class Context>
// Identifier string for this reducer definition ("Mean").
// NOTE(review): the enclosing struct and the code that reads `name`/`doc`
// are not visible in this excerpt.
746 static constexpr
const char* name =
"Mean";
// Human-readable description of the reduction. The adjacent string literals
// are concatenated by the compiler into a single string.
747 static constexpr
const char* doc =
748 "Mean computes the element-wise mean of the input slices. " 749 "Operation doesn't change the shape of the individual blocks.";
// No-op schema hook: the OpSchema parameter is unnamed and unused, so this
// definition adds nothing to the schema it is given.
750 static void PopulateSchema(
OpSchema& ) {}
753 template <
typename T,
class Context>
755 template <
typename T,
class Context>
758 template <
typename T>
764 : out_(out), current_size_(0) {}
766 template <
int FixedSize>
774 "MaxReducer implemented only for front dimensions reduction");
775 if (current_size_ > 0) {
776 EigenVectorMap<T> output_vec(out_, meta.block_size);
778 output_vec.cwiseMax(ConstEigenVectorMap<T>(in, meta.block_size));
780 memcpy(out_, in,
sizeof(T) * meta.block_size);
790 template <
typename T,
class Context>
793 static bool requiresDataInput(
const OperatorDef& ) {
797 static bool requiresForwardOutput() {
809 template <
int FixedSize>
810 void fillGradWithMainInputAndForwardOutput(
814 const T* forward_output,
818 for (TIndex i = 0; i < meta.block_size; ++i) {
819 data_grad[i] = data[i] == forward_output[i] ? s_grad_[i] : 0;
828 template <
typename T,
class Context>
830 template <
typename T,
class Context>
// Identifier string for this reducer definition ("Max").
// NOTE(review): the enclosing struct and the code that reads `name`/`doc`
// are not visible in this excerpt.
832 static constexpr
const char* name =
"Max";
// Human-readable description of the reduction. The adjacent string literals
// are concatenated by the compiler into a single string.
833 static constexpr
const char* doc =
834 "Max computes the element-wise max of the input slices. " 835 "Operation doesn't change the shape of the individual blocks.";
// No-op schema hook: the OpSchema parameter is unnamed and unused, so this
// definition adds nothing to the schema it is given.
836 static void PopulateSchema(
OpSchema& ) {}
841 #endif // CAFFE2_OPERATORS_RECUDER_FUNCTORS_H_
const T * data() const
Returns a typed pointer of the underlying storage.
A class to record the schema of an op.
The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement...
T * mutable_data()
Returns a typed pointer of the underlying storage.
const vector< TIndex > & dims() const
Returns the dimensions of the tensor as a vector.
TIndex size_from_dim_(int k, const vector< TIndex > &dims)
Returns the product of all dimensions starting from index k.
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
void ResizeLike(const Tensor< OtherContext > &src_tensor)
Resize the tensor like the source tensor.
int ndim() const
Returns the number of dimensions of the data.