#ifndef CAFFE2_OPERATORS_SEGMENT_REDUCTION_OP_H_
#define CAFFE2_OPERATORS_SEGMENT_REDUCTION_OP_H_

#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/operators/reducer_functors.h"

namespace caffe2 {

template <typename TData>
class BaseInputAccessor {
 public:
  BaseInputAccessor() {}

  bool observeInput(const TensorCPU& dataInput) {
    data_ = dataInput.raw_data();
    return dataInput.template IsType<TData>();
  }

  inline const TData*
  getBlockPtr(TIndex in_block_size, TIndex idx, TIndex /* blocks */ = 1) {
    return static_cast<const TData*>(data_) + in_block_size * idx;
  }

 protected:
  const void* data_ = nullptr;
};
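// Illustrative note (editorial sketch, not from the original source): for a
// row-major 4x3 float tensor observed by BaseInputAccessor<float>,
// in_block_size = 3 and getBlockPtr(3, 2) returns a pointer to the start of
// row 2 (flat element 6). Reducers walk DATA one such fixed-size block at a
// time, which is what lets the ops below stay agnostic of the tensor layout.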
/**
 * Base implementation for segment reduction op that leverages continuity of
 * the data. Assumes that segments are sorted and have no gaps.
 */
template <
    typename T,
    typename SIndex,
    class Context,
    class RangeReducer,
    typename InputAccessor = BaseInputAccessor<T>>
class AbstractSortedSegmentRangeOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(AbstractSortedSegmentRangeOp);

  bool RunOnDevice() override {
    auto& dataInput = Input(DATA);
    auto& segment_ids = Input(SEGMENT_IDS);
    auto* output = Output(0);

    CAFFE_ENFORCE_EQ(1, segment_ids.ndim(), "SEGMENT_IDS must be a vector");
    auto N = segment_ids.dim(0);
    CAFFE_ENFORCE_EQ(
        N,
        dataInput.dim(0),
        "SEGMENT_IDS must have the same length as outer dimension of DATA");

    OPERATOR_NEEDS_FEATURE(
        inputAccessor_.observeInput(dataInput),
        "Unsupported input type: ",
        dataInput.meta().name(),
        ".");

    const SIndex* s_ids = segment_ids.template data<SIndex>();

    // Segments are sorted, so the last id gives the number of segments.
    const SIndex K = N > 0 ? s_ids[N - 1] + 1 : 0;
    auto shape = dataInput.dims();
    shape[0] = K;
    output->Resize(shape);

    T* out = output->template mutable_data<T>();
    if (N == 0) {
      return true;
    }

    TIndex block_size = dataInput.size() / N;

    // Assume the segments are sorted and there are no gaps.
    CAFFE_ENFORCE_EQ(0, s_ids[0], "Indices must be sorted and not have gaps");
    for (TIndex i = 0; i < N;) {
      TIndex start = i;
      // Advance i to the end of the current segment.
      for (++i; i < N && s_ids[start] == s_ids[i]; ++i)
        ;

      RangeReducer()(
          block_size,
          i - start,
          inputAccessor_.getBlockPtr(block_size, start, i - start),
          out + block_size * s_ids[start],
          &context_);

      // The next segment id, if any, must increase by exactly one.
      if (i < N) {
        CAFFE_ENFORCE_EQ(
            s_ids[start] + 1,
            s_ids[i],
            "Indices must be sorted and not have gaps");
      }
    }
    return true;
  }

  static constexpr int kNumInputs = 2;
  INPUT_TAGS(DATA, SEGMENT_IDS);

 private:
  InputAccessor inputAccessor_;
};
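// Illustrative trace (editorial sketch): with SEGMENT_IDS = [0, 0, 1, 2, 2, 2]
// the outer loop above visits start = 0, 2, 3; the inner loop advances i past
// equal ids, so RangeReducer is invoked once per segment with lengths 2, 1
// and 3, writing to output offset block_size * s_ids[start] each time.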
template <
    typename T,
    typename SIndex,
    class Context,
    class RangeReducerGradient>
class AbstractSortedSegmentRangeGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(AbstractSortedSegmentRangeGradientOp);

  bool RunOnDevice() override {
    auto& data_in = Input(DATA_IN);
    auto& data_out = Input(DATA_OUT);
    auto& segment_grads = Input(SEGMENT_GRADS);
    auto& segment_ids = Input(SEGMENT_IDS);
    auto* data_grads = Output(0);

    CAFFE_ENFORCE_EQ(1, segment_ids.ndim(), "SEGMENT_IDS must be a vector");
    TIndex N = segment_ids.dim(0);

    const SIndex* s_ids = segment_ids.template data<SIndex>();
    const T* s_grads = segment_grads.template data<T>();
    const T* d_in = data_in.template data<T>();
    const T* d_out = data_out.template data<T>();

    auto shape = segment_grads.dims();
    shape[0] = N;
    data_grads->Resize(shape);

    const SIndex K = segment_grads.dim(0);
    T* out = data_grads->template mutable_data<T>();
    if (N == 0) {
      return true;
    }

    TIndex block_size = segment_grads.size_from_dim(1);

    // Assume the segments are sorted and there are no gaps.
    CAFFE_ENFORCE_EQ(0, s_ids[0], "Indices must be sorted and not have gaps");
    CAFFE_ENFORCE_EQ(
        K - 1, s_ids[N - 1], "Indices must be sorted and not have gaps");
    for (TIndex i = 0; i < N;) {
      TIndex start = i;
      for (++i; i < N && s_ids[start] == s_ids[i]; ++i)
        ;

      auto expanded_idx = block_size * start;
      auto reduced_idx = block_size * s_ids[start];
      RangeReducerGradient()(
          block_size,
          i - start,
          s_grads + reduced_idx,
          out + expanded_idx,
          d_in + expanded_idx,
          d_out + reduced_idx,
          &context_);

      // The next segment id, if any, must increase by exactly one.
      if (i < N) {
        CAFFE_ENFORCE_EQ(
            s_ids[start] + 1,
            s_ids[i],
            "Indices must be sorted and not have gaps");
      }
    }
    return true;
  }

  static constexpr int kNumInputs = 4;
  INPUT_TAGS(DATA_IN, DATA_OUT, SEGMENT_GRADS, SEGMENT_IDS);
};
template <typename T, typename SIndex, typename Context, typename ReducerDef>
struct AbstractSortedSegmentRangeDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "SortedSegmentRange";
  static constexpr const char* doc = R"DOC(
Applies '{op}' to each segment of input tensor. In order to allow for more
efficient implementation of '{op}', the input segments have to be contiguous
and non-empty.

SEGMENT_IDS is a vector that maps each of the first dimension slices of the
DATA to a particular group (segment). Values belonging to the same segment are
aggregated together.

The first dimension of the output is equal to the number of input segments,
i.e. `SEGMENT_IDS[-1]+1`. Other dimensions are inherited from the input tensor.
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Input(0, "DATA", "Input tensor to be aggregated");
    schema.Input(
        1,
        "SEGMENT_IDS",
        "Vector with the same length as the first dimension of DATA "
        "and values in the range 0..K-1 and in increasing order that "
        "maps each slice of DATA to one of the segments");
    schema.Output(
        0,
        "OUTPUT",
        "Aggregated tensor with the first dimension of K and the "
        "other dimensions inherited from DATA");
  }

  using ForwardOp = AbstractSortedSegmentRangeOp<
      T,
      SIndex,
      Context,
      typename ReducerDef::template Reducer<T, Context>>;
  using BackwardOp = AbstractSortedSegmentRangeGradientOp<
      T,
      SIndex,
      Context,
      typename ReducerDef::template ReducerGradient<T, Context>>;
  struct GetGradient : public GradientMakerBase {
    using GradientMakerBase::GradientMakerBase;
    vector<OperatorDef> GetGradientDefs() override {
      return SingleGradientDef(
          string(basename) + ReducerDef::name + "Gradient",
          "",
          vector<string>{I(0), O(0), GO(0), I(1)},
          // no gradient on segment_ids!
          vector<string>{GI(0)});
    }
  };
};
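// Worked example (editorial sketch; the Sum instantiation is registered as
// SortedSegmentRangeSum): with
//   DATA        = [[1, 2], [3, 4], [5, 6]]
//   SEGMENT_IDS = [0, 0, 1]
// the two rows of segment 0 are reduced together and segment 1 keeps its
// single row, giving OUTPUT = [[4, 6], [5, 6]] of shape (SEGMENT_IDS[-1]+1, 2).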
/**
 * Simple non-segmented reduction over the first few dimensions of the tensor.
 * FirstDim selects between reducing the leading or the trailing
 * `num_reduce_dim` dimensions.
 */
template <
    typename T,
    class Context,
    class Reducer,
    bool FirstDim,
    typename InputAccessor = BaseInputAccessor<T>>
class AbstractReduceFrontOrBackOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  AbstractReduceFrontOrBackOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        OP_SINGLE_ARG(int, "num_reduce_dim", num_reduce_dims_, 1) {}

  bool RunOnDevice() override {
    auto& data = Input(0);
    // If more complicated fixed size logic becomes necessary, it can be
    // moved inside the reducer class.
    TIndex in_block_size = FirstDim
        ? data.size_from_dim(num_reduce_dims_)
        : data.size_to_dim(data.ndim() - num_reduce_dims_);
    return DispatchHelper<typename Reducer::FixedDispatch>::call(
        this, in_block_size);
  }

  template <int FixedSize>
  bool DoRunWithValue() {
    auto& data = Input(0);
    auto* output = Output(0);

    CAFFE_ENFORCE_LE(num_reduce_dims_, data.ndim());

    typename Reducer::Meta ctx(FirstDim);
    ctx.observeInput(0, data, num_reduce_dims_);
    for (int i = 1; i < Reducer::kInputCount; ++i) {
      auto& aux_in = Input(i);
      ctx.observeInput(i, aux_in, num_reduce_dims_);
    }

    OPERATOR_NEEDS_FEATURE(
        inputAccessor_.observeInput(data),
        "Unsupported input type: ",
        data.meta().name(),
        ".");

    vector<TIndex> shape;
    ctx.appendOutputShape(&shape);
    output->Resize(shape);

    T* out = output->template mutable_data<T>();

    const int block_size = FirstDim
        ? data.size_from_dim(num_reduce_dims_)
        : data.size_from_dim(data.ndim() - num_reduce_dims_);
    const int num_blocks = block_size > 0 ? data.size() / block_size : 0;

    Reducer r(ctx, out, &context_);
    for (TIndex i = 0; i < num_blocks; ++i) {
      r.template process<FixedSize>(
          ctx, inputAccessor_.getBlockPtr(block_size, i), i, &context_);
    }
    r.template finish<FixedSize>(ctx, &context_);
    return true;
  }

  static constexpr int kNumInputs = Reducer::kInputCount;

 private:
  int num_reduce_dims_;
  InputAccessor inputAccessor_;
};
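// Shape sketch (editorial, illustrative values): for DATA of shape (2, 3, 4)
// and num_reduce_dim = 1, the FirstDim=true variant uses block_size = 3*4 = 12
// and num_blocks = 2 and produces an output of shape (3, 4); the
// FirstDim=false variant reduces the trailing dimension instead, producing
// shape (2, 3).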
template <
    typename T,
    class Context,
    class ReducerGradient,
    bool FirstDim = true>
class AbstractReduceFrontOrBackGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  AbstractReduceFrontOrBackGradientOp(
      const OperatorDef& operator_def,
      Workspace* ws)
      : Operator<Context>(operator_def, ws),
        OP_SINGLE_ARG(int, "num_reduce_dim", num_reduce_dims_, 1) {}

  bool RunOnDevice() override {
    TIndex grad_block_size = Input(REDUCTION_GRAD).size();
    return DispatchHelper<typename ReducerGradient::FixedDispatch>::call(
        this, grad_block_size);
  }

  template <int FixedSize>
  bool DoRunWithValue() {
    auto& reduction_grad = Input(REDUCTION_GRAD);
    auto& source_shape = OperatorBase::Input<TensorCPU>(SOURCE_SHAPE);
    auto* data_grads = Output(0);

    typename ReducerGradient::Meta ctx(reduction_grad, 0, FirstDim);
    for (int i = 0; i < ReducerGradient::originalInputs().size(); ++i) {
      auto& aux_in = Input(i);
      ctx.observeOriginalInput(
          ReducerGradient::originalInputs()[i],
          aux_in,
          nullptr, /* no grad */
          num_reduce_dims_);
    }

    const T* r_grad = reduction_grad.template data<T>();

    CAFFE_ENFORCE_LE(num_reduce_dims_, source_shape.size());
    vector<TIndex> shape(
        source_shape.template data<TIndex>(),
        source_shape.template data<TIndex>() + source_shape.size());
    data_grads->Resize(shape);

    TIndex block_size = FirstDim
        ? data_grads->size_from_dim(num_reduce_dims_)
        : data_grads->size_from_dim(data_grads->ndim() - num_reduce_dims_);
    TIndex block_num = block_size > 0 ? data_grads->size() / block_size : 0;

    T* out = data_grads->template mutable_data<T>();

    ReducerGradient r(ctx, r_grad, &context_);
    for (TIndex i = 0; i < block_num; ++i) {
      r.template fillGrad<FixedSize>(
          ctx,
          out + block_size * i,
          i,
          &context_,
          FirstDim ? block_num : block_size);
    }
    return true;
  }

  static constexpr int kNumInputs =
      ReducerGradient::originalInputs().size() + 2;
  enum _InputTags {
    REDUCTION_GRAD = ReducerGradient::originalInputs().size(),
    SOURCE_SHAPE
  };

 private:
  int num_reduce_dims_;
};
template <typename T, typename Context, typename ReducerDef>
struct AbstractReduceFrontDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "ReduceFront";
  static constexpr const char* doc = R"DOC(
Reduces the input tensor along the first dimension of the input tensor by
applying '{op}'. This op acts in a similar way to SortedSegment{op} and
UnsortedSegment{op} but as if all input slices belong to a single segment.
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Input(
        0, "DATA", "Input tensor to be reduced on the first dimension");
    schema.TensorInferenceFunction([](const OperatorDef& def,
                                      const vector<TensorShape>& in) {
      CAFFE_ENFORCE_EQ(1, in.size());
      ArgumentHelper helper(def);
      int num_reduce_dims = helper.GetSingleArgument<int>("num_reduce_dim", 1);
      typename ReducerDef::template Reducer<T, Context>::Meta ctx(true);
      vector<TIndex> out_dims = ctx.getOutputShape(in[0], num_reduce_dims);
      return vector<TensorShape>{
          CreateTensorShape(out_dims, in[0].data_type())};
    });
    ReducerDef::PopulateSchema(schema);
  }
  using ReducerGradient =
      typename ReducerDef::template ReducerGradient<T, Context>;
  using ForwardOp = AbstractReduceFrontOrBackOp<
      T,
      Context,
      typename ReducerDef::template Reducer<T, Context>,
      true>;
  using BackwardOp =
      AbstractReduceFrontOrBackGradientOp<T, Context, ReducerGradient, true>;
  struct GetGradient : public GradientMakerBase {
    using GradientMakerBase::GradientMakerBase;
    vector<OperatorDef> GetGradientDefs() override {
      // Shape of the original input is materialized into a temporary blob so
      // the gradient op can restore it.
      string tmp_dims = "_" + O(0) + "_dims";

      vector<string> grad_ins;
      for (const int i : ReducerGradient::originalInputs()) {
        grad_ins.push_back(I(i));
      }
      grad_ins.push_back(GO(0));
      grad_ins.push_back(tmp_dims);

      vector<Argument> args;
      if (ArgumentHelper::HasArgument(def_, "num_reduce_dim")) {
        args.push_back(GetArgument(def_, "num_reduce_dim"));
      }
      return vector<OperatorDef>{
          CreateOperatorDef(
              "Shape", "", vector<string>{I(0)}, vector<string>{tmp_dims}),
          CreateOperatorDef(
              string(basename) + ReducerDef::name + "Gradient",
              "",
              grad_ins,
              // no gradient on auxiliary inputs for now
              vector<string>{GI(0)},
              args),
      };
    }
  };
};
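// Worked example (editorial sketch; the Sum instantiation is registered as
// ReduceFrontSum): with num_reduce_dim = 1 and
//   DATA = [[1, 2, 3],
//           [4, 5, 6]]
// all first-dimension slices are treated as one segment, so
// OUTPUT = [5, 7, 9] with shape (3,).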
template <typename T, typename Context, typename ReducerDef>
struct AbstractReduceBackDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "ReduceBack";
  static constexpr const char* doc = R"DOC(
Reduces the input tensor along the last dimension of the input tensor by
applying '{op}'. This op acts in a similar way to SortedSegment{op} and
UnsortedSegment{op} but as if all input slices belong to a single segment.
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Input(
        0, "DATA", "Input tensor to be reduced on the last dimension");
    schema.TensorInferenceFunction([](const OperatorDef& def,
                                      const vector<TensorShape>& in) {
      CAFFE_ENFORCE_EQ(1, in.size());
      ArgumentHelper helper(def);
      int num_reduce_dims = helper.GetSingleArgument<int>("num_reduce_dim", 1);
      typename ReducerDef::template Reducer<T, Context>::Meta ctx(false);
      vector<TIndex> out_dims = ctx.getOutputShape(in[0], num_reduce_dims);
      return vector<TensorShape>{
          CreateTensorShape(out_dims, in[0].data_type())};
    });
    ReducerDef::PopulateSchema(schema);
  }
  using ReducerGradient =
      typename ReducerDef::template ReducerGradient<T, Context>;
  using ForwardOp = AbstractReduceFrontOrBackOp<
      T,
      Context,
      typename ReducerDef::template Reducer<T, Context>,
      false>;
  using BackwardOp =
      AbstractReduceFrontOrBackGradientOp<T, Context, ReducerGradient, false>;
  struct GetGradient : public GradientMakerBase {
    using GradientMakerBase::GradientMakerBase;
    vector<OperatorDef> GetGradientDefs() override {
      string tmp_dims = "_" + O(0) + "_dims";

      vector<string> grad_ins;
      for (const int i : ReducerGradient::originalInputs()) {
        grad_ins.push_back(I(i));
      }
      grad_ins.push_back(GO(0));
      grad_ins.push_back(tmp_dims);

      vector<Argument> args;
      if (ArgumentHelper::HasArgument(def_, "num_reduce_dim")) {
        args.push_back(GetArgument(def_, "num_reduce_dim"));
      }
      return vector<OperatorDef>{
          CreateOperatorDef(
              "Shape", "", vector<string>{I(0)}, vector<string>{tmp_dims}),
          CreateOperatorDef(
              string(basename) + ReducerDef::name + "Gradient",
              "",
              grad_ins,
              // no gradient on auxiliary inputs for now
              vector<string>{GI(0)},
              args),
      };
    }
  };
};
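// Worked example (editorial sketch; the Sum instantiation is registered as
// ReduceBackSum): with num_reduce_dim = 1 and
//   DATA = [[1, 2, 3],
//           [4, 5, 6]]
// each slice is reduced along the trailing dimension, so
// OUTPUT = [6, 15] with shape (2,).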
/**
 * Segment reduction op with optional fused embedding lookup. Base
 * implementation for SortedSegment{op} and SparseSortedSegment{op}.
 */
template <
    typename T,
    typename SIndex,
    class Context,
    class Reducer,
    bool SparseFused = true,
    typename InputAccessor = BaseInputAccessor<T>>
class AbstractSortedSegmentOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(AbstractSortedSegmentOp);

  bool RunOnDevice() override {
    if (SparseFused) {
      return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(
          this, Input(INDICES));
    } else {
      // type doesn't matter
      return DoRunWithType<TIndex>();
    }
  }

  template <typename IndexType>
  bool DoRunWithType() {
    TIndex in_block_size = Input(0).size_from_dim(1);
    return DispatchHelper<typename Reducer::FixedDispatch, IndexType>::call(
        this, in_block_size);
  }

  template <typename IndexType, int FixedSize>
  bool DoRunWithValue() {
    auto& dataInput = Input(0);
    auto& segment_ids = Input(SEGMENT_IDS);
    auto* output = Output(0);

    CAFFE_ENFORCE_EQ(1, segment_ids.ndim(), "SEGMENT_IDS must be a vector");
    TIndex N = segment_ids.dim(0);
    const TIndex M = dataInput.dim(0);

    const IndexType* idxs;
    if (SparseFused) { // static if
      auto& indices = Input(INDICES);
      CAFFE_ENFORCE_EQ(1, indices.ndim(), "INDICES must be a vector");
      CAFFE_ENFORCE_EQ(
          N,
          indices.dim(0),
          "SEGMENT_IDS must have the same length as INDICES");
      idxs = indices.template data<IndexType>();
    } else {
      CAFFE_ENFORCE_EQ(
          N, M, "DATA must have the same first dimension as SEGMENT_IDS");
    }

    typename Reducer::Meta ctx;
    ctx.observeInput(0, dataInput, 1);
    for (int i = 1; i < Reducer::kInputCount; ++i) {
      auto& aux_in = Input(i);
      CAFFE_ENFORCE_EQ(
          N,
          aux_in.dim(0),
          "Input ",
          i,
          " must have the same first dim as SEGMENT_IDS");
      ctx.observeInput(i, aux_in, 1);
    }

    OPERATOR_NEEDS_FEATURE(
        inputAccessor_.observeInput(dataInput),
        "Unsupported input type: ",
        dataInput.meta().name(),
        ".");

    const SIndex* s_ids = segment_ids.template data<SIndex>();

    const SIndex K = N > 0 ? s_ids[N - 1] + 1 : 0;
    vector<TIndex> shape;
    shape.push_back(K);
    ctx.appendOutputShape(&shape);
    output->Resize(shape);

    T* out = output->template mutable_data<T>();
    if (N == 0) {
      return true;
    }
    TIndex in_block_size = dataInput.size_from_dim(1);
    TIndex out_block_size = output->size_from_dim(1);

    // Assume the segments are sorted and there are no gaps.
    CAFFE_ENFORCE_EQ(0, s_ids[0], "Indices must be sorted and not have gaps");
    for (TIndex i = 0; i < N;) {
      TIndex start = i;

      Reducer r(ctx, out + out_block_size * s_ids[start], &context_);
      for (; i < N && s_ids[start] == s_ids[i]; ++i) {
        IndexType idx;
        if (SparseFused) { // static if
          CAFFE_ENFORCE(
              0 <= idxs[i] && idxs[i] < M,
              "Index out of bounds: ",
              idxs[i],
              ", range 0 to ",
              M);
          idx = idxs[i];
        } else {
          idx = i;
        }
        r.template process<FixedSize>(
            ctx, inputAccessor_.getBlockPtr(in_block_size, idx), i, &context_);
      }
      r.template finish<FixedSize>(ctx, &context_);

      // The next segment id, if any, must increase by exactly one.
      if (i < N) {
        CAFFE_ENFORCE_EQ(
            s_ids[start] + 1,
            s_ids[i],
            "Indices must be sorted and not have gaps");
      }
    }
    return true;
  }

  enum {
    INDICES = Reducer::kInputCount,
    SEGMENT_IDS = Reducer::kInputCount + (SparseFused ? 1 : 0)
  };
  static constexpr int kSelfInputs = SparseFused ? 2 : 1;
  static constexpr int kNumInputs = Reducer::kInputCount + kSelfInputs;

 private:
  InputAccessor inputAccessor_;
};
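// Semantics sketch (editorial): per position i of SEGMENT_IDS the loop above
// effectively performs
//   reduce(OUTPUT[s_ids[i]], DATA[idx]),  idx = SparseFused ? idxs[i] : i,
// so the fused form is equivalent to Gather(DATA, INDICES) followed by the
// non-sparse operator, without materializing the gathered tensor.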
template <
    typename T,
    typename SIndex,
    class Context,
    class ReducerGradient>
class AbstractSortedSegmentGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(AbstractSortedSegmentGradientOp);

  bool RunOnDevice() override {
    TIndex grad_block_size = Input(SEGMENT_GRADS).size_from_dim(1);
    return DispatchHelper<typename ReducerGradient::FixedDispatch>::call(
        this, grad_block_size);
  }

  template <int FixedSize>
  bool DoRunWithValue() {
    auto& segment_grads = Input(SEGMENT_GRADS);
    auto& segment_ids = Input(SEGMENT_IDS);
    auto* data_grads = Output(0);

    CAFFE_ENFORCE_EQ(1, segment_ids.ndim(), "SEGMENT_IDS must be a vector");
    TIndex N = segment_ids.dim(0);

    typename ReducerGradient::Meta ctx(segment_grads, 1);
    for (int i = 0; i < ReducerGradient::originalInputs().size(); ++i) {
      auto& aux_in = Input(i);
      CAFFE_ENFORCE_EQ(
          N,
          aux_in.dim(0),
          "Input ",
          i,
          " must have the same first dim as SEGMENT_IDS");
      ctx.observeOriginalInput(
          ReducerGradient::originalInputs()[i], aux_in, nullptr /*no grad*/, 1);
    }

    const SIndex* s_ids = segment_ids.template data<SIndex>();
    const T* s_grads = segment_grads.template data<T>();

    vector<TIndex> shape;
    shape.push_back(N);
    ctx.appendGradShape(&shape);
    data_grads->Resize(shape);

    TIndex d_block_size = data_grads->size_from_dim(1);
    const SIndex K = segment_grads.dim(0);
    TIndex s_block_size = segment_grads.size_from_dim(1);
    T* out = data_grads->template mutable_data<T>();
    if (N == 0) {
      return true;
    }

    // Assume the segments are sorted and there are no gaps; repeat the
    // checks from the forward op.
    CAFFE_ENFORCE_EQ(0, s_ids[0], "Indices must be sorted and not have gaps");
    CAFFE_ENFORCE_EQ(
        K - 1, s_ids[N - 1], "Indices must be sorted and not have gaps");
    for (TIndex i = 0; i < N;) {
      TIndex start = i;
      TIndex end = start;

      if (ReducerGradient::computeLength()) {
        for (; end < N && s_ids[start] == s_ids[end]; ++end) {
        }
      }

      ReducerGradient r(ctx, s_grads + s_block_size * s_ids[start], &context_);
      for (; i < N && s_ids[start] == s_ids[i]; ++i) {
        r.template fillGrad<FixedSize>(
            ctx, out + d_block_size * i, i, &context_, end - start);
      }

      // The next segment id, if any, must increase by exactly one.
      if (i < N) {
        CAFFE_ENFORCE_EQ(
            s_ids[start] + 1,
            s_ids[i],
            "Indices must be sorted and not have gaps");
      }
    }
    return true;
  }

  // Input layout:
  //   orig_arg1, orig_arg2, ..., orig_argN, SEGMENT_GRADS, SEGMENT_IDS
  static constexpr int kNumInputs =
      ReducerGradient::originalInputs().size() + 2;
  enum _InputTags {
    SEGMENT_GRADS = ReducerGradient::originalInputs().size(),
    SEGMENT_IDS
  };
};
// Base implementation of sorted/unsorted, sparse/non-sparse gradient
// computation.
template <
    typename ForwardOp,
    typename ReducerDef,
    typename ReducerGradient,
    bool Sorted,
    bool SparseFused>
struct SegmentOpGetGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    CAFFE_ENFORCE(
        !ReducerGradient::requiresDataInput(Def()),
        "grads on aux inputs are not yet implemented for Segment operators.");
    vector<string> grad_ins;
    for (const int i : ReducerGradient::originalInputs()) {
      grad_ins.push_back(I(i));
    }
    grad_ins.push_back(GO(0));
    grad_ins.push_back(I(ForwardOp::SEGMENT_IDS));
    vector<OperatorDef> r{CreateOperatorDef(
        string(Sorted ? "SortedSegment" : "UnsortedSegment") +
            ReducerDef::name + "Gradient",
        "",
        grad_ins,
        // no gradient on segment_ids or auxiliary inputs for now
        vector<string>{SparseFused ? GI_V(0) : GI(0)})};
    if (SparseFused) {
      SetSparse(0, I(ForwardOp::INDICES), GI_V(0));
    }
    return r;
  }
};
template <typename T, typename SIndex, typename Context, typename ReducerDef>
struct AbstractSortedSegmentDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "SortedSegment";
  static constexpr const char* doc = R"DOC(
Applies '{op}' to each segment of input tensor. Segments need to be sorted and
contiguous. See also UnsortedSegment{op} that doesn't have this requirement.

SEGMENT_IDS is a vector that maps each of the first dimension slices of the
DATA to a particular group (segment). Values belonging to the same segment are
aggregated together.

The first dimension of the output is equal to the number of input segments,
i.e. `SEGMENT_IDS[-1]+1`. Other dimensions are inherited from the input tensor.
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Input(0, "DATA", "Input tensor, slices of which are aggregated.");
    schema.Input(
        Reducer::kInputCount,
        "SEGMENT_IDS",
        "Vector with the same length as the first dimension of DATA "
        "and values in the range 0..K-1 and in increasing order that "
        "maps each slice of DATA to one of the segments");
    schema.Output(
        0,
        "OUTPUT",
        "Aggregated output tensor. Has the first dimension of K "
        "(the number of segments).");
    ReducerDef::PopulateSchema(schema);
  }
  using Reducer = typename ReducerDef::template Reducer<T, Context>;
  using ReducerGradient =
      typename ReducerDef::template ReducerGradient<T, Context>;
  using ForwardOp =
      AbstractSortedSegmentOp<T, SIndex, Context, Reducer, false>;
  using BackwardOp =
      AbstractSortedSegmentGradientOp<T, SIndex, Context, ReducerGradient>;
  using GetGradient = SegmentOpGetGradient<
      ForwardOp,
      ReducerDef,
      ReducerGradient,
      true /*Sorted*/,
      false /*SparseFused*/>;
};
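// Worked example (editorial sketch; the Sum instantiation is registered as
// SortedSegmentSum): with
//   DATA        = [[1, 2], [3, 4], [5, 6]]
//   SEGMENT_IDS = [0, 0, 1]
// OUTPUT = [[4, 6], [5, 6]]. Ids must start at 0 and increase without gaps,
// so SEGMENT_IDS = [0, 2] would trip the sortedness checks in
// AbstractSortedSegmentOp.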
template <typename T, typename SIndex, typename Context, typename ReducerDef>
struct AbstractSparseSortedSegmentDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "SparseSortedSegment";
  static constexpr const char* doc = R"DOC(
Pulls in slices of the input tensor, groups them into segments and applies
'{op}' to each segment. Segments need to be sorted and contiguous. See also
SparseUnsortedSegment{op} that doesn't have this requirement.

This op is basically Gather and SortedSegment{op} fused together.

INDICES should contain integers in range 0..N-1 where N is the first dimension
of DATA. INDICES represent which slices of DATA need to be pulled in.

SEGMENT_IDS is a vector that maps each referenced slice of the DATA to a
particular group (segment). Values belonging to the same segment are aggregated
together. SEGMENT_IDS should have the same dimension as INDICES.

The first dimension of the output is equal to the number of input segments,
i.e. `SEGMENT_IDS[-1]+1`. Other dimensions are inherited from the input tensor.
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Input(0, "DATA", "Input tensor, slices of which are aggregated.");
    schema.Input(
        Reducer::kInputCount,
        "INDICES",
        "Integer vector containing indices of the first dimension of DATA for "
        "the slices that are being aggregated");
    schema.Input(
        Reducer::kInputCount + 1,
        "SEGMENT_IDS",
        "Vector with the same length as INDICES and values in the range "
        "0..K-1 and in increasing order that maps each slice of DATA referenced"
        " by INDICES to one of the segments");
    schema.Output(
        0,
        "OUTPUT",
        "Aggregated output tensor. Has the first dimension of K "
        "(the number of segments).");
    ReducerDef::PopulateSchema(schema);
  }
  using Reducer = typename ReducerDef::template Reducer<T, Context>;
  using ReducerGradient =
      typename ReducerDef::template ReducerGradient<T, Context>;
  using ForwardOp = AbstractSortedSegmentOp<T, SIndex, Context, Reducer>;
  using BackwardOp =
      AbstractSortedSegmentGradientOp<T, SIndex, Context, ReducerGradient>;
  using GetGradient = SegmentOpGetGradient<
      ForwardOp,
      ReducerDef,
      ReducerGradient,
      true /*Sorted*/,
      true /*SparseFused*/>;
};
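// Worked example (editorial sketch; the Sum instantiation is registered as
// SparseSortedSegmentSum): with
//   DATA        = [[1, 1], [2, 2], [3, 3]]
//   INDICES     = [2, 0, 0]
//   SEGMENT_IDS = [0, 0, 1]
// rows 2, 0 and 0 of DATA are gathered and reduced by segment, giving
// OUTPUT = [[4, 4], [1, 1]].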
/**
 * Unsorted segment reduction op with optional fused embedding lookup. Base
 * implementation for UnsortedSegment{op} and SparseUnsortedSegment{op}.
 */
template <
    typename T,
    typename SIndex,
    class Context,
    class Reducer,
    bool SparseFused = true,
    typename InputAccessor = BaseInputAccessor<T>>
class AbstractUnsortedSegmentOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  AbstractUnsortedSegmentOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        OP_SINGLE_ARG(int, "num_segments", num_segments_, -1) {}

  bool RunOnDevice() override {
    if (SparseFused) {
      return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(
          this, Input(INDICES));
    } else {
      // type doesn't matter
      return DoRunWithType<TIndex>();
    }
  }

  template <typename IndexType>
  bool DoRunWithType() {
    TIndex in_block_size = Input(0).size_from_dim(1);
    return DispatchHelper<typename Reducer::FixedDispatch, IndexType>::call(
        this, in_block_size);
  }

  template <typename IndexType, int FixedSize>
  bool DoRunWithValue() {
    auto& data = Input(0);
    auto& segment_ids = Input(SEGMENT_IDS);
    auto* output = Output(0);

    CAFFE_ENFORCE_EQ(1, segment_ids.ndim(), "SEGMENT_IDS must be a vector");
    TIndex N = segment_ids.dim(0);
    const TIndex M = data.dim(0);

    const IndexType* idxs;
    if (SparseFused) { // static if
      auto& indices = Input(INDICES);
      CAFFE_ENFORCE_EQ(1, indices.ndim(), "INDICES must be a vector");
      CAFFE_ENFORCE_EQ(
          N,
          indices.dim(0),
          "SEGMENT_IDS must have the same length as INDICES");
      idxs = indices.template data<IndexType>();
    } else {
      CAFFE_ENFORCE_EQ(
          N, M, "DATA must have the same first dimension as SEGMENT_IDS");
    }

    typename Reducer::Meta ctx;
    ctx.observeInput(0, data, 1);
    for (int i = 1; i < Reducer::kInputCount; ++i) {
      auto& aux_in = Input(i);
      CAFFE_ENFORCE_EQ(
          N,
          aux_in.dim(0),
          "Input ",
          i,
          " must have the same first dim as SEGMENT_IDS");
      ctx.observeInput(i, aux_in, 1);
    }

    const SIndex* s_ids = segment_ids.template data<SIndex>();
    OPERATOR_NEEDS_FEATURE(
        inputAccessor_.observeInput(data),
        "Unsupported input type: ",
        data.meta().name(),
        ".");

    // Determine the number of segments.
    SIndex K;
    if (num_segments_ != -1) {
      K = num_segments_;
    } else {
      K = 0;
      for (TIndex i = 0; i < N; ++i) {
        K = std::max(K, s_ids[i] + 1);
      }
    }

    vector<TIndex> shape;
    shape.push_back(K);
    ctx.appendOutputShape(&shape);
    output->Resize(shape);

    TIndex in_block_size = data.size_from_dim(1);
    TIndex out_block_size = output->size_from_dim(1);
    T* out = output->template mutable_data<T>();

    reducers_.clear();
    reducers_.reserve(K);
    for (TIndex i = 0; i < K; ++i) {
      reducers_.emplace_back(ctx, out + out_block_size * i, &context_);
    }

    for (TIndex i = 0; i < N; ++i) {
      auto s_id = s_ids[i];
      CAFFE_ENFORCE(
          0 <= s_id && s_id < K,
          "Segment id out of range: ",
          s_id,
          ", range 0 to ",
          K);
      IndexType idx;
      if (SparseFused) { // static if
        CAFFE_ENFORCE(
            0 <= idxs[i] && idxs[i] < M,
            "Index out of bounds: ",
            idxs[i],
            ", range 0 to ",
            M);
        idx = idxs[i];
      } else {
        idx = i;
      }
      reducers_[s_id].template process<FixedSize>(
          ctx, inputAccessor_.getBlockPtr(in_block_size, idx), i, &context_);
    }

    for (TIndex i = 0; i < K; ++i) {
      reducers_[i].template finish<FixedSize>(ctx, &context_);
    }
    // Call reducers' destructors (if there are any).
    reducers_.clear();
    return true;
  }

  enum {
    INDICES = Reducer::kInputCount,
    SEGMENT_IDS = Reducer::kInputCount + (SparseFused ? 1 : 0)
  };
  static constexpr int kSelfInputs = SparseFused ? 2 : 1;
  static constexpr int kNumInputs = Reducer::kInputCount + kSelfInputs;

 private:
  TIndex num_segments_;
  // Member field to reuse memory.
  vector<Reducer> reducers_;
  InputAccessor inputAccessor_;
};
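// Note on output sizing (editorial, illustrative values): with
// SEGMENT_IDS = [3, 0, 3] and no num_segments argument, K = max(id) + 1 = 4,
// so output rows 1 and 2 receive no inputs and are left at the reducer's
// initial value (zeros for the Sum reducer). Passing num_segments explicitly
// is useful when trailing segments may be empty.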
template <
    typename T,
    typename SIndex,
    class Context,
    class ReducerGradient>
class AbstractUnsortedSegmentGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(AbstractUnsortedSegmentGradientOp);

  bool RunOnDevice() override {
    TIndex grad_block_size = Input(SEGMENT_GRADS).size_from_dim(1);
    return DispatchHelper<typename ReducerGradient::FixedDispatch>::call(
        this, grad_block_size);
  }

  template <int FixedSize>
  bool DoRunWithValue() {
    auto& segment_grads = Input(SEGMENT_GRADS);
    auto& segment_ids = Input(SEGMENT_IDS);
    auto* data_grads = Output(0);

    CAFFE_ENFORCE_EQ(1, segment_ids.ndim(), "SEGMENT_IDS must be a vector");
    TIndex N = segment_ids.dim(0);

    typename ReducerGradient::Meta ctx(segment_grads, 1);
    for (int i = 0; i < ReducerGradient::originalInputs().size(); ++i) {
      auto& aux_in = Input(i);
      CAFFE_ENFORCE_EQ(
          N,
          aux_in.dim(0),
          "Input ",
          i,
          " must have the same first dim as SEGMENT_IDS");
      ctx.observeOriginalInput(
          ReducerGradient::originalInputs()[i], aux_in, nullptr /*no grad*/, 1);
    }

    const SIndex* s_ids = segment_ids.template data<SIndex>();
    const T* s_grads = segment_grads.template data<T>();

    vector<TIndex> shape;
    shape.push_back(N);
    ctx.appendGradShape(&shape);
    data_grads->Resize(shape);

    TIndex d_block_size = data_grads->size_from_dim(1);
    const SIndex K = segment_grads.dim(0);
    TIndex s_block_size = segment_grads.size_from_dim(1);
    T* out = data_grads->template mutable_data<T>();

    if (ReducerGradient::computeLength()) {
      segment_length_.resize(K, 0);
      for (int i = 0; i < N; ++i) {
        auto s_id = s_ids[i];
        CAFFE_ENFORCE(
            0 <= s_id && s_id < K,
            "Segment id out of range: ",
            s_id,
            ", range 0 to ",
            K);
        segment_length_[s_ids[i]]++;
      }
    }

    reducers_.clear();
    reducers_.reserve(K);
    for (SIndex i = 0; i < K; ++i) {
      reducers_.emplace_back(ctx, s_grads + s_block_size * i, &context_);
    }

    for (TIndex i = 0; i < N; ++i) {
      auto s_id = s_ids[i];
      if (ReducerGradient::computeLength()) {
        reducers_[s_id].template fillGrad<FixedSize>(
            ctx, out + d_block_size * i, i, &context_, segment_length_[s_id]);
      } else {
        reducers_[s_id].template fillGrad<FixedSize>(
            ctx, out + d_block_size * i, i, &context_, 0);
      }
    }
    // Call reducers' destructors (if there are any).
    reducers_.clear();
    return true;
  }

  // Input layout:
  //   orig_arg1, orig_arg2, ..., orig_argN, SEGMENT_GRADS, SEGMENT_IDS
  static constexpr int kNumInputs =
      ReducerGradient::originalInputs().size() + 2;
  enum _InputTags {
    SEGMENT_GRADS = ReducerGradient::originalInputs().size(),
    SEGMENT_IDS
  };

 private:
  // Member fields to reuse memory.
  vector<ReducerGradient> reducers_;
  vector<int> segment_length_;
};
template <typename T, typename SIndex, typename Context, typename ReducerDef>
struct AbstractUnsortedSegmentDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "UnsortedSegment";
  static constexpr const char* doc = R"DOC(
Applies '{op}' to each segment of input tensor. Segment ids can appear in
arbitrary order (unlike in SortedSegment{op}).

SEGMENT_IDS is a vector that maps each of the first dimension slices of the
DATA to a particular group (segment). Values belonging to the same segment are
aggregated together.

If the `num_segments` argument is passed, it is used as the first dimension of
the output. Otherwise it is computed dynamically as the max value of
SEGMENT_IDS plus one. Other output dimensions are inherited from the input
tensor.
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Arg(
        "num_segments",
        "Optional int argument specifying the number of output segments and "
        "thus the first dimension of the output");
    schema.Input(0, "DATA", "Input tensor, slices of which are aggregated.");
    schema.Input(
        Reducer::kInputCount,
        "SEGMENT_IDS",
        "Integer vector with the same length as the first dimension of DATA "
        "that maps each slice of DATA to one of the segments");
    schema.Output(
        0,
        "OUTPUT",
        "Aggregated output tensor. Has the first dimension equal to the "
        "number of segments.");
    ReducerDef::PopulateSchema(schema);
  }
  using Reducer = typename ReducerDef::template Reducer<T, Context>;
  using ReducerGradient =
      typename ReducerDef::template ReducerGradient<T, Context>;
  using ForwardOp = AbstractUnsortedSegmentOp<
      T,
      SIndex,
      Context,
      typename ReducerDef::template Reducer<T, Context>,
      false>;
  using BackwardOp =
      AbstractUnsortedSegmentGradientOp<T, SIndex, Context, ReducerGradient>;
  using GetGradient = SegmentOpGetGradient<
      ForwardOp,
      ReducerDef,
      ReducerGradient,
      false /*Sorted*/,
      false /*SparseFused*/>;
};
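// Worked example (editorial sketch; the Sum instantiation is registered as
// UnsortedSegmentSum): with
//   DATA        = [[1, 2], [3, 4], [5, 6]]
//   SEGMENT_IDS = [1, 0, 1]
// OUTPUT = [[3, 4], [6, 8]]; with num_segments = 4 the output would instead
// have shape (4, 2), its last two rows holding the reducer's initial value.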
template <typename T, typename SIndex, typename Context, typename ReducerDef>
struct AbstractSparseUnsortedSegmentDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "SparseUnsortedSegment";
  static constexpr const char* doc = R"DOC(
Pulls in slices of the input tensor, groups them into segments and applies
'{op}' to each segment. Segment ids can appear in arbitrary order (unlike in
SparseSortedSegment{op}).

This op is basically Gather and UnsortedSegment{op} fused together.

INDICES should contain integers in range 0..N-1 where N is the first dimension
of DATA. INDICES represent which slices of DATA need to be pulled in.

SEGMENT_IDS is a vector that maps each referenced slice of the DATA to a
particular group (segment). Values belonging to the same segment are aggregated
together. SEGMENT_IDS should have the same dimension as INDICES.

If the `num_segments` argument is passed, it is used as the first dimension of
the output. Otherwise it is computed dynamically as the max value of
SEGMENT_IDS plus one. Other output dimensions are inherited from the input
tensor.
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Input(0, "DATA", "Input tensor, slices of which are aggregated.");
    schema.Input(
        Reducer::kInputCount,
        "INDICES",
        "Integer vector containing indices of the first dimension of DATA for "
        "the slices that are being aggregated");
    schema.Input(
        Reducer::kInputCount + 1,
        "SEGMENT_IDS",
        "Integer vector with the same length as INDICES that maps each slice "
        "of DATA referenced by INDICES to one of the segments");
    schema.Output(
        0,
        "OUTPUT",
        "Aggregated output tensor. Has the first dimension equal to the "
        "number of segments.");
    ReducerDef::PopulateSchema(schema);
  }
  using Reducer = typename ReducerDef::template Reducer<T, Context>;
  using ReducerGradient =
      typename ReducerDef::template ReducerGradient<T, Context>;
  using ForwardOp = AbstractUnsortedSegmentOp<T, SIndex, Context, Reducer>;
  using BackwardOp =
      AbstractUnsortedSegmentGradientOp<T, SIndex, Context, ReducerGradient>;
  using GetGradient = SegmentOpGetGradient<
      ForwardOp,
      ReducerDef,
      ReducerGradient,
      false /*Sorted*/,
      true /*SparseFused*/>;
};
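// Worked example (editorial sketch; the Sum instantiation is registered as
// SparseUnsortedSegmentSum): with
//   DATA        = [[1, 1], [2, 2], [3, 3]]
//   INDICES     = [0, 2, 2]
//   SEGMENT_IDS = [1, 1, 0]
// rows 0, 2 and 2 are gathered and reduced into segments, giving
// OUTPUT = [[3, 3], [4, 4]].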
/**
 * Segment reduction op with optional fused embedding lookup, where segments
 * are defined by a vector of LENGTHS. Base implementation for Lengths{op}
 * and SparseLengths{op}.
 */
template <
    typename TData,
    typename TLengths,
    class Context,
    class Reducer,
    bool SparseFused = true,
    typename InputAccessor = BaseInputAccessor<TData>>
class AbstractLengthsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(AbstractLengthsOp);

  bool RunOnDevice() override {
    if (SparseFused) {
      return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(
          this, Input(INDICES));
    } else {
      // type doesn't matter
      return DoRunWithType<TIndex>();
    }
  }

  template <typename IndexType>
  bool DoRunWithType() {
    TIndex in_block_size = Input(0).size_from_dim(1);
    return DispatchHelper<typename Reducer::FixedDispatch, IndexType>::call(
        this, in_block_size);
  }

  template <typename IndexType, int FixedSize>
  bool DoRunWithValue() {
    auto& dataInput = Input(0);
    auto& lengthsInput = Input(LENGTHS);
    auto* output = Output(0);

    CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector");
    const TIndex dataSize = dataInput.dim(0);
    // Either the first dim of the data or the number of indices pulled in.
    TIndex dataToReduceSize;
    const TIndex outputSize = lengthsInput.dim(0);

    const IndexType* indices;
    if (SparseFused) { // static if
      auto& indicesInput = Input(INDICES);
      CAFFE_ENFORCE_EQ(1, indicesInput.ndim(), "INDICES must be a vector");
      indices = indicesInput.template data<IndexType>();
      dataToReduceSize = indicesInput.dim(0);
    } else {
      dataToReduceSize = dataSize;
    }

    typename Reducer::Meta ctx;
    ctx.observeInput(0, dataInput, 1);
    for (int i = 1; i < Reducer::kInputCount; ++i) {
      auto& aux_in = Input(i);
      CAFFE_ENFORCE(
          dataToReduceSize == aux_in.dim(0),
          "Input ",
          i,
          " must have the same first dim as the data to reduce");
      ctx.observeInput(i, aux_in, 1);
    }

    const TLengths* lengths = lengthsInput.template data<TLengths>();

    OPERATOR_NEEDS_FEATURE(
        inputAccessor_.observeInput(dataInput),
        "Unsupported input type: ",
        dataInput.meta().name(),
        ".");

    vector<TIndex> shape{outputSize};
    ctx.appendOutputShape(&shape);
    output->Resize(shape);

    TIndex in_block_size = dataInput.size_from_dim(1);
    TIndex out_block_size = output->size_from_dim(1);
    TData* out = output->template mutable_data<TData>();

    TIndex dataIndex = 0;
    for (TIndex rangeIndex = 0; rangeIndex < outputSize; ++rangeIndex) {
      Reducer reducer(ctx, out + out_block_size * rangeIndex, &context_);
      for (TIndex start = dataIndex; dataIndex < start + lengths[rangeIndex];
           ++dataIndex) {
        IndexType idx;
        if (SparseFused) { // static if
          idx = indices[dataIndex];
          CAFFE_ENFORCE(
              0 <= idx && idx < dataSize,
              "The ",
              dataIndex,
              "th index from the input indices is out of bounds: ",
              idx,
              " vs. valid range 0 to ",
              dataSize);
        } else {
          idx = dataIndex;
          CAFFE_ENFORCE(
              0 <= idx && idx < dataSize,
              "When calculating the ",
              rangeIndex,
              "th output with length=",
              lengths[rangeIndex],
              ", the index is out of bounds: ",
              idx,
              " vs. valid range 0 to ",
              dataSize);
        }

        const TData* input = inputAccessor_.getBlockPtr(in_block_size, idx);
        reducer.template process<FixedSize>(ctx, input, dataIndex, &context_);
      }
      reducer.template finish<FixedSize>(ctx, &context_);
    }
    CAFFE_ENFORCE(
        dataIndex == dataToReduceSize, dataIndex, " != ", dataToReduceSize);
    return true;
  }

  enum {
    INDICES = Reducer::kInputCount,
    LENGTHS = Reducer::kInputCount + (SparseFused ? 1 : 0)
  };
  static constexpr int kSelfInputs = SparseFused ? 2 : 1;
  static constexpr int kNumInputs = Reducer::kInputCount + kSelfInputs;

 private:
  InputAccessor inputAccessor_;
};
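// Worked example (editorial sketch; the Sum instantiation is registered as
// LengthsSum): with
//   DATA    = [[1, 2], [3, 4], [5, 6]]
//   LENGTHS = [2, 1]
// DATA[0..1] is reduced into output row 0 and DATA[2] into row 1, giving
// OUTPUT = [[4, 6], [5, 6]]; sum(LENGTHS) must equal DATA.dim(0) (or the
// INDICES length in the SparseFused case), which the final enforce verifies.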
template <
    typename T,
    typename TLengths,
    class Context,
    class ReducerGradient,
    bool GradientNeedIndices = false>
class AbstractLengthsGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(AbstractLengthsGradientOp);

  bool RunOnDevice() override {
    TIndex gradBlockSize = Input(SEGMENT_GRADS).size_from_dim(1);
    return DispatchHelper<typename ReducerGradient::FixedDispatch>::call(
        this, gradBlockSize);
  }

  template <int FixedSize>
  bool DoRunWithValue() {
    auto& segmentGradsInput = Input(SEGMENT_GRADS);
    auto& lengthsInput = Input(LENGTHS);
    auto* dataGradsOutput = Output(0);

    CAFFE_ENFORCE(lengthsInput.ndim() == 1, "LENGTHS must be a vector");
    TIndex reducedDataSize = 0;
    TIndex numSegments = lengthsInput.dim(0);
    CAFFE_ENFORCE(segmentGradsInput.ndim() > 0);
    CAFFE_ENFORCE(numSegments == segmentGradsInput.dim(0));
    const TLengths* lengths = lengthsInput.template data<TLengths>();
    for (TIndex i = 0; i < numSegments; ++i) {
      reducedDataSize += lengths[i];
    }

    typename ReducerGradient::Meta ctx(segmentGradsInput, 1);
    for (int i = 0; i < ReducerGradient::originalInputs().size(); ++i) {
      auto& aux_in = Input(i);
      CAFFE_ENFORCE_EQ(
          reducedDataSize,
          aux_in.dim(0),
          "Input ",
          i,
          " must have the same first dim as the reduced data");
      ctx.observeOriginalInput(
          ReducerGradient::originalInputs()[i], aux_in, nullptr /*no grad*/, 1);
    }

    const T* segmentGrads = segmentGradsInput.template data<T>();

    vector<TIndex> shape;
    shape.push_back(reducedDataSize);
    ctx.appendGradShape(&shape);
    dataGradsOutput->Resize(shape);

    TIndex dataGradsBlockSize = dataGradsOutput->size_from_dim(1);
    TIndex segmentBlockSize = segmentGradsInput.size_from_dim(1);
    T* dataGrads = dataGradsOutput->template mutable_data<T>();

    TIndex dataIndex = 0;
    for (TIndex rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
      ReducerGradient reducer(
          ctx, segmentGrads + segmentBlockSize * rangeIndex, &context_);
      for (TIndex start = dataIndex; dataIndex < start + lengths[rangeIndex];
           ++dataIndex) {
        reducer.template fillGrad<FixedSize>(
            ctx,
            dataGrads + dataGradsBlockSize * dataIndex,
            dataIndex,
            &context_,
            lengths[rangeIndex]);
      }
    }
    CAFFE_ENFORCE(
        dataIndex == reducedDataSize, dataIndex, " != ", reducedDataSize);
    return true;
  }

  // Input layout:
  //   orig_arg1, orig_arg2, ..., orig_argN, SEGMENT_GRADS, LENGTHS, [INDICES]
  static constexpr int kNumInputs = ReducerGradient::originalInputs().size() +
      2 + (GradientNeedIndices ? 1 : 0);
  enum _InputTags {
    SEGMENT_GRADS = ReducerGradient::originalInputs().size(),
    LENGTHS,
    INDICES
  };
};
template <
    typename T,
    typename TLengths,
    class Context,
    class ReducerGradient,
    bool SparseFused = true,
    bool GradientNeedIndices = false>
class AbstractLengthsWithMainInputGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(AbstractLengthsWithMainInputGradientOp);

  bool RunOnDevice() override {
    if (SparseFused) {
      return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(
          this, Input(INDICES));
    } else {
      // type doesn't matter
      return DoRunWithType<TIndex>();
    }
  }

  template <typename IndexType>
  bool DoRunWithType() {
    TIndex in_block_size = Input(SEGMENT_GRADS).size_from_dim(1);
    return DispatchHelper<typename ReducerGradient::FixedDispatch, IndexType>::
        call(this, in_block_size);
  }

  template <typename IndexType, int FixedSize>
  bool DoRunWithValue() {
    auto& dataInput = Input(DATA_INPUT);
    auto& segmentGradsInput = Input(SEGMENT_GRADS);
    auto& lengthsInput = Input(LENGTHS);
    auto* dataGradsOutput = Output(0);

    CAFFE_ENFORCE(lengthsInput.ndim() == 1, "LENGTHS must be a vector");
    TIndex numSegments = lengthsInput.dim(0);
    CAFFE_ENFORCE(segmentGradsInput.ndim() > 0);
    CAFFE_ENFORCE(numSegments == segmentGradsInput.dim(0));
    const TLengths* lengths = lengthsInput.template data<TLengths>();

    typename ReducerGradient::Meta ctx(segmentGradsInput, 1);
    for (int i = 0; i < ReducerGradient::originalInputs().size(); ++i) {
      int aux_num = ReducerGradient::originalInputs()[i];
      auto& aux_in = Input(i);
      auto* aux_grad = aux_num < OutputSize() ? Output(aux_num) : nullptr;
      ctx.observeOriginalInput(aux_num, aux_in, aux_grad, 1);
    }

    // Either the first dim of the data or the number of indices pulled in.
    TIndex dataToReduceSize;
    const IndexType* indices = nullptr;
    if (SparseFused) { // static if
      auto& indicesInput = Input(INDICES);
      indices = indicesInput.template data<IndexType>();
      dataToReduceSize = indicesInput.dim(0);
    } else {
      dataToReduceSize = dataInput.dim(0);
    }

    const T* segmentGrads = segmentGradsInput.template data<T>();

    vector<TIndex> shape;
    shape.push_back(dataToReduceSize);
    ctx.appendGradShape(&shape);
    dataGradsOutput->Resize(shape);

    TIndex dataGradsBlockSize = dataGradsOutput->size_from_dim(1);
    TIndex segmentBlockSize = segmentGradsInput.size_from_dim(1);
    T* dataGrads = dataGradsOutput->template mutable_data<T>();

    const T* data = dataInput.template data<T>();

    TIndex dataIndex = 0;
    for (TIndex rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
      ReducerGradient reducer(
          ctx, segmentGrads + segmentBlockSize * rangeIndex, &context_);
      for (TIndex start = dataIndex; dataIndex < start + lengths[rangeIndex];
           ++dataIndex) {
        IndexType data_pos;
        // No range checking; should have been verified in the forward pass.
        if (SparseFused) { // static if
          data_pos = indices[dataIndex];
        } else {
          data_pos = dataIndex;
        }
        reducer.template fillGradWithMainInput<FixedSize>(
            ctx,
            data + dataGradsBlockSize * data_pos,
            dataGrads + dataGradsBlockSize * dataIndex,
            dataIndex,
            &context_,
            lengths[rangeIndex]);
      }
    }
    return true;
  }

  // Input layout:
  //   orig_arg1, orig_arg2, ..., orig_argN, SEGMENT_GRADS, LENGTHS,
  //   DATA_INPUT, [INDICES]
  static constexpr int kNumInputs = ReducerGradient::originalInputs().size() +
      3 + (SparseFused ? 1 : 0) + (GradientNeedIndices ? 1 : 0);
  enum _InputTags {
    SEGMENT_GRADS = ReducerGradient::originalInputs().size(),
    LENGTHS,
    DATA_INPUT,
    INDICES
  };
};
template <typename T, typename TLengths, class Context, class ReducerGradient>
class AbstractLengthsWithMainInputAndForwardOutputGradientOp
    : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(AbstractLengthsWithMainInputAndForwardOutputGradientOp);

  bool RunOnDevice() override {
    TIndex in_block_size = Input(SEGMENT_GRADS).size_from_dim(1);
    return DispatchHelper<typename ReducerGradient::FixedDispatch>::call(
        this, in_block_size);
  }

  template <int FixedSize>
  bool DoRunWithValue() {
    auto& dataInput = Input(DATA_INPUT);
    auto& segmentGradsInput = Input(SEGMENT_GRADS);
    auto& lengthsInput = Input(LENGTHS);
    auto& forwardOutputInput = Input(FORWARD_OUTPUT);
    auto* dataGradsOutput = Output(0);

    CAFFE_ENFORCE(lengthsInput.ndim() == 1, "LENGTHS must be a vector");
    TIndex numSegments = lengthsInput.dim(0);
    CAFFE_ENFORCE(segmentGradsInput.ndim() > 0);
    CAFFE_ENFORCE(numSegments == segmentGradsInput.dim(0));
    const TLengths* lengths = lengthsInput.template data<TLengths>();

    typename ReducerGradient::Meta ctx(segmentGradsInput, 1);
    for (int i = 0; i < ReducerGradient::originalInputs().size(); ++i) {
      int aux_num = ReducerGradient::originalInputs()[i];
      auto& aux_in = Input(i);
      auto* aux_grad = aux_num < OutputSize() ? Output(aux_num) : nullptr;
      ctx.observeOriginalInput(aux_num, aux_in, aux_grad, 1);
    }

    CAFFE_ENFORCE(forwardOutputInput.ndim() > 0);
    CAFFE_ENFORCE(numSegments == forwardOutputInput.dim(0));
    const T* forwardOutput = forwardOutputInput.template data<T>();

    TIndex dataToReduceSize = dataInput.dim(0);

    const T* segmentGrads = segmentGradsInput.template data<T>();

    vector<TIndex> shape;
    shape.push_back(dataToReduceSize);
    ctx.appendGradShape(&shape);
    dataGradsOutput->Resize(shape);

    TIndex dataGradsBlockSize = dataGradsOutput->size_from_dim(1);
    TIndex segmentBlockSize = segmentGradsInput.size_from_dim(1);
    T* dataGrads = dataGradsOutput->template mutable_data<T>();

    const T* data = dataInput.template data<T>();

    TIndex dataIndex = 0;
    for (TIndex rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
      ReducerGradient reducer(
          ctx, segmentGrads + segmentBlockSize * rangeIndex, &context_);
      for (TIndex start = dataIndex; dataIndex < start + lengths[rangeIndex];
           ++dataIndex) {
        // No range checking; should have been verified in the forward pass.
        reducer.template fillGradWithMainInputAndForwardOutput<FixedSize>(
            ctx,
            data + dataGradsBlockSize * dataIndex,
            dataGrads + dataGradsBlockSize * dataIndex,
            forwardOutput + segmentBlockSize * rangeIndex,
            dataIndex,
            &context_,
            lengths[rangeIndex]);
      }
    }
    return true;
  }

  // Input layout:
  //   orig_arg1, orig_arg2, ..., orig_argN, FORWARD_OUTPUT, SEGMENT_GRADS,
  //   LENGTHS, DATA_INPUT
  static constexpr int kNumInputs =
      ReducerGradient::originalInputs().size() + 4;
  enum _InputTags {
    FORWARD_OUTPUT = ReducerGradient::originalInputs().size(),
    SEGMENT_GRADS,
    LENGTHS,
    DATA_INPUT
  };
};
// Base implementation of sparse/non-sparse gradient computation for
// Lengths-based ops.
template <
    typename ForwardOp,
    typename ReducerDef,
    typename ReducerGradient,
    bool SparseFused,
    bool GradientNeedIndices = false>
struct LengthsOpGetGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    vector<string> grad_ins;
    string suffix = "Gradient";
    for (const int i : ReducerGradient::originalInputs()) {
      grad_ins.push_back(I(i));
    }
    if (ReducerGradient::requiresForwardOutput()) {
      grad_ins.push_back(O(0));
      CAFFE_ENFORCE(
          !SparseFused,
          "Forward pass output not yet supported as input for backward pass "
          "for SparseLengthsXXX operators");
      suffix = "AndForwardOutput" + suffix;
    }
    grad_ins.push_back(GO(0));
    grad_ins.push_back(I(ForwardOp::LENGTHS));
    bool indices_pushed = false;
    if (ReducerGradient::requiresDataInput(Def())) {
      grad_ins.push_back(I(0));
      if (SparseFused) {
        grad_ins.push_back(I(ForwardOp::INDICES));
        indices_pushed = true;
      }
      suffix = "WithMainInput" + suffix;
    }
    if (GradientNeedIndices && !indices_pushed) {
      if (SparseFused) {
        grad_ins.push_back(I(ForwardOp::INDICES));
      } else {
        grad_ins.push_back(I(0));
      }
    }
    vector<string> grad_outs;
    grad_outs.push_back(SparseFused ? GI_V(0) : GI(0));
    int aux_grads = ReducerGradient::numAuxInputsWithGrads(Def());
    for (int i = 1; i <= aux_grads; ++i) {
      grad_outs.push_back(GI(i));
    }
    vector<OperatorDef> r{CreateOperatorDef(
        string(SparseFused ? "SparseLengths" : "Lengths") +
            string(GradientNeedIndices ? "IndicesInGradient" : "") +
            ReducerDef::name + suffix,
        "",
        grad_ins,
        grad_outs)};
    if (SparseFused) {
      SetSparse(0, I(ForwardOp::INDICES), GI_V(0));
    }
    return r;
  }
};
template <
    typename T,
    typename SIndex,
    typename Context,
    typename ReducerDef,
    bool GradientNeedIndices = false>
struct AbstractLengthsDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "Lengths";
  static constexpr const char* doc = R"DOC(
Applies '{op}' to each segment of the input tensor. Segments are defined
by their LENGTHS.

LENGTHS is a vector that maps each of the first dimension slices of the
DATA to a particular group (segment). Values belonging to the same segment are
aggregated together.

For example LENGTHS = [2, 1] stands for segments DATA[0..1] and DATA[2].

The first dimension of the output is equal to the number of input segments,
i.e. `len(LENGTHS)`. Other dimensions are inherited from the input tensor.
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Input(0, "DATA", "Input tensor, slices of which are aggregated.");
    schema.Input(
        Reducer::kInputCount,
        "LENGTHS",
        "Vector whose elements sum to the first dimension of DATA");
    schema.Output(
        0,
        "OUTPUT",
        "Aggregated output tensor. Has the first dimension of len(LENGTHS).");
    schema.TensorInferenceFunction(
        [](const OperatorDef& def, const vector<TensorShape>& in) {
          vector<TensorShape> out(0);
          TensorShape output;
          for (int d : in[Reducer::kInputCount].dims()) {
            output.add_dims(d);
          }
          for (int j = 1; j < in[0].dims_size(); j++) {
            output.add_dims(in[0].dims(j));
          }
          output.set_data_type(in[0].data_type());
          out.push_back(output);
          return out;
        });
    ReducerDef::PopulateSchema(schema);
  }
  using Reducer = typename ReducerDef::template Reducer<T, Context>;
  using ReducerGradient =
      typename ReducerDef::template ReducerGradient<T, Context>;
  using ForwardOp = AbstractLengthsOp<T, SIndex, Context, Reducer, false>;
  using BackwardOp =
      AbstractLengthsGradientOp<T, SIndex, Context, ReducerGradient>;
  // ... additional WithMainInput gradient op aliases elided in this excerpt ...
  using GetGradient = LengthsOpGetGradient<
      ForwardOp,
      ReducerDef,
      ReducerGradient,
      false /*SparseFused*/,
      GradientNeedIndices>;
};
template <
    typename T,
    typename SIndex,
    typename Context,
    typename ReducerDef,
    bool GradientNeedIndices = false>
struct AbstractSparseLengthsDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "SparseLengths";
  static constexpr const char* doc = R"DOC(
Pulls in slices of the input tensor, groups them into segments and applies
'{op}' to each segment. Segments are defined by their LENGTHS.

This op is basically Gather and Lengths{op} fused together.

INDICES should contain integers in range 0..N-1 where N is the first dimension
of DATA. INDICES represent which slices of DATA need to be pulled in.

LENGTHS is a vector that defines slice sizes by the first dimension of DATA.
Values belonging to the same segment are aggregated together. sum(LENGTHS) has
to match INDICES size.

The first dimension of the output is equal to the number of input segments,
i.e. `len(LENGTHS)`. Other dimensions are inherited from the input tensor.
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Input(0, "DATA", "Input tensor, slices of which are aggregated.");
    schema.Input(
        Reducer::kInputCount,
        "INDICES",
        "Integer vector containing indices of the first dimension of DATA for "
        "the slices that are being aggregated");
    schema.Input(
        Reducer::kInputCount + 1,
        "LENGTHS",
        "Non-negative vector with sum of elements equal to INDICES length");
    schema.Output(
        0,
        "OUTPUT",
        "Aggregated output tensor. Has the first dimension of K "
        "(the number of segments).");
    ReducerDef::PopulateSchema(schema);
  }
  using Reducer = typename ReducerDef::template Reducer<T, Context>;
  using ReducerGradient =
      typename ReducerDef::template ReducerGradient<T, Context>;
  using ForwardOp = AbstractLengthsOp<T, SIndex, Context, Reducer>;
  using BackwardOp =
      AbstractLengthsGradientOp<T, SIndex, Context, ReducerGradient>;
  // ... additional WithMainInput gradient op aliases elided in this excerpt ...
  using GetGradient = LengthsOpGetGradient<
      ForwardOp,
      ReducerDef,
      ReducerGradient,
      true /*SparseFused*/,
      GradientNeedIndices>;
};
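// Worked example (editorial sketch; the Sum instantiation is registered as
// SparseLengthsSum): with
//   DATA    = [[1, 1], [2, 2], [3, 3]]
//   INDICES = [2, 0, 1]
//   LENGTHS = [1, 2]
// row 2 is reduced into segment 0 and rows 0 and 1 into segment 1, giving
// OUTPUT = [[3, 3], [3, 3]].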
} // namespace caffe2

#endif // CAFFE2_OPERATORS_SEGMENT_REDUCTION_OP_H_