1 #ifndef CAFFE2_OPERATORS_UTILITY_OPS_H_ 2 #define CAFFE2_OPERATORS_UTILITY_OPS_H_ 6 #include "caffe2/core/common_omp.h" 7 #include "caffe2/core/context.h" 8 #include "caffe2/core/logging.h" 9 #include "caffe2/core/operator.h" 10 #include "caffe2/core/types.h" 11 #include "caffe2/utils/math.h" 18 template <
class Context>
21 USE_OPERATOR_CONTEXT_FUNCTIONS;
25 bool RunOnDevice()
override;
33 using GradientMakerBase::GradientMakerBase;
34 std::vector<OperatorDef> GetGradientDefs()
override {
35 return {CreateOperatorDef(
38 std::vector<string>{GO(0)},
39 std::vector<string>{GI(0)})};
43 template <
class Context>
46 USE_OPERATOR_CONTEXT_FUNCTIONS;
51 bool RunOnDevice()
override {
52 int64_t nanoseconds =
static_cast<long int>(
53 std::chrono::duration_cast<std::chrono::nanoseconds>(
54 std::chrono::high_resolution_clock::now().time_since_epoch())
57 TensorCPU* output = OperatorBase::Output<TensorCPU>(0);
59 *output->template mutable_data<int64_t>() = nanoseconds;
// File-name suffix appended by PrintOp when dumping blobs to disk
// (argument to_file == 1). constexpr makes it a compile-time constant that
// is safe to define in a header (internal linkage, no ODR concerns).
constexpr char kPrintFileExtension[] = ".log";
67 template <
class Context>
70 USE_OPERATOR_CONTEXT_FUNCTIONS;
75 operator_def.input(0),
76 OperatorBase::GetSingleArgument<int>(
"to_file", 0)
77 ? ws->
RootFolder() +
"/" + operator_def.input(0) +
80 OperatorBase::GetSingleArgument<int>(
"limit", 0)),
81 every_n_(OperatorBase::GetSingleArgument<int>(
"every_n", 1)) {
82 CAFFE_ENFORCE_GE(every_n_, 1);
85 bool RunOnDevice()
override {
86 if (++occurrences_mod_n_ > every_n_) {
87 occurrences_mod_n_ -= every_n_;
89 if (occurrences_mod_n_ != 1) {
94 !OperatorBase::InputIsType<TensorCPU>(0)) {
95 LOG(INFO) <<
"Blob of type: " 96 << OperatorBase::Inputs().at(0)->meta().name();
100 if (Input(0).size() == 0) {
101 tensor_printer_.PrintMeta(Input(0));
115 if (OperatorBase::InputIsType<TensorCPU>(0)) {
117 this, OperatorBase::Input<TensorCPU>(0));
124 template <
typename T>
125 bool DoRunWithType() {
131 if (OperatorBase::InputIsType<TensorCPU>(0)) {
132 tensor = &OperatorBase::Input<TensorCPU>(0);
134 tensor_copy_if_needed.
CopyFrom(Input(0), &context_);
136 context_.FinishDeviceComputation();
137 tensor = &tensor_copy_if_needed;
139 tensor_printer_.Print<T>(*tensor);
146 int occurrences_mod_n_{0};
163 template <
class Context>
166 USE_OPERATOR_CONTEXT_FUNCTIONS;
169 bool RunOnDevice()
override {
170 auto& input = Input(0);
171 CAFFE_ENFORCE_GE(input.size(), 0,
"Tensor is not initialized");
172 Output(0)->ResizeLike(input);
173 Output(0)->ShareData(input);
185 template <
class Context>
188 USE_OPERATOR_CONTEXT_FUNCTIONS;
191 bool RunOnDevice()
override {
192 const auto& input = Input(0);
193 auto* output = Output(0);
194 CAFFE_ENFORCE_GT(input.ndim(), 0,
"Input has to be at least a vector.");
197 if (&input != output) {
198 output->ResizeLike(input);
199 output->CopyFrom(input, &context_);
205 template <
class Context>
208 USE_OPERATOR_CONTEXT_FUNCTIONS;
211 bool RunOnDevice()
override {
212 auto& input = Input(0);
213 auto* output = Output(0);
215 input.dims().size(), 1,
"The rank of the tensor must be >= 1.");
216 output->Resize(input.size());
218 context_.template CopyItems<Context, Context>(
222 output->raw_mutable_data(input.meta()));
228 template <
class Context>
231 USE_OPERATOR_CONTEXT_FUNCTIONS;
234 bool RunOnDevice()
override {
235 auto& input0 = Input(0);
236 auto& input1 = Input(1);
237 auto* output = Output(0);
238 CAFFE_ENFORCE_EQ(input0.size(), input1.size());
239 output->ResizeLike(Input(1));
240 context_.template CopyItems<Context, Context>(
244 output->raw_mutable_data(input0.meta()));
249 template <
class Context>
252 USE_OPERATOR_CONTEXT_FUNCTIONS;
253 USE_SIMPLE_CTOR_DTOR(
SumOp);
255 template <
typename T,
typename M>
256 bool DoRunWithType() {
257 auto& input0 = Input(0);
258 auto* output = Output(0);
259 if (InputSize() == 1) {
260 output->CopyFrom(input0, &context_);
263 output->ResizeLike(input0);
264 T* output_data = output->template mutable_data<T>();
266 for (
int i = 1; i < InputSize(); ++i) {
267 if (output->dims() != Input(i).dims()) {
269 "Check failed: output->dims() == Input(i).dims().",
270 "Description: Input #",
272 ", input dimension:",
274 " should match output dimension: ",
282 input0.template data<T>(),
283 Input(1).template data<T>(),
287 for (
int i = 2; i < InputSize(); ++i) {
291 Input(i).template data<T>(),
298 bool RunOnDevice()
override {
299 if (Input(0).
template IsType<float>()) {
300 return DoRunWithType<float, float>();
301 }
else if (Input(0).template IsType<int>()) {
302 return DoRunWithType<int, int>();
305 "Sum operator only supports 32-bit float and ints, but",
306 " input was of type ",
307 Input(0).meta().name());
317 template <
class Context>
320 USE_OPERATOR_CONTEXT_FUNCTIONS;
323 template <
typename DstType>
324 bool DoRunWithType() {
325 CAFFE_ENFORCE_EQ(InputSize() % 2, 0);
327 auto& weight0 = Input(1);
328 CAFFE_ENFORCE_GT(X0.size(), 0);
329 CAFFE_ENFORCE_EQ(weight0.size(), 1);
330 int size = X0.size();
331 auto* output = Output(0);
332 output->ResizeLike(X0);
333 math::Scale<DstType, Context>(
335 weight0.template data<float>(),
336 X0.template data<DstType>(),
337 output->template mutable_data<DstType>(),
339 for (
int i = 2; i < InputSize(); i += 2) {
344 LOG(ERROR) <<
"Input #" << i <<
" is the same as output. " 345 <<
"If you want to do in-place updates, put the output as " 349 auto& weight = Input(i + 1);
350 CAFFE_ENFORCE_EQ(X.size(), size);
351 CAFFE_ENFORCE_EQ(weight.size(), 1);
352 math::Axpy<DstType, Context>(
354 weight.template data<float>(),
355 X.template data<DstType>(),
356 output->template mutable_data<DstType>(),
361 bool RunOnDevice()
override;
364 template <
class Context>
367 USE_OPERATOR_CONTEXT_FUNCTIONS;
371 grad_on_w_(OperatorBase::GetSingleArgument<bool>(
"grad_on_w",
false)) {}
373 template <
typename DstType>
374 bool DoRunWithType() {
375 CAFFE_ENFORCE_EQ(InputSize() % 2, 1);
376 auto output_size = grad_on_w_ ? InputSize() - 1 : InputSize() / 2;
377 CAFFE_ENFORCE_EQ(OutputSize(), output_size);
380 const auto* dY_data = dY.template data<DstType>();
381 int size = dY.size();
384 for (
int i = 0; i < InputSize() / 2; i++) {
385 auto& cur_w = Input(2 * i + 2);
386 CAFFE_ENFORCE_EQ(cur_w.size(), 1);
387 auto* cur_dX = Output(i);
388 cur_dX->ResizeLike(dY);
390 math::Scale<DstType, Context>(
392 cur_w.template data<float>(),
394 cur_dX->template mutable_data<DstType>(),
398 auto& cur_X = Input(2 * i + 1);
399 CAFFE_ENFORCE_EQ(cur_X.size(), size);
400 auto* cur_dw = Output(i + output_size / 2);
402 math::Dot<DstType, Context>(
405 cur_X.template data<DstType>(),
406 cur_dw->template mutable_data<float>(),
414 bool RunOnDevice()
override;
458 template <
typename T,
class Context>
461 USE_OPERATOR_CONTEXT_FUNCTIONS;
465 bool RunOnDevice()
override {
470 template <
typename Index>
471 bool DoRunWithType() {
472 TIndex block_size = Input(0).size_from_dim(1);
476 template <
typename Index,
int FixedSize>
477 bool DoRunWithValue() {
478 CAFFE_ENFORCE_EQ(InputSize() % 2, 1);
480 auto& weight0 = Input(1);
481 auto& indices = Input(2);
482 auto* output = Output(0);
483 CAFFE_ENFORCE_EQ(&X0, output,
"In place operation is required");
485 CAFFE_ENFORCE_GT(X0.size(), 0);
486 CAFFE_ENFORCE_GT(X0.ndim(), 0,
"X0 has to be at least the vector");
487 CAFFE_ENFORCE_EQ(weight0.size(), 1);
488 TIndex M = X0.size();
489 TIndex N = X0.dim(0);
490 TIndex K = indices.size();
491 TIndex block_size = M / N;
492 T* data = output->template mutable_data<T>();
493 const Index* idxs = indices.template data<Index>();
494 T w0 = *weight0.template data<T>();
497 for (
int i = 0; i < K; ++i) {
501 "Index out of bounds: ",
505 math::ScaleFixedSize<T, Context, FixedSize>(
508 data + block_size * idx,
509 data + block_size * idx,
513 for (
int inp = 3; inp < InputSize(); inp += 2) {
514 auto& X = Input(inp);
515 auto& weight = Input(inp + 1);
516 CAFFE_ENFORCE_EQ(X.size(), block_size * K);
517 CAFFE_ENFORCE_EQ(weight.size(), 1);
518 const T* x_data = X.template data<T>();
519 T w = *weight.template data<T>();
520 for (
int i = 0; i < K; ++i) {
523 DCHECK(0 <= idx && idx < N) <<
"Index out of bounds: " << idx
524 <<
", range 0 to " << N;
525 math::AxpyFixedSize<T, Context, FixedSize>(
528 x_data + block_size * i,
529 data + block_size * idx,
564 template <
class Context>
567 USE_OPERATOR_CONTEXT_FUNCTIONS;
572 runners_({{{TensorProto_DataType_INT32, TensorProto_DataType_FLOAT},
573 &ScatterAssignOp::DoRun<int32_t, float>},
574 {{TensorProto_DataType_INT32, TensorProto_DataType_FLOAT16},
575 &ScatterAssignOp::DoRun<int32_t, float16>},
576 {{TensorProto_DataType_INT32, TensorProto_DataType_INT32},
577 &ScatterAssignOp::DoRun<int32_t, int32_t>},
578 {{TensorProto_DataType_INT32, TensorProto_DataType_INT64},
579 &ScatterAssignOp::DoRun<int32_t, int64_t>},
580 {{TensorProto_DataType_INT64, TensorProto_DataType_FLOAT},
581 &ScatterAssignOp::DoRun<int64_t, float>},
582 {{TensorProto_DataType_INT64, TensorProto_DataType_FLOAT16},
583 &ScatterAssignOp::DoRun<int64_t, float16>},
584 {{TensorProto_DataType_INT64, TensorProto_DataType_INT32},
585 &ScatterAssignOp::DoRun<int64_t, int32_t>},
586 {{TensorProto_DataType_INT64, TensorProto_DataType_INT64},
587 &ScatterAssignOp::DoRun<int64_t, int64_t>}}) {}
589 bool RunOnDevice()
override {
590 const auto& data = Input(DATA);
591 const auto& slices = Input(SLICES);
592 auto& indices = Input(INDICES);
594 const auto dataType = TypeMetaToDataType(data.meta());
595 const auto slicesType = TypeMetaToDataType(slices.
meta());
596 const auto indicesType = TypeMetaToDataType(indices.meta());
597 auto* output = Output(0);
599 auto runner = GetRunner(dataType, slicesType, indicesType);
607 map<std::pair<TensorProto_DataType, TensorProto_DataType>, RunnerType>
612 RunnerType GetRunner(
613 const TensorProto_DataType dataType,
614 const TensorProto_DataType slicesType,
615 const TensorProto_DataType indicesType) {
616 CAFFE_ENFORCE_EQ(dataType, slicesType,
"Data and slice types must match");
617 auto it = runners_.find({indicesType, dataType});
619 it != runners_.end(),
620 "Could not find the runner corresponding to indicesType, dataType = ",
627 template <
typename Index,
typename T>
629 auto& input = Input(DATA);
630 auto& indices = Input(INDICES);
631 auto& slices = Input(SLICES);
632 auto* output = Output(0);
633 CAFFE_ENFORCE_EQ(&input, output,
"In place operation is required");
635 CAFFE_ENFORCE_GT(input.ndim(), 0,
"X0 has to be at least the vector");
636 TIndex M = input.size();
637 TIndex N = input.dim(0);
638 TIndex K = indices.size();
639 TIndex block_size = M / N;
640 CAFFE_ENFORCE_EQ(slices.
size(), block_size * K);
643 T* data = output->template mutable_data<T>();
644 const Index* idxs = indices.template data<Index>();
645 const T* slicesData = slices.template data<T>();
646 DoScatterAssign(data, idxs, slicesData, N, K, block_size);
649 template <
typename Index,
typename T>
650 void DoScatterAssign(
657 for (
int i = 0; i < K; ++i) {
660 DCHECK(0 <= idx && idx < N) <<
"Index out of bounds: " << idx
661 <<
", range 0 to " << N;
662 context_.template Copy<T, Context, Context>(
663 block_size, slicesData + block_size * i, data + block_size * idx);
667 INPUT_TAGS(DATA, INDICES, SLICES);
670 template <
class Context,
class DstContext,
class SrcContext>
673 USE_OPERATOR_CONTEXT_FUNCTIONS;
674 USE_SIMPLE_CTOR_DTOR(
CopyOp);
676 bool RunOnDevice()
override {
677 auto& input = OperatorBase::Input<Tensor<SrcContext>>(0);
678 auto* output = OperatorBase::Output<Tensor<DstContext>>(0);
679 output->ResizeLike(input);
680 this->context_.template CopyItems<SrcContext, DstContext>(
684 output->raw_mutable_data(input.meta()));
689 template <
class Context,
class DstContext,
class SrcContext>
696 template <
class Context>
699 USE_OPERATOR_CONTEXT_FUNCTIONS;
702 bool RunOnDevice()
override {
703 auto& input = Input(0);
704 auto* output = Output(0);
705 auto* input_data = input.template data<int32_t>();
707 CAFFE_ENFORCE(input.dims().size() == 1,
"Input must be a vector.");
709 std::accumulate(input_data, input_data + input.size(), 0);
711 output->Resize(total_length);
712 auto* output_data = output->template mutable_data<int32_t>();
714 for (
int i = 0; i < input.size(); ++i) {
715 auto len = input_data[i];
716 std::fill(output_data, output_data + len, i);
723 template <
class Context>
726 USE_OPERATOR_CONTEXT_FUNCTIONS;
729 bool RunOnDevice()
override {
730 auto& input = Input(0);
731 auto* output = Output(0);
732 auto* input_data = input.template data<int32_t>();
734 CAFFE_ENFORCE(input.dims().size() == 1,
"Input must be a vector.");
735 auto size = input.size();
737 output->Resize(size, 2);
738 auto* output_data = output->template mutable_data<int32_t>();
741 for (
int i = 0; i < size; ++i) {
742 auto len = input_data[i];
743 output_data[i * 2] = offset;
744 output_data[i * 2 + 1] = len;
751 template <
class Context>
754 USE_OPERATOR_CONTEXT_FUNCTIONS;
757 bool RunOnDevice()
override {
761 template <
typename Index>
762 bool DoRunWithType() {
763 auto& input = Input(0);
764 if (input.ndim() == 2) {
766 input.dim32(0) == 1 || input.dim32(1) == 1,
767 "Input must be a vector.");
769 CAFFE_ENFORCE_EQ(input.ndim(), 1,
"Input must be a vector.");
771 auto* input_data = input.template data<Index>();
772 auto input_size = input.size();
773 auto* output = Output(0);
775 auto num_segments = input_size ? input_data[input_size - 1] + 1 : 0;
776 if (InputSize() > 1) {
777 CAFFE_ENFORCE_GE(Input(1).ndim(), 1);
781 "The number of segments inferred should *NOT* be larger " 782 "than the size of Input(1)'s first dimension");
783 num_segments = Input(1).dim(0);
785 CAFFE_ENFORCE(0 <= num_segments,
"Indices must be in 0..K-1 range");
786 output->Resize(num_segments);
787 auto* output_data = output->template mutable_data<int32_t>();
788 if (num_segments == 0) {
791 std::fill(output_data, output_data + num_segments, 0);
793 for (int64_t i = 0; i < input_size; i++) {
795 prev <= input_data[i],
796 "Segment ids must be sorted: ",
800 prev = input_data[i];
801 output_data[input_data[i]] += 1;
808 template <
class Context>
811 USE_OPERATOR_CONTEXT_FUNCTIONS;
814 bool RunOnDevice()
override {
818 template <
typename Index>
819 bool DoRunWithType() {
820 auto& input = Input(0);
821 CAFFE_ENFORCE(input.dims().size() == 1,
"Input must be a vector.");
822 auto* input_data = input.template data<Index>();
823 auto input_size = input.size();
824 auto* output = Output(0);
826 auto num_segments = input_size ? input_data[input_size - 1] + 1 : 0;
827 if (InputSize() > 1) {
828 CAFFE_ENFORCE_GE(Input(1).ndim(), 1);
832 "The number of segments inferred should *NOT* be larger " 833 "than the size of Input(1)'s first dimension");
834 num_segments = Input(1).dim(0);
836 CAFFE_ENFORCE(0 <= num_segments,
"Indices must be in 0..K-1 range");
837 output->Resize(num_segments, 2);
838 auto* output_data = output->template mutable_data<int32_t>();
839 if (num_segments == 0) {
842 std::fill(output_data, output_data + num_segments * 2, 0);
843 Index prev = input_data[0];
844 for (int64_t i = 0; i < input_size; i++) {
846 prev <= input_data[i],
847 "Segment ids must be sorted: ",
851 while (prev != input_data[i]) {
853 output_data[prev * 2] = i;
855 output_data[input_data[i] * 2 + 1] += 1;
862 template <
class Context>
865 USE_OPERATOR_CONTEXT_FUNCTIONS;
868 power_(OperatorBase::GetSingleArgument<float>(
"power", 0.5)) {}
870 bool RunOnDevice()
override {
874 template <
typename Index>
875 bool DoRunWithType() {
876 auto& input = Input(0);
877 CAFFE_ENFORCE(input.dims().size() == 1,
"Input must be a vector.");
878 auto* input_data = input.template data<Index>();
879 auto input_size = input.size();
880 auto* output = Output(0);
882 int64_t output_size = 0;
883 for (
auto i = 0; i < input_size; i++) {
884 CAFFE_ENFORCE_GE(input_data[i], 0,
"unexpected negative length value");
885 output_size += input_data[i];
888 std::function<float(const int64_t& length, const float& power)> getWeight;
890 getWeight = [](
const int64_t& length,
const float& ) {
891 return 1.0 / std::sqrt(length);
893 }
else if (power_ == 1) {
894 getWeight = [](
const int64_t& length,
const float& ) {
898 getWeight = [](
const int64_t& length,
const float& power) {
899 return 1.0 / std::pow(length, power);
903 output->Resize(output_size);
904 auto* output_data = output->template mutable_data<float>();
906 for (
auto i = 0; i < input_size; i++) {
907 auto len = input_data[i];
911 CAFFE_ENFORCE_LE(cnt + len, output_size,
"unexpected lengths value");
913 float weight_value = getWeight(len, power_);
914 std::fill(output_data + cnt, output_data + cnt + len, weight_value);
925 template <
class Context>
928 USE_OPERATOR_CONTEXT_FUNCTIONS;
931 bool RunOnDevice()
override {
932 auto& input = Input(0);
933 auto* output = OperatorBase::Output<TensorCPU>(0);
934 output->Resize(std::vector<TIndex>{});
935 *output->template mutable_data<bool>() = input.size() > 0;
940 template <
class Context>
943 USE_OPERATOR_CONTEXT_FUNCTIONS;
946 bool RunOnDevice()
override {
947 auto& input = Input(0);
948 auto* output = OperatorBase::Output<TensorCPU>(0);
949 output->Resize(std::vector<TIndex>{});
950 *output->template mutable_data<bool>() = (input.size() == 0);
956 template <
class Context>
959 USE_OPERATOR_CONTEXT_FUNCTIONS;
960 USE_SIMPLE_CTOR_DTOR(
SizeOp);
962 bool RunOnDevice()
override {
963 auto& input = Input(0);
964 auto* output = Output(0);
966 output->Resize(vector<TIndex>());
967 auto* output_data = output->template mutable_data<int64_t>();
969 auto size = input.size();
970 math::Set<int64_t, Context>(
971 1,
static_cast<int64_t
>(size), output_data, &context_);
978 template <
class Context>
981 USE_OPERATOR_CONTEXT_FUNCTIONS;
984 bool RunOnDevice()
override {
985 auto& input = Input(0);
987 CAFFE_ENFORCE(input.dims().size() == 1,
"Input must be a vector.");
988 auto* output = Output(0);
989 auto* input_data = input.template data<int32_t>();
991 auto size = input.size();
992 auto first = input_data[0];
994 for (
int i = 1; i < size; i++) {
996 input_data[i] == first,
"All elements of input must be same ");
1000 auto* output_data = output->template mutable_data<int32_t>();
1001 output_data[0] = size;
1002 output_data[1] = first;
1008 template <
class Context>
1011 USE_OPERATOR_CONTEXT_FUNCTIONS;
1014 bool RunOnDevice()
override {
1016 this, OperatorBase::Input<TensorCPU>(INDICES));
1019 template <
typename Index>
1020 bool DoRunWithType() {
1023 auto& data = Input(DATA);
1024 auto& indices = Input(INDICES);
1025 auto* output = Output(0);
1027 CAFFE_ENFORCE_GE(data.ndim(), 1,
"DATA should be at least 1-D");
1028 auto shape = indices.dims();
1029 shape.insert(shape.end(), data.dims().begin() + 1, data.dims().end());
1030 output->Resize(shape);
1032 int block_size = data.size_from_dim(1);
1033 auto block_bytesize = data.size_from_dim(1) * data.meta().itemsize();
1034 int N = indices.size();
1036 auto src_base =
static_cast<const char*
>(data.raw_data());
1037 const Index* idxs = indices.template data<Index>();
1038 auto out =
static_cast<char*
>(output->raw_mutable_data(data.meta()));
1040 for (
int i = 0; i < N; ++i) {
1043 0 <= idx && idx < data.dim(0),
1044 "INDICES element is out of DATA bounds, id=",
1048 auto src = src_base + idx * block_bytesize;
1049 context_.template CopyItems<Context, Context>(
1050 data.meta(), block_size, src, out + block_bytesize * i);
1055 INPUT_TAGS(DATA, INDICES);
1058 template <
class Context>
1061 USE_OPERATOR_CONTEXT_FUNCTIONS;
1064 bool RunOnDevice()
override {
1066 this, OperatorBase::Input<TensorCPU>(RANGES));
1069 template <
typename Index>
1070 bool DoRunWithType() {
1071 auto& data = Input(DATA);
1072 auto& ranges = Input(RANGES);
1073 auto* outputData = Output(0);
1074 auto* outputLengths = Output(1);
1076 auto batchSize = ranges.dim(0);
1077 CAFFE_ENFORCE(data.ndim() == 1,
"Data has to be 1-D");
1078 CAFFE_ENFORCE(ranges.ndim() == 3,
"Ranges must be 3-D");
1079 CAFFE_ENFORCE(ranges.dim(1) > 0,
"There has to be at least one range");
1081 ranges.dim(2), 2,
"Ranges last dimention should be of size 2");
1083 auto* rawData =
static_cast<const char*
>(data.raw_data());
1084 auto* rangesData = ranges.template data<Index>();
1086 outputLengths->Resize(batchSize);
1087 auto* outputLengthsPtr = outputLengths->template mutable_data<int32_t>();
1089 size_t blockSize = ranges.size_from_dim(1);
1090 for (
size_t i = 0; i < batchSize; ++i) {
1091 auto end = start + blockSize;
1092 outputLengthsPtr[i] = accumulate(rangesData, start, end);
1096 size_t outputSize = accumulate(rangesData, 0, ranges.size());
1097 outputData->Resize(outputSize);
1099 auto outputRawData =
1100 static_cast<char*
>(outputData->raw_mutable_data(data.meta()));
1101 VLOG(1) <<
"Copying data";
1102 size_t outputOffsetBytes = 0;
1103 auto itemsize = data.meta().itemsize();
1104 for (
int i = 0; i < ranges.size(); i += 2) {
1105 auto rangeStart = rangesData[i];
1106 auto rangeLength = rangesData[i + 1];
1110 auto rangeSizeBytes = rangeLength * itemsize;
1111 CAFFE_ENFORCE(outputOffsetBytes < outputSize * itemsize);
1112 CAFFE_ENFORCE(rangeStart + rangeLength <= data.size());
1113 context_.template CopyItems<Context, Context>(
1116 rawData + rangeStart * itemsize,
1117 outputRawData + outputOffsetBytes);
1118 outputOffsetBytes += rangeSizeBytes;
1120 CAFFE_ENFORCE(outputOffsetBytes == outputSize * itemsize);
1124 INPUT_TAGS(DATA, RANGES, LENGTHS);
// Sums the "length" entries of a flattened buffer of (start, length) pairs
// over [start, end). Lengths live at odd offsets relative to `start`, hence
// the stride-2 walk beginning at start + 1.
template <typename Index>
size_t accumulate(Index* ranges, size_t start, size_t end) {
  size_t result = 0;
  // Use size_t for the index: the previous `int` loop variable mixed
  // signed/unsigned in the comparison against `end` and could overflow for
  // very large range buffers.
  for (size_t i = start + 1; i < end; i += 2) {
    result += ranges[i];
  }
  return result;
}
1137 template <
class Context>
1140 USE_OPERATOR_CONTEXT_FUNCTIONS;
1143 bool RunOnDevice()
override {
1145 this, OperatorBase::Input<TensorCPU>(INDICES));
1148 template <
typename Index>
1149 bool DoRunWithType() {
1150 auto& items = Input(ITEMS);
1151 auto& lengths = Input(LENGTHS);
1152 auto& indices = Input(INDICES);
1153 auto* output = Output(0);
1155 CAFFE_ENFORCE_GE(items.ndim(), 1,
"ITEMS should be at least 1-D");
1156 CAFFE_ENFORCE_EQ(lengths.ndim(), 1,
"LENGTHS should be 1-D");
1157 CAFFE_ENFORCE_EQ(indices.ndim(), 1,
"INDICES should be 1-D");
1159 const auto* lengths_data = lengths.template data<int32_t>();
1160 const auto* indices_data = indices.template data<Index>();
1162 TIndex total_length = 0;
1163 for (
size_t i = 0; i < indices.size(); ++i) {
1164 auto idx = indices_data[i];
1165 CAFFE_ENFORCE_LT(idx, lengths.size());
1166 total_length += lengths_data[idx];
1168 auto shape = items.dims();
1169 shape[0] = total_length;
1170 output->Resize(shape);
1173 TIndex running_offset = 0;
1174 offsets_.reserve(lengths.size());
1175 for (
size_t i = 0; i < lengths.size(); ++i) {
1176 offsets_.push_back(running_offset);
1177 running_offset += lengths_data[i];
1182 "LENGTHS must match the first dimension of ITEMS");
1184 auto src_base =
static_cast<const char*
>(items.raw_data());
1185 auto block_size = items.size_from_dim(1);
1186 auto block_bytesize = block_size * items.itemsize();
1187 auto out =
static_cast<char*
>(output->raw_mutable_data(items.meta()));
1189 for (
size_t i = 0; i < indices.size(); ++i) {
1190 auto idx = indices_data[i];
1191 auto length = lengths_data[idx];
1192 context_.template CopyItems<Context, Context>(
1194 length * block_size,
1195 src_base + offsets_[idx] * block_bytesize,
1197 out += length * block_bytesize;
1202 std::vector<TIndex> offsets_;
1204 INPUT_TAGS(ITEMS, LENGTHS, INDICES);
1213 template <
class Context>
1216 USE_OPERATOR_CONTEXT_FUNCTIONS;
1219 bool RunOnDevice()
override {
1221 auto& input = Input(0);
1222 if (input.template IsType<int32_t>()) {
1224 }
else if (input.template IsType<int64_t>()) {
1227 LOG(FATAL) <<
"Unsupported type of input in Unique: " 1228 << input.meta().name();
1239 template <
typename T>
1243 OUTPUT_TAGS(UNIQUE, REMAPPING);
1246 template <
class Context>
1249 USE_OPERATOR_CONTEXT_FUNCTIONS;
1252 bool RunOnDevice()
override {
1253 size_t coalesced_size = 0;
1254 for (
int i = 0; i < InputSize(); ++i) {
1256 !Input(i).meta().ctor(),
1257 "Must only coalesce fundamental types, error at input: ",
1261 auto roundToAlignment = [](
size_t bytes) ->
size_t {
1262 return ((bytes + gCaffe2Alignment - 1) / gCaffe2Alignment) *
1266 for (
int i = 0; i < InputSize(); ++i) {
1267 coalesced_size += roundToAlignment(Input(i).nbytes());
1270 auto* coalesced = Output(OutputSize() - 1);
1271 coalesced->Resize(coalesced_size);
1272 math::Set<uint8_t, Context>(
1275 coalesced->template mutable_data<uint8_t>(),
1278 size_t coalesced_offset = 0;
1279 for (
auto i = 0; i < InputSize(); ++i) {
1280 const auto input_nbytes = Input(i).nbytes();
1281 context_.template CopyBytes<Context, Context>(
1283 (
const uint8_t*)Input(i).raw_data(),
1284 coalesced->template mutable_data<uint8_t>() + coalesced_offset);
1291 Output(i)->ResizeLike(Input(i));
1292 Output(i)->ShareExternalPointer(
1294 coalesced->template mutable_data<uint8_t>() + coalesced_offset),
1297 coalesced_offset += roundToAlignment(input_nbytes);
1303 template <
typename T,
class Context>
1309 OperatorBase::GetSingleArgument<float>(
"lower_bound", 0.0)),
1311 OperatorBase::GetSingleArgument<float>(
"upper_bound", 1.0)),
1312 num_buckets_(OperatorBase::GetSingleArgument<int>(
"num_buckets", 1)) {
1313 CAFFE_ENFORCE_GT(num_buckets_, 0);
1315 num_output_buckets_ = num_buckets_ + 2;
1316 accumulate_hist_ = std::vector<int64_t>(num_output_buckets_, 0);
1319 USE_OPERATOR_CONTEXT_FUNCTIONS;
1321 bool RunOnDevice()
override {
1322 auto& X = Input(X_IN);
1323 auto* X_data = X.template data<T>();
1325 auto* cur_hist = Output(CUR_HIST);
1326 auto* acc_hist = Output(ACC_HIST);
1327 cur_hist->Resize(num_output_buckets_);
1328 acc_hist->Resize(num_output_buckets_);
1329 auto* cur_hist_data = cur_hist->template mutable_data<int64_t>();
1330 auto* acc_hist_data = acc_hist->template mutable_data<int64_t>();
1331 auto segment = (upper_bound_ - lower_bound_) / num_buckets_;
1332 math::Set<int64_t, Context>(
1333 num_output_buckets_, 0, cur_hist_data, &context_);
1335 for (
int i = 0; i < N; i++) {
1336 int bucket_index = -1;
1337 if (X_data[i] < lower_bound_) {
1339 }
else if (X_data[i] >= upper_bound_) {
1340 bucket_index = num_buckets_ + 1;
1342 bucket_index = (int)((X_data[i] - lower_bound_) / segment) + 1;
1344 cur_hist_data[bucket_index] += 1;
1345 accumulate_hist_[bucket_index] += 1;
1348 for (
int i = 0; i < num_output_buckets_; i++) {
1349 acc_hist_data[i] = accumulate_hist_[i];
1359 int num_output_buckets_;
1360 std::vector<int64_t> accumulate_hist_;
1363 OUTPUT_TAGS(CUR_HIST, ACC_HIST);
1366 template <
class Context>
1369 USE_OPERATOR_CONTEXT_FUNCTIONS;
1372 bool RunOnDevice()
override {
1377 template <
typename T>
1378 T readScalarInput(
const int index) {
1379 if (std::is_same<Context, TensorCPU>::value) {
1380 return Input(index).template data<T>()[0];
1382 local_.template CopyFrom<Context>(Input(index));
1383 return local_.template data<T>()[0];
1387 template <
typename T>
1388 bool DoRunWithType() {
1393 for (
int i = 0; i < InputSize(); ++i) {
1394 CAFFE_ENFORCE_EQ(Input(0).ndim(), 0,
"All inputs must be scalar.");
1397 switch (InputSize()) {
1399 stop = readScalarInput<T>(0);
1402 start = readScalarInput<T>(0);
1403 stop = readScalarInput<T>(1);
1406 step = readScalarInput<T>(2);
1407 start = readScalarInput<T>(0);
1408 stop = readScalarInput<T>(1);
1411 CAFFE_ENFORCE_NE(step, 0,
"Step size cannot be 0.");
1413 auto diff = stop - start;
1414 if (std::is_integral<T>::value) {
1417 length = diff / step;
1418 if (length * step < diff) {
1422 length =
static_cast<int>(ceil(diff / step));
1424 auto* output = Output(0);
1429 output->template mutable_data<T>();
1432 output->Resize(length);
1433 return DoRunOnDevice<T>(start, step, output);
1437 template <
typename T>
1438 bool DoRunOnDevice(
const T& start,
const T& step,
Tensor<Context>* output);
1447 #endif // CAFFE2_OPERATORS_UTILITY_OPS_H_ const string & RootFolder()
Return the root folder of the workspace.
const TypeMeta & meta() const
Returns the TypeMeta object associated with the current data type.
Tensor is the basic class in Caffe2 that stores a contiguous memory with its shape information...
Update slices of the tensor in-place by overriding.
void CopyFrom(const Tensor< SrcContext > &src, ContextForCopy *context)
Copies the data from a source tensor, with a context provided to carry out the underlying memcpy operation.
TIndex size() const
Returns the size (i.e.
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
void Resize(Ts...dim_source)
Resizes a tensor.
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Update slices of the tensor in-place with weighted sum.
Alias op makes the output and the input share the same underlying storage.
Deduplicates input indices vector and optionally produces reverse remapping.