// Caffe2 - C++ API
// A deep learning, cross-platform ML framework
// utility_ops.h
1 #ifndef CAFFE2_OPERATORS_UTILITY_OPS_H_
2 #define CAFFE2_OPERATORS_UTILITY_OPS_H_
3 
4 #include <math.h>
5 
6 #include "caffe2/core/common_omp.h"
7 #include "caffe2/core/context.h"
8 #include "caffe2/core/logging.h"
9 #include "caffe2/core/operator.h"
10 #include "caffe2/core/types.h"
11 #include "caffe2/utils/math.h"
12 
13 #include <map>
14 #include <utility>
15 
16 namespace caffe2 {
17 
// Validates its input tensor for NaN values (RunOnDevice is implemented
// per-device elsewhere; this header only declares the operator).
template <class Context>
class NanCheckOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  NanCheckOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}

  // Defined in the device-specific implementation file.
  bool RunOnDevice() override;

 private:
  // Presumably used to dump tensor contents when a check fails — the printer
  // is only declared here; confirm against the .cc/.cu implementation.
  TensorPrinter tensorPrinter_;
  // Scratch tensor available to the device implementation.
  Tensor<Context> scratch_;
};
31 
33  using GradientMakerBase::GradientMakerBase;
34  std::vector<OperatorDef> GetGradientDefs() override {
35  return {CreateOperatorDef(
36  "NanCheck",
37  "",
38  std::vector<string>{GO(0)},
39  std::vector<string>{GI(0)})};
40  }
41 };
42 
43 template <class Context>
44 class WallClockTimeOp final : public Operator<Context> {
45  public:
46  USE_OPERATOR_CONTEXT_FUNCTIONS;
47 
48  WallClockTimeOp(const OperatorDef& operator_def, Workspace* ws)
49  : Operator<Context>(operator_def, ws) {}
50 
51  bool RunOnDevice() override {
52  int64_t nanoseconds = static_cast<long int>(
53  std::chrono::duration_cast<std::chrono::nanoseconds>(
54  std::chrono::high_resolution_clock::now().time_since_epoch())
55  .count());
56 
57  TensorCPU* output = OperatorBase::Output<TensorCPU>(0);
58  output->Resize();
59  *output->template mutable_data<int64_t>() = nanoseconds;
60 
61  return true;
62  }
63 };
64 
// File extension appended to blob names when PrintOp logs to a file.
constexpr char kPrintFileExtension[] = ".log";
66 
67 template <class Context>
68 class PrintOp final : public Operator<Context> {
69  public:
70  USE_OPERATOR_CONTEXT_FUNCTIONS;
71  USE_DISPATCH_HELPER;
72  PrintOp(const OperatorDef& operator_def, Workspace* ws)
73  : Operator<Context>(operator_def, ws),
74  tensor_printer_(
75  operator_def.input(0),
76  OperatorBase::GetSingleArgument<int>("to_file", 0)
77  ? ws->RootFolder() + "/" + operator_def.input(0) +
78  kPrintFileExtension
79  : "",
80  OperatorBase::GetSingleArgument<int>("limit", 0)),
81  every_n_(OperatorBase::GetSingleArgument<int>("every_n", 1)) {
82  CAFFE_ENFORCE_GE(every_n_, 1);
83  }
84 
85  bool RunOnDevice() override {
86  if (++occurrences_mod_n_ > every_n_) {
87  occurrences_mod_n_ -= every_n_;
88  }
89  if (occurrences_mod_n_ != 1) {
90  return true;
91  }
92 
93  if (!OperatorBase::InputIsType<Tensor<Context>>(0) &&
94  !OperatorBase::InputIsType<TensorCPU>(0)) {
95  LOG(INFO) << "Blob of type: "
96  << OperatorBase::Inputs().at(0)->meta().name();
97  return true;
98  }
99  // special-case empty tensors since they may have no meta()
100  if (Input(0).size() == 0) {
101  tensor_printer_.PrintMeta(Input(0));
102  return true;
103  }
104 
105  using Types = TensorTypes<
106  float,
107  double,
108  int,
109  long,
110  bool,
111  char,
112  unsigned char,
113  std::string>;
114 
115  if (OperatorBase::InputIsType<TensorCPU>(0)) {
117  this, OperatorBase::Input<TensorCPU>(0));
118  } else {
119  return DispatchHelper<Types>::call(this, Input(0));
120  }
121  }
122 
123  private:
124  template <typename T>
125  bool DoRunWithType() {
126  // A simple strategy to copy tensor if needed, and have the tensor pointer
127  // pointing to the right instantiation. Note that tensor_copy_if_needed
128  // will handle memory deallocation itself so no smart pointer is needed.
129  const TensorCPU* tensor;
130  TensorCPU tensor_copy_if_needed;
131  if (OperatorBase::InputIsType<TensorCPU>(0)) {
132  tensor = &OperatorBase::Input<TensorCPU>(0);
133  } else {
134  tensor_copy_if_needed.CopyFrom(Input(0), &context_);
135  // Make sure that the copy is finished.
136  context_.FinishDeviceComputation();
137  tensor = &tensor_copy_if_needed;
138  }
139  tensor_printer_.Print<T>(*tensor);
140  return true;
141  }
142 
143  private:
144  TensorPrinter tensor_printer_;
145  int every_n_;
146  int occurrences_mod_n_{0};
147 };
148 
163 template <class Context>
164 class AliasOp final : public Operator<Context> {
165  public:
166  USE_OPERATOR_CONTEXT_FUNCTIONS;
167  USE_SIMPLE_CTOR_DTOR(AliasOp);
168 
169  bool RunOnDevice() override {
170  auto& input = Input(0);
171  CAFFE_ENFORCE_GE(input.size(), 0, "Tensor is not initialized");
172  Output(0)->ResizeLike(input);
173  Output(0)->ShareData(input);
174  return true;
175  }
176 };
177 
185 template <class Context>
186 class EnsureDenseOp final : public Operator<Context> {
187  public:
188  USE_OPERATOR_CONTEXT_FUNCTIONS;
189  USE_SIMPLE_CTOR_DTOR(EnsureDenseOp)
190 
191  bool RunOnDevice() override {
192  const auto& input = Input(0);
193  auto* output = Output(0);
194  CAFFE_ENFORCE_GT(input.ndim(), 0, "Input has to be at least a vector.");
195  // it is allowed to have the output inplace overwrite the input but also
196  // allow the output to be copied from the input
197  if (&input != output) {
198  output->ResizeLike(input);
199  output->CopyFrom(input, &context_);
200  }
201  return true;
202  }
203 };
204 
205 template <class Context>
206 class FlattenToVecOp : public Operator<Context> {
207  public:
208  USE_OPERATOR_CONTEXT_FUNCTIONS;
209  USE_SIMPLE_CTOR_DTOR(FlattenToVecOp);
210 
211  bool RunOnDevice() override {
212  auto& input = Input(0);
213  auto* output = Output(0);
214  CAFFE_ENFORCE_GE(
215  input.dims().size(), 1, "The rank of the tensor must be >= 1.");
216  output->Resize(input.size());
217 
218  context_.template CopyItems<Context, Context>(
219  input.meta(),
220  input.size(),
221  input.raw_data(),
222  output->raw_mutable_data(input.meta()));
223  return true;
224  }
225 };
226 
227 // Output gets the data of input(0), but reshapes it like input(1).
228 template <class Context>
229 class ResizeLikeOp : public Operator<Context> {
230  public:
231  USE_OPERATOR_CONTEXT_FUNCTIONS;
232  USE_SIMPLE_CTOR_DTOR(ResizeLikeOp);
233 
234  bool RunOnDevice() override {
235  auto& input0 = Input(0);
236  auto& input1 = Input(1);
237  auto* output = Output(0);
238  CAFFE_ENFORCE_EQ(input0.size(), input1.size());
239  output->ResizeLike(Input(1));
240  context_.template CopyItems<Context, Context>(
241  input0.meta(),
242  input0.size(),
243  input0.raw_data(),
244  output->raw_mutable_data(input0.meta()));
245  return true;
246  }
247 };
248 
// Element-wise sum of an arbitrary number of identically-shaped inputs.
// May run in place with Input(0) as the output. Only float and int element
// types are supported (see RunOnDevice).
template <class Context>
class SumOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(SumOp);

  // T: element type. M is never referenced in the body; it only mirrors the
  // two-type dispatch signature used by RunOnDevice below.
  template <typename T, typename M>
  bool DoRunWithType() {
    auto& input0 = Input(0);
    auto* output = Output(0);
    // Single input: the sum is just a copy (cheap no-op when in place).
    if (InputSize() == 1) {
      output->CopyFrom(input0, &context_);
      return true;
    }
    output->ResizeLike(input0);
    T* output_data = output->template mutable_data<T>();
    // Dimension checking
    for (int i = 1; i < InputSize(); ++i) {
      if (output->dims() != Input(i).dims()) {
        CAFFE_THROW(
            "Check failed: output->dims() == Input(i).dims().",
            "Description: Input #",
            i,
            ", input dimension:",
            Input(i).dims(),
            " should match output dimension: ",
            output->dims());
      }
    }

    // Add the first two - works if in-place or not.
    math::Add(
        output->size(),
        input0.template data<T>(),
        Input(1).template data<T>(),
        output_data,
        &context_);
    // Add remaining.
    // Accumulates into output_data, so from here on the destination also
    // serves as the left operand.
    for (int i = 2; i < InputSize(); ++i) {
      math::Add(
          output->size(),
          output_data,
          Input(i).template data<T>(),
          output_data,
          &context_);
    }
    return true;
  }

  bool RunOnDevice() override {
    // Dispatch on the element type of the first input.
    if (Input(0).template IsType<float>()) {
      return DoRunWithType<float, float>();
    } else if (Input(0).template IsType<int>()) {
      return DoRunWithType<int, int>();
    } else {
      CAFFE_THROW(
          "Sum operator only supports 32-bit float and ints, but",
          " input was of type ",
          Input(0).meta().name());
    }
  }
};
311 
// WeightedSumOp computes the weighted sum of several tensors. The input should
// be in the form X_0, weight_0, X_1, weight_1, ... where X_i all have the same
// shape, and weight_i are size 1 tensors that specifies the weight of each
// vector. Note that if one wants to do in-place computation, it could only be
// done with X_0 also as the output, but not other X_i.
template <class Context>
class WeightedSumOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(WeightedSumOp);

  // DstType: element type of the X_i tensors and the output. Weights are
  // always read as float regardless of DstType.
  template <typename DstType>
  bool DoRunWithType() {
    // Inputs come in (X, weight) pairs, so the count must be even.
    CAFFE_ENFORCE_EQ(InputSize() % 2, 0);
    auto& X0 = Input(0);
    auto& weight0 = Input(1);
    CAFFE_ENFORCE_GT(X0.size(), 0);
    CAFFE_ENFORCE_EQ(weight0.size(), 1);
    int size = X0.size();
    auto* output = Output(0);
    output->ResizeLike(X0);
    // output = weight0 * X0. This is the step that makes in-place use with
    // X0 as the output valid.
    math::Scale<DstType, Context>(
        size,
        weight0.template data<float>(),
        X0.template data<DstType>(),
        output->template mutable_data<DstType>(),
        &context_);
    // output += weight_i * X_i for each remaining pair.
    for (int i = 2; i < InputSize(); i += 2) {
      auto& X = Input(i);
      // Do a check: if the input is the same as output, we have a problem -
      // in-place update should always only happen with the zeroth input.
      if (&X == output) {
        LOG(ERROR) << "Input #" << i << " is the same as output. "
                   << "If you want to do in-place updates, put the output as "
                   << "input #0.";
        return false;
      }
      auto& weight = Input(i + 1);
      CAFFE_ENFORCE_EQ(X.size(), size);
      CAFFE_ENFORCE_EQ(weight.size(), 1);
      math::Axpy<DstType, Context>(
          size,
          weight.template data<float>(),
          X.template data<DstType>(),
          output->template mutable_data<DstType>(),
          &context_);
    }
    return true;
  }
  // Defined in the device-specific implementation file.
  bool RunOnDevice() override;
};
363 
// Gradient of WeightedSumOp. Input 0 is dY, followed by the forward op's
// (X_i, weight_i) pairs. Each output i is dX_i = weight_i * dY; when the
// "grad_on_w" argument is set, an additional output dw_i = dot(dY, X_i) is
// produced per pair, laid out after all the dX_i outputs.
template <class Context>
class WeightedSumGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  WeightedSumGradientOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        grad_on_w_(OperatorBase::GetSingleArgument<bool>("grad_on_w", false)) {}

  template <typename DstType>
  bool DoRunWithType() {
    // dY plus (X, w) pairs means the input count must be odd.
    CAFFE_ENFORCE_EQ(InputSize() % 2, 1);
    // With grad_on_w we emit a dw per pair on top of the dX outputs.
    auto output_size = grad_on_w_ ? InputSize() - 1 : InputSize() / 2;
    CAFFE_ENFORCE_EQ(OutputSize(), output_size);

    auto& dY = Input(0);
    const auto* dY_data = dY.template data<DstType>();
    int size = dY.size();

    // The input size should be the input size of the forward op plus 1
    for (int i = 0; i < InputSize() / 2; i++) {
      auto& cur_w = Input(2 * i + 2);
      CAFFE_ENFORCE_EQ(cur_w.size(), 1);
      auto* cur_dX = Output(i);
      cur_dX->ResizeLike(dY);

      // dX_i = w_i * dY
      math::Scale<DstType, Context>(
          size,
          cur_w.template data<float>(),
          dY_data,
          cur_dX->template mutable_data<DstType>(),
          &context_);

      if (grad_on_w_) {
        auto& cur_X = Input(2 * i + 1);
        CAFFE_ENFORCE_EQ(cur_X.size(), size);
        // dw outputs start at index output_size / 2 (after all the dX_i).
        auto* cur_dw = Output(i + output_size / 2);
        cur_dw->Resize(1);
        // dw_i = <dY, X_i>
        math::Dot<DstType, Context>(
            size,
            dY_data,
            cur_X.template data<DstType>(),
            cur_dw->template mutable_data<float>(),
            &context_);
      }
    }

    return true;
  }

  // Defined in the device-specific implementation file.
  bool RunOnDevice() override;

 private:
  bool grad_on_w_;
};
419 
// Sparse weighted-sum update, performed in place on Input(0)/Output(0):
//   X0[idx] = w0 * X0[idx] + sum_i w_i * X_i[row matching idx]
// Inputs: X0, weight0, INDICES (Input(2)), then (X_i, weight_i) pairs whose
// rows correspond one-to-one with INDICES. All weights are scalars.
template <typename T, class Context>
class ScatterWeightedSumOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(ScatterWeightedSumOp);
  USE_DISPATCH_HELPER;

  bool RunOnDevice() override {
    // Dispatch on the integer type of the INDICES input.
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(2));
  }

 private:
  template <typename Index>
  bool DoRunWithType() {
    // Specialize the inner loops for block_size == 1 (scalar rows); other
    // sizes take the generic FixedSize path.
    TIndex block_size = Input(0).size_from_dim(1);
    return DispatchHelper<FixedValues<1>, Index>::call(this, block_size);
  }

  template <typename Index, int FixedSize>
  bool DoRunWithValue() {
    // X0, weight0, indices, then (X, weight) pairs -> odd input count.
    CAFFE_ENFORCE_EQ(InputSize() % 2, 1);
    auto& X0 = Input(0);
    auto& weight0 = Input(1);
    auto& indices = Input(2);
    auto* output = Output(0);
    CAFFE_ENFORCE_EQ(&X0, output, "In place operation is required");

    CAFFE_ENFORCE_GT(X0.size(), 0);
    CAFFE_ENFORCE_GT(X0.ndim(), 0, "X0 has to be at least the vector");
    CAFFE_ENFORCE_EQ(weight0.size(), 1);
    TIndex M = X0.size();
    TIndex N = X0.dim(0);
    TIndex K = indices.size();
    TIndex block_size = M / N;
    T* data = output->template mutable_data<T>();
    const Index* idxs = indices.template data<Index>();
    T w0 = *weight0.template data<T>();
    // It's most likely a constant so exact comparison is fine
    if (w0 != 1.0) {
      // Pre-scale only the rows that will be updated: row *= w0.
      for (int i = 0; i < K; ++i) {
        Index idx = idxs[i];
        CAFFE_ENFORCE(
            0 <= idx && idx < N,
            "Index out of bounds: ",
            idx,
            ", range 0 to ",
            N);
        math::ScaleFixedSize<T, Context, FixedSize>(
            block_size,
            w0,
            data + block_size * idx,
            data + block_size * idx,
            &context_);
      }
    }
    // Accumulate each (X, weight) pair: row += w * X[i-th row].
    for (int inp = 3; inp < InputSize(); inp += 2) {
      auto& X = Input(inp);
      auto& weight = Input(inp + 1);
      CAFFE_ENFORCE_EQ(X.size(), block_size * K);
      CAFFE_ENFORCE_EQ(weight.size(), 1);
      const T* x_data = X.template data<T>();
      T w = *weight.template data<T>();
      for (int i = 0; i < K; ++i) {
        Index idx = idxs[i];
        // double-checking the indices, but it's fine as it's DCHECK only
        DCHECK(0 <= idx && idx < N) << "Index out of bounds: " << idx
                                    << ", range 0 to " << N;
        math::AxpyFixedSize<T, Context, FixedSize>(
            block_size,
            w,
            x_data + block_size * i,
            data + block_size * idx,
            &context_);
      }
    }
    return true;
  }
  // Staging buffers; not used in this header — presumably used by a
  // device-specific specialization (confirm against the .cu implementation).
  Tensor<CPUContext> x_data_host_;
  Tensor<CPUContext> weights_host_;
  Tensor<Context> x_data_device_;
  Tensor<Context> weights_device_;
};
540 
// Assigns slices into DATA in place: DATA[INDICES[i], ...] = SLICES[i, ...].
// Type dispatch is done at construction time through a map keyed by the
// (index type, data type) pair; data and slice element types must match.
template <class Context>
class ScatterAssignOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  virtual ~ScatterAssignOp() {}

  ScatterAssignOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        // Supported (index, value) type combinations and their typed runners.
        runners_({{{TensorProto_DataType_INT32, TensorProto_DataType_FLOAT},
                   &ScatterAssignOp::DoRun<int32_t, float>},
                  {{TensorProto_DataType_INT32, TensorProto_DataType_FLOAT16},
                   &ScatterAssignOp::DoRun<int32_t, float16>},
                  {{TensorProto_DataType_INT32, TensorProto_DataType_INT32},
                   &ScatterAssignOp::DoRun<int32_t, int32_t>},
                  {{TensorProto_DataType_INT32, TensorProto_DataType_INT64},
                   &ScatterAssignOp::DoRun<int32_t, int64_t>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_FLOAT},
                   &ScatterAssignOp::DoRun<int64_t, float>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_FLOAT16},
                   &ScatterAssignOp::DoRun<int64_t, float16>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_INT32},
                   &ScatterAssignOp::DoRun<int64_t, int32_t>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_INT64},
                   &ScatterAssignOp::DoRun<int64_t, int64_t>}}) {}

  bool RunOnDevice() override {
    const auto& data = Input(DATA);
    const auto& slices = Input(SLICES);
    auto& indices = Input(INDICES);

    const auto dataType = TypeMetaToDataType(data.meta());
    const auto slicesType = TypeMetaToDataType(slices.meta());
    const auto indicesType = TypeMetaToDataType(indices.meta());
    // NOTE(review): `output` is unused here; DoRun re-fetches Output(0).
    auto* output = Output(0);

    auto runner = GetRunner(dataType, slicesType, indicesType);
    (this->*runner)();
    return true;
  }

 private:
  typedef void (ScatterAssignOp::*RunnerType)();
  typedef std::
      map<std::pair<TensorProto_DataType, TensorProto_DataType>, RunnerType>
          RunnerMap;

  RunnerMap runners_;

  // Looks up the typed DoRun instantiation for the given runtime types;
  // throws when the combination is unsupported.
  RunnerType GetRunner(
      const TensorProto_DataType dataType,
      const TensorProto_DataType slicesType,
      const TensorProto_DataType indicesType) {
    CAFFE_ENFORCE_EQ(dataType, slicesType, "Data and slice types must match");
    auto it = runners_.find({indicesType, dataType});
    CAFFE_ENFORCE(
        it != runners_.end(),
        "Could not find the runner corresponding to indicesType, dataType = ",
        indicesType,
        " ",
        dataType);
    return it->second;
  }

  template <typename Index, typename T>
  void DoRun() {
    auto& input = Input(DATA);
    auto& indices = Input(INDICES);
    auto& slices = Input(SLICES);
    auto* output = Output(0);
    CAFFE_ENFORCE_EQ(&input, output, "In place operation is required");

    CAFFE_ENFORCE_GT(input.ndim(), 0, "X0 has to be at least the vector");
    TIndex M = input.size();
    TIndex N = input.dim(0);
    TIndex K = indices.size();
    TIndex block_size = M / N; // elements per outer-dimension row
    CAFFE_ENFORCE_EQ(slices.size(), block_size * K);
    // TODO(dzhulgakov): it can be made to work with arbitrary data type by
    // using raw_mutable_data
    T* data = output->template mutable_data<T>();
    const Index* idxs = indices.template data<Index>();
    const T* slicesData = slices.template data<T>();
    DoScatterAssign(data, idxs, slicesData, N, K, block_size);
  }

  // Copies slice i into row idxs[i] of data, for i in [0, K).
  template <typename Index, typename T>
  void DoScatterAssign(
      T* data,
      const Index* idxs,
      const T* slicesData,
      TIndex N,
      TIndex K,
      TIndex block_size) {
    for (int i = 0; i < K; ++i) {
      Index idx = idxs[i];
      // double-checking the indices, but it's fine as it's DCHECK only
      DCHECK(0 <= idx && idx < N) << "Index out of bounds: " << idx
                                  << ", range 0 to " << N;
      context_.template Copy<T, Context, Context>(
          block_size, slicesData + block_size * i, data + block_size * idx);
    }
  }

  INPUT_TAGS(DATA, INDICES, SLICES);
};
669 
670 template <class Context, class DstContext, class SrcContext>
671 class CopyOp : public Operator<Context> {
672  public:
673  USE_OPERATOR_CONTEXT_FUNCTIONS;
674  USE_SIMPLE_CTOR_DTOR(CopyOp);
675 
676  bool RunOnDevice() override {
677  auto& input = OperatorBase::Input<Tensor<SrcContext>>(0);
678  auto* output = OperatorBase::Output<Tensor<DstContext>>(0);
679  output->ResizeLike(input);
680  this->context_.template CopyItems<SrcContext, DstContext>(
681  input.meta(),
682  input.size(),
683  input.raw_data(),
684  output->raw_mutable_data(input.meta()));
685  return true;
686  }
687 };
688 
689 template <class Context, class DstContext, class SrcContext>
690 class CopyOnDeviceLikeOp : public CopyOp<Context, DstContext, SrcContext> {
691  public:
692  CopyOnDeviceLikeOp(const OperatorDef& operator_def, Workspace* ws)
693  : CopyOp<Context, DstContext, SrcContext>(operator_def, ws) {}
694 };
695 
696 template <class Context>
697 class LengthsToSegmentIdsOp : public Operator<Context> {
698  public:
699  USE_OPERATOR_CONTEXT_FUNCTIONS;
700  USE_SIMPLE_CTOR_DTOR(LengthsToSegmentIdsOp);
701 
702  bool RunOnDevice() override {
703  auto& input = Input(0);
704  auto* output = Output(0);
705  auto* input_data = input.template data<int32_t>();
706 
707  CAFFE_ENFORCE(input.dims().size() == 1, "Input must be a vector.");
708  auto total_length =
709  std::accumulate(input_data, input_data + input.size(), 0);
710 
711  output->Resize(total_length);
712  auto* output_data = output->template mutable_data<int32_t>();
713 
714  for (int i = 0; i < input.size(); ++i) {
715  auto len = input_data[i];
716  std::fill(output_data, output_data + len, i);
717  output_data += len;
718  }
719  return true;
720  }
721 };
722 
723 template <class Context>
724 class LengthsToRangesOp : public Operator<Context> {
725  public:
726  USE_OPERATOR_CONTEXT_FUNCTIONS;
727  USE_SIMPLE_CTOR_DTOR(LengthsToRangesOp);
728 
729  bool RunOnDevice() override {
730  auto& input = Input(0);
731  auto* output = Output(0);
732  auto* input_data = input.template data<int32_t>();
733 
734  CAFFE_ENFORCE(input.dims().size() == 1, "Input must be a vector.");
735  auto size = input.size();
736 
737  output->Resize(size, 2);
738  auto* output_data = output->template mutable_data<int32_t>();
739 
740  int32_t offset = 0;
741  for (int i = 0; i < size; ++i) {
742  auto len = input_data[i];
743  output_data[i * 2] = offset;
744  output_data[i * 2 + 1] = len;
745  offset += len;
746  }
747  return true;
748  }
749 };
750 
// Converts a sorted (non-decreasing) vector of segment ids into a vector of
// segment lengths: output[k] = number of occurrences of id k. An optional
// second input fixes the total number of segments (must be >= the count
// inferred from the last id); ids that never occur get length 0.
template <class Context>
class SegmentIdsToLengthsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(SegmentIdsToLengthsOp);

  bool RunOnDevice() override {
    // Dispatch on the segment-id integer type.
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
  }

  template <typename Index>
  bool DoRunWithType() {
    auto& input = Input(0);
    // Accept (1, N) / (N, 1) shaped inputs as vectors too.
    if (input.ndim() == 2) {
      CAFFE_ENFORCE(
          input.dim32(0) == 1 || input.dim32(1) == 1,
          "Input must be a vector.");
    } else {
      CAFFE_ENFORCE_EQ(input.ndim(), 1, "Input must be a vector.");
    }
    auto* input_data = input.template data<Index>();
    auto input_size = input.size();
    auto* output = Output(0);
    // segment id starts from 0
    auto num_segments = input_size ? input_data[input_size - 1] + 1 : 0;
    if (InputSize() > 1) {
      CAFFE_ENFORCE_GE(Input(1).ndim(), 1);
      CAFFE_ENFORCE_LE(
          num_segments,
          Input(1).dim(0),
          "The number of segments inferred should *NOT* be larger "
          "than the size of Input(1)'s first dimension");
      num_segments = Input(1).dim(0);
    }
    CAFFE_ENFORCE(0 <= num_segments, "Indices must be in 0..K-1 range");
    output->Resize(num_segments);
    auto* output_data = output->template mutable_data<int32_t>();
    if (num_segments == 0) {
      return true;
    }
    std::fill(output_data, output_data + num_segments, 0);
    Index prev = 0; // Assume that segment_id >= 0.
    for (int64_t i = 0; i < input_size; i++) {
      CAFFE_ENFORCE(
          prev <= input_data[i],
          "Segment ids must be sorted: ",
          prev,
          " vs ",
          input_data[i]);
      prev = input_data[i];
      // Count one more element for this segment id.
      output_data[input_data[i]] += 1;
    }

    return true;
  }
};
807 
// Converts a sorted (non-decreasing) vector of segment ids into an (K, 2)
// tensor of (start index, length) pairs, one row per segment id. Ids that
// never occur get (start of the next present segment, 0). An optional second
// input fixes the total number of segments, as in SegmentIdsToLengthsOp.
template <class Context>
class SegmentIdsToRangesOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(SegmentIdsToRangesOp);

  bool RunOnDevice() override {
    // Dispatch on the segment-id integer type.
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
  }

  template <typename Index>
  bool DoRunWithType() {
    auto& input = Input(0);
    CAFFE_ENFORCE(input.dims().size() == 1, "Input must be a vector.");
    auto* input_data = input.template data<Index>();
    auto input_size = input.size();
    auto* output = Output(0);
    // segment id starts from 0
    auto num_segments = input_size ? input_data[input_size - 1] + 1 : 0;
    if (InputSize() > 1) {
      CAFFE_ENFORCE_GE(Input(1).ndim(), 1);
      CAFFE_ENFORCE_LE(
          num_segments,
          Input(1).dim(0),
          "The number of segments inferred should *NOT* be larger "
          "than the size of Input(1)'s first dimension");
      num_segments = Input(1).dim(0);
    }
    CAFFE_ENFORCE(0 <= num_segments, "Indices must be in 0..K-1 range");
    output->Resize(num_segments, 2);
    auto* output_data = output->template mutable_data<int32_t>();
    if (num_segments == 0) {
      return true;
    }
    std::fill(output_data, output_data + num_segments * 2, 0);
    // Note: input_data[0] is only read after the num_segments == 0 early
    // return above, so an empty input never dereferences it.
    Index prev = input_data[0];
    for (int64_t i = 0; i < input_size; i++) {
      CAFFE_ENFORCE(
          prev <= input_data[i],
          "Segment ids must be sorted: ",
          prev,
          " vs ",
          input_data[i]);
      // Segments skipped between prev and the current id start at i with
      // length 0 (their length slot is never incremented).
      while (prev != input_data[i]) {
        ++prev;
        output_data[prev * 2] = i;
      }
      output_data[input_data[i] * 2 + 1] += 1;
    }

    return true;
  }
};
861 
// For each length L in the input vector, emits L copies of the weight
// 1 / L^power into the output (so the output has sum(lengths) elements).
// The "power" argument defaults to 0.5; zero lengths contribute nothing.
template <class Context>
class LengthsToWeightsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  LengthsToWeightsOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        power_(OperatorBase::GetSingleArgument<float>("power", 0.5)) {}

  bool RunOnDevice() override {
    // Dispatch on the length integer type.
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
  }

  template <typename Index>
  bool DoRunWithType() {
    auto& input = Input(0);
    CAFFE_ENFORCE(input.dims().size() == 1, "Input must be a vector.");
    auto* input_data = input.template data<Index>();
    auto input_size = input.size();
    auto* output = Output(0);

    // Total output size is the sum of all (non-negative) lengths.
    int64_t output_size = 0;
    for (auto i = 0; i < input_size; i++) {
      CAFFE_ENFORCE_GE(input_data[i], 0, "unexpected negative length value");
      output_size += input_data[i];
    }

    // Pick a specialized weight function for the common powers to avoid a
    // general pow() call per segment.
    std::function<float(const int64_t& length, const float& power)> getWeight;
    if (power_ == 0.5) {
      getWeight = [](const int64_t& length, const float& /*power*/) {
        return 1.0 / std::sqrt(length);
      };
    } else if (power_ == 1) {
      getWeight = [](const int64_t& length, const float& /*power*/) {
        return 1.0 / length;
      };
    } else {
      getWeight = [](const int64_t& length, const float& power) {
        return 1.0 / std::pow(length, power);
      };
    }

    output->Resize(output_size);
    auto* output_data = output->template mutable_data<float>();
    int64_t cnt = 0;
    for (auto i = 0; i < input_size; i++) {
      auto len = input_data[i];
      if (len == 0) {
        continue;
      }
      CAFFE_ENFORCE_LE(cnt + len, output_size, "unexpected lengths value");

      // All elements of a segment share the same weight.
      float weight_value = getWeight(len, power_);
      std::fill(output_data + cnt, output_data + cnt + len, weight_value);
      cnt += len;
    }

    return true;
  }

 private:
  float power_;
};
924 
925 template <class Context>
926 class HasElementsOp : public Operator<Context> {
927  public:
928  USE_OPERATOR_CONTEXT_FUNCTIONS;
929  USE_SIMPLE_CTOR_DTOR(HasElementsOp);
930 
931  bool RunOnDevice() override {
932  auto& input = Input(0);
933  auto* output = OperatorBase::Output<TensorCPU>(0);
934  output->Resize(std::vector<TIndex>{});
935  *output->template mutable_data<bool>() = input.size() > 0;
936  return true;
937  }
938 };
939 
940 template <class Context>
941 class IsEmptyOp : public Operator<Context> {
942  public:
943  USE_OPERATOR_CONTEXT_FUNCTIONS;
944  USE_SIMPLE_CTOR_DTOR(IsEmptyOp);
945 
946  bool RunOnDevice() override {
947  auto& input = Input(0);
948  auto* output = OperatorBase::Output<TensorCPU>(0);
949  output->Resize(std::vector<TIndex>{});
950  *output->template mutable_data<bool>() = (input.size() == 0);
951  return true;
952  }
953 };
954 
955 // Return the size of a tensor
956 template <class Context>
957 class SizeOp : public Operator<Context> {
958  public:
959  USE_OPERATOR_CONTEXT_FUNCTIONS;
960  USE_SIMPLE_CTOR_DTOR(SizeOp);
961 
962  bool RunOnDevice() override {
963  auto& input = Input(0);
964  auto* output = Output(0);
965 
966  output->Resize(vector<TIndex>());
967  auto* output_data = output->template mutable_data<int64_t>();
968 
969  auto size = input.size();
970  math::Set<int64_t, Context>(
971  1, static_cast<int64_t>(size), output_data, &context_);
972 
973  return true;
974  }
975 };
976 
977 // returns a shape to be passed to Reshape
978 template <class Context>
979 class LengthsToShapeOp : public Operator<Context> {
980  public:
981  USE_OPERATOR_CONTEXT_FUNCTIONS;
982  USE_SIMPLE_CTOR_DTOR(LengthsToShapeOp);
983 
984  bool RunOnDevice() override {
985  auto& input = Input(0);
986 
987  CAFFE_ENFORCE(input.dims().size() == 1, "Input must be a vector.");
988  auto* output = Output(0);
989  auto* input_data = input.template data<int32_t>();
990 
991  auto size = input.size();
992  auto first = input_data[0];
993 
994  for (int i = 1; i < size; i++) {
995  CAFFE_ENFORCE(
996  input_data[i] == first, "All elements of input must be same ");
997  }
998 
999  output->Resize(2);
1000  auto* output_data = output->template mutable_data<int32_t>();
1001  output_data[0] = size;
1002  output_data[1] = first;
1003 
1004  return true;
1005  }
1006 };
1007 
1008 template <class Context>
1009 class GatherOp : public Operator<Context> {
1010  public:
1011  USE_OPERATOR_CONTEXT_FUNCTIONS;
1012  USE_SIMPLE_CTOR_DTOR(GatherOp);
1013 
1014  bool RunOnDevice() override {
1016  this, OperatorBase::Input<TensorCPU>(INDICES));
1017  }
1018 
1019  template <typename Index>
1020  bool DoRunWithType() {
1021  // If we endup using it on GPU doing O(N) memcpy is probably not best :)
1022  // TODO: implement prefetching if it starts mattering (TF does it)
1023  auto& data = Input(DATA);
1024  auto& indices = Input(INDICES);
1025  auto* output = Output(0);
1026 
1027  CAFFE_ENFORCE_GE(data.ndim(), 1, "DATA should be at least 1-D");
1028  auto shape = indices.dims();
1029  shape.insert(shape.end(), data.dims().begin() + 1, data.dims().end());
1030  output->Resize(shape);
1031 
1032  int block_size = data.size_from_dim(1);
1033  auto block_bytesize = data.size_from_dim(1) * data.meta().itemsize();
1034  int N = indices.size();
1035 
1036  auto src_base = static_cast<const char*>(data.raw_data());
1037  const Index* idxs = indices.template data<Index>();
1038  auto out = static_cast<char*>(output->raw_mutable_data(data.meta()));
1039 
1040  for (int i = 0; i < N; ++i) {
1041  auto idx = idxs[i];
1042  CAFFE_ENFORCE(
1043  0 <= idx && idx < data.dim(0),
1044  "INDICES element is out of DATA bounds, id=",
1045  idx,
1046  " data_dim=",
1047  data.dim(0));
1048  auto src = src_base + idx * block_bytesize;
1049  context_.template CopyItems<Context, Context>(
1050  data.meta(), block_size, src, out + block_bytesize * i);
1051  }
1052  return true;
1053  }
1054 
1055  INPUT_TAGS(DATA, INDICES);
1056 };
1057 
1058 template <class Context>
1059 class GatherRangesOp : public Operator<Context> {
1060  public:
1061  USE_OPERATOR_CONTEXT_FUNCTIONS;
1062  USE_SIMPLE_CTOR_DTOR(GatherRangesOp);
1063 
1064  bool RunOnDevice() override {
1066  this, OperatorBase::Input<TensorCPU>(RANGES));
1067  }
1068 
1069  template <typename Index>
1070  bool DoRunWithType() {
1071  auto& data = Input(DATA);
1072  auto& ranges = Input(RANGES);
1073  auto* outputData = Output(0);
1074  auto* outputLengths = Output(1);
1075 
1076  auto batchSize = ranges.dim(0);
1077  CAFFE_ENFORCE(data.ndim() == 1, "Data has to be 1-D");
1078  CAFFE_ENFORCE(ranges.ndim() == 3, "Ranges must be 3-D");
1079  CAFFE_ENFORCE(ranges.dim(1) > 0, "There has to be at least one range");
1080  CAFFE_ENFORCE_EQ(
1081  ranges.dim(2), 2, "Ranges last dimention should be of size 2");
1082 
1083  auto* rawData = static_cast<const char*>(data.raw_data());
1084  auto* rangesData = ranges.template data<Index>();
1085 
1086  outputLengths->Resize(batchSize);
1087  auto* outputLengthsPtr = outputLengths->template mutable_data<int32_t>();
1088  size_t start = 0;
1089  size_t blockSize = ranges.size_from_dim(1);
1090  for (size_t i = 0; i < batchSize; ++i) {
1091  auto end = start + blockSize;
1092  outputLengthsPtr[i] = accumulate(rangesData, start, end);
1093  start = end;
1094  }
1095 
1096  size_t outputSize = accumulate(rangesData, 0, ranges.size());
1097  outputData->Resize(outputSize);
1098 
1099  auto outputRawData =
1100  static_cast<char*>(outputData->raw_mutable_data(data.meta()));
1101  VLOG(1) << "Copying data";
1102  size_t outputOffsetBytes = 0;
1103  auto itemsize = data.meta().itemsize();
1104  for (int i = 0; i < ranges.size(); i += 2) {
1105  auto rangeStart = rangesData[i];
1106  auto rangeLength = rangesData[i + 1];
1107  if (!rangeLength) {
1108  continue;
1109  }
1110  auto rangeSizeBytes = rangeLength * itemsize;
1111  CAFFE_ENFORCE(outputOffsetBytes < outputSize * itemsize);
1112  CAFFE_ENFORCE(rangeStart + rangeLength <= data.size());
1113  context_.template CopyItems<Context, Context>(
1114  data.meta(),
1115  rangeLength,
1116  rawData + rangeStart * itemsize,
1117  outputRawData + outputOffsetBytes);
1118  outputOffsetBytes += rangeSizeBytes;
1119  }
1120  CAFFE_ENFORCE(outputOffsetBytes == outputSize * itemsize);
1121  return true;
1122  }
1123 
1124  INPUT_TAGS(DATA, RANGES, LENGTHS);
1125 
1126  private:
1127  template <typename Index>
1128  size_t accumulate(Index* ranges, size_t start, size_t end) {
1129  size_t result = 0;
1130  for (int i = start + 1; i < end; i += 2) {
1131  result += ranges[i];
1132  }
1133  return result;
1134  }
1135 };
1136 
1137 template <class Context>
1138 class LengthsGatherOp : public Operator<Context> {
1139  public:
1140  USE_OPERATOR_CONTEXT_FUNCTIONS;
1141  USE_SIMPLE_CTOR_DTOR(LengthsGatherOp);
1142 
1143  bool RunOnDevice() override {
1145  this, OperatorBase::Input<TensorCPU>(INDICES));
1146  }
1147 
1148  template <typename Index>
1149  bool DoRunWithType() {
1150  auto& items = Input(ITEMS);
1151  auto& lengths = Input(LENGTHS);
1152  auto& indices = Input(INDICES);
1153  auto* output = Output(0);
1154 
1155  CAFFE_ENFORCE_GE(items.ndim(), 1, "ITEMS should be at least 1-D");
1156  CAFFE_ENFORCE_EQ(lengths.ndim(), 1, "LENGTHS should be 1-D");
1157  CAFFE_ENFORCE_EQ(indices.ndim(), 1, "INDICES should be 1-D");
1158 
1159  const auto* lengths_data = lengths.template data<int32_t>();
1160  const auto* indices_data = indices.template data<Index>();
1161 
1162  TIndex total_length = 0;
1163  for (size_t i = 0; i < indices.size(); ++i) {
1164  auto idx = indices_data[i];
1165  CAFFE_ENFORCE_LT(idx, lengths.size());
1166  total_length += lengths_data[idx];
1167  }
1168  auto shape = items.dims();
1169  shape[0] = total_length;
1170  output->Resize(shape);
1171 
1172  offsets_.clear();
1173  TIndex running_offset = 0;
1174  offsets_.reserve(lengths.size());
1175  for (size_t i = 0; i < lengths.size(); ++i) {
1176  offsets_.push_back(running_offset);
1177  running_offset += lengths_data[i];
1178  }
1179  CAFFE_ENFORCE_EQ(
1180  items.dim(0),
1181  running_offset,
1182  "LENGTHS must match the first dimension of ITEMS");
1183 
1184  auto src_base = static_cast<const char*>(items.raw_data());
1185  auto block_size = items.size_from_dim(1);
1186  auto block_bytesize = block_size * items.itemsize();
1187  auto out = static_cast<char*>(output->raw_mutable_data(items.meta()));
1188 
1189  for (size_t i = 0; i < indices.size(); ++i) {
1190  auto idx = indices_data[i];
1191  auto length = lengths_data[idx];
1192  context_.template CopyItems<Context, Context>(
1193  items.meta(),
1194  length * block_size,
1195  src_base + offsets_[idx] * block_bytesize,
1196  out);
1197  out += length * block_bytesize;
1198  }
1199  return true;
1200  }
1201 
1202  std::vector<TIndex> offsets_;
1203 
1204  INPUT_TAGS(ITEMS, LENGTHS, INDICES);
1205 };
1206 
1207 // Since we just do copying, consider untemplating it on T and using raw_data()
1213 template <class Context>
1214 class UniqueOp : public Operator<Context> {
1215  public:
1216  USE_OPERATOR_CONTEXT_FUNCTIONS;
1217  USE_SIMPLE_CTOR_DTOR(UniqueOp);
1218 
1219  bool RunOnDevice() override {
1220  // Use run-time polymorphism
1221  auto& input = Input(0);
1222  if (input.template IsType<int32_t>()) {
1223  DoRun<int32_t>();
1224  } else if (input.template IsType<int64_t>()) {
1225  DoRun<int64_t>();
1226  } else {
1227  LOG(FATAL) << "Unsupported type of input in Unique: "
1228  << input.meta().name();
1229  }
1230  return true;
1231  }
1232 
1233  private:
1234  vector<int> order_;
1235  Tensor<Context> thrust_unique_buffer_;
1236  Tensor<Context> cuda_order_buffer_;
1237  Tensor<Context> second_order_buffer_;
1238 
1239  template <typename T>
1240  void DoRun();
1241 
1242  public:
1243  OUTPUT_TAGS(UNIQUE, REMAPPING);
1244 };
1245 
1246 template <class Context>
1247 class UnsafeCoalesceOp final : public Operator<Context> {
1248  public:
1249  USE_OPERATOR_CONTEXT_FUNCTIONS;
1251 
1252  bool RunOnDevice() override {
1253  size_t coalesced_size = 0;
1254  for (int i = 0; i < InputSize(); ++i) {
1255  CAFFE_ENFORCE(
1256  !Input(i).meta().ctor(),
1257  "Must only coalesce fundamental types, error at input: ",
1258  i);
1259  }
1260 
1261  auto roundToAlignment = [](size_t bytes) -> size_t {
1262  return ((bytes + gCaffe2Alignment - 1) / gCaffe2Alignment) *
1263  gCaffe2Alignment;
1264  };
1265 
1266  for (int i = 0; i < InputSize(); ++i) {
1267  coalesced_size += roundToAlignment(Input(i).nbytes());
1268  }
1269 
1270  auto* coalesced = Output(OutputSize() - 1);
1271  coalesced->Resize(coalesced_size);
1272  math::Set<uint8_t, Context>(
1273  coalesced_size,
1274  0.0,
1275  coalesced->template mutable_data<uint8_t>(),
1276  &context_);
1277 
1278  size_t coalesced_offset = 0;
1279  for (auto i = 0; i < InputSize(); ++i) {
1280  const auto input_nbytes = Input(i).nbytes();
1281  context_.template CopyBytes<Context, Context>(
1282  input_nbytes,
1283  (const uint8_t*)Input(i).raw_data(),
1284  coalesced->template mutable_data<uint8_t>() + coalesced_offset);
1285 
1286  // Note: this could cause Input(i) to free it's data if
1287  // Output(i) and Input(i) alias each other. This is safe on a
1288  // GPU (as the copy will happen-before the free), but it's
1289  // worth mentioning.
1290 
1291  Output(i)->ResizeLike(Input(i));
1292  Output(i)->ShareExternalPointer(
1293  static_cast<void*>(
1294  coalesced->template mutable_data<uint8_t>() + coalesced_offset),
1295  Input(i).meta(),
1296  input_nbytes);
1297  coalesced_offset += roundToAlignment(input_nbytes);
1298  }
1299  return true;
1300  }
1301 };
1302 
1303 template <typename T, class Context>
1304 class AccumulateHistogramOp : public Operator<Context> {
1305  public:
1306  AccumulateHistogramOp(const OperatorDef& def, Workspace* ws)
1307  : Operator<Context>(def, ws),
1308  lower_bound_(
1309  OperatorBase::GetSingleArgument<float>("lower_bound", 0.0)),
1310  upper_bound_(
1311  OperatorBase::GetSingleArgument<float>("upper_bound", 1.0)),
1312  num_buckets_(OperatorBase::GetSingleArgument<int>("num_buckets", 1)) {
1313  CAFFE_ENFORCE_GT(num_buckets_, 0);
1314  // 2 more for histograms < lower_bound, >= upper_bound respectively
1315  num_output_buckets_ = num_buckets_ + 2;
1316  accumulate_hist_ = std::vector<int64_t>(num_output_buckets_, 0);
1317  }
1318 
1319  USE_OPERATOR_CONTEXT_FUNCTIONS;
1320 
1321  bool RunOnDevice() override {
1322  auto& X = Input(X_IN);
1323  auto* X_data = X.template data<T>();
1324  int N = X.size();
1325  auto* cur_hist = Output(CUR_HIST);
1326  auto* acc_hist = Output(ACC_HIST);
1327  cur_hist->Resize(num_output_buckets_);
1328  acc_hist->Resize(num_output_buckets_);
1329  auto* cur_hist_data = cur_hist->template mutable_data<int64_t>();
1330  auto* acc_hist_data = acc_hist->template mutable_data<int64_t>();
1331  auto segment = (upper_bound_ - lower_bound_) / num_buckets_;
1332  math::Set<int64_t, Context>(
1333  num_output_buckets_, 0, cur_hist_data, &context_);
1334 
1335  for (int i = 0; i < N; i++) {
1336  int bucket_index = -1;
1337  if (X_data[i] < lower_bound_) {
1338  bucket_index = 0;
1339  } else if (X_data[i] >= upper_bound_) {
1340  bucket_index = num_buckets_ + 1;
1341  } else {
1342  bucket_index = (int)((X_data[i] - lower_bound_) / segment) + 1;
1343  }
1344  cur_hist_data[bucket_index] += 1;
1345  accumulate_hist_[bucket_index] += 1;
1346  }
1347 
1348  for (int i = 0; i < num_output_buckets_; i++) {
1349  acc_hist_data[i] = accumulate_hist_[i];
1350  }
1351 
1352  return true;
1353  }
1354 
1355  private:
1356  float lower_bound_;
1357  float upper_bound_;
1358  int num_buckets_;
1359  int num_output_buckets_;
1360  std::vector<int64_t> accumulate_hist_;
1361 
1362  INPUT_TAGS(X_IN);
1363  OUTPUT_TAGS(CUR_HIST, ACC_HIST);
1364 };
1365 
1366 template <class Context>
1367 class RangeOp : public Operator<Context> {
1368  public:
1369  USE_OPERATOR_CONTEXT_FUNCTIONS;
1370  USE_SIMPLE_CTOR_DTOR(RangeOp)
1371 
1372  bool RunOnDevice() override {
1374  this, Input(0));
1375  }
1376 
1377  template <typename T>
1378  T readScalarInput(const int index) {
1379  if (std::is_same<Context, TensorCPU>::value) {
1380  return Input(index).template data<T>()[0];
1381  } else {
1382  local_.template CopyFrom<Context>(Input(index));
1383  return local_.template data<T>()[0];
1384  }
1385  }
1386 
1387  template <typename T>
1388  bool DoRunWithType() {
1389  T stop = 0;
1390  T start = 0;
1391  T step = 1;
1392 
1393  for (int i = 0; i < InputSize(); ++i) {
1394  CAFFE_ENFORCE_EQ(Input(0).ndim(), 0, "All inputs must be scalar.");
1395  }
1396 
1397  switch (InputSize()) {
1398  case 1:
1399  stop = readScalarInput<T>(0);
1400  break;
1401  case 2:
1402  start = readScalarInput<T>(0);
1403  stop = readScalarInput<T>(1);
1404  break;
1405  case 3:
1406  step = readScalarInput<T>(2);
1407  start = readScalarInput<T>(0);
1408  stop = readScalarInput<T>(1);
1409  break;
1410  }
1411  CAFFE_ENFORCE_NE(step, 0, "Step size cannot be 0.");
1412  int length;
1413  auto diff = stop - start;
1414  if (std::is_integral<T>::value) {
1415  // Avoid casting to and from floats in case it introduces rounding and
1416  // avoid mod because the compiler doesn't strip unused code until later.
1417  length = diff / step;
1418  if (length * step < diff) {
1419  length += 1;
1420  }
1421  } else {
1422  length = static_cast<int>(ceil(diff / step));
1423  }
1424  auto* output = Output(0);
1425  // Match numpy's behavior here.
1426  if (length <= 0) {
1427  output->Resize(0);
1428  // Called for the side effect of setting the data.
1429  output->template mutable_data<T>();
1430  return true;
1431  } else {
1432  output->Resize(length);
1433  return DoRunOnDevice<T>(start, step, output);
1434  }
1435  }
1436 
1437  template <typename T>
1438  bool DoRunOnDevice(const T& start, const T& step, Tensor<Context>* output);
1439 
1440  private:
1441  // local CPU tensor for copying constants.
1442  TensorCPU local_;
1443 };
1444 
1445 } // namespace caffe2
1446 
1447 #endif // CAFFE2_OPERATORS_UTILITY_OPS_H_
const string & RootFolder()
Return the root folder of the workspace.
Definition: workspace.h:167
const TypeMeta & meta() const
Returns the TypeMeta object associated with the current data type.
Definition: tensor.h:648
Tensor is the basic class in Caffe2 that stores a contiguous memory with its shape information...
Definition: tensor.h:93
Update slices of the tensor in-place by overriding.
Definition: utility_ops.h:565
void CopyFrom(const Tensor< SrcContext > &src, ContextForCopy *context)
Copies the data from a source tensor, with a context provided to carry out the underlying memcpy operation.
Definition: tensor.h:166
TIndex size() const
Returns the size (i.e. the total number of elements) of the tensor.
Definition: tensor.h:593
Pass inputs to outputs.
Definition: utility_ops.h:186
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
void Resize(Ts...dim_source)
Resizes a tensor.
Definition: tensor.h:288
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Update slices of the tensor in-place with weighted sum.
Definition: utility_ops.h:459
Alias op makes the output and the input share the same underlying storage.
Definition: utility_ops.h:164
Deduplicates input indices vector and optionally produces reverse remapping.
Definition: utility_ops.h:1214