Caffe2 - C++ API
A deep learning, cross-platform ML framework
concat_op.cc
1 #include "caffe2/mobile/contrib/arm-compute/core/context.h"
2 #include "caffe2/mobile/contrib/arm-compute/core/operator.h"
3 #include "caffe2/operators/concat_split_op.h"
4 
5 namespace caffe2 {
6 
7 template <typename T> class GLConcatOp final : public Operator<GLContext> {
8 public:
9  GLConcatOp(const OperatorDef &operator_def, Workspace *ws)
10  : Operator<GLContext>(operator_def, ws) {}
11  virtual ~GLConcatOp() noexcept {}
12  USE_OPERATOR_FUNCTIONS(GLContext);
13  bool RunOnDevice() override;
14 private:
15  arm_compute::GCDepthConcatenateLayer concat_layer_;
16  bool first_run_ = true, second_run_ = true;
17  std::vector<GLContext::deleted_unique_ptr<const GLTensor<T>>> inputs_;
18  int channelCount_ = 0;
19 };
20 
21 
22 template <typename T>
24 
25  CAFFE_ENFORCE(InputSize() <= 4 && InputSize() >= 2, "Number \
26  of input must be between 2 and 4.");
27 
28  auto *X0blob = OperatorBase::Inputs()[0];
29  auto X0 = GLContext::getGLTensor<T>(X0blob);
30  if (first_run_) {
31  inputs_.push_back(std::move(X0));
32  }
33 
34  int N = inputs_[0]->dim32(0);
35  int channels = inputs_[0]->dim32(1);
36  int height = inputs_[0]->dim32(2);
37  int width = inputs_[0]->dim32(3);
38  std::vector<const Blob*> inputsBlob;
39  inputsBlob.push_back(X0blob);
40 
41  if (first_run_) {
42  channelCount_ = channels;
43  for (int i = 1; i < Inputs().size(); ++i) {
44  auto *Xblob = OperatorBase::Inputs()[i];
45  auto X = GLContext::getGLTensor<T>(Xblob);
46  CAFFE_ENFORCE_EQ(N, X->dim32(0), X->dim32(0));
47  CAFFE_ENFORCE_EQ(height, X->dim32(2), X->dim32(2));
48  CAFFE_ENFORCE_EQ(width, X->dim32(3), X->dim32(3));
49  channelCount_ += X->dim32(1);
50  inputs_.push_back(std::move(X));
51  }
52  }
53 
54  for (int i = 1; i < Inputs().size(); ++i) {
55  auto *Xblob = OperatorBase::Inputs()[i];
56  inputsBlob.push_back(Xblob);
57  }
58  std::vector<int> output_dims = {N, channelCount_, height, width};
59  GLTensor<T> *Y =
60  OperatorBase::Outputs()[0]->template GetMutable<GLTensor<T>>();
61  if (first_run_) {
62  first_run_ = false;
63  Y->Resize(output_dims);
64 
65  std::vector<arm_compute::IGCTensor*> inputsGC;
66  for (int i = 0; i < inputs_.size(); ++i) {
67  inputsGC.push_back(inputs_[i]->get_underlying());
68  }
69  concat_layer_.configure(inputsGC, Y->get_underlying());
70  } else {
71  for (int i = 0; i < inputs_.size(); ++i) {
72  auto* X = inputs_[i].get();
73  auto* Xblob = inputsBlob[i];
74  X->lazy_allocate(Xblob, second_run_, true);
75  }
76  if (second_run_) {
77  second_run_ = false;
78  Y->allocate();
79  }
80  concat_layer_.run();
81  }
82 
83  return true;
84 }
85 
// Passes the operator name Concat and the implementation GLConcatOp<DataType>
// to REGISTER_GL_OPERATOR. NOTE(review): the macro and DataType are defined
// outside this file; presumably this registers the class as the GL-backend
// implementation of "Concat" -- confirm against the macro definition.
86 REGISTER_GL_OPERATOR(Concat, GLConcatOp<DataType>);
87 
88 } // namespace caffe2
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...