Caffe2 - C++ API
A deep learning, cross platform ML framework
conv_op.cc
1 #include "arm_compute/graph/Graph.h"
2 #include "arm_compute/graph/Nodes.h"
3 #include "caffe2/mobile/contrib/arm-compute/core/context.h"
4 #include "caffe2/mobile/contrib/arm-compute/core/operator.h"
5 
6 #include "caffe2/operators/conv_op.h"
7 
8 namespace caffe2 {
9 
10 template <typename T>
11 class GLConvOp final : public ConvPoolOpBase<GLContext> {
12  public:
13  USE_CONV_POOL_BASE_FUNCTIONS(GLContext);
14  GLConvOp(const OperatorDef& operator_def, Workspace* ws)
15  : ConvPoolOpBase<GLContext>(operator_def, ws) {
16  // Since this is the default convolution implementation, we will
17  // use CAFFE_ENFORCE instead of OPERATOR_NEEDS_FEATURE.
18  CAFFE_ENFORCE(
19  group_ == 1 || order_ == StorageOrder::NCHW,
20  "Group convolution only supports NCHW order right now.");
21  }
22  ~GLConvOp() {}
23 
24  bool RunOnDevice() override;
25 private:
26  arm_compute::GCDirectConvolutionLayer conv_;
27  bool first_run_ = true, second_run_ = true;
28  GLContext::deleted_unique_ptr<const GLTensor<T>> X_, filter_, bias_;
29 };
30 
31 template <typename T>
33  auto *Xblob = OperatorBase::Inputs()[0];
34  auto *filterblob = OperatorBase::Inputs()[1];
35  auto *biasblob = OperatorBase::Inputs()[2];
36 
37  if (first_run_) {
38  X_ = GLContext::getGLTensor<T>(Xblob);
39  filter_ = GLContext::getGLTensor<T>(filterblob);
40  bias_ = GLContext::getGLTensor<T>(biasblob);
41  }
42 
43  GLTensor<T> *Y =
44  OperatorBase::Outputs()[0]->template GetMutable<GLTensor<T>>();
45 
46  const int N = X_->dim32(0), H = X_->dim32(2), W = X_->dim32(3), C = X_->dim32(1);
47 
48  CAFFE_ENFORCE_EQ(kernel_.size(), 2,
49  "Only 2d convolution is supported with ARM compute backend");
50 
51  CAFFE_ENFORCE(X_->ndim(), filter_->ndim());
52  const int M = filter_->dim32(0);
53  CAFFE_ENFORCE(filter_->dim32(2) == kernel_h());
54  CAFFE_ENFORCE(filter_->dim32(3) == kernel_w());
55  CAFFE_ENFORCE(filter_->dim32(1) == C);
56 
57  if (first_run_) {
58  first_run_ = false;
59 
60  // resize output accordingly
61  TensorCPU fakeX;
62  fakeX.Resize(X_->dims());
63  TensorCPU fakeY;
64  ConvPoolOpBase<GLContext>::SetOutputSize(fakeX, &fakeY, filter_->dim32(0));
65  Y->ResizeLike(fakeY);
66  LOG(INFO) << "[C2DEBUG] dims of X " << X_->dims();
67  LOG(INFO) << "[C2DEBUG] dims of X(gctensor) "
68  << X_->get_underlying()->info()->dimension(3) << " "
69  << X_->get_underlying()->info()->dimension(2) << " "
70  << X_->get_underlying()->info()->dimension(1) << " "
71  << X_->get_underlying()->info()->dimension(0) << " "
72  ;
73  LOG(INFO) << "[C2DEBUG] dims of Y " << Y->dims();
74  LOG(INFO) << "[C2DEBUG] dims of Y(gctensor) "
75  << Y->get_underlying()->info()->dimension(3) << " "
76  << Y->get_underlying()->info()->dimension(2) << " "
77  << Y->get_underlying()->info()->dimension(1) << " "
78  << Y->get_underlying()->info()->dimension(0) << " "
79  ;
80 
81  conv_.configure(
82  X_->get_underlying(), filter_->get_underlying(), bias_->get_underlying(),
83  Y->get_underlying(),
84  arm_compute::PadStrideInfo(stride_[0], stride_[1], pads_[0], pads_[1]));
85 
86  } else {
87  // Always attempt to copy the CPU to GPU on input
88  X_->lazy_allocate(Xblob, second_run_, true);
89  filter_->lazy_allocate(filterblob, second_run_, second_run_);
90  bias_->lazy_allocate(biasblob, second_run_, second_run_);
91  if (second_run_) {
92  second_run_ = false;
93  if (Y->get_underlying() != X_->get_underlying()) {
94  Y->allocate();
95  }
96  }
97  conv_.run();
98  }
99 
100  return true;
101 }
102 
// Hooks GLConvOp<DataType> up to the operator name `Conv` through
// REGISTER_GL_OPERATOR. Both the macro and `DataType` are defined outside this
// file (presumably in the included arm-compute core headers -- confirm).
103 REGISTER_GL_OPERATOR(Conv, GLConvOp<DataType>);
104 
105 } // namespace caffe2
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
void Resize(Ts...dim_source)
Resizes a tensor.
Definition: tensor.h:288
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...