Caffe2 - C++ API
A deep learning, cross-platform ML framework
pool_op.cc
1 #include "caffe2/operators/pool_op.h"
2 #include "caffe2/mobile/contrib/arm-compute/core/context.h"
3 #include "caffe2/mobile/contrib/arm-compute/core/operator.h"
4 
5 namespace caffe2 {
6 
7 template <typename T>
8 class GLAveragePoolOp final : public ConvPoolOpBase<GLContext> {
9  public:
10  USE_CONV_POOL_BASE_FUNCTIONS(GLContext);
11  GLAveragePoolOp(const OperatorDef& operator_def, Workspace* ws)
12  : ConvPoolOpBase<GLContext>(operator_def, ws) {
13  }
14  ~GLAveragePoolOp() {}
15 
16  bool RunOnDeviceWithOrderNCHW() override;
17  bool RunOnDeviceWithOrderNHWC() override;
18 private:
19  arm_compute::GCPoolingLayer pooling_layer_;
20  bool first_run_ = true, second_run_ = true;
21  GLContext::deleted_unique_ptr<const GLTensor<T>> X_;
22 };
23 
24 template<typename T>
25 class GLMaxPoolOp final : public ConvPoolOpBase<GLContext> {
26  public:
27  USE_CONV_POOL_BASE_FUNCTIONS(GLContext);
28  GLMaxPoolOp(const OperatorDef& operator_def, Workspace* ws)
29  : ConvPoolOpBase<GLContext>(operator_def, ws) {
30  }
31  ~GLMaxPoolOp() {}
32 
33  bool RunOnDeviceWithOrderNCHW() override;
34  bool RunOnDeviceWithOrderNHWC() override;
35 private:
36  arm_compute::GCPoolingLayer pooling_layer_;
37  bool first_run_ = true, second_run_ = true;
38  GLContext::deleted_unique_ptr<const GLTensor<T>> X_;
39 };
40 
41 template <>
43 
44  auto *Xblob = OperatorBase::Inputs()[0];
45  if (first_run_) {
46  X_ = GLContext::getGLTensor<half>(Xblob);
47  }
48 
49  int N = X_->dim32(0);
50  int channels = X_->dim32(1);
51  int height = X_->dim32(2);
52  int width = X_->dim32(3);
53 
54  GLTensor<half> *Y =
55  OperatorBase::Outputs()[0]->template GetMutable<GLTensor<half>>();
56  if (first_run_) {
57  first_run_ = false;
58  CAFFE_ENFORCE_EQ(kernel_.size(), 2, "ARM OpenGL only supports 2D pooling");
59  CAFFE_ENFORCE_EQ(kernel_h(), kernel_w(),
60  "ARM OpenGL only supports equal kernel size");
61  if (global_pooling_) {
62  vector<TIndex> output_dims = {N, channels, 1, 1};
63  Y->Resize(output_dims);
64  } else {
65  vector<TIndex> output_dims = {N, channels, 0, 0};
66  output_dims[2] = (height + pad_t() + pad_b() - kernel_h()) / stride_h() + 1;
67  output_dims[3] = (width + pad_l() + pad_r() - kernel_w()) / stride_w() + 1;
68  Y->Resize(output_dims);
69  }
70  if (global_pooling_) {
71  arm_compute::PoolingLayerInfo info(arm_compute::PoolingType::AVG);
72  pooling_layer_.configure(X_->get_underlying(), Y->get_underlying(), info);
73  } else {
74  arm_compute::PadStrideInfo ps_info(stride_w(), stride_h(), pad_l(), pad_r(),
75  pad_t(), pad_b(),
76  arm_compute::DimensionRoundingType::FLOOR);
77  arm_compute::PoolingLayerInfo info(arm_compute::PoolingType::AVG, kernel_h(),
78  ps_info);
79  pooling_layer_.configure(X_->get_underlying(), Y->get_underlying(), info);
80  }
81  } else {
82  X_->lazy_allocate(Xblob, second_run_, true);
83  if (second_run_) {
84  second_run_ = false;
85  Y->allocate();
86  }
87  pooling_layer_.run();
88  }
89 
90  return true;
91 }
92 
94 
95  auto *Xblob = OperatorBase::Inputs()[0];
96  if (first_run_) {
97  X_ = GLContext::getGLTensor<half>(Xblob);
98  }
99 
100  int N = X_->dim32(0);
101  int channels = X_->dim32(1);
102  int height = X_->dim32(2);
103  int width = X_->dim32(3);
104 
105  GLTensor<half> *Y =
106  OperatorBase::Outputs()[0]->template GetMutable<GLTensor<half>>();
107 
108  if (first_run_) {
109  first_run_ = false;
110  CAFFE_ENFORCE_EQ(kernel_.size(), 2, "ARM OpenGL only supports 2D pooling");
111  CAFFE_ENFORCE_EQ(kernel_h(), kernel_w(),
112  "ARM OpenGL only supports equal kernel size");
113  if (global_pooling_) {
114  vector<TIndex> output_dims = {N, channels, 1, 1};
115  Y->Resize(output_dims);
116  } else {
117  vector<int> output_dims = {1, 0, 0, 0};
118  output_dims[1] = channels;
119  output_dims[2] = (height + pad_t() + pad_b() - kernel_h()) / stride_h() + 1;
120  output_dims[3] = (width + pad_l() + pad_r() - kernel_w()) / stride_w() + 1;
121  Y->Resize(output_dims);
122  }
123  if (global_pooling_) {
124  arm_compute::PoolingLayerInfo info(arm_compute::PoolingType::MAX);
125  pooling_layer_.configure(X_->get_underlying(), Y->get_underlying(), info);
126  } else {
127  arm_compute::PadStrideInfo ps_info(stride_w(), stride_h(), pad_l(), pad_r(),
128  pad_t(), pad_b(),
129  arm_compute::DimensionRoundingType::FLOOR);
130  arm_compute::PoolingLayerInfo info(arm_compute::PoolingType::MAX, kernel_h(),
131  ps_info);
132  pooling_layer_.configure(X_->get_underlying(), Y->get_underlying(), info);
133  }
134  } else {
135  X_->lazy_allocate(Xblob, second_run_, true);
136  if (second_run_) {
137  second_run_ = false;
138  Y->allocate();
139  }
140  pooling_layer_.run();
141  }
142 
143  return true;
144 }
145 
146 template <>
148  return false;
149 }
150 
151 template <>
153  return false;
154 }
155 
// Operator registrations: the names AveragePool and MaxPool are paired with
// the DataType instantiations of the two pooling operator templates.
// NOTE(review): REGISTER_GL_OPERATOR is defined elsewhere; presumably it adds
// the operator to the GL operator registry — confirm against its definition.
156 REGISTER_GL_OPERATOR(AveragePool, GLAveragePoolOp<DataType>);
157 REGISTER_GL_OPERATOR(MaxPool, GLMaxPoolOp<DataType>);
158 
159 } // namespace caffe2
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...