Caffe2 - C++ API
A deep learning, cross-platform ML framework
conv_pool_op_base.h
#ifndef CAFFE2_OPERATORS_CONV_POOL_OP_BASE_H_
#define CAFFE2_OPERATORS_CONV_POOL_OP_BASE_H_

#include <cmath>
#include <functional>
#include <numeric>
#include <vector>

#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/proto/caffe2_legacy.pb.h"
#include "caffe2/utils/math.h"

// This constant is here just to allow us to experiment with padding: when we
// have an odd total padding, it determines which side gets the one additional
// pad value, the head side or the tail side. Setting it to false enables the
// TensorFlow behavior; setting it to true enables a behavior more consistent
// with Caffe and CuDNN.
// This only affects the case when you set legacy pad to VALID or SAME. The
// behavior inherits from the early designs of Google's CNN implementation,
// where padding values are implicitly calculated instead of explicitly
// specified. This is still the case with TensorFlow. Many frameworks have
// since followed a slightly different approach of explicitly giving padding
// values, in which case the value of this constant does not matter.
const bool CAFFE2_PAD_HEAD_MORE = false;
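
// A worked example (editorial, not part of the original header): with legacy
// SAME padding, input size 6, kernel 3, stride 2, the target output size is
// ceil(6 / 2) = 3, so pad_needed = (3 - 1) * 2 + 3 - 6 = 1. With
// CAFFE2_PAD_HEAD_MORE == false (the TensorFlow behavior) the split is
// pad_head = 1 / 2 = 0 and pad_tail = 1; with true (the Caffe/CuDNN behavior)
// it is pad_head = (1 + 1) / 2 = 1 and pad_tail = 0. See ComputeSizeAndPad
// below for the exact computation.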

namespace caffe2 {

template <class Context>
class ConvPoolOpBase : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  ConvPoolOpBase(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        legacy_pad_(
            static_cast<LegacyPadding>(OperatorBase::GetSingleArgument<int>(
                "legacy_pad",
                LegacyPadding::NOTSET))),
        global_pooling_(
            OperatorBase::GetSingleArgument<int>("global_pooling", 0)),
        kernel_(OperatorBase::GetRepeatedArgument<int>("kernels")),
        dilation_(OperatorBase::GetRepeatedArgument<int>("dilations")),
        stride_(OperatorBase::GetRepeatedArgument<int>("strides")),
        pads_(OperatorBase::GetRepeatedArgument<int>("pads")),
        float16_compute_(
            OperatorBase::GetSingleArgument<bool>("float16_compute", false)),
        group_(OperatorBase::GetSingleArgument<int>("group", 1)),
        order_(StringToStorageOrder(
            OperatorBase::GetSingleArgument<string>("order", "NCHW"))),
        shared_buffer_(
            OperatorBase::GetSingleArgument<int>("shared_buffer", 0)),
        ws_(ws) {
    // The padding values should either follow the legacy padding strategy
    // (VALID or SAME) or be explicit, non-negative values.
    if (legacy_pad_ == LegacyPadding::VALID ||
        legacy_pad_ == LegacyPadding::SAME) {
      CAFFE_ENFORCE(
          !OperatorBase::HasArgument("pads"),
          "If you use legacy padding VALID or SAME, you should not specify "
          "any specific padding values.");
    }

    // Get legacy argument values.
    if (OperatorBase::HasArgument("kernel")) {
      kernel_.resize(2, OperatorBase::GetSingleArgument<int>("kernel", 0));
    } else if (
        OperatorBase::HasArgument("kernel_h") &&
        OperatorBase::HasArgument("kernel_w")) {
      kernel_.push_back(OperatorBase::GetSingleArgument<int>("kernel_h", 0));
      kernel_.push_back(OperatorBase::GetSingleArgument<int>("kernel_w", 0));
    }

    if (OperatorBase::HasArgument("stride")) {
      stride_.resize(2, OperatorBase::GetSingleArgument<int>("stride", 0));
    } else if (
        OperatorBase::HasArgument("stride_h") &&
        OperatorBase::HasArgument("stride_w")) {
      stride_.push_back(OperatorBase::GetSingleArgument<int>("stride_h", 0));
      stride_.push_back(OperatorBase::GetSingleArgument<int>("stride_w", 0));
    }

    if (OperatorBase::HasArgument("dilation")) {
      dilation_.resize(2, OperatorBase::GetSingleArgument<int>("dilation", 0));
    } else if (
        OperatorBase::HasArgument("dilation_h") &&
        OperatorBase::HasArgument("dilation_w")) {
      dilation_.push_back(
          OperatorBase::GetSingleArgument<int>("dilation_h", 0));
      dilation_.push_back(
          OperatorBase::GetSingleArgument<int>("dilation_w", 0));
    }

    if (OperatorBase::HasArgument("pad")) {
      CAFFE_ENFORCE(
          legacy_pad_ != LegacyPadding::VALID &&
              legacy_pad_ != LegacyPadding::SAME,
          "If you use legacy padding VALID or SAME, you should not specify "
          "any specific padding values.");
      pads_.resize(4, OperatorBase::GetSingleArgument<int>("pad", 0));
    } else if (
        OperatorBase::HasArgument("pad_t") &&
        OperatorBase::HasArgument("pad_l") &&
        OperatorBase::HasArgument("pad_b") &&
        OperatorBase::HasArgument("pad_r")) {
      CAFFE_ENFORCE(
          legacy_pad_ != LegacyPadding::VALID &&
              legacy_pad_ != LegacyPadding::SAME,
          "If you use legacy padding VALID or SAME, you should not specify "
          "any specific padding values.");
      pads_.push_back(OperatorBase::GetSingleArgument<int>("pad_t", 0));
      pads_.push_back(OperatorBase::GetSingleArgument<int>("pad_l", 0));
      pads_.push_back(OperatorBase::GetSingleArgument<int>("pad_b", 0));
      pads_.push_back(OperatorBase::GetSingleArgument<int>("pad_r", 0));
    }

    // Fill default values.
    if (kernel_.size() == 0) {
      kernel_.assign({0, 0});
    }

    if (stride_.size() == 0) {
      stride_.resize(kernel_.size(), 1);
    }

    if (pads_.size() == 0) {
      pads_.resize(kernel_.size() * 2, 0);
    }

    if (dilation_.size() == 0) {
      dilation_.resize(kernel_.size(), 1);
    }

    CAFFE_ENFORCE_EQ(stride_.size(), kernel_.size());
    CAFFE_ENFORCE_EQ(dilation_.size(), kernel_.size());

    if (legacy_pad_ != LegacyPadding::VALID &&
        legacy_pad_ != LegacyPadding::SAME) {
      CAFFE_ENFORCE_EQ(pads_.size(), 2 * kernel_.size());
    }

    if (global_pooling_) {
      for (int dim = 0; dim < kernel_.size(); ++dim) {
        CAFFE_ENFORCE(
            pads_[2 * dim] == 0 && pads_[2 * dim + 1] == 0 &&
                dilation_[dim] == 1 && stride_[dim] == 1,
            "If global_pooling is set, pad, dilation and stride shouldn't "
            "be set.");
      }
    }

    AllocateAndCopy(kernel_, kernel_device_);
    AllocateAndCopy(stride_, stride_device_);
    AllocateAndCopy(dilation_, dilation_device_);
    AllocateAndCopy(pads_, pads_device_);

    // Check the kernel only if we are doing conv or pooling. The reason is
    // that a few other ops, like PadImage, also use this base class. We
    // really need to clean this up.
    if (operator_def.name().find("Conv") == 0 ||
        operator_def.name().find("Pool") != std::string::npos) {
      for (int dim = 0; dim < kernel_.size(); ++dim) {
        CAFFE_ENFORCE_GE(pads_[dim], 0);
        CAFFE_ENFORCE_GE(pads_[kernel_.size() + dim], 0);
        CAFFE_ENFORCE(
            kernel_[dim],
            "If you are doing convolution or pooling, you will need to set "
            "the kernel size explicitly.");
      }
    }

    for (int dim = 0; dim < kernel_.size(); ++dim) {
      CAFFE_ENFORCE_GE(kernel_[dim], 0);
      CAFFE_ENFORCE_GE(dilation_[dim], 0);
      CAFFE_ENFORCE_GE(stride_[dim], 0);
    }

    if (group_ != 1) {
      for (int dim = 0; dim < kernel_.size(); ++dim) {
        CAFFE_ENFORCE_EQ(
            dilation_[dim],
            1,
            "When group is used, dilation should not be set at the same time.");
      }
    }
  }
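
  // A worked example (editorial, not part of the original header): a 2D
  // operator defined with the legacy scalar arguments kernel=3, stride=2,
  // pad=1 and no repeated arguments is expanded by the constructor above to
  //   kernel_ = {3, 3}          // kernel_.resize(2, 3)
  //   stride_ = {2, 2}          // stride_.resize(2, 2)
  //   pads_   = {1, 1, 1, 1}    // pads_.resize(4, 1), in {t, l, b, r} order
  // while dilation_ falls back to the default {1, 1}.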

  // Returns the input image dimensions for the current storage order type.
  vector<int> GetDims(const Tensor<Context>& input) {
    vector<int> dims;
    switch (order_) {
      case StorageOrder::NCHW:
        dims.assign(input.dims().begin() + 2, input.dims().end());
        break;
      case StorageOrder::NHWC:
        dims.assign(input.dims().begin() + 1, input.dims().end() - 1);
        break;
      default:
        CAFFE_THROW("Unknown storage order : ", order_);
    }
    return dims;
  }

  // Returns the size of the input image for the current storage type.
  int GetDimsSize(const Tensor<Context>& input) {
    int size = 0;
    switch (order_) {
      case StorageOrder::NCHW:
        size = std::accumulate(
            input.dims().begin() + 2,
            input.dims().end(),
            1,
            std::multiplies<int>());
        break;
      case StorageOrder::NHWC:
        size = std::accumulate(
            input.dims().begin() + 1,
            input.dims().end() - 1,
            1,
            std::multiplies<int>());
        break;
      default:
        CAFFE_THROW("Unknown storage order : ", order_);
    }
    return size;
  }
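
  // A worked example (editorial): for an NCHW input of shape {2, 3, 4, 5},
  // GetDims returns {4, 5} and GetDimsSize returns 4 * 5 = 20. For the same
  // image in NHWC order, shape {2, 4, 5, 3}, the results are identical, since
  // both cases strip only the batch and channel dimensions.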

  // Sets the output size. The output channel is manually provided since
  // it may not be identical to the input channels.
  // This function can be used in the forward functions to obtain the output
  // sizes.
  // Note(jiayq): the templatization of this function is mainly to help
  // implementations that do not use first-class Tensor objects, such as the
  // MKL operator. One can still call this function with dummy
  // Tensor<CPUContext> objects in order to obtain the sizes.
  template <typename AlternativeContext>
  void SetOutputSize(
      const Tensor<AlternativeContext>& input,
      Tensor<AlternativeContext>* output,
      int output_channel) {
    CAFFE_ENFORCE(input.size() > 0);
    vector<int> output_dims;
    int N = input.dim32(0);
    bool channel_first;
    InferOutputSize(
        input.dims(),
        output_channel,
        order_,
        global_pooling_,
        legacy_pad_,
        N,
        kernel_,
        output_dims,
        dilation_,
        stride_,
        pads_,
        channel_first);

    if (channel_first) {
      output_dims.insert(output_dims.begin(), {N, output_channel});
    } else {
      output_dims.insert(output_dims.begin(), N);
      output_dims.push_back(output_channel);
    }
    output->Resize(output_dims);
  }

  // Helper function that is also called from OperatorSchema. Modifies the
  // kernel parameters, and outputs the inferred output_dims and the
  // channel_first flag.
  static inline void InferOutputSize(
      vector<TIndex> input_dims,
      int /*output_channel*/,
      StorageOrder order,
      bool global_pooling,
      LegacyPadding legacy_pad,
      int /*N*/,
      vector<int>& kernel,
      vector<int>& output_dims,
      const vector<int>& dilation,
      const vector<int>& stride,
      vector<int>& pads,
      bool& channel_first) {
    channel_first = false; // initialized to suppress compiler warning.
    vector<TIndex> dims;
    switch (order) {
      case StorageOrder::NHWC:
        channel_first = false;
        dims.assign(input_dims.begin() + 1, input_dims.end() - 1);
        break;
      case StorageOrder::NCHW:
        // Old Caffe order.
        channel_first = true;
        dims.assign(input_dims.begin() + 2, input_dims.end());
        break;
      default:
        CAFFE_THROW("Unknown Storage order: ", order);
    }

    if (global_pooling) {
      kernel.assign(dims.begin(), dims.end());
      output_dims.assign(dims.size(), 1);
    } else {
      for (int dim = 0; dim < dims.size(); ++dim) {
        int dim_size = 0;
        ComputeSizeAndPad(
            dims[dim],
            stride[dim],
            kernel[dim],
            dilation[dim],
            legacy_pad,
            &pads[dim],
            &pads[dims.size() + dim],
            &dim_size);
        output_dims.push_back(dim_size);
      }
    }
  }
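
  // A worked example (editorial): with explicit padding (legacy_pad NOTSET),
  // each spatial dimension follows
  //   out = (in + pad_head + pad_tail - (dilation * (kernel - 1) + 1)) / stride + 1,
  // so in = 7, kernel = 3, dilation = 1, stride = 2, pads = {1, 1} yields
  //   out = (7 + 1 + 1 - 3) / 2 + 1 = 4.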

  // ComputePads could be used in backward functions to figure out the padding
  // values for the given input.
  void ComputePads(const vector<int>& dims) {
    if (global_pooling_) {
      kernel_ = dims;
    } else if (legacy_pad_ != LegacyPadding::NOTSET) {
      int output_unused;
      for (int dim = 0; dim < dims.size(); ++dim) {
        ComputeSizeAndPad(
            dims[dim],
            stride_[dim],
            kernel_[dim],
            dilation_[dim],
            legacy_pad_,
            &pads_[dim],
            &pads_[dims.size() + dim],
            &output_unused);
      }
    }
  }

  void SetDeviceTensor(const std::vector<int>& data, Tensor<Context>* tensor) {
    bool reset_tensor_device_ = false;

    if (tensor->size() != data.size()) {
      tensor->Resize(data.size());
      reset_tensor_device_ = true;
    } else {
      const int* tensor_data = tensor->template data<int>();
      for (int d_i = 0; d_i < data.size(); ++d_i) {
        if (tensor_data[d_i] != data[d_i]) {
          reset_tensor_device_ = true;
          break;
        }
      }
    }

    if (reset_tensor_device_) {
      context_.template Copy<int, CPUContext, Context>(
          data.size(), data.data(), tensor->template mutable_data<int>());
    }
  }

  template <typename T>
  void SetBiasMultiplier(const int size, Tensor<Context>* bias_multiplier_) {
    if (bias_multiplier_->size() != size) {
      // If the helper bias multiplier does not match the image size, reshape
      // it and fill it with ones.
      bias_multiplier_->Resize(std::vector<TIndex>{size});
      math::Set<T, Context>(
          size,
          static_cast<T>(1),
          bias_multiplier_->template mutable_data<T>(),
          &context_);
    }
  }
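
  // Editorial note: the all-ones vector prepared here is typically consumed
  // by a rank-1 GEMM that broadcasts a bias of shape {C} across every image
  // position, i.e. Y(C x HW) += bias(C x 1) * ones(1 x HW), which is why its
  // length must track the current image size.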

  bool RunOnDevice() override {
    if (!global_pooling_) {
      for (int dim = 0; dim < kernel_.size(); ++dim) {
        CAFFE_ENFORCE_GT(kernel_[dim], 0);
      }
    }
    switch (order_) {
      case StorageOrder::NHWC:
        // VLOG(2) << "Running NHWC";
        return RunOnDeviceWithOrderNHWC();
      case StorageOrder::NCHW:
        // VLOG(2) << "Running NCHW";
        return RunOnDeviceWithOrderNCHW();
      default:
        CAFFE_THROW("Unknown Storage order: ", order_);
    }
  }

  // The actual functions that do the computation, for when different
  // storage orders lead to different implementations.
  virtual bool RunOnDeviceWithOrderNHWC() {
    CAFFE_NOT_IMPLEMENTED;
  }
  virtual bool RunOnDeviceWithOrderNCHW() {
    CAFFE_NOT_IMPLEMENTED;
  }

  static struct OpSchema::Cost CostInferenceForConv(
      const OperatorDef& def,
      const vector<TensorShape>& inputs) {
    struct OpSchema::Cost c;
    const TensorShape X = inputs[0];
    const TensorShape W = inputs[1];
    const TensorShape Y = TensorInferenceForConv(def, inputs)[0];
    ArgumentHelper helper(def);
    const auto order =
        StringToStorageOrder(helper.GetSingleArgument<string>("order", "NCHW"));

    unsigned long long N;
    unsigned long long Y_t = 1;
    unsigned long long Y_h;
    unsigned long long Y_w;
    unsigned long long kernel_t = 1;
    unsigned long long kernel_h;
    unsigned long long kernel_w;
    unsigned long long in_channels;
    unsigned long long out_channels;

    N = X.dims(0);
    if (X.dims_size() == 5) {
      // 3D convolution
      CAFFE_ENFORCE_EQ(order, StorageOrder::NCHW, "Conv3D only supports NCHW");
      Y_t = Y.dims(2);
      Y_h = Y.dims(3);
      Y_w = Y.dims(4);
      kernel_t = W.dims(2);
      kernel_h = W.dims(3);
      kernel_w = W.dims(4);
      in_channels = W.dims(1);
      out_channels = W.dims(0);
    } else {
      // 2D convolution
      CAFFE_ENFORCE_EQ(X.dims_size(), 4, "Conv2D should have 4D input tensor");
      if (order == StorageOrder::NHWC) {
        Y_h = Y.dims(1);
        Y_w = Y.dims(2);
        kernel_h = W.dims(1);
        kernel_w = W.dims(2);
        in_channels = W.dims(3);
        out_channels = W.dims(0);
      } else {
        Y_h = Y.dims(2);
        Y_w = Y.dims(3);
        kernel_h = W.dims(2);
        kernel_w = W.dims(3);
        in_channels = W.dims(1);
        out_channels = W.dims(0);
      }
    }
    // grouping is NOT properly handled yet
    c.flops = N * Y_t * Y_h * Y_w * kernel_t * kernel_w * kernel_h *
        in_channels * out_channels * 2;
    c.bytes_moved = N * out_channels * Y_t * Y_h * Y_w * sizeof(float);
    c.params_bytes = out_channels * in_channels * kernel_t * kernel_h *
        kernel_w * sizeof(float);
    return c;
  }
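
  // A worked example (editorial): for N = 1, a 3 x 3 kernel, in_channels =
  // 64, out_channels = 128 and a 56 x 56 output, the estimate above gives
  //   flops = 1 * 56 * 56 * 3 * 3 * 64 * 128 * 2 = 462,422,016,
  // counting one multiply and one add per weight application; grouped
  // convolution would reduce this by a factor of `group`.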

  static vector<TensorShape> TensorInferenceForSchema(
      const OperatorDef& def,
      const vector<TensorShape>& in,
      int output_channel) {
    ArgumentHelper helper(def);
    CAFFE_ENFORCE_GT(in.size(), 0);
    CAFFE_ENFORCE_GT(in[0].dims_size(), 0);
    int N = in[0].dims(0);
    bool channel_first;
    vector<int> pads = helper.GetRepeatedArgument<int>("pads");
    vector<int> kernel = helper.GetRepeatedArgument<int>("kernels");
    vector<int> strides = helper.GetRepeatedArgument<int>("strides");
    vector<int> dilations = helper.GetRepeatedArgument<int>("dilations");
    if (helper.HasArgument("pad")) {
      pads.resize(4, helper.GetSingleArgument<int>("pad", 0));
    } else if (
        helper.HasArgument("pad_t") && helper.HasArgument("pad_l") &&
        helper.HasArgument("pad_b") && helper.HasArgument("pad_r")) {
      pads.push_back(helper.GetSingleArgument<int>("pad_t", 0));
      pads.push_back(helper.GetSingleArgument<int>("pad_l", 0));
      pads.push_back(helper.GetSingleArgument<int>("pad_b", 0));
      pads.push_back(helper.GetSingleArgument<int>("pad_r", 0));
    }

    if (helper.HasArgument("kernel")) {
      kernel.resize(2, helper.GetSingleArgument<int>("kernel", 1));
    } else if (
        helper.HasArgument("kernel_h") && helper.HasArgument("kernel_w")) {
      kernel.push_back(helper.GetSingleArgument<int>("kernel_h", 1));
      kernel.push_back(helper.GetSingleArgument<int>("kernel_w", 1));
    }

    if (helper.HasArgument("stride")) {
      strides.resize(2, helper.GetSingleArgument<int>("stride", 1));
    } else if (
        helper.HasArgument("stride_h") && helper.HasArgument("stride_w")) {
      strides.push_back(helper.GetSingleArgument<int>("stride_h", 1));
      strides.push_back(helper.GetSingleArgument<int>("stride_w", 1));
    }

    if (helper.HasArgument("dilation")) {
      dilations.resize(2, helper.GetSingleArgument<int>("dilation", 1));
    } else if (
        helper.HasArgument("dilation_h") && helper.HasArgument("dilation_w")) {
      dilations.push_back(helper.GetSingleArgument<int>("dilation_h", 1));
      dilations.push_back(helper.GetSingleArgument<int>("dilation_w", 1));
    }

    auto check_and_set_default_value =
        [](vector<int>& vec, int size, int value) {
          if (vec.size() == 0) {
            vec.resize(size, value);
          }
        };

    check_and_set_default_value(kernel, 2, 1);
    check_and_set_default_value(strides, kernel.size(), 1);
    check_and_set_default_value(pads, kernel.size() * 2, 0);
    check_and_set_default_value(dilations, kernel.size(), 1);

    vector<int> output_dims;
    InferOutputSize(
        GetDimsVector(in[0]),
        output_channel,
        StringToStorageOrder(helper.GetSingleArgument<string>("order", "NCHW")),
        helper.GetSingleArgument<int>("global_pooling", 0),
        static_cast<LegacyPadding>(
            helper.GetSingleArgument<int>("legacy_pad", LegacyPadding::NOTSET)),
        N,
        kernel,
        output_dims,
        dilations,
        strides,
        pads,
        channel_first);
    vector<TensorShape> out(1);
    if (channel_first) {
      output_dims.insert(output_dims.begin(), {N, output_channel});
    } else {
      output_dims.push_back(output_channel);
      output_dims.insert(output_dims.begin(), N);
    }

    out[0] = CreateTensorShape(output_dims, TensorProto::FLOAT);
    return out;
  }
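
  // A worked example (editorial): a Conv op over an NCHW input of shape
  // {1, 3, 224, 224} with output_channel = 64, kernels = {7, 7},
  // strides = {2, 2} and pads = {3, 3, 3, 3} infers an output shape of
  // {1, 64, 112, 112}, since (224 + 3 + 3 - 7) / 2 + 1 = 112 per spatial
  // dimension.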

  static vector<TensorShape> TensorInferenceForConv(
      const OperatorDef& def,
      const vector<TensorShape>& in) {
    return TensorInferenceForSchema(def, in, in[1].dims(0));
  }

  static vector<TensorShape> TensorInferenceForPool(
      const OperatorDef& def,
      const vector<TensorShape>& in) {
    ArgumentHelper helper(def);
    auto order =
        StringToStorageOrder(helper.GetSingleArgument<string>("order", "NCHW"));
    int num_channels =
        (order == StorageOrder::NCHW ? in[0].dims(1) : in[0].dims(3));
    return TensorInferenceForSchema(def, in, num_channels);
  }

  virtual ~ConvPoolOpBase() {}

 protected:
  LegacyPadding legacy_pad_;
  bool global_pooling_;
  vector<int> kernel_;
  vector<int> dilation_;
  vector<int> stride_;
  vector<int> pads_;

  bool float16_compute_;

  // We need the above parameters to be available for the devices.
  Tensor<Context> kernel_device_;
  Tensor<Context> dilation_device_;
  Tensor<Context> stride_device_;
  Tensor<Context> pads_device_;

  int group_;
  StorageOrder order_;
  bool shared_buffer_;
  Workspace* ws_;

  static inline void ComputeSizeAndPad(
      const int in_size,
      const int stride,
      const int kernel,
      const int dilation,
      LegacyPadding legacy_pad,
      int* pad_head,
      int* pad_tail,
      int* out_size) {
    const int dkernel = dilation * (kernel - 1) + 1;
    switch (legacy_pad) {
      case LegacyPadding::NOTSET:
        // We will just use the direct padding head and tail values, but we
        // will verify that they are non-negative.
        CAFFE_ENFORCE_GE(in_size + *pad_head + *pad_tail, dkernel);
        *out_size = static_cast<int>(
            static_cast<float>(in_size + *pad_head + *pad_tail - dkernel) /
                stride +
            1);
        break;
      case LegacyPadding::VALID:
        *pad_head = 0;
        *pad_tail = 0;
        *out_size = (in_size - dkernel) / stride + 1;
        break;
      case LegacyPadding::SAME: {
        CAFFE_ENFORCE(
            1 == dilation, "Dilation not supported for legacy padding.");
        int legacy_target_size = (in_size + stride - 1) / stride;
        int pad_needed = (legacy_target_size - 1) * stride + kernel - in_size;
        if (CAFFE2_PAD_HEAD_MORE) {
          *pad_head = (pad_needed + 1) / 2;
        } else {
          *pad_head = pad_needed / 2;
        }
        *pad_tail = pad_needed - *pad_head;
        *out_size = (in_size + pad_needed - dkernel) / stride + 1;
        break;
      }
      case LegacyPadding::CAFFE_LEGACY_POOLING:
        // This is in order to adapt Caffe's pooling padding case. In this
        // case, we will only use pad_head and will compute pad_tail to match
        // the old Caffe pooling strategy. Also see caffe2_legacy.proto for
        // more details.
        CAFFE_ENFORCE_GE(*pad_head, 0);
        // Here, notice that Caffe rounds UP while Caffe2 rounds DOWN for the
        // output size computation.
        *out_size = std::ceil(
            static_cast<float>(in_size + *pad_head * 2 - kernel) / stride + 1);
        // If we have padding, Caffe also ensures that the last pooling starts
        // strictly inside the image (instead of at the padding); otherwise
        // clip the last.
        if (*pad_head > 0 && (*out_size - 1) * stride >= in_size + *pad_head) {
          --*out_size;
        }
        // Now, compare the output size with the standard Caffe2 output size.
        // The standard Caffe2 output size should always be no larger than
        // Caffe's output size.
        int standard_out_size = static_cast<int>(
            static_cast<float>(in_size + *pad_head * 2 - kernel) / stride + 1);
        CAFFE_ENFORCE_GE(
            *out_size,
            standard_out_size,
            "This should never happen. If this happens, double check the logic "
            "above.");
        if (*out_size > standard_out_size) {
          LOG(WARNING)
              << "You are hitting a case where Caffe's legacy padding "
                 "calculation is hit. This leads to inefficient and sometimes "
                 "incorrect results. We are keeping this behavior for backward "
                 "compatibility, but you are strongly recommended to move away "
                 "from it.";
        }
        *pad_tail = *pad_head + stride * (*out_size - standard_out_size);
        break;
    }
  }
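
  // A worked example (editorial): for in_size = 10, kernel = 3, stride = 2,
  // dilation = 1 (so dkernel = 3):
  //   VALID: pad_head = pad_tail = 0, out = (10 - 3) / 2 + 1 = 4;
  //   SAME:  target = ceil(10 / 2) = 5, pad_needed = (5 - 1) * 2 + 3 - 10 = 1,
  //          pad_head = 0, pad_tail = 1 (with CAFFE2_PAD_HEAD_MORE == false),
  //          out = (10 + 1 - 3) / 2 + 1 = 5.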

  // Accessors for 2D conv params.

  inline int pad_t() const {
    return pads_[0];
  }

  inline int pad_l() const {
    return pads_[1];
  }

  inline int pad_b() const {
    return pads_[2];
  }

  inline int pad_r() const {
    return pads_[3];
  }

  inline int kernel_h() const {
    return kernel_[0];
  }

  inline int kernel_w() const {
    return kernel_[1];
  }

  inline int stride_h() const {
    return stride_[0];
  }

  inline int stride_w() const {
    return stride_[1];
  }

  inline int dilation_h() const {
    return dilation_[0];
  }

  inline int dilation_w() const {
    return dilation_[1];
  }

 private:
  inline void AllocateAndCopy(const vector<int>& vec, Tensor<Context>& tensor) {
    tensor.Resize(vec.size());
    context_.template Copy<int, CPUContext, Context>(
        vec.size(), vec.data(), tensor.template mutable_data<int>());
  }

#define USE_CONV_POOL_BASE_FUNCTIONS(Context)      \
  USE_OPERATOR_FUNCTIONS(Context);                 \
  using ConvPoolOpBase<Context>::pads_;            \
  using ConvPoolOpBase<Context>::pads_device_;     \
  using ConvPoolOpBase<Context>::pad_t;            \
  using ConvPoolOpBase<Context>::pad_l;            \
  using ConvPoolOpBase<Context>::pad_b;            \
  using ConvPoolOpBase<Context>::pad_r;            \
  using ConvPoolOpBase<Context>::legacy_pad_;      \
  using ConvPoolOpBase<Context>::global_pooling_;  \
  using ConvPoolOpBase<Context>::kernel_;          \
  using ConvPoolOpBase<Context>::kernel_device_;   \
  using ConvPoolOpBase<Context>::kernel_h;         \
  using ConvPoolOpBase<Context>::kernel_w;         \
  using ConvPoolOpBase<Context>::dilation_;        \
  using ConvPoolOpBase<Context>::dilation_device_; \
  using ConvPoolOpBase<Context>::dilation_h;       \
  using ConvPoolOpBase<Context>::dilation_w;       \
  using ConvPoolOpBase<Context>::stride_;          \
  using ConvPoolOpBase<Context>::stride_device_;   \
  using ConvPoolOpBase<Context>::stride_h;         \
  using ConvPoolOpBase<Context>::stride_w;         \
  using ConvPoolOpBase<Context>::group_;           \
  using ConvPoolOpBase<Context>::order_;           \
  using ConvPoolOpBase<Context>::shared_buffer_;   \
  using ConvPoolOpBase<Context>::GetDims;          \
  using ConvPoolOpBase<Context>::GetDimsSize;      \
  using ConvPoolOpBase<Context>::SetDeviceTensor;  \
  using ConvPoolOpBase<Context>::ws_
};

} // namespace caffe2

#endif // CAFFE2_OPERATORS_CONV_POOL_OP_BASE_H_
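
A minimal usage sketch (editorial; MyPoolOp and its trivial bodies are
hypothetical, but the base-class API is the one declared above): an operator
derives from ConvPoolOpBase, pulls the inherited members into scope with
USE_CONV_POOL_BASE_FUNCTIONS, and overrides the per-order Run functions that
RunOnDevice() dispatches to.

#include "caffe2/operators/conv_pool_op_base.h"

namespace caffe2 {

template <class Context>
class MyPoolOp final : public ConvPoolOpBase<Context> {
 public:
  USE_CONV_POOL_BASE_FUNCTIONS(Context);
  MyPoolOp(const OperatorDef& operator_def, Workspace* ws)
      : ConvPoolOpBase<Context>(operator_def, ws) {}

  bool RunOnDeviceWithOrderNCHW() override {
    // Base-class accessors such as kernel_h(), stride_w() and pad_t() are
    // available here through the using-declarations in the macro.
    return true;
  }

  bool RunOnDeviceWithOrderNHWC() override {
    return true;
  }
};

} // namespace caffe2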