1 #ifndef CAFFE2_OPERATORS_CONV_POOL_OP_BASE_H_ 2 #define CAFFE2_OPERATORS_CONV_POOL_OP_BASE_H_ 6 #include "caffe2/core/context.h" 7 #include "caffe2/core/logging.h" 8 #include "caffe2/core/operator.h" 9 #include "caffe2/proto/caffe2_legacy.pb.h" 10 #include "caffe2/utils/math.h" 23 const bool CAFFE2_PAD_HEAD_MORE =
false;
// Constructor fragment of ConvPoolOpBase<Context> (class head and several
// original lines are missing from this extraction — gaps in the embedded
// numbering). Parses the shared convolution/pooling arguments
// ("kernels"/"strides"/"pads"/"dilations" vectors, plus legacy scalar and
// *_h/*_w forms), then validates their consistency.
27 template <
class Context>
30 USE_OPERATOR_CONTEXT_FUNCTIONS;
// legacy_pad_ comes from the integer "legacy_pad" argument, defaulting to
// LegacyPadding::NOTSET (cast from int, see caffe2_legacy.pb.h).
34 static_cast<LegacyPadding>(OperatorBase::GetSingleArgument<int>(
36 LegacyPadding::NOTSET))),
38 OperatorBase::GetSingleArgument<int>(
"global_pooling", 0)),
// Vector-form arguments: one entry per spatial dimension.
39 kernel_(OperatorBase::GetRepeatedArgument<int>(
"kernels")),
40 dilation_(OperatorBase::GetRepeatedArgument<int>(
"dilations")),
41 stride_(OperatorBase::GetRepeatedArgument<int>(
"strides")),
42 pads_(OperatorBase::GetRepeatedArgument<int>(
"pads")),
44 OperatorBase::GetSingleArgument<bool>(
"float16_compute",
false)),
45 group_(OperatorBase::GetSingleArgument<int>(
"group", 1)),
46 order_(StringToStorageOrder(
47 OperatorBase::GetSingleArgument<string>(
"order",
"NCHW"))),
49 OperatorBase::GetSingleArgument<int>(
"shared_buffer", 0)),
// VALID/SAME legacy padding is mutually exclusive with explicit pad values;
// the (elided) enforce carries the message below.
53 if (legacy_pad_ == LegacyPadding::VALID ||
54 legacy_pad_ == LegacyPadding::SAME) {
57 "If you use legacy padding VALID or SAME, you should not specify " 58 "any specific padding values.");
// Legacy scalar/2-D forms: "kernel" fills both dims; "kernel_h"/"kernel_w"
// append per-dimension values. Same pattern for stride and dilation below.
63 kernel_.resize(2, OperatorBase::GetSingleArgument<int>(
"kernel", 0));
67 kernel_.push_back(OperatorBase::GetSingleArgument<int>(
"kernel_h", 0));
68 kernel_.push_back(OperatorBase::GetSingleArgument<int>(
"kernel_w", 0));
72 stride_.resize(2, OperatorBase::GetSingleArgument<int>(
"stride", 0));
76 stride_.push_back(OperatorBase::GetSingleArgument<int>(
"stride_h", 0));
77 stride_.push_back(OperatorBase::GetSingleArgument<int>(
"stride_w", 0));
81 dilation_.resize(2, OperatorBase::GetSingleArgument<int>(
"dilation", 0));
86 OperatorBase::GetSingleArgument<int>(
"dilation_h", 0));
88 OperatorBase::GetSingleArgument<int>(
"dilation_w", 0));
93 legacy_pad_ != LegacyPadding::VALID &&
94 legacy_pad_ != LegacyPadding::SAME,
95 "If you use legacy padding VALID or SAME, you should not specify " 96 "any specific padding values.");
// Scalar "pad" applies the same padding on all four 2-D sides.
97 pads_.resize(4, OperatorBase::GetSingleArgument<int>(
"pad", 0));
104 legacy_pad_ != LegacyPadding::VALID &&
105 legacy_pad_ != LegacyPadding::SAME,
106 "If you use legacy padding VALID or SAME, you should not specify " 107 "any specific padding values.");
// Per-side 2-D padding in the fixed order top, left, bottom, right.
108 pads_.push_back(OperatorBase::GetSingleArgument<int>(
"pad_t", 0));
109 pads_.push_back(OperatorBase::GetSingleArgument<int>(
"pad_l", 0));
110 pads_.push_back(OperatorBase::GetSingleArgument<int>(
"pad_b", 0));
111 pads_.push_back(OperatorBase::GetSingleArgument<int>(
"pad_r", 0));
// Fill in defaults when nothing was specified: 2-D kernel of zeros,
// unit strides/dilations, zero pads (head+tail per spatial dim).
115 if (kernel_.size() == 0) {
116 kernel_.assign({0, 0});
119 if (stride_.size() == 0) {
120 stride_.resize(kernel_.size(), 1);
123 if (pads_.size() == 0) {
124 pads_.resize(kernel_.size() * 2, 0);
127 if (dilation_.size() == 0) {
128 dilation_.resize(kernel_.size(), 1);
// All per-dimension vectors must agree with the kernel rank; pads_ holds
// head+tail entries, hence the factor of 2.
131 CAFFE_ENFORCE_EQ(stride_.size(), kernel_.size());
132 CAFFE_ENFORCE_EQ(dilation_.size(), kernel_.size());
134 if (legacy_pad_ != LegacyPadding::VALID &&
135 legacy_pad_ != LegacyPadding::SAME) {
136 CAFFE_ENFORCE_EQ(pads_.size(), 2 * kernel_.size());
// global_pooling covers the whole spatial extent, so explicit pad/dilation/
// stride values other than the defaults are rejected.
139 if (global_pooling_) {
140 for (
int dim = 0; dim < kernel_.size(); ++dim) {
142 pads_[2 * dim] == 0 && pads_[2 * dim + 1] == 0 &&
143 dilation_[dim] == 1 && stride_[dim] == 1,
144 "If global_pooling is set pad, dilation and stride shouldn't be set.");
// Mirror the CPU-side vectors into device tensors for kernels that read
// them on-device.
148 AllocateAndCopy(kernel_, kernel_device_);
149 AllocateAndCopy(stride_, stride_device_);
150 AllocateAndCopy(dilation_, dilation_device_);
151 AllocateAndCopy(pads_, pads_device_);
// NOTE(review): asymmetric name checks — "Conv" must be a prefix
// (find() == 0) while "Pool" may appear anywhere (find() != npos).
// Presumably intentional, but worth confirming.
156 if (operator_def.name().find(
"Conv") == 0 ||
157 operator_def.name().find(
"Pool") != std::string::npos) {
158 for (
int dim = 0; dim < kernel_.size(); ++dim) {
159 CAFFE_ENFORCE_GE(pads_[dim], 0);
160 CAFFE_ENFORCE_GE(pads_[kernel_.size() + dim], 0);
163 "If you are doing convolution or pooling, you will need to set " 164 "explicitly the kernel size.");
168 for (
int dim = 0; dim < kernel_.size(); ++dim) {
169 CAFFE_ENFORCE_GE(kernel_[dim], 0);
170 CAFFE_ENFORCE_GE(dilation_[dim], 0);
171 CAFFE_ENFORCE_GE(stride_[dim], 0);
// Grouped conv combined with dilation is rejected (enforce condition elided
// in this extraction; message retained below).
175 for (
int dim = 0; dim < kernel_.size(); ++dim) {
179 "When group is used, dilation should not be set at the same time.");
// Fragment of a dims-extraction helper (signature elided; presumably
// GetDims(input) — TODO confirm). Strips the batch and channel axes to
// return only the spatial dimensions: NCHW drops dims[0..1], NHWC drops
// dims[0] and the trailing channel dim.
188 case StorageOrder::NCHW:
189 dims.assign(input.
dims().begin() + 2, input.
dims().end());
191 case StorageOrder::NHWC:
192 dims.assign(input.
dims().begin() + 1, input.
dims().end() - 1);
195 CAFFE_THROW(
"Unknown storage order : ", order_);
204 case StorageOrder::NCHW:
205 size = std::accumulate(
206 input.
dims().begin() + 2,
209 std::multiplies<int>());
211 case StorageOrder::NHWC:
212 size = std::accumulate(
213 input.
dims().begin() + 1,
214 input.
dims().end() - 1,
216 std::multiplies<int>());
219 CAFFE_THROW(
"Unknown storage order : ", order_);
// Fragment of the output-sizing method (name/signature partly elided;
// presumably SetOutputSize(input, output, output_channel) — TODO confirm).
// Computes the output spatial dims (elided lines), then prepends batch/
// channel for NCHW or wraps with batch...channel for NHWC, and resizes the
// output tensor accordingly.
232 template <
typename AlternativeContext>
236 int output_channel) {
237 CAFFE_ENFORCE(input.
size() > 0);
238 vector<int> output_dims;
239 int N = input.
dim32(0);
// channel-first (NCHW) layout: {N, C, spatial...}
256 output_dims.insert(output_dims.begin(), {N, output_channel});
// channel-last (NHWC) layout: {N, spatial..., C}
258 output_dims.insert(output_dims.begin(), N);
259 output_dims.push_back(output_channel);
261 output->
Resize(output_dims);
// Static shape-inference helper: given input dims and the conv/pool
// hyper-parameters, fills output_dims with the output spatial sizes and
// reports whether the layout is channel-first. Several parameters and the
// per-dim ComputeSizeAndPad call are elided in this extraction.
// NOTE(review): input_dims is taken by value (a copy); and under
// global_pooling the (non-const) kernel parameter is overwritten with the
// input's spatial dims — callers must expect that mutation.
266 static inline void InferOutputSize(
267 vector<TIndex> input_dims,
271 LegacyPadding legacy_pad,
274 vector<int>& output_dims,
275 const vector<int>& dilation,
276 const vector<int>& stride,
278 bool& channel_first) {
279 channel_first =
false;
282 case StorageOrder::NHWC:
283 channel_first =
false;
284 dims.assign(input_dims.begin() + 1, input_dims.end() - 1);
286 case StorageOrder::NCHW:
288 channel_first =
true;
289 dims.assign(input_dims.begin() + 2, input_dims.end());
292 CAFFE_THROW(
"Unknown Storage order: ", order);
// Global pooling: kernel spans the whole spatial extent, output is all 1s.
295 if (global_pooling) {
296 kernel.assign(dims.begin(), dims.end());
297 output_dims.assign(dims.size(), 1);
// Otherwise compute each spatial output size (call body elided); pads is
// laid out as head values [0, rank) then tail values [rank, 2*rank).
299 for (
int dim = 0; dim < dims.
size(); ++dim) {
308 &pads[dims.size() + dim],
310 output_dims.push_back(dim_size);
// Recomputes pads_ for the given spatial dims when a legacy padding mode is
// in effect (NOTSET leaves the explicit pads untouched; the global_pooling
// branch body is elided). Uses the same head/tail pads_ layout as
// InferOutputSize: head at [dim], tail at [dims.size() + dim].
317 void ComputePads(
const vector<int>& dims) {
318 if (global_pooling_) {
320 }
else if (legacy_pad_ != LegacyPadding::NOTSET) {
322 for (
int dim = 0; dim < dims.
size(); ++dim) {
330 &pads_[dims.size() + dim],
// Copies `data` into the device-side tensor, but only when the tensor's
// size or contents differ — avoids a redundant host-to-device copy on every
// call.
// NOTE(review): after the Resize branch, data<int>() is read on a tensor
// whose old contents may be gone; the elided lines (338, 342, 347-351)
// presumably guard/early-exit that path — confirm against the full source.
336 void SetDeviceTensor(
const std::vector<int>& data,
Tensor<Context>* tensor) {
337 bool reset_tensor_device_ =
false;
339 if (tensor->
size() != data.size()) {
340 tensor->
Resize(data.size());
341 reset_tensor_device_ =
true;
343 const int* tensor_data = tensor->template data<int>();
// Element-wise comparison; any mismatch forces a re-copy.
344 for (
int d_i = 0; d_i < data.size(); ++d_i) {
345 if (tensor_data[d_i] != data[d_i]) {
346 reset_tensor_device_ =
true;
352 if (reset_tensor_device_) {
353 context_.template Copy<int, CPUContext, Context>(
354 data.size(), data.data(), tensor->template mutable_data<int>());
// Lazily (re)sizes the bias-multiplier tensor to `size` and fills it via
// math::Set (fill value elided in this extraction; conventionally 1 so a
// GEMM with it broadcasts the bias — TODO confirm). No-op when the tensor
// already has the requested size.
358 template <
typename T>
359 void SetBiasMultiplier(
const int size,
Tensor<Context>* bias_multiplier_) {
360 if (bias_multiplier_->
size() != size) {
363 bias_multiplier_->
Resize(std::vector<TIndex>{size});
364 math::Set<T, Context>(
367 bias_multiplier_->template mutable_data<T>(),
// Entry point: validates kernel sizes (must be > 0 unless global pooling
// derives them from the input) and dispatches to the order-specific
// implementation overridden by subclasses.
372 bool RunOnDevice()
override {
373 if (!global_pooling_) {
374 for (
int dim = 0; dim < kernel_.size(); ++dim) {
375 CAFFE_ENFORCE_GT(kernel_[dim], 0);
379 case StorageOrder::NHWC:
381 return RunOnDeviceWithOrderNHWC();
382 case StorageOrder::NCHW:
384 return RunOnDeviceWithOrderNCHW();
386 CAFFE_THROW(
"Unknown Storage order: ", order_);
// Order-specific hooks; subclasses override the one(s) they support.
// The base versions abort with CAFFE_NOT_IMPLEMENTED.
392 virtual bool RunOnDeviceWithOrderNHWC() {
393 CAFFE_NOT_IMPLEMENTED;
395 virtual bool RunOnDeviceWithOrderNCHW() {
396 CAFFE_NOT_IMPLEMENTED;
// Cost-inference fragment (function name elided; presumably
// CostInferenceForConv — it derives Y via TensorInferenceForConv).
// Estimates FLOPs, bytes moved, and parameter bytes for a 2-D or 3-D
// convolution from the input X, weights W, and inferred output Y.
// NOTE(review): X and W are copied by value (const TensorShape X = ...);
// const& would avoid the copies. N/Y_h/Y_w assignments sit on elided lines.
400 const OperatorDef& def,
401 const vector<TensorShape>& inputs) {
403 const TensorShape X = inputs[0];
404 const TensorShape W = inputs[1];
405 const TensorShape Y = TensorInferenceForConv(def, inputs)[0];
408 StringToStorageOrder(helper.GetSingleArgument<
string>(
"order",
"NCHW"));
410 unsigned long long N;
411 unsigned long long Y_t = 1;
412 unsigned long long Y_h;
413 unsigned long long Y_w;
414 unsigned long long kernel_t = 1;
415 unsigned long long kernel_h;
416 unsigned long long kernel_w;
417 unsigned long long in_channels;
418 unsigned long long out_channels;
// 5-D input => Conv3D; weights are laid out (out_c, in_c, k_t, k_h, k_w).
421 if (X.dims_size() == 5) {
423 CAFFE_ENFORCE_EQ(order, StorageOrder::NCHW,
"Conv3D only supports NCHW");
427 kernel_t = W.dims(2);
428 kernel_h = W.dims(3);
429 kernel_w = W.dims(4);
430 in_channels = W.dims(1);
431 out_channels = W.dims(0);
434 CAFFE_ENFORCE_EQ(X.dims_size(), 4,
"Conv2D should have 4D input tensor");
// NHWC weights: (out_c, k_h, k_w, in_c); NCHW weights: (out_c, in_c, k_h, k_w).
435 if (order == StorageOrder::NHWC) {
438 kernel_h = W.dims(1);
439 kernel_w = W.dims(2);
440 in_channels = W.dims(3);
441 out_channels = W.dims(0);
445 kernel_h = W.dims(2);
446 kernel_w = W.dims(3);
447 in_channels = W.dims(1);
448 out_channels = W.dims(0);
// 2 FLOPs (multiply + add) per MAC over every output element.
452 c.flops = N * Y_t * Y_h * Y_w * kernel_t * kernel_w * kernel_h *
453 in_channels * out_channels * 2;
454 c.bytes_moved = N * out_channels * Y_t * Y_h * Y_w *
sizeof(float);
455 c.params_bytes = out_channels * in_channels * kernel_t * kernel_h *
456 kernel_w *
sizeof(float);
// Static shape inference shared by the conv and pool schemas: re-parses the
// kernel/stride/pad/dilation arguments from the OperatorDef (mirroring the
// constructor), applies defaults, runs InferOutputSize, and wraps the
// result in a FLOAT TensorShape.
460 static vector<TensorShape> TensorInferenceForSchema(
461 const OperatorDef& def,
462 const vector<TensorShape>& in,
463 int output_channel) {
465 CAFFE_ENFORCE_GT(in.size(), 0);
466 CAFFE_ENFORCE_GT(in[0].dims_size(), 0);
467 int N = in[0].dims(0);
// NOTE(review): the repeated argument here is "dilation" but the
// constructor reads "dilations" — nets using the vector form will have
// their dilations silently ignored by shape inference. Likely a typo.
469 vector<int> pads = helper.GetRepeatedArgument<
int>(
"pads");
470 vector<int> kernel = helper.GetRepeatedArgument<
int>(
"kernels");
471 vector<int> strides = helper.GetRepeatedArgument<
int>(
"strides");
472 vector<int> dilations = helper.GetRepeatedArgument<
int>(
"dilation");
473 if (helper.HasArgument(
"pad")) {
474 pads.resize(4, helper.GetSingleArgument<
int>(
"pad", 0));
476 helper.HasArgument(
"pad_t") && helper.HasArgument(
"pad_l") &&
477 helper.HasArgument(
"pad_b") && helper.HasArgument(
"pad_r")) {
478 pads.push_back(helper.GetSingleArgument<
int>(
"pad_t", 0));
479 pads.push_back(helper.GetSingleArgument<
int>(
"pad_l", 0));
480 pads.push_back(helper.GetSingleArgument<
int>(
"pad_b", 0));
481 pads.push_back(helper.GetSingleArgument<
int>(
"pad_r", 0));
484 if (helper.HasArgument(
"kernel")) {
485 kernel.resize(2, helper.GetSingleArgument<
int>(
"kernel", 1));
487 helper.HasArgument(
"kernel_h") && helper.HasArgument(
"kernel_w")) {
488 kernel.push_back(helper.GetSingleArgument<
int>(
"kernel_h", 1));
489 kernel.push_back(helper.GetSingleArgument<
int>(
"kernel_w", 1));
492 if (helper.HasArgument(
"stride")) {
493 strides.resize(2, helper.GetSingleArgument<
int>(
"stride", 1));
495 helper.HasArgument(
"stride_h") && helper.HasArgument(
"stride_w")) {
496 strides.push_back(helper.GetSingleArgument<
int>(
"stride_h", 1));
497 strides.push_back(helper.GetSingleArgument<
int>(
"stride_w", 1));
// BUG(review): the three statements below write the "dilation"/
// "dilation_h"/"dilation_w" values into `strides`, not `dilations` —
// a copy-paste of the stride branch above. Shape inference will use wrong
// strides whenever scalar dilation arguments are present. Should target
// `dilations`. Flagged only (surrounding lines are elided in this view).
500 if (helper.HasArgument(
"dilation")) {
501 strides.resize(2, helper.GetSingleArgument<
int>(
"dilation", 1));
503 helper.HasArgument(
"dilation_h") && helper.HasArgument(
"dilation_w")) {
504 strides.push_back(helper.GetSingleArgument<
int>(
"dilation_h", 1));
505 strides.push_back(helper.GetSingleArgument<
int>(
"dilation_w", 1));
// Fill any still-empty vectors with defaults (2-D unit kernel, unit
// strides/dilations, zero pads).
508 auto check_and_set_default_value =
509 [](vector<int>& vec,
int size,
int value) {
510 if (vec.size() == 0) {
511 vec.resize(size, value);
515 check_and_set_default_value(kernel, 2, 1);
516 check_and_set_default_value(strides, kernel.size(), 1);
517 check_and_set_default_value(pads, kernel.size() * 2, 0);
518 check_and_set_default_value(dilations, kernel.size(), 1);
// Delegate spatial-size computation to InferOutputSize (call is partially
// elided here), then assemble the full output shape per storage order.
520 vector<int> output_dims;
522 GetDimsVector(in[0]),
524 StringToStorageOrder(helper.GetSingleArgument<
string>(
"order",
"NCHW")),
525 helper.GetSingleArgument<
int>(
"global_pooling", 0),
526 static_cast<LegacyPadding>(
527 helper.GetSingleArgument<
int>(
"legacy_pad", LegacyPadding::NOTSET)),
535 vector<TensorShape> out(1);
// channel-first: {N, C, spatial...}; channel-last: {N, spatial..., C}.
537 output_dims.insert(output_dims.begin(), {N, output_channel});
539 output_dims.push_back(output_channel);
540 output_dims.insert(output_dims.begin(), N);
543 out[0] = CreateTensorShape(output_dims, TensorProto::FLOAT);
// Conv shape inference: the output channel count is the weight tensor's
// leading dimension (in[1].dims(0)).
// NOTE(review): accesses in[1] without checking in.size() >= 2; presumably
// the schema guarantees two inputs — confirm.
547 static vector<TensorShape> TensorInferenceForConv(
548 const OperatorDef& def,
549 const vector<TensorShape>& in) {
550 return TensorInferenceForSchema(def, in, in[1].dims(0));
// Pool shape inference: pooling preserves the channel count, which is read
// from the input at axis 1 (NCHW) or axis 3 (NHWC).
553 static vector<TensorShape> TensorInferenceForPool(
554 const OperatorDef& def,
555 const vector<TensorShape>& in) {
558 StringToStorageOrder(helper.GetSingleArgument<
string>(
"order",
"NCHW"));
560 (order == StorageOrder::NCHW ? in[0].dims(1) : in[0].dims(3));
561 return TensorInferenceForSchema(def, in, num_channels);
// Member declarations (partial — several fields referenced elsewhere, e.g.
// kernel_, stride_, pads_, *_device_, group_, order_, shared_buffer_, are
// on elided lines).
567 LegacyPadding legacy_pad_;
// True when the whole spatial extent is pooled in one step.
568 bool global_pooling_;
// Per-spatial-dimension dilation factors.
570 vector<int> dilation_;
574 bool float16_compute_;
// Computes one spatial dimension's output size (and, for legacy modes, its
// head/tail padding) given input size, stride, kernel, and dilation.
// dkernel is the dilated kernel extent. Some parameters and branch lines
// are elided in this extraction.
587 static inline void ComputeSizeAndPad(
592 LegacyPadding legacy_pad,
596 const int dkernel = dilation * (kernel - 1) + 1;
597 switch (legacy_pad) {
// NOTSET: use the caller-supplied pads as-is; standard floor formula.
598 case LegacyPadding::NOTSET:
601 CAFFE_ENFORCE_GE(in_size + *pad_head + *pad_tail, dkernel);
602 *out_size =
static_cast<int>(
603 static_cast<float>(in_size + *pad_head + *pad_tail - dkernel) /
// VALID: no padding at all.
607 case LegacyPadding::VALID:
610 *out_size = (in_size - dkernel) / stride + 1;
// SAME: output covers ceil(in_size / stride); split the needed padding
// between head and tail (CAFFE2_PAD_HEAD_MORE picks which side gets the
// odd unit). Dilation is not supported here.
612 case LegacyPadding::SAME: {
614 1 == dilation,
"Dilation not supported for legacy padding.");
615 int legacy_target_size = (in_size + stride - 1) / stride;
616 int pad_needed = (legacy_target_size - 1) * stride + kernel - in_size;
617 if (CAFFE2_PAD_HEAD_MORE) {
618 *pad_head = (pad_needed + 1) / 2;
620 *pad_head = pad_needed / 2;
622 *pad_tail = pad_needed - *pad_head;
623 *out_size = (in_size + pad_needed - dkernel) / stride + 1;
// CAFFE_LEGACY_POOLING: Caffe's ceil-mode formula, kept only for backward
// compatibility; pad_tail is enlarged when the ceil rounds the output up
// past the standard (floor) size.
626 case LegacyPadding::CAFFE_LEGACY_POOLING:
631 CAFFE_ENFORCE_GE(*pad_head, 0);
634 *out_size = std::ceil(
635 static_cast<float>(in_size + *pad_head * 2 - kernel) / stride + 1);
639 if (*pad_head > 0 && (*out_size - 1) * stride >= in_size + *pad_head) {
647 int standard_out_size =
static_cast<int>(
648 static_cast<float>(in_size + *pad_head * 2 - kernel) / stride + 1);
652 "This should never happen. If this happens, double check the logic " 654 if (*out_size > standard_out_size) {
656 <<
"You are hitting a case where Caffe's legacy padding calculation " 657 "is hit. This leads to inefficient and sometimes incorrect " 658 "results. We are keeping this behavior for backward compatibility" 659 ", but you are strongly recommended to move away from it.";
661 *pad_tail = *pad_head + stride * (*out_size - standard_out_size);
// 2-D convenience accessors (bodies elided in this extraction; presumably
// they index pads_ [t,l,b,r], kernel_, stride_, dilation_ — TODO confirm).
668 inline int pad_t()
const {
672 inline int pad_l()
const {
676 inline int pad_b()
const {
680 inline int pad_r()
const {
684 inline int kernel_h()
const {
688 inline int kernel_w()
const {
692 inline int stride_h()
const {
696 inline int stride_w()
const {
700 inline int dilation_h()
const {
704 inline int dilation_w()
const {
// Resizes the device tensor to match `vec` and copies the host vector into
// it. Unlike SetDeviceTensor this always copies (used once from the
// constructor for kernel_/stride_/dilation_/pads_).
709 inline void AllocateAndCopy(
const vector<int>& vec,
Tensor<Context>& tensor) {
710 tensor.
Resize(vec.size());
711 context_.template Copy<int, CPUContext, Context>(
712 vec.size(), vec.data(), tensor.template mutable_data<int>());
715 #define USE_CONV_POOL_BASE_FUNCTIONS(Context) \ 716 USE_OPERATOR_FUNCTIONS(Context); \ 717 using ConvPoolOpBase<Context>::pads_; \ 718 using ConvPoolOpBase<Context>::pads_device_; \ 719 using ConvPoolOpBase<Context>::pad_t; \ 720 using ConvPoolOpBase<Context>::pad_l; \ 721 using ConvPoolOpBase<Context>::pad_b; \ 722 using ConvPoolOpBase<Context>::pad_r; \ 723 using ConvPoolOpBase<Context>::legacy_pad_; \ 724 using ConvPoolOpBase<Context>::global_pooling_; \ 725 using ConvPoolOpBase<Context>::kernel_; \ 726 using ConvPoolOpBase<Context>::kernel_device_; \ 727 using ConvPoolOpBase<Context>::kernel_h; \ 728 using ConvPoolOpBase<Context>::kernel_w; \ 729 using ConvPoolOpBase<Context>::dilation_; \ 730 using ConvPoolOpBase<Context>::dilation_device_; \ 731 using ConvPoolOpBase<Context>::dilation_h; \ 732 using ConvPoolOpBase<Context>::dilation_w; \ 733 using ConvPoolOpBase<Context>::stride_; \ 734 using ConvPoolOpBase<Context>::stride_device_; \ 735 using ConvPoolOpBase<Context>::stride_h; \ 736 using ConvPoolOpBase<Context>::stride_w; \ 737 using ConvPoolOpBase<Context>::group_; \ 738 using ConvPoolOpBase<Context>::order_; \ 739 using ConvPoolOpBase<Context>::shared_buffer_; \ 740 using ConvPoolOpBase<Context>::GetDims; \ 741 using ConvPoolOpBase<Context>::GetDimsSize; \ 742 using ConvPoolOpBase<Context>::SetDeviceTensor; \ 743 using ConvPoolOpBase<Context>::ws_ 748 #endif // CAFFE2_OPERATORS_CONV_POOL_OP_BASE_H_ Tensor is the basic class in Caffe2 that stores a contiguous memory with its shape information...
int dim32(const int i) const
Returns the i-th dimension of the tensor in int.
TIndex size() const
Returns the size (i.e.
A helper class to index into arguments.
const vector< TIndex > & dims() const
Returns the dimensions of the tensor as a vector.
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
void Resize(Ts...dim_source)
Resizes a tensor.
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
bool HasArgument(const string &name) const
Checks if the operator has an argument of the given name.