// NOTE(review): this text looks like a lossy extraction of the original header:
// the original line numbers are fused into the code and several original lines
// (macro openings, some call arguments, closing braces) are not present here.
// The comments below describe only the statements that are actually visible.
//
// Include guard, dependencies, and the start of the template header for the
// forward operator follow on one physical line.
2 #ifndef CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_ 3 #define CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_ 5 #include "caffe2/core/context.h" 7 #include "caffe2/core/logging.h" 8 #include "caffe2/core/operator.h" 9 #include "caffe2/operators/conv_pool_op_base.h" 10 #include "caffe2/operators/deform_conv_op.h" 11 #include "caffe2/utils/math.h" 15 template <
typename T,
class Context>
// Forward pass of the deformable convolution operator for NCHW-ordered input.
// Visible steps: validate the input/filter/offset shapes, size the output,
// prepare the column-buffer shape and per-group strides, optionally prepare the
// bias multiplier, then loop over images and groups issuing GEMM calls.
16 bool DeformConvOp<T, Context>::RunOnDeviceWithOrderNCHW() {
19 auto& filter = Input(FILTER);
// N and C are read from dimensions 0 and 1 of X (X's declaration is not
// visible in this listing).
21 const int N = X.dim32(0), C = X.dim32(1);
// Input and filter must have the same number of dimensions.
22 CAFFE_ENFORCE_EQ(X.ndim(), filter.ndim());
// M is dimension 0 of the filter; it is later passed to SetOutputSize.
23 const int M = filter.dim32(0);
// Condition and message fragments of a check that the input channel count
// equals filter.dim32(1) * group_ (the enclosing macro call is not visible).
25 C == filter.dim32(1) * group_,
26 "Convolution op: input channels does not match: # of input channels ",
28 " is not equal to kernel channels * group:",
// Message fragments of further checks whose conditions are not visible here.
34 "The number of output channels is not divisible by group.");
37 "Deformable convolution only supports 2d kernel, has ",
42 "Deformable convolution only supports 4d offset, has ",
// The offset tensor must have the same leading dimension as X.
45 CAFFE_ENFORCE_EQ(offset.dim32(0), N);
// Both C and M must be divisible by deformable_group_.
47 C % deformable_group_ == 0,
48 "The number of input channels ",
50 " is not divisible by deformable group ",
53 M % deformable_group_ == 0,
54 "The number of output channels ",
56 " is not divisible by deformable group ",
// Dimension 1 of offset must equal 2 * kernel_h * kernel_w * deformable_group_.
59 offset.dim32(1) == 2 * kernel_h() * kernel_w() * deformable_group_,
60 "Deformable convolution: offset 1st dimension must equal " 61 "2 * kernel_h * kernel_w * deformable_group: 2 * ",
// Numerators built from X's dims 2 and 3, the pads, and the dilated kernel
// extent; the divisor and the surrounding check are not visible.
// NOTE(review): presumably the expected output height/width compared against
// the offset tensor's spatial dims — confirm against the full source.
70 (X.dim32(2) + pad_t() + pad_b() - (dilation_h() * (kernel_h() - 1) + 1)) /
75 (X.dim32(3) + pad_l() + pad_r() - (dilation_w() * (kernel_w() - 1) + 1)) /
// Check each filter dimension from index 2 onward against kernel_ and
// accumulate the product of the kernel sizes.
// NOTE(review): `int i < kernel_.size()` is a signed/unsigned comparison.
79 int kernel_dims_size = 1;
80 for (
int i = 0; i < kernel_.size(); ++i) {
81 CAFFE_ENFORCE(filter.dim32(i + 2) == kernel_[i]);
82 kernel_dims_size *= kernel_[i];
// Size the output Y from X and filter.dim32(0).
85 ConvPoolOpBase<Context>::SetOutputSize(X, Y, filter.dim32(0));
87 const vector<int> input_dims = GetDims(X);
88 const vector<int> output_dims = GetDims(*Y);
89 const int input_image_size = this->GetDimsSize(X);
90 const int output_image_size = this->GetDimsSize(*Y);
// img_shape is X's dims with the first (index 0) dimension dropped.
92 vector<int> img_shape;
93 img_shape.assign(X.dims().begin() + 1, X.dims().end());
// buffer_shape starts with C / group_ * kernel_dims_size; the next visible
// line holds the arguments of a call (name not visible) that takes
// buffer_shape.end() and the output_dims range.
95 vector<int> buffer_shape;
96 buffer_shape.push_back(C / group_ * kernel_dims_size);
98 buffer_shape.end(), output_dims.begin(), output_dims.end());
101 const int kernel_dim = C / group_ * kernel_dims_size;
// Element strides used to step the data pointers per group / per image below.
104 const int input_offset = C / group_ * input_image_size;
105 const int output_offset = M / group_ * output_image_size;
106 const int offset_offset = offset.size() / offset.dim32(0);
107 const int filter_offset = filter.size() / group_;
111 const T* Xdata = X.template data<T>();
112 const T* offset_data = offset.template data<T>();
// With four inputs a bias is present: it must be 1-D with M entries, and
// bias_multiplier_ is resized to output_image_size when its size differs.
// NOTE(review): the fill value passed to math::Set is not visible here.
114 if (InputSize() == 4) {
115 auto& bias = Input(BIAS);
116 CAFFE_ENFORCE(bias.ndim() == 1);
117 CAFFE_ENFORCE(bias.dim32(0) == M);
118 if (bias_multiplier_.size() != output_image_size) {
122 bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
123 math::Set<T, Context>(
126 bias_multiplier_.template mutable_data<T>(),
130 T* Ydata = Y->template mutable_data<T>();
// bias_data stays null unless the operator was given four inputs.
131 const T* bias_data =
nullptr;
132 if (InputSize() == 4) {
133 bias_data = Input(BIAS).template data<T>();
// Size the column buffer (its declaration is not visible) to buffer_shape.
137 col_buffer->Resize(buffer_shape);
138 T* col_buffer_data = col_buffer->template mutable_data<T>();
// Outer loop over images, inner loop over groups. Only fragments of the
// calls are visible: the input slice for the group, a GEMM that uses the
// group's filter slice and output slice, and a second GEMM that uses
// bias_multiplier_.
140 for (
int image_id = 0; image_id < N; ++image_id) {
141 for (
int group_id = 0; group_id < group_; ++group_id) {
143 Xdata + group_id * input_offset,
149 math::Gemm<T, Context>(
156 filter.template data<T>() + group_id * filter_offset,
159 Ydata + group_id * output_offset,
163 math::Gemm<T, Context>(
171 bias_multiplier_.template data<T>(),
// Advance the input, output and offset pointers to the next image.
176 Xdata += input_offset * group_;
177 Ydata += output_offset * group_;
178 offset_data += offset_offset;
// When the global flag or shared_buffer_ is set, run `f` through
// runWithSharedBuffer (the definition of `f` and the alternative branch are
// not visible in this listing).
182 if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
183 runWithSharedBuffer<Context>(ws_, f);
// Backward pass of the deformable convolution operator for NCHW-ordered input.
// Visible steps: validate shapes, resize and zero the gradient outputs, size
// the member column buffer, then loop over images and groups issuing
// GEMM/GEMV calls and the deformable col2im / im2col helpers.
// NOTE(review): this listing is a lossy extraction — original line numbers are
// fused into the code and several original lines are missing; the comments
// describe only what is visible.
190 template <
typename T,
class Context>
191 bool DeformConvGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
// Inputs: data, offsets, filter and the gradient w.r.t. the output.
192 auto& X = Input(INPUT);
193 auto& offset = Input(OFFSET);
194 auto& filter = Input(FILTER);
195 auto& dY = Input(OUTPUT_GRAD);
// Outputs that are always produced: filter gradient and offset gradient.
196 auto* dfilter = Output(FILTER_GRAD);
197 auto* doffset = Output(OFFSET_GRAD);
198 const int N = X.dim32(0), C = X.dim32(1);
200 const vector<int> input_dims = this->GetDims(X);
201 const int input_image_size = this->GetDimsSize(X);
203 const vector<int> output_dims = this->GetDims(dY);
205 const int output_image_size = this->GetDimsSize(dY);
207 ConvPoolOpBase<Context>::ComputePads(input_dims);
// Input and filter must have the same number of dimensions, and the input
// channel count must equal filter.dim32(1) * group_.
208 CAFFE_ENFORCE_EQ(X.ndim(), filter.ndim());
209 const int M = filter.dim32(0);
210 CAFFE_ENFORCE(filter.dim32(1) * group_ == C);
// Message fragments of checks whose conditions are not visible here.
214 "Deformable convolution only supports 2d kernel, has ",
219 "Deformable convolution only supports 4d offset, has ",
// The offset tensor must have the same leading dimension as X.
222 CAFFE_ENFORCE_EQ(offset.dim32(0), N);
// Both C and M must be divisible by deformable_group_.
224 C % deformable_group_ == 0,
225 "The number of input channels ",
227 " is not divisible by deformable group ",
230 M % deformable_group_ == 0,
231 "The number of output channels ",
233 " is not divisible by deformable group ",
// Dimension 1 of offset must equal 2 * kernel_h * kernel_w * deformable_group_.
236 offset.dim32(1) == 2 * kernel_h() * kernel_w() * deformable_group_,
237 "Deformable convolution: offset 1st dimension must equal " 238 "2 * kernel_h * kernel_w * deformable_group: 2 * ",
// Numerators built from X's dims 2 and 3, the pads, and the dilated kernel
// extent; the divisor and the surrounding check are not visible.
// NOTE(review): presumably the expected output height/width compared against
// the offset tensor's spatial dims — confirm against the full source.
247 (X.dim32(2) + pad_t() + pad_b() - (dilation_h() * (kernel_h() - 1) + 1)) /
252 (X.dim32(3) + pad_l() + pad_r() - (dilation_w() * (kernel_w() - 1) + 1)) /
// Check each filter dimension from index 2 onward against kernel_ and
// accumulate the product of the kernel sizes.
// NOTE(review): `int i < kernel_.size()` is a signed/unsigned comparison.
256 int kernel_dims_size = 1;
257 for (
int i = 0; i < kernel_.size(); ++i) {
258 CAFFE_ENFORCE(filter.dim32(i + 2) == kernel_[i]);
259 kernel_dims_size *= kernel_[i];
// M must be divisible by group_; the gradient outputs take the shapes of the
// tensors they correspond to.
262 CAFFE_ENFORCE(M % group_ == 0);
263 dfilter->ResizeLike(filter);
264 doffset->ResizeLike(offset);
267 const int kernel_dim = C / group_ * kernel_dims_size;
// Element strides used to step the data pointers per group / per image below.
270 const int input_offset = C / group_ * input_image_size;
271 const int output_offset = M / group_ * output_image_size;
272 const int offset_offset = offset.size() / offset.dim32(0);
273 const int filter_offset = filter.size() / group_;
// img_shape is X's dims with the first (index 0) dimension dropped.
277 vector<TIndex> img_shape;
278 img_shape.assign(X.dims().begin() + 1, X.dims().end());
// Column buffer shape: [C * kernel_dims_size, output_dims...]. Unlike the
// per-group strides above, the leading entry uses the full C (all groups).
279 vector<TIndex> col_buffer_shape;
280 col_buffer_shape.push_back(C * kernel_dims_size);
281 col_buffer_shape.insert(
282 col_buffer_shape.end(), output_dims.begin(), output_dims.end());
283 col_buffer_.Resize(col_buffer_shape);
// Each group addresses an equal slice of the column buffer.
285 const int col_buffer_offset = col_buffer_.size() / group_;
287 const T* Xdata = X.template data<T>();
288 const T* filter_data = filter.template data<T>();
289 const T* offset_data = offset.template data<T>();
290 const T* dYdata = dY.template data<T>();
291 T* col_buffer_data = col_buffer_.template mutable_data<T>();
292 T* dfilter_data = dfilter->template mutable_data<T>();
293 T* doffset_data = doffset->template mutable_data<T>();
// Zero the filter gradient before the per-image loop.
296 math::Set<T, Context>(dfilter->size(), 0, dfilter_data, &context_);
// Bias gradient setup. dbias_data starts null; the condition guarding the
// lines below is not visible (NOTE(review): presumably `!no_bias_` — confirm).
// bias_multiplier_ is resized to output_image_size when its size differs, and
// the bias gradient is zeroed.
298 T* dbias_data =
nullptr;
300 auto* dbias = Output(BIAS_OR_INPUT_GRAD);
302 if (bias_multiplier_.size() != output_image_size) {
304 bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
305 math::Set<T, Context>(
308 bias_multiplier_.template mutable_data<T>(),
311 dbias_data = dbias->template mutable_data<T>();
312 math::Set<T, Context>(dbias->size(), 0, dbias_data, &context_);
// The input gradient is produced when there are four outputs, or three
// outputs with no_bias_ set; in the no-bias case it occupies the
// BIAS_OR_INPUT_GRAD slot, otherwise INPUT_GRAD. It is zeroed up front.
// (The declaration of dXdata is not visible in this listing.)
316 if (OutputSize() == 4 || (no_bias_ && (OutputSize() == 3))) {
317 auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD);
319 dXdata = dX->template mutable_data<T>();
320 math::Set<T, Context>(dX->size(), 0, dXdata, &context_);
// Outer loop over images.
323 for (
int image_id = 0; image_id < N; ++image_id) {
// First per-group GEMM: uses the group's filter slice and dY slice together
// with the group's slice of the column buffer (remaining arguments are not
// visible).
324 for (
int group_id = 0; group_id < group_; ++group_id) {
325 math::Gemm<T, Context>(
332 filter_data + group_id * filter_offset,
333 dYdata + group_id * output_offset,
335 col_buffer_data + group_id * col_buffer_offset,
// DeformableCol2imCoord is invoked here; its argument list is not visible.
340 DeformableCol2imCoord(
// Arguments of a call whose name is not visible; dXdata is the last argument
// passed. dXdata is then advanced to the next image.
351 col_buffer_data, offset_data, X.dims(), col_buffer_shape, dXdata);
352 dXdata += input_offset * group_;
// Arguments of another call whose name is not visible; col_buffer_data is the
// last argument passed.
357 Xdata, offset_data, X.dims(), col_buffer_shape, col_buffer_data);
// Second per-group GEMM: uses the group's dY slice, column-buffer slice and
// dfilter slice (remaining arguments are not visible).
359 for (
int group_id = 0; group_id < group_; ++group_id) {
360 math::Gemm<T, Context>(
367 dYdata + group_id * output_offset,
368 col_buffer_data + group_id * col_buffer_offset,
370 dfilter_data + group_id * filter_offset,
// GEMV that uses bias_multiplier_ (NOTE(review): presumably accumulating the
// bias gradient — the output argument is not visible).
376 math::Gemv<T, Context>(
382 bias_multiplier_.template data<T>(),
// Advance all per-image pointers to the next image.
388 Xdata += input_offset * group_;
389 dYdata += output_offset * group_;
390 offset_data += offset_offset;
391 doffset_data += offset_offset;
398 #endif // CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_ A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Commandline flags support for Caffe2.