Caffe2 - C++ API
A deep learning, cross platform ML framework
deform_conv_op_impl.h
1 // deform_conv_op_impl.h is the templated implementation of the deform_conv_op.h file.
2 #ifndef CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_
3 #define CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_
4 
5 #include "caffe2/core/context.h"
6 #include "caffe2/core/flags.h"
7 #include "caffe2/core/logging.h"
8 #include "caffe2/core/operator.h"
9 #include "caffe2/operators/conv_pool_op_base.h"
10 #include "caffe2/operators/deform_conv_op.h"
11 #include "caffe2/utils/math.h"
12 
13 namespace caffe2 {
14 
15 template <typename T, class Context>
16 bool DeformConvOp<T, Context>::RunOnDeviceWithOrderNCHW() {
17  const Tensor<Context>& X = Input(INPUT);
18  const Tensor<Context>& offset = Input(OFFSET);
19  auto& filter = Input(FILTER);
20  Tensor<Context>* Y = Output(0);
21  const int N = X.dim32(0), C = X.dim32(1);
22  CAFFE_ENFORCE_EQ(X.ndim(), filter.ndim());
23  const int M = filter.dim32(0);
24  CAFFE_ENFORCE(
25  C == filter.dim32(1) * group_,
26  "Convolution op: input channels does not match: # of input channels ",
27  C,
28  " is not equal to kernel channels * group:",
29  filter.dim32(1),
30  "*",
31  group_);
32  CAFFE_ENFORCE(
33  M % group_ == 0,
34  "The number of output channels is not divisible by group.");
35  CAFFE_ENFORCE(
36  kernel_.size() == 2,
37  "Deformable convolution only supports 2d kernel, has ",
38  kernel_.size(),
39  "d kernel.");
40  CAFFE_ENFORCE(
41  offset.ndim() == 4,
42  "Deformable convolution only supports 4d offset, has ",
43  offset.ndim(),
44  "d offset.");
45  CAFFE_ENFORCE_EQ(offset.dim32(0), N);
46  CAFFE_ENFORCE(
47  C % deformable_group_ == 0,
48  "The number of input channels ",
49  C,
50  " is not divisible by deformable group ",
51  deformable_group_);
52  CAFFE_ENFORCE(
53  M % deformable_group_ == 0,
54  "The number of output channels ",
55  M,
56  " is not divisible by deformable group ",
57  deformable_group_);
58  CAFFE_ENFORCE(
59  offset.dim32(1) == 2 * kernel_h() * kernel_w() * deformable_group_,
60  "Deformable convolution: offset 1st dimension must equal "
61  "2 * kernel_h * kernel_w * deformable_group: 2 * ",
62  kernel_h(),
63  " * ",
64  kernel_w(),
65  " * ",
66  deformable_group_);
67 
68  CAFFE_ENFORCE_EQ(
69  offset.dim32(2),
70  (X.dim32(2) + pad_t() + pad_b() - (dilation_h() * (kernel_h() - 1) + 1)) /
71  stride_h() +
72  1);
73  CAFFE_ENFORCE_EQ(
74  offset.dim32(3),
75  (X.dim32(3) + pad_l() + pad_r() - (dilation_w() * (kernel_w() - 1) + 1)) /
76  stride_w() +
77  1);
78 
79  int kernel_dims_size = 1;
80  for (int i = 0; i < kernel_.size(); ++i) {
81  CAFFE_ENFORCE(filter.dim32(i + 2) == kernel_[i]);
82  kernel_dims_size *= kernel_[i];
83  }
84 
85  ConvPoolOpBase<Context>::SetOutputSize(X, Y, filter.dim32(0));
86 
87  const vector<int> input_dims = GetDims(X);
88  const vector<int> output_dims = GetDims(*Y);
89  const int input_image_size = this->GetDimsSize(X);
90  const int output_image_size = this->GetDimsSize(*Y);
91 
92  vector<int> img_shape;
93  img_shape.assign(X.dims().begin() + 1, X.dims().end());
94 
95  vector<int> buffer_shape;
96  buffer_shape.push_back(C / group_ * kernel_dims_size);
97  buffer_shape.insert(
98  buffer_shape.end(), output_dims.begin(), output_dims.end());
99 
100  // The dimension of each kernel
101  const int kernel_dim = C / group_ * kernel_dims_size;
102  // The offset corresponding to a single input image, and a single output
103  // image.
104  const int input_offset = C / group_ * input_image_size;
105  const int output_offset = M / group_ * output_image_size;
106  const int offset_offset = offset.size() / offset.dim32(0);
107  const int filter_offset = filter.size() / group_;
108 
109  // The col buffer is stored in CHW order as well - kernel_dim, and the height
110  // and width.
111  const T* Xdata = X.template data<T>();
112  const T* offset_data = offset.template data<T>();
113 
114  if (InputSize() == 4) {
115  auto& bias = Input(BIAS);
116  CAFFE_ENFORCE(bias.ndim() == 1);
117  CAFFE_ENFORCE(bias.dim32(0) == M);
118  if (bias_multiplier_.size() != output_image_size) {
119  // If the helper bias multiplier is not image size, reshape and fill it
120  // with
121  // one.
122  bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
123  math::Set<T, Context>(
124  output_image_size,
125  static_cast<T>(1),
126  bias_multiplier_.template mutable_data<T>(),
127  &context_);
128  }
129  }
130  T* Ydata = Y->template mutable_data<T>();
131  const T* bias_data = nullptr;
132  if (InputSize() == 4) {
133  bias_data = Input(BIAS).template data<T>();
134  }
135 
136  auto f = [&](Tensor<Context>* col_buffer) {
137  col_buffer->Resize(buffer_shape);
138  T* col_buffer_data = col_buffer->template mutable_data<T>();
139  // Im2col, followed by gemm.
140  for (int image_id = 0; image_id < N; ++image_id) {
141  for (int group_id = 0; group_id < group_; ++group_id) {
142  DeformableIm2col(
143  Xdata + group_id * input_offset,
144  offset_data,
145  X.dims(),
146  col_buffer->dims(),
147  col_buffer_data);
148  // Weight term
149  math::Gemm<T, Context>(
150  CblasNoTrans,
151  CblasNoTrans,
152  M / group_,
153  output_image_size,
154  kernel_dim,
155  1,
156  filter.template data<T>() + group_id * filter_offset,
157  col_buffer_data,
158  0,
159  Ydata + group_id * output_offset,
160  &context_);
161  }
162  if (bias_data) {
163  math::Gemm<T, Context>(
164  CblasNoTrans,
165  CblasNoTrans,
166  M,
167  output_image_size,
168  1,
169  1,
170  bias_data,
171  bias_multiplier_.template data<T>(),
172  1,
173  Ydata,
174  &context_);
175  }
176  Xdata += input_offset * group_;
177  Ydata += output_offset * group_;
178  offset_data += offset_offset;
179  }
180  };
181 
182  if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
183  runWithSharedBuffer<Context>(ws_, f);
184  } else {
185  f(&col_buffer_);
186  }
187  return true;
188 }
189 
190 template <typename T, class Context>
191 bool DeformConvGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
192  auto& X = Input(INPUT);
193  auto& offset = Input(OFFSET);
194  auto& filter = Input(FILTER);
195  auto& dY = Input(OUTPUT_GRAD);
196  auto* dfilter = Output(FILTER_GRAD);
197  auto* doffset = Output(OFFSET_GRAD);
198  const int N = X.dim32(0), C = X.dim32(1);
199 
200  const vector<int> input_dims = this->GetDims(X);
201  const int input_image_size = this->GetDimsSize(X);
202 
203  const vector<int> output_dims = this->GetDims(dY);
204  // The output image size is the spatial size of the output.
205  const int output_image_size = this->GetDimsSize(dY);
206 
207  ConvPoolOpBase<Context>::ComputePads(input_dims);
208  CAFFE_ENFORCE_EQ(X.ndim(), filter.ndim());
209  const int M = filter.dim32(0);
210  CAFFE_ENFORCE(filter.dim32(1) * group_ == C);
211 
212  CAFFE_ENFORCE(
213  kernel_.size() == 2,
214  "Deformable convolution only supports 2d kernel, has ",
215  kernel_.size(),
216  "d kernel.");
217  CAFFE_ENFORCE(
218  offset.ndim() == 4,
219  "Deformable convolution only supports 4d offset, has ",
220  offset.ndim(),
221  "d offset.");
222  CAFFE_ENFORCE_EQ(offset.dim32(0), N);
223  CAFFE_ENFORCE(
224  C % deformable_group_ == 0,
225  "The number of input channels ",
226  C,
227  " is not divisible by deformable group ",
228  deformable_group_);
229  CAFFE_ENFORCE(
230  M % deformable_group_ == 0,
231  "The number of output channels ",
232  M,
233  " is not divisible by deformable group ",
234  deformable_group_);
235  CAFFE_ENFORCE(
236  offset.dim32(1) == 2 * kernel_h() * kernel_w() * deformable_group_,
237  "Deformable convolution: offset 1st dimension must equal "
238  "2 * kernel_h * kernel_w * deformable_group: 2 * ",
239  kernel_h(),
240  " * ",
241  kernel_w(),
242  " * ",
243  deformable_group_);
244 
245  CAFFE_ENFORCE_EQ(
246  offset.dim32(2),
247  (X.dim32(2) + pad_t() + pad_b() - (dilation_h() * (kernel_h() - 1) + 1)) /
248  stride_h() +
249  1);
250  CAFFE_ENFORCE_EQ(
251  offset.dim32(3),
252  (X.dim32(3) + pad_l() + pad_r() - (dilation_w() * (kernel_w() - 1) + 1)) /
253  stride_w() +
254  1);
255 
256  int kernel_dims_size = 1;
257  for (int i = 0; i < kernel_.size(); ++i) {
258  CAFFE_ENFORCE(filter.dim32(i + 2) == kernel_[i]);
259  kernel_dims_size *= kernel_[i];
260  }
261 
262  CAFFE_ENFORCE(M % group_ == 0);
263  dfilter->ResizeLike(filter);
264  doffset->ResizeLike(offset);
265 
266  // The dimension of each kernel
267  const int kernel_dim = C / group_ * kernel_dims_size;
268  // The offset corresponding to a single input image, and a single output
269  // image.
270  const int input_offset = C / group_ * input_image_size;
271  const int output_offset = M / group_ * output_image_size;
272  const int offset_offset = offset.size() / offset.dim32(0);
273  const int filter_offset = filter.size() / group_;
274 
275  // The col buffer is stored in CHW order as well - kernel_dim, and the
276  // height and width.
277  vector<TIndex> img_shape;
278  img_shape.assign(X.dims().begin() + 1, X.dims().end());
279  vector<TIndex> col_buffer_shape;
280  col_buffer_shape.push_back(C * kernel_dims_size);
281  col_buffer_shape.insert(
282  col_buffer_shape.end(), output_dims.begin(), output_dims.end());
283  col_buffer_.Resize(col_buffer_shape);
284 
285  const int col_buffer_offset = col_buffer_.size() / group_;
286 
287  const T* Xdata = X.template data<T>();
288  const T* filter_data = filter.template data<T>();
289  const T* offset_data = offset.template data<T>();
290  const T* dYdata = dY.template data<T>();
291  T* col_buffer_data = col_buffer_.template mutable_data<T>();
292  T* dfilter_data = dfilter->template mutable_data<T>();
293  T* doffset_data = doffset->template mutable_data<T>();
294 
295  // Pre-setting the gradients to zero.
296  math::Set<T, Context>(dfilter->size(), 0, dfilter_data, &context_);
297 
298  T* dbias_data = nullptr;
299  if (!no_bias_) {
300  auto* dbias = Output(BIAS_OR_INPUT_GRAD);
301  dbias->Resize(M);
302  if (bias_multiplier_.size() != output_image_size) {
303  // If the helper bias multiplier is not M, reshape and fill it with one.
304  bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
305  math::Set<T, Context>(
306  output_image_size,
307  static_cast<T>(1),
308  bias_multiplier_.template mutable_data<T>(),
309  &context_);
310  }
311  dbias_data = dbias->template mutable_data<T>();
312  math::Set<T, Context>(dbias->size(), 0, dbias_data, &context_);
313  }
314 
315  T* dXdata = nullptr;
316  if (OutputSize() == 4 || (no_bias_ && (OutputSize() == 3))) {
317  auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD);
318  dX->ResizeLike(X);
319  dXdata = dX->template mutable_data<T>();
320  math::Set<T, Context>(dX->size(), 0, dXdata, &context_);
321  }
322 
323  for (int image_id = 0; image_id < N; ++image_id) {
324  for (int group_id = 0; group_id < group_; ++group_id) {
325  math::Gemm<T, Context>(
326  CblasTrans,
327  CblasNoTrans,
328  kernel_dim,
329  output_image_size,
330  M / group_,
331  1,
332  filter_data + group_id * filter_offset,
333  dYdata + group_id * output_offset,
334  0,
335  col_buffer_data + group_id * col_buffer_offset,
336  &context_);
337  }
338 
339  // Gradient with respect to offsets
340  DeformableCol2imCoord(
341  col_buffer_data,
342  Xdata,
343  offset_data,
344  X.dims(),
345  col_buffer_shape,
346  doffset_data);
347 
348  // Gradient with respect to input data
349  if (dXdata) {
350  DeformableCol2im(
351  col_buffer_data, offset_data, X.dims(), col_buffer_shape, dXdata);
352  dXdata += input_offset * group_;
353  }
354 
355  // Gradient with respect to filter
356  DeformableIm2col(
357  Xdata, offset_data, X.dims(), col_buffer_shape, col_buffer_data);
358 
359  for (int group_id = 0; group_id < group_; ++group_id) {
360  math::Gemm<T, Context>(
361  CblasNoTrans,
362  CblasTrans,
363  M / group_,
364  kernel_dim,
365  output_image_size,
366  1,
367  dYdata + group_id * output_offset,
368  col_buffer_data + group_id * col_buffer_offset,
369  1,
370  dfilter_data + group_id * filter_offset,
371  &context_);
372  }
373 
374  // Gradient with respect to bias
375  if (dbias_data) {
376  math::Gemv<T, Context>(
377  CblasNoTrans,
378  M,
379  output_image_size,
380  1,
381  dYdata,
382  bias_multiplier_.template data<T>(),
383  1,
384  dbias_data,
385  &context_);
386  }
387 
388  Xdata += input_offset * group_;
389  dYdata += output_offset * group_;
390  offset_data += offset_offset;
391  doffset_data += offset_offset;
392  }
393 
394  return true;
395 }
396 } // namespace caffe2
397 
398 #endif // CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Commandline flags support for Caffe2.