Caffe2 - C++ API
A deep learning, cross-platform ML framework
conv_transpose_op_impl.h
1 // conv_transpose_op_impl.h is the templated implementation of the
2 // conv_transpose_op.h file.
3 #ifndef CAFFE2_OPERATORS_CONV_TRANSPOSE_OP_IMPL_H_
4 #define CAFFE2_OPERATORS_CONV_TRANSPOSE_OP_IMPL_H_
5 
6 #include "caffe2/core/context.h"
7 #include "caffe2/core/logging.h"
8 #include "caffe2/core/operator.h"
9 #include "caffe2/operators/conv_op_shared.h"
10 #include "caffe2/operators/conv_transpose_op.h"
11 #include "caffe2/operators/conv_transpose_unpool_op_base.h"
12 #include "caffe2/utils/math.h"
13 
14 CAFFE2_DECLARE_bool(caffe2_force_shared_col_buffer);
15 
16 namespace caffe2 {
17 
18 template <typename T, class Context>
19 bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNCHW() {
20  const Tensor<Context>& X = Input(INPUT);
21  auto& filter = Input(FILTER);
22  Tensor<Context>* Y = Output(0);
23  const int N = X.dim32(0), M = X.dim32(1), H = X.dim32(2), W = X.dim32(3);
24  CAFFE_ENFORCE(filter.ndim() == 4, "filter must be 4D tensor");
25  CAFFE_ENFORCE(
26  filter.dim32(0) == M,
27  "filter number must be equal to input channel number");
28  const int C = filter.dim32(1);
29  CAFFE_ENFORCE(
30  filter.dim32(2) == this->kernel_h(),
31  "filter height must be equal to kernel height");
32  CAFFE_ENFORCE(
33  filter.dim32(3) == this->kernel_w(),
34  "filter width must be equal to kernel width");
35  ConvTransposeUnpoolBase<Context>::SetOutputSize(X, Y, C);
36 
37  const int kernel_dim = C * this->kernel_h() * this->kernel_w();
38  const int input_image_size = H * W;
39  const int output_image_size = Y->dim32(2) * Y->dim32(3);
40 
41 #ifndef __ARM_NEON__
42  if (InputSize() == 3) {
43  auto& bias = Input(BIAS);
44  CAFFE_ENFORCE(bias.ndim() == 1, "bias must be 1D tensor");
45  CAFFE_ENFORCE(
46  bias.dim32(0) == C,
47  "bias dimension must be equal to output channel number");
48  if (bias_multiplier_.size() != output_image_size) {
49  bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
50  T* bm_data = bias_multiplier_.template mutable_data<T>();
51  math::Set<T, Context>(
52  output_image_size,
53  static_cast<T>(1),
54  bm_data,
55  &context_);
56  }
57  }
58 #endif // !__ARM_NEON__
59 
60  const T* Xdata = X.template data<T>();
61  const T* filter_data = filter.template data<T>();
62  T* Ydata = Y->template mutable_data<T>();
63 
64  auto f = [&](Tensor<Context>* col_buffer) {
65  col_buffer->Resize(
66  vector<TIndex>{C, this->kernel_h(), this->kernel_w(), H, W});
67  T* col_buffer_data = col_buffer->template mutable_data<T>();
68  for (auto image_id = 0; image_id < N; ++image_id) {
69  // Weight term
70  math::Gemm<T, Context>(
71  CblasTrans,
72  CblasNoTrans,
73  kernel_dim,
74  input_image_size,
75  M,
76  1,
77  filter_data,
78  Xdata,
79  0,
80  col_buffer_data,
81  &context_);
82 
83  // Col2im
84  math::Col2im<T, Context, StorageOrder::NCHW>(
85  col_buffer_data,
86  C,
87  Y->dim32(2),
88  Y->dim32(3),
89  this->kernel_h(),
90  this->kernel_w(),
91  1,
92  1,
93  this->pad_t(),
94  this->pad_l(),
95  this->pad_b(),
96  this->pad_r(),
97  this->stride_h(),
98  this->stride_w(),
99  Ydata,
100  &context_);
101 
102  // Bias term
103  if (InputSize() == 3) {
104  const T* bias_data = Input(BIAS).template data<T>();
105 #ifndef __ARM_NEON__
106  const T* bm_data = bias_multiplier_.template data<T>();
107  math::Gemm<T, Context>(
108  CblasNoTrans,
109  CblasNoTrans,
110  C,
111  output_image_size,
112  1,
113  1,
114  bias_data,
115  bm_data,
116  1,
117  Ydata,
118  &context_);
119 #else
120  math::BiasCHW<T, Context>(
121  bias_data,
122  C,
123  output_image_size,
124  Ydata,
125  &context_);
126 #endif // !__ARM_NEON__
127  }
128 
129  Xdata += M * H * W;
130  Ydata += Y->size() / Y->dim32(0);
131  }
132  };
133  if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
134  runWithSharedBuffer<Context>(ws_, f);
135  } else {
136  f(&col_buffer_);
137  }
138  return true;
139 }
140 
141 template <typename T, class Context>
142 bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNHWC() {
143  const Tensor<Context>& X = Input(INPUT);
144  auto& filter = Input(FILTER);
145  Tensor<Context>* Y = Output(0);
146  const auto N = X.dim32(0), H = X.dim32(1), W = X.dim32(2), M = X.dim32(3);
147  CAFFE_ENFORCE(filter.ndim() == 4, "filter must be 4D tensor");
148  CAFFE_ENFORCE(
149  filter.dim32(0) == M,
150  "filter number must be equal to input channel number");
151  CAFFE_ENFORCE(
152  filter.dim32(1) == this->kernel_h(),
153  "filter height must be equal to kernel height");
154  CAFFE_ENFORCE(
155  filter.dim32(2) == this->kernel_w(),
156  "filter width must be equal to kernel width");
157  const int C = filter.dim32(3);
158  ConvTransposeUnpoolBase<Context>::SetOutputSize(X, Y, C);
159 
160  const auto kernel_dim = C * this->kernel_h() * this->kernel_w();
161  const auto input_image_size = H * W;
162  const auto output_image_size = Y->dim32(1) * Y->dim32(2);
163 
164  if (InputSize() == 3) {
165  auto& bias = Input(BIAS);
166  CAFFE_ENFORCE(bias.ndim() == 1, "bias must be 1D tensor");
167  CAFFE_ENFORCE(
168  bias.dim32(0) == C,
169  "bias dimension must be equal to output channel number");
170  if (bias_multiplier_.size() != output_image_size) {
171  bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
172  T* bm_data = bias_multiplier_.template mutable_data<T>();
173  math::Set<T, Context>(
174  output_image_size,
175  static_cast<T>(1),
176  bm_data,
177  &context_);
178  }
179  }
180  const T* Xdata = X.template data<T>();
181  const T* filter_data = filter.template data<T>();
182  T* Ydata = Y->template mutable_data<T>();
183 
184  auto f = [&](Tensor<Context>* /*col_buffer*/) {
185  col_buffer_.Resize(
186  vector<TIndex>{H, W, this->kernel_h(), this->kernel_w(), C});
187  T* col_buffer_data = col_buffer_.template mutable_data<T>();
188  for (auto image_id = 0; image_id < N; ++image_id) {
189  // Weight term
190  math::Gemm<T, Context>(
191  CblasNoTrans,
192  CblasNoTrans,
193  input_image_size,
194  kernel_dim,
195  M,
196  1,
197  Xdata,
198  filter_data,
199  0,
200  col_buffer_data,
201  &context_);
202  // Col2im
203  math::Col2im<T, Context, StorageOrder::NHWC>(
204  col_buffer_data,
205  C,
206  Y->dim32(1),
207  Y->dim32(2),
208  this->kernel_h(),
209  this->kernel_w(),
210  1,
211  1,
212  this->pad_t(),
213  this->pad_l(),
214  this->pad_b(),
215  this->pad_r(),
216  this->stride_h(),
217  this->stride_w(),
218  Ydata,
219  &context_);
220  // Bias term
221  if (InputSize() == 3) {
222  const T* bm_data = bias_multiplier_.template data<T>();
223  const T* bias_data = Input(BIAS).template data<T>();
224  math::Gemm<T, Context>(
225  CblasNoTrans,
226  CblasNoTrans,
227  output_image_size,
228  C,
229  1,
230  1,
231  bm_data,
232  bias_data,
233  1,
234  Ydata,
235  &context_);
236  }
237  Xdata += M * H * W;
238  Ydata += Y->size() / Y->dim32(0);
239  }
240  };
241  if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
242  runWithSharedBuffer<Context>(ws_, f);
243  } else {
244  f(&col_buffer_);
245  }
246  return true;
247 }
248 
249 template <typename T, class Context>
250 bool ConvTransposeGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
251  auto& X = Input(INPUT);
252  auto& filter = Input(FILTER);
253  auto& dY = Input(OUTPUT_GRAD);
254  auto* dfilter = Output(FILTER_GRAD);
255  const int N = X.dim32(0), M = X.dim32(1), H = X.dim32(2), W = X.dim32(3);
256  // We only handle LegacyPadding::NOTSET case and ignore cases of
257  // LegacyPadding::VALID and LegacyPadding::SAME
258  // Thus, we don't need to manually compute padding values
259  // We simply use the values from the user
260  CAFFE_ENFORCE(filter.ndim() == 4);
261  const int C = filter.dim32(1);
262  CAFFE_ENFORCE(
263  filter.dim32(2) == this->kernel_h(),
264  "filter height must be equal to kernel height");
265  CAFFE_ENFORCE(
266  filter.dim32(3) == this->kernel_w(),
267  "filter width must be equal to kernel width");
268  dfilter->ResizeLike(filter);
269 
270  const int kernel_dim = C * this->kernel_h() * this->kernel_w();
271  const int output_image_size = dY.dim32(2) * dY.dim32(3);
272  // The col buffer is stored in CHW order as well
273  col_buffer_.Resize(
274  vector<TIndex>{C, this->kernel_h(), this->kernel_w(), H, W});
275  if (!no_bias_) {
276  auto* dbias = Output(BIAS_OR_INPUT_GRAD);
277  dbias->Resize(C);
278  if (bias_multiplier_.size() != output_image_size) {
279  bias_multiplier_.Resize(1, output_image_size);
280  T* bm_data = bias_multiplier_.template mutable_data<T>();
281  math::Set<T, Context>(
282  output_image_size,
283  static_cast<T>(1),
284  bm_data,
285  &context_);
286  }
287  }
288  T* col_buffer_data = col_buffer_.template mutable_data<T>();
289  const T* Xdata = X.template data<T>();
290  const T* filter_data = filter.template data<T>();
291  const T* dYdata = dY.template data<T>();
292  T* dfilter_data = dfilter->template mutable_data<T>();
293  // Pre-setting the gradients to zero
294  math::Set<T, Context>(dfilter->size(), 0, dfilter_data, &context_);
295  if (!no_bias_) {
296  auto* dbias = Output(BIAS_OR_INPUT_GRAD);
297  T* dbias_data = dbias->template mutable_data<T>();
298  math::Set<T, Context>(dbias->size(), 0, dbias_data, &context_);
299  }
300  for (auto image_id = 0; image_id < N; ++image_id) {
301  // gradient w.r.t. filters. Im2col followed by Gemm
302  // Im2col.
303  math::Im2col<T, Context, StorageOrder::NCHW>(
304  dYdata,
305  C,
306  dY.dim32(2),
307  dY.dim32(3),
308  this->kernel_h(),
309  this->kernel_w(),
310  1,
311  1,
312  this->pad_t(),
313  this->pad_l(),
314  this->pad_b(),
315  this->pad_r(),
316  this->stride_h(),
317  this->stride_w(),
318  col_buffer_data,
319  &context_);
320  // Gemm
321  math::Gemm<T, Context>(
322  CblasNoTrans,
323  CblasTrans,
324  M,
325  kernel_dim,
326  H * W,
327  1,
328  Xdata,
329  col_buffer_data,
330  1,
331  dfilter_data,
332  &context_);
333  // gradient w.r.t. bias
334  if (!no_bias_) {
335  const T* bm_data = bias_multiplier_.template data<T>();
336  T* input_grad_data = Output(BIAS_OR_INPUT_GRAD)->template mutable_data<T>();
337  math::Gemm<T, Context>(
338  CblasNoTrans,
339  CblasNoTrans,
340  C,
341  1,
342  output_image_size,
343  1,
344  dYdata,
345  bm_data,
346  1,
347  input_grad_data,
348  &context_);
349  }
350  dYdata += dY.size() / dY.dim32(0);
351  Xdata += X.size() / X.dim32(0);
352  }
353  if (OutputSize() == 3 || (no_bias_ && (OutputSize() == 2))) {
354  // Compute gradients w.r.t. the input
355  // Since we have changed dYdata in the above loop, we will need to reset.
356  dYdata = dY.template data<T>();
357  auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD);
358  dX->ResizeLike(X);
359  T* dXdata = dX->template mutable_data<T>();
360  for (auto image_id = 0; image_id < N; ++image_id) {
361  // Im2col.
362  // TODO(zyan3): Probably duplicate work as in gradient computation
363  // w.r.t filters
364  math::Im2col<T, Context, StorageOrder::NCHW>(
365  dYdata,
366  C,
367  dY.dim32(2),
368  dY.dim32(3),
369  this->kernel_h(),
370  this->kernel_w(),
371  1,
372  1,
373  this->pad_t(),
374  this->pad_l(),
375  this->pad_b(),
376  this->pad_r(),
377  this->stride_h(),
378  this->stride_w(),
379  col_buffer_data,
380  &context_);
381  // Gemm
382  math::Gemm<T, Context>(
383  CblasNoTrans,
384  CblasNoTrans,
385  M,
386  H * W,
387  kernel_dim,
388  1,
389  filter_data,
390  col_buffer_data,
391  0,
392  dXdata,
393  &context_);
394  dYdata += dY.size() / dY.dim32(0);
395  dXdata += X.size() / X.dim32(0);
396  }
397  }
398  return true;
399 }
400 
401 template <typename T, class Context>
402 bool ConvTransposeGradientOp<T, Context>::RunOnDeviceWithOrderNHWC() {
403  auto& X = Input(INPUT);
404  auto& filter = Input(FILTER);
405  auto& dY = Input(OUTPUT_GRAD);
406  auto* dfilter = Output(FILTER_GRAD);
407  const int N = X.dim32(0), H = X.dim32(1), W = X.dim32(2), M = X.dim32(3);
408  // We only handle LegacyPadding::NOTSET case and ignore cases of
409  // LegacyPadding::VALID and LegacyPadding::SAME
410  // Thus, we don't need to manually compute padding values
411  // We simply use the values from the user
412  CAFFE_ENFORCE(filter.ndim() == 4, "filter must be 4D tensor");
413  CAFFE_ENFORCE(
414  filter.dim32(1) == this->kernel_h(),
415  "filter height must be equal to kernel height");
416  CAFFE_ENFORCE(
417  filter.dim32(2) == this->kernel_w(),
418  "filter width must be equal to kernel width");
419  const int C = filter.dim32(3);
420  dfilter->ResizeLike(filter);
421 
422  const int kernel_dim = C * this->kernel_h() * this->kernel_w();
423  const int output_image_size = dY.dim32(1) * dY.dim32(2);
424  // The col buffer is stored in HWC order as well
425  col_buffer_.Resize(
426  vector<TIndex>{H, W, this->kernel_h(), this->kernel_w(), C});
427  if (!no_bias_) {
428  auto* dbias = Output(BIAS_OR_INPUT_GRAD);
429  dbias->Resize(C);
430  if (bias_multiplier_.size() != output_image_size) {
431  bias_multiplier_.Resize(1, output_image_size);
432  T* bm_data = bias_multiplier_.template mutable_data<T>();
433  math::Set<T, Context>(
434  output_image_size,
435  static_cast<T>(1),
436  bm_data,
437  &context_);
438  }
439  }
440  T* col_buffer_data = col_buffer_.template mutable_data<T>();
441  const T* Xdata = X.template data<T>();
442  const T* filter_data = filter.template data<T>();
443  const T* dYdata = dY.template data<T>();
444  T* dfilter_data = dfilter->template mutable_data<T>();
445  // Pre-setting the gradients to zero
446  math::Set<T, Context>(dfilter->size(), 0, dfilter_data, &context_);
447  if (!no_bias_) {
448  auto* dbias = Output(BIAS_OR_INPUT_GRAD);
449  T* dbias_data = dbias->template mutable_data<T>();
450  math::Set<T, Context>(dbias->size(), 0, dbias_data, &context_);
451  }
452  for (auto image_id = 0; image_id < N; ++image_id) {
453  // gradient w.r.t. filters. Im2col followed by Gemm
454  // Im2col.
455  math::Im2col<T, Context, StorageOrder::NHWC>(
456  dYdata,
457  C,
458  dY.dim32(1),
459  dY.dim32(2),
460  this->kernel_h(),
461  this->kernel_w(),
462  1,
463  1,
464  this->pad_t(),
465  this->pad_l(),
466  this->pad_b(),
467  this->pad_r(),
468  this->stride_h(),
469  this->stride_w(),
470  col_buffer_data,
471  &context_);
472  // Gemm
473  math::Gemm<T, Context>(
474  CblasTrans,
475  CblasNoTrans,
476  M,
477  kernel_dim,
478  H * W,
479  1,
480  Xdata,
481  col_buffer_data,
482  1,
483  dfilter_data,
484  &context_);
485  // gradients w.r.t. bias
486  if (!no_bias_) {
487  const T* bm_data = bias_multiplier_.template data<T>();
488  T* input_grad_data = Output(BIAS_OR_INPUT_GRAD)->template mutable_data<T>();
489  math::Gemm<T, Context>(
490  CblasTrans,
491  CblasNoTrans,
492  C,
493  1,
494  output_image_size,
495  1,
496  dYdata,
497  bm_data,
498  1,
499  input_grad_data,
500  &context_);
501  }
502  dYdata += dY.size() / dY.dim32(0);
503  Xdata += X.size() / X.dim32(0);
504  }
505  if (OutputSize() == 3 || (no_bias_ && (OutputSize() == 2))) {
506  // Compute gradients w.r.t. the input
507  // Since we have changed dYdata in the above loop, we will need to reset.
508  dYdata = dY.template data<T>();
509  auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD);
510  dX->ResizeLike(X);
511  T* dXdata = dX->template mutable_data<T>();
512  for (auto image_id = 0; image_id < N; ++image_id) {
513  // Im2col.
514  // TODO(zyan3): Probably duplicate work as in gradient computation
515  // w.r.t filters
516  math::Im2col<T, Context, StorageOrder::NHWC>(
517  dYdata,
518  C,
519  dY.dim32(1),
520  dY.dim32(2),
521  this->kernel_h(),
522  this->kernel_w(),
523  1,
524  1,
525  this->pad_t(),
526  this->pad_l(),
527  this->pad_b(),
528  this->pad_r(),
529  this->stride_h(),
530  this->stride_w(),
531  col_buffer_data,
532  &context_);
533  // Gemm
534  math::Gemm<T, Context>(
535  CblasNoTrans,
536  CblasTrans,
537  H * W,
538  M,
539  kernel_dim,
540  1,
541  col_buffer_data,
542  filter_data,
543  0,
544  dXdata,
545  &context_);
546  dYdata += dY.size() / dY.dim32(0);
547  dXdata += X.size() / X.dim32(0);
548  }
549  }
550  return true;
551 }
552 
553 } // namespace caffe2
554 #endif // CAFFE2_OPERATORS_CONV_TRANSPOSE_OP_IMPL_H_
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...