3 #ifndef CAFFE2_OPERATORS_CONV_TRANSPOSE_OP_IMPL_H_ 4 #define CAFFE2_OPERATORS_CONV_TRANSPOSE_OP_IMPL_H_ 6 #include "caffe2/core/context.h" 7 #include "caffe2/core/logging.h" 8 #include "caffe2/core/operator.h" 9 #include "caffe2/operators/conv_op_shared.h" 10 #include "caffe2/operators/conv_transpose_op.h" 11 #include "caffe2/operators/conv_transpose_unpool_op_base.h" 12 #include "caffe2/utils/math.h" 14 CAFFE2_DECLARE_bool(caffe2_force_shared_col_buffer);
// Forward pass of 2-D transposed convolution ("deconvolution") for NCHW
// tensors. Implemented as, per image: GEMM (filter^T x input) into a column
// buffer, then Col2im to scatter-accumulate columns into the output, then an
// optional bias broadcast.
//
// NOTE(review): this source extract is elided — original line numbers are
// embedded in the text and interior lines (GEMM/Col2im argument lists, some
// ENFORCE conditions, NEON #ifdef bodies, closing braces) are missing.
// Comments below describe only what the visible lines establish.
18 template <
typename T,
class Context>
19 bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNCHW() {
21 auto& filter = Input(FILTER);
// N = batch, M = input channels, H/W = input spatial dims (NCHW layout).
23 const int N = X.dim32(0), M = X.dim32(1), H = X.dim32(2), W = X.dim32(3);
24 CAFFE_ENFORCE(filter.ndim() == 4,
"filter must be 4D tensor");
// (elided) presumably enforces filter.dim32(0) == M — TODO confirm.
27 "filter number must be equal to input channel number");
// For transposed conv the OUTPUT channel count C lives on filter dim 1
// (regular conv has it on dim 0).
28 const int C = filter.dim32(1);
30 filter.dim32(2) == this->kernel_h(),
31 "filter height must be equal to kernel height");
33 filter.dim32(3) == this->kernel_w(),
34 "filter width must be equal to kernel width");
35 ConvTransposeUnpoolBase<Context>::SetOutputSize(X, Y, C);
// One column-buffer "column" holds C * kh * kw values.
37 const int kernel_dim = C * this->kernel_h() * this->kernel_w();
38 const int input_image_size = H * W;
39 const int output_image_size = Y->dim32(2) * Y->dim32(3);
// Optional bias is the third input; validate it and (re)build the
// ones-vector used to broadcast bias across all output pixels via GEMM.
42 if (InputSize() == 3) {
43 auto& bias = Input(BIAS);
44 CAFFE_ENFORCE(bias.ndim() == 1,
"bias must be 1D tensor");
// (elided) presumably enforces bias.dim32(0) == C — TODO confirm.
47 "bias dimension must be equal to output channel number");
48 if (bias_multiplier_.size() != output_image_size) {
49 bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
50 T* bm_data = bias_multiplier_.template mutable_data<T>();
// (elided) fills bias_multiplier_ with ones; on ARM NEON builds this
// block appears to be compiled out (see #endif below) because
// math::BiasCHW is used instead — TODO confirm against full source.
51 math::Set<T, Context>(
58 #endif // !__ARM_NEON__ 60 const T* Xdata = X.template data<T>();
61 const T* filter_data = filter.template data<T>();
62 T* Ydata = Y->template mutable_data<T>();
// Column buffer shaped {C, kh, kw, H, W}: one column per INPUT pixel,
// since transposed conv runs col2im (scatter) rather than im2col (gather).
66 vector<TIndex>{C, this->kernel_h(), this->kernel_w(), H, W});
67 T* col_buffer_data = col_buffer->template mutable_data<T>();
// Process the batch one image at a time, advancing the data pointers.
68 for (
auto image_id = 0; image_id < N; ++image_id) {
// (elided args) col_buffer = filter^T * X_image — computes all kernel
// contributions for every input location in one GEMM.
70 math::Gemm<T, Context>(
// (elided args) scatter-accumulate the column buffer into Y_image.
84 math::Col2im<T, Context, StorageOrder::NCHW>(
// Bias addition (two code paths; the NEON one uses BiasCHW).
103 if (InputSize() == 3) {
104 const T* bias_data = Input(BIAS).template data<T>();
106 const T* bm_data = bias_multiplier_.template data<T>();
// (elided args) Y_image += bias * ones^T (rank-1 broadcast via GEMM).
107 math::Gemm<T, Context>(
// (elided args) ARM-NEON-specialized bias broadcast — alternate branch.
120 math::BiasCHW<T, Context>(
126 #endif // !__ARM_NEON__ 130 Ydata += Y->size() / Y->dim32(0);
// Optionally run the lambda `f` (defined in elided lines) under a
// workspace-shared column buffer to reduce peak memory across ops.
133 if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
134 runWithSharedBuffer<Context>(ws_, f);
// Forward pass of 2-D transposed convolution for NHWC tensors. Same
// GEMM + Col2im + optional-bias scheme as the NCHW path, with
// layout-appropriate dimension indexing and column-buffer shape.
//
// NOTE(review): extract is elided (embedded original line numbers, missing
// argument lists / braces). Comments only reflect the visible lines.
141 template <
typename T,
class Context>
142 bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNHWC() {
144 auto& filter = Input(FILTER);
// NHWC: batch, height, width, then input channels M.
146 const auto N = X.dim32(0), H = X.dim32(1), W = X.dim32(2), M = X.dim32(3);
147 CAFFE_ENFORCE(filter.ndim() == 4,
"filter must be 4D tensor");
149 filter.dim32(0) == M,
150 "filter number must be equal to input channel number");
152 filter.dim32(1) == this->kernel_h(),
153 "filter height must be equal to kernel height");
155 filter.dim32(2) == this->kernel_w(),
156 "filter width must be equal to kernel width");
// Output channels C are the last filter dim in NHWC.
157 const int C = filter.dim32(3);
158 ConvTransposeUnpoolBase<Context>::SetOutputSize(X, Y, C);
160 const auto kernel_dim = C * this->kernel_h() * this->kernel_w();
161 const auto input_image_size = H * W;
// NHWC output: spatial dims are dims 1 and 2.
162 const auto output_image_size = Y->dim32(1) * Y->dim32(2);
// Optional bias: validate and (re)build the ones-vector used to
// broadcast the bias over every output pixel via GEMM.
164 if (InputSize() == 3) {
165 auto& bias = Input(BIAS);
166 CAFFE_ENFORCE(bias.ndim() == 1,
"bias must be 1D tensor");
// (elided) presumably enforces bias.dim32(0) == C — TODO confirm.
169 "bias dimension must be equal to output channel number");
170 if (bias_multiplier_.size() != output_image_size) {
171 bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
172 T* bm_data = bias_multiplier_.template mutable_data<T>();
// (elided args) fills bias_multiplier_ with ones.
173 math::Set<T, Context>(
180 const T* Xdata = X.template data<T>();
181 const T* filter_data = filter.template data<T>();
182 T* Ydata = Y->template mutable_data<T>();
// NHWC column buffer is {H, W, kh, kw, C}: channels innermost.
186 vector<TIndex>{H, W, this->kernel_h(), this->kernel_w(), C});
187 T* col_buffer_data = col_buffer_.template mutable_data<T>();
// Per-image loop over the batch.
188 for (
auto image_id = 0; image_id < N; ++image_id) {
// (elided args) col_buffer = X_image * filter (all kernel contributions).
190 math::Gemm<T, Context>(
// (elided args) scatter-accumulate columns into Y_image, NHWC order.
203 math::Col2im<T, Context, StorageOrder::NHWC>(
// Bias broadcast: Y_image += ones * bias^T via GEMM (no NEON branch here;
// NHWC has no BiasCHW fast path).
221 if (InputSize() == 3) {
222 const T* bm_data = bias_multiplier_.template data<T>();
223 const T* bias_data = Input(BIAS).template data<T>();
224 math::Gemm<T, Context>(
// Advance output pointer by one image's worth of elements.
238 Ydata += Y->size() / Y->dim32(0);
// Optionally run lambda `f` (elided) under a workspace-shared buffer.
241 if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
242 runWithSharedBuffer<Context>(ws_, f);
// Backward pass of 2-D transposed convolution, NCHW. Computes the filter
// gradient (and, unless no_bias_, the bias gradient) in a first per-image
// loop via Im2col(dY) + GEMM, then optionally the input gradient dX in a
// second loop.
//
// NOTE(review): extract is elided — the apparent double declaration of
// `dbias` (two `auto* dbias = Output(BIAS_OR_INPUT_GRAD);` lines below) is
// presumably inside distinct elided `if (!no_bias_)` scopes; confirm against
// the full source before editing.
249 template <
typename T,
class Context>
250 bool ConvTransposeGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
251 auto& X = Input(INPUT);
252 auto& filter = Input(FILTER);
253 auto& dY = Input(OUTPUT_GRAD);
254 auto* dfilter = Output(FILTER_GRAD);
255 const int N = X.dim32(0), M = X.dim32(1), H = X.dim32(2), W = X.dim32(3);
260 CAFFE_ENFORCE(filter.ndim() == 4);
// C = output channels (filter dim 1 for transposed conv).
261 const int C = filter.dim32(1);
263 filter.dim32(2) == this->kernel_h(),
264 "filter height must be equal to kernel height");
266 filter.dim32(3) == this->kernel_w(),
267 "filter width must be equal to kernel width");
268 dfilter->ResizeLike(filter);
270 const int kernel_dim = C * this->kernel_h() * this->kernel_w();
// Gradient flows from dY, so image size comes from dY's spatial dims.
271 const int output_image_size = dY.dim32(2) * dY.dim32(3);
// (elided) column buffer resize to {C, kh, kw, H, W}.
274 vector<TIndex>{C, this->kernel_h(), this->kernel_w(), H, W});
// Bias-gradient setup — presumably guarded by an elided `if (!no_bias_)`.
276 auto* dbias = Output(BIAS_OR_INPUT_GRAD);
278 if (bias_multiplier_.size() != output_image_size) {
279 bias_multiplier_.Resize(1, output_image_size);
280 T* bm_data = bias_multiplier_.template mutable_data<T>();
// (elided args) fill the multiplier with ones.
281 math::Set<T, Context>(
288 T* col_buffer_data = col_buffer_.template mutable_data<T>();
289 const T* Xdata = X.template data<T>();
290 const T* filter_data = filter.template data<T>();
291 const T* dYdata = dY.template data<T>();
292 T* dfilter_data = dfilter->template mutable_data<T>();
// Gradients are accumulated across the batch, so zero them first.
294 math::Set<T, Context>(dfilter->size(), 0, dfilter_data, &context_);
// Second dbias reference — presumably inside another elided no_bias_ guard.
296 auto* dbias = Output(BIAS_OR_INPUT_GRAD);
297 T* dbias_data = dbias->template mutable_data<T>();
298 math::Set<T, Context>(dbias->size(), 0, dbias_data, &context_);
// Loop 1: accumulate dfilter (and dbias) per image.
300 for (
auto image_id = 0; image_id < N; ++image_id) {
// (elided args) gather dY_image into columns.
303 math::Im2col<T, Context, StorageOrder::NCHW>(
// (elided args) dfilter += X_image * col_buffer^T (accumulating GEMM).
321 math::Gemm<T, Context>(
// (elided) dbias accumulation: dY columns times the ones-vector.
335 const T* bm_data = bias_multiplier_.template data<T>();
336 T* input_grad_data = Output(BIAS_OR_INPUT_GRAD)->template mutable_data<T>();
337 math::Gemm<T, Context>(
// Advance per-image pointers.
350 dYdata += dY.size() / dY.dim32(0);
351 Xdata += X.size() / X.dim32(0);
// Loop 2: compute dX only when it was requested. With no_bias_, dX takes
// the BIAS_OR_INPUT_GRAD output slot.
353 if (OutputSize() == 3 || (no_bias_ && (OutputSize() == 2))) {
// Restart from the beginning of dY for the second pass.
356 dYdata = dY.template data<T>();
357 auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD);
359 T* dXdata = dX->template mutable_data<T>();
360 for (
auto image_id = 0; image_id < N; ++image_id) {
// (elided args) gather dY_image into columns again.
364 math::Im2col<T, Context, StorageOrder::NCHW>(
// (elided args) dX_image = filter * col_buffer.
382 math::Gemm<T, Context>(
394 dYdata += dY.size() / dY.dim32(0);
395 dXdata += X.size() / X.dim32(0);
// Backward pass of 2-D transposed convolution, NHWC. Mirrors the NCHW
// gradient path: loop 1 accumulates dfilter (and dbias unless no_bias_)
// via Im2col(dY) + GEMM; loop 2 optionally computes dX.
//
// NOTE(review): extract is elided — the two `auto* dbias = ...` lines below
// are presumably in separate elided `if (!no_bias_)` scopes; confirm against
// the full source before editing.
401 template <
typename T,
class Context>
402 bool ConvTransposeGradientOp<T, Context>::RunOnDeviceWithOrderNHWC() {
403 auto& X = Input(INPUT);
404 auto& filter = Input(FILTER);
405 auto& dY = Input(OUTPUT_GRAD);
406 auto* dfilter = Output(FILTER_GRAD);
// NHWC: batch, height, width, input channels M.
407 const int N = X.dim32(0), H = X.dim32(1), W = X.dim32(2), M = X.dim32(3);
412 CAFFE_ENFORCE(filter.ndim() == 4,
"filter must be 4D tensor");
414 filter.dim32(1) == this->kernel_h(),
415 "filter height must be equal to kernel height");
417 filter.dim32(2) == this->kernel_w(),
418 "filter width must be equal to kernel width");
// Output channels C on the last filter dim for NHWC.
419 const int C = filter.dim32(3);
420 dfilter->ResizeLike(filter);
422 const int kernel_dim = C * this->kernel_h() * this->kernel_w();
// dY spatial dims are dims 1 and 2 in NHWC.
423 const int output_image_size = dY.dim32(1) * dY.dim32(2);
// (elided) column buffer resize to {H, W, kh, kw, C}.
426 vector<TIndex>{H, W, this->kernel_h(), this->kernel_w(), C});
// Bias-gradient setup — presumably behind an elided `if (!no_bias_)`.
428 auto* dbias = Output(BIAS_OR_INPUT_GRAD);
430 if (bias_multiplier_.size() != output_image_size) {
431 bias_multiplier_.Resize(1, output_image_size);
432 T* bm_data = bias_multiplier_.template mutable_data<T>();
// (elided args) fill the multiplier with ones.
433 math::Set<T, Context>(
440 T* col_buffer_data = col_buffer_.template mutable_data<T>();
441 const T* Xdata = X.template data<T>();
442 const T* filter_data = filter.template data<T>();
443 const T* dYdata = dY.template data<T>();
444 T* dfilter_data = dfilter->template mutable_data<T>();
// Zero the accumulators: gradients are summed over the batch.
446 math::Set<T, Context>(dfilter->size(), 0, dfilter_data, &context_);
// Second dbias reference — presumably inside another elided no_bias_ guard.
448 auto* dbias = Output(BIAS_OR_INPUT_GRAD);
449 T* dbias_data = dbias->template mutable_data<T>();
450 math::Set<T, Context>(dbias->size(), 0, dbias_data, &context_);
// Loop 1: accumulate dfilter (and dbias) image by image.
452 for (
auto image_id = 0; image_id < N; ++image_id) {
// (elided args) gather dY_image into NHWC-ordered columns.
455 math::Im2col<T, Context, StorageOrder::NHWC>(
// (elided args) dfilter += X_image^T * col_buffer (accumulating GEMM).
473 math::Gemm<T, Context>(
// (elided) dbias accumulation via the ones-vector.
487 const T* bm_data = bias_multiplier_.template data<T>();
488 T* input_grad_data = Output(BIAS_OR_INPUT_GRAD)->template mutable_data<T>();
489 math::Gemm<T, Context>(
// Advance per-image pointers.
502 dYdata += dY.size() / dY.dim32(0);
503 Xdata += X.size() / X.dim32(0);
// Loop 2: dX only if requested; with no_bias_, dX occupies the
// BIAS_OR_INPUT_GRAD output slot.
505 if (OutputSize() == 3 || (no_bias_ && (OutputSize() == 2))) {
// Rewind to the start of dY for the second pass.
508 dYdata = dY.template data<T>();
509 auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD);
511 T* dXdata = dX->template mutable_data<T>();
512 for (
auto image_id = 0; image_id < N; ++image_id) {
// (elided args) gather dY_image into columns again.
516 math::Im2col<T, Context, StorageOrder::NHWC>(
// (elided args) dX_image = col_buffer * filter^T.
534 math::Gemm<T, Context>(
546 dYdata += dY.size() / dY.dim32(0);
547 dXdata += X.size() / X.dim32(0);
554 #endif // CAFFE2_OPERATORS_CONV_TRANSPOSE_OP_IMPL_H_ A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...