Caffe2 - C++ API
A deep learning, cross platform ML framework
locally_connected_op.h
1 #ifndef CAFFE2_OPERATORS_LOCALLY_CONNECTED_OP_H_
2 #define CAFFE2_OPERATORS_LOCALLY_CONNECTED_OP_H_
3 
4 #include <vector>
5 
6 #include "caffe2/core/context.h"
7 #include "caffe2/core/operator.h"
8 #include "caffe2/operators/conv_op_shared.h"
9 #include "caffe2/operators/conv_pool_op_base.h"
10 #include "caffe2/operators/locally_connected_op_util.h"
11 
12 namespace caffe2 {
13 
// Forward operator of the "default locally connected implementation" (see the
// constructor comment), built on ConvPoolOpBase<Context>. This header only
// declares the class; the member functions are defined out of line.
//
// Template parameters:
//   T       - element type of the raw data pointers taken by the *Impl helpers.
//   Context - context type forwarded to ConvPoolOpBase<> and Tensor<>.
//
// Inputs (per INPUT_TAGS): INPUT, FILTER, BIAS. Output: Y.
14 template <typename T, class Context>
15 class LocallyConnectedOp final : public ConvPoolOpBase<Context> {
16  public:
    // NOTE(review): presumably this macro pulls base-class members such as
    // group_ and order_ (used below) into scope - confirm in conv_pool_op_base.h.
17  USE_CONV_POOL_BASE_FUNCTIONS(Context);
18 
    // Forwards operator_def and ws to ConvPoolOpBase<Context>, then rejects
    // unsupported configurations: a group_ other than 1 is accepted only when
    // order_ is StorageOrder::NCHW.
19  LocallyConnectedOp(const OperatorDef& operator_def, Workspace* ws)
20  : ConvPoolOpBase<Context>(operator_def, ws) {
21  // Since this is the default locally connected implementation, we will
22  // use CAFFE_ENFORCE instead of OPERATOR_NEEDS_FEATURE.
23  CAFFE_ENFORCE(
24  group_ == 1 || order_ == StorageOrder::NCHW,
25  "Group locally connected only supports NCHW order right now.");
26  }
27 
28  ~LocallyConnectedOp() = default;
29 
    // Per-storage-order entry points overriding the base-class hooks.
    // Declared here, defined out of line.
30  bool RunOnDeviceWithOrderNCHW() override;
31  bool RunOnDeviceWithOrderNHWC() override;
32 
33  private:
    // NCHW helper operating on raw data pointers plus caller-supplied scratch
    // tensors.
    // NOTE(review): presumably invoked by RunOnDeviceWithOrderNCHW() with the
    // buffer members declared below - confirm in the implementation file.
34  void RunOnDeviceWithOrderNCHWImpl(
35  const lc_op_util::ShapeParams& shape,
36  const T* X_data,
37  const T* filter_data,
38  const T* bias_data,
39  T* Y_data,
40  Tensor<Context>* column_buffer,
41  Tensor<Context>* column_transposed_buffer,
42  Tensor<Context>* output_buffer);
43 
    // NHWC counterpart of the helper above. Note that its last scratch
    // parameter is named Y_transposed_buffer rather than output_buffer.
44  void RunOnDeviceWithOrderNHWCImpl(
45  const lc_op_util::ShapeParams& shape,
46  const T* X_data,
47  const T* filter_data,
48  const T* bias_data,
49  T* Y_data,
50  Tensor<Context>* column_buffer,
51  Tensor<Context>* column_transposed_buffer,
52  Tensor<Context>* Y_transposed_buffer);
53 
    // NOTE(review): presumably fills *column_dims and *column_transposed_dims
    // (non-const pointer out-parameters) from the given sizes; the exact
    // layout is defined out of line - TODO confirm.
54  void SetColumnBufferShape(
55  const int N,
56  const int C,
57  const int kernel_size,
58  const int output_image_size,
59  std::vector<int>* column_dims,
60  std::vector<int>* column_transposed_dims);
61 
    // NOTE(review): "Tranposed" is a misspelling of "Transposed". Renaming it
    // requires updating the out-of-line definition and every caller together.
    // Presumably fills *Y_transposed_dims - TODO confirm.
62  void SetYTranposedBufferShape(
63  const int N,
64  const int M,
65  const int output_image_size,
66  std::vector<int>* Y_transposed_dims);
67 
    // NOTE(review): usage is not visible in this header; presumably a helper
    // tensor for applying the bias - confirm in the implementation file.
68  Tensor<Context> bias_multiplier_;
69 
    // Scratch tensors; their names mirror the buffer parameters of the *Impl
    // helpers above.
70  // Buffer.
71  Tensor<Context> column_buffer_;
72  Tensor<Context> column_transposed_buffer_;
73  Tensor<Context> Y_transposed_buffer_;
74 
    // NOTE(review): these look like Context-side copies of dims/axes arrays
    // for X, the column buffers and Y - confirm against the implementation.
75  // Dims devices.
76  Tensor<Context> X_dims_device_;
77  Tensor<Context> column_dims_device_;
78  Tensor<Context> column_transposed_dims_device_;
79  Tensor<Context> column_axes_device_;
80  Tensor<Context> Y_dims_device_;
81  Tensor<Context> Y_transposed_dims_device_;
82  Tensor<Context> Y_transposed_axes_device_;
83 
84  // Input: X, W, b
85  // Output: Y
86  INPUT_TAGS(INPUT, FILTER, BIAS);
87 };
88 
// Gradient operator for the locally connected layer, built on
// ConvPoolOpBase<Context>. This header only declares the class; the member
// functions are defined out of line.
//
// Template parameters:
//   T       - element type of the raw data pointers taken by the *Impl helpers.
//   Context - context type forwarded to ConvPoolOpBase<> and Tensor<>.
//
// Inputs (per INPUT_TAGS): INPUT, FILTER, OUTPUT_GRAD.
// Outputs (per OUTPUT_TAGS): FILTER_GRAD, BIAS_OR_INPUT_GRAD, INPUT_GRAD.
89 template <typename T, class Context>
90 class LocallyConnectedGradientOp final : public ConvPoolOpBase<Context> {
91  public:
    // NOTE(review): presumably this macro pulls base-class members such as
    // group_ and order_ (used below) into scope - confirm in conv_pool_op_base.h.
92  USE_CONV_POOL_BASE_FUNCTIONS(Context);
93 
    // Forwards operator_def and ws to ConvPoolOpBase<Context> and reads the
    // integer operator argument "no_bias" (default 0) into no_bias_.
    // Two configurations are rejected:
    //   - no_bias_ set while the operator has exactly 3 outputs;
    //   - a group_ other than 1 with any order_ except StorageOrder::NCHW.
94  LocallyConnectedGradientOp(const OperatorDef& operator_def, Workspace* ws)
95  : ConvPoolOpBase<Context>(operator_def, ws),
96  no_bias_(OperatorBase::GetSingleArgument<int>("no_bias", 0)) {
97  CAFFE_ENFORCE(
98  !(no_bias_ && OutputSize() == 3),
99  "If bias is not present, you should not have 3 grad output.");
100  CAFFE_ENFORCE(
101  group_ == 1 || order_ == StorageOrder::NCHW,
102  "Group locally connected only supports NCHW order right now.");
103  }
104 
105  ~LocallyConnectedGradientOp() = default;
106 
    // Per-storage-order entry points overriding the base-class hooks.
    // Declared here, defined out of line.
107  bool RunOnDeviceWithOrderNCHW() override;
108  bool RunOnDeviceWithOrderNHWC() override;
109 
110  private:
    // NCHW helper operating on raw data pointers plus caller-supplied scratch
    // tensors.
    // NOTE(review): dX_data and dbias_data are presumably optional (nullptr
    // when that gradient is not requested) - confirm in the implementation.
111  void RunOnDeviceWithOrderNCHWImpl(
112  const lc_op_util::ShapeParams& shape,
113  const T* X_data,
114  const T* filter_data,
115  const T* dY_data,
116  T* dfilter_data,
117  T* dX_data,
118  T* dbias_data,
119  Tensor<Context>* column_buffer,
120  Tensor<Context>* column_transposed_buffer,
121  Tensor<Context>* dY_transposed_buffer);
122 
    // NHWC counterpart of the helper above, with an identical parameter list.
123  void RunOnDeviceWithOrderNHWCImpl(
124  const lc_op_util::ShapeParams& shape,
125  const T* X_data,
126  const T* filter_data,
127  const T* dY_data,
128  T* dfilter_data,
129  T* dX_data,
130  T* dbias_data,
131  Tensor<Context>* column_buffer,
132  Tensor<Context>* column_transposed_buffer,
133  Tensor<Context>* dY_transposed_buffer);
134 
    // NOTE(review): presumably fills *column_dims and *column_transposed_dims
    // (non-const pointer out-parameters) from the given sizes; the exact
    // layout is defined out of line - TODO confirm.
135  void SetColumnBufferShape(
136  const int N,
137  const int C,
138  const int kernel_size,
139  const int output_image_size,
140  std::vector<int>* column_dims,
141  std::vector<int>* column_transposed_dims);
142 
    // NOTE(review): "Tranposed" is a misspelling of "Transposed". Renaming it
    // requires updating the out-of-line definition and every caller together.
    // Presumably fills *dY_transposed_dims - TODO confirm.
143  void SetDYTranposedBufferShape(
144  const int N,
145  const int M,
146  const int output_image_size,
147  std::vector<int>* dY_transposed_dims);
148 
    // Set once in the constructor from the "no_bias" operator argument; the
    // int value is converted to bool.
149  const bool no_bias_;
150 
    // NOTE(review): usage is not visible in this header; presumably a helper
    // tensor for the bias gradient - confirm in the implementation file.
151  Tensor<Context> bias_multiplier_;
152 
    // Scratch tensors; their names mirror the buffer parameters of the *Impl
    // helpers above.
153  // Buffer.
154  Tensor<Context> column_buffer_;
155  Tensor<Context> column_transposed_buffer_;
156  Tensor<Context> dY_transposed_buffer_;
157 
    // NOTE(review): these look like Context-side copies of dims/axes arrays
    // for X, the column buffers and dY - confirm against the implementation.
158  // Dims devices.
159  Tensor<Context> X_dims_device_;
160  Tensor<Context> column_dims_device_;
161  Tensor<Context> column_transposed_dims_device_;
162  Tensor<Context> column_axes_device_;
163  Tensor<Context> column_transposed_axes_device_;
164  Tensor<Context> dY_dims_device_;
165  Tensor<Context> dY_transposed_dims_device_;
166  Tensor<Context> dY_axes_device_;
167 
    // NOTE(review): the tag name BIAS_OR_INPUT_GRAD suggests output slot 1
    // holds db normally and dX when no_bias_ is set - confirm in the
    // implementation file.
168  // input: X, W, dY
169  // output: dW, db, and optionally dX
170  INPUT_TAGS(INPUT, FILTER, OUTPUT_GRAD);
171  OUTPUT_TAGS(FILTER_GRAD, BIAS_OR_INPUT_GRAD, INPUT_GRAD);
172 };
173 
174 } // namespace caffe2
175 
176 #endif // CAFFE2_OPERATORS_LOCALLY_CONNECTED_OP_H_
Tensor is the basic class in Caffe2 that stores a contiguous block of memory together with its shape information...
Definition: tensor.h:93
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...