Caffe2 — C++ API
A deep-learning, cross-platform ML framework.
File: fully_connected_op.cc
1 #include <functional>
2 
3 #include "caffe2/operators/fully_connected_op.h"
4 
5 namespace caffe2 {
6 
// CPU registrations for FC (Y = X * W^T + b) and its gradient.
REGISTER_CPU_OPERATOR(FC, FullyConnectedOp<CPUContext>);
REGISTER_CPU_OPERATOR(FCGradient, FullyConnectedGradientOp<CPUContext>);

// FCTransposed variants: the weight matrix is supplied already transposed
// (K x N rather than FC's N x K — see the FC schema DOC below), so the op
// template is instantiated with TransposeWeight = false.
REGISTER_CPU_OPERATOR(
    FCTransposed,
    FullyConnectedOp<
        CPUContext,
        DefaultEngine,
        false /* weight is pretransposed; don't transpose it again */>);
REGISTER_CPU_OPERATOR(
    FCTransposedGradient,
    FullyConnectedGradientOp<
        CPUContext,
        DefaultEngine,
        false /* weight is pretransposed; don't transpose it again */>);
22 
23 namespace {
24 std::vector<TensorShape> FCShapeInference(
25  const OperatorDef& def,
26  const vector<TensorShape>& in,
27  bool pretransposed_weight) {
28  vector<TensorShape> out(1);
29  ArgumentHelper helper(def);
30 
31  auto axis = helper.GetSingleArgument<int32_t>("axis", 1);
32  const auto canonical_axis = canonical_axis_index_(axis, in[0].dims().size());
33  const int M = size_to_dim_(canonical_axis, GetDimsVector(in[0]));
34  auto axis_w = helper.GetSingleArgument<int32_t>("axis_w", 1);
35  const int canonical_axis_w =
36  canonical_axis_index_(axis_w, in[1].dims().size());
37  const int N = pretransposed_weight
38  ? size_from_dim_(canonical_axis_w, GetDimsVector(in[1]))
39  : size_to_dim_(canonical_axis_w, GetDimsVector(in[1]));
40 
41  vector<int> y_shape(in[0].dims().begin(), in[0].dims().end());
42  CAFFE_ENFORCE_LE(canonical_axis + 1, y_shape.size());
43  y_shape.resize(canonical_axis + 1);
44  y_shape[canonical_axis] = N;
45  out[0] = CreateTensorShape(y_shape, in[0].data_type());
46  return out;
47 }
48 
49 OpSchema::Cost CostInferenceForFC(
50  const OperatorDef& def,
51  const vector<TensorShape>& in,
52  bool pretransposed_weight) {
53  struct OpSchema::Cost c;
54  ArgumentHelper helper(def);
55 
56  auto axis = helper.GetSingleArgument<int32_t>("axis", 1);
57  const auto canonical_axis = canonical_axis_index_(axis, in[0].dims().size());
58  const int M = size_to_dim_(canonical_axis, GetDimsVector(in[0]));
59  const int K = size_from_dim_(canonical_axis, GetDimsVector(in[0]));
60  auto axis_w = helper.GetSingleArgument<int32_t>("axis_w", 1);
61  const int canonical_axis_w =
62  canonical_axis_index_(axis_w, in[1].dims().size());
63  const int N = pretransposed_weight
64  ? size_from_dim_(canonical_axis_w, GetDimsVector(in[1]))
65  : size_to_dim_(canonical_axis_w, GetDimsVector(in[1]));
66 
67  c.flops = 2 * K * M * N + M * N;
68  c.bytes_moved = M * N * sizeof(float);
69  c.params_bytes = (K * N + N) * sizeof(float);
70  return c;
71 }
72 
73 std::vector<TensorShape> FCGradientShapeInference(
74  const OperatorDef& def,
75  const vector<TensorShape>& in,
76  bool pretransposed_weight) {
77  vector<TensorShape> out(2);
78  ArgumentHelper helper(def);
79 
80  auto axis_w = helper.GetSingleArgument<int32_t>("axis_w", 1);
81  const int canonical_axis_w =
82  canonical_axis_index_(axis_w, in[1].dims().size());
83  const int N = pretransposed_weight
84  ? size_from_dim_(canonical_axis_w, GetDimsVector(in[1]))
85  : size_to_dim_(canonical_axis_w, GetDimsVector(in[1]));
86 
87  vector<int> dW_shape(in[1].dims().begin(), in[1].dims().end());
88  out[0] = CreateTensorShape(dW_shape, in[1].data_type());
89  out[1] = CreateTensorShape(vector<int>{N}, in[1].data_type()); // db
90  if (def.output_size() == 3) {
91  vector<int> dX_shape(in[0].dims().begin(), in[0].dims().end());
92  out.push_back(CreateTensorShape(dX_shape, in[0].data_type()));
93  }
94  return out;
95 }
96 
97 OpSchema::Cost CostInferenceForFCGradient(
98  const OperatorDef& def,
99  const vector<TensorShape>& in,
100  bool pretransposed_weight) {
101  struct OpSchema::Cost c;
102  ArgumentHelper helper(def);
103  std::vector<TensorShape> out =
104  FCGradientShapeInference(def, in, pretransposed_weight);
105 
106  CAFFE_ENFORCE_LT(0, out.size());
107  const TensorShape dW = out[0];
108  const TensorShape db = out[1];
109 
110  auto axis = helper.GetSingleArgument<int32_t>("axis", 1);
111  const auto canonical_axis = canonical_axis_index_(axis, in[0].dims().size());
112  const int M = size_to_dim_(canonical_axis, GetDimsVector(in[0]));
113  const int K = size_from_dim_(canonical_axis, GetDimsVector(in[0]));
114  auto axis_w = helper.GetSingleArgument<int32_t>("axis_w", 1);
115  const int canonical_axis_w =
116  canonical_axis_index_(axis_w, in[1].dims().size());
117  const int N = pretransposed_weight
118  ? size_from_dim_(canonical_axis_w, GetDimsVector(in[1]))
119  : size_to_dim_(canonical_axis_w, GetDimsVector(in[1]));
120 
121  uint64_t size_dW = 1;
122  for (int i = 0; i < dW.dims().size(); i++) {
123  size_dW *= dW.dims(i);
124  }
125 
126  uint64_t size_db = 1;
127  for (int i = 0; i < db.dims().size(); i++) {
128  size_db *= db.dims(i);
129  }
130 
131  c.flops = 2 * (M * N * K + M * N);
132  c.bytes_moved = (size_dW + size_db) * sizeof(float);
133  c.params_bytes = (K * N + N) * sizeof(float);
134 
135  if (out.size() == 3) {
136  const TensorShape dX = out[2];
137  uint64_t size_dX = 1;
138  for (int i = 0; i < dX.dims().size(); i++) {
139  size_dX *= dX.dims(i);
140  }
141 
142  c.flops += M * N * K;
143  c.bytes_moved += size_dX * sizeof(float);
144  }
145  return c;
146 }
147 
148 } // namespace
149 
150 using namespace std::placeholders;
151 OPERATOR_SCHEMA(FCTransposed)
152  .NumInputs(3)
153  .NumOutputs(1)
154  .TensorInferenceFunction(std::bind(FCShapeInference, _1, _2, true))
155  .CostInferenceFunction(std::bind(CostInferenceForFC, _1, _2, true))
156  .SetDoc(R"DOC(
157 Same as FC, but weight matrix is supposed to be already pretransposed.
158 FCTransposed stands for calling blass with no noTrans, noTrans
159 )DOC");
160 
// Forward schema for FC; `false` selects the regular (N x K) weight layout
// in shape/cost inference. Maps to ONNX Gemm.
OPERATOR_SCHEMA(FC)
    .NumInputs(3)
    .NumOutputs(1)
    .TensorInferenceFunction(std::bind(FCShapeInference, _1, _2, false))
    .CostInferenceFunction(std::bind(CostInferenceForFC, _1, _2, false))
    .SetDoc(R"DOC(
Computes the result of passing an input vector X into a fully
connected layer with 2D weight matrix W and 1D bias vector b. That is,
the layer computes Y = X * W^T + b, where X has size (M x K),
W has size (N x K), b has size (N), and Y has size (M x N),
where M is often the batch size.


NOTE: X does not need to explicitly be a 2D vector; rather, it will be
coerced into one. For an arbitrary n-dimensional tensor
X \in [a_0, a_1, ...,a_{k-1}, a_k, ..., a_{n-1}] where a_i \in N+ and k is
the axis provided, then X will be coerced into a 2-dimensional tensor with
dimensions [a_0 * ... * a_{k-1}, a_k * ... * a_{n-1}]. For the default
case where axis=1, this means the X tensor will be coerced into a 2D tensor
of dimensions [a_0, a_1 * ... * a_{n-1}], where a_0 is often the batch size.
In this situation, we must have a_0 = M and a_1 * ... * a_{n-1} = K.
Lastly, even though b is a 1D vector of size N, it is copied/resized to
be size (M x N) implicitly and added to each vector in the batch.
Each of these dimensions must be matched correctly, or else the operator
will throw errors.
)DOC")
    .Arg(
        "axis",
        "(int32_t) default to 1; describes the axis of the inputs; "
        "defaults to one because the 0th axis most likely describes "
        "the batch_size")
    .Arg(
        "axis_w",
        "(int32_t) default to 1; describes the axis of the weight matrix W; "
        "defaults to one because the 0th axis most likely describes "
        "the batch_size")
    .Arg("float16_compute", "Whether to use float-16 compute kernel")
    .Input(
        0,
        "X",
        "input tensor that's coerced into a 2D matrix of size (MxK) "
        "as described above")
    .Input(
        1,
        "W",
        "A tensor that is coerced into a 2D blob of size (KxN) "
        "containing fully connected weight matrix")
    .Input(2, "b", "1D blob containing bias vector")
    .Output(0, "Y", "2D output tensor")
    .InheritOnnxSchema("Gemm");
211 
// Gradient schema for FC: outputs dW, db, and optionally dX when three
// outputs are requested. `false` = regular (not pretransposed) weight layout.
OPERATOR_SCHEMA(FCGradient)
    .NumInputs(3)
    .NumOutputs(2, 3)
    .TensorInferenceFunction(std::bind(FCGradientShapeInference, _1, _2, false))
    .CostInferenceFunction(
        std::bind(CostInferenceForFCGradient, _1, _2, false));
218 OPERATOR_SCHEMA(FCTransposedGradient)
219  .NumInputs(3)
220  .NumOutputs(2, 3)
221  .TensorInferenceFunction(std::bind(FCGradientShapeInference, _1, _2, false))
222  .CostInferenceFunction(
223  std::bind(CostInferenceForFCGradient, _1, _2, false));
224 
225 namespace {
226 
228  using GradientMakerBase::GradientMakerBase;
229 
230  std::vector<OperatorDef> GetGradientDefs() override {
231  CAFFE_ENFORCE_EQ(def_.input_size(), 3);
232  CAFFE_ENFORCE(def_.type() == "FC" || def_.type() == "FCTransposed");
233  return SingleGradientDef(
234  def_.type() + "Gradient",
235  "",
236  vector<string>{I(0), I(1), GO(0)},
237  vector<string>{GI(1), GI(2), GI(0)});
238  }
239 };
240 
241 REGISTER_GRADIENT(FC, GetFCGradient);
242 REGISTER_GRADIENT(FCTransposed, GetFCGradient);
243 
244 } // namespace
245 
246 } // namespace caffe2
TIndex size_from_dim_(int k, const vector<TIndex>& dims)
Returns the product of all dimensions starting from dimension k.
Definition: tensor.h:40
A global dictionary that holds information about what Caffe2 modules have been loaded in the current runtime.