3 #include "caffe2/operators/fully_connected_op.h" 7 REGISTER_CPU_OPERATOR(FC, FullyConnectedOp<CPUContext>);
8 REGISTER_CPU_OPERATOR(FCGradient, FullyConnectedGradientOp<CPUContext>);
10 REGISTER_CPU_OPERATOR(
16 REGISTER_CPU_OPERATOR(
18 FullyConnectedGradientOp<
24 std::vector<TensorShape> FCShapeInference(
25 const OperatorDef& def,
26 const vector<TensorShape>& in,
27 bool pretransposed_weight) {
28 vector<TensorShape> out(1);
29 ArgumentHelper helper(def);
31 auto axis = helper.GetSingleArgument<int32_t>(
"axis", 1);
32 const auto canonical_axis = canonical_axis_index_(axis, in[0].dims().size());
33 const int M = size_to_dim_(canonical_axis, GetDimsVector(in[0]));
34 auto axis_w = helper.GetSingleArgument<int32_t>(
"axis_w", 1);
35 const int canonical_axis_w =
36 canonical_axis_index_(axis_w, in[1].dims().size());
37 const int N = pretransposed_weight
39 : size_to_dim_(canonical_axis_w, GetDimsVector(in[1]));
41 vector<int> y_shape(in[0].dims().begin(), in[0].dims().end());
42 CAFFE_ENFORCE_LE(canonical_axis + 1, y_shape.size());
43 y_shape.resize(canonical_axis + 1);
44 y_shape[canonical_axis] = N;
45 out[0] = CreateTensorShape(y_shape, in[0].data_type());
49 OpSchema::Cost CostInferenceForFC(
50 const OperatorDef& def,
51 const vector<TensorShape>& in,
52 bool pretransposed_weight) {
53 struct OpSchema::Cost c;
54 ArgumentHelper helper(def);
56 auto axis = helper.GetSingleArgument<int32_t>(
"axis", 1);
57 const auto canonical_axis = canonical_axis_index_(axis, in[0].dims().size());
58 const int M = size_to_dim_(canonical_axis, GetDimsVector(in[0]));
59 const int K =
size_from_dim_(canonical_axis, GetDimsVector(in[0]));
60 auto axis_w = helper.GetSingleArgument<int32_t>(
"axis_w", 1);
61 const int canonical_axis_w =
62 canonical_axis_index_(axis_w, in[1].dims().size());
63 const int N = pretransposed_weight
65 : size_to_dim_(canonical_axis_w, GetDimsVector(in[1]));
67 c.flops = 2 * K * M * N + M * N;
68 c.bytes_moved = M * N *
sizeof(float);
69 c.params_bytes = (K * N + N) *
sizeof(
float);
73 std::vector<TensorShape> FCGradientShapeInference(
74 const OperatorDef& def,
75 const vector<TensorShape>& in,
76 bool pretransposed_weight) {
77 vector<TensorShape> out(2);
78 ArgumentHelper helper(def);
80 auto axis_w = helper.GetSingleArgument<int32_t>(
"axis_w", 1);
81 const int canonical_axis_w =
82 canonical_axis_index_(axis_w, in[1].dims().size());
83 const int N = pretransposed_weight
85 : size_to_dim_(canonical_axis_w, GetDimsVector(in[1]));
87 vector<int> dW_shape(in[1].dims().begin(), in[1].dims().end());
88 out[0] = CreateTensorShape(dW_shape, in[1].data_type());
89 out[1] = CreateTensorShape(vector<int>{N}, in[1].data_type());
90 if (def.output_size() == 3) {
91 vector<int> dX_shape(in[0].dims().begin(), in[0].dims().end());
92 out.push_back(CreateTensorShape(dX_shape, in[0].data_type()));
97 OpSchema::Cost CostInferenceForFCGradient(
98 const OperatorDef& def,
99 const vector<TensorShape>& in,
100 bool pretransposed_weight) {
101 struct OpSchema::Cost c;
102 ArgumentHelper helper(def);
103 std::vector<TensorShape> out =
104 FCGradientShapeInference(def, in, pretransposed_weight);
106 CAFFE_ENFORCE_LT(0, out.size());
107 const TensorShape dW = out[0];
108 const TensorShape db = out[1];
110 auto axis = helper.GetSingleArgument<int32_t>(
"axis", 1);
111 const auto canonical_axis = canonical_axis_index_(axis, in[0].dims().size());
112 const int M = size_to_dim_(canonical_axis, GetDimsVector(in[0]));
113 const int K =
size_from_dim_(canonical_axis, GetDimsVector(in[0]));
114 auto axis_w = helper.GetSingleArgument<int32_t>(
"axis_w", 1);
115 const int canonical_axis_w =
116 canonical_axis_index_(axis_w, in[1].dims().size());
117 const int N = pretransposed_weight
119 : size_to_dim_(canonical_axis_w, GetDimsVector(in[1]));
121 uint64_t size_dW = 1;
122 for (
int i = 0; i < dW.dims().size(); i++) {
123 size_dW *= dW.dims(i);
126 uint64_t size_db = 1;
127 for (
int i = 0; i < db.dims().size(); i++) {
128 size_db *= db.dims(i);
131 c.flops = 2 * (M * N * K + M * N);
132 c.bytes_moved = (size_dW + size_db) *
sizeof(
float);
133 c.params_bytes = (K * N + N) *
sizeof(
float);
135 if (out.size() == 3) {
136 const TensorShape dX = out[2];
137 uint64_t size_dX = 1;
138 for (
int i = 0; i < dX.dims().size(); i++) {
139 size_dX *= dX.dims(i);
142 c.flops += M * N * K;
143 c.bytes_moved += size_dX *
sizeof(float);
151 OPERATOR_SCHEMA(FCTransposed)
154 .TensorInferenceFunction(std::bind(FCShapeInference, _1, _2,
true))
155 .CostInferenceFunction(std::bind(CostInferenceForFC, _1, _2,
true))
157 Same as FC, but weight matrix is supposed to be already pretransposed. 158 FCTransposed stands for calling blass with no noTrans, noTrans 164 .TensorInferenceFunction(std::bind(FCShapeInference, _1, _2, false))
165 .CostInferenceFunction(std::bind(CostInferenceForFC, _1, _2,
false))
167 Computes the result of passing an input vector X into a fully 168 connected layer with 2D weight matrix W and 1D bias vector b. That is, 169 the layer computes Y = X * W^T + b, where X has size (M x K), 170 W has size (N x K), b has size (N), and Y has size (M x N), 171 where M is often the batch size. 174 NOTE: X does not need to explicitly be a 2D vector; rather, it will be 175 coerced into one. For an arbitrary n-dimensional tensor 176 X \in [a_0, a_1, ...,a_{k-1}, a_k, ..., a_{n-1}] where a_i \in N+ and k is 177 the axis provided, then X will be coerced into a 2-dimensional tensor with 178 dimensions [a_0 * ... * a_{k-1}, a_k * ... * a_{n-1}]. For the default 179 case where axis=1, this means the X tensor will be coerced into a 2D tensor 180 of dimensions [a_0, a_1 * ... * a_{n-1}], where a_0 is often the batch size. 181 In this situation, we must have a_0 = M and a_1 * ... * a_{n-1} = K. 182 Lastly, even though b is a 1D vector of size N, it is copied/resized to 183 be size (M x N) implicitly and added to each vector in the batch. 184 Each of these dimensions must be matched correctly, or else the operator 189 "(int32_t) default to 1; describes the axis of the inputs; " 190 "defaults to one because the 0th axis most likely describes " 194 "(int32_t) default to 1; describes the axis of the weight matrix W; " 195 "defaults to one because the 0th axis most likely describes " 197 .Arg(
"float16_compute",
"Whether to use float-16 compute kernel")
201 "input tensor that's coerced into a 2D matrix of size (MxK) " 202 "as described above")
206 "A tensor that is coerced into a 2D blob of size (KxN) " 207 "containing fully connected weight matrix")
208 .Input(2,
"b",
"1D blob containing bias vector")
209 .Output(0,
"Y",
"2D output tensor")
210 .InheritOnnxSchema(
"Gemm");
212 OPERATOR_SCHEMA(FCGradient)
215 .TensorInferenceFunction(std::bind(FCGradientShapeInference, _1, _2,
false))
216 .CostInferenceFunction(
217 std::bind(CostInferenceForFCGradient, _1, _2,
false));
218 OPERATOR_SCHEMA(FCTransposedGradient)
221 .TensorInferenceFunction(std::bind(FCGradientShapeInference, _1, _2,
false))
222 .CostInferenceFunction(
223 std::bind(CostInferenceForFCGradient, _1, _2,
false));
228 using GradientMakerBase::GradientMakerBase;
230 std::vector<OperatorDef> GetGradientDefs()
override {
231 CAFFE_ENFORCE_EQ(def_.input_size(), 3);
232 CAFFE_ENFORCE(def_.type() ==
"FC" || def_.type() ==
"FCTransposed");
233 return SingleGradientDef(
234 def_.type() +
"Gradient",
236 vector<string>{I(0), I(1), GO(0)},
237 vector<string>{GI(1), GI(2), GI(0)});
// Reference (from the caffe2 core headers): size_from_dim_(k, dims) returns
// the product of all dimensions starting from index k; size_to_dim_(k, dims)
// returns the product of the dimensions before index k.