Caffe2 - C++ API
A deep learning, cross-platform ML framework
spatial_batch_norm_op.cc
#include "caffe2/operators/spatial_batch_norm_op.h"

namespace caffe2 {

template <>
bool SpatialBNOp<CPUContext>::RunOnDevice() {
  const auto& X = Input(INPUT);
  const auto& scale = Input(SCALE);
  const auto& bias = Input(BIAS);

  CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
  const int N = X.dim32(0);
  const int C =
      (order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(X.ndim() - 1));
  const int H = (order_ == StorageOrder::NCHW ? X.dim32(2) : X.dim32(1));
  const int W = X.ndim() > 3
      ? (order_ == StorageOrder::NCHW ? X.dim32(3) : X.dim32(2))
      : 1;
  const int D = X.ndim() > 4
      ? (order_ == StorageOrder::NCHW ? X.dim32(4) : X.dim32(3))
      : 1;

  const int sample_size = H * W * D;
  CAFFE_ENFORCE_EQ(scale.ndim(), 1);
  CAFFE_ENFORCE_EQ(bias.ndim(), 1);
  CAFFE_ENFORCE_EQ(scale.dim32(0), C);
  CAFFE_ENFORCE_EQ(bias.dim32(0), C);

  ConstEigenVectorArrayMap<float> scale_arr(scale.data<float>(), C);
  ConstEigenVectorArrayMap<float> bias_arr(bias.data<float>(), C);

  auto* Y = Output(OUTPUT);
  Y->ResizeLike(X);

  if (!is_test_) {
    // Training mode: compute the mean and variance.
    // Note that, to be consistent with cudnn, we will output the saved
    // inverse std as output 5, but we will still use the same storage place
    // to compute var as well. The inversion is carried out at the end of
    // the op.
    Output(SAVED_MEAN)->Resize(C);
    Output(SAVED_INV_VAR)->Resize(C);
    EigenVectorArrayMap<float> mean(
        Output(SAVED_MEAN)->mutable_data<float>(), C);
    EigenVectorArrayMap<float> var(
        Output(SAVED_INV_VAR)->mutable_data<float>(), C);

    if (num_batches_ > 1) {
      ConstEigenVectorArrayMap<float> sums(Input(SUMS).data<float>(), C);
      ConstEigenVectorArrayMap<float> sumsq(Input(SUMSQ).data<float>(), C);
      const auto multi_batch_size = N * num_batches_ * sample_size;
      mean = sums / multi_batch_size;
      var = (sumsq - (sums * sums) / multi_batch_size) / multi_batch_size;
    } else {
      mean.setZero();
      var.setZero();
      switch (order_) {
        case StorageOrder::NCHW: {
          ConstEigenArrayMap<float> X_arr(X.data<float>(), sample_size, N * C);
          for (int nc = 0; nc < N * C; ++nc) {
            mean(nc % C) += X_arr.col(nc).sum();
          }
          mean /= N * sample_size;
          for (int nc = 0; nc < N * C; ++nc) {
            var(nc % C) +=
                (X_arr.col(nc) - mean(nc % C)).matrix().squaredNorm();
          }
          var /= N * sample_size;
          break;
        }
        case StorageOrder::NHWC: {
          ConstEigenArrayMap<float> X_arr(X.data<float>(), C, N * sample_size);
          for (int i = 0; i < N * sample_size; ++i) {
            mean += X_arr.col(i);
          }
          mean /= N * sample_size;
          for (int i = 0; i < N * sample_size; ++i) {
            var += (X_arr.col(i) - mean) * (X_arr.col(i) - mean);
          }
          var /= N * sample_size;
          break;
        }
        default:
          CAFFE_THROW("Unknown storage order: ", order_);
      }
    }

    // Compute the running mean and running variance.
    auto* running_mean = Output(RUNNING_MEAN);
    auto* running_var = Output(RUNNING_VAR);
    // Check if they are initialized.
    if (!running_mean->size()) {
      running_mean->Resize(C);
      EigenVectorArrayMap<float> running_mean_map(
          running_mean->mutable_data<float>(), C);
      running_mean_map.setZero();
    }
    if (!running_var->size()) {
      running_var->Resize(C);
      EigenVectorArrayMap<float> running_var_map(
          running_var->mutable_data<float>(), C);
      running_var_map.setZero();
    }
    EigenVectorArrayMap<float> running_mean_arr(
        running_mean->mutable_data<float>(), C);
    EigenVectorArrayMap<float> running_var_arr(
        running_var->mutable_data<float>(), C);
    running_mean_arr = running_mean_arr * momentum_ + mean * (1. - momentum_);
    running_var_arr = running_var_arr * momentum_ + var * (1. - momentum_);
  }

  // Regardless of training or testing, we will apply the estimated mean
  // and standard deviation to the input. For testing, they are
  // specified directly by the input, and for training, they are computed
  // by the op.
  Eigen::Array<float, Eigen::Dynamic, 1> inv_std(C);
  if (is_test_) {
    ConstEigenVectorArrayMap<float> var_arr(Input(EST_VAR).data<float>(), C);
    inv_std = (var_arr + epsilon_).sqrt().inverse();
  } else {
    EigenVectorArrayMap<float> saved_inv_std(
        Output(SAVED_INV_VAR)->mutable_data<float>(), C);
    saved_inv_std = (saved_inv_std + epsilon_).inverse().sqrt();
    inv_std = saved_inv_std;
  }
  ConstEigenVectorArrayMap<float> mean_arr(
      is_test_ ? Input(EST_MEAN).data<float>()
               : Output(SAVED_MEAN)->data<float>(),
      C);
  // We can fuse the output computation as follows:
  //   (x - est_mean) * inv_std * scale + bias
  // to
  //   (x * inv_std * scale) + (bias - est_mean * inv_std * scale)
  Eigen::Array<float, Eigen::Dynamic, 1> new_scale = inv_std * scale_arr;
  Eigen::Array<float, Eigen::Dynamic, 1> new_bias =
      bias_arr - mean_arr * inv_std * scale_arr;
  switch (order_) {
    case StorageOrder::NHWC: {
      EigenArrayMap<float>(Y->mutable_data<float>(), C, N * sample_size) =
          (ConstEigenArrayMap<float>(X.data<float>(), C, N * sample_size)
               .colwise() *
           new_scale)
              .colwise() +
          new_bias;
      break;
    }
    case StorageOrder::NCHW: {
      EigenArrayMap<float> Y_arr(Y->mutable_data<float>(), sample_size, N * C);
      ConstEigenArrayMap<float> X_arr(X.data<float>(), sample_size, N * C);
      for (int nc = 0; nc < N * C; ++nc) {
        Y_arr.col(nc) = X_arr.col(nc) * new_scale(nc % C) + new_bias(nc % C);
      }
      break;
    }
    default:
      CAFFE_THROW("Unknown storage order: ", order_);
  }
  return true;
}
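
The fused form above precomputes new_scale and new_bias once per channel so that the per-element work reduces to a single multiply-add. A minimal standalone sketch (plain C++, independent of Caffe2, with hypothetical toy values) that checks the fusion identity:

#include <cassert>
#include <cmath>
#include <cstdio>

int main() {
  // Hypothetical per-channel statistics and parameters, for illustration.
  const float mean = 0.5f, var = 4.0f, scale = 2.0f, bias = 1.0f;
  const float epsilon = 1e-5f;
  const float inv_std = 1.0f / std::sqrt(var + epsilon);

  // Fused coefficients, precomputed once per channel as in the op above.
  const float new_scale = inv_std * scale;
  const float new_bias = bias - mean * inv_std * scale;

  for (float x : {-1.0f, 0.0f, 2.5f}) {
    const float direct = (x - mean) * inv_std * scale + bias;
    const float fused = x * new_scale + new_bias;
    // Both forms agree up to floating-point rounding.
    assert(std::fabs(direct - fused) < 1e-5f);
    std::printf("x=%5.2f -> y=%f\n", x, fused);
  }
  return 0;
}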

namespace {
OpSchema::Cost CostInferenceForSpatialBN(
    const OperatorDef& def,
    const vector<TensorShape>& in) {
  struct OpSchema::Cost cost = PointwiseCostInference<4>(def, in);
  ArgumentHelper helper(def);
  auto order =
      StringToStorageOrder(helper.GetSingleArgument<string>("order", "NCHW"));
  const TensorShape X = in[0];
  const int C =
      (order == StorageOrder::NCHW ? X.dims(1) : X.dims(X.dims_size() - 1));
  cost.params_bytes = 2 * C * sizeof(float);
  return cost;
}
} // namespace
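
The cost estimate counts only the two learned per-channel parameter vectors, scale and bias; for example, with C = 64 channels, params_bytes = 2 * 64 * sizeof(float) = 512 bytes. The FLOP estimate itself comes from PointwiseCostInference, reflecting that the normalization is elementwise once the per-channel coefficients are fixed.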

REGISTER_CPU_OPERATOR(SpatialBN, SpatialBNOp<CPUContext>);

OPERATOR_SCHEMA(SpatialBN)
    .NumInputs({5, 7})
    .NumOutputs({1, 5})
    .AllowInplace({{0, 0}})
    .CostInferenceFunction(CostInferenceForSpatialBN)
    .EnforceInplace({{3, 1}, {4, 2}})
    .TensorInferenceFunction(
        [](const OperatorDef& def, const vector<TensorShape>& in) {
          ArgumentHelper helper(def);
          bool is_test = helper.GetSingleArgument<int>(OpSchema::Arg_IsTest, 0);

          if (!is_test) {
            vector<TensorShape> out;
            StorageOrder order = StringToStorageOrder(
                helper.GetSingleArgument<string>("order", "NCHW"));
            const TensorShape& X = in[0];
            const int C =
                (order == StorageOrder::NCHW ? X.dims(1)
                                             : X.dims(X.dims_size() - 1));

            out.push_back(in[0]);
            TensorShape meanvar_tp =
                CreateTensorShape(vector<int>{C}, TensorProto::FLOAT);
            out.push_back(meanvar_tp); // RUNNING_MEAN
            out.push_back(meanvar_tp); // RUNNING_VAR
            out.push_back(meanvar_tp); // SAVED_MEAN
            out.push_back(meanvar_tp); // SAVED_VAR
            return out;
          } else {
            return vector<TensorShape>{in[0]};
          }
        })
212  .SetDoc(R"DOC(
213 Carries out spatial batch normalization as described in the paper
214 https://arxiv.org/abs/1502.03167 . Depending on the mode it is being run,
215 there are multiple cases for the number of outputs, which we list below:
216 
217 
218 Output case #1:
219  Y, mean, var, saved_mean, saved_var (training mode)
220 
221 
222 Output case #2:
223  Y (test mode)
224 )DOC")
    .ArgIsTest(
        "If set to nonzero, run spatial batch normalization in test mode.")
    .Arg("epsilon", "The epsilon value to use to avoid division by zero.")
    .Arg("order", "A StorageOrder string.")
    .Arg(
        "momentum",
        "Factor used in computing the running mean and variance. "
        "e.g., running_mean = running_mean * momentum + mean * (1 - momentum)")
    .Arg(
        "num_batches",
        "(Optional) Specifies the number of batches to apply normalization "
        "on. Requires specifying the optional sums and sumsq inputs that "
        "provide statistics across multiple batches from which mean and "
        "variance can be determined.")
    .Input(
        0,
        "X",
        "The input 4-dimensional tensor of shape NCHW or NHWC depending "
        "on the order parameter.")
    .Input(
        1,
        "scale",
        "The scale as a 1-dimensional tensor of size C to be applied to the "
        "output.")
    .Input(
        2,
        "bias",
        "The bias as a 1-dimensional tensor of size C to be applied to the "
        "output.")
    .Input(
        3,
        "mean",
        "The running mean (training) or the estimated mean (testing) "
        "as a 1-dimensional tensor of size C.")
    .Input(
        4,
        "var",
        "The running variance (training) or the estimated "
        "variance (testing) as a 1-dimensional tensor of size C.")
    .Input(
        5,
        "sums",
        "(optional) Per-channel sums of elements to be used to determine "
        "the mean and variance for this batch.")
    .Input(
        6,
        "sumsq",
        "(optional) Per-channel sums of squared elements to be used to "
        "determine the variance for this batch.")

    .Output(0, "Y", "The output 4-dimensional tensor of the same shape as X.")
    .Output(
        1,
        "mean",
        "The running mean after the spatial BN operator. Must be in-place "
        "with the input mean. Should not be used for testing.")
    .Output(
        2,
        "var",
        "The running variance after the spatial BN operator. Must be "
        "in-place with the input var. Should not be used for testing.")
    .Output(
        3,
        "saved_mean",
        "Saved mean used during training to speed up gradient "
        "computation. Should not be used for testing.")
    .Output(
        4,
        "saved_var",
        "Saved variance used during training to speed up "
        "gradient computation. Should not be used for testing.")
    .InheritOnnxSchema("BatchNormalization");
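
For reference, here is a hedged sketch of invoking the registered operator in test mode through the Caffe2 C++ API. Blob names, shapes, and fill values are illustrative, error handling is omitted, and the helper lambda is hypothetical; CreateOperatorDef and MakeArgument come from caffe2/utils/proto_utils.h.

#include <string>
#include <vector>
#include "caffe2/core/workspace.h"
#include "caffe2/utils/proto_utils.h"

void RunSpatialBNTestMode() {
  caffe2::Workspace ws;
  const int N = 1, C = 2, H = 3, W = 3;
  // Hypothetical helper: create a float CPU tensor blob and fill it with v.
  auto fill = [&](const char* name, const std::vector<int>& dims, float v) {
    auto* t = ws.CreateBlob(name)->GetMutable<caffe2::TensorCPU>();
    t->Resize(dims);
    float* data = t->mutable_data<float>();
    for (int i = 0; i < t->size(); ++i) {
      data[i] = v;
    }
  };
  // The five required test-mode inputs: X plus per-channel scale, bias,
  // estimated mean, and estimated variance.
  fill("X", {N, C, H, W}, 1.f);
  fill("scale", {C}, 1.f);
  fill("bias", {C}, 0.f);
  fill("mean", {C}, 0.f);
  fill("var", {C}, 1.f);

  caffe2::OperatorDef def = caffe2::CreateOperatorDef(
      "SpatialBN",
      "",
      std::vector<std::string>{"X", "scale", "bias", "mean", "var"},
      std::vector<std::string>{"Y"},
      std::vector<caffe2::Argument>{caffe2::MakeArgument<int>("is_test", 1)});
  ws.RunOperatorOnce(def);
  // Y now holds (X - mean) / sqrt(var + epsilon) * scale + bias per channel.
}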

} // namespace caffe2