1 #include "caffe2/operators/spatial_batch_norm_op.h" 6 bool SpatialBNOp<CPUContext>::RunOnDevice() {
7 const auto& X = Input(INPUT);
8 const auto& scale = Input(SCALE);
9 const auto& bias = Input(BIAS);
11 CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
12 const int N = X.dim32(0);
  const int C =
      (order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(X.ndim() - 1));
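  // X may be 3-, 4-, or 5-dimensional (checked above); any spatial dimension
  // that is absent (W, D) is treated as 1, so the same code path handles 1D,
  // 2D, and 3D spatial batch normalization.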
  const int H = (order_ == StorageOrder::NCHW ? X.dim32(2) : X.dim32(1));
  const int W = X.ndim() > 3
      ? (order_ == StorageOrder::NCHW ? X.dim32(3) : X.dim32(2))
      : 1;
  const int D = X.ndim() > 4
      ? (order_ == StorageOrder::NCHW ? X.dim32(4) : X.dim32(3))
      : 1;
  const int sample_size = H * W * D;
  CAFFE_ENFORCE_EQ(scale.ndim(), 1);
  CAFFE_ENFORCE_EQ(bias.ndim(), 1);
  CAFFE_ENFORCE_EQ(scale.dim32(0), C);
  CAFFE_ENFORCE_EQ(bias.dim32(0), C);
29 ConstEigenVectorArrayMap<float> scale_arr(scale.data<
float>(), C);
30 ConstEigenVectorArrayMap<float> bias_arr(bias.data<
float>(), C);
  auto* Y = Output(OUTPUT);
  Y->ResizeLike(X);

  if (!is_test_) {
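    // Training mode: compute this batch's per-channel mean and variance.
    // To stay consistent with cuDNN, output 4 ultimately holds the saved
    // inverse standard deviation, but its storage is first reused here to
    // accumulate the variance; the inversion happens further below.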
42 Output(SAVED_MEAN)->Resize(C);
43 Output(SAVED_INV_VAR)->Resize(C);
44 EigenVectorArrayMap<float> mean(
45 Output(SAVED_MEAN)->mutable_data<float>(), C);
46 EigenVectorArrayMap<float> var(
47 Output(SAVED_INV_VAR)->mutable_data<float>(), C);
49 if (num_batches_ > 1) {
50 ConstEigenVectorArrayMap<float> sums(Input(SUMS).data<float>(), C);
51 ConstEigenVectorArrayMap<float> sumsq(Input(SUMSQ).data<float>(), C);
52 const auto multi_batch_size = N * num_batches_ * sample_size;
53 mean = sums / multi_batch_size;
54 var = (sumsq - (sums * sums) / multi_batch_size) / multi_batch_size;
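      // Single-batch path: accumulate the per-channel statistics directly
      // from X, with the traversal order chosen by the storage layout.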
      mean.setZero();
      var.setZero();
      switch (order_) {
        case StorageOrder::NCHW: {
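          // In NCHW, viewing X as (sample_size) x (N * C) makes each column
          // one (n, c) image plane; column nc belongs to channel nc % C.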
60 ConstEigenArrayMap<float> X_arr(X.data<
float>(), sample_size, N * C);
61 for (
int nc = 0; nc < N * C; ++nc) {
62 mean(nc % C) += X_arr.col(nc).sum();
64 mean /= N * sample_size;
65 for (
int nc = 0; nc < N * C; ++nc) {
67 (X_arr.col(nc) - mean(nc % C)).matrix().squaredNorm();
69 var /= N * sample_size;
        case StorageOrder::NHWC: {
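          // In NHWC, viewing X as C x (N * sample_size) makes each column the
          // C-vector of one spatial location, so statistics reduce columnwise.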
73 ConstEigenArrayMap<float> X_arr(X.data<
float>(), C, N * sample_size);
74 for (
int i = 0; i < N * sample_size; ++i) {
77 mean /= N * sample_size;
78 for (
int i = 0; i < N * sample_size; ++i) {
79 var += (X_arr.col(i) - mean) * (X_arr.col(i) - mean);
81 var /= N * sample_size;
          CAFFE_THROW("Unknown storage order: ", order_);
      }
    }

    auto* running_mean = Output(RUNNING_MEAN);
    auto* running_var = Output(RUNNING_VAR);
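    // Check whether the running statistics have been initialized; on the
    // first run they may still be empty, in which case they start at zero.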
93 if (!running_mean->size()) {
94 running_mean->Resize(C);
95 EigenVectorArrayMap<float> running_mean_map(
96 running_mean->mutable_data<
float>(), C);
97 running_mean_map.setZero();
99 if (!running_var->size()) {
100 running_var->Resize(C);
101 EigenVectorArrayMap<float> running_var_map(
102 running_var->mutable_data<
float>(), C);
103 running_var_map.setZero();
105 EigenVectorArrayMap<float> running_mean_arr(
106 running_mean->mutable_data<
float>(), C);
107 EigenVectorArrayMap<float> running_var_arr(
108 running_var->mutable_data<
float>(), C);
    running_mean_arr = running_mean_arr * momentum_ + mean * (1. - momentum_);
    running_var_arr = running_var_arr * momentum_ + var * (1. - momentum_);
  }

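  // Regardless of training or testing, apply the estimated mean and standard
  // deviation to the input: for testing they come directly from the EST_MEAN
  // and EST_VAR inputs, while for training they were just computed above.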
117 Eigen::Array<float, Eigen::Dynamic, 1> inv_std(C);
119 ConstEigenVectorArrayMap<float> var_arr(Input(EST_VAR).data<float>(), C);
120 inv_std = (var_arr + epsilon_).sqrt().inverse();
122 EigenVectorArrayMap<float> saved_inv_std(
123 Output(SAVED_INV_VAR)->mutable_data<float>(), C);
124 saved_inv_std = (saved_inv_std + epsilon_).inverse().sqrt();
125 inv_std = saved_inv_std;
127 ConstEigenVectorArrayMap<float> mean_arr(
128 is_test_ ? Input(EST_MEAN).data<float>()
129 : Output(SAVED_MEAN)->data<float>(),
135 Eigen::Array<float, Eigen::Dynamic, 1> new_scale = inv_std * scale_arr;
136 Eigen::Array<float, Eigen::Dynamic, 1> new_bias =
137 bias_arr - mean_arr * inv_std * scale_arr;
    case StorageOrder::NHWC: {
      EigenArrayMap<float>(Y->mutable_data<float>(), C, N * sample_size) =
          (ConstEigenArrayMap<float>(X.data<float>(), C, N * sample_size)
               .colwise() *
           new_scale)
              .colwise() +
          new_bias;
      break;
    }
    case StorageOrder::NCHW: {
      EigenArrayMap<float> Y_arr(Y->mutable_data<float>(), sample_size, N * C);
      ConstEigenArrayMap<float> X_arr(X.data<float>(), sample_size, N * C);
      for (int nc = 0; nc < N * C; ++nc) {
        Y_arr.col(nc) = X_arr.col(nc) * new_scale(nc % C) + new_bias(nc % C);
      }
      break;
    }
    default:
      CAFFE_THROW("Unknown storage order: ", order_);
  }
  return true;
}

OpSchema::Cost CostInferenceForSpatialBN(
    const OperatorDef& def,
    const vector<TensorShape>& in) {
  struct OpSchema::Cost cost = PointwiseCostInference<4>(def, in);
  ArgumentHelper helper(def);
  auto order = StringToStorageOrder(
      helper.GetSingleArgument<string>("order", "NCHW"));
  const TensorShape X = in[0];
  const int C =
      (order == StorageOrder::NCHW ? X.dims(1) : X.dims(X.dims_size() - 1));
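  // The only learned parameters read by this op are the per-channel scale
  // and bias vectors, i.e. two C-element float arrays.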
  cost.params_bytes = 2 * C * sizeof(float);
  return cost;
}

REGISTER_CPU_OPERATOR(SpatialBN, SpatialBNOp<CPUContext>);

OPERATOR_SCHEMA(SpatialBN)
    .NumInputs({5, 7})
    .NumOutputs({1, 5})
    .AllowInplace({{0, 0}})
    .CostInferenceFunction(CostInferenceForSpatialBN)
    .EnforceInplace({{3, 1}, {4, 2}})
    .TensorInferenceFunction(
        [](const OperatorDef& def, const vector<TensorShape>& in) {
          ArgumentHelper helper(def);
          bool is_test =
              helper.GetSingleArgument<int>(OpSchema::Arg_IsTest, 0);
          if (!is_test) {
192 vector<TensorShape> out;
193 StorageOrder order = StringToStorageOrder(
194 helper.GetSingleArgument<
string>(
"order",
"NCHW"));
195 const TensorShape& X = in[0];
197 (order == StorageOrder::NCHW ? X.dims(1)
198 : X.dims(X.dims_size() - 1));
            out.push_back(in[0]);
            TensorShape meanvar_tp =
                CreateTensorShape(vector<int>{C}, TensorProto::FLOAT);
            out.push_back(meanvar_tp); // running mean
            out.push_back(meanvar_tp); // running var
            out.push_back(meanvar_tp); // saved mean
            out.push_back(meanvar_tp); // saved var
            return out;
          }
209 return vector<TensorShape>{in[0]};
Carries out spatial batch normalization as described in the paper
https://arxiv.org/abs/1502.03167 . Depending on the mode in which it is run,
there are multiple cases for the number of outputs, which we list below:

Output case #1: Y, mean, var, saved_mean, saved_var (training mode)
Output case #2: Y (test mode)
)DOC")
    .ArgIsTest(
        "If set to nonzero, run spatial batch normalization in test mode.")
    .Arg("epsilon", "The epsilon value to use to avoid division by zero.")
    .Arg("order", "A StorageOrder string.")
    .Arg(
        "momentum",
231 "Factor used in computing the running mean and variance." 232 "e.g., running_mean = running_mean * momentum + mean * (1 - momentum)")
235 "(Optional) Specifies the number of batches to apply normalization on. " 236 "Requires specifying the optional sums and sumsq inputs that provide " 237 "statistics across multiple batches from which mean and variance can " 242 "The input 4-dimensional tensor of shape NCHW or NHWC depending " 243 "on the order parameter.")
247 "The scale as a 1-dimensional tensor of size C to be applied to the " 252 "The bias as a 1-dimensional tensor of size C to be applied to the " 257 "The running mean (training) or the estimated mean (testing) " 258 "as a 1-dimensional tensor of size C.")
262 "The running variance (training) or the estimated " 263 "variance (testing) as a 1-dimensional tensor of size C.")
267 "(optional) Per-channel sums of elements to be used to determine the " 268 "mean and variance for this batch")
272 "(optional) Per-channel sum of elements squared per channel to be used " 273 "to determine the variance for this batch")
    .Output(0, "Y", "The output 4-dimensional tensor of the same shape as X.")
279 "The running mean after the spatial BN operator. Must be in-place " 280 "with the input mean. Should not be used for testing.")
284 "The running variance after the spatial BN operator. Must be " 285 "in-place with the input var. Should not be used for testing.")
289 "Saved mean used during training to speed up gradient " 290 "computation. Should not be used for testing.")
294 "Saved variance used during training to speed up " 295 "gradient computation. Should not be used for testing.")
    .InheritOnnxSchema("BatchNormalization");

} // namespace caffe2