1 #include "caffe2/operators/instance_norm_op.h" 10 template <
typename T,
typename Context>
11 bool InstanceNormOp<T, Context>::RunOnDeviceWithOrderNHWC() {
12 const auto& X = Input(INPUT);
13 auto* Y = Output(OUTPUT);
14 CAFFE_ENFORCE(Y != &X,
"Can't run InstanceNorm NHWC in-place");
15 auto* mean = OutputSize() > 1 ? Output(MEAN) : &mean_;
16 auto* inv_stdev = OutputSize() > 1 ? Output(INV_STDEV) : &inv_stdev_;
17 const int N = X.dim32(0);
18 const int H = X.dim32(1);
19 const int W = X.dim32(2);
20 const int C = X.dim32(3);
21 const size_t offset = H * W * C;
23 CAFFE_ENFORCE_EQ(Input(SCALE).size(), C);
24 CAFFE_ENFORCE_EQ(Input(BIAS).size(), C);
28 inv_stdev->Resize(N, C);
29 ConstEigenVectorArrayMap<T> scale(Input(SCALE).
template data<T>(), C);
30 ConstEigenVectorArrayMap<T> bias(Input(BIAS).
template data<T>(), C);
31 for (
int n = 0; n < N; ++n) {
32 ConstEigenArrayMap<T> Xmat(X.template data<T>() + offset * n, C, H * W);
33 EigenArrayMap<T> Ymat(Y->template mutable_data<T>() + offset * n, C, H * W);
34 EigenVectorArrayMap<T> mean_arr(
35 mean->template mutable_data<T>() + n * C, C);
36 EigenVectorArrayMap<T> inv_stdev_arr(
37 inv_stdev->template mutable_data<T>() + n * C, C);
42 mean_arr = Xmat.col(0);
43 for (
int i = 1; i < H * W; ++i) {
44 mean_arr += Xmat.col(i);
46 mean_arr *= 1. / (H * W);
47 Ymat = Xmat.colwise() - mean_arr;
50 inv_stdev_arr = Ymat.col(0) * Ymat.col(0);
51 for (
int i = 1; i < H * W; ++i) {
52 inv_stdev_arr += Ymat.col(i) * Ymat.col(i);
54 inv_stdev_arr = (inv_stdev_arr / (H * W) + epsilon_).sqrt().inverse();
55 Ymat = (Ymat.colwise() * (inv_stdev_arr * scale)).colwise() + bias;
60 template <
typename T,
typename Context>
61 bool InstanceNormOp<T, Context>::RunOnDeviceWithOrderNCHW() {
62 const auto& X = Input(INPUT);
63 const auto& scale = Input(SCALE);
64 const auto& bias = Input(BIAS);
65 auto* Y = Output(OUTPUT);
66 auto* mean = OutputSize() > 1 ? Output(MEAN) : &mean_;
67 auto* inv_stdev = OutputSize() > 1 ? Output(INV_STDEV) : &inv_stdev_;
68 const int N = X.dim32(0);
69 const int C = X.dim32(1);
70 const int H = X.dim32(2);
71 const int W = X.dim32(3);
73 CAFFE_ENFORCE_EQ(scale.size(), C);
74 CAFFE_ENFORCE_EQ(bias.size(), C);
78 inv_stdev->Resize(N, C);
80 const auto* Xdata = X.template data<T>();
81 auto* Ydata = Y->template mutable_data<T>();
82 const auto* scale_data = scale.template data<T>();
83 const auto* bias_data = bias.template data<T>();
84 auto* mean_data = mean->template mutable_data<T>();
85 auto* inv_stdev_data = inv_stdev->template mutable_data<T>();
88 for (
auto i = 0; i < N * C; ++i) {
89 ConstEigenVectorArrayMap<T> Xi(Xdata + H * W * i, H * W);
90 const T Xi_mean = Xi.mean();
91 const T squared_norm = (Xi - Xi_mean).matrix().squaredNorm();
92 const T inv_stdev = 1.0 / std::sqrt(squared_norm / (H * W) + epsilon_);
93 mean_data[i] = Xi_mean;
94 inv_stdev_data[i] = inv_stdev;
95 EigenVectorArrayMap<T> Yi(Ydata + H * W * i, H * W);
96 const T channel_scale = inv_stdev * scale_data[i % C];
97 const T channel_shift = bias_data[i % C] - Xi_mean * channel_scale;
98 Yi = Xi * channel_scale + channel_shift;
// Registers InstanceNormOp<float, CPUContext> as the CPU operator named
// InstanceNorm and declares its schema: output 0 may alias input 0, the
// arguments are epsilon and order, and inputs 1 and 2 are scale and bias.
// NOTE(review): this listing still carries its display line numbers and
// appears to have lost lines (the delimiters around the documentation text
// and the heads of the declarations that own the dangling description
// strings) -- restore them from the upstream file before building.
104 REGISTER_CPU_OPERATOR(InstanceNorm, InstanceNormOp<float, CPUContext>);
106 OPERATOR_SCHEMA(InstanceNorm)
109 .AllowInplace({{0,0}})
111 Carries out instance normalization as described in the paper 112 https://arxiv.org/abs/1607.08022. Depending on the mode it is being run, 113 there are multiple cases for the number of outputs, which we list below: 115 * Output case #1: output 116 * Output case #2: output, saved_mean 117 - don't use, doesn't make sense but won't crash 118 * Output case #3: output, saved_mean, saved_inv_stdev 119 - Makes sense for training only 121 For training mode, type 3 is faster in the sense that for the backward 122 pass, it is able to reuse the saved mean and inv_stdev in the gradient 125 .Arg("epsilon",
"The epsilon value to use to avoid division by zero.")
126 .Arg(
"order",
"A StorageOrder string.")
130 "The input 4-dimensional tensor of shape NCHW or NHWC depending " 131 "on the order parameter.")
132 .Input(1,
"scale",
"The input 1-dimensional scale tensor of size C.")
133 .Input(2,
"bias",
"The input 1-dimensional bias tensor of size C.")
137 "The output 4-dimensional tensor of the same shape as input.")
141 "Optional saved mean used during training to speed up gradient " 142 "computation. Should not be used for testing.")
146 "Optional saved inverse stdev used during training to speed up " 147 "gradient computation. Should not be used for testing.");
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...