1 #include "caffe2/operators/instance_norm_op.h" 5 template <
typename T,
typename Context>
6 bool InstanceNormGradientOp<T, Context>::RunOnDeviceWithOrderNHWC() {
7 const auto& input = Input(INPUT);
8 const auto& scale = Input(SCALE);
9 const auto& bias = Input(BIAS);
10 const auto& output_grad = Input(OUTPUT_GRAD);
11 const auto& mean = InputSize() >= 5 ? Input(MEAN) : mean_;
12 const auto& inv_stdev = InputSize() >= 6 ? Input(INV_STDEV) : inv_stdev_;
13 auto input_grad = Output(INPUT_GRAD);
14 auto scale_grad = Output(SCALE_GRAD);
15 auto bias_grad = Output(BIAS_GRAD);
16 CAFFE_ENFORCE_EQ(4, input.ndim());
17 const int N = input.dim32(0);
18 const int H = input.dim32(1);
19 const int W = input.dim32(2);
20 const int C = input.dim32(3);
21 CAFFE_ENFORCE_EQ(1, scale.ndim());
22 CAFFE_ENFORCE_EQ(C, scale.dim32(0));
23 CAFFE_ENFORCE_EQ(1, bias.ndim());
24 CAFFE_ENFORCE_EQ(C, bias.dim32(0));
25 CAFFE_ENFORCE_EQ(4, output_grad.ndim());
26 CAFFE_ENFORCE_EQ(N, output_grad.dim32(0));
27 CAFFE_ENFORCE_EQ(H, output_grad.dim32(1));
28 CAFFE_ENFORCE_EQ(W, output_grad.dim32(2));
29 CAFFE_ENFORCE_EQ(C, output_grad.dim32(3));
30 input_grad->ResizeLike(input);
31 scale_grad->ResizeLike(scale);
32 bias_grad->ResizeLike(bias);
34 ConstEigenVectorArrayMap<T> scale_arr(scale.template data<T>(), C);
35 ConstEigenVectorArrayMap<T> bias_arr(bias.template data<T>(), C);
36 EigenVectorArrayMap<T> scale_grad_arr(
37 scale_grad->template mutable_data<T>(), C);
38 EigenVectorArrayMap<T> bias_grad_arr(
39 bias_grad->template mutable_data<T>(), C);
42 if (InputSize() < 5) {
45 if (InputSize() < 6) {
46 inv_stdev_.Resize(N, C);
51 for (
int n = 0; n < N; ++n) {
53 ConstEigenArrayMap<T> input_mat(
54 input.template data<T>() + n * C * H * W, C, H * W);
55 ConstEigenArrayMap<T> output_grad_mat(
56 output_grad.template data<T>() + n * C * H * W, C, H * W);
57 EigenArrayMap<T> input_grad_mat(
58 input_grad->template mutable_data<T>() + n * C * H * W, C, H * W);
61 if (InputSize() < 5) {
62 EigenVectorArrayMap<T> mean_mutable_arr(
63 mean_.template mutable_data<T>() + n * C, C);
64 mean_mutable_arr = input_mat.rowwise().mean();
66 CAFFE_ENFORCE_EQ(2, mean.ndim());
67 CAFFE_ENFORCE_EQ(N, mean.dim32(0));
68 CAFFE_ENFORCE_EQ(C, mean.dim32(1));
69 ConstEigenVectorArrayMap<T> mean_arr(mean.template data<T>() + n * C, C);
72 input_grad_mat = input_mat.colwise() - mean_arr;
75 if (InputSize() < 6) {
76 EigenVectorArrayMap<T> inv_stdev_mutable_arr(
77 inv_stdev_.template mutable_data<T>() + n * C, C);
80 inv_stdev_mutable_arr = input_grad_mat.pow(2).rowwise().mean();
82 inv_stdev_mutable_arr =
83 (inv_stdev_mutable_arr + epsilon_).sqrt().inverse();
85 CAFFE_ENFORCE_EQ(2, inv_stdev.ndim());
86 CAFFE_ENFORCE_EQ(N, inv_stdev.dim32(0));
87 CAFFE_ENFORCE_EQ(C, inv_stdev.dim32(1));
89 ConstEigenVectorArrayMap<T> inv_stdev_arr(
90 inv_stdev.template data<T>() + n * C, C);
94 bias_grad_arr += output_grad_mat.rowwise().sum();
98 ((input_grad_mat.colwise() * inv_stdev_arr) * output_grad_mat)
108 const auto temp = (inv_stdev_arr.pow(3) *
109 (input_grad_mat * output_grad_mat).rowwise().mean() *
112 input_grad_mat.colwise() *= temp;
115 input_grad_mat += output_grad_mat.colwise() * inv_stdev_arr;
118 const auto result_mean = input_grad_mat.rowwise().mean().eval();
119 input_grad_mat.colwise() -= result_mean;
120 input_grad_mat.colwise() *= scale_arr;
126 template <
typename T,
typename Context>
127 bool InstanceNormGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
128 const auto& input = Input(INPUT);
129 const auto& scale = Input(SCALE);
130 const auto& bias = Input(BIAS);
131 const auto& output_grad = Input(OUTPUT_GRAD);
132 const auto& mean = InputSize() >= 5 ? Input(MEAN) : mean_;
133 const auto& inv_stdev = InputSize() >= 6 ? Input(INV_STDEV) : inv_stdev_;
134 auto input_grad = Output(INPUT_GRAD);
135 auto scale_grad = Output(SCALE_GRAD);
136 auto bias_grad = Output(BIAS_GRAD);
137 CAFFE_ENFORCE_EQ(4, input.ndim());
138 const int N = input.dim32(0);
139 const int C = input.dim32(1);
140 const int H = input.dim32(2);
141 const int W = input.dim32(3);
142 CAFFE_ENFORCE_EQ(1, scale.ndim());
143 CAFFE_ENFORCE_EQ(C, scale.dim32(0));
144 CAFFE_ENFORCE_EQ(1, bias.ndim());
145 CAFFE_ENFORCE_EQ(C, bias.dim32(0));
146 CAFFE_ENFORCE_EQ(4, output_grad.ndim());
147 CAFFE_ENFORCE_EQ(N, output_grad.dim32(0));
148 CAFFE_ENFORCE_EQ(C, output_grad.dim32(1));
149 CAFFE_ENFORCE_EQ(H, output_grad.dim32(2));
150 CAFFE_ENFORCE_EQ(W, output_grad.dim32(3));
151 input_grad->ResizeLike(input);
152 scale_grad->ResizeLike(scale);
153 bias_grad->ResizeLike(bias);
155 ConstEigenArrayMap<T> input_mat(input.template data<T>(), H * W, N * C);
156 ConstEigenVectorArrayMap<T> scale_arr(scale.template data<T>(), C);
157 ConstEigenVectorArrayMap<T> bias_arr(bias.template data<T>(), C);
158 ConstEigenArrayMap<T> output_grad_mat(
159 output_grad.template data<T>(), H * W, N * C);
161 EigenArrayMap<T> input_grad_mat(
162 input_grad->template mutable_data<T>(), H * W, N * C);
163 EigenVectorArrayMap<T> scale_grad_arr(
164 scale_grad->template mutable_data<T>(), C);
165 EigenVectorArrayMap<T> bias_grad_arr(
166 bias_grad->template mutable_data<T>(), C);
169 if (InputSize() < 5) {
171 EigenVectorArrayMap<T> mean_mutable_arr(
172 mean_.template mutable_data<T>(), N * C);
173 mean_mutable_arr = input_mat.colwise().mean();
175 CAFFE_ENFORCE_EQ(2, mean.ndim());
176 CAFFE_ENFORCE_EQ(N, mean.dim32(0));
177 CAFFE_ENFORCE_EQ(C, mean.dim32(1));
178 ConstEigenVectorArrayMap<T> mean_arr(mean.template data<T>(), N * C);
181 input_grad_mat = input_mat.rowwise() - mean_arr.transpose();
184 if (InputSize() < 6) {
185 inv_stdev_.Resize(N, C);
186 EigenVectorArrayMap<T> inv_stdev_mutable_arr(
187 inv_stdev_.template mutable_data<T>(), N * C);
190 inv_stdev_mutable_arr = input_grad_mat.pow(2).colwise().mean();
192 inv_stdev_mutable_arr = (inv_stdev_mutable_arr + epsilon_).sqrt().inverse();
194 CAFFE_ENFORCE_EQ(2, inv_stdev.ndim());
195 CAFFE_ENFORCE_EQ(N, inv_stdev.dim32(0));
196 CAFFE_ENFORCE_EQ(C, inv_stdev.dim32(1));
198 ConstEigenVectorArrayMap<T> inv_stdev_arr(
199 inv_stdev.template data<T>(), N * C);
206 scale_grad_arr.setZero();
207 bias_grad_arr.setZero();
208 for (
int n = 0; n < N; ++n) {
209 scale_grad_arr += ((input_grad_mat.rowwise() * inv_stdev_arr.transpose()) *
211 .block(0, n * C, H * W, C)
214 bias_grad_arr += output_grad_mat.block(0, n * C, H * W, C).colwise().sum();
218 const auto temp = ((inv_stdev_arr.pow(3).transpose() *
219 (input_grad_mat * output_grad_mat).colwise().mean()) *
221 input_grad_mat.rowwise() *= temp;
223 input_grad_mat += output_grad_mat.rowwise() * inv_stdev_arr.transpose();
225 const auto result_mean = input_grad_mat.colwise().mean().eval();
226 input_grad_mat.rowwise() -= result_mean;
228 for (
int n = 0; n < N; ++n) {
229 input_grad_mat.block(0, n * C, H * W, C).rowwise() *= scale_arr.transpose();
236 using GradientMakerBase::GradientMakerBase;
237 vector<OperatorDef> GetGradientDefs()
override {
238 vector<string> inputs{I(0), I(1), I(2), GO(0)};
239 if (def_.output_size() >= 2) {
240 inputs.push_back(O(1));
242 if (def_.output_size() >= 3) {
243 inputs.push_back(O(2));
246 "InstanceNormGradient",
249 vector<string>{GI(0), GI(1), GI(2)});
253 REGISTER_CPU_OPERATOR(
254 InstanceNormGradient,
257 OPERATOR_SCHEMA(InstanceNormGradient).NumInputs(4, 6).NumOutputs(3);
/* NOTE(review): the following text is documentation-index residue from the
 * extraction tool, not part of this source file; preserved here as a comment:
 * "A global dictionary that holds information about what Caffe2 modules have
 *  been loaded in the current ..."
 * "static vector<OperatorDef> SingleGradientDef(const Args&... args) —
 *  a helper function to allow one to create one single operator def, which is
 *  usually the case for many ..."
 */