// NOTE(review): this paste is a garbled extraction -- the leading numeric
// tokens are original-file line numbers, and several lines (including this
// function's parameter list, orig. lines 16-20) are not visible here.
// Average-pool gradient, scalar form: the input element accumulates a
// pre-scaled share of the output gradient (scale is presumably
// 1 / window-size, set by the callers below -- confirm against them).
1 #include "caffe2/operators/pool_op.h" 15 static void process_grad(
21 dx_data += (scale * dy_data);
// Average-pool gradient, Eigen column form (used by the NHWC path): column
// x_col of dX accumulates scale * column y_col of dY.  The x/y value maps
// are intentionally unnamed -- average pooling does not need them.
// NOTE(review): the first parameter lines (orig. 25-27: presumably y_col,
// x_col and scale) are missing from this extraction -- confirm upstream.
24 static void process_grad(
28 ConstEigenArrayMap<float>& ,
29 ConstEigenArrayMap<float>& ,
30 ConstEigenArrayMap<float>& dy_data,
31 EigenArrayMap<float>& dx_data) {
32 dx_data.col(x_col) += scale * dy_data.col(y_col);
// Max-pool gradient, scalar form: the gradient is routed only to input
// elements that equal the pooled maximum (x_data == y_data).
// NOTE(review): the parameter list (orig. lines 40-44) and the accumulation
// statement inside the if (orig. lines 46+) are missing from this
// extraction; presumably `dx_data += dy_data;` -- confirm upstream.
39 static void process_grad(
45 if (x_data == y_data) {
// Max-pool gradient, Eigen column form (used by the NHWC path): builds a
// per-channel 0/1 mask via cwiseEqual(x, y) cast to float, and multiplies
// it into the dY column so only the max positions receive gradient.
// NOTE(review): the LHS line (orig. 58, presumably `dx_data.col(x_col) +=`)
// is missing, and `.template cast<float>()` has been split across lines by
// the extraction.  The first parameters (orig. 51-53) are also not visible.
50 static void process_grad(
54 ConstEigenArrayMap<float>& x_data,
55 ConstEigenArrayMap<float>& y_data,
56 ConstEigenArrayMap<float>& dy_data,
57 EigenArrayMap<float>& dx_data) {
59 dy_data.col(y_col) * (x_data.col(x_col)
60 .cwiseEqual(y_data.col(y_col))
61 .
template cast<float>());
// Backward pass for 1-D/2-D/3-D pooling in NCHW storage order.
// PoolType (AveragePool<T> or MaxPool<T>) supplies process_grad(), which
// decides how each output gradient element is routed back into dX.
//
// NOTE(review): this extraction is missing lines (the embedded numeric
// tokens are original-file line numbers).  Not visible here: fetching the
// inputs X/Y/dY and resizing dX (orig. ~68-73), the `case 1/2/3:` labels of
// the switch, several closing braces, some process_grad() argument lines,
// and the final `return true;`.  Comments describe only what is visible.
66 template <
typename T,
class Context,
typename PoolType>
67 bool PoolGradientOp<T, Context, PoolType>::RunOnDeviceWithOrderNCHW() {
// dX is zero-filled up front; the loops below only accumulate into it.
74 math::Set<float, CPUContext>(
75 X.size(), 0, dX->template mutable_data<float>(), &context_);
76 const float* Xdata = X.template data<float>();
77 const float* Ydata = Y.template data<float>();
78 const float* dYdata = dY.template data<float>();
79 float* dXdata = dX->template mutable_data<float>();
// NCHW: dim 0 = batch, dim 1 = channels, spatial dims follow.  width and
// depth default to 1 so the same bookkeeping covers 1-D and 2-D kernels.
80 int channels = X.dim32(1);
81 CAFFE_ENFORCE_EQ(channels, dY.dim32(1));
82 int height = X.dim32(2);
83 int width = kernel_.size() > 1 ? X.dim32(3) : 1;
84 int depth = kernel_.size() > 2 ? X.dim32(4) : 1;
85 vector<int> dims(X.dims().begin() + 2, X.dims().end());
86 ConvPoolOpBase<CPUContext>::ComputePads(dims);
87 int pooled_height = dY.dim32(2);
88 int pooled_width = kernel_.size() > 1 ? dY.dim32(3) : 1;
89 int pooled_depth = kernel_.size() > 2 ? dY.dim32(4) : 1;
// Dispatch on kernel dimensionality (case labels lost in extraction).
91 switch (kernel_.size()) {
// ---- 1-D pooling: loop over batch, channel, pooled position ----
93 for (
int n = 0; n < X.dim32(0); ++n) {
94 for (
int c = 0; c < channels; ++c) {
95 for (
int ph = 0; ph < pooled_height; ++ph) {
// Window [hstart, hend) clipped to the input; pad_t() can push hstart < 0.
96 int hstart = ph * stride_h() - pad_t();
97 int hend = min(hstart + kernel_h(), height);
98 hstart = max(hstart, 0);
// scale = 1 / clipped-window-size: used by AveragePool, ignored by MaxPool.
99 float scale = 1. / (hend - hstart);
100 for (
int h = hstart; h < hend; ++h) {
101 PoolType::process_grad(
102 Xdata[h], Ydata[ph], dYdata[ph], scale, dXdata[h]);
// Advance to the next (n, c) plane.  NOTE(review): the matching
// Xdata/dXdata advances (orig. ~106-107) are not visible here.
108 Ydata += pooled_height;
109 dYdata += pooled_height;
// ---- 2-D pooling ----
114 for (
int n = 0; n < X.dim32(0); ++n) {
115 for (
int c = 0; c < channels; ++c) {
116 for (
int ph = 0; ph < pooled_height; ++ph) {
117 int hstart = ph * stride_h() - pad_t();
118 int hend = min(hstart + kernel_h(), height);
119 hstart = max(hstart, 0);
120 for (
int pw = 0; pw < pooled_width; ++pw) {
121 int wstart = pw * stride_w() - pad_l();
122 int wend = min(wstart + kernel_w(), width);
123 wstart = max(wstart, 0);
// scale = 1 / clipped-window-area.
124 float scale = 1. / (hend - hstart) / (wend - wstart);
// Linear index of (ph, pw) in the pooled plane / (h, w) in the input plane.
125 const int pooled_index = ph * pooled_width + pw;
126 for (
int h = hstart; h < hend; ++h) {
127 for (
int w = wstart; w < wend; ++w) {
128 const int index = h * width + w;
// NOTE(review): surrounding argument lines (orig. 130-131, 133+) missing;
// presumably Xdata[index], Ydata[pooled_index], ..., scale, dXdata[index].
129 PoolType::process_grad(
132 dYdata[pooled_index],
// Advance all four pointers past this (n, c) plane.
140 Xdata += height * width;
141 dXdata += height * width;
142 Ydata += pooled_height * pooled_width;
143 dYdata += pooled_height * pooled_width;
// ---- 3-D pooling: depth uses raw stride_[2] / pads_[2] / kernel_[2] ----
148 for (
int n = 0; n < X.dim32(0); ++n) {
149 for (
int c = 0; c < channels; ++c) {
150 for (
int ph = 0; ph < pooled_height; ++ph) {
151 int hstart = ph * stride_h() - pad_t();
152 int hend = min(hstart + kernel_h(), height);
153 hstart = max(hstart, 0);
154 for (
int pw = 0; pw < pooled_width; ++pw) {
155 int wstart = pw * stride_w() - pad_l();
156 int wend = min(wstart + kernel_w(), width);
157 wstart = max(wstart, 0);
158 for (
int pd = 0; pd < pooled_depth; ++pd) {
159 int dstart = pd * stride_[2] - pads_[2];
160 int dend = min(dstart + kernel_[2], depth);
161 dstart = max(dstart, 0);
// NOTE(review): the `float scale =` line (orig. 162) is missing here;
// the value is 1 / clipped-window-volume.
163 1. / (hend - hstart) / (wend - wstart) / (dend - dstart);
164 const int pooled_index =
165 ph * pooled_width * pooled_depth + pw * pooled_depth + pd;
166 for (
int h = hstart; h < hend; ++h) {
167 for (
int w = wstart; w < wend; ++w) {
168 for (
int d = dstart; d < dend; ++d) {
169 const int index = h * width * depth + w * depth + d;
// NOTE(review): argument lines around orig. 171-174 missing (see 2-D case).
170 PoolType::process_grad(
173 dYdata[pooled_index],
183 Xdata += height * width * depth;
184 dXdata += height * width * depth;
185 Ydata += pooled_height * pooled_width * pooled_depth;
186 dYdata += pooled_height * pooled_width * pooled_depth;
// default: only 1-3 spatial dimensions are supported.
191 CAFFE_THROW(
"Unsupported pooling size");
// Backward pass for 1-D/2-D/3-D pooling in NHWC storage order.  Channels
// are innermost, so each spatial location is a contiguous channel vector;
// the tensors are viewed as Eigen arrays with one column per location and
// gradients are routed whole-column-at-a-time via the column overload of
// PoolType::process_grad().
//
// NOTE(review): extraction is missing lines.  Not visible: fetching
// X/Y/dY (orig. ~199-201), dX ResizeLike / zero-initialization (orig.
// ~204-205, 216), the `case 1/2/3:` labels, closing braces, parts of the
// 3-D pool_index/scale statements, and the final `return true;`.
197 template <
typename T,
class Context,
typename PoolType>
198 bool PoolGradientOp<T, Context, PoolType>::RunOnDeviceWithOrderNHWC() {
202 DCHECK_EQ(dY.ndim(), kernel_.size() + 2);
203 auto* dX = Output(0);
// NHWC: the channel count is the last dimension.
206 int channels = X.dim32(X.ndim() - 1);
207 CAFFE_ENFORCE_EQ(channels, dY.dim32(dY.ndim() - 1));
// Column-major maps: (channels) x (num spatial locations * batch).
// NOTE(review): Ymat is declared ConstEigenArrayMap<T> yet maps
// data<float>() -- a T/float inconsistency worth confirming upstream
// (the sibling maps below all use <float>).
208 ConstEigenArrayMap<T> Ymat(
209 Y.template data<float>(), channels, Y.size() / channels);
210 ConstEigenArrayMap<float> dYmat(
211 dY.template data<float>(), channels, Y.size() / channels);
212 ConstEigenArrayMap<float> Xmat(
213 X.template data<float>(), channels, X.size() / channels);
214 EigenArrayMap<float> dXmat(
215 dX->template mutable_data<float>(), channels, X.size() / channels);
// Spatial dims start at dim 1 in NHWC; width/depth default to 1 so the
// same bookkeeping covers 1-D and 2-D kernels.
217 int height = X.dim32(1);
218 int width = kernel_.size() > 1 ? X.dim32(2) : 1;
219 int depth = kernel_.size() > 2 ? X.dim32(3) : 1;
220 vector<int> dims(X.dims().begin() + 1, X.dims().end() - 1);
221 ConvPoolOpBase<CPUContext>::ComputePads(dims);
222 int pooled_height = dY.dim32(1);
223 int pooled_width = kernel_.size() > 1 ? dY.dim32(2) : 1;
224 int pooled_depth = kernel_.size() > 2 ? dY.dim32(3) : 1;
// Dispatch on kernel dimensionality (case labels lost in extraction).
230 switch (kernel_.size()) {
// ---- 1-D pooling ----
232 for (
int n = 0; n < X.dim32(0); ++n) {
233 for (
int ph = 0; ph < pooled_height; ++ph) {
// Window [hstart, hend) clipped to the input.
234 int hstart = ph * stride_h() - pad_t();
235 int hend = min(hstart + kernel_h(), height);
236 hstart = max(hstart, 0);
// Column index of this pooled location across the whole batch.
237 const int pool_index = n * pooled_height + ph;
// scale = 1 / clipped-window-size: used by AveragePool, ignored by MaxPool.
238 const float scale = 1. / (hend - hstart);
239 for (
int h = hstart; h < hend; ++h) {
240 const int input_index = n * height + h;
241 PoolType::process_grad(
242 pool_index, input_index, scale, Xmat, Ymat, dYmat, dXmat);
// ---- 2-D pooling ----
248 for (
int n = 0; n < X.dim32(0); ++n) {
249 for (
int ph = 0; ph < pooled_height; ++ph) {
250 int hstart = ph * stride_h() - pad_t();
251 int hend = min(hstart + kernel_h(), height);
252 hstart = max(hstart, 0);
253 for (
int pw = 0; pw < pooled_width; ++pw) {
254 int wstart = pw * stride_w() - pad_l();
255 int wend = min(wstart + kernel_w(), width);
256 wstart = max(wstart, 0);
257 const int pool_index = (n * pooled_height + ph) * pooled_width + pw;
258 const float scale = 1. / (hend - hstart) / (wend - wstart);
259 for (
int h = hstart; h < hend; ++h) {
260 for (
int w = wstart; w < wend; ++w) {
261 const int input_index = (n * height + h) * width + w;
262 PoolType::process_grad(
263 pool_index, input_index, scale, Xmat, Ymat, dYmat, dXmat);
// ---- 3-D pooling: depth uses raw stride_[2] / pads_[2] / kernel_[2] ----
271 for (
int n = 0; n < X.dim32(0); ++n) {
272 for (
int ph = 0; ph < pooled_height; ++ph) {
273 int hstart = ph * stride_h() - pad_t();
274 int hend = min(hstart + kernel_h(), height);
275 hstart = max(hstart, 0);
276 for (
int pw = 0; pw < pooled_width; ++pw) {
277 int wstart = pw * stride_w() - pad_l();
278 int wend = min(wstart + kernel_w(), width);
279 wstart = max(wstart, 0);
280 for (
int pd = 0; pd < pooled_depth; ++pd) {
281 int dstart = pd * stride_[2] - pads_[2];
282 int dend = min(dstart + kernel_[2], depth);
283 dstart = max(dstart, 0);
// NOTE(review): the tail of pool_index (orig. 286) and the
// `const float scale =` line (orig. 288) are missing from this view.
284 const int pool_index =
285 ((n * pooled_height + ph) * pooled_width + pw) *
289 1. / (hend - hstart) / (wend - wstart) / (dend - dstart);
290 for (
int h = hstart; h < hend; ++h) {
291 for (
int w = wstart; w < wend; ++w) {
292 for (
int d = dstart; d < dend; ++d) {
293 const int input_index =
294 ((n * height + h) * width + w) * depth + d;
// NOTE(review): argument line(s) after orig. 295 missing; presumably the
// same (pool_index, input_index, scale, Xmat, Ymat, dYmat, dXmat) call.
295 PoolType::process_grad(
// default: only 1-3 spatial dimensions are supported.
312 CAFFE_THROW(
"Unsupported pooling size");
318 REGISTER_CPU_OPERATOR(
320 PoolGradientOp<
float, CPUContext, AveragePool<float>>);
321 OPERATOR_SCHEMA(AveragePoolGradient).NumInputs(3).NumOutputs(1);
323 REGISTER_CPU_OPERATOR(
324 AveragePool1DGradient,
325 PoolGradientOp<
float, CPUContext, AveragePool<float>>);
326 OPERATOR_SCHEMA(AveragePool1DGradient).NumInputs(3).NumOutputs(1);
328 REGISTER_CPU_OPERATOR(
329 AveragePool2DGradient,
330 PoolGradientOp<
float, CPUContext, AveragePool<float>>);
331 OPERATOR_SCHEMA(AveragePool2DGradient).NumInputs(3).NumOutputs(1);
333 REGISTER_CPU_OPERATOR(
334 AveragePool3DGradient,
335 PoolGradientOp<
float, CPUContext, AveragePool<float>>);
336 OPERATOR_SCHEMA(AveragePool3DGradient).NumInputs(3).NumOutputs(1);
338 REGISTER_CPU_OPERATOR(
340 PoolGradientOp<
float, CPUContext, MaxPool<float>>);
341 OPERATOR_SCHEMA(MaxPoolGradient).NumInputs(3).NumOutputs(1);
343 REGISTER_CPU_OPERATOR(
345 PoolGradientOp<
float, CPUContext, MaxPool<float>>);
346 OPERATOR_SCHEMA(MaxPool1DGradient).NumInputs(3).NumOutputs(1);
348 REGISTER_CPU_OPERATOR(
350 PoolGradientOp<
float, CPUContext, MaxPool<float>>);
351 OPERATOR_SCHEMA(MaxPool2DGradient).NumInputs(3).NumOutputs(1);
353 REGISTER_CPU_OPERATOR(
355 PoolGradientOp<
float, CPUContext, MaxPool<float>>);
356 OPERATOR_SCHEMA(MaxPool3DGradient).NumInputs(3).NumOutputs(1);
358 class GetPoolGradient :
public GradientMakerBase {
359 using GradientMakerBase::GradientMakerBase;
360 vector<OperatorDef> GetGradientDefs()
override {
361 return SingleGradientDef(
362 def_.type() +
"Gradient",
364 vector<string>{I(0), O(0), GO(0)},
365 vector<string>{GI(0)});
368 REGISTER_GRADIENT(AveragePool, GetPoolGradient);
369 REGISTER_GRADIENT(AveragePool1D, GetPoolGradient);
370 REGISTER_GRADIENT(AveragePool2D, GetPoolGradient);
371 REGISTER_GRADIENT(AveragePool3D, GetPoolGradient);
372 REGISTER_GRADIENT(MaxPool, GetPoolGradient);
373 REGISTER_GRADIENT(MaxPool1D, GetPoolGradient);
374 REGISTER_GRADIENT(MaxPool2D, GetPoolGradient);
375 REGISTER_GRADIENT(MaxPool3D, GetPoolGradient);
A global dictionary that holds information about what Caffe2 modules have been loaded in the current runtime. (NOTE(review): this sentence appears to be a stray fragment from another file's documentation, truncated in extraction; confirm its intended location.)