Caffe2 - C++ API
A deep learning, cross-platform ML framework
pool_gradient_op.cc
1 #include "caffe2/operators/pool_op.h"
2 
3 namespace caffe2 {
4 
5 using std::max;
6 using std::min;
7 
8 namespace {
// These two classes are just used as template arguments passed to the
// PoolGradientOp template to instantiate the different algorithms.
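// For each pooling window, AveragePool backpropagates scale * dY to every
// input element the window covers, while MaxPool backpropagates dY only to
// the element(s) whose value equals the pooled maximum (on ties, every
// matching element receives the full gradient).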
template <typename T>
class AveragePool {
 public:
  static void process_grad(
      const T& /*x_data*/,
      const T& /*y_data*/,
      const T& dy_data,
      const T& scale,
      T& dx_data) {
    dx_data += (scale * dy_data);
  }

  static void process_grad(
      const int y_col,
      const int x_col,
      const float scale,
      ConstEigenArrayMap<float>& /*x_data*/,
      ConstEigenArrayMap<float>& /*y_data*/,
      ConstEigenArrayMap<float>& dy_data,
      EigenArrayMap<float>& dx_data) {
    dx_data.col(x_col) += scale * dy_data.col(y_col);
  }
};

template <typename T>
class MaxPool {
 public:
  static void process_grad(
      const T& x_data,
      const T& y_data,
      const T& dy_data,
      const T& /*scale*/,
      T& dx_data) {
    if (x_data == y_data) {
      dx_data += dy_data;
    }
  }

  static void process_grad(
      const int y_col,
      const int x_col,
      const float /*scale*/,
      ConstEigenArrayMap<float>& x_data,
      ConstEigenArrayMap<float>& y_data,
      ConstEigenArrayMap<float>& dy_data,
      EigenArrayMap<float>& dx_data) {
    dx_data.col(x_col) += dy_data.col(y_col) *
        (x_data.col(x_col).cwiseEqual(y_data.col(y_col)).template cast<float>());
  }
};
} // namespace

template <typename T, class Context, typename PoolType>
bool PoolGradientOp<T, Context, PoolType>::RunOnDeviceWithOrderNCHW() {
  auto& X = Input(0);
  auto& Y = Input(1);
  auto& dY = Input(2);
  auto* dX = Output(0);
  // TODO(Yangqing): Add shape checks.
  dX->ResizeLike(X);
  math::Set<float, CPUContext>(
      X.size(), 0, dX->template mutable_data<float>(), &context_);
  const float* Xdata = X.template data<float>();
  const float* Ydata = Y.template data<float>();
  const float* dYdata = dY.template data<float>();
  float* dXdata = dX->template mutable_data<float>();
  int channels = X.dim32(1);
  CAFFE_ENFORCE_EQ(channels, dY.dim32(1));
  int height = X.dim32(2);
  int width = kernel_.size() > 1 ? X.dim32(3) : 1;
  int depth = kernel_.size() > 2 ? X.dim32(4) : 1;
  vector<int> dims(X.dims().begin() + 2, X.dims().end());
  ConvPoolOpBase<CPUContext>::ComputePads(dims);
  int pooled_height = dY.dim32(2);
  int pooled_width = kernel_.size() > 1 ? dY.dim32(3) : 1;
  int pooled_depth = kernel_.size() > 2 ? dY.dim32(4) : 1;
  // The main loop
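  // Gradients are scattered back one (image, channel) slice at a time: after
  // each channel, Xdata/dXdata advance by one spatial slice (H, H*W, or
  // H*W*D) and Ydata/dYdata by one pooled slice, so the indices below are
  // relative to the current slice. The scale passed to process_grad is the
  // reciprocal of the clipped window size; only AveragePool uses it.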
  switch (kernel_.size()) {
    case 1:
      for (int n = 0; n < X.dim32(0); ++n) {
        for (int c = 0; c < channels; ++c) {
          for (int ph = 0; ph < pooled_height; ++ph) {
            int hstart = ph * stride_h() - pad_t();
            int hend = min(hstart + kernel_h(), height);
            hstart = max(hstart, 0);
            float scale = 1. / (hend - hstart);
            for (int h = hstart; h < hend; ++h) {
              PoolType::process_grad(
                  Xdata[h], Ydata[ph], dYdata[ph], scale, dXdata[h]);
            }
          }
          // offset
          Xdata += height;
          dXdata += height;
          Ydata += pooled_height;
          dYdata += pooled_height;
        }
      }
      break;
    case 2:
      for (int n = 0; n < X.dim32(0); ++n) {
        for (int c = 0; c < channels; ++c) {
          for (int ph = 0; ph < pooled_height; ++ph) {
            int hstart = ph * stride_h() - pad_t();
            int hend = min(hstart + kernel_h(), height);
            hstart = max(hstart, 0);
            for (int pw = 0; pw < pooled_width; ++pw) {
              int wstart = pw * stride_w() - pad_l();
              int wend = min(wstart + kernel_w(), width);
              wstart = max(wstart, 0);
              float scale = 1. / (hend - hstart) / (wend - wstart);
              const int pooled_index = ph * pooled_width + pw;
              for (int h = hstart; h < hend; ++h) {
                for (int w = wstart; w < wend; ++w) {
                  const int index = h * width + w;
                  PoolType::process_grad(
                      Xdata[index],
                      Ydata[pooled_index],
                      dYdata[pooled_index],
                      scale,
                      dXdata[index]);
                }
              }
            }
          }
          // offset
          Xdata += height * width;
          dXdata += height * width;
          Ydata += pooled_height * pooled_width;
          dYdata += pooled_height * pooled_width;
        }
      }
      break;
    case 3:
      for (int n = 0; n < X.dim32(0); ++n) {
        for (int c = 0; c < channels; ++c) {
          for (int ph = 0; ph < pooled_height; ++ph) {
            int hstart = ph * stride_h() - pad_t();
            int hend = min(hstart + kernel_h(), height);
            hstart = max(hstart, 0);
            for (int pw = 0; pw < pooled_width; ++pw) {
              int wstart = pw * stride_w() - pad_l();
              int wend = min(wstart + kernel_w(), width);
              wstart = max(wstart, 0);
              for (int pd = 0; pd < pooled_depth; ++pd) {
                int dstart = pd * stride_[2] - pads_[2];
                int dend = min(dstart + kernel_[2], depth);
                dstart = max(dstart, 0);
                float scale =
                    1. / (hend - hstart) / (wend - wstart) / (dend - dstart);
                const int pooled_index =
                    ph * pooled_width * pooled_depth + pw * pooled_depth + pd;
                for (int h = hstart; h < hend; ++h) {
                  for (int w = wstart; w < wend; ++w) {
                    for (int d = dstart; d < dend; ++d) {
                      const int index = h * width * depth + w * depth + d;
                      PoolType::process_grad(
                          Xdata[index],
                          Ydata[pooled_index],
                          dYdata[pooled_index],
                          scale,
                          dXdata[index]);
                    }
                  }
                }
              }
            }
          }
          // offset
          Xdata += height * width * depth;
          dXdata += height * width * depth;
          Ydata += pooled_height * pooled_width * pooled_depth;
          dYdata += pooled_height * pooled_width * pooled_depth;
        }
      }
      break;
    default:
      CAFFE_THROW("Unsupported pooling size");
      return false;
  }
  return true;
}

template <typename T, class Context, typename PoolType>
bool PoolGradientOp<T, Context, PoolType>::RunOnDeviceWithOrderNHWC() {
  auto& X = Input(0);
  auto& Y = Input(1);
  auto& dY = Input(2);
  DCHECK_EQ(dY.ndim(), kernel_.size() + 2);
  auto* dX = Output(0);
  dX->ResizeLike(X);

  int channels = X.dim32(X.ndim() - 1);
  CAFFE_ENFORCE_EQ(channels, dY.dim32(dY.ndim() - 1));
  ConstEigenArrayMap<T> Ymat(
      Y.template data<float>(), channels, Y.size() / channels);
  ConstEigenArrayMap<float> dYmat(
      dY.template data<float>(), channels, Y.size() / channels);
  ConstEigenArrayMap<float> Xmat(
      X.template data<float>(), channels, X.size() / channels);
  EigenArrayMap<float> dXmat(
      dX->template mutable_data<float>(), channels, X.size() / channels);
  dXmat.setZero();
  int height = X.dim32(1);
  int width = kernel_.size() > 1 ? X.dim32(2) : 1;
  int depth = kernel_.size() > 2 ? X.dim32(3) : 1;
  vector<int> dims(X.dims().begin() + 1, X.dims().end() - 1);
  ConvPoolOpBase<CPUContext>::ComputePads(dims);
  int pooled_height = dY.dim32(1);
  int pooled_width = kernel_.size() > 1 ? dY.dim32(2) : 1;
  int pooled_depth = kernel_.size() > 2 ? dY.dim32(3) : 1;

  // The main loop
  // Do not do openmp here: the following for loops are looping over the pooled
  // output, so if one parallelizes the outer loops, race conditions could
  // happen in the inner loops.
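  // In the NHWC layout the Eigen maps above (Xmat, Ymat, dYmat, dXmat) view
  // each tensor as a (channels x spatial-locations) array with one column per
  // spatial position of the whole batch, so process_grad updates all channels
  // of one position at a time via column operations. pool_index and
  // input_index are column indices that already include the image index n.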
  switch (kernel_.size()) {
    case 1:
      for (int n = 0; n < X.dim32(0); ++n) {
        for (int ph = 0; ph < pooled_height; ++ph) {
          int hstart = ph * stride_h() - pad_t();
          int hend = min(hstart + kernel_h(), height);
          hstart = max(hstart, 0);
          const int pool_index = n * pooled_height + ph;
          const float scale = 1. / (hend - hstart);
          for (int h = hstart; h < hend; ++h) {
            const int input_index = n * height + h;
            PoolType::process_grad(
                pool_index, input_index, scale, Xmat, Ymat, dYmat, dXmat);
          }
        }
      }
      break;
    case 2:
      for (int n = 0; n < X.dim32(0); ++n) {
        for (int ph = 0; ph < pooled_height; ++ph) {
          int hstart = ph * stride_h() - pad_t();
          int hend = min(hstart + kernel_h(), height);
          hstart = max(hstart, 0);
          for (int pw = 0; pw < pooled_width; ++pw) {
            int wstart = pw * stride_w() - pad_l();
            int wend = min(wstart + kernel_w(), width);
            wstart = max(wstart, 0);
            const int pool_index = (n * pooled_height + ph) * pooled_width + pw;
            const float scale = 1. / (hend - hstart) / (wend - wstart);
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                const int input_index = (n * height + h) * width + w;
                PoolType::process_grad(
                    pool_index, input_index, scale, Xmat, Ymat, dYmat, dXmat);
              }
            }
          }
        }
      }
      break;
    case 3:
      for (int n = 0; n < X.dim32(0); ++n) {
        for (int ph = 0; ph < pooled_height; ++ph) {
          int hstart = ph * stride_h() - pad_t();
          int hend = min(hstart + kernel_h(), height);
          hstart = max(hstart, 0);
          for (int pw = 0; pw < pooled_width; ++pw) {
            int wstart = pw * stride_w() - pad_l();
            int wend = min(wstart + kernel_w(), width);
            wstart = max(wstart, 0);
            for (int pd = 0; pd < pooled_depth; ++pd) {
              int dstart = pd * stride_[2] - pads_[2];
              int dend = min(dstart + kernel_[2], depth);
              dstart = max(dstart, 0);
              const int pool_index =
                  ((n * pooled_height + ph) * pooled_width + pw) *
                      pooled_depth +
                  pd;
              const float scale =
                  1. / (hend - hstart) / (wend - wstart) / (dend - dstart);
              for (int h = hstart; h < hend; ++h) {
                for (int w = wstart; w < wend; ++w) {
                  for (int d = dstart; d < dend; ++d) {
                    const int input_index =
                        ((n * height + h) * width + w) * depth + d;
                    PoolType::process_grad(
                        pool_index,
                        input_index,
                        scale,
                        Xmat,
                        Ymat,
                        dYmat,
                        dXmat);
                  }
                }
              }
            }
          }
        }
      }
      break;
    default:
      CAFFE_THROW("Unsupported pooling size");
      return false;
  }
  return true;
}

REGISTER_CPU_OPERATOR(
    AveragePoolGradient,
    PoolGradientOp<float, CPUContext, AveragePool<float>>);
OPERATOR_SCHEMA(AveragePoolGradient).NumInputs(3).NumOutputs(1);

REGISTER_CPU_OPERATOR(
    AveragePool1DGradient,
    PoolGradientOp<float, CPUContext, AveragePool<float>>);
OPERATOR_SCHEMA(AveragePool1DGradient).NumInputs(3).NumOutputs(1);

REGISTER_CPU_OPERATOR(
    AveragePool2DGradient,
    PoolGradientOp<float, CPUContext, AveragePool<float>>);
OPERATOR_SCHEMA(AveragePool2DGradient).NumInputs(3).NumOutputs(1);

REGISTER_CPU_OPERATOR(
    AveragePool3DGradient,
    PoolGradientOp<float, CPUContext, AveragePool<float>>);
OPERATOR_SCHEMA(AveragePool3DGradient).NumInputs(3).NumOutputs(1);

REGISTER_CPU_OPERATOR(
    MaxPoolGradient,
    PoolGradientOp<float, CPUContext, MaxPool<float>>);
OPERATOR_SCHEMA(MaxPoolGradient).NumInputs(3).NumOutputs(1);

REGISTER_CPU_OPERATOR(
    MaxPool1DGradient,
    PoolGradientOp<float, CPUContext, MaxPool<float>>);
OPERATOR_SCHEMA(MaxPool1DGradient).NumInputs(3).NumOutputs(1);

REGISTER_CPU_OPERATOR(
    MaxPool2DGradient,
    PoolGradientOp<float, CPUContext, MaxPool<float>>);
OPERATOR_SCHEMA(MaxPool2DGradient).NumInputs(3).NumOutputs(1);

REGISTER_CPU_OPERATOR(
    MaxPool3DGradient,
    PoolGradientOp<float, CPUContext, MaxPool<float>>);
OPERATOR_SCHEMA(MaxPool3DGradient).NumInputs(3).NumOutputs(1);

class GetPoolGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        vector<string>{I(0), O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
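// For each registered pooling operator <Op>, this maker emits a single
// <Op>Gradient op whose inputs are the forward input X (I(0)), the forward
// output Y (O(0)), and the gradient of the output (GO(0)), and whose only
// output is the gradient of the input (GI(0)), matching the
// NumInputs(3)/NumOutputs(1) schemas declared above.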
REGISTER_GRADIENT(AveragePool, GetPoolGradient);
REGISTER_GRADIENT(AveragePool1D, GetPoolGradient);
REGISTER_GRADIENT(AveragePool2D, GetPoolGradient);
REGISTER_GRADIENT(AveragePool3D, GetPoolGradient);
REGISTER_GRADIENT(MaxPool, GetPoolGradient);
REGISTER_GRADIENT(MaxPool1D, GetPoolGradient);
REGISTER_GRADIENT(MaxPool2D, GetPoolGradient);
REGISTER_GRADIENT(MaxPool3D, GetPoolGradient);
} // namespace caffe2
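The max-pool branch above routes each output gradient back only to the window
positions whose input value equals the pooled maximum. A minimal standalone
sketch of that rule for a single 1-D channel, with made-up data, kernel 2,
stride 2, and no padding; it is plain C++ and does not use any Caffe2 types:

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  // Hypothetical 1-D example: X is the input, Y its max-pooled output, and
  // dY the gradient flowing back from the next layer.
  const std::vector<float> X = {1.f, 3.f, 2.f, 2.f};
  const std::vector<float> Y = {3.f, 2.f};
  const std::vector<float> dY = {0.5f, 1.0f};
  std::vector<float> dX(X.size(), 0.f);

  const int kernel = 2, stride = 2, height = 4, pooled_height = 2;
  for (int ph = 0; ph < pooled_height; ++ph) {
    int hstart = ph * stride;
    int hend = std::min(hstart + kernel, height);
    for (int h = hstart; h < hend; ++h) {
      // Same rule as MaxPool<T>::process_grad: only positions matching the
      // pooled maximum receive the gradient; ties each get the full value.
      if (X[h] == Y[ph]) {
        dX[h] += dY[ph];
      }
    }
  }
  for (float v : dX) {
    std::cout << v << " "; // prints: 0 0.5 1 1
  }
  std::cout << std::endl;
  return 0;
}

Note how the second window {2, 2} ties: both inputs equal the pooled maximum,
so both receive the full gradient of 1.0, exactly as the operator above would
compute.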