Caffe2 - C++ API
A deep learning, cross-platform ML framework
lp_pool_op.cc
// TODO: reduce the apparent redundancy of all the code below.
#include "caffe2/operators/pool_op.h"

namespace caffe2 {

using std::min;
using std::max;

class LpPool {};

template <>
bool PoolOp<float, CPUContext, LpPool>::RunOnDeviceWithOrderNCHW() {
  auto& X = Input(0);
  auto* Y = Output(0);
  ConvPoolOpBase::SetOutputSize(X, Y, X.dim32(1));
  const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0);
  const auto inv_p = 1.0 / p;

  const float* Xdata = X.data<float>();
  float* Ydata = Y->mutable_data<float>();
  math::Set<float, CPUContext>(Y->size(), 0, Ydata, &context_);
  // The main loop.
  int channels = X.dim32(1);
  int height = X.dim32(2);
  int width = X.dim32(3);
  int pooled_height = Y->dim32(2);
  int pooled_width = Y->dim32(3);

  for (int n = 0; n < X.dim32(0); ++n) {
    for (int c = 0; c < channels; ++c) {
      for (int ph = 0; ph < pooled_height; ++ph) {
        for (int pw = 0; pw < pooled_width; ++pw) {
          // Clip the pooling window to the input bounds.
          int hstart = ph * stride_[0] - pads_[0];
          int wstart = pw * stride_[1] - pads_[1];
          int hend = min(hstart + kernel_[0], height);
          int wend = min(wstart + kernel_[1], width);
          hstart = max(hstart, 0);
          wstart = max(wstart, 0);
          const int pool_index = ph * pooled_width + pw;
          // Accumulate sum(|x|^p) over the window, then take the 1/p power.
          for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
              const int input_index = h * width + w;
              Ydata[pool_index] += std::pow(std::abs(Xdata[input_index]), p);
            }
          }
          Ydata[pool_index] = std::pow(Ydata[pool_index], inv_p);
        }
      }
      // Advance input/output pointers to the next channel plane.
      Xdata += height * width;
      Ydata += pooled_height * pooled_width;
    }
  }
  return true;
}
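// Worked example (added for illustration, not in the original source): with
// p = 2 and a single 2x2 window holding {1, -2, 2, 0}, the pooled value is
//   (|1|^2 + |-2|^2 + |2|^2 + |0|^2)^(1/2) = (1 + 4 + 4 + 0)^(1/2) = 3,
// i.e. the Euclidean norm of the window. With p = 1 the same window would
// pool to 1 + 2 + 2 + 0 = 5, the sum of absolute values.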

template <>
bool PoolOp<float, CPUContext, LpPool>::RunOnDeviceWithOrderNHWC() {
  auto& X = Input(0);
  auto* Y = Output(0);
  int height = X.dim32(1);
  int width = X.dim32(2);
  int channels = X.dim32(3);
  ConvPoolOpBase::SetOutputSize(X, Y, channels);

  const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0);
  const auto inv_p = 1.0 / p;

  const float* Xdata = X.data<float>();
  float* Ydata = Y->mutable_data<float>();
  math::Set<float, CPUContext>(Y->size(), 0, Ydata, &context_);
  // The main loop.
  int pooled_height = Y->dim32(1);
  int pooled_width = Y->dim32(2);
  for (int n = 0; n < X.dim32(0); ++n) {
    for (int ph = 0; ph < pooled_height; ++ph) {
      for (int pw = 0; pw < pooled_width; ++pw) {
        int hstart = ph * stride_[0] - pads_[0];
        int wstart = pw * stride_[1] - pads_[1];
        int hend = min(hstart + kernel_[0], height);
        int wend = min(wstart + kernel_[1], width);
        hstart = max(hstart, 0);
        wstart = max(wstart, 0);
        // In NHWC the channel dimension is innermost, so every channel of a
        // spatial location is pooled in the same pass.
        const int pool_index = (ph * pooled_width + pw) * channels;
        for (int h = hstart; h < hend; ++h) {
          for (int w = wstart; w < wend; ++w) {
            const int input_index = (h * width + w) * channels;
            for (int c = 0; c < channels; ++c) {
              Ydata[pool_index + c] +=
                  std::pow(std::abs(Xdata[input_index + c]), p);
            }
          }
        }
        for (int c = 0; c < channels; ++c) {
          Ydata[pool_index + c] = std::pow(Ydata[pool_index + c], inv_p);
        }
      }
    }
    // Advance to the next image in the batch.
    Xdata += X.size() / X.dim32(0);
    Ydata += Y->size() / Y->dim32(0);
  }
  return true;
}
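// Illustrative sketch (hypothetical helper, not part of the original file):
// both layouts above compute, per window, sum(|x_i|^p)^(1/p); only the index
// arithmetic differs. Factored out, the per-window math is:
namespace {
inline float LpPoolWindowSketch(const float* values, int count, float p) {
  float acc = 0.0f;
  for (int i = 0; i < count; ++i) {
    acc += std::pow(std::abs(values[i]), p);  // accumulate |x|^p
  }
  return std::pow(acc, 1.0f / p);  // take the 1/p power of the sum
}
} // namespace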

template <>
bool PoolGradientOp<float, CPUContext, LpPool>::RunOnDeviceWithOrderNCHW() {
  const auto& X = Input(0);
  const auto& Y = Input(1);
  auto& dY = Input(2);
  auto* dX = Output(0);
  const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0);
  const auto inv_p = 1.0 / p;

  // TODO(Yangqing): Add shape checks.
  dX->ResizeLike(X);
  math::Set<float, CPUContext>(
      X.size(), 0, dX->mutable_data<float>(), &context_);
  const float* dYdata = dY.data<float>();
  const float* Xdata = X.data<float>();
  const float* Ydata = Y.data<float>();
  float* dXdata = dX->mutable_data<float>();

  int channels = X.dim32(1);
  CAFFE_ENFORCE_EQ(channels, dY.dim32(1));
  int height = X.dim32(2);
  int width = X.dim32(3);
  ConvPoolOpBase<CPUContext>::ComputePads({height, width});
  int pooled_height = dY.dim32(2);
  int pooled_width = dY.dim32(3);
  // The main loop.
  for (int n = 0; n < X.dim32(0); ++n) {
    for (int c = 0; c < channels; ++c) {
      for (int ph = 0; ph < pooled_height; ++ph) {
        for (int pw = 0; pw < pooled_width; ++pw) {
          int hstart = ph * stride_[0] - pads_[0];
          int wstart = pw * stride_[1] - pads_[1];
          int hend = min(hstart + kernel_[0], height);
          int wend = min(wstart + kernel_[1], width);
          hstart = max(hstart, 0);
          wstart = max(wstart, 0);
          float scale = 1. / (hend - hstart) / (wend - wstart);
          for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
              // gradient of p-norm is x_j * |x_j|^{p-2} / |x|_p^{p-1}
              dXdata[h * width + w] += dYdata[ph * pooled_width + pw] *
                  Xdata[h * width + w] *
                  std::pow(std::abs(Xdata[h * width + w]), p - 2) /
                  std::pow(Ydata[ph * pooled_width + pw], p - 1);
            }
          }
        }
      }
      // Advance pointers to the next channel plane.
      dXdata += height * width;
      dYdata += pooled_height * pooled_width;
      Ydata += pooled_height * pooled_width;
      Xdata += height * width;
    }
  }
  return true;
}
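// Derivation note (added for clarity): writing the pooled output as
// y = (sum_j |x_j|^p)^(1/p), the partial derivative with respect to x_j is
//   dy/dx_j = (1/p) * (sum_j |x_j|^p)^(1/p - 1) * p * |x_j|^(p-1) * sign(x_j)
//           = sign(x_j) * |x_j|^(p-1) / y^(p-1)
//           = x_j * |x_j|^(p-2) / y^(p-1),
// which is exactly the expression accumulated in the inner loop above after
// multiplying by the upstream gradient dY.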

template <>
bool PoolGradientOp<float, CPUContext, LpPool>::RunOnDeviceWithOrderNHWC() {
  const auto& X = Input(0);
  const auto& Y = Input(1);
  auto& dY = Input(2);
  CAFFE_ENFORCE_EQ(dY.ndim(), 4);
  auto* dX = Output(0);
  // TODO(Yangqing): Add shape checks.
  dX->ResizeLike(X);
  math::Set<float, CPUContext>(
      X.size(), 0, dX->mutable_data<float>(), &context_);
  const float* dYdata = dY.data<float>();
  float* dXdata = dX->mutable_data<float>();
  const float* Xdata = X.data<float>();
  const float* Ydata = Y.data<float>();
  // The main loop.
  int height = X.dim32(1);
  int width = X.dim32(2);
  ConvPoolOpBase<CPUContext>::ComputePads({height, width});
  const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0);
  const auto inv_p = 1.0 / p;

  int pooled_height = dY.dim32(1);
  int pooled_width = dY.dim32(2);
  int channels = X.dim32(3);
  CAFFE_ENFORCE_EQ(channels, dY.dim32(3));
  for (int n = 0; n < X.dim32(0); ++n) {
    for (int ph = 0; ph < pooled_height; ++ph) {
      for (int pw = 0; pw < pooled_width; ++pw) {
        int hstart = ph * stride_[0] - pads_[0];
        int wstart = pw * stride_[1] - pads_[1];
        int hend = min(hstart + kernel_[0], height);
        int wend = min(wstart + kernel_[1], width);
        hstart = max(hstart, 0);
        wstart = max(wstart, 0);
        float scale = 1. / (hend - hstart) / (wend - wstart);
        for (int h = hstart; h < hend; ++h) {
          for (int w = wstart; w < wend; ++w) {
            for (int c = 0; c < channels; ++c) {
              // Same p-norm gradient as the NCHW case, with NHWC indexing.
              dXdata[(h * width + w) * channels + c] +=
                  dYdata[(ph * pooled_width + pw) * channels + c] *
                  Xdata[(h * width + w) * channels + c] *
                  std::pow(
                      std::abs(Xdata[(h * width + w) * channels + c]),
                      p - 2) /
                  std::pow(
                      Ydata[(ph * pooled_width + pw) * channels + c], p - 1);
            }
          }
        }
      }
    }
    // Advance to the next image in the batch.
    dXdata += X.size() / X.dim32(0);
    dYdata += dY.size() / dY.dim32(0);
    Xdata += X.size() / X.dim32(0);
    Ydata += Y.size() / Y.dim32(0);
  }
  return true;
}
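// Design note (added for clarity): dX is zero-filled up front and the inner
// loops accumulate with += because pooling windows can overlap whenever the
// stride is smaller than the kernel, so a single input element may receive
// gradient contributions from several output positions.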

REGISTER_CPU_OPERATOR(LpPool, PoolOp<float, CPUContext, LpPool>);
REGISTER_CPU_OPERATOR(
    LpPoolGradient,
    PoolGradientOp<float, CPUContext, LpPool>);

OPERATOR_SCHEMA(LpPool)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
LpPool consumes an input blob X and applies L-p pooling across the blob
according to the kernel sizes, stride sizes, and pad lengths defined by the
ConvPoolOpBase operator. L-p pooling consists of taking the L-p norm of a
subset of the input tensor according to the kernel size and downsampling the
data into the output blob Y for further processing.
)DOC")
    .Input(
        0,
        "X",
        "Input data tensor from the previous operator; dimensions "
        "depend on whether the NCHW or NHWC operators are being used. For "
        "example, in the former, the input has size (N x C x H x W), where N "
        "is the batch size, C is the number of channels, and H and W are the "
        "height and the width of the data. The corresponding permutation of "
        "dimensions is used in the latter case.")
    .Output(
        0,
        "Y",
        "Output data tensor from L-p pooling across the input tensor. "
        "Dimensions will vary based on various kernel, stride, and pad "
        "sizes.");

OPERATOR_SCHEMA(LpPoolGradient).NumInputs(3).NumOutputs(1);

class GetPoolGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        vector<string>{I(0), O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(LpPool, GetPoolGradient);
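// Note (added for clarity): in GetGradientDefs above, I(0) is the forward
// input X, O(0) the forward output Y, and GO(0) the gradient of the output
// (dY); GI(0) names the gradient of the input (dX) that the gradient op
// produces. This matches the Input(0)/Input(1)/Input(2)/Output(0) order read
// by the LpPoolGradient kernels above.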
} // namespace caffe2