Caffe2 - C++ API
A deep learning, cross-platform ML framework
resize_op.cc
1 #include "caffe2/operators/resize_op.h"
2 
3 #include "caffe2/utils/cpu_neon.h"
4 #include "caffe2/utils/math.h"
5 
6 namespace caffe2 {
7 
// Nearest-neighbor upsampling by exactly 2x along both spatial dimensions.
//
// `input` is read as batch_size * num_channels consecutive row-major planes
// of input_height x input_width floats. `output` receives the same number of
// planes, each (2 * input_height) x (2 * input_width); every source pixel is
// replicated into a 2x2 patch of the destination.
void resizeNearest2x(
    int batch_size,
    int num_channels,
    int input_height,
    int input_width,
    const float* input,
    float* output) {
  const int output_height = input_height * 2;
  const int output_width = input_width * 2;

  for (int n = 0; n < batch_size; ++n) {
    for (int c = 0; c < num_channels; ++c) {
      for (int y = 0; y < output_height; ++y) {
        // Output rows 2k and 2k + 1 are both copies of source row k.
        const float* src_row = input + (y / 2) * input_width;
        float* dst_row = output + output_width * y;
        int x = 0;

#ifdef __ARM_NEON__
        // Vector path: consume four source pixels per iteration.
        const int vec_end = (input_width / 4) * 4; // round down
        while (x < vec_end) {
          // load 0 1 2 3
          const float32x4_t px = vld1q_f32(src_row + x);
          float32x4x2_t dup = {{px, px}};
          // interleaved store writes 00 11 22 33
          vst2q_f32(dst_row + x * 2, dup);
          x += 4;
        }
#endif

        // Scalar path: the entire row when NEON is unavailable, otherwise
        // only the tail (fewer than four pixels) left by the vector loop.
        for (; x < input_width; ++x) {
          const float px = src_row[x];
          dst_row[x * 2] = px;
          dst_row[x * 2 + 1] = px;
        }
      }
      // Advance both cursors to the next (batch, channel) plane.
      input += input_height * input_width;
      output += output_height * output_width;
    }
  }
}
55 
56 template <>
57 bool ResizeNearestOp<float, CPUContext>::RunOnDevice() {
58  const auto& X = Input(0);
59  auto* Y = Output(0);
60 
61  const int batch_size = X.dim32(0),
62  num_channels = X.dim32(1),
63  input_height = X.dim32(2),
64  input_width = X.dim32(3);
65  int output_width = input_width * width_scale_;
66  int output_height = input_height * height_scale_;
67  Y->Resize(batch_size, num_channels, output_height, output_width);
68 
69  const float* Xdata = X.data<float>();
70  float* Ydata = Y->mutable_data<float>();
71 
72  // Specialized implementation for fast 2x upsampling
73  if (width_scale_ == 2.0 && height_scale_ == 2.0) {
74  resizeNearest2x(
75  batch_size, num_channels, input_height, input_width, Xdata, Ydata);
76  return true;
77  }
78 
79  for (int n = 0; n < batch_size; ++n) {
80  for (int c = 0; c < num_channels; ++c) {
81  for (int y = 0; y < output_height; ++y) {
82  const int in_y = std::min((int)(y / height_scale_), (input_height - 1));
83  for (int x = 0; x < output_width; ++x) {
84  const int in_x = std::min((int)(x / width_scale_), (input_width - 1));
85  Ydata[output_width * y + x] = Xdata[input_width * in_y + in_x];
86  }
87  }
88  Xdata += input_height * input_width;
89  Ydata += output_width * output_height;
90  }
91  }
92 
93  return true;
94 }
95 
96 template <>
97 bool ResizeNearestGradientOp<float, CPUContext>::RunOnDevice() {
98  const auto& dY = Input(0);
99  const auto& X = Input(1);
100  auto* dX = Output(0);
101 
102  const auto& inputDims = dY.dims();
103  CAFFE_ENFORCE_EQ(4, inputDims.size());
104  const int batch_size = dY.dim32(0),
105  num_channels = dY.dim32(1),
106  input_height = dY.dim32(2),
107  input_width = dY.dim32(3);
108  const int output_height = X.dim32(2);
109  const int output_width = X.dim32(3);
110  dX->Resize(batch_size, num_channels, output_height, output_width);
111  math::Set<float, CPUContext>(dX->size(),
112  0.0f,
113  dX->mutable_data<float>(),
114  &context_);
115 
116  const float* dYdata = dY.data<float>();
117  float* dXdata = dX->mutable_data<float>();
118 
119  for (int n = 0; n < batch_size; ++n) {
120  for (int c = 0; c < num_channels; ++c) {
121  for (int y = 0; y < input_height; ++y) {
122  const int out_y = std::min((int)(y / height_scale_),
123  (output_height - 1));
124  for (int x = 0; x < input_width; ++x) {
125  const int out_x = std::min((int)(x / width_scale_),
126  (output_width - 1));
127  dXdata[output_width * out_y + out_x] += dYdata[input_width * y + x];
128  }
129  }
130  dYdata += input_height * input_width;
131  dXdata += output_height * output_width;
132  }
133  }
134 
135  return true;
136 }
137 
// Register the float/CPU specializations defined above under the operator
// names "ResizeNearest" and "ResizeNearestGradient".
REGISTER_CPU_OPERATOR(ResizeNearest, ResizeNearestOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(ResizeNearestGradient,
                      ResizeNearestGradientOp<float, CPUContext>);
141 
142 // Input: X, output: Y
143 OPERATOR_SCHEMA(ResizeNearest)
144  .NumInputs(1)
145  .NumOutputs(1)
146  .Arg("width_scale", "Scale along width dimension")
147  .Arg("height_scale", "Scale along height dimension")
148  .SetDoc(R"DOC(
149 Resizes the spatial dimensions of the input using nearest neighbor
150 interpolation. The `width_scale` and `height_scale` arguments
151 control the size of the output, which is given by:
152 output_width = floor(input_width * width_scale)
153 output_height = floor(output_height * height_scale)
154 )DOC")
155  .Input(0, "X", "Input tensor")
156  .Output(0, "Y", "Output tensor");
157 
// Schema for the backward op.
// Inputs: dY (gradient w.r.t. the forward output) and X (the forward input,
// read by ResizeNearestGradientOp for its height/width); output: dX.
OPERATOR_SCHEMA(ResizeNearestGradient)
    .NumInputs(2)
    .NumOutputs(1)
    .Arg("width_scale", "Scale along width dimension")
    .Arg("height_scale", "Scale along height dimension");
164 
166  using GradientMakerBase::GradientMakerBase;
167  vector<OperatorDef> GetGradientDefs() override {
168  return SingleGradientDef("ResizeNearestGradient",
169  "",
170  vector<string>{GO(0), I(0)},
171  vector<string>{GI(0)});
172  }
173 };
174 REGISTER_GRADIENT(ResizeNearest, GetResizeNearestGradient);
175 
176 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
static vector< OperatorDef > SingleGradientDef(const Args &...args)
a helper function to allow one to create one single operator def, which is usually the case for many ...