Caffe2 - C++ API
A deep learning, cross-platform ML framework
GLPadImage.cc
1 
2 #include "../core/GLFilter.h"
3 #include "../core/GLImage.h"
4 #include "../core/ImageAllocator.h"
5 
6 #include "caffe2/core/operator.h"
7 #include "caffe2/core/timer.h"
8 #include "caffe2/operators/conv_pool_op_base.h"
9 
10 class GLPadImage : public GLFilter {
11  public:
12  binding* padSize;
13  binding* inputSize;
14  binding* outputSize;
15  binding* inputData;
16 
17  GLPadImage()
18  : GLFilter(
19  "GLPadImage",
20  vertex_shader,
21  fragment_shader,
22  std::vector<binding*>(
23  {BINDING(padSize), BINDING(inputSize), BINDING(outputSize), BINDING(inputData)}),
24  {/* no uniform blocks */},
25  {/* no attributes */},
26  {/* no replacements */}) {}
27 
28  template <typename T>
29  void pad(const GLImageVector<T>& input_images,
30  const GLImageVector<T>& output_images,
31  const int pad_l,
32  const int pad_t);
33 
34  static const char* fragment_shader;
35 };
36 
37 // MARK: GLSL
38 
// Fragment shader source that the GLPadImage constructor passes to GLFilter.
//
// For every output fragment the shader:
//   1. scales v_texCoord by outputSize to get an integer output texel and
//      subtracts padSize, giving a coordinate relative to the input image;
//   2. mirrors negative coordinates about 0            -> max(c, -c);
//   3. mirrors coordinates beyond the last texel about inputSize - 1
//                                                      -> min(c, 2 * (inputSize - 1) - c);
//   4. loads that input texel and writes it out unchanged.
//
// Each mirror is applied only once, so a sufficiently large pad can still
// yield a coordinate outside the input image.
//
// TEXTURE_INPUT / TEXTURE_OUTPUT / TEXTURE_LOAD / TEXTURE_STORE are not defined
// in this file — presumably they are substituted when GLFilter compiles the
// shader; confirm in GLFilter.
39 const char* GLPadImage::fragment_shader = R"GLSL(#version 300 es
40 
41 precision mediump float;
42 precision mediump int;
43 
44 in highp vec2 v_texCoord;
45 
46 uniform ivec2 padSize;
47 uniform ivec2 inputSize;
48 uniform ivec2 outputSize;
49 
50 TEXTURE_INPUT(inputData);
51 TEXTURE_OUTPUT(0, outputData);
52 
53 void main() {
54  ivec2 texelCoord = ivec2(v_texCoord * vec2(outputSize)) - padSize;
55  texelCoord = max(texelCoord, -texelCoord);
56  texelCoord = min(texelCoord, ivec2(2) * (inputSize - 1) - texelCoord);
57  vec4 value = TEXTURE_LOAD(inputData, texelCoord);
58  outputData = TEXTURE_STORE(value);
59 }
60 
61 )GLSL";
62 
63 template <typename T>
64 void GLPadImage::pad(const GLImageVector<T>& input_images,
65  const GLImageVector<T>& output_images,
66  const int pad_l,
67  const int pad_t) {
68  for (int i = 0; i < input_images.size(); i++) {
69  auto input_image = input_images[i];
70  auto output_image = output_images[i];
71  int input_slices = input_image->slices;
72  int output_slices = output_image->slices;
73 
74  for (int is = 0; is < input_slices; is++) {
75  run(std::vector<texture_attachment>({{input_image->textures[is], inputData}}),
76  {output_image->textures.begin() + is, output_image->textures.begin() + is + 1},
77  [&]() {
78  glUniform2i(inputSize->location, input_image->width, input_image->height);
79  glUniform2i(outputSize->location, output_image->width, output_image->height);
80  glUniform2i(padSize->location, pad_l, pad_t);
81  },
82  output_image->width,
83  output_image->height);
84  }
85  }
86 }
87 
88 namespace caffe2 {
89 
90 template <typename OPBase>
91 static void computeOutputHW(OPBase* op, int H, int W, int* OH, int* OW) {
92  Tensor<CPUContext> input, output;
93  input.Resize(1, 1, H, W);
94  op->SetOutputSize(input, &output, 1);
95  CAFFE_ENFORCE_EQ(output.ndim(), 4);
96  *OH = output.dim(2);
97  *OW = output.dim(3);
98 }
99 
100 template <class T>
101 class OpenGLPadImageOp final : public ConvPoolOpBase<CPUContext>, ImageAllocator<T> {
102  public:
103  OpenGLPadImageOp(const OperatorDef& operator_def, Workspace* ws)
104  : ConvPoolOpBase<CPUContext>(operator_def, ws),
105  mode_(OperatorBase::GetSingleArgument<string>("mode", "")) {
106  OPERATOR_NEEDS_FEATURE(order_ == StorageOrder::NCHW, "OpenGL only supports NCHW order.");
107  OPERATOR_NEEDS_FEATURE(mode_ == "reflect", "OpenGL only supports reflection");
108 
109  CAFFE_ENFORCE(legacy_pad_ == LegacyPadding::NOTSET,
110  "Padding layer only supports explicit pad values.");
111  CAFFE_ENFORCE(dilation_h() == 1 && dilation_w() == 1,
112  "Pooling op does not support dilation right now.");
113  CAFFE_ENFORCE(stride_h() == 1 && stride_w() == 1,
114  "Pooling op does not support stride right now.");
115  // Pad op does not use kernel sizes, so we set it to 1 for computing the
116  // output size.
117  kernel_.assign(pads_.size() / 2, 1);
118  }
119 
120  bool RunOnDeviceWithOrderNCHW() override {
121  const GLImageVector<T>& input = Inputs()[0]->template Get<GLImageVector<T>>();
122 
123  const int num_images = input.size();
124  const int input_width = input.width();
125  const int input_height = input.height();
126  const int input_channels = input.channels();
127  const int output_channels = input_channels;
128 
129  int output_height, output_width;
130  computeOutputHW(this, input_height, input_width, &output_height, &output_width);
131 
132  int is_last = OperatorBase::GetSingleArgument<int>("is_last", 0);
133 
135  num_images, output_width, output_height, output_channels, is_last);
136 
137  if (!padImage_) {
138  padImage_.reset(new GLPadImage());
139  LOG(INFO) << input_channels << ": " << input_height << " X " << input_width << " => "
140  << output_channels << ": " << output_height << " X " << output_width;
141  LOG(INFO) << "Padmode: " << mode_ << ", pad_l = " << pad_l() << ", pad_r = " << pad_r() << ", pad_t = " << pad_t()
142  << ", pad_b = " << pad_b();
143  }
144 
145  padImage_->pad(input, *output, pad_l(), pad_t());
146 
147  Outputs()[0]->Reset(output);
148 
149  return true;
150  }
151 
152  private:
153  std::string mode_;
154  std::unique_ptr<GLPadImage> padImage_;
155 };
156 
157 REGISTER_CPU_OPERATOR(OpenGLPadImage, OpenGLPadImageOp<float16_t>);
158 OPERATOR_SCHEMA(OpenGLPadImage).NumInputs(1).NumOutputs(1);
159 } // namespace caffe2
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...