Caffe2 - C++ API
A deep learning, cross platform ML framework
GLStylizer.cc
1 
2 #include "../core/GLContext.h"
3 #include "../core/GLFilter.h"
4 #include "../core/GLImage.h"
5 #include "../core/ImageAllocator.h"
6 
7 #include "caffe2/core/common.h"
8 #include "caffe2/core/context.h"
9 #include "caffe2/core/operator.h"
10 
11 enum InputFormat { BGRA = 0, RGBA = 1 };
12 
13 class GLStylizer : public GLFilter {
14  binding* inputData;
15  binding* outputSize;
16  binding* mean;
17  binding* noise_std;
18  bool deprocess;
19 
20  public:
21  GLStylizer(bool _deprocess = false, InputFormat input_format = BGRA)
22  : GLFilter(_deprocess ? "GLDeStylizer" : "GLStylizer",
23  vertex_shader,
24  fragment_shader,
25  std::vector<binding*>({BINDING(inputData), BINDING(mean), BINDING(noise_std), BINDING(outputSize)}),
26  {/* no uniform blocks */},
27  {/* no attributes */},
28  {{"DEPROCESS", caffe2::to_string(_deprocess)}, {"RGBAINPUT", caffe2::to_string(input_format)}}),
29  deprocess(_deprocess) {}
30 
31  template <typename T1, typename T2>
32  void stylize(const GLImage<T1>* input_image,
33  const GLImage<T2>* output_image,
34  const float mean_values[3],
35  float noise_std_value);
36 
37  static const char* fragment_shader;
38 };
39 
40 // MARK: GLSL
41 
// Fragment shader shared by the pre-process and de-process stylizer passes.
//
// DEPROCESS and RGBAINPUT are filled in from the replacement strings that the
// GLStylizer constructor passes to GLFilter ($(NAME) placeholders below).
//
//  * !DEPROCESS: samples inputData at v_texCoord, scales by 255, subtracts
//    `mean`, and adds one scalar `noise_std * rand(v_texCoord)` to all three
//    channels. rand() returns fract(...), so the added noise is never negative.
//    With RGBAINPUT the sampled colour is swizzled with .bgr first.
//  * DEPROCESS: loads the texel at v_texCoord * outputSize, adds `mean`,
//    divides by 255 and writes alpha = 1.0. With RGBAINPUT the result is
//    swizzled with .bgra.
//
// TEXTURE_INPUT / TEXTURE_OUTPUT / TEXTURE_LOAD / TEXTURE_STORE are not defined
// in this file; presumably GLFilter injects them -- TODO confirm.
// NOTE(review): the in-shader comment below spells the format "RBGA"; this looks
// like a typo for "RGBA". It is part of the string literal, so it is left as is.
42 const char* GLStylizer::fragment_shader = R"GLSL(#version 300 es
43 
44 #define DEPROCESS $(DEPROCESS)
45 #define RGBAINPUT $(RGBAINPUT)
46 
47 precision mediump float;
48 precision mediump int;
49 precision mediump sampler2D;
50 
51 in highp vec2 v_texCoord;
52 
53 uniform ivec2 outputSize;
54 
55 uniform vec3 mean;
56 uniform float noise_std;
57 
58 #if DEPROCESS
59 TEXTURE_INPUT(inputData);
60 layout(location = 0) out mediump vec4 outputData;
61 #else
62 uniform sampler2D inputData;
63 TEXTURE_OUTPUT(0, outputData);
64 #endif
65 
66 #if !DEPROCESS
67 // http://byteblacksmith.com/improvements-to-the-canonical-one-liner-glsl-rand-for-opengl-es-2-0/
68 
69 highp float rand(vec2 co) {
70  highp float a = 12.9898;
71  highp float b = 78.233;
72  highp float c = 43758.5453;
73  highp float dt = dot(co.xy, vec2(a, b));
74  highp float sn = mod(dt, 3.14);
75  return fract(sin(sn) * c);
76 }
77 #endif
78 
79 // In AR Engine, input/output a RBGA texture; otherwise, BGRA tensor => texture
80 #if RGBAINPUT
81 void main() {
82 #if DEPROCESS
83  ivec2 texelCoord = ivec2(v_texCoord * vec2(outputSize));
84  vec4 val = TEXTURE_LOAD(inputData, texelCoord);
85  outputData = vec4((val.rgb + mean) / 255.0, 1.0).bgra;
86 #else
87  outputData = TEXTURE_STORE(vec4(255.0 * texture(inputData, v_texCoord).bgr - mean + vec3(noise_std * rand(v_texCoord)), 0.0));
88 #endif
89 }
90 #else
91 void main() {
92 #if DEPROCESS
93  ivec2 texelCoord = ivec2(v_texCoord * vec2(outputSize));
94  vec4 val = TEXTURE_LOAD(inputData, texelCoord);
95  outputData = vec4((val.rgb + mean) / 255.0, 1.0);
96 #else
97  outputData = TEXTURE_STORE(vec4(255.0 * texture(inputData, v_texCoord).rgb - mean + vec3(noise_std * rand(v_texCoord)), 0.0));
98 #endif
99 }
100 #endif
101 )GLSL";
102 
103 template <typename T1, typename T2>
104 void GLStylizer::stylize(const GLImage<T1>* input_image,
105  const GLImage<T2>* output_image,
106  const float mean_values[3],
107  float noise_std_value) {
108  int input_slices = input_image->slices;
109  int output_slices = output_image->slices;
110 
111  run(std::vector<texture_attachment>({{input_image->textures[0], inputData}}),
112  {output_image->textures[0]},
113  [&]() {
114  glUniform2i(outputSize->location, output_image->width, output_image->height);
115  glUniform3f(mean->location, mean_values[0], mean_values[1], mean_values[2]);
116  if (!deprocess) {
117  glUniform1f(noise_std->location, noise_std_value);
118  }
119  },
120  output_image->width,
121  output_image->height);
122 }
123 
124 namespace caffe2 {
126  ImageAllocator<uint8_t>,
127  ImageAllocator<float16_t> {
128  public:
129  // Expect this many channels as input
130  static constexpr int kInputChannels = 4;
131 
132  // Expect this many channels as output
133  static constexpr int kOutputChannels = 3;
134 
135  USE_OPERATOR_BASE_FUNCTIONS;
136 
 // Forwards the operator definition and workspace to the Operator<CPUContext> base;
 // glStylizer_ stays empty until the first RunOnDevice() call.
137  OpenGLTensorToTextureStylizerPreprocessOp(const OperatorDef& operator_def, Workspace* ws)
138  : Operator<CPUContext>(operator_def, ws) {}
139 
 // Uploads the uint8 tensor in Input(0) to a GL texture, runs the GLStylizer
 // pre-processing pass with the mean from Input(1), and stores the resulting
 // image vector in output blob 0.
 //
 // Enforced preconditions: Input(0) is 4-D with dim 0 == 1 and dim 3 ==
 // kInputChannels; Input(1) has kOutputChannels elements.
 // The "noise_std" operator argument (default 10.0) is passed to stylize().
 //
 // NOTE(review): in this listing the statements that declare `output_images`
 // and `input_images` are missing (only their argument lists are visible), so
 // their exact types cannot be confirmed from here.
140  bool RunOnDevice() {
141  const auto& input = Input(0);
142  const auto& mean = Input(1);
143 
144  CAFFE_ENFORCE(input.ndim() == 4);
145 
 // Dimensions are read in the order (images, height, width, channels).
146  const int num_images = input.dim32(0);
147  const int input_height = input.dim32(1);
148  const int input_width = input.dim32(2);
149  const int input_channels = input.dim32(3);
150 
151  CAFFE_ENFORCE(input.dim32(0) == 1); // N == 1
152  CAFFE_ENFORCE(input_channels == kInputChannels);
153  CAFFE_ENFORCE(mean.size() == kOutputChannels); // Assume BGR or BGRA
154 
155  // get the buffers from input tensors
156  const float* mean_buffer = mean.template data<float>();
157  const uint8_t* input_buffer = input.template data<uint8_t>();
158 
159  // set up the OpenGL context
160  GLContext::getGLContext()->set_context();
161 
163  input_width,
164  input_height,
165  kOutputChannels,
166 #if CAFFE2_IOS
167  true
168 #else
169  false
170 #endif
171  );
172  const int tile_x = 1, tile_y = 1;
174  num_images, input_width, input_height, kInputChannels, tile_x, tile_y, false);
175  for (int i = 0; i < num_images; i++) {
176  auto input_image = (*input_images)[i];
177  auto output_image = (*output_images)[i];
178  const GLTexture* inputTexture = input_image->textures[0];
 // input_buffer is not advanced per image; this is only correct because
 // N == 1 is enforced above.
179  inputTexture->loadData(input_buffer);
180 
 // The filter is created on first use and reused on later calls.
181  if (!glStylizer_) {
182  glStylizer_.reset(new GLStylizer());
183  }
184 
185  glStylizer_->stylize(
186  input_image, output_image, mean_buffer, GetSingleArgument<float>("noise_std", 10.0));
187  }
 // NOTE(review): input_images is freed manually; if anything in the loop above
 // throws, this delete is skipped.
188  delete input_images;
 // output_images is handed to the output blob here.
189  Outputs()[0]->Reset(output_images);
190 
191  return true;
192  }
193 
194  private:
195  std::unique_ptr<GLStylizer> glStylizer_;
196 };
197 
198 template <InputFormat inputFormat>
200  ImageAllocator<uint8_t>,
201  ImageAllocator<float16_t> {
202  public:
203  // Expect this many channels as input
204  static constexpr int kInputChannels = 4;
205 
206  // Expect this many channels as output
207  static constexpr int kOutputChannels = 3;
208 
209  USE_OPERATOR_BASE_FUNCTIONS;
210 
 // Forwards the operator definition and workspace to the Operator<CPUContext> base;
 // glStylizer_ stays empty until the first RunOnDevice() call.
211  OpenGLTextureToTextureStylizerPreprocessOp(const OperatorDef& operator_def, Workspace* ws)
212  : Operator<CPUContext>(operator_def, ws) {}
213 
 // Runs the GLStylizer pre-processing pass on every image of the
 // GLImageVector<uint8_t> held in input blob 0, using the mean from Input(1),
 // and stores the resulting image vector in output blob 0.
 //
 // Enforced preconditions: at least one input image, input[0]->slices == 1,
 // input.channels() == kInputChannels, and Input(1) has kOutputChannels
 // elements. The "noise_std" operator argument (default 10.0) is passed to
 // stylize().
 //
 // NOTE(review): in this listing the statement that declares `output_images`
 // is missing (only its argument list is visible), so its exact type cannot
 // be confirmed from here.
214  bool RunOnDevice() {
215  const GLImageVector<uint8_t>& input = Inputs()[0]->template Get<GLImageVector<uint8_t>>();
216  const auto& mean = Input(1);
217 
218  const int num_images = input.size();
219  const int input_height = input.height();
220  const int input_width = input.width();
221  const int input_channels = input.channels();
222 
223  CAFFE_ENFORCE_GT(num_images, 0);
224  CAFFE_ENFORCE(input[0]->slices == 1); // N == 1
225  CAFFE_ENFORCE(input_channels == kInputChannels);
226  CAFFE_ENFORCE(mean.size() == kOutputChannels); // Assume BGR or BGRA
227 
228  // get the buffers from input tensors
229  const float* mean_buffer = mean.template data<float>();
230 
232  num_images, input_width, input_height, kOutputChannels, false);
233 
 // The filter is created on first use with the class template's inputFormat
 // and reused on later calls.
234  if (!glStylizer_) {
235  glStylizer_.reset(new GLStylizer(false, inputFormat));
236  }
237  for (int i = 0; i < num_images; i++) {
238  auto input_image = input[i];
239  auto output_image = (*output_images)[i];
240  glStylizer_->stylize(
241  input_image, output_image, mean_buffer, GetSingleArgument<float>("noise_std", 10.0));
242  }
 // output_images is handed to the output blob here.
243  Outputs()[0]->Reset(output_images);
244 
245  return true;
246  }
247 
248  private:
249  std::unique_ptr<GLStylizer> glStylizer_;
250 };
251 
252 REGISTER_CPU_OPERATOR(OpenGLTensorToTextureStylizerPreprocess,
254 OPERATOR_SCHEMA(OpenGLTensorToTextureStylizerPreprocess).NumInputs(2).NumOutputs(1);
255 
256 REGISTER_CPU_OPERATOR(OpenGLTextureToTextureStylizerPreprocess,
258 OPERATOR_SCHEMA(OpenGLTextureToTextureStylizerPreprocess).NumInputs(2).NumOutputs(1);
259 
261  ImageAllocator<uint8_t> {
262  public:
264 
265  // Expect this many channels as input
266  static constexpr int kInputChannels = 3;
267 
268  // Expect this many channels as output
269  static constexpr int kOutputChannels = 4;
270 
271  bool RunOnDevice() {
272  const GLImageVector<float16_t>& input = Inputs()[0]->template Get<GLImageVector<float16_t>>();
273  const auto& mean = Input(1);
274  auto* output = Output(0);
275 
276  const int num_images = input.size(), channels = input.channels(), height = input.height(),
277  width = input.width();
278  // Assume BGR or BGRA
279  CAFFE_ENFORCE(mean.size() == kInputChannels);
280  CAFFE_ENFORCE(channels == kInputChannels);
281  // RGB
282  output->Resize(num_images, height, width, kOutputChannels);
283 
284  const auto* mean_data = mean.template data<float>();
285  auto* output_buffer = output->template mutable_data<uint8_t>();
286 
287  GLImageVector<uint8_t>* output_images =
288  ImageAllocator<uint8_t>::newImage(num_images, width, height, kOutputChannels, true);
289 
290  if (!glStylizer_) {
291  glStylizer_.reset(new GLStylizer(true));
292  }
293 
294  for (int i = 0; i < num_images; i++) {
295  auto input_image = input[i];
296  auto output_image = (*output_images)[i];
297  glStylizer_->stylize(input_image, output_image, mean_data, 0);
298 
299  output_image->textures[0]->map_read([&](const void* buffer,
300  size_t width,
301  size_t height,
302  size_t stride,
303  size_t channels,
304  const GLTexture::Type& type) {
305  if (width == stride) {
306  memcpy(output_buffer, buffer, channels * width * height);
307  } else {
308  typedef uint8_t(input_data_t)[height][stride][channels];
309  typedef uint8_t(output_data_t)[height][width][channels];
310 
311  const input_data_t& input_data = *reinterpret_cast<const input_data_t*>(buffer);
312  output_data_t& output_data = *reinterpret_cast<output_data_t*>(output_buffer);
313 
314  for (int y = 0; y < height; y++) {
315  memcpy(output_data[y], input_data[y], channels * width);
316  }
317  }
318  });
319  }
320  delete output_images;
321 
322  return true;
323  }
324 
325  private:
326  std::unique_ptr<GLStylizer> glStylizer_;
327 };
328 
329 template <InputFormat inputFormat>
331  ImageAllocator<uint8_t> {
332  public:
334 
335  // Expect this many channels as input
336  static constexpr int kInputChannels = 3;
337 
338  // Expect this many channels as output
339  static constexpr int kOutputChannels = 4;
340 
341  bool RunOnDevice() {
342  const GLImageVector<float16_t>& input = Inputs()[0]->template Get<GLImageVector<float16_t>>();
343  const auto& mean = Input(1);
344 
345  const int num_images = input.size(), channels = input.channels(), height = input.height(),
346  width = input.width();
347 
348  CAFFE_ENFORCE(mean.size() == kInputChannels);
349  CAFFE_ENFORCE(channels == kInputChannels);
350 
351  const auto* mean_data = mean.template data<float>();
352 
353  // Use foreignTextureAllocator inside GLContext
354  // glDeleteTexture will not be called from inside caffe2 for this texture
355  GLImageVector<uint8_t>* output_images;
356  auto textureAllocator = GLContext::getGLContext()->getTextureAllocator();
357  const int tile_x = 1, tile_y = 1;
358  if (textureAllocator != nullptr) {
359  output_images = ImageAllocator<uint8_t>::newImage(
360  num_images, width, height, kOutputChannels, tile_x, tile_y, textureAllocator);
361  } else {
362  // fallback when textureAllocator is not set
363  output_images = ImageAllocator<uint8_t>::newImage(num_images, width, height, kOutputChannels);
364  }
365 
366  if (!glStylizer_) {
367  glStylizer_.reset(new GLStylizer(true, inputFormat));
368  }
369 
370  for (int i = 0; i < num_images; i++) {
371  auto input_image = input[i];
372  auto output_image = (*output_images)[i];
373  glStylizer_->stylize(input_image, output_image, mean_data, 0);
374  }
375 
376  Outputs()[0]->Reset(output_images);
377 
378  return true;
379  }
380 
381  private:
382  std::unique_ptr<GLStylizer> glStylizer_;
383 };
384 
385 REGISTER_CPU_OPERATOR(OpenGLTextureToTensorStylizerDeprocess,
387 OPERATOR_SCHEMA(OpenGLTextureToTensorStylizerDeprocess).NumInputs(2).NumOutputs(1);
388 
389 REGISTER_CPU_OPERATOR(OpenGLTextureToTextureStylizerDeprocess,
391 OPERATOR_SCHEMA(OpenGLTextureToTextureStylizerDeprocess).NumInputs(2).NumOutputs(1);
392 } // namespace caffe2
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...