Caffe2 - C++ API
A deep learning, cross-platform ML framework
GLFilter.cc
1 
2 #include "GLFilter.h"
3 #include <sstream>
4 
5 GLFilter::GLFilter(const std::string _kernel_name,
6  const std::string _vertex_shader,
7  const std::string _fragment_shader,
8  const std::vector<binding*> uniforms,
9  const std::vector<binding*> uniform_blocks,
10  const std::vector<binding*> attributes,
11  const replacements_t& _replacements)
12  : kernel_name(_kernel_name),
13  uniforms_(uniforms),
14  uniform_blocks_(uniform_blocks),
15  attributes_(attributes) {
16  // shader program
17  if (createProgram(_vertex_shader.c_str(),
18  process_replacements(_fragment_shader, _replacements).c_str(),
19  &program)) {
20  gl_log(GL_VERBOSE, "created program %d\n", program);
21  } else {
22  releaseBuffers();
23 
24  throwRuntimeError(
25  [&](std::stringstream& errmsg) { errmsg << "Problem initializing OpenGL program"; });
26  }
27 }
28 
// GLSL helper macros that GLFilter::process_replacements() inserts after the
// "#version 300 es" line of every fragment shader.
//   unpackHalf4x16(pd): builds a vec4 from unpackHalf2x16 of pd.x and pd.y.
//   packHalf4x16(pd):   builds a uvec2 from packHalf2x16 of pd.xy and pd.zw.
// The macro argument is parenthesised so that a compound expression passed as
// `pd` is swizzled as a whole rather than only its last operand.
const char* shader_utils = R"GLSL(
#define unpackHalf4x16(pd) vec4(unpackHalf2x16((pd).x), unpackHalf2x16((pd).y))
#define packHalf4x16(pd) uvec2(packHalf2x16((pd).xy), packHalf2x16((pd).zw))
)GLSL";
33 
// GLSL prelude that GLFilter::process_replacements() inserts when
// GLContext::halfFloatTextureSupported() returns true.
// It defines the TEXTURE_OUTPUT / TEXTURE_INPUT / TEXTURE_LOAD / TEXTURE_STORE
// macros on top of a plain sampler2D with mediump vec4 outputs; loads go
// through texelFetch at LOD 0 and stores pass the value through unchanged.
const char* half_float_texture_utils = R"GLSL(
precision mediump sampler2D;

#define TEXTURE_OUTPUT(_loc, _var) \
 layout(location = _loc) out mediump vec4 _var
#define TEXTURE_INPUT(_var) \
 uniform sampler2D _var
#define TEXTURE_LOAD(_input, _coord) \
 texelFetch((_input), (_coord), 0)
#define TEXTURE_STORE(_val) \
 (_val)
)GLSL";
46 
// GLSL prelude that GLFilter::process_replacements() inserts when
// GLContext::halfFloatTextureSupported() returns false.
// It defines the same TEXTURE_* macros as the half-float variant, but on top
// of a usampler2D with highp uvec2 outputs: loads unpack the fetched .xy with
// unpackHalf4x16 and stores pack the value with packHalf4x16 (both macros
// come from shader_utils).
const char* half_float_compat_texture_utils = R"GLSL(
precision highp usampler2D;

#define TEXTURE_OUTPUT(_loc, _var) \
 layout(location = _loc) out highp uvec2 _var
#define TEXTURE_INPUT(_var) \
 uniform usampler2D _var
#define TEXTURE_LOAD(_input, _coord) \
 unpackHalf4x16(texelFetch((_input), (_coord), 0).xy)
#define TEXTURE_STORE(_val) \
 (uvec2(packHalf4x16((_val))))
)GLSL";
59 
60 std::string GLFilter::process_replacements(std::string shader,
61  const replacements_t& replacements) const {
62  for (auto&& replacement : replacements) {
63  std::string tag = "$(" + replacement.first + ")";
64  std::string value = replacement.second;
65 
66  size_t position = shader.find(tag);
67  if (position != std::string::npos) {
68  shader.replace(position, tag.size(), value);
69  } else {
70  throwRuntimeError(
71  [&](std::stringstream& errmsg) { errmsg << "Couldn't find replacement tag: " << tag; });
72  }
73  }
74 
75  // Add some #defines for convenience
76  std::string version_tag = "#version 300 es";
77  if (GLContext::getGLContext()->halfFloatTextureSupported()) {
78  shader.insert(shader.find(version_tag) + version_tag.size(), half_float_texture_utils);
79  } else {
80  shader.insert(shader.find(version_tag) + version_tag.size(), half_float_compat_texture_utils);
81  }
82  shader.insert(shader.find(version_tag) + version_tag.size(), shader_utils);
83  return shader;
84 }
85 
86 template <typename T>
87 void GLFilter::attach_uniform_buffer(const binding* block,
88  GLuint bindingPoint,
89  std::function<void(T*, size_t)> loader) {
90  if (block->location >= 0) {
91  if (bindingPoint < kMaxUniformBlocks) {
92  if (uniformBlock[bindingPoint] == 0) {
93  // Associate the uniform block index with a binding point
94  glUniformBlockBinding(program, block->location, bindingPoint);
95 
96  // Get the size of block
97  glGetActiveUniformBlockiv(program, block->location, GL_UNIFORM_BLOCK_DATA_SIZE, &blockSize[bindingPoint]);
98 
99  // Create and fill a buffer object
100  glGenBuffers(1, &uniformBlock[bindingPoint]);
101 
102  gl_log(GL_VERBOSE, "created uniform buffer block %d\n", uniformBlock[bindingPoint]);
103  }
104 
105  // Fill a buffer object
106  glBindBuffer(GL_UNIFORM_BUFFER, uniformBlock[bindingPoint]);
107  glBufferData(GL_UNIFORM_BUFFER, blockSize[bindingPoint], NULL, GL_DYNAMIC_DRAW);
108 
109  checkGLError([&](std::stringstream& errmsg) {
110  errmsg << "Unable to bind uniform buffer " << block->name << ":" << block->location
111  << " at binding point " << bindingPoint;
112  });
113 
114  T* blockData = (T*)glMapBufferRange(
115  GL_UNIFORM_BUFFER, 0, blockSize[bindingPoint], GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
116  if (blockData != NULL) {
117  // Copy the data into the mapped buffer
118  if (loader)
119  loader(blockData, blockSize[bindingPoint]);
120 
121  // Unmap the buffer
122  if (glUnmapBuffer(GL_UNIFORM_BUFFER) == GL_TRUE) {
123  // Bind the buffer object to the uniform block binding point
124  glBindBufferBase(GL_UNIFORM_BUFFER, bindingPoint, uniformBlock[bindingPoint]);
125  } else {
126  throwRuntimeError([&](std::stringstream& errmsg) { errmsg << "Error unmapping element buffer object"; });
127  }
128  } else {
129  throwRuntimeError([&](std::stringstream& errmsg) {
130  errmsg << "Error mapping element buffer object, blockSize: " << blockSize;
131  });
132  }
133 
134  glBindBuffer(GL_UNIFORM_BUFFER, 0);
135  } else {
136  throwRuntimeError([&](std::stringstream& errmsg) {
137  errmsg << "Uniform block binding point out of range: " << bindingPoint << ", should be < "
138  << kMaxUniformBlocks;
139  });
140  }
141  } else {
142  throwRuntimeError([&](std::stringstream& errmsg) { errmsg << "unbound uniform block"; });
143  }
144 }
145 
146 template void GLFilter::attach_uniform_buffer<float16_t>(const binding* block,
147  GLuint bindingPoint,
148  std::function<void(float16_t*, size_t)> loader);
149 
150 static const GLenum unused_capability[] = {GL_CULL_FACE,
151  GL_BLEND,
152  GL_DITHER,
153  GL_STENCIL_TEST,
154  GL_DEPTH_TEST,
155  GL_SCISSOR_TEST,
156  GL_POLYGON_OFFSET_FILL,
157  GL_SAMPLE_ALPHA_TO_COVERAGE,
158  GL_SAMPLE_COVERAGE};
159 
160 void GLFilter::run(const std::vector<texture_attachment>& input,
161  const std::vector<const GLTexture*>& output,
162  std::function<void(void)> uniforms_initializer,
163  int width,
164  int height) {
165  const int first_texture_id = GL_TEXTURE0;
166 
167  GLint defaultFramebuffer = 0;
168  glGetIntegerv(GL_FRAMEBUFFER_BINDING, &defaultFramebuffer);
169 
170  gl_log(GL_VERBOSE,
171  "GLFilter::run %s - inputs: %d, outputs: %d, width: %d, height: %d\n",
172  kernel_name.c_str(),
173  input.size(),
174  output.size(),
175  width,
176  height);
177 
178  if (output.size() > 4) {
179  throwRuntimeError([&](std::stringstream& errmsg) {
180  errmsg << "Too many output textures: " << output.size() << ", should be <= 4";
181  });
182  }
183 
184  if (frameBuffer == 0) {
185  // create the frame buffer
186  glGenFramebuffers(1, &frameBuffer);
187  gl_log(GL_VERBOSE, "created frame buffer %d\n", frameBuffer);
188  }
189 
190  glBindFramebuffer(GL_FRAMEBUFFER, frameBuffer);
191  checkGLError([&](std::stringstream& errmsg) { errmsg << "glBindFramebuffer"; });
192 
193  // Set up the output textures
194  for (int i = 0; i < output.size(); i++) {
195  GLenum target = output[i]->target();
196  GLuint texture = output[i]->name();
197 
198  glBindTexture(target, texture);
199  glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, target, texture, 0);
200 
201  checkGLError([&](std::stringstream& errmsg) {
202  errmsg << "Unable to connect output texture " << texture << " at color attachment " << i;
203  });
204 
205  gl_log(GL_VERBOSE, "connected output texture %d to color attachment %d\n", texture, i);
206  }
207 
208  // Bind the output textures to the frame buffer attachments
209  if (!frame_buffer_initialized) {
210  const int attachments_number = output.size();
211  const GLenum attachments[4] = {
212  GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1, GL_COLOR_ATTACHMENT2, GL_COLOR_ATTACHMENT3};
213 
214  glDrawBuffers(attachments_number, attachments);
215 
216  int fbs = glCheckFramebufferStatus(GL_FRAMEBUFFER);
217 
218  if (fbs != GL_FRAMEBUFFER_COMPLETE) {
219  throwRuntimeError(
220  [&](std::stringstream& errmsg) { errmsg << "Frame buffer incomplete: " << fbs; });
221  }
222 
223  frame_buffer_initialized = true;
224  }
225 
226  glUseProgram(program);
227  checkGLError([&](std::stringstream& errmsg) { errmsg << "glUseProgram"; });
228 
229  // Set up the input textures
230  GLenum texture_idx = first_texture_id;
231  for (int i = 0; i < input.size(); i++, texture_idx++) {
232  if (input[i].uniform->location >= 0) {
233  GLenum target = input[i].texture->target();
234  GLuint texture = input[i].texture->name();
235 
236  glActiveTexture(texture_idx);
237  glBindTexture(target, texture);
238  glUniform1i(input[i].uniform->location, texture_idx - GL_TEXTURE0);
239 
240  checkGLError([&](std::stringstream& errmsg) {
241  errmsg << ": Unable to attach input texture " << texture << " to uniform "
242  << input[i].uniform->name << ":" << input[i].uniform->location << " at index "
243  << texture_idx - GL_TEXTURE0;
244  });
245 
246  gl_log(GL_VERBOSE,
247  "connected input texture %d to texture unit %d\n",
248  texture,
249  texture_idx - GL_TEXTURE0);
250  } else {
251  gl_log(GL_VERBOSE, "something wrong happened when i = %d\n", i);
252  }
253  }
254 
255  // Caller supplied uniforms initializer
256  if (uniforms_initializer) {
257  uniforms_initializer();
258 
259  checkGLError([&](std::stringstream& errmsg) {
260  errmsg << "errors in the uniforms initializer callback";
261  });
262  }
263 
264  // Validate program
265  if (check_opengl_errors && !validateProgram(program)) {
266  throwRuntimeError(
267  [&](std::stringstream& errmsg) { errmsg << "Couldn't validate OpenGL program"; });
268  }
269 
270  glViewport(0, 0, width, height);
271 
272  // Disable stuff we don't need and make sure that we have all the channels ebabled
273  for (int i = 0; i < sizeof(unused_capability) / sizeof(GLenum); i++) {
274  glDisable(unused_capability[i]);
275  }
276  glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
277 
278  // glDrawElements should be more efficient, but on iOS glDrawArrays is faster.
279 
280  const bool useDrawArrays = true;
281 
282  if (useDrawArrays) {
283  enum { ATTRIB_VERTEX, ATTRIB_TEXTUREPOSITON, NUM_ATTRIBUTES };
284 
285  static const GLfloat squareVertices[] = {
286  -1.0f,
287  -1.0f, // bottom left
288  1.0f,
289  -1.0f, // bottom right
290  -1.0f,
291  1.0f, // top left
292  1.0f,
293  1.0f, // top right
294  };
295 
296  static const float textureVertices[] = {
297  0.0f,
298  0.0f, // bottom left
299  1.0f,
300  0.0f, // bottom right
301  0.0f,
302  1.0f, // top left
303  1.0f,
304  1.0f, // top right
305  };
306 
307  glBindBuffer(GL_ARRAY_BUFFER, 0);
308  glVertexAttribPointer(ATTRIB_VERTEX, 2, GL_FLOAT, 0, 0, squareVertices);
309  glEnableVertexAttribArray(ATTRIB_VERTEX);
310  checkGLError(
311  [&](std::stringstream& errmsg) { errmsg << "glEnableVertexAttribArray(ATTRIB_VERTEX)"; });
312 
313  glVertexAttribPointer(ATTRIB_TEXTUREPOSITON, 2, GL_FLOAT, 0, 0, textureVertices);
314  glEnableVertexAttribArray(ATTRIB_TEXTUREPOSITON);
315  checkGLError([&](std::stringstream& errmsg) {
316  errmsg << "glEnableVertexAttribArray(ATTRIB_TEXTUREPOSITON)";
317  });
318 
319  gl_log(GL_VERBOSE, "Calling glDrawArrays\n");
320  glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
321 
322  checkGLError([&](std::stringstream& errmsg) { errmsg << "glDrawArrays"; });
323  } else {
324  // Run the shaders on the output geometry
325  static const GLfloat vVertices[] = {
326  -1.0f, -1.0f, 0.0f, // Position 0
327  0.0f, 0.0f, // TexCoord 0
328  -1.0f, 1.0f, 0.0f, // Position 1
329  0.0f, 1.0f, // TexCoord 1
330  1.0f, 1.0f, 0.0f, // Position 2
331  1.0f, 1.0f, // TexCoord 2
332  1.0f, -1.0f, 0.0f, // Position 3
333  1.0f, 0.0f // TexCoord 3
334  };
335  static const GLushort indices[] = {0, 1, 2, 0, 2, 3};
336 
337  // Load the vertex position
338  glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), vVertices);
339  // Load the texture coordinate
340  glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), &vVertices[3]);
341 
342  glEnableVertexAttribArray(0);
343  glEnableVertexAttribArray(1);
344 
345  gl_log(GL_VERBOSE, "Calling glDrawElements\n");
346  glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, indices);
347 
348  checkGLError([&](std::stringstream& errmsg) { errmsg << "glDrawElements"; });
349  }
350 
351 #if CAFFE2_ANDROID
352  glFlush();
353 #endif
354 
355  // Unbind the current texture - Man, this is expensive!
356  for (int i = texture_idx - 1; i >= first_texture_id; i--) {
357  gl_log(GL_VERBOSE, "unbinding texture unit %d\n", i - GL_TEXTURE0);
358  glActiveTexture(i);
359  glBindTexture(GL_TEXTURE_2D, 0);
360 
361  checkGLError([&](std::stringstream& errmsg) {
362  errmsg << "Error unbinding texture unit " << i - GL_TEXTURE0;
363  });
364  }
365 
366  glBindFramebuffer(GL_FRAMEBUFFER, defaultFramebuffer);
367 }
368 
369 void GLFilter::releaseBuffers() {
370  for (int i = 0; i < kMaxUniformBlocks; i++) {
371  if (uniformBlock[i]) {
372  gl_log(GL_VERBOSE, "deleting uniform buffer block %d\n", uniformBlock[i]);
373  glDeleteBuffers(1, &uniformBlock[i]);
374  uniformBlock[i] = 0;
375  }
376  }
377  if (frameBuffer) {
378  gl_log(GL_VERBOSE, "deleting frame buffer %d\n", frameBuffer);
379  glDeleteFramebuffers(1, &frameBuffer);
380  frameBuffer = 0;
381  }
382 }
383 
384 void GLFilter::deleteProgram() {
385  if (program) {
386  gl_log(GL_VERBOSE, "deleting program %d\n", program);
387  glDeleteProgram(program);
388  program = 0;
389  }
390 }
391 
392 void GLFilter::deleteBindings() {
393  for (binding* uniform : uniforms_) {
394  delete uniform;
395  }
396  for (binding* uniform_block : uniform_blocks_) {
397  delete uniform_block;
398  }
399  for (binding* attribute : attributes_) {
400  delete attribute;
401  }
402 }
403 
404 // Simple vertex shader setting up the coordinates system
405 const char* GLFilter::vertex_shader = R"GLSL(#version 300 es
406 
407  layout(location = 0) in vec4 a_position;
408  layout(location = 1) in vec2 a_texCoord;
409  out vec2 v_texCoord;
410 
411  void main()
412  {
413  gl_Position = a_position;
414  v_texCoord = a_texCoord;
415  }
416 )GLSL";
417 
418 bool GLFilter::createProgram(const GLchar* vertSource,
419  const GLchar* fragSource,
420  GLuint* program) const {
421  GLuint vertShader = 0, fragShader = 0, prog = 0, status = 1;
422 
423  // Clear the error state. We check error state later in the function and
424  // want to capture only errors in filter program initialization.
425  glGetError();
426 
427  // Create shader program
428  prog = glCreateProgram();
429 
430  // Create and compile vertex shader
431  status *= compileShader(GL_VERTEX_SHADER, 1, &vertSource, &vertShader);
432 
433  // Create and compile fragment shader
434  status *= compileShader(GL_FRAGMENT_SHADER, 1, &fragSource, &fragShader);
435 
436  // Attach vertex shader to program
437  glAttachShader(prog, vertShader);
438 
439  // Attach fragment shader to program
440  glAttachShader(prog, fragShader);
441 
442  // Bind attribute locations
443  // This needs to be done prior to linking
444  for (auto&& attribute : attributes_) {
445  glBindAttribLocation(prog, attribute->location, attribute->name.c_str());
446 
447  checkGLError([&](std::stringstream& errmsg) {
448  errmsg << "Couldn't bind attribute: " << attribute->name << " at location "
449  << attribute->location;
450  });
451  }
452 
453  // Link program
454  status *= linkProgram(prog);
455 
456  // Get locations of uniforms
457  if (status) {
458  for (auto&& uniform : uniforms_) {
459  uniform->location = glGetUniformLocation(prog, uniform->name.c_str());
460 
461  checkGLError([&](std::stringstream& errmsg) {
462  errmsg << "Couldn't resolve uniform: " << uniform->name;
463  });
464  }
465 
466  for (auto&& uniform_block : uniform_blocks_) {
467  uniform_block->location = glGetUniformBlockIndex(prog, uniform_block->name.c_str());
468  gl_log(GL_VERBOSE,
469  "Getting location for uniform block: %s, location: %d\n",
470  uniform_block->name.c_str(),
471  uniform_block->location);
472 
473  checkGLError([&](std::stringstream& errmsg) {
474  errmsg << "Couldn't resolve uniform block: " << uniform_block->name;
475  });
476  }
477 
478  *program = prog;
479  }
480 
481  // Release vertex and fragment shaders
482  if (vertShader) {
483  glDetachShader(prog, vertShader);
484  glDeleteShader(vertShader);
485  }
486  if (fragShader) {
487  glDetachShader(prog, fragShader);
488  glDeleteShader(fragShader);
489  }
490 
491  return status == 1;
492 }
493 
494 #include <stdlib.h>
495 
496 /* Compile a shader from the provided source(s) */
497 GLint GLFilter::compileShader(GLenum target,
498  GLsizei count,
499  const GLchar** sources,
500  GLuint* shader) const {
501  GLint status = 1;
502 
503  *shader = glCreateShader(target);
504  glShaderSource(*shader, count, sources, NULL);
505  glCompileShader(*shader);
506 
507  GLint logLength = 0;
508  glGetShaderiv(*shader, GL_INFO_LOG_LENGTH, &logLength);
509  if (logLength > 0) {
510  std::vector<GLchar> log(logLength);
511  glGetShaderInfoLog(*shader, logLength, &logLength, &log[0]);
512  gl_log(GL_ERR, "Shader compile log:\n%s", &log[0]);
513  }
514 
515  glGetShaderiv(*shader, GL_COMPILE_STATUS, &status);
516  if (status == 0) {
517  int i;
518 
519  gl_log(GL_ERR, "Failed to compile shader:\n");
520  for (i = 0; i < count; i++)
521  gl_log(GL_ERR, "%s", sources[i]);
522  }
523 
524  return status;
525 }
526 
527 /* Link a program with all currently attached shaders */
528 GLint GLFilter::linkProgram(GLuint program) const {
529  GLint status = 1;
530 
531  glLinkProgram(program);
532 
533  GLint logLength = 0;
534  glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength);
535  if (logLength > 0) {
536  std::vector<GLchar> log(logLength);
537  glGetProgramInfoLog(program, logLength, &logLength, &log[0]);
538  gl_log(GL_ERR, "Program link log:\n%s", &log[0]);
539  }
540 
541  glGetProgramiv(program, GL_LINK_STATUS, &status);
542  if (status == 0)
543  gl_log(GL_ERR, "Failed to link program %d\n", program);
544 
545  return status;
546 }
547 
548 /* Validate a program (for i.e. inconsistent samplers) */
549 GLint GLFilter::validateProgram(GLuint program) const {
550  GLint status = 1;
551 
552  glValidateProgram(program);
553 
554  GLint logLength = 0;
555  glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength);
556  if (logLength > 0) {
557  std::vector<GLchar> log(logLength);
558  glGetProgramInfoLog(program, logLength, &logLength, &log[0]);
559  gl_log(GL_ERR, "Program validate log:\n%s", &log[0]);
560  }
561 
562  glGetProgramiv(program, GL_VALIDATE_STATUS, &status);
563  if (status == 0)
564  gl_log(GL_ERR, "Failed to validate program %d\n", program);
565 
566  return status;
567 }