Caffe2 - C++ API
A deep learning, cross-platform ML framework
GLMul.cc
1 
2 #include "../core/GLFilter.h"
3 #include "../core/GLImage.h"
4 #include "../core/ImageAllocator.h"
5 
6 #include "caffe2/core/operator.h"
7 #include "caffe2/core/timer.h"
8 
9 class GLMul : public GLFilter {
10  public:
11  binding* outputSize;
12  binding* inputData;
13  binding* B;
14 
15  GLMul()
16  : GLFilter("GLMul",
17  vertex_shader,
18  fragment_shader,
19  std::vector<binding*>({BINDING(outputSize), BINDING(inputData), BINDING(B)}),
20  {/* no uniform blocks */},
21  {/* no attributes */},
22  {/* no replacements */}) {}
23 
24  template <typename T>
25  void mul(const GLImageVector<T>& input_images, const GLImageVector<T>& output_images, float b);
26 
27  static const char* fragment_shader;
28 };
29 
30 // MARK: GLSL
31 
// Fragment shader source for GLMul (GLSL ES 3.00).
//
// Uniforms: `outputSize` (ivec2) and `B` (vec4). main() scales v_texCoord by
// outputSize to get an integer texel coordinate, loads that texel from
// `inputData`, multiplies it component-wise by `B`, and stores the product in
// `outputData`.
//
// NOTE(review): TEXTURE_INPUT / TEXTURE_OUTPUT / TEXTURE_LOAD / TEXTURE_STORE
// are not GLSL built-ins; presumably they are expanded before the shader is
// compiled -- confirm in GLFilter. Nothing may be added inside the raw string,
// since its contents are passed on verbatim as shader text.
32 const char* GLMul::fragment_shader = R"GLSL(#version 300 es
33 
34 precision mediump float;
35 precision mediump int;
36 
37 in highp vec2 v_texCoord;
38 
39 uniform ivec2 outputSize;
40 uniform vec4 B;
41 
42 TEXTURE_INPUT(inputData);
43 TEXTURE_OUTPUT(0, outputData);
44 
45 void main() {
46  ivec2 texelCoord = ivec2(v_texCoord * vec2(outputSize));
47  vec4 A = TEXTURE_LOAD(inputData, texelCoord);
48  outputData = TEXTURE_STORE(A * B);
49 }
50 
51 )GLSL";
52 
53 template <typename T>
54 void GLMul::mul(const GLImageVector<T>& input_images,
55  const GLImageVector<T>& output_images,
56  float b) {
57  for (int i = 0; i < input_images.size(); i++) {
58  auto input_image = input_images[i];
59  auto output_image = output_images[i];
60  int input_slices = input_image->slices;
61  int output_slices = output_image->slices;
62 
63  for (int is = 0; is < input_slices; is++) {
64  run(std::vector<texture_attachment>({{input_image->textures[is], inputData}}),
65  {output_image->textures.begin() + is, output_image->textures.begin() + is + 1},
66  [&]() {
67  glUniform2i(outputSize->location, output_image->width, output_image->height);
68  glUniform4f(B->location, b, b, b, b);
69  },
70  output_image->width,
71  output_image->height);
72  }
73  }
74 }
75 
76 namespace caffe2 {
77 template <class T>
78 class OpenGLMulOp final : public Operator<CPUContext>, ImageAllocator<T> {
79  public:
80  OpenGLMulOp(const OperatorDef& operator_def, Workspace* ws)
81  : Operator<CPUContext>(operator_def, ws) {
82  OPERATOR_NEEDS_FEATURE(OperatorBase::GetSingleArgument<int>("broadcast", 0) == 1,
83  "OpenGLMul only supports broadcast");
84 
85  OPERATOR_NEEDS_FEATURE(OperatorBase::HasArgument("axis") == false,
86  "OpenGLMul does not support axis");
87  }
88 
89  bool RunOnDevice() override {
90  const GLImageVector<T>& input = Inputs()[0]->template Get<GLImageVector<T>>();
91  const auto& B = Input(1);
92  CAFFE_ENFORCE_EQ(B.size(), 1); // only scalar is supported
93 
94  const int num_images = input.size();
95  const auto output_height = input.height();
96  const auto output_width = input.width();
97  const int output_channels = input.channels();
98 
99  int is_last = OperatorBase::GetSingleArgument<int>("is_last", 0);
100 
102  num_images, output_width, output_height, output_channels, is_last);
103 
104  if (!_mult) {
105  _mult.reset(new GLMul());
106  }
107 
108  _mult->mul(input, *output, B.template data<float>()[0]);
109 
110  Outputs()[0]->Reset(output);
111 
112  return true;
113  }
114 
115  private:
116  std::unique_ptr<GLMul> _mult;
117 };
118 
119 REGISTER_CPU_OPERATOR(OpenGLMul, OpenGLMulOp<float16_t>);
120 } // namespace caffe2
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
Definition: GLMul.cc:9
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...