Caffe2 - C++ API
A deep learning, cross-platform ML framework
GLConcat.cc
1 
2 #include "../core/GLFilter.h"
3 #include "../core/GLImage.h"
4 #include "../core/ImageAllocator.h"
5 #include "gl_tiling_utils.h"
6 
7 #include <iostream>
8 #include <vector>
9 #include "caffe2/core/operator.h"
10 #include "caffe2/core/timer.h"
11 #include "caffe2/utils/math.h"
12 
13 class GLConcat : public GLFilter {
14  public:
15  bool tiling_;
16  binding* inputData;
17  binding* outputSize;
18  binding* inputTileRange;
19  binding* input_tile_x;
20 
21  GLConcat(tile_descriptor output_tile_geometries, bool tiling = false)
22  : GLFilter("GLConcat",
23  vertex_shader,
24  fragment_shader,
25  std::vector<binding*>(
26  {BINDING(outputSize), BINDING(inputData), BINDING(inputTileRange), BINDING(input_tile_x)}),
27  {/* no uniform blocks */},
28  {/* no attributes */},
29  {{"TILING", caffe2::to_string(tiling)},
30  {"OUTPUT_TILES", caffe2::to_string(output_tile_geometries.tiles)},
31  {"OUTPUT_TILE_X", caffe2::to_string(output_tile_geometries.tile_dims.x)},
32  {"OUTPUT_TILE_WIDTH", caffe2::to_string(output_tile_geometries.tile_size.x)},
33  {"OUTPUT_TILE_HEIGHT", caffe2::to_string(output_tile_geometries.tile_size.y)}}),
34  tiling_(tiling) {}
35 
36  template <typename T>
37  void concat(const GLImageVector<T>** input_images, const GLImageVector<T>& output_image, int size);
38  static const char* fragment_shader;
39 };
40 
41 // MARK: GLSL
42 
43 const char* GLConcat::fragment_shader = R"GLSL(#version 300 es
44 #define TILING $(TILING)
45 
46 // tiling
47 #define OUTPUT_TILES $(OUTPUT_TILES)
48 #define OUTPUT_TILE_X $(OUTPUT_TILE_X)
49 #define OUTPUT_TILE_WIDTH $(OUTPUT_TILE_WIDTH)
50 #define OUTPUT_TILE_HEIGHT $(OUTPUT_TILE_HEIGHT)
51 
52 precision mediump float;
53 precision mediump int;
54 
55 in highp vec2 v_texCoord;
56 TEXTURE_INPUT(inputData);
57 TEXTURE_OUTPUT(0, outputData);
58 
59 uniform ivec2 outputSize;
60 uniform ivec2 inputTileRange; // (]
61 uniform int input_tile_x;
62 
63 #if TILING
64 const ivec2 outputTileSize = ivec2(OUTPUT_TILE_WIDTH, OUTPUT_TILE_HEIGHT);
65 
66 void main() {
67  ivec2 texelCoord = ivec2(v_texCoord * vec2(outputSize));
68  ivec2 tile = texelCoord / outputTileSize; // 2D output tile idx
69  ivec2 tileCoord = texelCoord % outputTileSize; // in-tile coordinates
70  int tileNum = OUTPUT_TILE_X * tile.y + tile.x; // 1D output tile idx
71 
72  if (tileNum >= inputTileRange.x && tileNum < inputTileRange.y) {
73  tileNum = tileNum - inputTileRange.x;
74  texelCoord = ivec2(tileNum % input_tile_x, tileNum / input_tile_x) * ivec2(OUTPUT_TILE_WIDTH, OUTPUT_TILE_HEIGHT) + tileCoord;
75  vec4 value = TEXTURE_LOAD(inputData, texelCoord);
76  outputData = TEXTURE_STORE(value);
77  } else {
78  // early termination
79  discard;
80  }
81 }
82 
83 #else
84 void main() {
85  ivec2 texelCoord = ivec2(v_texCoord * vec2(outputSize));
86  vec4 value = TEXTURE_LOAD(inputData, texelCoord);
87  outputData = TEXTURE_STORE(value);
88 }
89 #endif
90 
91 )GLSL";
92 
93 template <typename T>
94 void GLConcat::concat(const GLImageVector<T>** input_images, const GLImageVector<T>& output_images, int input_size) {
95  for (int k = 0; k < output_images.size(); k++) {
96  GLImage<T>* output_image = output_images[k];
97 
98  int is = 0, os = 0;
99  for (int i = 0; i < input_size; i++) {
100  for (int j = 0; j < input_images[i]->slices(); j++) {
101  GLImage<T>* input_image = (*input_images[i])[k];
102  std::vector<texture_attachment> input_attachments;
103  input_attachments.push_back({input_image->textures[j], inputData});
104 
105  run(input_attachments,
106  {output_image->textures.begin() + os, output_image->textures.begin() + os + 1},
107  [&]() {
108  glUniform2i(outputSize->location, output_image->texture_width, output_image->texture_height);
109  glUniform2i(inputTileRange->location, is, is + input_image->tile_x * input_image->tile_y);
110  glUniform1i(input_tile_x->location, input_image->tile_x);
111  },
112  output_image->texture_width,
113  output_image->texture_height);
114  if (!tiling_) {
115  os++; // for tiling, you always write to the same texture
116  }
117  is += input_image->tile_x * input_image->tile_y;
118  }
119  }
120  }
121 }
122 
123 namespace caffe2 {
124 template <typename T>
125 class OpenGLConcatOp final : public Operator<CPUContext>, ImageAllocator<T> {
126  public:
127  OpenGLConcatOp(const OperatorDef& operator_def, Workspace* ws)
128  : Operator<CPUContext>(operator_def, ws),
129  order_(StringToStorageOrder(OperatorBase::GetSingleArgument<string>("order", "NCHW"))) {
130  OPERATOR_NEEDS_FEATURE(this->order_ == StorageOrder::NCHW, "OpenGL only supports NCHW order.");
131  }
132 
133  bool RunOnDevice() override {
134  const GLImageVector<T>& input0 = Inputs()[0]->template Get<GLImageVector<T>>();
135  const int num_images = input0.size();
136 
137  const GLImageVector<T>** input_images = new const GLImageVector<T>*[Inputs().size()];
138  input_images[0] = &input0;
139  int channelCount = input0.channels();
140 
141  bool tiling = OperatorBase::GetSingleArgument<int>("tiling", 0);
142 
143  // Only supports input channels divisible by 4 for now
144  CAFFE_ENFORCE_EQ(input0.channels() % 4, 0);
145  for (auto i = 1; i < Inputs().size(); i++) {
146  const GLImageVector<T>& inputi = Inputs()[i]->template Get<GLImageVector<T>>();
147  channelCount += inputi.channels();
148  CAFFE_ENFORCE_EQ(num_images, inputi.size());
149  CAFFE_ENFORCE_EQ(inputi.channels() % 4, 0);
150  CAFFE_ENFORCE_EQ(input0.width(), inputi.width());
151  CAFFE_ENFORCE_EQ(input0.height(), inputi.height());
152  input_images[i] = &inputi;
153 
154  if (inputi.tile_x() > 1 || inputi.tile_y() > 1) {
155  tiling = true;
156  }
157  }
158 
159  const int input_width = input0.width();
160  const int input_height = input0.height();
161 
162  const int output_channels = channelCount;
163  const int output_width = input_width;
164  const int output_height = input_height;
165 
166  int output_tile_x = 1;
167  int output_tile_y = 1;
168  if (tiling) {
169  computeOutputTiles(output_channels, output_tile_x, output_tile_y);
170  }
171 
172  int is_last = OperatorBase::GetSingleArgument<int>("is_last", 0);
173 
175  num_images, output_width, output_height, output_channels, output_tile_x, output_tile_y, is_last);
176  if (!_concat) {
177  tile_descriptor output_tile_geometries{
178  {output_tile_x, output_tile_y}, {output_width, output_height}, output_tile_x * output_tile_y};
179  _concat.reset(new GLConcat(output_tile_geometries, tiling));
180  }
181 
182  _concat->concat(input_images, *output, Inputs().size());
183  delete[] input_images;
184  Outputs()[0]->Reset(output);
185 
186  return true;
187  }
188 
189  private:
190  StorageOrder order_;
191  std::unique_ptr<GLConcat> _concat;
192 };
193 
194 REGISTER_CPU_OPERATOR(OpenGLConcat, OpenGLConcatOp<float16_t>);
195 OPERATOR_SCHEMA(OpenGLConcat).NumInputs(2, 4).NumOutputs(1, 2);
196 } // namespace caffe2
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...