Caffe2 - C++ API
A deep learning, cross platform ML framework
GLConvolution.h
1 #pragma once
2 
#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>

#include "../core/GLFilter.h"
#include "../core/GLImage.h"
#include "gl_tiling_utils.h"
6 
7 class GLConvolution : public GLFilter {
8  public:
9  static constexpr int MaxInputBatchSize = 8;
10  static constexpr int MaxOutputBatchSize = 4;
11 
12  struct descriptor {
13  int input_channels;
14  int output_channels;
15  point kernel_size;
16  point input_tile_size;
17  point output_tile_size;
18  point input_tile_grid_size;
19  point output_tile_grid_size;
20  point input_padding;
21  point input_stride;
22  bool transposed;
23  };
24 
25  const float* kernel;
26  const float* bias;
27  const float* prelu_scale;
28 
29  binding* inputData[MaxInputBatchSize];
30  binding* previousData[MaxOutputBatchSize];
31  binding* outputSize;
32  binding* accumulate;
33  binding* fusePRelu;
34  binding* kernel_block[MaxInputBatchSize];
35  binding* bias_block;
36  binding* prelu_scale_block;
37  binding* inputTileRange;
38 
39  const descriptor geometry;
40  const int prelu_scale_size;
41  const int input_batch_size;
42  const int output_batch_size;
43  const int input_tiles;
44  const int output_tiles;
45  const int input_tile_chunk_size;
46  const int output_tile_chunk_size;
47  const int input_tile_batch_size;
48  const int output_tile_batch_size;
49  const bool tiling;
50 
51  static const char* fragment_shader;
52 
54  const descriptor& _geometry,
55  const float* _kernel,
56  const float* _bias,
57  const float* _prelu_scale = nullptr,
58  int _prelu_scale_size = 0,
59  int _input_batch_size = 1,
60  int _output_batch_size = 1,
61  int _input_tiles = 1,
62  int _output_tiles = 1,
63  int _input_tile_chunk_size = 1,
64  int _output_tile_chunk_size = 1,
65  int _input_tile_batch_size = 1,
66  int _output_tile_batch_size = 1,
67  bool _tiling = false)
68  : GLFilter(
69  "GLConvolution",
70  vertex_shader,
71  fragment_shader,
72  input_bindings(_input_batch_size, _output_batch_size),
73  uniform_blocks_bindings(
74  _input_batch_size,
75  _output_batch_size,
76  _output_tile_batch_size,
77  _prelu_scale != nullptr),
78  {/* no attributes */},
79  {{"KERNEL_SIZE_X", caffe2::to_string(_geometry.kernel_size.x)},
80  {"KERNEL_SIZE_Y", caffe2::to_string(_geometry.kernel_size.y)},
81  {"INPUT_BATCH_SIZE", caffe2::to_string(_input_batch_size)},
82  {"OUTPUT_BATCH_SIZE", caffe2::to_string(_output_batch_size)},
83  {"INPUT_TILES", caffe2::to_string(_input_tiles)},
84  {"OUTPUT_TILES", caffe2::to_string(_output_tiles)},
85  {"INPUT_TILE_WIDTH",
86  caffe2::to_string(_geometry.input_tile_size.x)},
87  {"INPUT_TILE_HEIGHT",
88  caffe2::to_string(_geometry.input_tile_size.y)},
89  {"OUTPUT_TILE_WIDTH",
90  caffe2::to_string(_geometry.output_tile_size.x)},
91  {"OUTPUT_TILE_HEIGHT",
92  caffe2::to_string(_geometry.output_tile_size.y)},
93  {"INPUT_TILE_X",
94  caffe2::to_string(_geometry.input_tile_grid_size.x)},
95  {"OUTPUT_TILE_X",
96  caffe2::to_string(_geometry.output_tile_grid_size.x)},
97  {"INPUT_TILE_CHUNK_SIZE",
98  caffe2::to_string(_input_tile_chunk_size)},
99  {"OUTPUT_TILE_CHUNK_SIZE",
100  caffe2::to_string(_output_tile_chunk_size)},
101  {"OUTPUT_TILE_BATCH_SIZE",
102  caffe2::to_string(_output_tile_batch_size)},
103  {"TILED_CONVOLUTION", caffe2::to_string(_tiling)},
104  {"INPUT_PADDING_X",
105  caffe2::to_string(
106  _geometry.transposed
107  ? _geometry.kernel_size.x - 1 - _geometry.input_padding.x
108  : _geometry.input_padding.x)},
109  {"INPUT_PADDING_Y",
110  caffe2::to_string(
111  _geometry.transposed
112  ? _geometry.kernel_size.y - 1 - _geometry.input_padding.y
113  : _geometry.input_padding.y)},
114  {"INPUT_STRIDE_X", caffe2::to_string(_geometry.input_stride.x)},
115  {"INPUT_STRIDE_Y", caffe2::to_string(_geometry.input_stride.y)},
116  {"TRANSPOSED_CONVOLUTION",
117  caffe2::to_string(_geometry.transposed)},
118  {"BOUNDS_CHECK_MODE",
119  caffe2::to_string(bounds_check_mode(_tiling, _geometry))}}),
120  kernel(_kernel),
121  bias(_bias),
122  prelu_scale(_prelu_scale),
123  geometry(_geometry),
124  prelu_scale_size(_prelu_scale_size),
125  input_batch_size(_input_batch_size),
126  output_batch_size(_output_batch_size),
127  input_tiles(_input_tiles),
128  output_tiles(_output_tiles),
129  input_tile_chunk_size(_input_tile_chunk_size),
130  output_tile_chunk_size(_output_tile_chunk_size),
131  input_tile_batch_size(_input_tile_batch_size),
132  output_tile_batch_size(_output_tile_batch_size),
133  tiling(_tiling) {}
134 
135  ~GLConvolution() {}
136 
137  template <typename T>
138  void convolution(
139  const GLImageVector<T>& input_images,
140  const GLImageVector<T>& output_images);
141 
142  private:
143  /*
144  * Computes BOUNDS_CHECK_MODE for the convolution parameters.
145  *
146  * @retval 0 if bounds check can be skipped
147  * @retval non-zero if bounds check can not be skipped
148  */
149  inline static int bounds_check_mode(bool tiling, const descriptor& geometry) {
150  if (tiling) {
151  return 1;
152  }
153 
154  int input_padding_x = geometry.input_padding.x,
155  input_padding_y = geometry.input_padding.y;
156  if (geometry.transposed) {
157  input_padding_x = geometry.kernel_size.x - 1 - input_padding_x;
158  input_padding_y = geometry.kernel_size.y - 1 - input_padding_y;
159  }
160 
161  if (GLContext::getGLContext()->GL_EXT_texture_border_clamp_defined() ||
162  (input_padding_x == 0 && input_padding_y == 0)) {
163  return 0;
164  } else {
165  return 1;
166  }
167  }
168 
169  const std::vector<binding*> input_bindings(
170  int input_batch_size,
171  int output_batch_size) {
172  std::vector<binding*> bindings({BINDING(outputSize),
173  BINDING(accumulate),
174  BINDING(fusePRelu),
175  BINDING(inputTileRange)});
176 
177  for (int i = 0; i < input_batch_size; i++) {
178  bindings.push_back(
179  inputData[i] =
180  new binding{"inputData[" + caffe2::to_string(i) + "]"});
181  }
182 
183  for (int i = 0; i < output_batch_size; i++) {
184  bindings.push_back(
185  previousData[i] =
186  new binding{"previousData[" + caffe2::to_string(i) + "]"});
187  }
188 
189  return bindings;
190  }
191 
192  const std::vector<binding*> uniform_blocks_bindings(
193  int input_batch_size,
194  int output_batch_size,
195  int output_tile_batch_size,
196  bool fuse_prelu) {
197  std::vector<binding*> bindings({BINDING(bias_block)});
198  if (fuse_prelu) {
199  bindings.push_back(BINDING(prelu_scale_block));
200  }
201 
202  for (int i = 0; i < std::max(input_batch_size, output_tile_batch_size);
203  i++) {
204  bindings.push_back(
205  kernel_block[i] =
206  new binding{"Kernel_block[" + caffe2::to_string(i) + "]"});
207  }
208 
209  return bindings;
210  }
211 
212  void pack_kernel_data_for_bached_conv(
213  float16_t* data,
214  size_t size,
215  int input_channels,
216  int output_channels,
217  int is,
218  int os,
219  int ib);
220 
221  void pack_kernel_data_for_tiled_conv(
222  float16_t* data, // destination
223  size_t size,
224  int input_channels,
225  int output_channels,
226  point input_tile_range,
227  point output_tile_range);
228 
229  template <typename T>
230  void run_batched_conv(
231  const GLImageVector<T>& input_images,
232  const GLImageVector<T>& output_images);
233 
234  template <typename T>
235  void run_tiled_conv(
236  const GLImageVector<T>& input_images,
237  const GLImageVector<T>& output_images);
238 };