3 #include "../core/GLFilter.h" 4 #include "../core/GLImage.h" 5 #include "gl_tiling_utils.h" 9 static constexpr
int MaxInputBatchSize = 8;
10 static constexpr
int MaxOutputBatchSize = 4;
16 point input_tile_size;
17 point output_tile_size;
18 point input_tile_grid_size;
19 point output_tile_grid_size;
27 const float* prelu_scale;
29 binding* inputData[MaxInputBatchSize];
30 binding* previousData[MaxOutputBatchSize];
34 binding* kernel_block[MaxInputBatchSize];
40 const int prelu_scale_size;
41 const int input_batch_size;
42 const int output_batch_size;
43 const int input_tiles;
44 const int output_tiles;
45 const int input_tile_chunk_size;
46 const int output_tile_chunk_size;
47 const int input_tile_batch_size;
48 const int output_tile_batch_size;
51 static const char* fragment_shader;
57 const float* _prelu_scale =
nullptr,
58 int _prelu_scale_size = 0,
59 int _input_batch_size = 1,
60 int _output_batch_size = 1,
62 int _output_tiles = 1,
63 int _input_tile_chunk_size = 1,
64 int _output_tile_chunk_size = 1,
65 int _input_tile_batch_size = 1,
66 int _output_tile_batch_size = 1,
72 input_bindings(_input_batch_size, _output_batch_size),
73 uniform_blocks_bindings(
76 _output_tile_batch_size,
77 _prelu_scale !=
nullptr),
79 {{
"KERNEL_SIZE_X", caffe2::to_string(_geometry.kernel_size.x)},
80 {
"KERNEL_SIZE_Y", caffe2::to_string(_geometry.kernel_size.y)},
81 {
"INPUT_BATCH_SIZE", caffe2::to_string(_input_batch_size)},
82 {
"OUTPUT_BATCH_SIZE", caffe2::to_string(_output_batch_size)},
83 {
"INPUT_TILES", caffe2::to_string(_input_tiles)},
84 {
"OUTPUT_TILES", caffe2::to_string(_output_tiles)},
86 caffe2::to_string(_geometry.input_tile_size.x)},
88 caffe2::to_string(_geometry.input_tile_size.y)},
90 caffe2::to_string(_geometry.output_tile_size.x)},
91 {
"OUTPUT_TILE_HEIGHT",
92 caffe2::to_string(_geometry.output_tile_size.y)},
94 caffe2::to_string(_geometry.input_tile_grid_size.x)},
96 caffe2::to_string(_geometry.output_tile_grid_size.x)},
97 {
"INPUT_TILE_CHUNK_SIZE",
98 caffe2::to_string(_input_tile_chunk_size)},
99 {
"OUTPUT_TILE_CHUNK_SIZE",
100 caffe2::to_string(_output_tile_chunk_size)},
101 {
"OUTPUT_TILE_BATCH_SIZE",
102 caffe2::to_string(_output_tile_batch_size)},
103 {
"TILED_CONVOLUTION", caffe2::to_string(_tiling)},
107 ? _geometry.kernel_size.x - 1 - _geometry.input_padding.x
108 : _geometry.input_padding.x)},
112 ? _geometry.kernel_size.y - 1 - _geometry.input_padding.y
113 : _geometry.input_padding.y)},
114 {
"INPUT_STRIDE_X", caffe2::to_string(_geometry.input_stride.x)},
115 {
"INPUT_STRIDE_Y", caffe2::to_string(_geometry.input_stride.y)},
116 {
"TRANSPOSED_CONVOLUTION",
117 caffe2::to_string(_geometry.transposed)},
118 {
"BOUNDS_CHECK_MODE",
119 caffe2::to_string(bounds_check_mode(_tiling, _geometry))}}),
122 prelu_scale(_prelu_scale),
124 prelu_scale_size(_prelu_scale_size),
125 input_batch_size(_input_batch_size),
126 output_batch_size(_output_batch_size),
127 input_tiles(_input_tiles),
128 output_tiles(_output_tiles),
129 input_tile_chunk_size(_input_tile_chunk_size),
130 output_tile_chunk_size(_output_tile_chunk_size),
131 input_tile_batch_size(_input_tile_batch_size),
132 output_tile_batch_size(_output_tile_batch_size),
137 template <
typename T>
// Computes the integer that the constructor stringifies into the
// "BOUNDS_CHECK_MODE" shader replacement. Only part of this function is
// visible in this listing: the values it returns and the handling of the
// `tiling` argument are on lines not shown here.
149 inline static int bounds_check_mode(
bool tiling,
const descriptor& geometry) {
// Start from the padding stored in the geometry descriptor.
154 int input_padding_x = geometry.input_padding.x,
155 input_padding_y = geometry.input_padding.y;
// For a transposed convolution, use kernel_size - 1 - padding instead; the
// same expression appears in the constructor's replacement list.
156 if (geometry.transposed) {
157 input_padding_x = geometry.kernel_size.x - 1 - input_padding_x;
158 input_padding_y = geometry.kernel_size.y - 1 - input_padding_y;
// Branch taken when the GL context reports GL_EXT_texture_border_clamp as
// defined, or when the effective padding is zero on both axes.
// NOTE(review): presumably this is the "no shader-side bounds check needed"
// case, but the value selected here is not visible -- confirm.
161 if (GLContext::getGLContext()->GL_EXT_texture_border_clamp_defined() ||
162 (input_padding_x == 0 && input_padding_y == 0)) {
// Builds the list of binding pointers that the constructor passes on as
// input_bindings(_input_batch_size, _output_batch_size). The visible part
// seeds the list with fixed BINDING(...) entries and then creates one named
// binding per input batch slot and one per output batch slot.
// NOTE(review): the statements that store each `new binding` and the final
// return are on lines not shown in this listing.
169 const std::vector<binding*> input_bindings(
170 int input_batch_size,
171 int output_batch_size) {
172 std::vector<binding*> bindings({BINDING(outputSize),
175 BINDING(inputTileRange)});
// One binding named "inputData[i]" for each i in [0, input_batch_size).
// NOTE(review): the inputData member array has MaxInputBatchSize entries;
// if these bindings are stored there, confirm callers never exceed it.
177 for (
int i = 0; i < input_batch_size; i++) {
180 new binding{
"inputData[" + caffe2::to_string(i) +
"]"});
// One binding named "previousData[i]" for each i in [0, output_batch_size).
// NOTE(review): the previousData member array has MaxOutputBatchSize
// entries; the same bound question applies.
// NOTE(review): both loops allocate with raw `new`; who frees these
// bindings is not visible here -- confirm ownership.
183 for (
int i = 0; i < output_batch_size; i++) {
186 new binding{
"previousData[" + caffe2::to_string(i) +
"]"});
// Builds the list of uniform-block binding pointers used by the constructor.
// The list always starts with bias_block; prelu_scale_block and a run of
// "Kernel_block[i]" bindings are added after it.
// NOTE(review): the last parameter, the condition guarding the
// prelu_scale_block push, the loop body's store and the return are on lines
// not shown in this listing.
192 const std::vector<binding*> uniform_blocks_bindings(
193 int input_batch_size,
194 int output_batch_size,
195 int output_tile_batch_size,
197 std::vector<binding*> bindings({BINDING(bias_block)});
// NOTE(review): the constructor passes `_prelu_scale != nullptr` as the
// final argument, so this push is presumably conditional on it -- confirm.
199 bindings.push_back(BINDING(prelu_scale_block));
// One binding named "Kernel_block[i]" for each i below
// max(input_batch_size, output_tile_batch_size).
// NOTE(review): the kernel_block member array has MaxInputBatchSize entries;
// if these bindings are stored there, confirm output_tile_batch_size cannot
// exceed that bound.
202 for (
int i = 0; i < std::max(input_batch_size, output_tile_batch_size);
206 new binding{
"Kernel_block[" + caffe2::to_string(i) +
"]"});
// NOTE(review): "bached" looks like a misspelling of "batched" (compare
// run_batched_conv below). Renaming would require updating the definition
// and every caller, so the identifier is left unchanged here.
212 void pack_kernel_data_for_bached_conv(
221 void pack_kernel_data_for_tiled_conv(
226 point input_tile_range,
227 point output_tile_range);
229 template <
typename T>
230 void run_batched_conv(
234 template <
typename T>