2 #include "../core/GLFilter.h" 3 #include "../core/GLImage.h" 4 #include "../core/ImageAllocator.h" 6 #include "caffe2/core/timer.h" 7 #include "caffe2/operators/pool_op.h" 11 typedef enum { AveragePool, MaxPool } PoolType;
23 point input_tile_size;
24 point output_tile_size;
45 {{
"KERNEL_SIZE_X", caffe2::to_string(_geometry.kernel_size.x)},
46 {
"KERNEL_SIZE_Y", caffe2::to_string(_geometry.kernel_size.y)},
47 {
"INPUT_PADDING_X", caffe2::to_string(_geometry.input_padding.x)},
48 {
"INPUT_PADDING_Y", caffe2::to_string(_geometry.input_padding.y)},
49 {
"INPUT_STRIDE_X", caffe2::to_string(_geometry.input_stride.x)},
50 {
"INPUT_STRIDE_Y", caffe2::to_string(_geometry.input_stride.y)},
52 caffe2::to_string(_geometry.input_tile_size.x)},
54 caffe2::to_string(_geometry.input_tile_size.y)},
56 caffe2::to_string(_geometry.output_tile_size.x)},
57 {
"OUTPUT_TILE_HEIGHT",
58 caffe2::to_string(_geometry.output_tile_size.y)},
59 {
"TILED_POOLING", caffe2::to_string(_tiling)},
60 {
"MAX_POOL", caffe2::to_string(poolType == MaxPool)},
61 {
"BOUNDS_CHECK_MODE", caffe2::to_string(1)}}),
62 geometry(_geometry) {}
67 for (
int i = 0; i < input_images.size(); i++) {
68 auto input_image = input_images[i];
69 auto output_image = output_images[i];
70 int input_slices = input_image->slices;
71 int output_slices = output_image->slices;
73 for (
int is = 0; is < input_slices; is++) {
74 run({{input_image->textures[is], inputData}},
75 {output_image->textures[is]},
77 glUniform2i(outputSize->location, output_image->texture_width, output_image->texture_height);
78 glUniform2i(kernelSize->location, geometry.kernel_size.x, geometry.kernel_size.y);
80 output_image->texture_width,
81 output_image->texture_height);
93 inline static int bounds_check_mode(
bool tiling,
const descriptor& geometry) {
98 if (GLContext::getGLContext()->GL_EXT_texture_border_clamp_defined() ||
99 (geometry.input_padding.x == 0 && geometry.input_padding.y == 0)) {
106 static const char* fragment_shader;
110 const char* GLPool::fragment_shader = R
"GLSL(#version 300 es 111 #define TILED_POOLING $(TILED_POOLING) 112 #define MAX_POOL $(MAX_POOL) 115 #define INPUT_TILE_WIDTH $(INPUT_TILE_WIDTH) 116 #define INPUT_TILE_HEIGHT $(INPUT_TILE_HEIGHT) 117 #define OUTPUT_TILE_WIDTH $(OUTPUT_TILE_WIDTH) 118 #define OUTPUT_TILE_HEIGHT $(OUTPUT_TILE_HEIGHT) 120 #define BOUNDS_CHECK_MODE $(BOUNDS_CHECK_MODE) 122 precision mediump float; 123 precision mediump int; 125 in highp vec2 v_texCoord; 127 const ivec2 input_padding = ivec2($(INPUT_PADDING_X), $(INPUT_PADDING_Y)); 128 const ivec2 input_stride = ivec2($(INPUT_STRIDE_X), $(INPUT_STRIDE_Y)); 129 const ivec2 kernel_size = ivec2($(KERNEL_SIZE_X), $(KERNEL_SIZE_Y)); 131 uniform ivec2 kernelSize; 132 uniform ivec2 outputSize; 134 TEXTURE_INPUT(inputData); 135 TEXTURE_OUTPUT(0, outputData); 137 #if BOUNDS_CHECK_MODE == 0 138 #define IN_BOUNDS(p, p0, p1) (true) 140 #define IN_BOUNDS(p, p0, p1) (all(greaterThanEqual(p, p0)) && all(lessThan(p, p1))) 143 // MIN_FLOAT is -2^14, which is the minimum precision requirement for mediump in OpenGL ES 3.0 144 const float MIN_FLOAT = -exp2(14.0); 148 const ivec2 inputTileSize = ivec2(INPUT_TILE_WIDTH, INPUT_TILE_HEIGHT); 149 const ivec2 outputTileSize = ivec2(OUTPUT_TILE_WIDTH, OUTPUT_TILE_HEIGHT); 155 pool = vec4(MIN_FLOAT); \ 156 for (int y = 0; y < kernelSize.y; y++) { \ 157 for (int x = 0; x < kernelSize.x; x++) { \ 158 ivec2 idx = tileCoord + ivec2(x, y); \ 159 if IN_BOUNDS(idx, ivec2(0), inputTileSize) { \ 160 vec4 data = TEXTURE_LOAD(inputData, inputTileOffset + idx); \ 161 pool = max(pool, data); \ 171 for (int y = 0; y < kernelSize.y; y++) { \ 172 for (int x = 0; x < kernelSize.x; x++) { \ 173 ivec2 idx = tileCoord + ivec2(x, y); \ 174 if IN_BOUNDS(idx, ivec2(0), inputTileSize) { \ 175 vec4 data = TEXTURE_LOAD(inputData, inputTileOffset + idx); \ 181 pool = pool / float(count); \ 187 ivec2 inputSize = textureSize(inputData, 0); 188 ivec2 texelCoord = ivec2(v_texCoord * vec2(outputSize)); 190 ivec2 tile = texelCoord / 
outputTileSize; // 2D output tile idx 191 ivec2 tileCoord = texelCoord % outputTileSize; // in-tile coordinates 192 tileCoord = input_stride * tileCoord - input_padding; 194 ivec2 inputTileOffset = tile * inputTileSize; 199 highp vec4 pool = vec4(0); 204 outputData = TEXTURE_STORE(pool); 213 pool = vec4(MIN_FLOAT); \ 214 for (int y = 0; y < kernelSize.y; y++) { \ 215 for (int x = 0; x < kernelSize.x; x++) { \ 216 ivec2 idx = texelCoord + ivec2(x, y); \ 217 if IN_BOUNDS(idx, ivec2(0), inputSize) { \ 218 vec4 data = TEXTURE_LOAD(inputData, idx); \ 219 pool = max(pool, data); \ 229 for (int y = 0; y < kernelSize.y; y++) { \ 230 for (int x = 0; x < kernelSize.x; x++) { \ 231 ivec2 idx = texelCoord + ivec2(x, y); \ 232 if IN_BOUNDS(idx, ivec2(0), inputSize) { \ 233 vec4 data = TEXTURE_LOAD(inputData, idx); \ 239 pool = pool / float(count); \ 245 ivec2 inputSize = textureSize(inputData, 0); 246 ivec2 texelCoord = input_stride * ivec2(v_texCoord * vec2(outputSize)) - input_padding; 250 highp vec4 pool = vec4(0); 255 outputData = TEXTURE_STORE(pool); 257 #endif // TILED_POOLING 263 template <
typename OPBase>
264 static void computeOutputHW(OPBase* op,
int H,
int W,
int* OH,
int* OW) {
266 input.Resize(1, 1, H, W);
267 op->SetOutputSize(input, &output, 1);
268 CAFFE_ENFORCE_EQ(output.ndim(), 4);
273 template <
typename T, GLPool::PoolType poolType>
278 OPERATOR_NEEDS_FEATURE(order_ == StorageOrder::NCHW,
"OpenGL only supports NCHW order.");
279 CAFFE_ENFORCE(dilation_h() == 1 && dilation_w() == 1,
280 "Pooling op does not support dilation right now.");
281 if (!global_pooling_) {
282 CAFFE_ENFORCE(pad_t() < kernel_h() && pad_b() < kernel_h() && pad_l() < kernel_w() &&
283 pad_r() < kernel_w(),
284 "Pad should be smaller than kernel.");
288 bool RunOnDeviceWithOrderNCHW()
override {
289 const GLImageVector<T>& input = OperatorBase::Inputs()[0]->template Get<GLImageVector<T>>();
290 const int num_images = input.size();
291 const int input_channels = input.channels();
292 const int input_width = input.width();
293 const int input_height = input.height();
297 const int output_channels = input_channels;
299 computeOutputHW(
this, input_height, input_width, &output_height, &output_width);
301 int is_last = OperatorBase::GetSingleArgument<int>(
"is_last", 0);
303 const int input_tile_x = input.tile_x(), input_tile_y = input.tile_y();
304 const int output_tile_x = input_tile_x, output_tile_y = input_tile_y;
307 num_images, output_width, output_height, output_channels, output_tile_x, output_tile_y, is_last);
310 {kernel_w(), kernel_h()},
312 {stride_w(), stride_h()},
313 {input_width, input_height},
314 {output_height, output_width}};
317 LOG(INFO) << input_channels <<
": " << input_height <<
" X " << input_width <<
" => " << output_channels <<
": " 318 << output_height <<
" X " << output_width <<
" Kernel: " << kernel_w() <<
"X" << kernel_h()
319 <<
" Tiling: " << input_tile_x <<
"X" << input_tile_y;
321 glPool_.reset(
new GLPool(geometry, poolType, input_tile_x > 1 || input_tile_y > 1));
324 glPool_->pool(input, *output);
326 OperatorBase::Outputs()[0]->Reset(output);
332 std::unique_ptr<GLPool> glPool_;
// Operator schemas for the two OpenGL pooling operators: each is declared
// with exactly one input and exactly one output.
338 OPERATOR_SCHEMA(OpenGLAveragePool).NumInputs(1).NumOutputs(1);
339 OPERATOR_SCHEMA(OpenGLMaxPool).NumInputs(1).NumOutputs(1);
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...