2 #include "../core/GLFilter.h" 3 #include "../core/GLImage.h" 4 #include "../core/ImageAllocator.h" 6 #include "caffe2/core/operator.h" 7 #include "caffe2/core/timer.h" 13 typedef enum { PRelu = 0, Relu = 1 } ReluType;
// Tiling layout of the output texture. The constructors initialize all four fields.
// output_tile_x * output_tile_y is the tile count used when filling the scale buffer,
// and the PRelu constructor also forwards these values to the shader as the
// OUTPUT_TILE_X / OUTPUT_TILE_WIDTH / OUTPUT_TILE_HEIGHT macros.
22 const int output_tile_x;
23 const int output_tile_y;
24 const int output_tile_width;
25 const int output_tile_height;
// PRelu variant of the constructor (only part of the parameter list and initializer
// list is visible in this excerpt). It binds the input texture and the scale uniform
// block, and bakes the tiling configuration into the shader through the macro
// replacement table below.
29 const int _scale_size,
33 int _output_tile_width,
34 int _output_tile_height)
39 std::vector<binding*>({BINDING(inputData)}),
40 std::vector<binding*>({BINDING(scale_block)}),
// Replacement table: each pair is substituted for the matching $(NAME) in the shader source.
42 {{
"USE_RELU", caffe2::to_string(PRelu)},
// Total number of tiles. NOTE(review): the key for this value is on a line not visible
// here; presumably "OUTPUT_TILES" - confirm.
44 caffe2::to_string(_output_tile_x * _output_tile_y)},
45 {
"OUTPUT_TILE_X", caffe2::to_string(_output_tile_x)},
46 {
"OUTPUT_TILE_WIDTH", caffe2::to_string(_output_tile_width)},
47 {
"OUTPUT_TILE_HEIGHT", caffe2::to_string(_output_tile_height)},
// True when the output is tiled along either axis. NOTE(review): the key is not visible
// here; presumably "TILED_PRELU" - confirm.
49 caffe2::to_string(_output_tile_x > 1 || _output_tile_y > 1)}}),
// Keep the configuration on the instance; prelu() reads scale_size and the tile counts.
51 scale_size(_scale_size),
53 output_tile_x(_output_tile_x),
54 output_tile_y(_output_tile_y),
55 output_tile_width(_output_tile_width),
56 output_tile_height(_output_tile_height) {}
// Relu variant of the constructor (only part of it is visible in this excerpt). The
// input texture is bound, USE_RELU is set to Relu, every tiling macro is fixed to 1
// and TILED_PRELU is set to 0.
62 std::vector<binding*>({BINDING(inputData)}),
// Replacement table: each pair is substituted for the matching $(NAME) in the shader source.
65 {{
"USE_RELU", caffe2::to_string(Relu)},
66 {
"OUTPUT_TILES", caffe2::to_string(1)},
67 {
"OUTPUT_TILE_X", caffe2::to_string(1)},
68 {
"OUTPUT_TILE_WIDTH", caffe2::to_string(1)},
69 {
"OUTPUT_TILE_HEIGHT", caffe2::to_string(1)},
70 {
"TILED_PRELU", caffe2::to_string(0)}}),
// Last visible member initializer: a single untiled output.
78 output_tile_height(1) {}
// Tail of a member declaration whose beginning is not visible in this excerpt; the
// final parameter selects between the PRelu and Relu variants.
83 GLPRelu::ReluType reluType);
// GLSL fragment shader source shared by both variants; defined out of class below.
85 static const char* fragment_shader;
90 const char* GLPRelu::fragment_shader = R
"GLSL(#version 300 es 91 #define TILED_PRELU $(TILED_PRELU) 92 #define USE_RELU $(USE_RELU) 95 #define OUTPUT_TILES $(OUTPUT_TILES) 96 #define OUTPUT_TILE_X $(OUTPUT_TILE_X) 97 #define OUTPUT_TILE_WIDTH $(OUTPUT_TILE_WIDTH) 98 #define OUTPUT_TILE_HEIGHT $(OUTPUT_TILE_HEIGHT) 101 precision mediump float; 104 TEXTURE_INPUT(inputData); 105 TEXTURE_OUTPUT(0, outputData); 107 in highp vec2 v_texCoord; 113 ivec2 inputSize = textureSize(inputData, 0); 114 ivec2 texelCoord = ivec2(v_texCoord * vec2(inputSize)); 115 vec4 value = TEXTURE_LOAD(inputData, texelCoord); 116 outputData = TEXTURE_STORE(max(value, vec4(0.0))); 122 const ivec2 outputTileSize = ivec2(OUTPUT_TILE_WIDTH, OUTPUT_TILE_HEIGHT); 124 layout (std140) uniform scale_block { 125 highp uvec4 scale[(OUTPUT_TILES + 1) / 2]; 129 ivec2 inputSize = textureSize(inputData, 0); 130 ivec2 texelCoord = ivec2(v_texCoord * vec2(inputSize)); 132 ivec2 tile = texelCoord / outputTileSize; // 2D output tile idx 133 int tileNum = OUTPUT_TILE_X * tile.y + tile.x; // 1D output tile idx 135 // outputData = value > 0 ? value : value * weight; 136 vec4 value = TEXTURE_LOAD(inputData, texelCoord); 137 vec4 preluValue = (tileNum % 2 == 0) ? unpackHalf4x16(scale[tileNum/2].xy) : unpackHalf4x16(scale[tileNum/2].zw); 138 value = mix(value * preluValue, value, vec4(greaterThan(value, vec4(0)))); 139 outputData = TEXTURE_STORE(value); 142 layout (std140) uniform scale_block { 146 ivec2 inputSize = textureSize(inputData, 0); 147 ivec2 texelCoord = ivec2(v_texCoord * vec2(inputSize)); 149 // outputData = value > 0 ? value : value * weight; 150 vec4 value = TEXTURE_LOAD(inputData, texelCoord); 151 value = mix(value * unpackHalf4x16(scale.xy), value, vec4(greaterThan(value, vec4(0)))); 152 outputData = TEXTURE_STORE(value); 154 #endif // TILED_PRELU 160 template <
typename T>
163 GLPRelu::ReluType reluType) {
// Body of prelu(): runs the filter once per slice of every image in the batch.
164 int num_images = input_images.size();
165 for (
int i = 0; i < num_images; i++) {
168 int input_slices = input_image->slices;
169 int output_slices = output_image->slices;
// One pass per input slice: slice `is` of the input is read and texture `is` of the
// output is written.
171 for (
int is = 0; is < input_slices; is++) {
// Only the PRelu variant needs weights, so the scale buffer is filled only for it.
172 if (reluType == PRelu) {
173 attach_uniform_buffer<float16_t>(scale_block, 0, [&](float16_t* data,
size_t size) {
174 int output_tiles = output_tile_x * output_tile_y;
// k walks the channel indices belonging to this slice, starting at 4 * is * output_tiles
// and stopping before min(channels, 4 * (is + 1) * output_tiles). The increment clause
// is not visible in this excerpt.
175 for (
int j = 0, k = 4 * is * output_tiles;
176 k < std::min(channels, 4 * (is + 1) * output_tiles);
// Use the per-channel weight when exactly one scale per channel was supplied;
// for any other scale_size the first scale is used for every channel.
178 data[j] = scale_size == channels ? scale[k] : scale[0];
183 std::vector<texture_attachment> input_attachments;
185 input_attachments.push_back({input_image->textures[is], inputData});
// Render into the single output texture at index `is` (half-open range [is, is + 1)).
187 run(input_attachments,
188 {output_image->textures.begin() + is, output_image->textures.begin() + is + 1},
190 output_image->texture_width,
191 output_image->texture_height);
// Operator class template, parameterized on the element type and on which variant
// (PRelu or Relu) it runs. The class head and constructor signature are not visible
// in this excerpt.
197 template <
typename T, GLPRelu::ReluType reluType>
// The "order" argument defaults to "NCHW" when it is not supplied.
202 order_(StringToStorageOrder(OperatorBase::GetSingleArgument<string>(
"order",
"NCHW"))) {
// Any storage order other than NCHW is rejected.
203 OPERATOR_NEEDS_FEATURE(this->order_ == StorageOrder::NCHW,
"OpenGL only supports NCHW order.");
// Applies the Relu/PRelu filter to input 0 and stores the result in output 0.
// Several lines of this method (output allocation, constructor arguments, the return
// statement) are not visible in this excerpt.
206 bool RunOnDevice()
override {
207 const GLImageVector<T>& input = Inputs()[0]->template Get<GLImageVector<T>>();
208 const int num_images = input.size();
209 const int input_channels = input.channels();
210 const int input_width = input.width();
211 const int input_height = input.height();
// The output has the same channel count and spatial size as the input.
213 const int output_channels = input_channels;
214 const int output_width = input_width;
215 const int output_height = input_height;
// Optional "is_last" argument, 0 when absent. Its use is on lines not visible here.
217 int is_last = OperatorBase::GetSingleArgument<int>(
"is_last", 0);
// The output tiling mirrors the input tiling.
219 const int input_tile_x = input.tile_x(), input_tile_y = input.tile_y();
220 const int output_tile_x = input_tile_x, output_tile_y = input_tile_y;
// A tiled input must have exactly one slice.
221 if (input_tile_x > 1 || input_tile_y > 1) {
222 CAFFE_ENFORCE_EQ(input.slices(), 1,
"Input needs to be tiled in a single texture");
// The weights come from the second input and exist only for the PRelu variant.
233 const auto* scale = reluType == GLPRelu::PRelu ? &Input(1) :
nullptr;
// Build the filter for the requested variant. NOTE(review): whether this is guarded so
// the filter is only built once is not visible here - confirm.
236 if (reluType == GLPRelu::PRelu) {
237 _prelu.reset(
new GLPRelu(scale->template data<float>(),
245 _prelu.reset(
new GLPRelu(input_channels));
// Run the filter, then store the result in output 0.
249 _prelu->prelu(input, *output, reluType);
251 Outputs()[0]->Reset(output);
// Filter instance owned by the operator; RunOnDevice() creates it via reset() and
// then calls prelu() on it.
258 std::unique_ptr<GLPRelu> _prelu;
// Schema for the OpenGLPRelu operator. Output 0 may be computed in place over input 0.
// NOTE(review): IdenticalTypeAndShape presumably declares the output to match the
// input's type and shape - confirm against the Caffe2 OpSchema documentation.
262 OPERATOR_SCHEMA(OpenGLPRelu)
265 .AllowInplace({{0, 0}})
266 .IdenticalTypeAndShape();
// Schema for the OpenGLRelu operator. Output 0 may be computed in place over input 0.
// NOTE(review): IdenticalTypeAndShape presumably declares the output to match the
// input's type and shape - confirm against the Caffe2 OpSchema documentation.
268 OPERATOR_SCHEMA(OpenGLRelu)
271 .AllowInplace({{0, 0}})
272 .IdenticalTypeAndShape();
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...