2 #include "../core/GLFilter.h" 3 #include "../core/GLImage.h" 4 #include "../core/ImageAllocator.h" 5 #include "gl_tiling_utils.h" 9 #include "caffe2/core/operator.h" 10 #include "caffe2/core/timer.h" 11 #include "caffe2/utils/math.h" 25 std::vector<binding*>(
26 {BINDING(outputSize), BINDING(inputData), BINDING(inputTileRange), BINDING(input_tile_x)}),
29 {{
"TILING", caffe2::to_string(tiling)},
30 {
"OUTPUT_TILES", caffe2::to_string(output_tile_geometries.tiles)},
31 {
"OUTPUT_TILE_X", caffe2::to_string(output_tile_geometries.tile_dims.x)},
32 {
"OUTPUT_TILE_WIDTH", caffe2::to_string(output_tile_geometries.tile_size.x)},
33 {
"OUTPUT_TILE_HEIGHT", caffe2::to_string(output_tile_geometries.tile_size.y)}}),
// GLSL source text for the fragment shader. Declared here and defined out of
// class as GLConcat::fragment_shader.
38 static const char* fragment_shader;
// Fragment shader source for GLConcat, followed by the host-side loop that
// drives it. Several lines are missing from this view (the end of the shader
// literal, the shader main/else structure and the head of the host function),
// so these notes describe the visible lines only.
//
// Shader:
//  - TILING, OUTPUT_TILES, OUTPUT_TILE_X, OUTPUT_TILE_WIDTH and
//    OUTPUT_TILE_HEIGHT are written as $(NAME) placeholders. Presumably they
//    are substituted before compilation from the name/value pairs built
//    earlier in this file - TODO confirm in GLFilter.
//  - The tiled path converts the fragment position into a texel coordinate,
//    derives the 2D output tile (tile), the offset inside that tile
//    (tileCoord) and the 1D output tile index (tileNum). When tileNum is
//    >= inputTileRange.x and < inputTileRange.y it is rebased by subtracting
//    inputTileRange.x, mapped to a 2D input tile using input_tile_x tiles per
//    row, and the matching texel is loaded from inputData and stored.
//  - A second visible path loads the texel at the unmodified coordinate from
//    inputData and stores it.
//  - NOTE(review): the inline (] annotation on inputTileRange disagrees with
//    the comparison, which is inclusive at .x and exclusive at .y.
//
// Host loop:
//  - Iterates k over output_images, i over the input_size inputs and j over
//    the slices of input i.
//  - Each pass attaches texture j of image k of input i as inputData and
//    passes run() the one-element range of output textures starting at os.
//  - Uniforms set per pass: outputSize = output texture width/height,
//    inputTileRange = (is, is + tile_x * tile_y) of the input image,
//    input_tile_x = tile_x of the input image.
//  - After each pass, is advances by tile_x * tile_y of the input image.
//  - The declarations of output_image, is and os, and any update of os, are
//    not visible in this view.
43 const char* GLConcat::fragment_shader = R
"GLSL(#version 300 es 44 #define TILING $(TILING) 47 #define OUTPUT_TILES $(OUTPUT_TILES) 48 #define OUTPUT_TILE_X $(OUTPUT_TILE_X) 49 #define OUTPUT_TILE_WIDTH $(OUTPUT_TILE_WIDTH) 50 #define OUTPUT_TILE_HEIGHT $(OUTPUT_TILE_HEIGHT) 52 precision mediump float; 53 precision mediump int; 55 in highp vec2 v_texCoord; 56 TEXTURE_INPUT(inputData); 57 TEXTURE_OUTPUT(0, outputData); 59 uniform ivec2 outputSize; 60 uniform ivec2 inputTileRange; // (] 61 uniform int input_tile_x; 64 const ivec2 outputTileSize = ivec2(OUTPUT_TILE_WIDTH, OUTPUT_TILE_HEIGHT); 67 ivec2 texelCoord = ivec2(v_texCoord * vec2(outputSize)); 68 ivec2 tile = texelCoord / outputTileSize; // 2D output tile idx 69 ivec2 tileCoord = texelCoord % outputTileSize; // in-tile coordinates 70 int tileNum = OUTPUT_TILE_X * tile.y + tile.x; // 1D output tile idx 72 if (tileNum >= inputTileRange.x && tileNum < inputTileRange.y) { 73 tileNum = tileNum - inputTileRange.x; 74 texelCoord = ivec2(tileNum % input_tile_x, tileNum / input_tile_x) * ivec2(OUTPUT_TILE_WIDTH, OUTPUT_TILE_HEIGHT) + tileCoord; 75 vec4 value = TEXTURE_LOAD(inputData, texelCoord); 76 outputData = TEXTURE_STORE(value); 85 ivec2 texelCoord = ivec2(v_texCoord * vec2(outputSize)); 86 vec4 value = TEXTURE_LOAD(inputData, texelCoord); 87 outputData = TEXTURE_STORE(value); 95 for (
int k = 0; k < output_images.size(); k++) {
99 for (
int i = 0; i < input_size; i++) {
100 for (
int j = 0; j < input_images[i]->slices(); j++) {
101 GLImage<T>* input_image = (*input_images[i])[k];
102 std::vector<texture_attachment> input_attachments;
103 input_attachments.push_back({input_image->textures[j], inputData});
105 run(input_attachments,
106 {output_image->textures.begin() + os, output_image->textures.begin() + os + 1},
108 glUniform2i(outputSize->location, output_image->texture_width, output_image->texture_height);
109 glUniform2i(inputTileRange->location, is, is + input_image->tile_x * input_image->tile_y);
110 glUniform1i(input_tile_x->location, input_image->tile_x);
112 output_image->texture_width,
113 output_image->texture_height);
117 is += input_image->tile_x * input_image->tile_y;
124 template <
typename T>
129 order_(StringToStorageOrder(OperatorBase::GetSingleArgument<string>(
"order",
"NCHW"))) {
130 OPERATOR_NEEDS_FEATURE(this->order_ == StorageOrder::NCHW,
"OpenGL only supports NCHW order.");
// Validates all inputs, then runs GLConcat to produce one output whose channel
// count is the sum of the input channel counts and whose width and height
// match the first input. Several statements of this method are missing from
// this view, so the notes below cover the visible lines only.
133 bool RunOnDevice()
override {
// The first input is the reference for image count, width and height.
134 const GLImageVector<T>& input0 = Inputs()[0]->template Get<GLImageVector<T>>();
135 const int num_images = input0.size();
// input_images is an array of per-input pointers; its allocation is not
// visible here, but it is released with delete[] near the end of the method.
138 input_images[0] = &input0;
// Running total of channels over all inputs; becomes output_channels below.
139 int channelCount = input0.channels();
// Integer operator argument tiling (default 0), converted to bool.
141 bool tiling = OperatorBase::GetSingleArgument<int>(
"tiling", 0);
// The first input must have a channel count that is a multiple of 4.
144 CAFFE_ENFORCE_EQ(input0.channels() % 4, 0);
// Check each remaining input against input0 and record it in input_images.
// NOTE(review): auto i = 1 deduces int, so the loop condition presumably
// compares signed with unsigned - confirm the type returned by size().
145 for (
auto i = 1; i < Inputs().size(); i++) {
146 const GLImageVector<T>& inputi = Inputs()[i]->template Get<GLImageVector<T>>();
147 channelCount += inputi.channels();
// Same image count as input0, channels divisible by 4, same width and height.
148 CAFFE_ENFORCE_EQ(num_images, inputi.size());
149 CAFFE_ENFORCE_EQ(inputi.channels() % 4, 0);
150 CAFFE_ENFORCE_EQ(input0.width(), inputi.width());
151 CAFFE_ENFORCE_EQ(input0.height(), inputi.height());
152 input_images[i] = &inputi;
// Tests whether this input is tiled; the body of the branch is not visible in
// this view.
154 if (inputi.tile_x() > 1 || inputi.tile_y() > 1) {
// The output keeps the spatial size of input0 and carries the summed channels.
159 const int input_width = input0.width();
160 const int input_height = input0.height();
162 const int output_channels = channelCount;
163 const int output_width = input_width;
164 const int output_height = input_height;
// Output tile grid, initialised to 1 x 1 and handed to computeOutputTiles.
// Presumably the helper updates both values through references, and the call
// may sit inside a condition that is not visible here - TODO confirm.
166 int output_tile_x = 1;
167 int output_tile_y = 1;
169 computeOutputTiles(output_channels, output_tile_x, output_tile_y);
// Integer operator argument is_last (default 0), forwarded in the argument list
// below.
172 int is_last = OperatorBase::GetSingleArgument<int>(
"is_last", 0);
// Trailing arguments of a call whose head is not visible here; presumably the
// call that creates output - TODO confirm.
175 num_images, output_width, output_height, output_channels, output_tile_x, output_tile_y, is_last);
// Initializer whose declaration head is not visible; presumably the
// output_tile_geometries descriptor (tile grid, per-tile size, tile count).
178 {output_tile_x, output_tile_y}, {output_width, output_height}, output_tile_x * output_tile_y};
// A new GLConcat is built on every call and replaces any previous instance.
179 _concat.reset(
new GLConcat(output_tile_geometries, tiling));
// Run the filter over all inputs, then free the pointer array.
// NOTE(review): delete[] is only reached on the success path. If one of the
// CAFFE_ENFORCE_EQ checks above throws, the array presumably leaks - confirm,
// and consider holding the pointers in a std::vector instead.
182 _concat->concat(input_images, *output, Inputs().size());
183 delete[] input_images;
// Store the result in output 0.
184 Outputs()[0]->Reset(output);
// Owning pointer to the concat filter. RunOnDevice replaces it through reset()
// with a newly constructed GLConcat before calling concat() on it.
191 std::unique_ptr<GLConcat> _concat;
// Schema registration for the OpenGLConcat operator. NumInputs(2, 4) and
// NumOutputs(1, 2) are presumably (min, max) bounds, i.e. 2 to 4 inputs and
// 1 to 2 outputs - TODO confirm against the Caffe2 OperatorSchema API.
// Only Outputs()[0] is written in the visible part of RunOnDevice.
195 OPERATOR_SCHEMA(OpenGLConcat).NumInputs(2, 4).NumOutputs(1, 2);
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...