Caffe2 - C++ API
A deep learning, cross-platform ML framework
GLCopyOps.cc
1 
2 #include "caffe2/core/common.h"
3 #include "caffe2/core/operator.h"
4 #include "caffe2/core/timer.h"
5 
6 #include "../core/DataTransfer.h"
7 #include "../core/GLContext.h"
8 #include "../core/GLImage.h"
9 #include "../core/GLPlainTexture.h"
10 #include "../core/ImageAllocator.h"
11 
12 #include <algorithm>
13 
14 namespace caffe2 {
15 template <class T>
16 class CopyToOpenGLOp final : public Operator<CPUContext>, ImageAllocator<T> {
17  public:
18  CopyToOpenGLOp(const OperatorDef& operator_def, Workspace* ws)
19  : Operator<CPUContext>(operator_def, ws) {}
20 
21  bool RunOnDevice() override {
22  // caffe2::Timer timer;
23  const TensorCPU& X = Input(0);
24  const int num_images = X.dim32(0);
25  const int input_channels = X.dim32(1);
26  const int input_width = X.dim32(3);
27  const int input_height = X.dim32(2);
28  const int input_size = input_width * input_height;
29 
30  // set up the OpenGL context
31  GLContext::getGLContext()->set_context();
32 
33  const float* input = X.template data<float>();
34 
35  int tile_x = GetSingleArgument<int>("tile_x", 1);
36  int tile_y = GetSingleArgument<int>("tile_y", 1);
37 
38  GLImageVector<T>* output_image = ImageAllocator<T>::newImage(num_images,
39  input_width,
40  input_height,
41  input_channels,
42  tile_x,
43  tile_y,
44 #if CAFFE2_IOS
45  true
46 #else
47  false
48 #endif
49  );
50 
51  if (output_image->tile_x() > 1 || output_image->tile_y() > 1) {
52  LOG(INFO) << "CopyToOpenGLOp tiling: " << output_image->tile_x() << ":"
53  << output_image->tile_y();
54  }
55 
56  Outputs()[0]->Reset(output_image);
57 
58  for (int i = 0; i < num_images; i++) {
59  const auto textures = (*output_image)[i]->textures;
60  for (int slice = 0; slice < textures.size(); slice++) {
61  // timer.Start();
62 
63  textures[slice]->map_load([&](void* buffer,
64  size_t width,
65  size_t height,
66  size_t stride,
67  size_t channels,
68  const GLTexture::Type& type) {
69  for (int y = 0; y < tile_y; y++) {
70  for (int x = 0; x < tile_x; x++) {
71  const int tiles = slice * tile_x * tile_y + y * tile_x + x;
72  const int slice_channels = std::min(4, input_channels - 4 * tiles);
73  interleaveSlice(
74  (float16_t*)buffer + 4 * (y * input_height * stride + x * input_width),
75  &input[i * input_channels * input_size + 4 * tiles * input_size],
76  input_width,
77  input_height,
78  stride, // texture stride
79  slice_channels);
80  }
81  }
82  });
83  // LOG(INFO) << "Texture uploading takes " << timer.MilliSeconds() << " ms";
84  }
85  }
86 
87  return true;
88  }
89 };
90 
91 REGISTER_CPU_OPERATOR(CopyToOpenGL, CopyToOpenGLOp<float16_t>);
92 OPERATOR_SCHEMA(CopyToOpenGL).NumInputs(1).NumOutputs(1).AllowInplace({{0, 0}});
93 
94 template <class T>
95 class CopyFromOpenGLOp final : public Operator<CPUContext> {
96  public:
97  CopyFromOpenGLOp(const OperatorDef& operator_def, Workspace* ws)
98  : Operator<CPUContext>(operator_def, ws) {}
99 
100  bool RunOnDevice() override {
101  caffe2::Timer timer;
102  const GLImageVector<T>& X = Inputs()[0]->template Get<GLImageVector<T>>();
103  const int num_images = X.size();
104  const int input_channels = X.channels();
105  const int input_width = X.width();
106  const int input_height = X.height();
107 
108  TensorCPU* Y = Output(0);
109  Y->Resize(num_images, input_channels, input_height, input_width);
110  const int output_width = input_width;
111  const int output_height = input_height;
112  const int output_size = input_width * input_height;
113 
114  float* output = Y->mutable_data<float>();
115 
116  const int tile_x = X.tile_x();
117  const int tile_y = X.tile_y();
118  for (int i = 0; i < num_images; i++) {
119  for (int slice = 0; slice < X[i]->slices; slice++) {
120  timer.Start();
121  const GLTexture* texture = X[i]->textures[slice];
122 
123  texture->map_read([&](const void* buffer,
124  size_t width,
125  size_t height,
126  size_t stride,
127  size_t channels,
128  const GLTexture::Type& type) {
129  //#if CAFFE2_ANDROID && defined(__ARM_NEON__)
130  // if (static_cast<AndroidGLContext*>(GLContext::getGLContext())->get_platform() ==
131  // Mali) {
132  // caffe2::Timer timer;
133  // timer.Start();
134  // float16_t* copy_buffer = (float16_t*)malloc(_capacity);
135  // arm_memcpy(
136  // (volatile unsigned char*)copy_buffer, (volatile unsigned char*)buffer,
137  // _capacity);
138  // deInterleaveSlice(
139  // output + 4 * slice * output_size, copy_buffer, width, height, stride,
140  // slice_channels);
141  // free(copy_buffer);
142  // LOG(INFO) << "memcpy takes " << timer.MilliSeconds() << " ms";
143  // } else
144  //#endif
145  {
146  gl_log(GL_VERBOSE,
147  "calling deInterleaveSlice width: %d, height: %d, stride: %d, channels: %d\n",
148  width,
149  height,
150  stride,
151  channels);
152 
153  for (int y = 0; y < tile_y; y++) {
154  for (int x = 0; x < tile_x; x++) {
155  const int tiles = slice * tile_x * tile_y + y * tile_x + x;
156  const int slice_channels = std::min(4, input_channels - 4 * tiles);
157  deInterleaveSlice(
158  output + i * input_channels * output_size + 4 * tiles * output_size,
159  (float16_t*)buffer + 4 * (y * input_height * stride + x * input_width),
160  input_width,
161  input_height,
162  stride,
163  slice_channels);
164  }
165  }
166  }
167  });
168  }
169  }
170  return true;
171  }
172 };
173 
174 REGISTER_CPU_OPERATOR(CopyFromOpenGL, CopyFromOpenGLOp<float16_t>);
175 OPERATOR_SCHEMA(CopyFromOpenGL).NumInputs(1).NumOutputs(1).AllowInplace({{0, 0}});
176 } // namespace caffe2
void Start()
Starts a timer.
Definition: timer.h:24
int dim32(const int i) const
Returns the i-th dimension of the tensor in int.
Definition: tensor.h:657
T * mutable_data()
Returns a typed pointer of the underlying storage.
Definition: tensor.h:578
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
void Resize(Ts...dim_source)
Resizes a tensor.
Definition: tensor.h:288
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
A simple timer object for measuring time.
Definition: timer.h:16