Caffe2 - C++ API
A deep learning, cross-platform ML framework
context.h
#ifndef CAFFE2_OPENGL_CONTEXT_H_
#define CAFFE2_OPENGL_CONTEXT_H_

#ifdef CAFFE2_OPENGL_BACKEND
#error Can only build one OpenGL backend at a time.
#else
#define CAFFE2_OPENGL_BACKEND
#endif

#include "caffe2/core/allocator.h"
#include "caffe2/core/blob.h"
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/tensor.h"

#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Allocator.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/PoolManager.h"
#include "utils/Utils.h"
#include "include/half/half.hpp"

namespace caffe2 {

typedef half_float::half half;
typedef half DataType;
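// GLTensor data lives on the GPU as 16-bit floats (arm_compute F16); CPU
// blobs are expected to hold 32-bit floats and are converted element-wise
// in fillGLTensor and getTensorCPU below.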

template <typename T> class GLTensor;

class GLContext final {
 public:
  static bool initialized;
  explicit GLContext();
  explicit GLContext(const DeviceOption &option) : GLContext() {
    DCHECK_EQ(option.device_type(), OPENGL);
  }
  ~GLContext() {}

  static void sync() { arm_compute::GCScheduler::get().memory_barrier(); }

  template <typename T>
  using deleted_unique_ptr = std::unique_ptr<T, std::function<void(T *)>>;

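  // Returns a GLTensor view of a blob. For a CPU blob this creates a new,
  // owning GLTensor with the same shape (the data itself is copied later,
  // e.g. by lazy_allocate); for a blob that already holds a GLTensor it
  // returns a non-owning pointer with a no-op deleter.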
  template <typename T>
  static deleted_unique_ptr<const GLTensor<T>> getGLTensor(const Blob *b) {
    if (b->IsType<TensorCPU>()) {
      auto &Xcpu = b->Get<TensorCPU>();
      GLTensor<T> *X_raw_ptr;
      X_raw_ptr = new GLTensor<T>();
      X_raw_ptr->ResizeLike(Xcpu);
      deleted_unique_ptr<const GLTensor<T>> X_unique_ptr(
          X_raw_ptr, [](const GLTensor<T> *X) { delete X; });
      return X_unique_ptr;
    }
    const GLTensor<T> *X_raw_ptr;
    X_raw_ptr = &b->Get<GLTensor<T>>();
    deleted_unique_ptr<const GLTensor<T>> X_unique_ptr(
        X_raw_ptr, [](const GLTensor<T> *X) { return; });
    return X_unique_ptr;
  }

  /*
   * Everything below is basically boilerplate for Context classes.
   */
  static std::pair<void *, MemoryDeleter> New(size_t nbytes) {
    return std::pair<void *, MemoryDeleter>(malloc(nbytes), GLContext::Delete);
  }

  static void Delete(void *data) {
    if (data != nullptr) {
      free(data);
    }
  }

  // No-op: byte copies are not implemented for this backend, so Copy and
  // CopyItems below do nothing.
  template <class SrcContext, class DstContext>
  inline void CopyBytes(size_t nbytes, const void *src, void *dst) {}

  template <typename T, class SrcContext, class DstContext>
  inline void Copy(int n, const T *src, T *dst) {
    CopyBytes<SrcContext, DstContext>(n * sizeof(T),
                                      static_cast<const void *>(src),
                                      static_cast<void *>(dst));
  }

  template <class SrcContext, class DstContext>
  inline void CopyItems(const TypeMeta &meta, size_t n, const void *src,
                        void *dst) {
    CAFFE_ENFORCE(!meta.copy(), "GLContext requires fundamental types.");
    CopyBytes<SrcContext, DstContext>(n * meta.itemsize(), src, dst);
  }

  void SwitchToDevice(int a, ...) { /* TODO */ }
  void SwitchToDevice() { SwitchToDevice(0); }

  inline void WaitEvent(const Event &ev) { /* TODO */ }
  void FinishDeviceComputation() { /* TODO */ }
  inline void Record(Event *ev, const char *&) const { /* TODO */ }
  static bool IsStreamFree(const DeviceOption & /* unused */,
                           int /* unused */) {
    return true;
  }
  bool HasAsyncPartDefault() const { return false; }
  bool SupportsAsyncScheduling() const { return false; }
};

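// GLTensor wraps an arm_compute::GCTensor and mirrors a small subset of the
// caffe2::Tensor shape API (Resize, ResizeLike, dims, ndim, dim32, size).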
template <typename T> class GLTensor {
 private:
  bool allocated_ = false;

 public:
  GLTensor() { tensor_ = make_unique<arm_compute::GCTensor>(); }
  ~GLTensor() { tensor_->allocator()->free(); }

  template <typename TensorType> void ResizeLike(TensorType &X) {
    tensor_->allocator()->free();
    SetDims(X.dims());
    // arm_compute shapes list the fastest-varying dimension first, so the
    // caffe2 dims are reversed.
    shape_ = arm_compute::TensorShape();
    for (int i = 0; i < dims_.size(); i++) {
      shape_.set(dims_.size() - i - 1, dims_[i]);
    }

    tensor_->allocator()->init(
        arm_compute::TensorInfo(shape_, 1, arm_compute::DataType::F16));
  }

  template <typename... Ts> void Resize(Ts... dim_source) {
    bool size_changed = SetDims(dim_source...);
    if (size_changed) {
      // TODO: Make it type generic
      int64_t new_size = size_ * sizeof(T);
      tensor_->allocator()->free();
      // Reset the shape before repopulating it (as in ResizeLike) so that
      // stale dimensions do not survive a rank change.
      shape_ = arm_compute::TensorShape();
      for (int i = 0; i < dims_.size(); i++) {
        shape_.set(dims_.size() - i - 1, dims_[i]);
      }
      tensor_->allocator()->init(
          arm_compute::TensorInfo(shape_, 1, arm_compute::DataType::F16));
    }
  }

  // Allocates the device buffer (if requested) and copies data over when the
  // source blob lives on the CPU; blobs that already hold a GLTensor are
  // skipped since there is nothing to copy.
  void lazy_allocate(const Blob *b, bool allocate_tensor,
                     bool try_to_copy_from_cpu) const {
    if (try_to_copy_from_cpu) {
      // we skip GLTensors, nothing to copy
      if (!b->IsType<GLTensor>()) {
        // typically only called on the second run
        if (allocate_tensor) {
          allocate();
        }
        fillGLTensor(b);
      }
    }
  }

  void allocate() const { tensor_->allocator()->allocate(); }

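  // Copies a float TensorCPU into the mapped F16 buffer. Caffe2 CPU tensors
  // are contiguous NCHW, so the linear index (((m * C + c) * H + h) * W + w)
  // is rewritten through arm_compute::Coordinates(w, h, c, m), which takes
  // the fastest-varying dimension first.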
  void fillGLTensor(const Blob *b) const {
    if (b->IsType<TensorCPU>()) {
      auto &Xcpu = b->Get<TensorCPU>();

      T *buffer = map();
      char *byte_buffer = (char *)buffer;
      auto info = tensor_->info();
      if (Xcpu.ndim() == 4) {
        auto M = Xcpu.dim32(0);
        auto C = Xcpu.dim32(1);
        auto H = Xcpu.dim32(2);
        auto W = Xcpu.dim32(3);
        for (auto m = 0; m < M; ++m) {
          for (auto c = 0; c < C; ++c) {
            for (auto h = 0; h < H; ++h) {
              for (auto w = 0; w < W; ++w) {
                T *b = (T *)(&byte_buffer[info->offset_element_in_bytes(
                    arm_compute::Coordinates(w, h, c, m))]);
                // require cpu input blob to be float
                *b = T(Xcpu.data<float>()[((m * C + c) * H + h) * W + w]);
              }
            }
          }
        }
      } else if (Xcpu.ndim() == 3) {
        auto C = Xcpu.dim32(0);
        auto H = Xcpu.dim32(1);
        auto W = Xcpu.dim32(2);
        for (auto c = 0; c < C; ++c) {
          for (auto h = 0; h < H; ++h) {
            for (auto w = 0; w < W; ++w) {
              T *b = (T *)(&byte_buffer[info->offset_element_in_bytes(
                  arm_compute::Coordinates(w, h, c))]);
              // require cpu input blob to be float
              *b = T(Xcpu.data<float>()[(c * H + h) * W + w]);
            }
          }
        }
      } else if (Xcpu.ndim() == 2) {
        auto H = Xcpu.dim32(0);
        auto W = Xcpu.dim32(1);
        for (auto h = 0; h < H; ++h) {
          for (auto w = 0; w < W; ++w) {
            T *b = (T *)(&byte_buffer[info->offset_element_in_bytes(
                arm_compute::Coordinates(w, h))]);
            // require cpu input blob to be float
            *b = T(Xcpu.data<float>()[h * W + w]);
          }
        }
      } else {
        auto size = Xcpu.dim32(0);
        for (auto i = 0; i < size; ++i) {
          T *b = (T *)(&byte_buffer[info->offset_element_in_bytes(
              arm_compute::Coordinates(i))]);
          // require cpu input blob to be float
          *b = T(Xcpu.data<float>()[i]);
        }
      }
      unmap();
    }
  }

  int32_t ndim() const { return dims_.size(); }

  vector<TIndex> dims() const { return dims_; }

  int32_t dim32(const int index) const { return dims_.at(index); }

  int32_t size() const {
    int32_t s = 1;
    for (int i = 0; i < dims_.size(); i++) {
      s *= dims_[i];
    }
    return s;
  }

  arm_compute::GCTensor *get_underlying() const { return tensor_.get(); }

  // Maps the device buffer into host memory for reading and writing; callers
  // must call unmap() when they are done with the returned pointer.
  T *map() const {
    GLContext::sync();
    tensor_->map(true);
    return reinterpret_cast<T *>(tensor_->buffer());
  }

  void unmap() const { return tensor_->unmap(); }

  void sync() const {
    GLContext::sync();
    tensor_->map();
    tensor_->unmap();
  }

 private:
  template <typename TI, typename = typename std::enable_if<
                             std::is_integral<TI>::value>::type>
  bool SetDims(const vector<TI> &src) {
    auto old_size = size_;
    dims_.resize(src.size());
    TIndex new_size = 1;
    for (unsigned int i = 0; i < src.size(); ++i) {
      new_size *= src[i];
      dims_[i] = src[i];
    }
    size_ = new_size;
    return size_ != old_size;
  }

  bool SetDims() {
    auto old_size = size_;
    dims_.resize(0);
    size_ = 1;
    return size_ != old_size;
  }

  bool SetDims(const TIndex d0) {
    auto old_size = size_;
    dims_.resize(1);
    dims_[0] = d0;
    size_ = d0;
    return size_ != old_size;
  }

  bool SetDims(const TIndex d0, const TIndex d1) {
    auto old_size = size_;
    dims_.resize(2);
    dims_[0] = d0;
    dims_[1] = d1;
    size_ = d0 * d1;
    return size_ != old_size;
  }

  bool SetDims(const TIndex d0, const TIndex d1, const TIndex d2) {
    auto old_size = size_;
    dims_.resize(3);
    dims_[0] = d0;
    dims_[1] = d1;
    dims_[2] = d2;
    size_ = d0 * d1 * d2;
    return size_ != old_size;
  }

  bool SetDims(const TIndex d0, const TIndex d1, const TIndex d2,
               const TIndex d3) {
    auto old_size = size_;
    dims_.resize(4);
    dims_[0] = d0;
    dims_[1] = d1;
    dims_[2] = d2;
    dims_[3] = d3;
    size_ = d0 * d1 * d2 * d3;
    return size_ != old_size;
  }

  vector<TIndex> dims_;
  TIndex size_ = -1;
  arm_compute::TensorShape shape_;
  unique_ptr<arm_compute::GCTensor> tensor_;
};

template <typename T = half>
void getTensorCPU(const GLTensor<T> &g_, TensorCPU &g) {
  g.Resize(g_.dims());
  T *buffer = g_.map();

  // Convert each F16 element back to 32-bit float on the CPU side.
  float *data = g.mutable_data<float>();
  for (auto i = 0; i < g.size(); ++i) {
    data[i] = buffer[i];
  }
  g_.unmap();
}

} // namespace caffe2

#endif /* CAFFE2_OPENGL_CONTEXT_H_ */
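
A minimal usage sketch (not part of the header): round-tripping a CPU blob
through a GLTensor. The blob contents, shape, function name, and the include
path for this header are illustrative, and the sketch assumes the GLES
compute runtime has already been initialized.

  #include "caffe2/core/blob.h"
  #include "context.h" // this header

  void roundtrip_example() {
    caffe2::Blob blob;
    auto *cpu = blob.GetMutable<caffe2::TensorCPU>();
    cpu->Resize(1, 3, 4, 4); // NCHW
    for (int i = 0; i < cpu->size(); ++i) {
      cpu->mutable_data<float>()[i] = i;
    }

    // Shape a GLTensor like the CPU blob, allocate its device buffer, and
    // copy the float data in as F16.
    auto gl = caffe2::GLContext::getGLTensor<caffe2::half>(&blob);
    gl->allocate();
    gl->fillGLTensor(&blob);

    // ... run GLES compute work against gl->get_underlying() ...

    // Copy the F16 result back into a float TensorCPU.
    caffe2::TensorCPU out;
    caffe2::getTensorCPU(*gl, out);
  }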