#ifndef CAFFE2_OPENGL_CONTEXT_H_
#define CAFFE2_OPENGL_CONTEXT_H_

#ifdef CAFFE2_OPENGL_BACKEND
#error Can only build one OpenGL backend at a time.
#else
#define CAFFE2_OPENGL_BACKEND
#endif

#include "caffe2/core/allocator.h"
#include "caffe2/core/blob.h"
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/tensor.h"

#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Allocator.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/PoolManager.h"
#include "utils/Utils.h"
#include "include/half/half.hpp"

namespace caffe2 {

typedef half_float::half half;
typedef half DataType;

template <typename T> class GLTensor;

class GLContext final {
 public:
  static bool initialized;
  explicit GLContext();
  explicit GLContext(const DeviceOption &option) {
    DCHECK_EQ(option.device_type(), OPENGL);
    GLContext();
  }
  ~GLContext() {}

  // Issues a GLES memory barrier so that subsequent operations observe prior
  // compute writes.
  static void sync() { arm_compute::GCScheduler::get().memory_barrier(); }
  template <typename T>
  using deleted_unique_ptr = std::unique_ptr<T, std::function<void(T *)>>;
  template <typename T>
  static deleted_unique_ptr<const GLTensor<T>> getGLTensor(const Blob *b) {
    if (b->IsType<TensorCPU>()) {
      // Wrap a CPU tensor: build a new GLTensor with matching dims and hand
      // out an owning pointer.
      auto &Xcpu = b->Get<TensorCPU>();
      GLTensor<T> *X_raw_ptr = new GLTensor<T>();
      X_raw_ptr->ResizeLike(Xcpu);
      deleted_unique_ptr<const GLTensor<T>> X_unique_ptr(
          const_cast<const GLTensor<T> *>(X_raw_ptr),
          [](const GLTensor<T> *X) { delete X; });
      return X_unique_ptr;
    }
    // The blob already holds a GLTensor: hand out a non-owning pointer whose
    // deleter is a no-op.
    const GLTensor<T> *X_raw_ptr = &b->Get<GLTensor<T>>();
    deleted_unique_ptr<const GLTensor<T>> X_unique_ptr(
        X_raw_ptr, [](const GLTensor<T> *X) {});
    return X_unique_ptr;
  }
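  // A minimal usage sketch (illustrative only, not part of the original
  // header; the blob setup is hypothetical and assumes a float TensorCPU
  // input):
  //
  //   Blob blob;
  //   auto *t = blob.GetMutable<TensorCPU>();
  //   t->Resize(1, 3, 8, 8);
  //   t->mutable_data<float>();  // CPU data must be float (see fillGLTensor)
  //   auto X = GLContext::getGLTensor<half>(&blob);
  //   // X has its dims set; GPU memory is committed later via lazy_allocate.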
  // Everything below is largely boilerplate required by the Caffe2 Context
  // interface.
  static std::pair<void *, MemoryDeleter> New(size_t nbytes) {
    return std::pair<void *, MemoryDeleter>(malloc(nbytes), GLContext::Delete);
  }

  static void Delete(void *data) {
    if (data != nullptr) {
      free(data);
    }
  }
  template <class SrcContext, class DstContext>
  inline void CopyBytes(size_t nbytes, const void *src, void *dst) {}
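  // Note: CopyBytes is deliberately a no-op for this backend; host<->GPU
  // transfers appear to go through GLTensor::map()/unmap() instead (see
  // fillGLTensor and getTensorCPU below).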
  template <typename T, class SrcContext, class DstContext>
  inline void Copy(int n, const T *src, T *dst) {
    CopyBytes<SrcContext, DstContext>(n * sizeof(T),
                                      static_cast<const void *>(src),
                                      static_cast<void *>(dst));
  }
  template <class SrcContext, class DstContext>
  inline void CopyItems(const TypeMeta &meta, size_t n, const void *src,
                        void *dst) {
    CAFFE_ENFORCE(!meta.copy(), "GLContext requires fundamental types.");
    CopyBytes<SrcContext, DstContext>(n * meta.itemsize(), src, dst);
  }
  void SwitchToDevice(int a, ...) {}
  void SwitchToDevice() { SwitchToDevice(0); }

  inline void WaitEvent(const Event &ev) {}
  void FinishDeviceComputation() {}
  inline void Record(Event *ev, const char *&) const {}
  static bool IsStreamFree(const DeviceOption & /* option */,
                           int /* stream_id */) {
    return true;
  }
  bool HasAsyncPartDefault() const { return false; }
  bool SupportsAsyncScheduling() const { return false; }
};
template <typename T> class GLTensor {
 private:
  bool allocated_ = false;

 public:
  GLTensor() { tensor_ = make_unique<arm_compute::GCTensor>(); }
  ~GLTensor() { tensor_->allocator()->free(); }
  template <typename TensorType> void ResizeLike(TensorType &X) {
    tensor_->allocator()->free();
    SetDims(X.dims());
    shape_ = arm_compute::TensorShape();
    for (int i = 0; i < dims_.size(); i++) {
      shape_.set(dims_.size() - i - 1, dims_[i]);
    }

    tensor_->allocator()->init(
        arm_compute::TensorInfo(shape_, 1, arm_compute::DataType::F16));
  }
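  // Illustrative example of the dimension reversal in ResizeLike/Resize:
  // Caffe2 NCHW dims {N, C, H, W} = {1, 3, 8, 8} become the arm_compute
  // TensorShape (W, H, C, N) = (8, 8, 3, 1), because arm_compute orders
  // dimensions fastest-varying first.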
  template <typename... Ts> void Resize(Ts... dim_source) {
    bool size_changed = SetDims(dim_source...);
    if (size_changed) {
      // TODO: Make it type generic
      int64_t new_size = size_ * sizeof(T);
      tensor_->allocator()->free();
      for (int i = 0; i < dims_.size(); i++) {
        shape_.set(dims_.size() - i - 1, dims_[i]);
      }
      tensor_->allocator()->init(
          arm_compute::TensorInfo(shape_, 1, arm_compute::DataType::F16));
    }
  }
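  // e.g. t.Resize(1, 3, 8, 8). Resize re-initializes the tensor metadata only
  // when the total element count changes; a reshape to the same size keeps the
  // existing allocator state.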
  // Allocates the underlying GPU tensor and, if the blob holds a TensorCPU,
  // copies its contents over on first use.
  void lazy_allocate(const Blob *b, bool allocate_tensor,
                     bool try_to_copy_from_cpu) const {
    if (try_to_copy_from_cpu) {
      // Nothing to copy if the blob already holds a GLTensor.
      if (!b->IsType<GLTensor>()) {
        if (allocate_tensor) {
          allocate();
        }
        fillGLTensor(b);
      }
    }
  }
  void allocate() const { tensor_->allocator()->allocate(); }
  void fillGLTensor(const Blob *b) const {
    if (b->IsType<TensorCPU>()) {
      auto &Xcpu = b->Get<TensorCPU>();

      T *buffer = map();
      char *byte_buffer = (char *)buffer;
      auto info = tensor_->info();
      if (Xcpu.ndim() == 4) {
        auto M = Xcpu.dim32(0);
        auto C = Xcpu.dim32(1);
        auto H = Xcpu.dim32(2);
        auto W = Xcpu.dim32(3);
        for (auto m = 0; m < M; ++m) {
          for (auto c = 0; c < C; ++c) {
            for (auto h = 0; h < H; ++h) {
              for (auto w = 0; w < W; ++w) {
                T *b = (T *)(&byte_buffer[info->offset_element_in_bytes(
                    arm_compute::Coordinates(w, h, c, m))]);
                // The CPU input blob is required to hold floats.
                *b = T(Xcpu.data<float>()[((m * C + c) * H + h) * W + w]);
              }
            }
          }
        }
      } else if (Xcpu.ndim() == 3) {
        auto C = Xcpu.dim32(0);
        auto H = Xcpu.dim32(1);
        auto W = Xcpu.dim32(2);
        for (auto c = 0; c < C; ++c) {
          for (auto h = 0; h < H; ++h) {
            for (auto w = 0; w < W; ++w) {
              T *b = (T *)(&byte_buffer[info->offset_element_in_bytes(
                  arm_compute::Coordinates(w, h, c))]);
              *b = T(Xcpu.data<float>()[(c * H + h) * W + w]);
            }
          }
        }
      } else if (Xcpu.ndim() == 2) {
        auto H = Xcpu.dim32(0);
        auto W = Xcpu.dim32(1);
        for (auto h = 0; h < H; ++h) {
          for (auto w = 0; w < W; ++w) {
            T *b = (T *)(&byte_buffer[info->offset_element_in_bytes(
                arm_compute::Coordinates(w, h))]);
            *b = T(Xcpu.data<float>()[h * W + w]);
          }
        }
      } else {
        auto size = Xcpu.dim32(0);
        for (auto i = 0; i < size; ++i) {
          T *b = (T *)(&byte_buffer[info->offset_element_in_bytes(
              arm_compute::Coordinates(i))]);
          *b = T(Xcpu.data<float>()[i]);
        }
      }
      unmap();
    }
  }
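  // The per-element info->offset_element_in_bytes(...) lookup in fillGLTensor
  // keeps the copy correct even when the GPU tensor has padded strides; plain
  // linear indexing into byte_buffer would only be safe for tightly packed
  // layouts.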
  const int32_t ndim() const { return dims_.size(); }

  vector<TIndex> dims() const { return dims_; }

  const int32_t dim32(const int index) const { return dims_.at(index); }

  const int32_t size() const {
    int32_t size = 1;
    for (int i = 0; i < dims_.size(); i++) {
      size *= dims_[i];
    }
    return size;
  }

  arm_compute::GCTensor *get_underlying() const { return tensor_.get(); }

  T *map() const {
    GLContext::sync();
    tensor_->map(true);
    return reinterpret_cast<T *>(tensor_->buffer());
  }

  void unmap() const { return tensor_->unmap(); }
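  // Typical readback pattern (a sketch; `t` is a hypothetical GLTensor<half>):
  //
  //   const half *p = t.map();  // syncs the GPU queue, then maps the buffer
  //   half first = p[0];
  //   t.unmap();                // always unmap before further GPU use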
 private:
  template <typename TI, typename = typename std::enable_if<
                             std::is_integral<TI>::value>::type>
  bool SetDims(const vector<TI> &src) {
    auto old_size = size_;
    dims_.resize(src.size());
    TIndex new_size = 1;
    for (unsigned int i = 0; i < src.size(); ++i) {
      new_size *= src[i];
      dims_[i] = src[i];
    }
    size_ = new_size;
    return size_ != old_size;
  }
  bool SetDims() {
    auto old_size = size_;
    dims_.resize(0);
    size_ = 1;
    return size_ != old_size;
  }

  bool SetDims(const TIndex d0) {
    auto old_size = size_;
    dims_.resize(1);
    dims_[0] = d0;
    size_ = d0;
    return size_ != old_size;
  }

  bool SetDims(const TIndex d0, const TIndex d1) {
    auto old_size = size_;
    dims_.resize(2);
    dims_[0] = d0;
    dims_[1] = d1;
    size_ = d0 * d1;
    return size_ != old_size;
  }

  bool SetDims(const TIndex d0, const TIndex d1, const TIndex d2) {
    auto old_size = size_;
    dims_.resize(3);
    dims_[0] = d0;
    dims_[1] = d1;
    dims_[2] = d2;
    size_ = d0 * d1 * d2;
    return size_ != old_size;
  }

  bool SetDims(const TIndex d0, const TIndex d1, const TIndex d2,
               const TIndex d3) {
    auto old_size = size_;
    dims_.resize(4);
    dims_[0] = d0;
    dims_[1] = d1;
    dims_[2] = d2;
    dims_[3] = d3;
    size_ = d0 * d1 * d2 * d3;
    return size_ != old_size;
  }
  vector<TIndex> dims_;
  TIndex size_ = -1;
  arm_compute::TensorShape shape_;
  unique_ptr<arm_compute::GCTensor> tensor_;
};
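// A hedged usage sketch for getTensorCPU below (Y_gl is a hypothetical
// GLTensor<half> holding an operator output):
//
//   TensorCPU Y_cpu;
//   getTensorCPU(Y_gl, Y_cpu);  // copies half data back as float
//   const float *result = Y_cpu.data<float>();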
template <typename T = half>
void getTensorCPU(const GLTensor<T> &g_, TensorCPU &g) {
  g.Resize(g_.dims());
  T *buffer = g_.map();

  for (auto i = 0; i < g.size(); ++i) {
    auto tmp = buffer[i];
    g.mutable_data<float>()[i] = tmp;
  }
  g_.unmap();
}

} // namespace caffe2

#endif /* CAFFE2_OPENGL_CONTEXT_H_ */