Caffe2 - C++ API
A deep learning, cross-platform ML framework
gl_operator_test.h
#include "caffe2/mobile/contrib/arm-compute/core/context.h"
#include <gtest/gtest.h>

#include <cmath>
#include <random>

#include "caffe2/core/graph.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/workspace.h"

namespace caffe2 {

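// Helpers for building OperatorDefs that target the OpenGL (ARM Compute)
// device: DECLARE_OPENGL_OPERATOR declares a new OperatorDef bound to OPENGL,
// MAKE_OPENGL_OPERATOR retargets an existing OperatorDef*, and ADD_ARG
// attaches a named, typed Argument to an OperatorDef.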
#define DECLARE_OPENGL_OPERATOR(_name)                     \
  OperatorDef _name;                                       \
  _name.mutable_device_option()->set_device_type(OPENGL);

#define MAKE_OPENGL_OPERATOR(_op)                          \
  _op->mutable_device_option()->set_device_type(OPENGL);

#define ADD_ARG(_op, _name, _type, _val) \
  {                                      \
    Argument *arg = _op.add_arg();       \
    arg->set_name(_name);                \
    arg->set_##_type(_val);              \
  }

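// Fills the CPU tensor blob <name> of shape <dims> with either samples from a
// normal distribution (random == true) or the constant <val>.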
// Passing val == 1337 fills the blob with a deterministic pattern
// (element i gets value i), which is unique per element and useful for debugging.
template <typename T = float>
void PopulateCPUBlob(Workspace *ws, bool random, std::string name,
                     std::vector<int> dims, int val = 1, int dist_shift = 0,
                     float variance = 1) {
  Blob *blob = ws->CreateBlob(name);
  auto *tensor = blob->GetMutable<TensorCPU>();
  tensor->Resize(dims);
  T *t_data = tensor->mutable_data<T>();
  std::random_device rd;
  std::mt19937 e2(rd());
  std::normal_distribution<> dist(0 + dist_shift, variance + dist_shift);
  for (int i = 0; i < tensor->size(); ++i) {
    t_data[i] = T(random ? dist(e2) : (val == 1337 ? i : val));
  }
}

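// Runs the CPU and OpenGL nets once each and compares the two output blobs
// element-wise, with either an absolute or a relative tolerance.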
template <typename T = half>
void compareNetResult(Workspace& ws,
                      NetDef& cpu_net, NetDef& gpu_net,
                      string cpu_blob = "ref_Y",
                      string gpu_blob = "gpu_Y",
                      double tol = 0.01,
                      bool relative = false) {
  ws.RunNetOnce(cpu_net);
  ws.RunNetOnce(gpu_net);

  Blob *cpu_out = ws.GetBlob(cpu_blob);
  Blob *gpu_out = ws.GetBlob(gpu_blob);
  EXPECT_NE(nullptr, cpu_out);
  EXPECT_NE(nullptr, gpu_out);

  TensorCPU g;
  auto& g_ = gpu_out->Get<GLTensor<T>>();
  getTensorCPU(g_, g);

  auto& t = cpu_out->Get<TensorCPU>();
  EXPECT_EQ(g.size(), t.size());

  for (auto i = 0; i < g.size(); ++i) {
    if (relative) {
      EXPECT_NEAR(g.data<float>()[i], t.data<float>()[i],
                  tol + tol * std::abs(t.data<float>()[i]))
          << "at index " << i;
    } else {
      EXPECT_NEAR(g.data<float>()[i], t.data<float>()[i], tol)
          << "at index " << i;
    }
  }
}

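// Like compareNetResult, but reads the OpenGL result straight from the mapped
// ARM Compute buffer and compares it to the CPU output in NCHW order,
// tolerating a small fraction of mismatched elements.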
template <typename T = half>
void compareNetResult4D(Workspace& ws,
                        NetDef& cpu_net, NetDef& gpu_net,
                        string cpu_blob = "ref_Y",
                        string gpu_blob = "gpu_Y",
                        double tol = 0.05) {
  ws.RunNetOnce(cpu_net);
  ws.RunNetOnce(gpu_net);

  Blob *cpu_out = ws.GetBlob(cpu_blob);
  Blob *gpu_out = ws.GetBlob(gpu_blob);
  EXPECT_NE(nullptr, cpu_out);
  EXPECT_NE(nullptr, gpu_out);

  auto& g_ = gpu_out->Get<GLTensor<T>>();

  TensorCPU g;
  auto& t = cpu_out->Get<TensorCPU>();
  g.Resize(g_.dims());
  T *buffer = g_.map();
  char *byte_buffer = (char *)buffer;
  auto info = g_.get_underlying()->info();

  CAFFE_ENFORCE(byte_buffer != nullptr);
  auto C = t.dim32(1);
  auto H = t.dim32(2);
  auto W = t.dim32(3);
  int diff_num = 0;
#define get_elem(_a, _b, _c)                          \
  (half *)&byte_buffer[info->offset_element_in_bytes( \
      arm_compute::Coordinates(_a, _b, _c))]
  for (auto c = 0; c < C; ++c) {
    for (auto h = 0; h < H; ++h) {
      for (auto w = 0; w < W; ++w) {
        auto t_elem = t.data<float>()[(c * H + h) * W + w];
        auto g_elem = get_elem(w, h, c);

        if (!std::isnan(t_elem) &&
            (std::abs(t_elem - float(*g_elem)) > tol + tol * std::abs(t_elem))) {
          diff_num++;
        }
        // Allow at most 3% of the elements to differ beyond the tolerance.
        CHECK(diff_num <= 0.03 * C * H * W);
      }
    }
  }
#undef get_elem
  g_.unmap();
}

} // namespace caffe2
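
A rough sketch of how a test built on these helpers might look (assuming an OpenGL "Relu" operator and the AddOp helper from caffe2/core/graph.h; the operator type, blob names, and input shape are illustrative, not taken from this header):

// Hypothetical example: compare a CPU Relu against the OpenGL implementation.
// AddOp, the "Relu" op, and the blob names are assumptions for illustration.
TEST(OPENGLOperatorTest, ReLU) {
  Workspace ws;
  // Random NCHW input shared by both nets.
  PopulateCPUBlob(&ws, true, "X", {1, 4, 8, 8});

  NetDef cpu_net, gpu_net;
  // CPU reference producing "ref_Y".
  AddOp(&cpu_net, "Relu", {"X"}, {"ref_Y"});
  // Same op retargeted to the OPENGL device, producing "gpu_Y".
  OperatorDef *gpu_def = AddOp(&gpu_net, "Relu", {"X"}, {"gpu_Y"});
  MAKE_OPENGL_OPERATOR(gpu_def);

  // Compare with the default absolute tolerance (0.01).
  compareNetResult(ws, cpu_net, gpu_net);
}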