Caffe2 - C++ API
A deep learning, cross-platform ML framework
conversions.h
#pragma once

#include <cstring> // memcpy in cpu_half2float

#include <caffe2/core/types.h>

#ifdef __CUDA_ARCH__
// Proxy for including cuda_fp16.h, because common_gpu.h
// has necessary diagnostic guards.
#include <caffe2/core/common_gpu.h>
#endif

#ifdef __CUDA_ARCH__
#define CONVERSIONS_DECL __host__ __device__ inline
#else
#define CONVERSIONS_DECL inline
#endif

namespace caffe2 {

namespace convert {

namespace {
inline float16 cpu_float2half_rn(float f) {
  float16 ret;

  static_assert(
      sizeof(unsigned int) == sizeof(float),
      "Programming error sizeof(unsigned int) != sizeof(float)");

  unsigned* xp = reinterpret_cast<unsigned int*>(&f);
  unsigned x = *xp;
  unsigned u = (x & 0x7fffffff), remainder, shift, lsb, lsb_s1, lsb_m1;
  unsigned sign, exponent, mantissa;

  // Get rid of +NaN/-NaN case first.
  if (u > 0x7f800000) {
    ret.x = 0x7fffU;
    return ret;
  }

  sign = ((x >> 16) & 0x8000);

  // Get rid of +Inf/-Inf, +0/-0.
  if (u > 0x477fefff) {
    ret.x = sign | 0x7c00U;
    return ret;
  }
  if (u < 0x33000001) {
    ret.x = (sign | 0x0000);
    return ret;
  }

  exponent = ((u >> 23) & 0xff);
  mantissa = (u & 0x7fffff);

  if (exponent > 0x70) {
    shift = 13;
    exponent -= 0x70;
  } else {
    shift = 0x7e - exponent;
    exponent = 0;
    mantissa |= 0x800000;
  }
  lsb = (1 << shift);
  lsb_s1 = (lsb >> 1);
  lsb_m1 = (lsb - 1);

  // Round to nearest even.
  remainder = (mantissa & lsb_m1);
  mantissa >>= shift;
  if (remainder > lsb_s1 || (remainder == lsb_s1 && (mantissa & 0x1))) {
    ++mantissa;
    if (!(mantissa & 0x3ff)) {
      ++exponent;
      mantissa = 0;
    }
  }

  ret.x = (sign | (exponent << 10) | mantissa);

  return ret;
}
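
// Examples of the rounding above (illustrative):
//   1.0f     (bits 0x3f800000) -> 0x3c00
//   65504.0f (bits 0x477fe000) -> 0x7bff (largest finite half)
//   1e-8f    (below the 0x33000001 cutoff) -> signed zero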

inline float cpu_half2float(float16 h) {
  unsigned sign = ((h.x >> 15) & 1);
  unsigned exponent = ((h.x >> 10) & 0x1f);
  unsigned mantissa = ((h.x & 0x3ff) << 13);

  if (exponent == 0x1f) { /* NaN or Inf */
    mantissa = (mantissa ? (sign = 0, 0x7fffff) : 0);
    exponent = 0xff;
  } else if (!exponent) { /* Denorm or Zero */
    if (mantissa) {
      unsigned int msb;
      exponent = 0x71;
      do {
        msb = (mantissa & 0x400000);
        mantissa <<= 1; /* normalize */
        --exponent;
      } while (!msb);
      mantissa &= 0x7fffff; /* 1.mantissa is implicit */
    }
  } else {
    exponent += 0x70;
  }

  unsigned i = ((sign << 31) | (exponent << 23) | mantissa);
  float ret;
  memcpy(&ret, &i, sizeof(i));
  return ret;
}
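
// The reverse direction is exact: every float16 value is representable as a
// float, e.g. 0x3c00 -> 1.0f and 0x7bff -> 65504.0f.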

} // anonymous namespace

#if __CUDACC__

#if CUDA_VERSION >= 9000
CONVERSIONS_DECL float16 halfToFloat16(half x) {
#ifdef __GNUC__
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
#pragma GCC diagnostic push
#endif
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#endif // __GNUC__
  float16 r = *reinterpret_cast<float16*>(&x);
#ifdef __GNUC__
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
#pragma GCC diagnostic pop
#endif
#endif // __GNUC__
  return r;
}

inline half float16ToHalf(const float16 x) {
  __half_raw hr;
  hr.x = x.x;
  half r(hr);
  return r;
}

inline half floatToHalf(const float x) {
  float16 xh = cpu_float2half_rn(x);
  return float16ToHalf(xh);
}

#else
inline float16 halfToFloat16(__half x) {
  float16 r;
  r.x = x.x;
  return r;
}

inline __half float16ToHalf(const float16 x) {
  __half r;
  r.x = x.x;
  return r;
}

inline half floatToHalf(const float x) {
  float16 xh = cpu_float2half_rn(x);
  return float16ToHalf(xh);
}
#endif // CUDA_VERSION

#endif // __CUDACC__

// general version: defer to static_cast
template <typename IN, typename OUT>
CONVERSIONS_DECL OUT To(const IN in) {
  return static_cast<OUT>(in);
}

// explicit for fp16
template <>
CONVERSIONS_DECL float16 To(const float in) {
#if __CUDA_ARCH__
  // hacky interface between C2 fp16 and CUDA
#if CUDA_VERSION >= 9000
  half rh = static_cast<half>(in);
  return halfToFloat16(rh);
#else
  float16 ret;
  ret.x = __float2half(in).x;
  return ret;
#endif // CUDA_VERSION >= 9000
#else
  return cpu_float2half_rn(in);
#endif
}

template <>
CONVERSIONS_DECL float To(const float16 in) {
#if __CUDA_ARCH__
#if CUDA_VERSION >= 9000
  __half_raw tmp;
#else
  __half tmp;
#endif
  tmp.x = in.x;
  return __half2float(tmp);
#else
  return cpu_half2float(in);
#endif
}

template <>
CONVERSIONS_DECL float To(const float in) {
  return in;
}

template <typename OUT, typename IN>
CONVERSIONS_DECL OUT Get(IN x) {
  return static_cast<OUT>(x);
}

template <>
CONVERSIONS_DECL float Get(float16 x) {
  return To<float16, float>(x);
}

template <>
CONVERSIONS_DECL float16 Get(float x) {
  return To<float, float16>(x);
}

} // namespace convert

} // namespace caffe2

#undef CONVERSIONS_DECL
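
A minimal host-side usage sketch (the include path, build setup, and file name are assumptions, not taken from this page); it converts a float to float16 and back with To<> and Get<>:

// host_roundtrip.cc -- illustrative sketch only.
#include <cstdio>

#include "caffe2/utils/conversions.h" // assumed location of this header

int main() {
  using caffe2::float16;
  namespace cvt = caffe2::convert;

  const float x = 0.1f;

  // float -> float16: on the CPU path this calls cpu_float2half_rn
  // (round-to-nearest-even).
  float16 h = cvt::To<float, float16>(x);

  // float16 -> float: exact, via cpu_half2float.
  float y = cvt::To<float16, float>(h);

  // Get<> deduces the source type and dispatches to the To<> specialization.
  float z = cvt::Get<float>(h);

  std::printf(
      "x=%.9g  h.x=0x%04x  y=%.9g  z=%.9g\n",
      x,
      static_cast<unsigned>(h.x),
      y,
      z);
  return 0;
}

When the same calls are compiled by nvcc for device code, the __CUDA_ARCH__ and CUDA_VERSION branches above route the conversions through the CUDA half types instead of the CPU helpers.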