// NOTE(review): the leading integers (3, 8, 12, ...) look like line numbers
// of the original file fused into the text, and the gaps between them suggest
// that statements are missing from this extract -- confirm against the
// original file before relying on the comments below.
//
// The first physical line carries the two includes, two alternative
// definitions of CONVERSIONS_DECL (`__host__ __device__ inline` and plain
// `inline`; the conditional that selects between them is not visible) and the
// start of cpu_float2half_rn.
//
// cpu_float2half_rn: converts a 32-bit float `f` to a float16 using integer
// bit manipulation only. The result is assembled in ret.x as
// sign | (exponent << 10) | mantissa (see the last visible statement).
3 #include <caffe2/core/types.h> 8 #include <caffe2/core/common_gpu.h> 12 #define CONVERSIONS_DECL __host__ __device__ inline 14 #define CONVERSIONS_DECL inline 22 inline float16 cpu_float2half_rn(
float f) {
// Size check: the bit tricks below need unsigned int and float to have the
// same size.
26 sizeof(
unsigned int) ==
sizeof(
float),
27 "Programming error sizeof(unsigned int) != sizeof(float)");
// View the storage of f through an unsigned int pointer.
29 unsigned* xp =
reinterpret_cast<unsigned int*
>(&f);
// u is x with bit 31 (the float sign bit) masked off.
// NOTE(review): the declaration of x is not visible here; presumably it holds
// the bits read through xp -- confirm.
31 unsigned u = (x & 0x7fffffff), remainder, shift, lsb, lsb_s1, lsb_m1;
32 unsigned sign, exponent, mantissa;
// Move the float sign bit (bit 31) down to bit 15, where the 16-bit result
// keeps its sign.
40 sign = ((x >> 16) & 0x8000);
// Result with bits 10-14 (the exponent field) all set and a zero mantissa,
// keeping the sign. NOTE(review): the condition guarding this is not visible.
44 ret.x = sign | 0x7c00U;
// Result with only the sign bit kept (signed zero).
// NOTE(review): the condition guarding this is not visible.
48 ret.x = (sign | 0x0000);
// Split the magnitude into the float's 8-bit exponent field (bits 23-30) and
// its 23-bit mantissa field.
52 exponent = ((u >> 23) & 0xff);
53 mantissa = (u & 0x7fffff);
// NOTE(review): the body of this branch is not visible; the shift computed on
// the next visible line presumably belongs to the other branch -- confirm.
55 if (exponent > 0x70) {
59 shift = 0x7e - exponent;
// Keep the mantissa bits selected by the mask lsb_m1.
// NOTE(review): the assignments to lsb, lsb_s1 and lsb_m1 are not visible.
68 remainder = (mantissa & lsb_m1);
// Rounding decision: taken when remainder is above lsb_s1, or equal to it
// while the lowest mantissa bit is set (i.e. a tie with an odd mantissa).
// NOTE(review): presumably the body increments mantissa -- not visible.
70 if (remainder > lsb_s1 || (remainder == lsb_s1 && (mantissa & 0x1))) {
// True when the low ten mantissa bits are all zero; presumably this handles a
// rounding carry into the exponent -- the body is not visible.
72 if (!(mantissa & 0x3ff)) {
// Pack sign (bit 15), exponent (from bit 10 upward) and mantissa into the
// 16-bit result.
78 ret.x = (sign | (exponent << 10) | mantissa);
// cpu_half2float: expands the 16-bit pattern in h.x into a 32-bit float
// pattern using integer operations, then copies that pattern into the result.
// NOTE(review): the leading integers look like original line numbers fused
// into the text; gaps (87, 90, 92-95, ...) suggest statements are missing
// from this extract -- confirm against the original file.
83 inline float cpu_half2float(float16 h) {
// Field extraction: sign is bit 15, exponent is the 5 bits at 10-14, and the
// low 10 mantissa bits are shifted left by 13 so they sit at the top of a
// 23-bit float mantissa.
84 unsigned sign = ((h.x >> 15) & 1);
85 unsigned exponent = ((h.x >> 10) & 0x1f);
86 unsigned mantissa = ((h.x & 0x3ff) << 13);
// Exponent field all ones: a non-zero mantissa clears the sign and becomes
// 0x7fffff (the comma expression does both); a zero mantissa stays zero.
// NOTE(review): the matching exponent assignment is not visible.
88 if (exponent == 0x1f) {
89 mantissa = (mantissa ? (sign = 0, 0x7fffff) : 0);
91 }
// Exponent field zero.
// NOTE(review): only two statements of this branch are visible; presumably
// they belong to a loop that shifts the mantissa until bit 22 is set -- the
// loop itself is not visible.
else if (!exponent) {
// Test bit 22, the top bit of the 23-bit mantissa field.
96 msb = (mantissa & 0x400000);
// Keep only the low 23 bits.
100 mantissa &= 0x7fffff;
// Assemble the 32-bit pattern: sign at bit 31, exponent from bit 23, mantissa
// in the low bits.
106 unsigned i = ((sign << 31) | (exponent << 23) | mantissa);
// Copy the bit pattern into ret byte-for-byte.
// NOTE(review): the declaration of ret is not visible; presumably a float.
108 memcpy(&ret, &i,
sizeof(i));
// Helpers compiled when CUDA_VERSION >= 9000.
// NOTE(review): the leading integers look like original line numbers fused
// into the text, and several preprocessor directives share one physical line
// here; gaps in the numbering suggest lines (e.g. #endif, return, closing
// braces) are missing from this extract -- confirm against the original file.
//
// halfToFloat16: produces a float16 by reading the storage of the `half`
// argument through a float16 pointer.
116 #if CUDA_VERSION >= 9000 117 CONVERSIONS_DECL float16 halfToFloat16(half x) {
// For GCC 4.6 or newer, the diagnostic state is saved and -Wstrict-aliasing is
// silenced around the pointer cast below.
119 #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) 120 #pragma GCC diagnostic push 122 #pragma GCC diagnostic ignored "-Wstrict-aliasing" 124 float16 r = *
reinterpret_cast<float16*
>(&x);
// The saved diagnostic state is restored under the same GCC version test.
// The same physical line then opens float16ToHalf, which takes a float16 and
// returns a half; its body is not visible in this extract.
126 #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) 127 #pragma GCC diagnostic pop 133 inline half float16ToHalf(
const float16 x) {
// floatToHalf: converts a float to `half` in two steps -- first to float16
// with the bit-level routine cpu_float2half_rn, then to half with
// float16ToHalf.
// NOTE(review): the leading integers look like original line numbers fused
// into the text; the closing brace is not visible in this extract.
140 inline half floatToHalf(
const float x) {
141 float16 xh = cpu_float2half_rn(x);
142 return float16ToHalf(xh);
// Second pair of half <-> float16 helpers, declared with the `__half` type
// name and plain `inline`.
// NOTE(review): only the signatures are visible; the bodies (original lines
// 147-151 and 153-157 by the fused numbering) are missing from this extract.
// Presumably this is the alternative branch of the CUDA_VERSION conditional,
// but the directive separating the two branches is not visible -- confirm.
//
// halfToFloat16: takes a __half, returns a float16.
146 inline float16 halfToFloat16(__half x) {
// float16ToHalf: takes a float16, returns a __half.
152 inline __half float16ToHalf(
const float16 x) {
// floatToHalf: converts a float to `half` by first producing a float16 with
// cpu_float2half_rn and then passing it through float16ToHalf.
// NOTE(review): the leading integers look like original line numbers fused
// into the text; the closing brace is not visible in this extract.
158 inline half floatToHalf(
const float x) {
159 float16 xh = cpu_float2half_rn(x);
160 return float16ToHalf(xh);
// The first physical line closes the CUDA_VERSION conditional and then opens
// the generic To template.
// NOTE(review): the leading integers look like original line numbers fused
// into the text; the closing brace of To is not visible in this extract.
//
// To<IN, OUT>: generic conversion that static_casts `in` from IN to OUT.
// The template parameters are ordered source type first, destination second.
162 #endif // CUDA_VERSION 167 template <
typename IN,
typename OUT>
168 CONVERSIONS_DECL OUT To(
const IN in) {
169 return static_cast<OUT
>(in);
// To (float -> float16).
// NOTE(review): the leading integers look like original line numbers fused
// into the text; by that numbering the lines 173, 175-176, 180-181, 183 and
// 185 are missing, so the `template <>` header, the declaration of ret and
// the guards that choose between the three paths below are not visible --
// confirm against the original file.
174 CONVERSIONS_DECL float16 To(
const float in) {
// CUDA_VERSION >= 9000: cast the float to half, then repackage the half as a
// float16 with halfToFloat16.
177 #if CUDA_VERSION >= 9000 178 half rh =
static_cast<half
>(in);
179 return halfToFloat16(rh);
// Presumably the pre-9000 branch: copy the .x member of the __float2half
// result into ret.x.
182 ret.x = __float2half(in).x;
// After the CUDA_VERSION conditional closes, a path that uses the bit-level
// routine cpu_float2half_rn; presumably the host-side fallback.
184 #endif // CUDA_VERSION >= 9000 186 return cpu_float2half_rn(in);
// To (float16 -> float).
// NOTE(review): the leading integers look like original line numbers fused
// into the text; by that numbering the lines 192, 194-198 and 200 are
// missing, so the guards around the two returns and the construction of
// `tmp` are not visible -- confirm against the original file.
191 CONVERSIONS_DECL
float To(
const float16 in) {
// CUDA_VERSION >= 9000: convert through __half2float applied to `tmp`
// (presumably a half built from `in`; its setup is not visible).
193 #if CUDA_VERSION >= 9000 199 return __half2float(tmp);
// Path that uses the bit-level routine cpu_half2float; presumably the
// host-side fallback.
201 return cpu_half2float(in);
// To (float -> float).
// NOTE(review): only the signature is visible; the body is missing from this
// extract. Presumably it returns `in` unchanged -- confirm against the
// original file.
206 CONVERSIONS_DECL
float To(
const float in) {
// Get<OUT, IN>: generic accessor that static_casts `x` from IN to OUT.
// The template parameters are ordered destination type first, source second --
// the reverse of the To template (which is declared <IN, OUT>).
// NOTE(review): the leading integers look like original line numbers fused
// into the text; the closing brace is not visible in this extract.
210 template <
typename OUT,
typename IN>
211 CONVERSIONS_DECL OUT Get(IN x) {
212 return static_cast<OUT
>(x);
// Get (float16 -> float): delegates to To<float16, float>, i.e. To with
// source type float16 and destination type float.
// NOTE(review): the leading integers look like original line numbers fused
// into the text; a `template <>` header and the closing brace are presumably
// missing from this extract.
216 CONVERSIONS_DECL
float Get(float16 x) {
217 return To<float16, float>(x);
// Get (float -> float16): delegates to To<float, float16>, i.e. To with
// source type float and destination type float16.
// NOTE(review): the leading integers look like original line numbers fused
// into the text; a `template <>` header and the closing brace are presumably
// missing from this extract.
221 CONVERSIONS_DECL float16 Get(
float x) {
222 return To<float, float16>(x);
229 #undef CONVERSIONS_DECL A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...