Caffe2 - C++ API
A deep learning, cross platform ML framework
mixed_utils.h
1 // Copyright 2004-present Facebook. All Rights Reserved.
2 #ifndef CAFFE2_UTILS_MIXED_UTILS_H
3 #define CAFFE2_UTILS_MIXED_UTILS_H
4 
5 #include "caffe2/core/common_gpu.h"
6 #include "caffe2/core/context_gpu.h"
7 
8 // Define functions that allow add/mult/store operations on inputs/outputs with
9 // mixed precisions.
10 namespace caffe2 {
11 
12 // Functions that will only be triggered when there is no specialized version
13 // supported.
// Generic mixed-type multiply.
// Evaluates data1 * data2 using the operand types' own operator* and
// converts the result to T (the type of the first operand) on return.
template <typename T, typename T2>
inline __device__ T mixed_mult(T data1, T2 data2) {
  T product = data1 * data2;
  return product;
}
18 
// Generic mixed-type add.
// Evaluates data1 + data2 using the operand types' own operator+ and
// converts the result to T (the type of the first operand) on return.
template <typename T, typename T2>
inline __device__ T mixed_add(T data1, T2 data2) {
  T sum = data1 + data2;
  return sum;
}
23 
// Generic mixed-type store.
// Reads the single element pointed to by data_in and assigns it to the
// element pointed to by data_out, relying on whatever conversion the
// assignment TOUT = TIN performs.
template <typename TIN, typename TOUT>
inline __device__ void mixed_store(TIN* data_in, TOUT* data_out) {
  data_out[0] = data_in[0];
}
29 
// Same-type store.
// Copies the single element pointed to by data_in into the element pointed
// to by data_out; no type conversion is involved.
template <typename T>
inline __device__ void mixed_store(T* data_in, T* data_out) {
  data_out[0] = data_in[0];
}
35 
36 #ifdef CAFFE_HAS_CUDA_FP16
37 // define templated functions to support mixed precision computation
// float * float specialization: plain single-precision multiply.
template <>
inline __device__ float mixed_mult(float data1, const float data2) {
  const float product = data1 * data2;
  return product;
}
42 
// float * half specialization: the half operand is widened to float with
// __half2float before the multiply, so the result is a float.
template <>
inline __device__ float mixed_mult(float data1, const half data2) {
  const float rhs = __half2float(data2);
  return data1 * rhs;
}
47 
// float * float16 specialization: the by-value float16 argument is read back
// as a CUDA half through a pointer cast, widened with __half2float, and then
// multiplied in float.
// NOTE(review): presumably float16 shares half's bit layout - confirm.
template <>
inline __device__ float mixed_mult(float data1, float16 data2) {
  const half rhs_half = *reinterpret_cast<half*>(&data2);
  return data1 * __half2float(rhs_half);
}
// float + float specialization: plain single-precision add.
template <>
inline __device__ float mixed_add(float data1, const float data2) {
  const float sum = data1 + data2;
  return sum;
}
57 
// float + half specialization: the half operand is widened to float with
// __half2float before the add, so the result is a float.
template <>
inline __device__ float mixed_add(float data1, const half data2) {
  const float rhs = __half2float(data2);
  return data1 + rhs;
}
62 
// float + float16 specialization: the by-value float16 argument is read back
// as a CUDA half through a pointer cast, widened with __half2float, and then
// added in float.
// NOTE(review): presumably float16 shares half's bit layout - confirm.
template <>
inline __device__ float mixed_add(float data1, float16 data2) {
  const half rhs_half = *reinterpret_cast<half*>(&data2);
  return data1 + __half2float(rhs_half);
}
68 
// float -> float store: copies one float from data_in to data_out unchanged.
template <>
inline __device__ void mixed_store(float* data_in, float* data_out) {
  data_out[0] = data_in[0];
}
74 
// half -> float store: widens the half at data_in with __half2float and
// writes the resulting float to data_out.
template <>
inline __device__ void mixed_store(half* data_in, float* data_out) {
  const float widened = __half2float(data_in[0]);
  data_out[0] = widened;
}
80 
// float16 -> float store: views the float16 at data_in as a CUDA half through
// a pointer cast, widens it with __half2float, and writes the float to
// data_out.
// NOTE(review): presumably float16 shares half's bit layout - confirm.
template <>
inline __device__ void mixed_store(float16* data_in, float* data_out) {
  const half src_half = *reinterpret_cast<half*>(data_in);
  data_out[0] = __half2float(src_half);
}
87 
// float -> float16 store: converts the float at data_in to a CUDA half with
// __float2half, then copies that value into data_out by viewing the local
// half as a float16 through a pointer cast.
// NOTE(review): presumably float16 shares half's bit layout - confirm.
template <>
inline __device__ void mixed_store(float* data_in, float16* data_out) {
  half as_half = __float2half(data_in[0]);
  data_out[0] = *reinterpret_cast<float16*>(&as_half);
}
95 
// float -> half store: converts the float at data_in with __float2half and
// writes the resulting half directly to data_out.
template <>
inline __device__ void mixed_store(float* data_in, half* data_out) {
  data_out[0] = __float2half(data_in[0]);
}
102 #endif // for CAFFE_HAS_CUDA_FP16
103 } // namespace caffe2
104 #endif // for CAFFE2_UTILS_MIXED_UTILS_H
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...