Caffe2 - C++ API
A deep learning, cross platform ML framework
typed_axpy.cc
1 #include "caffe2/perfkernels/typed_axpy.h"
2 #include "caffe2/core/types.h"
3 #include "caffe2/perfkernels/common.h"
4 #include "caffe2/utils/cpuid.h"
5 #include "caffe2/utils/math.h"
6 
7 namespace caffe2 {
8 
9 template <>
10 void TypedAxpy<float, float>(int N, const float a, const float* x, float* y) {
11  // This uses a hack that axpy implementation actually does not use the
12  // CPUContext, so passing in a nullpointer works.
13  math::Axpy<float, CPUContext>(N, a, x, y, nullptr);
14 }
15 
16 void TypedAxpy_float16_float__base(
17  int N,
18  const float a,
19  const float16* x,
20  float* y) {
21  for (int i = 0; i < N; ++i) {
22  union {
23  uint32_t intval;
24  float floatval;
25  } t1;
26  uint32_t t2, t3;
27  t1.intval = x[i].x & 0x7fff; // Non-sign bits
28  t2 = x[i].x & 0x8000; // Sign bit
29  t3 = x[i].x & 0x7c00; // Exponent
30  t1.intval <<= 13; // Align mantissa on MSB
31  t2 <<= 16; // Shift sign bit into position
32  t1.intval += 0x38000000; // Adjust bias
33  t1.intval = (t3 == 0 ? 0 : t1.intval); // Denormals-as-zero
34  t1.intval |= t2; // Re-insert sign bit
35  y[i] += t1.floatval * a;
36  }
37 }
38 
39 template <>
40 void TypedAxpy<float16, float>(
41  int N,
42  const float a,
43  const float16* x,
44  float* y) {
45  AVX2_FMA_DO(TypedAxpy_float16_float, N, a, x, y);
46  AVX_F16C_DO(TypedAxpy_float16_float, N, a, x, y);
47  BASE_DO(TypedAxpy_float16_float, N, a, x, y);
48 }
49 
// Portable fallback kernel: y[i] += float(x[i]) * a for every i in [0, N).
//
// N: number of elements to process (a non-positive N does nothing).
// a: scalar multiplier applied to every widened element.
// x: input array of at least N unsigned bytes.
// y: accumulator array of at least N floats, updated in place.
void TypedAxpy_uint8_float__base(
    int N,
    const float a,
    const std::uint8_t* x,
    float* y) {
  for (int idx = 0; idx < N; ++idx) {
    // Widen the byte to float before scaling so the product is done in float.
    const float widened = static_cast<float>(x[idx]);
    y[idx] += widened * a;
  }
}
59 
60 template <>
61 void TypedAxpy<std::uint8_t, float>(
62  int N,
63  const float a,
64  const std::uint8_t* x,
65  float* y) {
66  AVX2_FMA_DO(TypedAxpy_uint8_float, N, a, x, y);
67  BASE_DO(TypedAxpy_uint8_float, N, a, x, y);
68 }
69 
70 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...