Caffe2 - C++ API
A deep learning, cross platform ML framework
Related Pages
Modules
Data Structures
Files
C++ API
Python API
GitHub
File List
Globals
caffe2
perfkernels
common.h
1
// Common utilities for writing performance kernels and easy dispatching of
2
// different backends.
3
/*
4
The general workflow is as follows. Say we want to
5
implement a functionality called void foo(int a, float b).
6
7
In foo.h, do:
8
void foo(int a, float b);
9
10
In foo_avx2.cc, do:
11
void foo__avx2(int a, float b) {
12
[actual avx2 implementation]
13
}
14
15
In foo_avx.cc, do:
16
void foo__avx(int a, float b) {
17
[actual avx implementation]
18
}
19
20
In foo.cc, do:
21
// The base implementation should *always* be provided.
22
void foo__base(int a, float b) {
23
[base, possibly slow implementation]
24
}
25
void foo(int a, float b) {
26
// You should always order things by their preference, faster
27
// implementations earlier in the function.
28
AVX2_DO(foo, a, b);
29
AVX_DO(foo, a, b);
30
BASE_DO(foo, a, b);
31
}
32
33
*/
34
// Details: this functionality basically covers the cases for both build time
35
// and run time architecture support.
36
//
37
// During build time:
38
// The build system should provide flags CAFFE2_PERF_WITH_AVX2 and
39
// CAFFE2_PERF_WITH_AVX that correspond to the __AVX2__ and __AVX__ flags
40
// the compiler provides. Note that we do not use the compiler flags but
41
// rely on the build system flags, because the common files (like foo.cc
42
// above) will always be built without __AVX__ and __AVX2__.
43
// During run time:
44
// we use cpuid to identify cpu support and run the proper functions.
45
46
#pragma once
47
48
// DO macros: these should be used in your entry function, similar to foo()
49
// above, that routes implementations based on CPU capability.
50
51
// BASE_DO(funcname, args...): unconditional last step of a dispatching entry
// function. Expands to a `return` statement that forwards args... to
// funcname##__base. The expansion already ends in ';', so a ';' written after
// the invocation only adds an empty statement.
#define BASE_DO(funcname, ...) \
  return funcname##__base(__VA_ARGS__);
52
53
// AVX2 dispatch macros, for use inside a dispatching entry function.
//
// When the build defines CAFFE2_PERF_WITH_AVX2:
//   AVX2_DO(funcname, args...)
//     declares funcname##__avx2 with the same type as funcname##__base and,
//     if GetCpuId().avx2() is true, returns funcname##__avx2(args...) from
//     the enclosing function.
//   AVX2_FMA_DO(funcname, args...)
//     does the same for funcname##__avx2_fma, and additionally requires
//     GetCpuId().fma() to be true.
// Otherwise both macros expand to nothing (the arguments are not evaluated),
// so control falls through to whatever follows the invocation.
//
// funcname##__base and GetCpuId() must already be visible where the macros
// are used; neither is declared in the part of this header shown here.
//
// NOTE: keep comments off the backslash-continued lines below; a `//` comment
// there would swallow the rest of the macro body.
#ifdef CAFFE2_PERF_WITH_AVX2

#define AVX2_DO(funcname, ...)                 \
  decltype(funcname##__base) funcname##__avx2; \
  if (GetCpuId().avx2()) {                     \
    return funcname##__avx2(__VA_ARGS__);      \
  }

#define AVX2_FMA_DO(funcname, ...)                 \
  decltype(funcname##__base) funcname##__avx2_fma; \
  if (GetCpuId().avx2() && GetCpuId().fma()) {     \
    return funcname##__avx2_fma(__VA_ARGS__);      \
  }

#else // CAFFE2_PERF_WITH_AVX2

#define AVX2_DO(funcname, ...)
#define AVX2_FMA_DO(funcname, ...)

#endif // CAFFE2_PERF_WITH_AVX2
68
69
// AVX dispatch macros, for use inside a dispatching entry function.
//
// When the build defines CAFFE2_PERF_WITH_AVX:
//   AVX_DO(funcname, args...)
//     declares funcname##__avx with the same type as funcname##__base and,
//     if GetCpuId().avx() is true, returns funcname##__avx(args...) from the
//     enclosing function.
//   AVX_F16C_DO(funcname, args...)
//     does the same for funcname##__avx_f16c, and additionally requires
//     GetCpuId().f16c() to be true.
// Otherwise both macros expand to nothing (the arguments are not evaluated),
// so control falls through to whatever follows the invocation.
//
// funcname##__base and GetCpuId() must already be visible where the macros
// are used; neither is declared in the part of this header shown here.
//
// NOTE: keep comments off the backslash-continued lines below; a `//` comment
// there would swallow the rest of the macro body.
#ifdef CAFFE2_PERF_WITH_AVX

#define AVX_DO(funcname, ...)                 \
  decltype(funcname##__base) funcname##__avx; \
  if (GetCpuId().avx()) {                     \
    return funcname##__avx(__VA_ARGS__);      \
  }

#define AVX_F16C_DO(funcname, ...)                 \
  decltype(funcname##__base) funcname##__avx_f16c; \
  if (GetCpuId().avx() && GetCpuId().f16c()) {     \
    return funcname##__avx_f16c(__VA_ARGS__);      \
  }

#else // CAFFE2_PERF_WITH_AVX

#define AVX_DO(funcname, ...)
#define AVX_F16C_DO(funcname, ...)

#endif // CAFFE2_PERF_WITH_AVX
Generated on Thu Apr 19 2018 13:03:49 for Caffe2 - C++ API by
1.8.11