1 #ifndef CAFFE2_CORE_COMMON_GPU_H_ 2 #define CAFFE2_CORE_COMMON_GPU_H_ 6 #include <cuda_runtime.h> 12 #if CUDA_VERSION >= 9000 14 #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) 15 #pragma GCC diagnostic push 17 #pragma GCC diagnostic ignored "-Wstrict-aliasing" 19 #endif // CUDA_VERSION >= 9000 21 #include <cublas_v2.h> 23 #include <driver_types.h> 25 #include "caffe2/core/logging.h" 26 #include "caffe2/core/common.h" 34 #ifndef CAFFE_HAS_CUDA_FP16 35 #if CUDA_VERSION >= 7050 36 #define CAFFE_HAS_CUDA_FP16 37 #endif // CUDA_VERSION >= 7050 38 #endif // CAFFE_HAS_CUDA_FP16 40 #ifdef CAFFE_HAS_CUDA_FP16 41 #include <cuda_fp16.h> 45 #if CUDA_VERSION >= 9000 47 #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) 48 #pragma GCC diagnostic pop 51 #endif // CUDA_VERSION >= 9000 56 #define CAFFE2_COMPILE_TIME_MAX_GPUS 16 64 #define CAFFE2_CUDA_MAX_PEER_SIZE 8 68 #if CUDA_VERSION >= 9000 72 class TensorCoreEngine {};
151 #define CUDA_ENFORCE(condition, ...) \ 153 cudaError_t error = condition; \ 162 cudaGetErrorString(error), ##__VA_ARGS__); \ 164 #define CUDA_CHECK(condition) \ 166 cudaError_t error = condition; \ 167 CHECK(error == cudaSuccess) << cudaGetErrorString(error); \ 170 #define CUDA_DRIVERAPI_ENFORCE(condition) \ 172 CUresult result = condition; \ 173 if (result != CUDA_SUCCESS) { \ 175 cuGetErrorName(result, &msg); \ 176 CAFFE_THROW("Error at: ", __FILE__, ":", __LINE__, ": ", msg); \ 179 #define CUDA_DRIVERAPI_CHECK(condition) \ 181 CUresult result = condition; \ 182 if (result != CUDA_SUCCESS) { \ 184 cuGetErrorName(result, &msg); \ 185 LOG(FATAL) << "Error at: " << __FILE__ << ":" << __LINE__ << ": " \ 190 #define CUBLAS_ENFORCE(condition) \ 192 cublasStatus_t status = condition; \ 195 CUBLAS_STATUS_SUCCESS, \ 201 ::caffe2::cublasGetErrorString(status)); \ 203 #define CUBLAS_CHECK(condition) \ 205 cublasStatus_t status = condition; \ 206 CHECK(status == CUBLAS_STATUS_SUCCESS) \ 207 << ::caffe2::cublasGetErrorString(status); \ 210 #define CURAND_ENFORCE(condition) \ 212 curandStatus_t status = condition; \ 215 CURAND_STATUS_SUCCESS, \ 221 ::caffe2::curandGetErrorString(status)); \ 223 #define CURAND_CHECK(condition) \ 225 curandStatus_t status = condition; \ 226 CHECK(status == CURAND_STATUS_SUCCESS) \ 227 << ::caffe2::curandGetErrorString(status); \ 230 #define CUDA_1D_KERNEL_LOOP(i, n) \ 231 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ 232 i += blockDim.x * gridDim.x) 238 #define CUDA_KERNEL_ASSERT(...) 240 #define CUDA_KERNEL_ASSERT(...) assert(__VA_ARGS__) 258 constexpr
int CAFFE_CUDA_NUM_THREADS = 512;
264 constexpr
int CAFFE_MAXIMUM_NUM_BLOCKS = 4096;
272 (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS,
273 CAFFE_MAXIMUM_NUM_BLOCKS),
281 if (previous_ != newDevice) {
295 #endif // CAFFE2_CORE_COMMON_GPU_H_
void DeviceQuery(const int device)
Runs a device query function and prints out the results to LOG(INFO).
int CudaVersion()
A runtime function to report the cuda version that Caffe2 is built with.
bool HasCudaGPU()
Check if the current running session has a cuda gpu present.
bool GetCudaPeerAccessPattern(vector< vector< bool > > *pattern)
Return a peer access pattern by returning a matrix (in the format of a nested vector) of boolean values specifying whether peer access is possible.
int NumCudaDevices()
Returns the number of devices.
A global dictionary that holds information about what Caffe2 modules have been loaded in the current runtime, and also utility functions to load modules.
int GetGPUIDForPointer(const void *ptr)
Gets the GPU id that the current pointer is located at.
int CaffeCudaGetDevice()
Gets the current GPU id.
void CaffeCudaSetDevice(const int id)
Sets the current GPU id.
const cudaDeviceProp & GetDeviceProperty(const int deviceid)
Gets the device property for the given device.
const char * curandGetErrorString(curandStatus_t error)
Return a human readable curand error string.
int CAFFE_GET_BLOCKS(const int N)
Compute the number of blocks needed to run N threads.
const char * cublasGetErrorString(cublasStatus_t error)
Return a human readable cublas error string.
bool TensorCoreAvailable()
Return the availability of TensorCores for math.