From 9a3a2308a24f488e303e8389cf900c0cc53d8e71 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 10 Jul 2020 12:15:07 +0800 Subject: [PATCH] fix for cudnn_v8 --- src/convolutional_layer.c | 64 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 1fb58b0..9fed951 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -8,6 +8,8 @@ #include #include +#define PRINT_CUDNN_ALGO 0 + #ifdef AI2 #include "xnor_layer.h" #endif @@ -145,6 +147,67 @@ void cudnn_convolutional_setup(layer *l) } #endif + #if CUDNN_MAJOR >= 8 + int returnedAlgoCount; + cudnnConvolutionFwdAlgoPerf_t fw_results[2 * CUDNN_CONVOLUTION_FWD_ALGO_COUNT]; + cudnnConvolutionBwdDataAlgoPerf_t bd_results[2 * CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT]; + cudnnConvolutionBwdFilterAlgoPerf_t bf_results[2 * CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT]; + + cudnnFindConvolutionForwardAlgorithm(cudnn_handle(), + l->srcTensorDesc, + l->weightDesc, + l->convDesc, + l->dstTensorDesc, + CUDNN_CONVOLUTION_FWD_ALGO_COUNT, + &returnedAlgoCount, + fw_results); + #if PRINT_CUDNN_ALGO > 0 + for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){ + printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n", + cudnnGetErrorString(fw_results[algoIndex].status), + fw_results[algoIndex].algo, fw_results[algoIndex].time, + (unsigned long long)fw_results[algoIndex].memory); + } + #endif + l->fw_algo = fw_results[0].algo; + + cudnnFindConvolutionBackwardDataAlgorithm(cudnn_handle(), + l->weightDesc, + l->ddstTensorDesc, + l->convDesc, + l->dsrcTensorDesc, + CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT, + &returnedAlgoCount, + bd_results); + #if PRINT_CUDNN_ALGO > 0 + for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){ + printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n", + cudnnGetErrorString(bd_results[algoIndex].status), + bd_results[algoIndex].algo, bd_results[algoIndex].time, + (unsigned long long)bd_results[algoIndex].memory); + } + #endif + l->bd_algo = bd_results[0].algo; + + cudnnFindConvolutionBackwardFilterAlgorithm(cudnn_handle(), + l->srcTensorDesc, + l->ddstTensorDesc, + l->convDesc, + l->dweightDesc, + CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT, + &returnedAlgoCount, + bf_results); + #if PRINT_CUDNN_ALGO > 0 + for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){ + printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n", + cudnnGetErrorString(bf_results[algoIndex].status), + bf_results[algoIndex].algo, bf_results[algoIndex].time, + (unsigned long long)bf_results[algoIndex].memory); + } + #endif + l->bf_algo = bf_results[0].algo; + + #else cudnnGetConvolutionForwardAlgorithm(cudnn_handle(), l->srcTensorDesc, l->weightDesc, @@ -169,6 +232,7 @@ void cudnn_convolutional_setup(layer *l) CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, 2000000000, &l->bf_algo); + #endif } #endif #endif -- 2.17.1