From 5f9cdec264618ac6e41bdebd6752f09646771d97 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Thu, 25 Nov 2021 06:46:21 +0000
Subject: [PATCH] patch for jp4.6

---
 Makefile                                |  2 +-
 Makefile.config.example                 | 31 ++++----
 src/caffe/layers/cudnn_conv_layer.cpp   | 94 +++++++++++++++++--------
 src/caffe/layers/cudnn_deconv_layer.cpp | 46 +++++++++++-
 src/caffe/layers/window_data_layer.cpp  |  2 +-
 src/caffe/util/io.cpp                   |  8 +--
 6 files changed, 126 insertions(+), 57 deletions(-)

diff --git a/Makefile b/Makefile
index b7660e85..cd846e31 100644
--- a/Makefile
+++ b/Makefile
@@ -205,7 +205,7 @@ ifeq ($(USE_OPENCV), 1)
 	endif
 
 endif
-PYTHON_LIBRARIES ?= boost_python python2.7
+PYTHON_LIBRARIES ?= boost_python3 python3.6m
 WARNINGS := -Wall -Wno-sign-compare
 
 ##############################
diff --git a/Makefile.config.example b/Makefile.config.example
index 24ca6327..e35cd4c3 100644
--- a/Makefile.config.example
+++ b/Makefile.config.example
@@ -2,7 +2,7 @@
 # Contributions simplifying and improving our build system are welcome!
 
 # cuDNN acceleration switch (uncomment to build with cuDNN).
-# USE_CUDNN := 1
+USE_CUDNN := 1
 
 # CPU-only switch (uncomment to build without GPU support).
 # CPU_ONLY := 1
@@ -20,7 +20,7 @@
 # ALLOW_LMDB_NOLOCK := 1
 
 # Uncomment if you're using OpenCV 3
-# OPENCV_VERSION := 3
+OPENCV_VERSION := 3
 
 # To customize your choice of compiler, uncomment and set the following.
 # N.B. the default for Linux is g++ and the default for OSX is clang++
@@ -36,15 +36,10 @@ CUDA_DIR := /usr/local/cuda
 # For CUDA < 6.0, comment the *_50 through *_61 lines for compatibility.
 # For CUDA < 8.0, comment the *_60 and *_61 lines for compatibility.
 # For CUDA >= 9.0, comment the *_20 and *_21 lines for compatibility.
-CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \
-		-gencode arch=compute_20,code=sm_21 \
-		-gencode arch=compute_30,code=sm_30 \
-		-gencode arch=compute_35,code=sm_35 \
-		-gencode arch=compute_50,code=sm_50 \
-		-gencode arch=compute_52,code=sm_52 \
-		-gencode arch=compute_60,code=sm_60 \
-		-gencode arch=compute_61,code=sm_61 \
-		-gencode arch=compute_61,code=compute_61
+CUDA_ARCH := -gencode arch=compute_53,code=sm_53 \
+		-gencode arch=compute_62,code=sm_62 \
+		-gencode arch=compute_72,code=sm_72 \
+		-gencode arch=compute_72,code=compute_72
 
 # BLAS choice:
 # atlas for ATLAS (default)
@@ -68,8 +63,8 @@ BLAS := atlas
 
 # NOTE: this is required only if you will compile the python interface.
 # We need to be able to find Python.h and numpy/arrayobject.h.
-PYTHON_INCLUDE := /usr/include/python2.7 \
-		/usr/lib/python2.7/dist-packages/numpy/core/include
+PYTHON_INCLUDE := /usr/include/python3.6 \
+		/usr/lib/python3.6/dist-packages/numpy/core/include
 # Anaconda Python distribution is quite popular. Include path:
 # Verify anaconda location, sometimes it's in root.
 # ANACONDA_HOME := $(HOME)/anaconda
@@ -78,9 +73,9 @@ PYTHON_INCLUDE := /usr/include/python2.7 \
 #		$(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include
 
 # Uncomment to use Python 3 (default is Python 2)
-# PYTHON_LIBRARIES := boost_python3 python3.5m
-# PYTHON_INCLUDE := /usr/include/python3.5m \
-#                 /usr/lib/python3.5/dist-packages/numpy/core/include
+PYTHON_LIBRARIES := boost_python3 python3.6m
+PYTHON_INCLUDE := /usr/include/python3.6m \
+                 /usr/lib/python3.6/dist-packages/numpy/core/include
 
 # We need to be able to find libpythonX.X.so or .dylib.
 PYTHON_LIB := /usr/lib
@@ -94,8 +89,8 @@ PYTHON_LIB := /usr/lib
 # WITH_PYTHON_LAYER := 1
 
 # Whatever else you find you need goes here.
-INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include
-LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib
+INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial /usr/include/opencv4
+LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/aarch64-linux-gnu /usr/lib/aarch64-linux-gnu/hdf5/serial
 
 # If Homebrew is installed at a non standard location (for example your home directory) and you use it for general dependencies
 # INCLUDE_DIRS += $(shell brew --prefix)/include
diff --git a/src/caffe/layers/cudnn_conv_layer.cpp b/src/caffe/layers/cudnn_conv_layer.cpp
index efc9e04e..6241f763 100644
--- a/src/caffe/layers/cudnn_conv_layer.cpp
+++ b/src/caffe/layers/cudnn_conv_layer.cpp
@@ -127,45 +127,77 @@ void CuDNNConvolutionLayer<Dtype>::Reshape(
         filter_desc_, pad_h, pad_w,
         stride_h, stride_w);
 
-    // choose forward and backward algorithms + workspace(s)
+#if CUDNN_MAJOR >= 8
+    int returnedAlgoCount;
+    cudnnConvolutionFwdAlgoPerf_t fw_results[2 * CUDNN_CONVOLUTION_FWD_ALGO_COUNT];
+    cudnnConvolutionBwdDataAlgoPerf_t bd_results[2 * CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT];
+    cudnnConvolutionBwdFilterAlgoPerf_t bf_results[2 * CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT];
+
+    CUDNN_CHECK(cudnnFindConvolutionForwardAlgorithm(handle_[0],
+      bottom_descs_[i],
+      filter_desc_,
+      conv_descs_[i],
+      top_descs_[i],
+      CUDNN_CONVOLUTION_FWD_ALGO_COUNT,
+      &returnedAlgoCount,
+      fw_results));
+    fwd_algo_[i] = fw_results[0].algo;
+
+    CUDNN_CHECK(cudnnFindConvolutionBackwardDataAlgorithm(handle_[0],
+      filter_desc_,
+      top_descs_[i],
+      conv_descs_[i],
+      bottom_descs_[i],
+      CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT,
+      &returnedAlgoCount,
+      bd_results));
+    bwd_data_algo_[i] = bd_results[0].algo;
+
+    CUDNN_CHECK(cudnnFindConvolutionBackwardFilterAlgorithm(handle_[0],
+      bottom_descs_[i],
+      top_descs_[i],
+      conv_descs_[i],
+      filter_desc_,
+      CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT,
+      &returnedAlgoCount,
+      bf_results));
+    bwd_filter_algo_[i] = bf_results[0].algo;
+#else
     CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(handle_[0],
-      bottom_descs_[i],
-      filter_desc_,
-      conv_descs_[i],
-      top_descs_[i],
-      CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
-      workspace_limit_bytes,
-      &fwd_algo_[i]));
+        bottom_descs_[i],
+        filter_desc_,
+        conv_descs_[i],
+        top_descs_[i],
+        CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
+        workspace_limit_bytes,
+        &fwd_algo_[i]));
 
-    CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(handle_[0],
-      bottom_descs_[i],
-      filter_desc_,
-      conv_descs_[i],
-      top_descs_[i],
-      fwd_algo_[i],
-      &(workspace_fwd_sizes_[i])));
-
-    // choose backward algorithm for filter
     CUDNN_CHECK(cudnnGetConvolutionBackwardFilterAlgorithm(handle_[0],
-          bottom_descs_[i], top_descs_[i], conv_descs_[i], filter_desc_,
-          CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
-          workspace_limit_bytes, &bwd_filter_algo_[i]) );
-
-    // get workspace for backwards filter algorithm
-    CUDNN_CHECK(cudnnGetConvolutionBackwardFilterWorkspaceSize(handle_[0],
-          bottom_descs_[i], top_descs_[i], conv_descs_[i], filter_desc_,
-          bwd_filter_algo_[i], &workspace_bwd_filter_sizes_[i]));
+        bottom_descs_[i], top_descs_[i], conv_descs_[i], filter_desc_,
+        CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
+        workspace_limit_bytes, &bwd_filter_algo_[i]) );
 
-    // choose backward algo for data
     CUDNN_CHECK(cudnnGetConvolutionBackwardDataAlgorithm(handle_[0],
-          filter_desc_, top_descs_[i], conv_descs_[i], bottom_descs_[i],
-          CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
+        filter_desc_, top_descs_[i], conv_descs_[i], bottom_descs_[i],
+        CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
           workspace_limit_bytes, &bwd_data_algo_[i]));
+#endif
+
+    CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(handle_[0],
+      bottom_descs_[i],
+      filter_desc_,
+      conv_descs_[i],
+      top_descs_[i],
+      fwd_algo_[i],
+      &(workspace_fwd_sizes_[i])));
+
+    CUDNN_CHECK(cudnnGetConvolutionBackwardFilterWorkspaceSize(handle_[0],
+        bottom_descs_[i], top_descs_[i], conv_descs_[i], filter_desc_,
+        bwd_filter_algo_[i], &workspace_bwd_filter_sizes_[i]));
 
-    // get workspace size
     CUDNN_CHECK(cudnnGetConvolutionBackwardDataWorkspaceSize(handle_[0],
-          filter_desc_, top_descs_[i], conv_descs_[i], bottom_descs_[i],
-          bwd_data_algo_[i], &workspace_bwd_data_sizes_[i]) );
+        filter_desc_, top_descs_[i], conv_descs_[i], bottom_descs_[i],
+        bwd_data_algo_[i], &workspace_bwd_data_sizes_[i]) );
   }
 
   // reduce over all workspace sizes to get a maximum to allocate / reallocate
diff --git a/src/caffe/layers/cudnn_deconv_layer.cpp b/src/caffe/layers/cudnn_deconv_layer.cpp
index 260da5c1..0875ab25 100644
--- a/src/caffe/layers/cudnn_deconv_layer.cpp
+++ b/src/caffe/layers/cudnn_deconv_layer.cpp
@@ -141,7 +141,21 @@ void CuDNNDeconvolutionLayer<Dtype>::Reshape(
                                      stride_h,
                                      stride_w);
 
-    // choose forward and backward algorithms + workspace(s)
+#if CUDNN_MAJOR >= 8
+    int returnedAlgoCount;
+    cudnnConvolutionFwdAlgoPerf_t fw_results[2 * CUDNN_CONVOLUTION_FWD_ALGO_COUNT];
+
+    CUDNN_CHECK(cudnnFindConvolutionForwardAlgorithm(
+        handle_[0],
+        top_descs_[i],
+        filter_desc_,
+        conv_descs_[i],
+        bottom_descs_[i],
+        CUDNN_CONVOLUTION_FWD_ALGO_COUNT,
+        &returnedAlgoCount,
+        fw_results));
+    fwd_algo_[i] = fw_results[0].algo;
+#else
     CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(
         handle_[0],
         top_descs_[i],
@@ -151,6 +165,7 @@
         CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
         workspace_limit_bytes,
         &fwd_algo_[i]));
+#endif
 
     // We have found that CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM is
     // buggy. Thus, if this algo was chosen, choose winograd instead. If
@@ -183,6 +198,19 @@
         fwd_algo_[i],
         &(workspace_fwd_sizes_[i])));
 
+#if CUDNN_MAJOR >= 8
+    cudnnConvolutionBwdFilterAlgoPerf_t bf_results[2 * CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT];
+    CUDNN_CHECK(cudnnFindConvolutionBackwardFilterAlgorithm(
+        handle_[0],
+        top_descs_[i],
+        bottom_descs_[i],
+        conv_descs_[i],
+        filter_desc_,
+        CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT,
+        &returnedAlgoCount,
+        bf_results));
+    bwd_filter_algo_[i] = bf_results[0].algo;
+#else
     // choose backward algorithm for filter
     CUDNN_CHECK(cudnnGetConvolutionBackwardFilterAlgorithm(
         handle_[0],
@@ -193,6 +221,7 @@
         CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
         workspace_limit_bytes,
         &bwd_filter_algo_[i]));
+#endif
 
     // get workspace for backwards filter algorithm
     CUDNN_CHECK(cudnnGetConvolutionBackwardFilterWorkspaceSize(
@@ -203,7 +232,19 @@
         filter_desc_,
         bwd_filter_algo_[i],
         &workspace_bwd_filter_sizes_[i]));
-
+#if CUDNN_MAJOR >= 8
+    cudnnConvolutionBwdDataAlgoPerf_t bd_results[2 * CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT];
+    CUDNN_CHECK(cudnnFindConvolutionBackwardDataAlgorithm(
+        handle_[0],
+        filter_desc_,
+        bottom_descs_[i],
+        conv_descs_[i],
+        top_descs_[i],
+        CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT,
+        &returnedAlgoCount,
+        bd_results));
+    bwd_data_algo_[i] = bd_results[0].algo;
+#else
     // choose backward algo for data
     CUDNN_CHECK(cudnnGetConvolutionBackwardDataAlgorithm(
         handle_[0],
@@ -214,6 +255,7 @@
         CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
         workspace_limit_bytes,
         &bwd_data_algo_[i]));
+#endif
 
     // get workspace size
     CUDNN_CHECK(cudnnGetConvolutionBackwardDataWorkspaceSize(
diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp
index 1bf3760e..f41169de 100644
--- a/src/caffe/layers/window_data_layer.cpp
+++ b/src/caffe/layers/window_data_layer.cpp
@@ -290,7 +290,7 @@ void WindowDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
           image_database_cache_[window[WindowDataLayer<Dtype>::IMAGE_INDEX]];
       cv_img = DecodeDatumToCVMat(image_cached.second, true);
     } else {
-      cv_img = cv::imread(image.first, CV_LOAD_IMAGE_COLOR);
+      cv_img = cv::imread(image.first, cv::IMREAD_COLOR);
       if (!cv_img.data) {
         LOG(ERROR) << "Could not open or find file " << image.first;
         return;
diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp
index 5295d9dd..ca8a6492 100644
--- a/src/caffe/util/io.cpp
+++ b/src/caffe/util/io.cpp
@@ -73,8 +73,8 @@ void WriteProtoToBinaryFile(const Message& proto, const char* filename) {
 cv::Mat ReadImageToCVMat(const string& filename,
     const int height, const int width, const bool is_color) {
   cv::Mat cv_img;
-  int cv_read_flag = (is_color ? CV_LOAD_IMAGE_COLOR :
-    CV_LOAD_IMAGE_GRAYSCALE);
+  int cv_read_flag = (is_color ? cv::IMREAD_COLOR :
+    cv::ImreadModes::IMREAD_GRAYSCALE);
   cv::Mat cv_img_origin = cv::imread(filename, cv_read_flag);
   if (!cv_img_origin.data) {
     LOG(ERROR) << "Could not open or find file " << filename;
@@ -179,8 +179,8 @@ cv::Mat DecodeDatumToCVMat(const Datum& datum, bool is_color) {
   CHECK(datum.encoded()) << "Datum not encoded";
   const string& data = datum.data();
   std::vector<char> vec_data(data.c_str(), data.c_str() + data.size());
-  int cv_read_flag = (is_color ? CV_LOAD_IMAGE_COLOR :
-    CV_LOAD_IMAGE_GRAYSCALE);
+  int cv_read_flag = (is_color ? cv::IMREAD_COLOR :
+    cv::ImreadModes::IMREAD_GRAYSCALE);
   cv_img = cv::imdecode(vec_data, cv_read_flag);
   if (!cv_img.data) {
     LOG(ERROR) << "Could not decode datum ";
-- 
2.17.1
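
Note on the CUDNN_MAJOR >= 8 paths above: cuDNN 8 removed the cudnnGet*Algorithm heuristics, so the patch switches to cudnnFindConvolution*Algorithm and takes the first (fastest) result, which ignores the workspace_limit_bytes bound the old calls enforced. A minimal sketch of a stricter selection, assuming Caffe's CUDNN_CHECK macro is available; the helper name pick_fwd_algo is illustrative and not part of this patch:

  #include <cudnn.h>
  #include "caffe/util/cudnn.hpp"  // assumed: provides CUDNN_CHECK

  // Pick the fastest forward algorithm reported by cuDNN that still fits
  // the given workspace limit; candidates come back sorted by execution time.
  static cudnnConvolutionFwdAlgo_t pick_fwd_algo(
      cudnnHandle_t handle,
      cudnnTensorDescriptor_t bottom_desc,
      cudnnFilterDescriptor_t filter_desc,
      cudnnConvolutionDescriptor_t conv_desc,
      cudnnTensorDescriptor_t top_desc,
      size_t workspace_limit_bytes) {
    cudnnConvolutionFwdAlgoPerf_t results[CUDNN_CONVOLUTION_FWD_ALGO_COUNT];
    int returned = 0;
    CUDNN_CHECK(cudnnFindConvolutionForwardAlgorithm(
        handle, bottom_desc, filter_desc, conv_desc, top_desc,
        CUDNN_CONVOLUTION_FWD_ALGO_COUNT, &returned, results));
    for (int k = 0; k < returned; ++k) {
      // Skip algorithms that failed or need more scratch memory than allowed.
      if (results[k].status == CUDNN_STATUS_SUCCESS &&
          results[k].memory <= workspace_limit_bytes) {
        return results[k].algo;
      }
    }
    return results[0].algo;  // fall back to the fastest candidate
  }

The same pattern applies to cudnnFindConvolutionBackwardFilterAlgorithm and cudnnFindConvolutionBackwardDataAlgorithm if the workspace bound matters on a memory-constrained Jetson.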