2022年10月18日 星期二

Darknet with CUDA for AORUS

Xubuntu 20.4


環境: i7-11800H + RTX 3070
Ubuntu 22.04.1, 5.15.0-43-generic, gcc:11, cmake:3.23, python:3.10
安裝好22.04.1,CUDA也裝好了,如下:
 $ nvidia-smi
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0 Off |                  N/A |
| N/A   39C    P0    N/A /  N/A |      5MiB /  8192MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|    0   N/A  N/A      2334      G   /usr/lib/xorg/Xorg                  4MiB |
+-----------------------------------------------------------------------------+

安裝 CUDA 11.7 Toolkit (裝 11.8 的話,會無法開機@@)
$ wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin
$ sudo mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600
$ wget https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda-repo-ubuntu2204-11-7-local_11.7.1-515.65.01-1_amd64.deb
$ sudo dpkg -i cuda-repo-ubuntu2204-11-7-local_11.7.1-515.65.01-1_amd64.deb
$ sudo cp /var/cuda-repo-ubuntu2204-11-7-local/cuda-*-keyring.gpg /usr/share/keyrings/
$ sudo apt-get update
$ sudo apt-get -y install cuda
裝好後,重開機看是否能正常開機,也確認nvidia-smi正常
安裝CUDNN (下載,需另外註冊,要配合 CUDA 版本)
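cudnn-linux-x86_64-8.5.0.96_cuda11-archive.tar.xz
解壓縮後,進入解開的目錄再複製檔案:
$ tar -xf cudnn-linux-x86_64-8.5.0.96_cuda11-archive.tar.xz
$ cd cudnn-linux-x86_64-8.5.0.96_cuda11-archive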

$ sudo cp include/* /usr/local/cuda-11/include
$ sudo cp -d lib/* /usr/local/cuda-11/lib64

edit .bashrc
export PATH=$PATH:/usr/local/cuda-11/bin
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11/lib64                
export CUDADIR=/usr/local/cuda-11
export CUDA_HOME=/usr/local/cuda-11



OpenCV (latest ver: 4.6), refer to Here
$ sudo apt install libgtk2.0-dev ffmpeg \
pkg-config libjpeg-dev libpng-dev libtiff-dev \
libavcodec-dev libavformat-dev libswscale-dev \
libv4l-dev libxvidcore-dev libgtk-4-dev libcanberra-gtk-module libcanberra-gtk3-module \
libx264-dev libatlas-base-dev gfortran


$ mkdir build; cd build
$ cmake {...} -D CMAKE_BUILD_TYPE=RELEASE \
  -D CMAKE_INSTALL_PREFIX=/usr/local/opencv \
  -D INSTALL_C_EXAMPLES=OFF \
  -D INSTALL_PYTHON_EXAMPLES=ON \
  -D BUILD_EXAMPLES=ON \
  -D WITH_CUDA=ON \
  -D WITH_CUDNN=ON \
  -D WITH_OPENCL=ON \
  -D OPENCV_DNN_CUDA=ON \
  -D WITH_VA=OFF \ (編譯時,有gapi/VA_INCLUDE_DIR的問題, ref: Here)
  -D WITH_GTK=ON \
  -D WITH_GTK_2_X=ON \
  -D OPENCV_EXTRA_MODULES_PATH=/home/ubuntu/Projector/opencv/opencv_contrib/modules \
  -D PYTHON2_EXECUTABLE= \
  -D BUILD_OPENCV_PYTHON3=yes \
  -D PYTHON3_EXECUTABLE=/usr/bin/python3.10 \
  -D PYTHON3_INCLUDE_DIR=/usr/include/python3.10 \
  -D PYTHON3_INCLUDE_DIR2=/usr/include/python3.10 \
  -D PYTHON3_LIBRARY=/usr/lib/x86_64-linux-gnu/libpython3.10.so \
  -D PYTHON3_NUMPY_INCLUDE_DIRS=/home/ubuntu/.local/lib/python3.10/site-packages/numpy/core/include \
  -D PYTHON3_PACKAGES_PATH=/home/ubuntu/.local/lib/python3.10/site-packages \
  -D OPENCV_GENERATE_PKGCONFIG=YES \
  -D WITH_TENGINE=ON \
  -D OPENCV_ENABLE_NONFREE:BOOL=ON ..
  
$ make (約3小時多)
$ sudo make install
opencv.pc
$ cd /usr/local/lib/pkgconfig
$ sudo ln -s /usr/local/opencv/lib/pkgconfig/opencv4.pc opencv.pc



Darknet
下載:
$ git clone https://github.com/pjreddie/darknet.git
修改 Makefile: (compute_xx 需依 GPU 的 compute capability 查詢,RTX 3070 為 8.6,即 compute_86 / sm_86)
GPU=1
CUDNN=1
OPENCV=1
OPENMP=0
DEBUG=0

ARCH= -gencode arch=compute_86,code=sm_86
弄錯的話,執行時會出現 "no kernel image is available for execution on the device" 的錯誤
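修改 src/convolutional_layer.c (cuDNN 8 已移除 cudnnGetConvolutionForwardAlgorithm 等 cudnnGet*Algorithm API,需改用 cudnnFind*Algorithm)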
...
...

#define PRINT_CUDNN_ALGO 0
#define MEMORY_LIMIT 2000000000

...
...
     #if CUDNN_MAJOR >= 7
     cudnnSetConvolutionGroupCount(l->convDesc, l->groups);
     #else
     if(l->groups > 1){
         error("CUDNN < 7 doesn't support groups, please upgrade!");
     }
     #endif
 
 	
    #if CUDNN_MAJOR >= 8
    int returnedAlgoCount;
    cudnnConvolutionFwdAlgoPerf_t       fw_results[2 * CUDNN_CONVOLUTION_FWD_ALGO_COUNT];
    cudnnConvolutionBwdDataAlgoPerf_t   bd_results[2 * CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT];
    cudnnConvolutionBwdFilterAlgoPerf_t bf_results[2 * CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT];
 
    cudnnFindConvolutionForwardAlgorithm(cudnn_handle(),
            l->srcTensorDesc,
            l->weightDesc,
            l->convDesc,
            l->dstTensorDesc,
            CUDNN_CONVOLUTION_FWD_ALGO_COUNT,
            &returnedAlgoCount,
	    fw_results);
    for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){
        #if PRINT_CUDNN_ALGO > 0
        printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n",
               cudnnGetErrorString(fw_results[algoIndex].status),
               fw_results[algoIndex].algo, fw_results[algoIndex].time,
               (unsigned long long)fw_results[algoIndex].memory);
        #endif
        if( fw_results[algoIndex].memory < MEMORY_LIMIT ){
            l->fw_algo = fw_results[algoIndex].algo;
            break;
	}
    }
 
    cudnnFindConvolutionBackwardDataAlgorithm(cudnn_handle(),
            l->weightDesc,
            l->ddstTensorDesc,
            l->convDesc,
            l->dsrcTensorDesc,
            CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT,
            &returnedAlgoCount,
            bd_results);
    for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){
        #if PRINT_CUDNN_ALGO > 0
        printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n",
               cudnnGetErrorString(bd_results[algoIndex].status),
               bd_results[algoIndex].algo, bd_results[algoIndex].time,
               (unsigned long long)bd_results[algoIndex].memory);
        #endif
        if( bd_results[algoIndex].memory < MEMORY_LIMIT ){
            l->bd_algo = bd_results[algoIndex].algo;
            break;
        }
    }
 
    cudnnFindConvolutionBackwardFilterAlgorithm(cudnn_handle(),
            l->srcTensorDesc,
            l->ddstTensorDesc,
            l->convDesc,
            l->dweightDesc,
            CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT,
            &returnedAlgoCount,
            bf_results);
    for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){
        #if PRINT_CUDNN_ALGO > 0
        printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n",
               cudnnGetErrorString(bf_results[algoIndex].status),
               bf_results[algoIndex].algo, bf_results[algoIndex].time,
               (unsigned long long)bf_results[algoIndex].memory);
        #endif
        if( bf_results[algoIndex].memory < MEMORY_LIMIT ){
            l->bf_algo = bf_results[algoIndex].algo;
            break;
        }
    }
 	
    #else
    
         cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
              l->srcTensorDesc,
              l->weightDesc,
              l->convDesc,
              l->dstTensorDesc,
              CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
              2000000000,
              &l->fw_algo);
    ...
    ...
    
    #endif
    
...
...
...
...
[SOLVED] "Error: 'IplImage' does not name a type" when trying to build darknet with OpenCV 4
$ git diff src/image_opencv.cpp
diff --git a/src/image_opencv.cpp b/src/image_opencv.cpp
index 7511280..1b0d7b6 100644
--- a/src/image_opencv.cpp
+++ b/src/image_opencv.cpp
@@ -5,6 +5,11 @@
 #include "opencv2/opencv.hpp"
 #include "image.h"

+#include "opencv2/core/core_c.h"
+#include "opencv2/videoio/legacy/constants_c.h"
+#include "opencv2/highgui/highgui_c.h"
+
+
 using namespace cv;

 extern "C" {
@@ -60,7 +65,8 @@ Mat image_to_mat(image im)

 image mat_to_image(Mat m)
 {
-    IplImage ipl = m;
+    //IplImage ipl = m;
+    IplImage ipl = cvIplImage(m) ;
測試:
$ wget https://pjreddie.com/media/files/yolov3.weights
$ ./darknet detect cfg/yolov3.cfg yolov3.weights data/dog.jpg

沒有留言:

張貼留言