環境: Xubuntu20.04
重灌電腦後,就裝了cuda 11, 安裝流程
$ /usr/bin/nvidia-smi
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.27       Driver Version: 465.27       CUDA Version: 11.3     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0 N/A |                  N/A |
| N/A   54C    P0    N/A /  N/A |    405MiB /  2002MiB |     N/A      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+
出現no kernel image is available for execution on the device
才發現,我電腦顯卡GeForce GT 750M的Compute Capability為3.0, 查表
只能用 arch=compute_30,code=sm_30
所以就把cuda11給移除 (sudo apt-get remove --auto-remove cuda-11)
重新安裝cuda10
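但後來 nvidia-smi 還是顯示 CUDA Version: 11.3@@ (這是正常的: nvidia-smi 顯示的是顯卡驅動程式所支援的最高 CUDA 版本, 不是已安裝的 CUDA toolkit 版本; toolkit 版本請用 nvcc --version 確認)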
ii cuda-10-0 10.0.130-1 amd64 CUDA 10.0 meta-package
ii cuda-command-line-tools-10-0 10.0.130-1 amd64 CUDA command-line tools
ii cuda-compiler-10-0 10.0.130-1 amd64 CUDA compiler
ii cuda-cublas-10-0 10.0.130-1 amd64 CUBLAS native runtime libraries
ii cuda-cublas-dev-10-0 10.0.130-1 amd64 CUBLAS native dev links, headers
...
...
bashrc設定
export PATH=$PATH:/usr/local/cuda/bin
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/opt/cuda/cuda_10.0/lib64/
export CUDADIR=/usr/local/cuda
export CUDA_HOME=/usr/local/cuda
下載cudnn(需另外註冊)
必須配合 CUDA 的版本: Download cuDNN v7.6.5 (November 5th, 2019), for CUDA 10.0
下載Darknet
$ git clone https://github.com/pjreddie/darknet.git
修改Makefile (OpenCV 4.x需事先安裝好)
GPU=1
CUDNN=1
OPENCV=1
OPENMP=0
DEBUG=0
# GeForce GT 750M的Compute Capability為3.0
ARCH= -gencode arch=compute_30,code=sm_30
# -gencode arch=compute_20,code=[sm_20,sm_21] \ This one is deprecated?
# This is what I use, uncomment if you know your arch and want to specify
# ARCH= -gencode arch=compute_52,code=compute_52
VPATH=./src/:./examples
SLIB=libdarknet.so
ALIB=libdarknet.a
EXEC=darknet
OBJDIR=./obj/
CC=gcc
CPP=g++
NVCC=nvcc
AR=ar
ARFLAGS=rcs
OPTS=-Ofast
LDFLAGS= -lm -pthread
COMMON= -Iinclude/ -Isrc/ -I/opt/cuda/cuda_10.0/include/
CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC
ifeq ($(OPENMP), 1)
CFLAGS+= -fopenmp
endif
ifeq ($(DEBUG), 1)
OPTS=-O0 -g
endif
CFLAGS+=$(OPTS)
ifeq ($(OPENCV), 1)
COMMON+= -DOPENCV
CFLAGS+= -DOPENCV
LDFLAGS+= `pkg-config --libs opencv4` -lstdc++
COMMON+= `pkg-config --cflags opencv4`
endif
ifeq ($(GPU), 1)
COMMON+= -DGPU -I/usr/local/cuda/include/
CFLAGS+= -DGPU
LDFLAGS+= -L/opt/cuda/cuda_10.0/lib64 -lcuda -lcudart -lcublas -lcurand
LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand
endif
ifeq ($(CUDNN), 1)
COMMON+= -DCUDNN
CFLAGS+= -DCUDNN
LDFLAGS+= -lcudnn
endif
...
...
...
...
安裝gcc 7 (CUDA10 不支援大於7版)
$ sudo apt-get install gcc-7 g++-7
$ ln -s /usr/bin/gcc-7 /usr/local/cuda/bin/gcc
$ ln -s /usr/bin/g++-7 /usr/local/cuda/bin/g++
...
...
#define PRINT_CUDNN_ALGO 0
#define MEMORY_LIMIT 2000000000
...
...
#if CUDNN_MAJOR >= 7
cudnnSetConvolutionGroupCount(l->convDesc, l->groups);
#else
if(l->groups > 1){
error("CUDNN < 7 doesn't support groups, please upgrade!");
}
#endif
#if CUDNN_MAJOR >= 8
int returnedAlgoCount;
cudnnConvolutionFwdAlgoPerf_t fw_results[2 * CUDNN_CONVOLUTION_FWD_ALGO_COUNT];
cudnnConvolutionBwdDataAlgoPerf_t bd_results[2 * CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT];
cudnnConvolutionBwdFilterAlgoPerf_t bf_results[2 * CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT];
cudnnFindConvolutionForwardAlgorithm(cudnn_handle(),
l->srcTensorDesc,
l->weightDesc,
l->convDesc,
l->dstTensorDesc,
CUDNN_CONVOLUTION_FWD_ALGO_COUNT,
&returnedAlgoCount,
fw_results);
for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){
#if PRINT_CUDNN_ALGO > 0
printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n",
cudnnGetErrorString(fw_results[algoIndex].status),
fw_results[algoIndex].algo, fw_results[algoIndex].time,
(unsigned long long)fw_results[algoIndex].memory);
#endif
if( fw_results[algoIndex].memory < MEMORY_LIMIT ){
l->fw_algo = fw_results[algoIndex].algo;
break;
}
}
cudnnFindConvolutionBackwardDataAlgorithm(cudnn_handle(),
l->weightDesc,
l->ddstTensorDesc,
l->convDesc,
l->dsrcTensorDesc,
CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT,
&returnedAlgoCount,
bd_results);
for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){
#if PRINT_CUDNN_ALGO > 0
printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n",
cudnnGetErrorString(bd_results[algoIndex].status),
bd_results[algoIndex].algo, bd_results[algoIndex].time,
(unsigned long long)bd_results[algoIndex].memory);
#endif
if( bd_results[algoIndex].memory < MEMORY_LIMIT ){
l->bd_algo = bd_results[algoIndex].algo;
break;
}
}
cudnnFindConvolutionBackwardFilterAlgorithm(cudnn_handle(),
l->srcTensorDesc,
l->ddstTensorDesc,
l->convDesc,
l->dweightDesc,
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT,
&returnedAlgoCount,
bf_results);
for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){
#if PRINT_CUDNN_ALGO > 0
printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n",
cudnnGetErrorString(bf_results[algoIndex].status),
bf_results[algoIndex].algo, bf_results[algoIndex].time,
(unsigned long long)bf_results[algoIndex].memory);
#endif
if( bf_results[algoIndex].memory < MEMORY_LIMIT ){
l->bf_algo = bf_results[algoIndex].algo;
break;
}
}
#else
...
...
...
...
測試
$ wget https://pjreddie.com/media/files/yolov3.weights
$ ./darknet detect cfg/yolov3.cfg yolov3.weights data/dog.jpg
...
...
...
CUDA Error: out of memory
darknet: ./src/cuda.c:36: check_error: Assertion `0' failed.
$ vim cfg/yolov3.cfg
...
...
batch=64
subdivisions=64
width=416
height=416
...
...
碰到的問題
01. opencv2/opencv.hpp: No such file or directory
02. -lcudnn not found
03. cudnn.h: No such file or directory
04. gcc versions later than 7 are not supported by CUDA 10
05. error: 'CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT' undeclared
06. image_opencv.cpp:12:1: error: ‘IplImage’ does not name a type
07. CUDA Error: out of memory
08. Unsupported gpu architecture 'compute_30'
09. no kernel image is available for execution on the device
查看顯卡資訊的指令,但要先裝好cuda...
deviceQuery
$ cd /usr/local/cuda/samples
$ make
$ bin/x86_64/linux/release/deviceQuery
$ ./deviceQuery
./deviceQuery Starting...
 CUDA Device Query (Runtime API) version (CUDART static linking)
Detected 1 CUDA Capable device(s)
Device 0: "NVIDIA GeForce GTX 765M"
  CUDA Driver Version / Runtime Version          11.3 / 10.0
  CUDA Capability Major/Minor version number:    3.0
  Total amount of global memory:                 2002 MBytes (2099511296 bytes)
  ( 4) Multiprocessors, (192) CUDA Cores/MP:     768 CUDA Cores
  GPU Max Clock rate:                            863 MHz (0.86 GHz)
  Memory Clock rate:                             2004 Mhz
  Memory Bus Width:                              128-bit
  ...
  ...
  ...
沒有留言:
張貼留言