From 54ea3ec09b6868be6071040bae945724a111cccb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix-Antoine=20Fortin?=
Date: Tue, 23 Feb 2021 11:08:50 -0500
Subject: [PATCH] Refactor nvidia symlink creation

Avoid creation of broken symlink when dealing with VGPU drivers.
---
Review notes (free text placed after the three-dash line; not part of the
commit message):
 * The line structure of this patch was restored from a copy whose
   whitespace had been collapsed. Every token is unchanged, but the leading
   indentation inside the hunks is a best-effort reconstruction -- verify it
   against the target tree (or apply with whitespace tolerance).
 * NOTE(review): the new fact regex only matches three-component versions
   (N.N.N). A driver reporting a two-component version would yield an empty
   fact, and gpu.pp would then skip the versioned symlinks -- confirm that
   this is the intended behaviour.

 site/profile/facts.d/nvidia_driver_vers.sh | 17 +----
 site/profile/manifests/gpu.pp              | 84 +++++++++++++++++-----
 2 files changed, 70 insertions(+), 31 deletions(-)

diff --git a/site/profile/facts.d/nvidia_driver_vers.sh b/site/profile/facts.d/nvidia_driver_vers.sh
index 0f804a112..cc36a85b7 100755
--- a/site/profile/facts.d/nvidia_driver_vers.sh
+++ b/site/profile/facts.d/nvidia_driver_vers.sh
@@ -1,16 +1,5 @@
 #!/bin/sh
-PROCESSOR=$(uname -p)
-VERSION="$(source /etc/os-release; echo $VERSION_ID)"
-PACKAGE="cuda-drivers"
-PACKAGE_REGEX="${PACKAGE}-\([0-9.]\{1,\}\)[-0-9]*\.${PROCESSOR}"
-DRIVER_VERSION=$(test -f /usr/sbin/dkms && /usr/sbin/dkms status | grep -m 1 -Po 'nvidia, \K(\d+.\d+[\.]\d*)')
-# If that didn't work let's try nvidia-smi
-if [ -z $DRIVER_VERSION ]; then
-    DRIVER_VERSION=$(if [ -x "$(command -v nvidia-smi)" ]; then nvidia-smi --query-gpu=driver_version --format=csv,noheader; fi)
+if [ -e /proc/driver/nvidia ]; then
+    DRIVER_VERSION=$(grep -m 1 -Po 'NVRM version:.* \K(\d+\.\d+\.\d+)' /proc/driver/nvidia/version)
 fi
-if [ -z $DRIVER_VERSION ]; then
-    BASE_URL="http://developer.download.nvidia.com/compute/cuda/repos"
-    CUDA_REPO_GZ=$(curl -s ${BASE_URL}/rhel${VERSION}/${PROCESSOR}/repodata/repomd.xml | sed '2 s/xmlns=".*"//g' | xmllint --xpath 'string(/repomd/data[@type="primary"]/location/@href)' -)
-    DRIVER_VERSION=$(curl -s ${BASE_URL}/rhel${VERSION}/${PROCESSOR}/${CUDA_REPO_GZ} | gunzip | sed -n "s/^.*\"${PACKAGE_REGEX}\.rpm\".*$/\1/p" | sort -V | tail -n1)
-fi
-echo "{ 'nvidia_driver_version' : '${DRIVER_VERSION}' }"
+echo "{ 'nvidia_driver_version' : '${DRIVER_VERSION}' }"
\ No newline at end of file
diff --git a/site/profile/manifests/gpu.pp b/site/profile/manifests/gpu.pp
index 0a458ad55..cac364024 100644
--- a/site/profile/manifests/gpu.pp
+++ b/site/profile/manifests/gpu.pp
@@ -58,24 +58,32 @@
     ensure => directory
   }
 
-  $driver_ver = $::facts['nvidia_driver_version']
   $nvidia_libs = [
-    "libnvidia-ml.so.${driver_ver}", 'libnvidia-ml.so.1', 'libnvidia-fbc.so.1',
-    "libnvidia-fbc.so.${driver_ver}", 'libnvidia-ifr.so.1', "libnvidia-ifr.so.${driver_ver}",
-    'libcuda.so', 'libcuda.so.1', "libcuda.so.${driver_ver}", "libnvcuvid.so.${driver_ver}",
-    'libnvcuvid.so.1', "libnvidia-compiler.so.${driver_ver}", 'libnvidia-encode.so.1',
-    "libnvidia-encode.so.${driver_ver}", "libnvidia-fatbinaryloader.so.${driver_ver}",
-    'libnvidia-opencl.so.1', "libnvidia-opencl.so.${driver_ver}", 'libnvidia-opticalflow.so.1',
-    "libnvidia-opticalflow.so.${driver_ver}", 'libnvidia-ptxjitcompiler.so.1', "libnvidia-ptxjitcompiler.so.${driver_ver}",
-    'libnvcuvid.so', 'libnvidia-cfg.so', 'libnvidia-encode.so',
-    'libnvidia-fbc.so', 'libnvidia-ifr.so', 'libnvidia-ml.so',
-    'libnvidia-ptxjitcompiler.so', 'libEGL_nvidia.so.0', "libEGL_nvidia.so.${driver_ver}",
-    'libGLESv1_CM_nvidia.so.1', "libGLESv1_CM_nvidia.so.${driver_ver}", 'libGLESv2_nvidia.so.2',
-    "libGLESv2_nvidia.so.${driver_ver}", 'libGLX_indirect.so.0', 'libGLX_nvidia.so.0',
-    "libGLX_nvidia.so.${driver_ver}", "libnvidia-cbl.so.${driver_ver}", 'libnvidia-cfg.so.1',
-    "libnvidia-cfg.so.${driver_ver}", "libnvidia-eglcore.so.${driver_ver}", "libnvidia-glcore.so.${driver_ver}",
-    "libnvidia-glsi.so.${driver_ver}", "libnvidia-glvkspirv.so.${driver_ver}", "libnvidia-rtcore.so.${driver_ver}",
-    "libnvidia-tls.so.${driver_ver}", 'libnvoptix.so.1', "libnvoptix.so.${driver_ver}"]
+    'libcuda.so.1',
+    'libcuda.so',
+    'libEGL_nvidia.so.0',
+    'libGLESv1_CM_nvidia.so.1',
+    'libGLESv2_nvidia.so.2',
+    'libGLX_indirect.so.0',
+    'libGLX_nvidia.so.0',
+    'libnvcuvid.so.1',
+    'libnvcuvid.so',
+    'libnvidia-cfg.so.1',
+    'libnvidia-cfg.so',
+    'libnvidia-encode.so.1',
+    'libnvidia-encode.so',
+    'libnvidia-fbc.so.1',
+    'libnvidia-fbc.so',
+    'libnvidia-ifr.so.1',
+    'libnvidia-ifr.so',
+    'libnvidia-ml.so.1',
+    'libnvidia-ml.so',
+    'libnvidia-opencl.so.1',
+    'libnvidia-opticalflow.so.1',
+    'libnvidia-ptxjitcompiler.so.1',
+    'libnvidia-ptxjitcompiler.so',
+    'libnvoptix.so.1',
+  ]
 
   $nvidia_libs.each |String $lib| {
     file { "/usr/lib64/nvidia/${lib}":
@@ -84,6 +92,48 @@
       seltype => 'lib_t'
     }
   }
+
+  # WARNING : since the fact is computed before Puppet agent run,
+  # on a clean host, the symbolic links to the NVIDIA libraries
+  # that include the version number will be created on the
+  # second Puppet run only.
+  $driver_vers = $::facts['nvidia_driver_version']
+  if $driver_vers != '' {
+    $nvidia_libs_vers = [
+      "libcuda.so.${driver_vers}",
+      "libEGL_nvidia.so.${driver_vers}",
+      "libGLESv1_CM_nvidia.so.${driver_vers}",
+      "libGLESv2_nvidia.so.${driver_vers}",
+      "libGLX_nvidia.so.${driver_vers}",
+      "libnvcuvid.so.${driver_vers}",
+      "libnvidia-cbl.so.${driver_vers}",
+      "libnvidia-cfg.so.${driver_vers}",
+      "libnvidia-compiler.so.${driver_vers}",
+      "libnvidia-eglcore.so.${driver_vers}",
+      "libnvidia-encode.so.${driver_vers}",
+      "libnvidia-fatbinaryloader.so.${driver_vers}",
+      "libnvidia-fbc.so.${driver_vers}",
+      "libnvidia-glcore.so.${driver_vers}",
+      "libnvidia-glsi.so.${driver_vers}",
+      "libnvidia-glvkspirv.so.${driver_vers}",
+      "libnvidia-ifr.so.${driver_vers}",
+      "libnvidia-ml.so.${driver_vers}",
+      "libnvidia-opencl.so.${driver_vers}",
+      "libnvidia-opticalflow.so.${driver_vers}",
+      "libnvidia-ptxjitcompiler.so.${driver_vers}",
+      "libnvidia-rtcore.so.${driver_vers}",
+      "libnvidia-tls.so.${driver_vers}",
+      "libnvoptix.so.${driver_vers}"
+    ]
+
+    $nvidia_libs_vers.each |String $lib| {
+      file { "/usr/lib64/nvidia/${lib}":
+        ensure  => link,
+        target  => "/usr/lib64/${lib}",
+        seltype => 'lib_t'
+      }
+    }
+  }
 }
 
 class profile::gpu::install::passthrough(Array[String] $packages) {