Skip to content

Commit

Permalink
Refactor nvidia symlink creation
Browse files Browse the repository at this point in the history
Avoid creation of broken symlink when dealing with VGPU drivers.
  • Loading branch information
cmd-ntrf committed Feb 23, 2021
1 parent 8664e64 commit 54ea3ec
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 31 deletions.
17 changes: 3 additions & 14 deletions site/profile/facts.d/nvidia_driver_vers.sh
Original file line number Diff line number Diff line change
@@ -1,16 +1,5 @@
#!/bin/sh
PROCESSOR=$(uname -p)
VERSION="$(source /etc/os-release; echo $VERSION_ID)"
PACKAGE="cuda-drivers"
PACKAGE_REGEX="${PACKAGE}-\([0-9.]\{1,\}\)[-0-9]*\.${PROCESSOR}"
DRIVER_VERSION=$(test -f /usr/sbin/dkms && /usr/sbin/dkms status | grep -m 1 -Po 'nvidia, \K(\d+.\d+[\.]\d*)')
# If that didn't work let's try nvidia-smi
if [ -z $DRIVER_VERSION ]; then
DRIVER_VERSION=$(if [ -x "$(command -v nvidia-smi)" ]; then nvidia-smi --query-gpu=driver_version --format=csv,noheader; fi)
if [ -e /proc/driver/nvidia ]; then
DRIVER_VERSION=$(grep -m 1 -Po 'NVRM version:.* \K(\d+\.\d+\.\d+)' /proc/driver/nvidia/version)
fi
if [ -z $DRIVER_VERSION ]; then
BASE_URL="http://developer.download.nvidia.com/compute/cuda/repos"
CUDA_REPO_GZ=$(curl -s ${BASE_URL}/rhel${VERSION}/${PROCESSOR}/repodata/repomd.xml | sed '2 s/xmlns=".*"//g' | xmllint --xpath 'string(/repomd/data[@type="primary"]/location/@href)' -)
DRIVER_VERSION=$(curl -s ${BASE_URL}/rhel${VERSION}/${PROCESSOR}/${CUDA_REPO_GZ} | gunzip | sed -n "s/^.*\"${PACKAGE_REGEX}\.rpm\".*$/\1/p" | sort -V | tail -n1)
fi
echo "{ 'nvidia_driver_version' : '${DRIVER_VERSION}' }"
echo "{ 'nvidia_driver_version' : '${DRIVER_VERSION}' }"
84 changes: 67 additions & 17 deletions site/profile/manifests/gpu.pp
Original file line number Diff line number Diff line change
Expand Up @@ -58,24 +58,32 @@
ensure => directory
}

$driver_ver = $::facts['nvidia_driver_version']
$nvidia_libs = [
"libnvidia-ml.so.${driver_ver}", 'libnvidia-ml.so.1', 'libnvidia-fbc.so.1',
"libnvidia-fbc.so.${driver_ver}", 'libnvidia-ifr.so.1', "libnvidia-ifr.so.${driver_ver}",
'libcuda.so', 'libcuda.so.1', "libcuda.so.${driver_ver}", "libnvcuvid.so.${driver_ver}",
'libnvcuvid.so.1', "libnvidia-compiler.so.${driver_ver}", 'libnvidia-encode.so.1',
"libnvidia-encode.so.${driver_ver}", "libnvidia-fatbinaryloader.so.${driver_ver}",
'libnvidia-opencl.so.1', "libnvidia-opencl.so.${driver_ver}", 'libnvidia-opticalflow.so.1',
"libnvidia-opticalflow.so.${driver_ver}", 'libnvidia-ptxjitcompiler.so.1', "libnvidia-ptxjitcompiler.so.${driver_ver}",
'libnvcuvid.so', 'libnvidia-cfg.so', 'libnvidia-encode.so',
'libnvidia-fbc.so', 'libnvidia-ifr.so', 'libnvidia-ml.so',
'libnvidia-ptxjitcompiler.so', 'libEGL_nvidia.so.0', "libEGL_nvidia.so.${driver_ver}",
'libGLESv1_CM_nvidia.so.1', "libGLESv1_CM_nvidia.so.${driver_ver}", 'libGLESv2_nvidia.so.2',
"libGLESv2_nvidia.so.${driver_ver}", 'libGLX_indirect.so.0', 'libGLX_nvidia.so.0',
"libGLX_nvidia.so.${driver_ver}", "libnvidia-cbl.so.${driver_ver}", 'libnvidia-cfg.so.1',
"libnvidia-cfg.so.${driver_ver}", "libnvidia-eglcore.so.${driver_ver}", "libnvidia-glcore.so.${driver_ver}",
"libnvidia-glsi.so.${driver_ver}", "libnvidia-glvkspirv.so.${driver_ver}", "libnvidia-rtcore.so.${driver_ver}",
"libnvidia-tls.so.${driver_ver}", 'libnvoptix.so.1', "libnvoptix.so.${driver_ver}"]
'libcuda.so.1',
'libcuda.so',
'libEGL_nvidia.so.0',
'libGLESv1_CM_nvidia.so.1',
'libGLESv2_nvidia.so.2',
'libGLX_indirect.so.0',
'libGLX_nvidia.so.0',
'libnvcuvid.so.1',
'libnvcuvid.so',
'libnvidia-cfg.so.1',
'libnvidia-cfg.so',
'libnvidia-encode.so.1',
'libnvidia-encode.so',
'libnvidia-fbc.so.1',
'libnvidia-fbc.so',
'libnvidia-ifr.so.1',
'libnvidia-ifr.so',
'libnvidia-ml.so.1',
'libnvidia-ml.so',
'libnvidia-opencl.so.1',
'libnvidia-opticalflow.so.1',
'libnvidia-ptxjitcompiler.so.1',
'libnvidia-ptxjitcompiler.so',
'libnvoptix.so.1',
]

$nvidia_libs.each |String $lib| {
file { "/usr/lib64/nvidia/${lib}":
Expand All @@ -84,6 +92,48 @@
seltype => 'lib_t'
}
}

# Symbolic links whose file names embed the NVIDIA driver version
# (libfoo.so.<driver version>).
#
# WARNING: facts are computed before the Puppet agent run, so on a
# clean host the links that include the version number are created
# on the second Puppet run only.
$driver_vers = $::facts['nvidia_driver_version']
# Only proceed when the fact is a non-empty string. Comparing against ''
# alone is not enough: a missing fact is undef, and undef != '' is true,
# which would interpolate to "libfoo.so." and create dangling links.
if $driver_vers =~ String[1] {
  $nvidia_libs_vers = [
    "libcuda.so.${driver_vers}",
    "libEGL_nvidia.so.${driver_vers}",
    "libGLESv1_CM_nvidia.so.${driver_vers}",
    "libGLESv2_nvidia.so.${driver_vers}",
    "libGLX_nvidia.so.${driver_vers}",
    "libnvcuvid.so.${driver_vers}",
    "libnvidia-cbl.so.${driver_vers}",
    "libnvidia-cfg.so.${driver_vers}",
    "libnvidia-compiler.so.${driver_vers}",
    "libnvidia-eglcore.so.${driver_vers}",
    "libnvidia-encode.so.${driver_vers}",
    "libnvidia-fatbinaryloader.so.${driver_vers}",
    "libnvidia-fbc.so.${driver_vers}",
    "libnvidia-glcore.so.${driver_vers}",
    "libnvidia-glsi.so.${driver_vers}",
    "libnvidia-glvkspirv.so.${driver_vers}",
    "libnvidia-ifr.so.${driver_vers}",
    "libnvidia-ml.so.${driver_vers}",
    "libnvidia-opencl.so.${driver_vers}",
    "libnvidia-opticalflow.so.${driver_vers}",
    "libnvidia-ptxjitcompiler.so.${driver_vers}",
    "libnvidia-rtcore.so.${driver_vers}",
    "libnvidia-tls.so.${driver_vers}",
    "libnvoptix.so.${driver_vers}",
  ]

  # Expose each versioned library under /usr/lib64/nvidia as a link to the
  # file of the same name in /usr/lib64.
  $nvidia_libs_vers.each |String $lib| {
    file { "/usr/lib64/nvidia/${lib}":
      ensure  => link,
      target  => "/usr/lib64/${lib}",
      seltype => 'lib_t',
    }
  }
}
}

class profile::gpu::install::passthrough(Array[String] $packages) {
Expand Down

0 comments on commit 54ea3ec

Please sign in to comment.