Skip to content

Commit

Permalink
nvsandboxutils: Add usage of GetDriverVersion API
Browse files Browse the repository at this point in the history
This change adds usage of the Sandboxutils GetDriverVersion API to
retrieve the CUDA driver version. If the library is not available on the
system or the API call fails for some other reason, it will fall back to
the NVML API to return the driver version.

Signed-off-by: Evan Lezar <[email protected]>
Signed-off-by: Huy Nguyen <[email protected]>
Signed-off-by: Sananya Majumder <[email protected]>
  • Loading branch information
sananya12 committed Aug 6, 2024
1 parent 7a9071c commit 93edec3
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 1 deletion.
14 changes: 14 additions & 0 deletions pkg/nvcdi/lib-nvml.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"tags.cncf.io/container-device-interface/specs-go"

"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvsandboxutils"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
)

Expand All @@ -52,6 +53,19 @@ func (l *nvmllib) GetAllDeviceSpecs() ([]specs.Device, error) {
}
}()

if l.nvsandboxutilslib != nil {
if r := l.nvsandboxutilslib.Init(l.driverRoot); r != nvsandboxutils.SUCCESS {
l.logger.Warningf("Failed to init nvsandboxutils: %v; ignoring", r)
l.nvsandboxutilslib = nil
}
defer func() {
if l.nvsandboxutilslib == nil {
return
}
_ = l.nvsandboxutilslib.Shutdown()
}()
}

gpuDeviceSpecs, err := l.getGPUDeviceSpecs()
if err != nil {
return nil, err
Expand Down
36 changes: 35 additions & 1 deletion pkg/nvcdi/lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (

"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvsandboxutils"
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
Expand All @@ -43,6 +44,7 @@ type wrapper struct {
type nvcdilib struct {
logger logger.Interface
nvmllib nvml.Interface
nvsandboxutilslib nvsandboxutils.Interface
mode string
devicelib device.Interface
deviceNamers DeviceNamers
Expand Down Expand Up @@ -107,6 +109,19 @@ func New(opts ...Option) (Interface, error) {
}
l.nvmllib = nvml.New(nvmlOpts...)
}
if l.nvsandboxutilslib == nil {
var nvsandboxutilsOpts []nvsandboxutils.LibraryOption
// Set the library path for libnvidia-sandboxutils
candidates, err := l.driver.Libraries().Locate("libnvidia-sandboxutils.so.1")
if err != nil {
l.logger.Warningf("Ignoring error in locating libnvidia-sandboxutils.so.1: %v", err)
} else {
libNvidiaSandboxutilsPath := candidates[0]
l.logger.Infof("Using %v", libNvidiaSandboxutilsPath)
nvsandboxutilsOpts = append(nvsandboxutilsOpts, nvsandboxutils.WithLibraryPath(libNvidiaSandboxutilsPath))
}
l.nvsandboxutilslib = nvsandboxutils.New(nvsandboxutilsOpts...)
}
if l.devicelib == nil {
l.devicelib = device.New(l.nvmllib)
}
Expand Down Expand Up @@ -213,7 +228,7 @@ func (l *nvcdilib) resolveMode() (rmode string) {
}

// getCudaVersionNvml returns the CUDA version of the current system using NVML.
func (l *nvcdilib) getCudaVersion() (string, error) {
func (l *nvcdilib) getCudaVersionNvml() (string, error) {
if hasNVML, reason := l.infolib.HasNvml(); !hasNVML {
return "", fmt.Errorf("nvml not detected: %v", reason)
}
Expand All @@ -236,3 +251,22 @@ func (l *nvcdilib) getCudaVersion() (string, error) {
}
return version, nil
}

// getCudaVersionNvsandboxutils returns the driver version reported by the
// nvsandboxutils GetDriverVersion API.
//
// An error is returned if no nvsandboxutils library is set on l, or if the
// API call returns anything other than nvsandboxutils.SUCCESS.
func (l *nvcdilib) getCudaVersionNvsandboxutils() (string, error) {
	// The library field may be nil (for example if it was cleared after a
	// failed Init). Calling a method on a nil interface panics, so return an
	// error instead and let the caller fall back to another source.
	if l.nvsandboxutilslib == nil {
		return "", fmt.Errorf("nvsandboxutils library not available")
	}

	// Sandboxutils initialization should happen before this function is called
	version, ret := l.nvsandboxutilslib.GetDriverVersion()
	if ret != nvsandboxutils.SUCCESS {
		return "", fmt.Errorf("getting driver version from nvsandboxutils: %v", ret)
	}
	return version, nil
}

// getCudaVersion returns the CUDA version of the current system.
//
// The nvsandboxutils library is queried first; if that query returns an
// error, the error is discarded and the result of the NVML-based lookup is
// returned instead.
func (l *nvcdilib) getCudaVersion() (string, error) {
	if sandboxVersion, sandboxErr := l.getCudaVersionNvsandboxutils(); sandboxErr == nil {
		return sandboxVersion, nil
	}

	// Fallback to NVML
	return l.getCudaVersionNvml()
}

0 comments on commit 93edec3

Please sign in to comment.