Skip to content

Commit

Permalink
Add UsesNVGPUModule check to info module
Browse files Browse the repository at this point in the history
Signed-off-by: Evan Lezar <[email protected]>
  • Loading branch information
elezar committed Mar 26, 2024
1 parent 35b90cf commit aaa7424
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 1 deletion.
57 changes: 56 additions & 1 deletion pkg/nvlib/info/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
"path/filepath"
"strings"

"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvml/pkg/dl"
)

Expand All @@ -30,10 +32,13 @@ type Interface interface {
HasDXCore() (bool, string)
HasNvml() (bool, string)
IsTegraSystem() (bool, string)
UsesNVGPUModule() (bool, string)
}

type infolib struct {
root string
root string
nvmllib nvml.Interface
devicelib device.Interface
}

var _ Interface = &infolib{}
Expand Down Expand Up @@ -87,6 +92,56 @@ func (i *infolib) IsTegraSystem() (bool, string) {
return false, fmt.Sprintf("%v has no 'tegra' prefix", tegraFamilyFile)
}

// UsesNVGPUModule reports whether the nvgpu module is in use.
// The device name is used as the signal: devices that use the nvgpu module
// report names such as:
//
//	GPU 0: Orin (nvgpu) (UUID: 54d0709b-558d-5a59-9c65-0c5fc14a21a4)
//
// The result is true only when at least one device is found and EVERY device
// name contains "(nvgpu)". The returned string describes why the result was
// reached.
func (i *infolib) UsesNVGPUModule() (uses bool, reason string) {
	// Any panic raised below is converted into a negative result, so this
	// function never panics.
	defer func() {
		if r := recover(); r != nil {
			uses, reason = false, fmt.Sprintf("panic: %v", r)
		}
	}()

	if initRet := i.nvmllib.Init(); initRet != nvml.SUCCESS {
		return false, fmt.Sprintf("failed to initialize nvml: %v", initRet)
	}
	// The return value of Shutdown is deliberately discarded; it does not
	// influence the result.
	defer func() { _ = i.nvmllib.Shutdown() }()

	// Gather every device name first so that a failure to query any device is
	// reported before the names themselves are inspected.
	var deviceNames []string
	collectName := func(index int, dev device.Device) error {
		deviceName, nameRet := dev.GetName()
		if nameRet != nvml.SUCCESS {
			return fmt.Errorf("device %v: %v", index, nameRet)
		}
		deviceNames = append(deviceNames, deviceName)
		return nil
	}
	if visitErr := i.devicelib.VisitDevices(collectName); visitErr != nil {
		return false, fmt.Sprintf("failed to get device names: %v", visitErr)
	}

	if len(deviceNames) == 0 {
		return false, "no devices found"
	}

	for _, deviceName := range deviceNames {
		if strings.Contains(deviceName, "(nvgpu)") {
			continue
		}
		return false, fmt.Sprintf("device %q does not use nvgpu module", deviceName)
	}
	return true, "all devices use nvgpu module"
}

// assertHasLibrary returns an error if the specified library cannot be loaded
func assertHasLibrary(libraryName string) error {
const (
Expand Down
25 changes: 25 additions & 0 deletions pkg/nvlib/info/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@

package info

import (
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
)

// Option is a functional option passed to New(); each Option receives the
// infolib under construction and may modify it.
type Option func(*infolib)

Expand All @@ -28,9 +33,29 @@ func New(opts ...Option) Interface {
if i.root == "" {
i.root = "/"
}
if i.nvmllib == nil {
i.nvmllib = nvml.New()
}
if i.devicelib == nil {
i.devicelib = device.New(device.WithNvml(i.nvmllib))
}
return i
}

// WithDeviceLib returns an Option that stores the given device.Interface as
// the device library of the info library being constructed.
func WithDeviceLib(devicelib device.Interface) Option {
	return func(lib *infolib) {
		lib.devicelib = devicelib
	}
}

// WithNvmlLib returns an Option that stores the given nvml.Interface as the
// NVML library of the info library being constructed.
func WithNvmlLib(nvmllib nvml.Interface) Option {
	return func(lib *infolib) {
		lib.nvmllib = nvmllib
	}
}

// WithRoot provides a Option to set the root of the 'info' interface
func WithRoot(root string) Option {
return func(i *infolib) {
Expand Down

0 comments on commit aaa7424

Please sign in to comment.