Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
From 8ab814295aaba3fef5a43ba89deefc94378d5569 Mon Sep 17 00:00:00 2001
From: Maher Homsi <maherhom@amazon.com>
Date: Mon, 18 May 2026 18:43:07 +0000
Subject: [PATCH] Add --additional-symlinks flag for library path compatibility

Add a --additional-symlinks flag to `nvidia-ctk cdi generate` that takes
a directory path. When specified, for each discovered nvidia library
mounted into the container, a symlink is created from
<additional-symlinks-dir>/<lib> pointing to the actual library path.

This enables backwards compatibility for workloads that expect nvidia
libraries at /usr/lib/nvidia/tesla/ after libraries move to /usr/lib/.

Usage:
nvidia-ctk cdi generate --additional-symlinks /usr/lib/nvidia/tesla

Signed-off-by: Maher Homsi <maherhom@amazon.com>
---
cmd/nvidia-ctk/cdi/generate/generate.go | 14 +++++++++
internal/discover/lib_path_symlinks.go | 44 +++++++++++++++++++++++++++++++++
pkg/nvcdi/driver-nvml.go | 10 ++++++
pkg/nvcdi/lib.go | 1 +
pkg/nvcdi/options.go | 8 +++++
5 files changed, 77 insertions(+)
create mode 100644 internal/discover/lib_path_symlinks.go

diff --git a/cmd/nvidia-ctk/cdi/generate/generate.go b/cmd/nvidia-ctk/cdi/generate/generate.go
index 3c4d5e6..7a8b9c0 100644
--- a/cmd/nvidia-ctk/cdi/generate/generate.go
+++ b/cmd/nvidia-ctk/cdi/generate/generate.go
@@ -66,6 +66,7 @@ type options struct {
enabledHooks []string

featureFlags []string
+ libDirSymlinksDir string

csv struct {
files []string
@@ -231,6 +232,12 @@ func (m command) build() *cli.Command {
Destination: &opts.featureFlags,
Sources: cli.EnvVars("NVIDIA_CTK_CDI_GENERATE_FEATURE_FLAGS"),
},
+ &cli.StringFlag{
+ Name: "additional-symlinks",
+ Destination: &opts.libDirSymlinksDir,
+ Usage: "Create symlinks in the specified directory pointing to each nvidia library. This enables backwards compatibility for workloads expecting libraries at a legacy path.",
+ Sources: cli.EnvVars("NVIDIA_CTK_CDI_GENERATE_ADDITIONAL_SYMLINKS"),
+ },
},
}

@@ -363,6 +370,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
nvcdi.WithDisabledHooks(opts.disabledHooks...),
nvcdi.WithEnabledHooks(opts.enabledHooks...),
nvcdi.WithFeatureFlags(opts.featureFlags...),
+ nvcdi.WithLibDirSymlinksDir(opts.libDirSymlinksDir),
// We set the following to allow for dependency injection:
nvcdi.WithNvmlLib(opts.nvmllib),
}
diff --git a/internal/discover/lib_path_symlinks.go b/internal/discover/lib_path_symlinks.go
new file mode 100644
index 0000000..a1b2c3d
--- /dev/null
+++ b/internal/discover/lib_path_symlinks.go
@@ -0,0 +1,56 @@
+package discover
+
+import (
+ "fmt"
+ "path/filepath"
+)
+
+type libDirSymlinks struct {
+ Discover
+ hookCreator HookCreator
+ symlinkDir string
+}
+
+// WithLibDirSymlinks decorates the provided discoverer to add a hook that
+// creates symlinks in symlinkDir pointing to each discovered library.
+// For each library at /usr/lib/<lib>, a symlink <symlinkDir>/<lib> -> /usr/lib/<lib>
+// is created inside the container.
+func WithLibDirSymlinks(mounts Discover, hookCreator HookCreator, symlinkDir string) Discover {
+ if symlinkDir == "" {
+ return mounts
+ }
+ return &libDirSymlinks{
+ Discover: mounts,
+ hookCreator: hookCreator,
+ symlinkDir: symlinkDir,
+ }
+}
+
+// Hooks returns the hooks of the wrapped discoverer followed by a single
+// create-symlinks hook that links every mounted library into symlinkDir.
+func (d *libDirSymlinks) Hooks() ([]Hook, error) {
+	hooks, err := d.Discover.Hooks()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get hooks: %w", err)
+	}
+
+	mounts, err := d.Mounts()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get library mounts: %w", err)
+	}
+
+	var links []string // each entry is "<target>::<link>"
+	for _, mount := range mounts {
+		basename := filepath.Base(mount.Path)
+		link := fmt.Sprintf("%s::%s", mount.Path, filepath.Join(d.symlinkDir, basename))
+		links = append(links, link)
+	}
+
+	symlinkHook := d.hookCreator.Create(CreateSymlinksHook, links...)
+	if symlinkHook != nil {
+		hookList, _ := symlinkHook.Hooks()
+		hooks = append(hooks, hookList...)
+	}
+
+	return hooks, nil
+}
diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go
index e145a2d..f8a9b12 100644
--- a/pkg/nvcdi/driver-nvml.go
+++ b/pkg/nvcdi/driver-nvml.go
@@ -104,6 +104,16 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string, libcudaSoParentDir
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, version)
discoverers = append(discoverers, cudaCompatLibHookDiscoverer)

+ if l.libDirSymlinksDir != "" {
+ l.logger.Infof("Adding additional symlinks discoverer for directory %q", l.libDirSymlinksDir)
+ libDirSymlinksDiscoverer := discover.WithLibDirSymlinks(
+ libraries,
+ l.hookCreator,
+ l.libDirSymlinksDir,
+ )
+ discoverers = append(discoverers, libDirSymlinksDiscoverer)
+ }
+
updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.hookCreator, l.ldconfigPath)
discoverers = append(discoverers, updateLDCache)

diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go
index 1a2b3c4..5d6e7f8 100644
--- a/pkg/nvcdi/lib.go
+++ b/pkg/nvcdi/lib.go
@@ -61,6 +61,7 @@ type nvcdilib struct {
disabledHooks []discover.HookName
enabledHooks []discover.HookName
hookCreator discover.HookCreator
+ libDirSymlinksDir string
}

// New creates a new nvcdi library
diff --git a/pkg/nvcdi/options.go b/pkg/nvcdi/options.go
index 2a3b4c5..6d7e8f9 100644
--- a/pkg/nvcdi/options.go
+++ b/pkg/nvcdi/options.go
@@ -177,6 +177,14 @@ func WithEnabledHooks[T string | HookName](hooks ...T) Option {
}
}

+// WithLibDirSymlinksDir sets the directory where additional library
+// symlinks should be created in the container.
+func WithLibDirSymlinksDir(dir string) Option {
+ return func(l *nvcdilib) {
+ l.libDirSymlinksDir = dir
+ }
+}
+
// WithFeatureFlags allows the specified set of features to be toggled on.
func WithFeatureFlags[T string | FeatureFlag](featureFlags ...T) Option {
return func(o *nvcdilib) {
--
2.53.0
7 changes: 7 additions & 0 deletions packages/nvidia-container-toolkit/generate-cdi-specs.service
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,19 @@ RefuseManualStop=true
Type=oneshot
# Explanation of the options:
# --format json: to be consistent across Bottlerocket's variants
# --driver-root: point to the sysroot where NVIDIA libraries are installed
# --dev-root /: use the host root for device discovery
# --additional-symlinks: create backwards-compat symlinks in this directory so
# NVIDIA libraries also appear at the legacy path in containers
# --mode nvml: the default mode ("auto") resolves to this already, make it explicit
Comment thread
maherthomsi marked this conversation as resolved.
# --device-name-strategy uuid: the ECS agent only supports device UUIDs; for k8s
# this is irrelevant because these specs will be used
# only when NVIDIA_VISIBLE_DEVICES is "all"
# --output /etc/cdi/nvidia.json: store the CDI specifications at this location
ExecStart=/usr/bin/nvidia-ctk cdi generate --format json \
--driver-root /x86_64-bottlerocket-linux-gnu/sys-root \
--dev-root / \
--additional-symlinks /usr/lib/nvidia/tesla \
--mode nvml \
--device-name-strategy uuid \
--output /etc/cdi/nvidia.json
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Source5: nvidia-container-toolkit-tmpfiles-k8s.conf
Source6: nvidia-container-toolkit-config-k8s
Source7: generate-cdi-specs.service
Patch0001: 0001-discover-reduce-missing-resource-warnings-to-debug-l.patch
Patch0002: 0002-add-additional-symlinks-flag.patch

BuildRequires: %{_cross_os}glibc-devel
Requires: %{_cross_os}libnvidia-container
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
From 1846be5c3fb9b44d5bfb3faac8e9df45857fc673 Mon Sep 17 00:00:00 2001
From: Maher Homsi <maherhom@amazon.com>
Date: Mon, 18 May 2026 18:43:12 +0000
Subject: [PATCH] Enable library path compatibility symlinks

Enable the CreateLibSymlinksHook to generate backwards-compatibility
symlinks in containers for library paths that have moved.

Signed-off-by: Maher Homsi <maherhom@amazon.com>
---
internal/cdi/cdi.go | 1 +
1 file changed, 1 insertion(+)

diff --git a/internal/cdi/cdi.go b/internal/cdi/cdi.go
index bee83c6e4..a1b2c3d4e 100644
--- a/internal/cdi/cdi.go
+++ b/internal/cdi/cdi.go
@@ -127,6 +127,7 @@ func New(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interf
nvcdi.WithNvmlLib(c.nvmllib),
nvcdi.WithVendor(c.vendor),
nvcdi.WithDisabledHook(nvcdi.HookEnableCudaCompat),
+ nvcdi.WithEnabledHooks(nvcdi.CreateLibSymlinksHook),
}

c.cdilibs = make(map[string]nvcdi.SpecGenerator)
--
2.52.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
From a734135ac989eb235263d5a24aca6427fda8d53a Mon Sep 17 00:00:00 2001
From: Maher Homsi <maherhom@amazon.com>
Date: Mon, 18 May 2026 18:43:19 +0000
Subject: [PATCH] vendor: add CreateLibSymlinksHook to vendored
nvidia-container-toolkit

Update the vendored nvidia-container-toolkit to include the
CreateLibSymlinksHook feature. This is needed because patch 1002 enables
this hook in the device plugin, but the vendored toolkit code does not
include it.

Signed-off-by: Maher Homsi <maherhom@amazon.com>
---
.../internal/discover/hooks.go | 15 +++-
.../internal/discover/lib_path_symlinks.go | 57 +++++++++++++++++++
.../nvidia-container-toolkit/pkg/nvcdi/api.go | 2 +
.../pkg/nvcdi/driver-nvml.go | 3 +
4 files changed, 74 insertions(+), 3 deletions(-)
create mode 100644 vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/lib_path_symlinks.go

diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/hooks.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/hooks.go
index 66bef75..a1f608a 100644
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/hooks.go
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/hooks.go
@@ -36,6 +36,8 @@ const (
ChmodHook = HookName("chmod")
// A CreateSymlinksHook is used to create symlinks in the container.
CreateSymlinksHook = HookName("create-symlinks")
+ // A CreateLibSymlinksHook is used to create library path compatibility symlinks.
+ CreateLibSymlinksHook = HookName("create-lib-symlinks")
// DisableDeviceNodeModificationHook refers to the hook used to ensure that
// device nodes are not created by libnvidia-ml.so or nvidia-smi in a
// container.
@@ -57,6 +59,8 @@ var defaultDisabledHooks = []HookName{
// ChmodHook is disabled by default as it was a workaround for older
// versions of crun that has since been fixed.
ChmodHook,
+ // CreateLibSymlinksHook is disabled by default; opt-in for backwards compat.
+ CreateLibSymlinksHook,
}

var _ Discover = (*Hook)(nil)
@@ -204,19 +208,24 @@ func (c cdiHookCreator) isDisabled(name HookName, args ...string) bool {

// still reject hooks that require args if none were provided
switch name {
- case CreateSymlinksHook, ChmodHook:
+ case CreateSymlinksHook, CreateLibSymlinksHook, ChmodHook:
return len(args) == 0
}
return false
}

func (c cdiHookCreator) requiredArgs(name HookName) []string {
- return append(c.fixedArgs, string(name))
+ cliName := name
+ switch name {
+ case CreateLibSymlinksHook:
+ cliName = CreateSymlinksHook
+ }
+ return append(c.fixedArgs, string(cliName))
}

func (c cdiHookCreator) transformArgs(name HookName, args ...string) []string {
switch name {
- case CreateSymlinksHook:
+ case CreateSymlinksHook, CreateLibSymlinksHook:
var transformedArgs []string
for _, arg := range args {
transformedArgs = append(transformedArgs, "--link", arg)
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/lib_path_symlinks.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/lib_path_symlinks.go
new file mode 100644
index 0000000..7cbf052
--- /dev/null
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/lib_path_symlinks.go
@@ -0,0 +1,57 @@
+package discover
+
+import (
+ "fmt"
+ "path/filepath"
+)
+
+const defaultLegacyLibPath = "/usr/lib/nvidia/tesla"
+
+type libPathSymlinks struct {
+ Discover
+ hookCreator HookCreator
+ legacyLibPath string
+}
+
+// WithLibPathSymlinks decorates the provided discoverer to add a hook that
+// creates backwards-compatibility symlinks from legacyLibPath/<lib> to the
+// actual library paths.
+func WithLibPathSymlinks(mounts Discover, hookCreator HookCreator, legacyLibPath string) Discover {
+ if legacyLibPath == "" {
+ legacyLibPath = defaultLegacyLibPath
+ }
+ return &libPathSymlinks{
+ Discover: mounts,
+ hookCreator: hookCreator,
+ legacyLibPath: legacyLibPath,
+ }
+}
+
+// Hooks returns hooks from the wrapped discoverer plus a hook to create
+// library path compatibility symlinks.
+func (d *libPathSymlinks) Hooks() ([]Hook, error) {
+ hooks, err := d.Discover.Hooks()
+ if err != nil {
+ return nil, fmt.Errorf("failed to get hooks: %v", err)
+ }
+
+ mounts, err := d.Mounts()
+ if err != nil {
+ return nil, fmt.Errorf("failed to get library mounts: %v", err)
+ }
+
+ var links []string
+ for _, mount := range mounts {
+ basename := filepath.Base(mount.Path)
+ link := fmt.Sprintf("%s::%s", mount.Path, filepath.Join(d.legacyLibPath, basename))
+ links = append(links, link)
+ }
+
+ symlinkHook := d.hookCreator.Create(CreateLibSymlinksHook, links...)
+ if symlinkHook != nil {
+ hookList, _ := symlinkHook.Hooks()
+ hooks = append(hooks, hookList...)
+ }
+
+ return hooks, nil
+}
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/api.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/api.go
index fce32bc..d827c06 100644
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/api.go
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/api.go
@@ -63,6 +63,8 @@ const (
EnableCudaCompatHook = discover.EnableCudaCompatHook
// An UpdateLDCacheHook is used to update the ldcache in the container.
UpdateLDCacheHook = discover.UpdateLDCacheHook
+ // A CreateLibSymlinksHook is used to create library path compatibility symlinks.
+ CreateLibSymlinksHook = discover.CreateLibSymlinksHook

// Deprecated: Use CreateSymlinksHook instead.
HookCreateSymlinks = CreateSymlinksHook
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/driver-nvml.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/driver-nvml.go
index e145a2d..32ed643 100644
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/driver-nvml.go
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/driver-nvml.go
@@ -104,6 +104,9 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string, libcudaSoParentDir
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, version)
discoverers = append(discoverers, cudaCompatLibHookDiscoverer)

+ libPathSymlinksDiscoverer := discover.WithLibPathSymlinks(libraries, l.hookCreator, "")
+ discoverers = append(discoverers, libPathSymlinksDiscoverer)
+
updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.hookCreator, l.ldconfigPath)
discoverers = append(discoverers, updateLDCache)

--
2.53.0

Loading
Loading