-
Notifications
You must be signed in to change notification settings - Fork 65
Normalize NVIDIA library paths to /usr/lib/ #919
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
maherthomsi
wants to merge
2
commits into
bottlerocket-os:develop
Choose a base branch
from
maherthomsi:nvidia-normalization
base: develop
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
176 changes: 176 additions & 0 deletions
176
packages/nvidia-container-toolkit/0002-add-additional-symlinks-flag.patch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,176 @@ | ||
| From 8ab814295aaba3fef5a43ba89deefc94378d5569 Mon Sep 17 00:00:00 2001 | ||
| From: Maher Homsi <maherhom@amazon.com> | ||
| Date: Mon, 18 May 2026 18:43:07 +0000 | ||
| Subject: [PATCH] Add --additional-symlinks flag for library path compatibility | ||
|
|
||
| Add a --additional-symlinks flag to `nvidia-ctk cdi generate` that takes | ||
| a directory path. When specified, for each discovered nvidia library | ||
| mounted into the container, a symlink is created from | ||
| <additional-symlinks-dir>/<lib> pointing to the actual library path. | ||
|
|
||
| This enables backwards compatibility for workloads that expect nvidia | ||
| libraries at /usr/lib/nvidia/tesla/ after libraries move to /usr/lib/. | ||
|
|
||
| Usage: | ||
| nvidia-ctk cdi generate --additional-symlinks /usr/lib/nvidia/tesla | ||
|
|
||
| Signed-off-by: Maher Homsi <maherhom@amazon.com> | ||
| --- | ||
| cmd/nvidia-ctk/cdi/generate/generate.go | 14 +++++++++ | ||
| internal/discover/lib_path_symlinks.go | 44 +++++++++++++++++++++++++++++++++ | ||
| pkg/nvcdi/driver-nvml.go | 10 ++++++ | ||
| pkg/nvcdi/lib.go | 1 + | ||
| pkg/nvcdi/options.go | 8 +++++ | ||
| 5 files changed, 77 insertions(+) | ||
| create mode 100644 internal/discover/lib_path_symlinks.go | ||
|
|
||
| diff --git a/cmd/nvidia-ctk/cdi/generate/generate.go b/cmd/nvidia-ctk/cdi/generate/generate.go | ||
| index 3c4d5e6..7a8b9c0 100644 | ||
| --- a/cmd/nvidia-ctk/cdi/generate/generate.go | ||
| +++ b/cmd/nvidia-ctk/cdi/generate/generate.go | ||
| @@ -66,6 +66,7 @@ type options struct { | ||
| enabledHooks []string | ||
|
|
||
| featureFlags []string | ||
| + libDirSymlinksDir string | ||
|
|
||
| csv struct { | ||
| files []string | ||
| @@ -231,6 +232,12 @@ func (m command) build() *cli.Command { | ||
| Destination: &opts.featureFlags, | ||
| Sources: cli.EnvVars("NVIDIA_CTK_CDI_GENERATE_FEATURE_FLAGS"), | ||
| }, | ||
| + &cli.StringFlag{ | ||
| + Name: "additional-symlinks", | ||
| + Destination: &opts.libDirSymlinksDir, | ||
| + Usage: "Create symlinks in the specified directory pointing to each nvidia library. This enables backwards compatibility for workloads expecting libraries at a legacy path.", | ||
| + Sources: cli.EnvVars("NVIDIA_CTK_CDI_GENERATE_ADDITIONAL_SYMLINKS"), | ||
| + }, | ||
| }, | ||
| } | ||
|
|
||
| @@ -363,6 +370,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) { | ||
| nvcdi.WithDisabledHooks(opts.disabledHooks...), | ||
| nvcdi.WithEnabledHooks(opts.enabledHooks...), | ||
| nvcdi.WithFeatureFlags(opts.featureFlags...), | ||
| + nvcdi.WithLibDirSymlinksDir(opts.libDirSymlinksDir), | ||
| // We set the following to allow for dependency injection: | ||
| nvcdi.WithNvmlLib(opts.nvmllib), | ||
| } | ||
| diff --git a/internal/discover/lib_path_symlinks.go b/internal/discover/lib_path_symlinks.go | ||
| new file mode 100644 | ||
| index 0000000..a1b2c3d | ||
| --- /dev/null | ||
| +++ b/internal/discover/lib_path_symlinks.go | ||
| @@ -0,0 +1,56 @@ | ||
| +package discover | ||
| + | ||
| +import ( | ||
| + "fmt" | ||
| + "path/filepath" | ||
| +) | ||
| + | ||
| +type libDirSymlinks struct { | ||
| + Discover | ||
| + hookCreator HookCreator | ||
| + symlinkDir string | ||
| +} | ||
| + | ||
| +// WithLibDirSymlinks decorates the provided discoverer to add a hook that | ||
| +// creates symlinks in symlinkDir pointing to each discovered library. | ||
| +// For each library at /usr/lib/<lib>, a symlink <symlinkDir>/<lib> -> /usr/lib/<lib> | ||
| +// is created inside the container. | ||
| +func WithLibDirSymlinks(mounts Discover, hookCreator HookCreator, symlinkDir string) Discover { | ||
| + if symlinkDir == "" { | ||
| + return mounts | ||
| + } | ||
| + return &libDirSymlinks{ | ||
| + Discover: mounts, | ||
| + hookCreator: hookCreator, | ||
| + symlinkDir: symlinkDir, | ||
| + } | ||
| +} | ||
| + | ||
| +// Hooks returns hooks from the wrapped discoverer plus a create-symlinks hook | ||
| +// that creates symlinks in the configured directory. | ||
| +func (d *libDirSymlinks) Hooks() ([]Hook, error) { | ||
| + hooks, err := d.Discover.Hooks() | ||
| + if err != nil { | ||
| + return nil, fmt.Errorf("failed to get hooks: %v", err) | ||
| + } | ||
| + | ||
| + mounts, err := d.Mounts() | ||
| + if err != nil { | ||
| + return nil, fmt.Errorf("failed to get library mounts: %v", err) | ||
| + } | ||
| + | ||
| + var links []string | ||
| + for _, mount := range mounts { | ||
| + basename := filepath.Base(mount.Path) | ||
| + link := fmt.Sprintf("%s::%s", mount.Path, filepath.Join(d.symlinkDir, basename)) | ||
| + links = append(links, link) | ||
| + } | ||
| + | ||
| + symlinkHook := d.hookCreator.Create(CreateSymlinksHook, links...) | ||
| + if symlinkHook != nil { | ||
| + hookList, _ := symlinkHook.Hooks() | ||
| + hooks = append(hooks, hookList...) | ||
| + } | ||
| + | ||
| + return hooks, nil | ||
| +} | ||
| diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go | ||
| index e145a2d..f8a9b12 100644 | ||
| --- a/pkg/nvcdi/driver-nvml.go | ||
| +++ b/pkg/nvcdi/driver-nvml.go | ||
| @@ -104,6 +104,16 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string, libcudaSoParentDir | ||
| cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, version) | ||
| discoverers = append(discoverers, cudaCompatLibHookDiscoverer) | ||
|
|
||
| + if l.libDirSymlinksDir != "" { | ||
| + l.logger.Infof("Adding additional symlinks discoverer for directory %q", l.libDirSymlinksDir) | ||
| + libDirSymlinksDiscoverer := discover.WithLibDirSymlinks( | ||
| + libraries, | ||
| + l.hookCreator, | ||
| + l.libDirSymlinksDir, | ||
| + ) | ||
| + discoverers = append(discoverers, libDirSymlinksDiscoverer) | ||
| + } | ||
| + | ||
| updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.hookCreator, l.ldconfigPath) | ||
| discoverers = append(discoverers, updateLDCache) | ||
|
|
||
| diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go | ||
| index 1a2b3c4..5d6e7f8 100644 | ||
| --- a/pkg/nvcdi/lib.go | ||
| +++ b/pkg/nvcdi/lib.go | ||
| @@ -61,6 +61,7 @@ type nvcdilib struct { | ||
| disabledHooks []discover.HookName | ||
| enabledHooks []discover.HookName | ||
| hookCreator discover.HookCreator | ||
| + libDirSymlinksDir string | ||
| } | ||
|
|
||
| // New creates a new nvcdi library | ||
| diff --git a/pkg/nvcdi/options.go b/pkg/nvcdi/options.go | ||
| index 2a3b4c5..6d7e8f9 100644 | ||
| --- a/pkg/nvcdi/options.go | ||
| +++ b/pkg/nvcdi/options.go | ||
| @@ -177,6 +177,14 @@ func WithEnabledHooks[T string | HookName](hooks ...T) Option { | ||
| } | ||
| } | ||
|
|
||
| +// WithLibDirSymlinksDir sets the directory where additional library | ||
| +// symlinks should be created in the container. | ||
| +func WithLibDirSymlinksDir(dir string) Option { | ||
| + return func(l *nvcdilib) { | ||
| + l.libDirSymlinksDir = dir | ||
| + } | ||
| +} | ||
| + | ||
| // WithFeatureFlags allows the specified set of features to be toggled on. | ||
| func WithFeatureFlags[T string | FeatureFlag](featureFlags ...T) Option { | ||
| return func(o *nvcdilib) { | ||
| -- | ||
| 2.53.0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
27 changes: 27 additions & 0 deletions
27
packages/nvidia-k8s-device-plugin/1002-Enable-library-path-compatibility-symlinks.patch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| From 1846be5c3fb9b44d5bfb3faac8e9df45857fc673 Mon Sep 17 00:00:00 2001 | ||
| From: Maher Homsi <maherhom@amazon.com> | ||
| Date: Mon, 18 May 2026 18:43:12 +0000 | ||
| Subject: [PATCH] Enable library path compatibility symlinks | ||
|
|
||
| Enable the CreateLibSymlinksHook to generate backwards-compatibility | ||
| symlinks in containers for library paths that have moved. | ||
|
|
||
| Signed-off-by: Maher Homsi <maherhom@amazon.com> | ||
| --- | ||
| internal/cdi/cdi.go | 1 + | ||
| 1 file changed, 1 insertion(+) | ||
|
|
||
| diff --git a/internal/cdi/cdi.go b/internal/cdi/cdi.go | ||
| index bee83c6e4..a1b2c3d4e 100644 | ||
| --- a/internal/cdi/cdi.go | ||
| +++ b/internal/cdi/cdi.go | ||
| @@ -127,6 +127,7 @@ func New(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interf | ||
| nvcdi.WithNvmlLib(c.nvmllib), | ||
| nvcdi.WithVendor(c.vendor), | ||
| nvcdi.WithDisabledHook(nvcdi.HookEnableCudaCompat), | ||
| + nvcdi.WithEnabledHooks(nvcdi.CreateLibSymlinksHook), | ||
| } | ||
|
|
||
| c.cdilibs = make(map[string]nvcdi.SpecGenerator) | ||
| -- | ||
| 2.52.0 |
163 changes: 163 additions & 0 deletions
163
packages/nvidia-k8s-device-plugin/1003-vendor-add-CreateLibSymlinksHook.patch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,163 @@ | ||
| From a734135ac989eb235263d5a24aca6427fda8d53a Mon Sep 17 00:00:00 2001 | ||
| From: Maher Homsi <maherhom@amazon.com> | ||
| Date: Mon, 18 May 2026 18:43:19 +0000 | ||
| Subject: [PATCH] vendor: add CreateLibSymlinksHook to vendored | ||
| nvidia-container-toolkit | ||
|
|
||
| Update the vendored nvidia-container-toolkit to include the | ||
| CreateLibSymlinksHook feature. This is needed because patch 1002 enables | ||
| this hook in the device plugin, but the vendored toolkit code does not | ||
| include it. | ||
|
|
||
| Signed-off-by: Maher Homsi <maherhom@amazon.com> | ||
| --- | ||
| .../internal/discover/hooks.go | 15 +++- | ||
| .../internal/discover/lib_path_symlinks.go | 57 +++++++++++++++++++ | ||
| .../nvidia-container-toolkit/pkg/nvcdi/api.go | 2 + | ||
| .../pkg/nvcdi/driver-nvml.go | 3 + | ||
| 4 files changed, 74 insertions(+), 3 deletions(-) | ||
| create mode 100644 vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/lib_path_symlinks.go | ||
|
|
||
| diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/hooks.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/hooks.go | ||
| index 66bef75..a1f608a 100644 | ||
| --- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/hooks.go | ||
| +++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/hooks.go | ||
| @@ -36,6 +36,8 @@ const ( | ||
| ChmodHook = HookName("chmod") | ||
| // A CreateSymlinksHook is used to create symlinks in the container. | ||
| CreateSymlinksHook = HookName("create-symlinks") | ||
| + // A CreateLibSymlinksHook is used to create library path compatibility symlinks. | ||
| + CreateLibSymlinksHook = HookName("create-lib-symlinks") | ||
| // DisableDeviceNodeModificationHook refers to the hook used to ensure that | ||
| // device nodes are not created by libnvidia-ml.so or nvidia-smi in a | ||
| // container. | ||
| @@ -57,6 +59,8 @@ var defaultDisabledHooks = []HookName{ | ||
| // ChmodHook is disabled by default as it was a workaround for older | ||
| // versions of crun that has since been fixed. | ||
| ChmodHook, | ||
| + // CreateLibSymlinksHook is disabled by default; opt-in for backwards compat. | ||
| + CreateLibSymlinksHook, | ||
| } | ||
|
|
||
| var _ Discover = (*Hook)(nil) | ||
| @@ -204,19 +208,24 @@ func (c cdiHookCreator) isDisabled(name HookName, args ...string) bool { | ||
|
|
||
| // still reject hooks that require args if none were provided | ||
| switch name { | ||
| - case CreateSymlinksHook, ChmodHook: | ||
| + case CreateSymlinksHook, CreateLibSymlinksHook, ChmodHook: | ||
| return len(args) == 0 | ||
| } | ||
| return false | ||
| } | ||
|
|
||
| func (c cdiHookCreator) requiredArgs(name HookName) []string { | ||
| - return append(c.fixedArgs, string(name)) | ||
| + cliName := name | ||
| + switch name { | ||
| + case CreateLibSymlinksHook: | ||
| + cliName = CreateSymlinksHook | ||
| + } | ||
| + return append(c.fixedArgs, string(cliName)) | ||
| } | ||
|
|
||
| func (c cdiHookCreator) transformArgs(name HookName, args ...string) []string { | ||
| switch name { | ||
| - case CreateSymlinksHook: | ||
| + case CreateSymlinksHook, CreateLibSymlinksHook: | ||
| var transformedArgs []string | ||
| for _, arg := range args { | ||
| transformedArgs = append(transformedArgs, "--link", arg) | ||
| diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/lib_path_symlinks.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/lib_path_symlinks.go | ||
| new file mode 100644 | ||
| index 0000000..7cbf052 | ||
| --- /dev/null | ||
| +++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/lib_path_symlinks.go | ||
| @@ -0,0 +1,57 @@ | ||
| +package discover | ||
| + | ||
| +import ( | ||
| + "fmt" | ||
| + "path/filepath" | ||
| +) | ||
| + | ||
| +const defaultLegacyLibPath = "/usr/lib/nvidia/tesla" | ||
| + | ||
| +type libPathSymlinks struct { | ||
| + Discover | ||
| + hookCreator HookCreator | ||
| + legacyLibPath string | ||
| +} | ||
| + | ||
| +// WithLibPathSymlinks decorates the provided discoverer to add a hook that | ||
| +// creates backwards-compatibility symlinks from legacyLibPath/<lib> to the | ||
| +// actual library paths. | ||
| +func WithLibPathSymlinks(mounts Discover, hookCreator HookCreator, legacyLibPath string) Discover { | ||
| + if legacyLibPath == "" { | ||
| + legacyLibPath = defaultLegacyLibPath | ||
| + } | ||
| + return &libPathSymlinks{ | ||
| + Discover: mounts, | ||
| + hookCreator: hookCreator, | ||
| + legacyLibPath: legacyLibPath, | ||
| + } | ||
| +} | ||
| + | ||
| +// Hooks returns hooks from the wrapped discoverer plus a hook to create | ||
| +// library path compatibility symlinks. | ||
| +func (d *libPathSymlinks) Hooks() ([]Hook, error) { | ||
| + hooks, err := d.Discover.Hooks() | ||
| + if err != nil { | ||
| + return nil, fmt.Errorf("failed to get hooks: %v", err) | ||
| + } | ||
| + | ||
| + mounts, err := d.Mounts() | ||
| + if err != nil { | ||
| + return nil, fmt.Errorf("failed to get library mounts: %v", err) | ||
| + } | ||
| + | ||
| + var links []string | ||
| + for _, mount := range mounts { | ||
| + basename := filepath.Base(mount.Path) | ||
| + link := fmt.Sprintf("%s::%s", mount.Path, filepath.Join(d.legacyLibPath, basename)) | ||
| + links = append(links, link) | ||
| + } | ||
| + | ||
| + symlinkHook := d.hookCreator.Create(CreateLibSymlinksHook, links...) | ||
| + if symlinkHook != nil { | ||
| + hookList, _ := symlinkHook.Hooks() | ||
| + hooks = append(hooks, hookList...) | ||
| + } | ||
| + | ||
| + return hooks, nil | ||
| +} | ||
| diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/api.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/api.go | ||
| index fce32bc..d827c06 100644 | ||
| --- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/api.go | ||
| +++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/api.go | ||
| @@ -63,6 +63,8 @@ const ( | ||
| EnableCudaCompatHook = discover.EnableCudaCompatHook | ||
| // An UpdateLDCacheHook is used to update the ldcache in the container. | ||
| UpdateLDCacheHook = discover.UpdateLDCacheHook | ||
| + // A CreateLibSymlinksHook is used to create library path compatibility symlinks. | ||
| + CreateLibSymlinksHook = discover.CreateLibSymlinksHook | ||
|
|
||
| // Deprecated: Use CreateSymlinksHook instead. | ||
| HookCreateSymlinks = CreateSymlinksHook | ||
| diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/driver-nvml.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/driver-nvml.go | ||
| index e145a2d..32ed643 100644 | ||
| --- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/driver-nvml.go | ||
| +++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/driver-nvml.go | ||
| @@ -104,6 +104,9 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string, libcudaSoParentDir | ||
| cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, version) | ||
| discoverers = append(discoverers, cudaCompatLibHookDiscoverer) | ||
|
|
||
| + libPathSymlinksDiscoverer := discover.WithLibPathSymlinks(libraries, l.hookCreator, "") | ||
| + discoverers = append(discoverers, libPathSymlinksDiscoverer) | ||
| + | ||
| updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.hookCreator, l.ldconfigPath) | ||
| discoverers = append(discoverers, updateLDCache) | ||
|
|
||
| -- | ||
| 2.53.0 | ||
|
|
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.