-
Notifications
You must be signed in to change notification settings - Fork 455
implement NRI plugin server to inject management CDI devices #1498
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,146 @@ | ||
| /** | ||
| # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| **/ | ||
|
|
||
| package nri | ||
|
|
||
| import ( | ||
| "context" | ||
| "fmt" | ||
| "os" | ||
| "strings" | ||
|
|
||
| "github.com/containerd/nri/pkg/api" | ||
| nriplugin "github.com/containerd/nri/pkg/stub" | ||
|
|
||
| "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" | ||
| ) | ||
|
|
||
| // Compile-time interface checks | ||
| var ( | ||
| _ nriplugin.Plugin = (*Plugin)(nil) | ||
| ) | ||
|
|
||
| const ( | ||
| // nriCDIDeviceKey is the prefix of the key used for CDI device annotations. | ||
| nriCDIDeviceKey = "nvidia.cdi.k8s.io" | ||
| // defaultNRISocket represents the default path of the NRI socket | ||
| defaultNRISocket = "/var/run/nri/nri.sock" | ||
| ) | ||
|
|
||
| type Plugin struct { | ||
| logger logger.Interface | ||
|
|
||
| stub nriplugin.Stub | ||
| } | ||
|
|
||
| // NewPlugin creates a new NRI plugin for injecting CDI devices | ||
| func NewPlugin(logger logger.Interface) *Plugin { | ||
| return &Plugin{ | ||
| logger: logger, | ||
| } | ||
| } | ||
|
|
||
| // CreateContainer handles container creation requests. | ||
| func (p *Plugin) CreateContainer(_ context.Context, pod *api.PodSandbox, ctr *api.Container) (*api.ContainerAdjustment, []*api.ContainerUpdate, error) { | ||
| adjust := &api.ContainerAdjustment{} | ||
|
|
||
| if err := p.injectCDIDevices(pod, ctr, adjust); err != nil { | ||
| return nil, nil, err | ||
| } | ||
|
|
||
| return adjust, nil, nil | ||
| } | ||
|
|
||
| func (p *Plugin) injectCDIDevices(pod *api.PodSandbox, ctr *api.Container, a *api.ContainerAdjustment) error { | ||
| devices, err := parseCDIDevices(ctr.Name, pod.Annotations) | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
| if len(devices) == 0 { | ||
| p.logger.Debugf("%s: no CDI devices annotated...", containerName(pod, ctr)) | ||
| return nil | ||
| } | ||
|
|
||
| for _, name := range devices { | ||
| a.AddCDIDevice( | ||
| &api.CDIDevice{ | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As far as I am aware, this introduces restrictions on compatible containerd / cri-o versions.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given that we've moved to native CDI, the minimum supported containerd is now v1.7. Will that be affected by this change?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We would need to check when CDI Devices were added to the NRI APIs. The versions of containerd / cri-o that then started responding to these fields would be the minimum versions.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here was the NRI commit: containerd/nri@c9b4798 We would also need to check which version of containerd includes the related adjustments. (see https://github.com/containerd/containerd/blob/6dc89d8b392dbea8651ab0b141765f196836205c/internal/cri/nri/nri_api_linux.go#L321-L326). (I wasn't able to find similar code in v1.7 ir v2.0). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @elezar CDI injection from an NRI plugin does not work in 1.7.x. I have a fix for it which I never filed a PR from. But if it's important enough for you guys, I can file it and then see if we could get it merged. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @elezar Here is the filed PR against the 1.7 maintenance branch: containerd/containerd#12650 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @elezar And here is one filed against the 2.0 maintenance branch: containerd/containerd#12651
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks @klihub ! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @elezar @tariq1890 Not sure how realistic is to get containerd/containerd#12651 in. 2.0 is EOL'd. 12651 would require a new tagged release, and if that is an absolute no-go (it's usually not that inflexible), then it's a tough call. Anyway, you might want to chime in there and comment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The v1.7 version of the fix has been now commented as being considered for (a late) inclusion in the upcoming v1.30 release. You might want to chime in there as well. |
||
| Name: name, | ||
| }, | ||
| ) | ||
| p.logger.Infof("%s: injected CDI device %q...", containerName(pod, ctr), name) | ||
| } | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| func parseCDIDevices(ctr string, annotations map[string]string) ([]string, error) { | ||
| annotation := getAnnotation(annotations, nriCDIDeviceKey, ctr) | ||
| if len(annotation) == 0 { | ||
| return nil, nil | ||
| } | ||
|
|
||
| cdiDevices := strings.Split(annotation, ",") | ||
| return cdiDevices, nil | ||
| } | ||
|
|
||
| func getAnnotation(annotations map[string]string, key, ctr string) string { | ||
| nriPluginAnnotationKey := fmt.Sprintf("%s/container.%s", key, ctr) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know that the original example allowed all containers in a pod to automatically have access to devices using a different key. Here, I understand that we're only allowing a SINGLE form, correct?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, correct we are only allowing a single form. Please let me know if you have any concerns |
||
| if value, ok := annotations[nriPluginAnnotationKey]; ok { | ||
| return value | ||
| } | ||
|
|
||
| return "" | ||
| } | ||
|
|
||
| // Construct a container name for log messages. | ||
| func containerName(pod *api.PodSandbox, container *api.Container) string { | ||
| if pod != nil { | ||
| return pod.Name + "/" + container.Name | ||
| } | ||
| return container.Name | ||
| } | ||
|
|
||
| // Start starts the NRI plugin | ||
| func (p *Plugin) Start(ctx context.Context, nriSocketPath, nriPluginIdx string) error { | ||
| if len(nriSocketPath) == 0 { | ||
| nriSocketPath = defaultNRISocket | ||
| } | ||
| _, err := os.Stat(nriSocketPath) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to find valid nri socket in %s: %w", nriSocketPath, err) | ||
| } | ||
|
|
||
| pluginOpts := []nriplugin.Option{ | ||
| nriplugin.WithPluginIdx(nriPluginIdx), | ||
| nriplugin.WithSocketPath(nriSocketPath), | ||
| } | ||
| if p.stub, err = nriplugin.New(p, pluginOpts...); err != nil { | ||
| return fmt.Errorf("failed to initialise plugin at %s: %w", nriSocketPath, err) | ||
| } | ||
| err = p.stub.Start(ctx) | ||
| if err != nil { | ||
| return fmt.Errorf("plugin exited with error: %w", err) | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| // Stop stops the NRI plugin | ||
| func (p *Plugin) Stop() { | ||
| if p != nil && p.stub != nil { | ||
| p.stub.Stop() | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.