From be62077a0d4d9615df058d803d7df64c9c232799 Mon Sep 17 00:00:00 2001 From: Konstantin Koslowski Date: Thu, 18 Jun 2026 11:46:46 +0000 Subject: [PATCH] feat: create ServerReadinessCheck objects from Netbox interface data Adds opt-in support for creating ServerReadinessCheck objects (from metal-maintenance-operator) per device reconciled by IronCoreReconciler. When --readiness-checks=network is set, argora fetches interface data from Netbox for each active device and creates or updates a ServerReadinessCheck in the configured namespace (--readiness-check-namespace) with the device's MAC addresses and expected carrierStatus. The metal-maintenance-operator (MMO) then validates the actual server network interfaces against this spec and gates server availability via a NoBind taint until the wiring is verified. Supersedes PR #145 (ServerNetworkConfig). --- cmd/manager/main.go | 7 +- dist/chart/templates/rbac/manager-role.yaml | 11 +++ internal/controller/ironcore_controller.go | 95 ++++++++++++++++++++- 3 files changed, 111 insertions(+), 2 deletions(-) diff --git a/cmd/manager/main.go b/cmd/manager/main.go index a575dfe7..c2dcb104 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -76,6 +76,9 @@ type FlagVariables struct { rateLimiterFrequency int rateLimiterBurst int reconcileInterval time.Duration + + readinessChecks string + readinessCheckNS string } func init() { @@ -165,7 +168,7 @@ func main() { setupLog.Info("argora", "version", bininfo.Version()) if flagVar.enableIronCore { - if err = controller.NewIronCoreReconciler(mgr, creds, status.NewClusterImportStatusHandler(mgr.GetClient()), netbox.NewNetbox(flagVar.netboxURL), flagVar.reconcileInterval).SetupWithManager(mgr, rateLimiter); err != nil { + if err = controller.NewIronCoreReconciler(mgr, creds, status.NewClusterImportStatusHandler(mgr.GetClient()), netbox.NewNetbox(flagVar.netboxURL), flagVar.reconcileInterval, flagVar.readinessChecks, flagVar.readinessCheckNS).SetupWithManager(mgr, rateLimiter); err != nil { setupLog.Error(err, "unable to 
create controller", "controller", "ironcore") os.Exit(1) } @@ -237,6 +240,8 @@ func getFlagVariables() *FlagVariables { flag.DurationVar(&flagVariables.failureBaseDelay, "failure-base-delay", failureBaseDelayDefault, "Indicates the failure base delay for rate limiter.") flag.DurationVar(&flagVariables.failureMaxDelay, "failure-max-delay", failureMaxDelayDefault, "Indicates the failure max delay.") flag.DurationVar(&flagVariables.reconcileInterval, "reconcile-interval", reconcileIntervalDefault, "Indicates the time based reconcile interval.") + flag.StringVar(&flagVariables.readinessChecks, "readiness-checks", "", "Comma-separated list of readiness check types to enable (supported: network).") + flag.StringVar(&flagVariables.readinessCheckNS, "readiness-check-namespace", "metal-maintenance-operator-system", "Namespace in which ServerReadinessCheck objects are created.") return flagVariables } diff --git a/dist/chart/templates/rbac/manager-role.yaml b/dist/chart/templates/rbac/manager-role.yaml index 82606562..4ed4a6ec 100644 --- a/dist/chart/templates/rbac/manager-role.yaml +++ b/dist/chart/templates/rbac/manager-role.yaml @@ -155,4 +155,15 @@ rules: - patch - update - watch + - apiGroups: + - maintenance.metal.ironcore.dev + resources: + - serverreadinesschecks + verbs: + - create + - get + - list + - patch + - update + - watch {{- end }} diff --git a/internal/controller/ironcore_controller.go b/internal/controller/ironcore_controller.go index 5d5fe239..186fa8ff 100644 --- a/internal/controller/ironcore_controller.go +++ b/internal/controller/ironcore_controller.go @@ -10,6 +10,7 @@ import ( "fmt" "maps" "net" + "slices" "strings" "time" @@ -24,7 +25,9 @@ import ( "golang.org/x/time/rate" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/util/workqueue" ctrl "sigs.k8s.io/controller-runtime" 
"sigs.k8s.io/controller-runtime/pkg/client" @@ -46,9 +49,15 @@ type IronCoreReconciler struct { statusHandler status.ClusterImportStatus netBox netbox.Netbox reconcileInterval time.Duration + readinessChecks []string + readinessCheckNS string } -func NewIronCoreReconciler(mgr ctrl.Manager, creds *credentials.Credentials, statusHandler status.ClusterImportStatus, netBox netbox.Netbox, reconcileInterval time.Duration) *IronCoreReconciler { +func NewIronCoreReconciler(mgr ctrl.Manager, creds *credentials.Credentials, statusHandler status.ClusterImportStatus, netBox netbox.Netbox, reconcileInterval time.Duration, readinessChecks, readinessCheckNS string) *IronCoreReconciler { + var checks []string + if readinessChecks != "" { + checks = strings.Split(readinessChecks, ",") + } return &IronCoreReconciler{ k8sClient: mgr.GetClient(), scheme: mgr.GetScheme(), @@ -56,6 +65,8 @@ func NewIronCoreReconciler(mgr ctrl.Manager, creds *credentials.Credentials, sta statusHandler: statusHandler, netBox: netBox, reconcileInterval: reconcileInterval, + readinessChecks: checks, + readinessCheckNS: readinessCheckNS, } } @@ -85,6 +96,7 @@ func (r *IronCoreReconciler) SetupWithManager(mgr ctrl.Manager, rateLimiter Rate // +kubebuilder:rbac:groups=metal.ironcore.dev,resources=servers,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=metal.ironcore.dev,resources=bmcs,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=metal.ironcore.dev,resources=bmcsecrets,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=maintenance.metal.ironcore.dev,resources=serverreadinesschecks,verbs=get;list;watch;create;update;patch func (r *IronCoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := log.FromContext(ctx) @@ -236,6 +248,12 @@ func (r *IronCoreReconciler) reconcileDevice(ctx context.Context, netBox netbox. 
} logger.Info("BMC custom resource already exists, will skip", "bmc", device.Name) + + if slices.Contains(r.readinessChecks, "network") { + if err := r.reconcileServerReadinessCheck(ctx, device, commonLabels); err != nil { + return fmt.Errorf("unable to reconcile ServerReadinessCheck: %w", err) + } + } return nil } @@ -258,6 +276,12 @@ func (r *IronCoreReconciler) reconcileDevice(ctx context.Context, netBox netbox. return err } } + + if slices.Contains(r.readinessChecks, "network") { + if err := r.reconcileServerReadinessCheck(ctx, device, commonLabels); err != nil { + return fmt.Errorf("unable to reconcile ServerReadinessCheck: %w", err) + } + } return nil } @@ -412,3 +436,72 @@ func (r *IronCoreReconciler) patchBMCLabels(ctx context.Context, bmc *metalv1alp return nil } + +func (r *IronCoreReconciler) reconcileServerReadinessCheck(ctx context.Context, device *models.Device, commonLabels map[string]string) error { + logger := log.FromContext(ctx) + + ifaces, err := r.netBox.DCIM().GetInterfacesForDevice(device) + if err != nil { + return fmt.Errorf("unable to get interfaces for device %s: %w", device.Name, err) + } + + var interfaces []interface{} + for _, iface := range ifaces { + if iface.MgmtOnly { + continue + } + if iface.Type.Value == interfaceTypeLag { + continue + } + if iface.MacAddress == "" { + continue + } + if iface.Name == remoteboardInterfaceName { + continue + } + interfaces = append(interfaces, map[string]interface{}{ + "macAddress": iface.MacAddress, + "carrierStatus": "up", + }) + } + + name := device.Name + "-network" + obj := &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "maintenance.metal.ironcore.dev/v1alpha1", + "kind": "ServerReadinessCheck", + "metadata": map[string]interface{}{ + "name": name, + "namespace": r.readinessCheckNS, + }, + "spec": map[string]interface{}{ + "serverSelector": map[string]interface{}{ + "matchLabels": map[string]interface{}{ + "kubernetes.metal.cloud.sap/name": 
commonLabels["kubernetes.metal.cloud.sap/name"], + }, + }, + "network": map[string]interface{}{ + "interfaces": interfaces, + }, + }, + }} + + existing := &unstructured.Unstructured{} + existing.SetGroupVersionKind(obj.GroupVersionKind()) + err = r.k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: r.readinessCheckNS}, existing) + if err != nil { + if !apierrors.IsNotFound(err) { + return fmt.Errorf("unable to get ServerReadinessCheck %s: %w", name, err) + } + logger.Info("Creating ServerReadinessCheck", "name", name, "namespace", r.readinessCheckNS) + return r.k8sClient.Create(ctx, obj) + } + + base := existing.DeepCopy() + spec, _ := obj.Object["spec"].(map[string]interface{}) + existing.Object["spec"] = spec + if err := r.k8sClient.Patch(ctx, existing, client.MergeFrom(base)); err != nil { + return fmt.Errorf("unable to patch ServerReadinessCheck %s: %w", name, err) + } + logger.Info("Patched ServerReadinessCheck", "name", name, "namespace", r.readinessCheckNS) + return nil +}