diff --git a/api/v1alpha1/constants.go b/api/v1alpha1/constants.go new file mode 100644 index 0000000..b176a17 --- /dev/null +++ b/api/v1alpha1/constants.go @@ -0,0 +1,30 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +// Well-known labels and annotations applied to Nodes by the controller. +const ( + // LabelManaged is set on Nodes that are managed by a BootcNodePool. + // Its presence triggers the DaemonSet to schedule a daemon pod on + // the node. + LabelManaged = "bootc.dev/managed" + + // AnnotationWasCordoned records whether a node was already cordoned + // before the controller cordoned it for a reboot. Used to restore + // prior cordon state after update. + AnnotationWasCordoned = "bootc.dev/was-cordoned" +) diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index df120ae..b62d106 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -4,10 +4,21 @@ kind: ClusterRole metadata: name: manager-role rules: +- apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - patch + - update + - watch - apiGroups: - node.bootc.dev resources: - bootcnodepools + - bootcnodes verbs: - create - delete @@ -30,3 +41,9 @@ rules: - get - patch - update +- apiGroups: + - node.bootc.dev + resources: + - bootcnodes/status + verbs: + - get diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 143ca40..ed7897b 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -335,10 +335,11 @@ cache. Then reconcile: pull only), and the disruptive reboot step is still gated by `maxUnavailable`. - **Conflict** (matching node, BootcNode exists but owned by a different - pool): Set the pool's `Degraded` condition with reason `NodeConflict` - and a message identifying the conflicting pool(s). Do not create a - BootcNode for the contested node. Return early -- skip steps 3 and 4 - for this pool. + pool): Do not create a BootcNode for the contested node. Continue + reconciling uncontested nodes normally through all steps. After + membership sync completes, set the pool's `Degraded` condition with + reason `NodeConflict` and a message identifying the conflicting + pool(s). - **No longer matching** (owned BootcNode whose node doesn't match, or whose node was deleted): Delete the BootcNode. Remove the `bootc.dev/managed` label if the node still exists (which triggers diff --git a/docs/IMPLEMENTATION_PLAN.md b/docs/IMPLEMENTATION_PLAN.md index 6861f53..a5feae4 100644 --- a/docs/IMPLEMENTATION_PLAN.md +++ b/docs/IMPLEMENTATION_PLAN.md @@ -111,7 +111,7 @@ status aggregation) can be fully exercised by simulating BootcNode status updates. E2e tests become more valuable in Milestone 4 when the full controller+daemon loop can be tested end-to-end. -### 3a. Pool membership sync +### 3a. Pool membership sync ✅ - Watch BootcNodePool, Node (with predicates: label changes, Ready condition, `spec.unschedulable` only), BootcNode (via ownerReference) @@ -124,7 +124,7 @@ the full controller+daemon loop can be tested end-to-end. - Conflict detection: node matches multiple pools → `Degraded/NodeConflict` on conflicting pools, skip rollout steps -**Validation:** +**Validation:** ✅ - envtest: create Nodes and a BootcNodePool, verify BootcNodes are created with correct ownerReference and desiredImage. Verify nodes diff --git a/go.mod b/go.mod index 95a195f..f8f8cca 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,8 @@ module github.com/jlebon/bootc-operator go 1.25.7 require ( + github.com/onsi/gomega v1.41.0 + k8s.io/api v0.35.0 k8s.io/apimachinery v0.35.0 k8s.io/client-go v0.35.0 sigs.k8s.io/controller-runtime v0.23.3 @@ -42,19 +44,18 @@ require ( go.uber.org/zap v1.27.0 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect - golang.org/x/net v0.47.0 // indirect + golang.org/x/net v0.49.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect - golang.org/x/sync v0.18.0 // indirect - golang.org/x/sys v0.38.0 // indirect - golang.org/x/term v0.37.0 // indirect - golang.org/x/text v0.31.0 // indirect + golang.org/x/sync v0.19.0 // indirect + golang.org/x/sys v0.40.0 // indirect + golang.org/x/term v0.39.0 // indirect + golang.org/x/text v0.33.0 // indirect golang.org/x/time v0.9.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/protobuf v1.36.8 // indirect gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/api v0.35.0 // indirect k8s.io/apiextensions-apiserver v0.35.0 // indirect k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect diff --git a/go.sum b/go.sum index 67fcb14..2b52cfe 100644 --- a/go.sum +++ b/go.sum @@ -72,8 +72,8 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/onsi/ginkgo/v2 v2.27.2 h1:LzwLj0b89qtIy6SSASkzlNvX6WktqurSHwkk2ipF/Ns= github.com/onsi/ginkgo/v2 v2.27.2/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo= -github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A= -github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k= +github.com/onsi/gomega v1.41.0 h1:OwKp4pXNgVxf6sCplzYo794OFNuoL2q2SBMU5NSWOjA= +github.com/onsi/gomega v1.41.0/go.mod h1:M/Uqpu/8qTjtzCLUA2zJHX9Iilrau25x1PdoSRbWh5A= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -113,24 +113,24 @@ go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= -golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= -golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= -golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= -golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= +golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI= +golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg= +golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= +golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= -golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I= -golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= -golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= -golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= -golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= -golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= -golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= +golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.39.0 h1:RclSuaJf32jOqZz74CkPA9qFuVTX7vhLlpfj/IGWlqY= +golang.org/x/term v0.39.0/go.mod h1:yxzUCTP/U+FzoxfdKmLaA0RV1WgE0VY7hXBwKtY/4ww= +golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= +golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= -golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= -golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= +golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA= +golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc= gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= diff --git a/internal/controller/bootcnodepool_controller.go b/internal/controller/bootcnodepool_controller.go index 49dfd27..91f7ff4 100644 --- a/internal/controller/bootcnodepool_controller.go +++ b/internal/controller/bootcnodepool_controller.go @@ -18,11 +18,27 @@ package controller import ( "context" + "fmt" + "reflect" + "slices" + "strings" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" bootcv1alpha1 "github.com/jlebon/bootc-operator/api/v1alpha1" ) @@ -36,28 +52,443 @@ type BootcNodePoolReconciler struct { // +kubebuilder:rbac:groups=node.bootc.dev,resources=bootcnodepools,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=node.bootc.dev,resources=bootcnodepools/status,verbs=get;update;patch // +kubebuilder:rbac:groups=node.bootc.dev,resources=bootcnodepools/finalizers,verbs=update +// +kubebuilder:rbac:groups=node.bootc.dev,resources=bootcnodes,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=node.bootc.dev,resources=bootcnodes/status,verbs=get +// +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch;update;patch // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. -// TODO(user): Modify the Reconcile function to compare the state specified by -// the BootcNodePool object against the actual cluster state, and then -// perform operations to make the cluster state reflect the state specified by -// the user. -// -// For more details, check Reconcile and its Result here: -// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.23.3/pkg/reconcile func (r *BootcNodePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - _ = logf.FromContext(ctx) + log := logf.FromContext(ctx).WithValues("pool", req.Name) - // TODO(user): your logic here + // Fetch the pool. + var pool bootcv1alpha1.BootcNodePool + if err := r.Get(ctx, req.NamespacedName, &pool); err != nil { + if apierrors.IsNotFound(err) { + log.Info("Pool deleted, nothing to do") + return ctrl.Result{}, nil + } + return ctrl.Result{}, fmt.Errorf("fetching pool: %w", err) + } + + // Sync pool membership. + if err := r.syncMembership(ctx, &pool); err != nil { + return ctrl.Result{}, fmt.Errorf("syncing membership: %w", err) + } return ctrl.Result{}, nil } +// syncMembership reconciles the set of BootcNodes owned by this pool +// against the set of Nodes matching the pool's nodeSelector. +func (r *BootcNodePoolReconciler) syncMembership(ctx context.Context, pool *bootcv1alpha1.BootcNodePool) error { + log := logf.FromContext(ctx).WithValues("pool", pool.Name) + + // List all nodes matching the pool's selector. + matchingNodes, err := r.listMatchingNodes(ctx, pool) + if err != nil { + return fmt.Errorf("listing matching nodes: %w", err) + } + matchingSet := map[string]*corev1.Node{} + for i := range matchingNodes { + matchingSet[matchingNodes[i].Name] = &matchingNodes[i] + } + + // List all BootcNodes and partition into owned by this pool vs others. + allBootcNodes, err := r.listAllBootcNodes(ctx) + if err != nil { + return fmt.Errorf("listing BootcNodes: %w", err) + } + ownedSet := map[string]*bootcv1alpha1.BootcNode{} + for name, bn := range allBootcNodes { + if metav1.IsControlledBy(bn, pool) { + // XXX: val should probably be a list of nodes instead of a bool + // so we can say in the conflict condition msg exactly which nodes + // are overlapping + ownedSet[name] = bn + } + } + + // Create BootcNodes for new matches and sync spec for existing ones. + // If a BootcNode already exists for a node but is owned by a different + // pool, that's a conflict — we skip that node and track the + // conflicting pool name. + conflicting := map[string]bool{} + for nodeName, node := range matchingSet { + if bn, exists := ownedSet[nodeName]; exists { + // Sync spec fields if needed. + if err := r.syncBootcNodeSpec(ctx, pool, bn); err != nil { + return fmt.Errorf("syncing BootcNode spec for %s: %w", nodeName, err) + } + } else { + // New match: create BootcNode and label the node. + log.Info("Creating BootcNode for new match", "node", nodeName) + if err := r.createBootcNode(ctx, pool, node); err != nil { + if !apierrors.IsAlreadyExists(err) { + return fmt.Errorf("creating BootcNode for %s: %w", nodeName, err) + } + // BootcNode exists but isn't ours — find the owning pool. + if existing, ok := allBootcNodes[nodeName]; ok { + if owner := metav1.GetControllerOf(existing); owner != nil { + conflicting[owner.Name] = true + } + } + } + } + } + + // Delete BootcNodes for nodes that no longer match. + for nodeName, bn := range ownedSet { + if _, stillMatches := matchingSet[nodeName]; !stillMatches { + log.Info("Removing BootcNode for departed node", "node", nodeName) + if err := r.removeBootcNode(ctx, bn); err != nil { + return fmt.Errorf("removing BootcNode for %s: %w", nodeName, err) + } + } + } + + // Set or clear the conflict condition based on what we found. + var conflictingPools []string + for name := range conflicting { + conflictingPools = append(conflictingPools, name) + } + if err := r.setConflictCondition(ctx, pool, conflictingPools); err != nil { + return fmt.Errorf("setting conflict condition: %w", err) + } + + return nil +} + +// createBootcNode creates a BootcNode for a node joining the pool and +// labels the node as managed. +func (r *BootcNodePoolReconciler) createBootcNode(ctx context.Context, pool *bootcv1alpha1.BootcNodePool, node *corev1.Node) error { + bn := &bootcv1alpha1.BootcNode{ + ObjectMeta: metav1.ObjectMeta{ + Name: node.Name, + }, + Spec: bootcv1alpha1.BootcNodeSpec{ + DesiredImage: pool.Spec.Image.Ref, + DesiredImageState: bootcv1alpha1.DesiredImageStateStaged, + }, + } + + // Copy pull secret ref from pool if set. + if pool.Spec.PullSecretRef != nil { + bn.Spec.PullSecretRef = pool.Spec.PullSecretRef.DeepCopy() + } + + // Set ownerReference so the BootcNode is cleaned up if the pool is + // deleted and so the Owns() watch routes BootcNode events to this pool. + if err := controllerutil.SetControllerReference(pool, bn, r.Scheme); err != nil { + return fmt.Errorf("setting owner reference: %w", err) + } + + if err := r.Create(ctx, bn); err != nil { + return fmt.Errorf("creating BootcNode: %w", err) + } + + // Label the node as managed. + if err := r.ensureManagedLabel(ctx, node, true); err != nil { + return fmt.Errorf("labeling node: %w", err) + } + + return nil +} + +// removeBootcNode deletes a BootcNode for a node leaving the pool, +// removes the managed label, and restores prior cordon state. +func (r *BootcNodePoolReconciler) removeBootcNode(ctx context.Context, bn *bootcv1alpha1.BootcNode) error { + // Try to clean up the node (label + cordon state) before deleting + // the BootcNode. The node may have been deleted from the cluster. + var node corev1.Node + if err := r.Get(ctx, types.NamespacedName{Name: bn.Name}, &node); err == nil { + // No point in cleaning up the Node object if it's going away anyway... + if node.DeletionTimestamp == nil { + if err := r.restoreCordonState(ctx, &node); err != nil { + return fmt.Errorf("restoring cordon state: %w", err) + } + if err := r.ensureManagedLabel(ctx, &node, false); err != nil { + return fmt.Errorf("removing managed label: %w", err) + } + } + } else if !apierrors.IsNotFound(err) { + return fmt.Errorf("fetching node %s: %w", bn.Name, err) + } + + if err := r.Delete(ctx, bn); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("deleting BootcNode: %w", err) + } + + return nil +} + +// setConflictCondition sets or clears the Degraded condition with reason +// NodeConflict on the pool. +func (r *BootcNodePoolReconciler) setConflictCondition(ctx context.Context, pool *bootcv1alpha1.BootcNodePool, conflictingPools []string) error { + var desired metav1.Condition + if len(conflictingPools) > 0 { + desired = metav1.Condition{ + Type: bootcv1alpha1.PoolDegraded, + Status: metav1.ConditionTrue, + Reason: bootcv1alpha1.PoolNodeConflict, + // Sort so the message is stable across reconciles. + Message: fmt.Sprintf("Node selector overlaps with pool(s): %s", + strings.Join(slices.Sorted(slices.Values(conflictingPools)), ", ")), + } + } else { + desired = metav1.Condition{ + Type: bootcv1alpha1.PoolDegraded, + Status: metav1.ConditionFalse, + Reason: bootcv1alpha1.PoolOK, + } + } + + existing := apimeta.FindStatusCondition(pool.Status.Conditions, bootcv1alpha1.PoolDegraded) + if existing != nil && existing.Status == desired.Status && existing.Reason == desired.Reason && existing.Message == desired.Message { + // conflict condition status already matches desired + return nil + } + + apimeta.SetStatusCondition(&pool.Status.Conditions, desired) + if err := r.Status().Update(ctx, pool); err != nil { + return fmt.Errorf("updating pool status: %w", err) + } + return nil +} + +// syncBootcNodeSpec updates a BootcNode's spec fields to match the pool. +func (r *BootcNodePoolReconciler) syncBootcNodeSpec(ctx context.Context, pool *bootcv1alpha1.BootcNodePool, bn *bootcv1alpha1.BootcNode) error { + modified := bn.DeepCopy() + desiredImage := pool.Spec.Image.Ref + needPatch := false + + if modified.Spec.DesiredImage != desiredImage { + modified.Spec.DesiredImage = desiredImage + // desiredImage changed; reset desired state to Staged to revoke any + // pending reboot approval + modified.Spec.DesiredImageState = bootcv1alpha1.DesiredImageStateStaged + needPatch = true + } + + newPullSecretRef := pool.Spec.PullSecretRef.DeepCopy() + if !reflect.DeepEqual(modified.Spec.PullSecretRef, newPullSecretRef) { + modified.Spec.PullSecretRef = newPullSecretRef + needPatch = true + } + + if needPatch { + if err := r.Patch(ctx, modified, client.MergeFrom(bn)); err != nil { + return fmt.Errorf("patching BootcNode: %w", err) + } + *bn = *modified + } + + return nil +} + // SetupWithManager sets up the controller with the Manager. func (r *BootcNodePoolReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&bootcv1alpha1.BootcNodePool{}). + Owns(&bootcv1alpha1.BootcNode{}). + Watches(&corev1.Node{}, handler.EnqueueRequestsFromMapFunc(r.mapNodeToPoolRequests), builder.WithPredicates(nodePredicates())). Named("bootcnodepool"). Complete(r) } + +// mapNodeToPoolRequests maps a Node event to the BootcNodePool(s) that should +// be reconciled. It enqueues two sets: (1) pools whose nodeSelector matches +// the node's current labels, and (2) if a BootcNode exists for this node, the +// pool that owns it. The second set is needed so the owning pool can clean up +// when a node's labels change such that it no longer matches, or when the node +// is deleted entirely. +func (r *BootcNodePoolReconciler) mapNodeToPoolRequests(ctx context.Context, obj client.Object) []reconcile.Request { + node, ok := obj.(*corev1.Node) + if !ok { + return nil + } + log := logf.FromContext(ctx).WithValues("node", node.Name) + + var requests []reconcile.Request + seen := map[types.NamespacedName]bool{} + + // (1) Pools whose selector matches this node's labels. + var pools bootcv1alpha1.BootcNodePoolList + if err := r.List(ctx, &pools); err != nil { + log.Error(err, "Failed to list BootcNodePools in node mapper") + return nil + } + for i := range pools.Items { + pool := &pools.Items[i] + matches, err := nodeSelectorMatchesNode(pool.Spec.NodeSelector, node) + if err != nil { + log.Error(err, "Failed to evaluate nodeSelector", "pool", pool.Name) + continue + } + if matches { + key := types.NamespacedName{Name: pool.Name} + if !seen[key] { + log.V(1).Info("Node matches pool selector", "pool", pool.Name) + requests = append(requests, reconcile.Request{NamespacedName: key}) + seen[key] = true + } + } + } + + // (2) Pool that owns the BootcNode for this node (if any). + var bootcNode bootcv1alpha1.BootcNode + if err := r.Get(ctx, types.NamespacedName{Name: node.Name}, &bootcNode); err != nil { + // No BootcNode for this node — nothing else to enqueue. + return requests + } + for _, ref := range bootcNode.OwnerReferences { + if ref.Kind == "BootcNodePool" { + key := types.NamespacedName{Name: ref.Name} + if !seen[key] { + log.V(1).Info("Node has BootcNode owned by pool", "pool", ref.Name) + requests = append(requests, reconcile.Request{NamespacedName: key}) + seen[key] = true + } + } + } + + return requests +} + +// nodePredicates returns predicates that filter Node events to only those +// relevant to pool membership: label changes, Ready condition changes, and +// spec.unschedulable changes. +func nodePredicates() predicate.Predicate { + return predicate.Funcs{ + CreateFunc: func(e event.CreateEvent) bool { + return true + }, + DeleteFunc: func(e event.DeleteEvent) bool { + return true + }, + UpdateFunc: func(e event.UpdateEvent) bool { + oldNode, ok1 := e.ObjectOld.(*corev1.Node) + newNode, ok2 := e.ObjectNew.(*corev1.Node) + if !ok1 || !ok2 { + return true + } + return nodeLabelsChanged(oldNode, newNode) || + nodeReadyConditionChanged(oldNode, newNode) || + nodeUnschedulableChanged(oldNode, newNode) + }, + GenericFunc: func(e event.GenericEvent) bool { + return true + }, + } +} + +// listMatchingNodes returns all Nodes whose labels match the pool's +// nodeSelector. +func (r *BootcNodePoolReconciler) listMatchingNodes(ctx context.Context, pool *bootcv1alpha1.BootcNodePool) ([]corev1.Node, error) { + selector, err := metav1.LabelSelectorAsSelector(pool.Spec.NodeSelector) + if err != nil { + return nil, fmt.Errorf("parsing nodeSelector: %w", err) + } + + var nodeList corev1.NodeList + if err := r.List(ctx, &nodeList, client.MatchingLabelsSelector{Selector: selector}); err != nil { + return nil, fmt.Errorf("listing nodes: %w", err) + } + return nodeList.Items, nil +} + +// listAllBootcNodes returns all BootcNodes keyed by name. +func (r *BootcNodePoolReconciler) listAllBootcNodes(ctx context.Context) (map[string]*bootcv1alpha1.BootcNode, error) { + var bnList bootcv1alpha1.BootcNodeList + if err := r.List(ctx, &bnList); err != nil { + return nil, fmt.Errorf("listing BootcNodes: %w", err) + } + + all := make(map[string]*bootcv1alpha1.BootcNode, len(bnList.Items)) + for i := range bnList.Items { + all[bnList.Items[i].Name] = &bnList.Items[i] + } + return all, nil +} + +// ensureManagedLabel adds or removes the bootc.dev/managed label on a Node. +func (r *BootcNodePoolReconciler) ensureManagedLabel(ctx context.Context, node *corev1.Node, managed bool) error { + _, hasLabel := node.Labels[bootcv1alpha1.LabelManaged] + if managed && hasLabel { + return nil + } + if !managed && !hasLabel { + return nil + } + + modified := node.DeepCopy() + if managed { + if modified.Labels == nil { + modified.Labels = map[string]string{} + } + modified.Labels[bootcv1alpha1.LabelManaged] = "" + } else { + delete(modified.Labels, bootcv1alpha1.LabelManaged) + } + if err := r.Patch(ctx, modified, client.StrategicMergeFrom(node)); err != nil { + return err + } + *node = *modified + return nil +} + +// restoreCordonState restores a node's cordon state based on the +// bootc.dev/was-cordoned annotation. If the annotation is "true", the +// node was already cordoned before the operator touched it, so we leave +// it as is. Otherwise we uncordon it. The annotation is removed. +func (r *BootcNodePoolReconciler) restoreCordonState(ctx context.Context, node *corev1.Node) error { + _, hasAnnotation := node.Annotations[bootcv1alpha1.AnnotationWasCordoned] + if !hasAnnotation { + return nil + } + + modified := node.DeepCopy() + wasCordoned := modified.Annotations[bootcv1alpha1.AnnotationWasCordoned] == "true" + if !wasCordoned { + // Node was not cordoned before we touched it; uncordon it. + modified.Spec.Unschedulable = false + } + + delete(modified.Annotations, bootcv1alpha1.AnnotationWasCordoned) + if err := r.Patch(ctx, modified, client.StrategicMergeFrom(node)); err != nil { + return err + } + *node = *modified + return nil +} + +// nodeSelectorMatchesNode evaluates whether a node's labels match a +// LabelSelector. +func nodeSelectorMatchesNode(sel *metav1.LabelSelector, node *corev1.Node) (bool, error) { + selector, err := metav1.LabelSelectorAsSelector(sel) + if err != nil { + return false, err + } + return selector.Matches(labels.Set(node.Labels)), nil +} + +func nodeLabelsChanged(oldNode, newNode *corev1.Node) bool { + return !reflect.DeepEqual(oldNode.Labels, newNode.Labels) +} + +func nodeReadyConditionChanged(oldNode, newNode *corev1.Node) bool { + return nodeReadyStatus(oldNode) != nodeReadyStatus(newNode) +} + +func nodeReadyStatus(node *corev1.Node) corev1.ConditionStatus { + for _, c := range node.Status.Conditions { + if c.Type == corev1.NodeReady { + return c.Status + } + } + return corev1.ConditionUnknown +} + +func nodeUnschedulableChanged(oldNode, newNode *corev1.Node) bool { + return oldNode.Spec.Unschedulable != newNode.Spec.Unschedulable +} diff --git a/internal/controller/crd_test.go b/internal/controller/crd_test.go index 8ea6987..e68fcb2 100644 --- a/internal/controller/crd_test.go +++ b/internal/controller/crd_test.go @@ -18,10 +18,11 @@ package controller import ( "context" - "reflect" "testing" "time" + . "github.com/onsi/gomega" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" @@ -46,6 +47,7 @@ const ( ) func TestBootcNodePoolCRD(t *testing.T) { + g := NewWithT(t) ctx := context.Background() pool := testutil.NewPool("workers", testImageTaggedRef, @@ -59,76 +61,21 @@ func TestBootcNodePoolCRD(t *testing.T) { wantSpec := *pool.Spec.DeepCopy() // Create - if err := k8sClient.Create(ctx, pool); err != nil { - t.Fatalf("Failed to create BootcNodePool: %v", err) - } + g.Expect(k8sClient.Create(ctx, pool)).To(Succeed()) t.Cleanup(func() { - if err := k8sClient.Delete(ctx, pool); client.IgnoreNotFound(err) != nil { - t.Logf("cleanup: failed to delete pool: %v", err) - } + _ = client.IgnoreNotFound(k8sClient.Delete(ctx, pool)) }) // Retrieve and verify spec round-trips. We set all defaulted // fields explicitly (RebootPolicy), so the input and output specs // should match exactly. got := &bootcv1alpha1.BootcNodePool{} - if err := k8sClient.Get(ctx, client.ObjectKeyFromObject(pool), got); err != nil { - t.Fatalf("Failed to get BootcNodePool: %v", err) - } - if !reflect.DeepEqual(got.Spec, wantSpec) { - t.Errorf("spec mismatch:\n got: %+v\n want: %+v", got.Spec, wantSpec) - } - - // Update status. Use a fixed timestamp truncated to seconds to match the - // precision the API server stores so we can just `DeepEqual` the whole thing. - now := metav1.NewTime(time.Now().UTC().Truncate(time.Second)) - got.Status = bootcv1alpha1.BootcNodePoolStatus{ - ObservedGeneration: got.Generation, - TargetDigest: testDigestA, - DeployedDigest: testDigestB, - UpdateAvailable: true, - NodeCount: 3, - UpdatedCount: 1, - UpdatingCount: 1, - DegradedCount: 1, - Conditions: []metav1.Condition{ - { - Type: bootcv1alpha1.PoolUpToDate, - Status: metav1.ConditionFalse, - Reason: bootcv1alpha1.PoolRolloutInProgress, - Message: "1/3 updated; 1 staging", - LastTransitionTime: now, - }, - { - Type: bootcv1alpha1.PoolDegraded, - Status: metav1.ConditionTrue, - Reason: bootcv1alpha1.PoolStagingFailed, - Message: "node worker-3 failed to stage", - LastTransitionTime: now, - }, - }, - } - - wantStatus := *got.Status.DeepCopy() // snapshot before Update - if err := k8sClient.Status().Update(ctx, got); err != nil { - t.Fatalf("Failed to update BootcNodePool status: %v", err) - } - - // Verify status round-trips - if err := k8sClient.Get(ctx, client.ObjectKeyFromObject(pool), got); err != nil { - t.Fatalf("Failed to get BootcNodePool after status update: %v", err) - } - // Copy canonical timestamps from the server response into our - // expected status so DeepEqual ignores timezone/precision differences. - for i := range wantStatus.Conditions { - wantStatus.Conditions[i].LastTransitionTime = got.Status.Conditions[i].LastTransitionTime - } - if !reflect.DeepEqual(got.Status, wantStatus) { - t.Errorf("status mismatch:\n got: %+v\n want: %+v", got.Status, wantStatus) - } + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(pool), got)).To(Succeed()) + g.Expect(got.Spec).To(Equal(wantSpec)) } func TestBootcNodeCRD(t *testing.T) { + g := NewWithT(t) ctx := context.Background() node := testutil.NewNode("worker-1", testImageDigestRefA, @@ -139,27 +86,19 @@ func TestBootcNodeCRD(t *testing.T) { wantSpec := *node.Spec.DeepCopy() // Create - if err := k8sClient.Create(ctx, node); err != nil { - t.Fatalf("Failed to create BootcNode: %v", err) - } + g.Expect(k8sClient.Create(ctx, node)).To(Succeed()) t.Cleanup(func() { - if err := k8sClient.Delete(ctx, node); client.IgnoreNotFound(err) != nil { - t.Logf("cleanup: failed to delete node: %v", err) - } + _ = client.IgnoreNotFound(k8sClient.Delete(ctx, node)) }) // Retrieve and verify spec round-trips. BootcNodeSpec has no // defaulted fields, so the input and output should match exactly. got := &bootcv1alpha1.BootcNode{} - if err := k8sClient.Get(ctx, client.ObjectKeyFromObject(node), got); err != nil { - t.Fatalf("Failed to get BootcNode: %v", err) - } - if !reflect.DeepEqual(got.Spec, wantSpec) { - t.Errorf("spec mismatch:\n got: %+v\n want: %+v", got.Spec, wantSpec) - } + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(node), got)).To(Succeed()) + g.Expect(got.Spec).To(Equal(wantSpec)) // Update status. Use a fixed timestamp truncated to seconds to match the - // precision the API server stores so we can just `DeepEqual` the whole thing. + // precision the API server stores so we can just `Equal` the whole thing. now := metav1.NewTime(time.Now().UTC().Truncate(time.Second)) ts := metav1.NewTime(time.Date(2026, 3, 20, 12, 0, 0, 0, time.UTC)) got.Status = bootcv1alpha1.BootcNodeStatus{ @@ -193,68 +132,69 @@ func TestBootcNodeCRD(t *testing.T) { }, } wantStatus := *got.Status.DeepCopy() // snapshot before Update - if err := k8sClient.Status().Update(ctx, got); err != nil { - t.Fatalf("Failed to update BootcNode status: %v", err) - } + g.Expect(k8sClient.Status().Update(ctx, got)).To(Succeed()) // Verify status round-trips - if err := k8sClient.Get(ctx, client.ObjectKeyFromObject(node), got); err != nil { - t.Fatalf("Failed to get BootcNode after status update: %v", err) - } + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(node), got)).To(Succeed()) // Copy canonical timestamps from the server response so - // DeepEqual ignores timezone/precision differences. + // Equal ignores timezone/precision differences. for i := range wantStatus.Conditions { wantStatus.Conditions[i].LastTransitionTime = got.Status.Conditions[i].LastTransitionTime } if wantStatus.Booted != nil && wantStatus.Booted.Timestamp != nil { wantStatus.Booted.Timestamp = got.Status.Booted.Timestamp } - if !reflect.DeepEqual(got.Status, wantStatus) { - t.Errorf("status mismatch:\n got: %+v\n want: %+v", got.Status, wantStatus) - } - + g.Expect(got.Status).To(Equal(wantStatus)) } func TestBootcNodePoolEnumValidation(t *testing.T) { + g := NewWithT(t) ctx := context.Background() pool := testutil.NewPool("invalid-reboot-policy", testImageTaggedRef, testutil.WithWorkerSelector(), testutil.WithRebootPolicy("Invalid"), ) - if err := k8sClient.Create(ctx, pool); err == nil { + err := k8sClient.Create(ctx, pool) + if err == nil { _ = k8sClient.Delete(ctx, pool) - t.Fatal("Expected creation with invalid rebootPolicy to fail, but it succeeded") } + g.Expect(err).To(MatchError(apierrors.IsInvalid, "IsInvalid")) } func TestBootcNodeEnumValidation(t *testing.T) { + g := NewWithT(t) ctx := context.Background() node := testutil.NewNode("invalid-image-state", testImageDigestRefA) node.Spec.DesiredImageState = "Invalid" - if err := k8sClient.Create(ctx, node); err == nil { + err := k8sClient.Create(ctx, node) + if err == nil { _ = k8sClient.Delete(ctx, node) - t.Fatal("Expected creation with invalid desiredImageState to fail, but it succeeded") } + g.Expect(err).To(MatchError(apierrors.IsInvalid, "IsInvalid")) } func TestBootcNodePoolMinLengthValidation(t *testing.T) { + g := NewWithT(t) ctx := context.Background() pool := testutil.NewPool("empty-image-ref", "", testutil.WithWorkerSelector()) - if err := k8sClient.Create(ctx, pool); err == nil { + err := k8sClient.Create(ctx, pool) + if err == nil { _ = k8sClient.Delete(ctx, pool) - t.Fatal("Expected creation with empty image.ref to fail, but it succeeded") } + g.Expect(err).To(MatchError(apierrors.IsInvalid, "IsInvalid")) } func TestBootcNodeMinLengthValidation(t *testing.T) { + g := NewWithT(t) ctx := context.Background() node := testutil.NewNode("empty-desired-image", "") - if err := k8sClient.Create(ctx, node); err == nil { + err := k8sClient.Create(ctx, node) + if err == nil { _ = k8sClient.Delete(ctx, node) - t.Fatal("Expected creation with empty desiredImage to fail, but it succeeded") } + g.Expect(err).To(MatchError(apierrors.IsInvalid, "IsInvalid")) } diff --git a/internal/controller/membership_test.go b/internal/controller/membership_test.go new file mode 100644 index 0000000..239789e --- /dev/null +++ b/internal/controller/membership_test.go @@ -0,0 +1,283 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + "testing" + "time" + + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + bootcv1alpha1 "github.com/jlebon/bootc-operator/api/v1alpha1" + testutil "github.com/jlebon/bootc-operator/test/util" +) + +// pollInterval and pollTimeout control how long tests wait for the +// async reconciler to act. +const ( + pollInterval = 200 * time.Millisecond + pollTimeout = 10 * time.Second +) + +// TestMembershipCreatesBootcNodes verifies that creating a pool and +// matching nodes causes BootcNodes to be created with the correct +// ownerReference, desiredImage, and that nodes are labeled +// bootc.dev/managed. It also verifies that removing a node's matching +// label causes cleanup (BootcNode deleted, managed label removed). +func TestMembershipCreatesBootcNodes(t *testing.T) { + g := NewWithT(t) + g.SetDefaultEventuallyTimeout(pollTimeout) + g.SetDefaultEventuallyPollingInterval(pollInterval) + ctx := context.Background() + + // Create two worker nodes. + node1 := testutil.NewK8sNode("mem-worker-1", testutil.WorkerLabels()) + node2 := testutil.NewK8sNode("mem-worker-2", testutil.WorkerLabels()) + for _, n := range []*corev1.Node{node1, node2} { + g.Expect(k8sClient.Create(ctx, n)).To(Succeed()) + t.Cleanup(func() { + _ = k8sClient.Delete(ctx, n) + }) + } + + // Create a pool selecting workers. + pool := testutil.NewPool("mem-workers", testImageDigestRefA, testutil.WithWorkerSelector()) + g.Expect(k8sClient.Create(ctx, pool)).To(Succeed()) + t.Cleanup(func() { + _ = k8sClient.Delete(ctx, pool) + }) + + // Wait for BootcNodes to appear and verify their properties. + for _, nodeName := range []string{"mem-worker-1", "mem-worker-2"} { + name := nodeName + var bn bootcv1alpha1.BootcNode + g.Eventually(func() error { + return k8sClient.Get(ctx, client.ObjectKey{Name: name}, &bn) + }).Should(Succeed()) + + // Check ownerReference. + owner := metav1.GetControllerOf(&bn) + g.Expect(owner).NotTo(BeNil(), "BootcNode %s has no controller owner", name) + g.Expect(owner.Name).To(Equal(pool.Name), "BootcNode %s owner mismatch", name) + + // Check desiredImage. + g.Expect(bn.Spec.DesiredImage).To(Equal(testImageDigestRefA), "BootcNode %s desiredImage mismatch", name) + + // Check desiredImageState. + g.Expect(bn.Spec.DesiredImageState).To(Equal(bootcv1alpha1.DesiredImageStateStaged), "BootcNode %s desiredImageState mismatch", name) + } + + // Verify nodes are labeled bootc.dev/managed. + for _, nodeName := range []string{"mem-worker-1", "mem-worker-2"} { + name := nodeName + g.Eventually(func(g Gomega) { + var n corev1.Node + g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: name}, &n)).To(Succeed()) + g.Expect(n.Labels).To(HaveKey(bootcv1alpha1.LabelManaged)) + }).Should(Succeed()) + } + + // Remove the worker label from mem-worker-1 and verify cleanup. + var fresh corev1.Node + g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: "mem-worker-1"}, &fresh)).To(Succeed()) + patch := client.StrategicMergeFrom(fresh.DeepCopy()) + delete(fresh.Labels, "node-role.kubernetes.io/worker") + g.Expect(k8sClient.Patch(ctx, &fresh, patch)).To(Succeed()) + + // Wait for BootcNode to be deleted. + g.Eventually(func() error { + return k8sClient.Get(ctx, client.ObjectKey{Name: "mem-worker-1"}, &bootcv1alpha1.BootcNode{}) + }).Should(MatchError(apierrors.IsNotFound, "IsNotFound")) + + // Verify managed label is removed. + g.Eventually(func(g Gomega) { + var n corev1.Node + g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: "mem-worker-1"}, &n)).To(Succeed()) + g.Expect(n.Labels).NotTo(HaveKey(bootcv1alpha1.LabelManaged)) + }).Should(Succeed()) + + // Delete mem-worker-2 and verify its BootcNode is also deleted. + g.Expect(k8sClient.Delete(ctx, node2)).To(Succeed()) + + g.Eventually(func() error { + return k8sClient.Get(ctx, client.ObjectKey{Name: "mem-worker-2"}, &bootcv1alpha1.BootcNode{}) + }).Should(MatchError(apierrors.IsNotFound, "IsNotFound")) +} + +// TestMembershipSyncsDesiredImage verifies that changing the pool's +// image ref updates desiredImage on all owned BootcNodes and resets +// desiredImageState to Staged. +func TestMembershipSyncsDesiredImage(t *testing.T) { + g := NewWithT(t) + g.SetDefaultEventuallyTimeout(pollTimeout) + g.SetDefaultEventuallyPollingInterval(pollInterval) + ctx := context.Background() + + node := testutil.NewK8sNode("mem-sync-1", testutil.WorkerLabels()) + g.Expect(k8sClient.Create(ctx, node)).To(Succeed()) + t.Cleanup(func() { + _ = k8sClient.Delete(ctx, node) + }) + + pool := testutil.NewPool("mem-sync-pool", testImageDigestRefA, testutil.WithWorkerSelector()) + g.Expect(k8sClient.Create(ctx, pool)).To(Succeed()) + t.Cleanup(func() { + _ = k8sClient.Delete(ctx, pool) + }) + + // Wait for BootcNode to be created and verify image A. + var bn bootcv1alpha1.BootcNode + g.Eventually(func() error { + return k8sClient.Get(ctx, client.ObjectKeyFromObject(node), &bn) + }).Should(Succeed()) + g.Expect(bn.Spec.DesiredImage).To(Equal(testImageDigestRefA)) + + // Update pool image to B. + var freshPool bootcv1alpha1.BootcNodePool + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(pool), &freshPool)).To(Succeed()) + freshPool.Spec.Image.Ref = testImageDigestRefB + g.Expect(k8sClient.Update(ctx, &freshPool)).To(Succeed()) + + // Wait for BootcNode to be updated with image B. + g.Eventually(func(g Gomega) { + var bn bootcv1alpha1.BootcNode + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(node), &bn)).To(Succeed()) + g.Expect(bn.Spec.DesiredImage).To(Equal(testImageDigestRefB)) + g.Expect(bn.Spec.DesiredImageState).To(Equal(bootcv1alpha1.DesiredImageStateStaged)) + }).Should(Succeed()) +} + +// TestMembershipConflictDetection verifies that when a node matches two +// pools, the conflicting pool is marked Degraded with reason +// NodeConflict for the contested node, but non-contested nodes in +// that pool are still handled. +func TestMembershipConflictDetection(t *testing.T) { + g := NewWithT(t) + g.SetDefaultEventuallyTimeout(pollTimeout) + g.SetDefaultEventuallyPollingInterval(pollInterval) + ctx := context.Background() + + // node1: pool1 only, node2: pool2 only, node3: both (contested). + node1 := testutil.NewK8sNode("mem-conflict-1", map[string]string{"pool1": "true"}) + node2 := testutil.NewK8sNode("mem-conflict-2", map[string]string{"pool2": "true"}) + node3 := testutil.NewK8sNode("mem-conflict-3", map[string]string{"pool1": "true", "pool2": "true"}) + for _, n := range []*corev1.Node{node1, node2, node3} { + g.Expect(k8sClient.Create(ctx, n)).To(Succeed()) + t.Cleanup(func() { + _ = k8sClient.Delete(ctx, n) + }) + } + + // Create first pool selecting pool1=true — matches node1 and node3. + pool1 := testutil.NewPool("mem-conflict-pool1", testImageDigestRefA, + testutil.WithNodeSelector(map[string]string{"pool1": "true"})) + g.Expect(k8sClient.Create(ctx, pool1)).To(Succeed()) + t.Cleanup(func() { + _ = k8sClient.Delete(ctx, pool1) + }) + + // Wait for pool1 to claim node1 and node3. + for _, name := range []string{node1.Name, node3.Name} { + name := name + g.Eventually(func() error { + return k8sClient.Get(ctx, client.ObjectKey{Name: name}, &bootcv1alpha1.BootcNode{}) + }).Should(Succeed()) + } + + // Create second pool selecting pool2=true — matches node2 and node3, + // but node3 is already owned by pool1 (conflict). + pool2 := testutil.NewPool("mem-conflict-pool2", testImageDigestRefB, + testutil.WithNodeSelector(map[string]string{"pool2": "true"})) + g.Expect(k8sClient.Create(ctx, pool2)).To(Succeed()) + t.Cleanup(func() { + _ = k8sClient.Delete(ctx, pool2) + }) + + // Wait for pool2 to be marked Degraded/NodeConflict. + g.Eventually(func(g Gomega) { + var p bootcv1alpha1.BootcNodePool + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(pool2), &p)).To(Succeed()) + cond := apimeta.FindStatusCondition(p.Status.Conditions, bootcv1alpha1.PoolDegraded) + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(cond.Reason).To(Equal(bootcv1alpha1.PoolNodeConflict)) + g.Expect(cond.Message).To(ContainSubstring(pool1.Name)) + }).Should(Succeed()) + + // Verify pool1 is not degraded. + var p1 bootcv1alpha1.BootcNodePool + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(pool1), &p1)).To(Succeed()) + cond := apimeta.FindStatusCondition(p1.Status.Conditions, bootcv1alpha1.PoolDegraded) + if cond != nil { + g.Expect(cond.Status).To(Equal(metav1.ConditionFalse), + "pool1 should not be degraded, but got: %s/%s: %s", cond.Reason, cond.Status, cond.Message) + } + + // Verify non-contested nodes are still handled: node1 by pool1, + // node2 by pool2. + for _, tc := range []struct { + nodeName string + poolName string + }{ + {node1.Name, pool1.Name}, + {node2.Name, pool2.Name}, + } { + tc := tc + var bn bootcv1alpha1.BootcNode + g.Eventually(func() error { + return k8sClient.Get(ctx, client.ObjectKey{Name: tc.nodeName}, &bn) + }).Should(Succeed()) + owner := metav1.GetControllerOf(&bn) + g.Expect(owner).NotTo(BeNil()) + g.Expect(owner.Name).To(Equal(tc.poolName)) + } + + // Now resolve the conflict: remove pool1=true from node3 so pool1 + // releases it, then pool2 can claim it. + var freshNode3 corev1.Node + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(node3), &freshNode3)).To(Succeed()) + patch := client.StrategicMergeFrom(freshNode3.DeepCopy()) + delete(freshNode3.Labels, "pool1") + g.Expect(k8sClient.Patch(ctx, &freshNode3, patch)).To(Succeed()) + + // Verify pool2 recovers: Degraded condition should clear. + g.Eventually(func() ([]metav1.Condition, error) { + var p bootcv1alpha1.BootcNodePool + err := k8sClient.Get(ctx, client.ObjectKeyFromObject(pool2), &p) + return p.Status.Conditions, err + }).Should(ContainElement(And( + HaveField("Type", bootcv1alpha1.PoolDegraded), + HaveField("Status", metav1.ConditionFalse), + HaveField("Reason", bootcv1alpha1.PoolOK), + ))) + + // Verify node3 is now owned by pool2. + g.Eventually(func() (*metav1.OwnerReference, error) { + var bn bootcv1alpha1.BootcNode + err := k8sClient.Get(ctx, client.ObjectKeyFromObject(node3), &bn) + return metav1.GetControllerOf(&bn), err + }).Should(And( + Not(BeNil()), + HaveField("Name", pool2.Name), + )) +} diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 1632314..49d28b9 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -17,14 +17,18 @@ limitations under the License. package controller import ( + "context" "fmt" "os" "path/filepath" "testing" "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/envtest" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" bootcv1alpha1 "github.com/jlebon/bootc-operator/api/v1alpha1" ) @@ -35,6 +39,8 @@ var ( ) func TestMain(m *testing.M) { + ctrl.SetLogger(zap.New(zap.UseDevMode(true))) + if err := bootcv1alpha1.AddToScheme(scheme.Scheme); err != nil { fmt.Fprintf(os.Stderr, "Failed to add scheme: %v\n", err) os.Exit(1) @@ -57,8 +63,44 @@ func TestMain(m *testing.M) { os.Exit(1) } + // Start a manager with the reconciler so controller logic runs + // against the envtest API server. + mgr, err := ctrl.NewManager(cfg, ctrl.Options{ + Scheme: scheme.Scheme, + Metrics: metricsserver.Options{ + BindAddress: "0", // disable metrics server in tests + }, + }) + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to create manager: %v\n", err) + os.Exit(1) + } + + if err := (&BootcNodePoolReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + fmt.Fprintf(os.Stderr, "Failed to setup reconciler: %v\n", err) + os.Exit(1) + } + + mgrCtx, mgrCancel := context.WithCancel(context.Background()) + + mgrDone := make(chan struct{}) + go func() { + defer close(mgrDone) + if err := mgr.Start(mgrCtx); err != nil { + fmt.Fprintf(os.Stderr, "Manager exited with error: %v\n", err) + os.Exit(1) + } + }() + code := m.Run() + // Stop the manager; this implicitly tests that it can shut down cleanly. + mgrCancel() + <-mgrDone + if err := testEnv.Stop(); err != nil { fmt.Fprintf(os.Stderr, "Failed to stop envtest: %v\n", err) } diff --git a/test/e2e/controller_test.go b/test/e2e/controller_test.go new file mode 100644 index 0000000..1a38226 --- /dev/null +++ b/test/e2e/controller_test.go @@ -0,0 +1,87 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "context" + "maps" + "testing" + "time" + + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + bootcv1alpha1 "github.com/jlebon/bootc-operator/api/v1alpha1" + "github.com/jlebon/bootc-operator/test/e2e/e2eutil" + testutil "github.com/jlebon/bootc-operator/test/util" +) + +const ( + pollTimeout = 60 * time.Second + pollInterval = 2 * time.Second +) + +// TestControllerMembership deploys the controller in a bink cluster, +// creates a BootcNodePool selecting the worker node, and verifies that +// a BootcNode is created and the node is labeled bootc.dev/managed. +func TestControllerMembership(t *testing.T) { + g := NewWithT(t) + g.SetDefaultEventuallyTimeout(pollTimeout) + g.SetDefaultEventuallyPollingInterval(pollInterval) + + env := e2eutil.New(t) + + ctx := context.Background() + + // The bink cluster has a node called "node1". Label it as a worker so it + // matches our pool's default nodeSelector. XXX: lower this down to bink? + var node corev1.Node + g.Expect(env.Client.Get(ctx, client.ObjectKey{Name: "node1"}, &node)).To(Succeed()) + patch := client.StrategicMergeFrom(node.DeepCopy()) + if node.Labels == nil { + node.Labels = map[string]string{} + } + maps.Copy(node.Labels, testutil.WorkerLabels()) + g.Expect(env.Client.Patch(ctx, &node, patch)).To(Succeed()) + + // Create a pool with a digest ref. + imageRef := "quay.io/example/myos@sha256:06f961b802bc46ee168555f066d28f4f0e9afdf3f88174c1ee6f9de004fc30a0" + pool := testutil.NewPool("e2e-workers", imageRef, testutil.WithWorkerSelector()) + g.Expect(env.Client.Create(ctx, pool)).To(Succeed()) + + // Wait for BootcNode to appear for node1. + var bn bootcv1alpha1.BootcNode + g.Eventually(func() error { + return env.Client.Get(ctx, client.ObjectKey{Name: "node1"}, &bn) + }).Should(Succeed()) + + // Verify ownerReference. + owner := metav1.GetControllerOf(&bn) + g.Expect(owner).NotTo(BeNil()) + g.Expect(owner.Name).To(Equal("e2e-workers")) + + // Verify desiredImage. + g.Expect(bn.Spec.DesiredImage).To(Equal(imageRef)) + + // Verify node1 has the managed label. + g.Eventually(func() (map[string]string, error) { + err := env.Client.Get(ctx, client.ObjectKey{Name: "node1"}, &node) + return node.Labels, err + }).Should(HaveKey(bootcv1alpha1.LabelManaged)) +} diff --git a/test/e2e/crd_smoke_test.go b/test/e2e/crd_smoke_test.go index 135f720..9ce2b2b 100644 --- a/test/e2e/crd_smoke_test.go +++ b/test/e2e/crd_smoke_test.go @@ -20,6 +20,7 @@ import ( "context" "testing" + . "github.com/onsi/gomega" "sigs.k8s.io/controller-runtime/pkg/client" bootcv1alpha1 "github.com/jlebon/bootc-operator/api/v1alpha1" @@ -38,45 +39,31 @@ func TestCRDSmoke(t *testing.T) { ctx := context.Background() t.Run("BootcNodePool", func(t *testing.T) { + g := NewWithT(t) + pool := testutil.NewPool("smoke-pool", "quay.io/example/myos:latest", testutil.WithWorkerSelector()) - if err := env.Client.Create(ctx, pool); err != nil { - t.Fatalf("Failed to create BootcNodePool: %v", err) - } + g.Expect(env.Client.Create(ctx, pool)).To(Succeed()) got := &bootcv1alpha1.BootcNodePool{} - if err := env.Client.Get(ctx, client.ObjectKeyFromObject(pool), got); err != nil { - t.Fatalf("Failed to get BootcNodePool: %v", err) - } - if got.Spec.Image.Ref != "quay.io/example/myos:latest" { - t.Errorf("image.ref = %q, want %q", got.Spec.Image.Ref, "quay.io/example/myos:latest") - } - - if err := env.Client.Delete(ctx, pool); err != nil { - t.Fatalf("Failed to delete BootcNodePool: %v", err) - } + g.Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(pool), got)).To(Succeed()) + g.Expect(got.Spec.Image.Ref).To(Equal("quay.io/example/myos:latest")) + + g.Expect(env.Client.Delete(ctx, pool)).To(Succeed()) }) t.Run("BootcNode", func(t *testing.T) { + g := NewWithT(t) + node := testutil.NewNode("smoke-node", "quay.io/example/myos@sha256:abc123") - if err := env.Client.Create(ctx, node); err != nil { - t.Fatalf("Failed to create BootcNode: %v", err) - } + g.Expect(env.Client.Create(ctx, node)).To(Succeed()) got := &bootcv1alpha1.BootcNode{} - if err := env.Client.Get(ctx, client.ObjectKeyFromObject(node), got); err != nil { - t.Fatalf("Failed to get BootcNode: %v", err) - } - if got.Spec.DesiredImage != "quay.io/example/myos@sha256:abc123" { - t.Errorf("desiredImage = %q, want %q", got.Spec.DesiredImage, "quay.io/example/myos@sha256:abc123") - } - if got.Spec.DesiredImageState != bootcv1alpha1.DesiredImageStateStaged { - t.Errorf("desiredImageState = %q, want %q", got.Spec.DesiredImageState, bootcv1alpha1.DesiredImageStateStaged) - } - - if err := env.Client.Delete(ctx, node); err != nil { - t.Fatalf("Failed to delete BootcNode: %v", err) - } + g.Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(node), got)).To(Succeed()) + g.Expect(got.Spec.DesiredImage).To(Equal("quay.io/example/myos@sha256:abc123")) + g.Expect(got.Spec.DesiredImageState).To(Equal(bootcv1alpha1.DesiredImageStateStaged)) + + g.Expect(env.Client.Delete(ctx, node)).To(Succeed()) }) } diff --git a/test/e2e/e2eutil/env.go b/test/e2e/e2eutil/env.go index 2ccd102..cb6fa1f 100644 --- a/test/e2e/e2eutil/env.go +++ b/test/e2e/e2eutil/env.go @@ -31,8 +31,10 @@ import ( "testing" "time" + . "github.com/onsi/gomega" //nolint:staticcheck appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/clientcmd" "sigs.k8s.io/controller-runtime/pkg/client" @@ -346,33 +348,22 @@ func waitForControllerReady(t *testing.T, c client.Client) { t.Helper() t.Log("Waiting for controller to be ready...") - ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) - defer cancel() - - ticker := time.NewTicker(5 * time.Second) - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - t.Fatal("timed out waiting for controller to be ready") - case <-ticker.C: - var dep appsv1.Deployment - key := client.ObjectKey{ - Namespace: "bootc-operator", - Name: "bootc-operator-controller-manager", - } - if err := c.Get(ctx, key, &dep); err != nil { - t.Logf(" controller deployment not found yet: %v", err) - continue - } - if dep.Status.ReadyReplicas > 0 { - t.Log(" controller is ready") - return - } - t.Logf(" controller not ready yet (ready=%d)", dep.Status.ReadyReplicas) + g := NewWithT(t) + ctx := context.Background() + g.Eventually(func(g Gomega) { + var dep appsv1.Deployment + key := client.ObjectKey{ + Namespace: "bootc-operator", + Name: "bootc-operator-controller-manager", } - } + err := c.Get(ctx, key, &dep) + if apierrors.IsNotFound(err) { + t.Logf(" controller deployment not found yet") + } + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(dep.Status.ReadyReplicas).To(BeNumerically(">", 0)) + t.Log(" controller is ready") + }).WithTimeout(3 * time.Minute).WithPolling(5 * time.Second).Should(Succeed()) } // buildClient creates a controller-runtime client from the kubeconfig @@ -402,30 +393,21 @@ func waitForNodeReady(t *testing.T, c client.Client, nodeName string) { t.Helper() t.Logf("Waiting for node %q to be Ready...", nodeName) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) - defer cancel() - - ticker := time.NewTicker(5 * time.Second) - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - t.Fatalf("timed out waiting for node %q to become Ready", nodeName) - case <-ticker.C: - node := &corev1.Node{} - if err := c.Get(ctx, client.ObjectKey{Name: nodeName}, node); err != nil { - t.Logf(" node %q not found yet: %v", nodeName, err) - continue - } - for _, cond := range node.Status.Conditions { - if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { - t.Logf(" node %q is Ready", nodeName) - return - } - } + g := NewWithT(t) + ctx := context.Background() + g.Eventually(func(g Gomega) { + node := &corev1.Node{} + err := c.Get(ctx, client.ObjectKey{Name: nodeName}, node) + if apierrors.IsNotFound(err) { + t.Logf(" node %q not found yet", nodeName) } - } + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(node.Status.Conditions).To(ContainElement(And( + HaveField("Type", corev1.NodeReady), + HaveField("Status", corev1.ConditionTrue), + )), "node %q not Ready yet", nodeName) + t.Logf(" node %q is Ready", nodeName) + }).WithTimeout(5 * time.Minute).WithPolling(5 * time.Second).Should(Succeed()) } // runBink executes a bink command and returns any error. diff --git a/test/util/builders.go b/test/util/builders.go index 21b439b..4b9ac6a 100644 --- a/test/util/builders.go +++ b/test/util/builders.go @@ -19,6 +19,7 @@ limitations under the License. package testutil import ( + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" @@ -130,3 +131,21 @@ func WithNodePullSecret(name, namespace, hash string) NodeOption { node.Spec.PullSecretHash = hash } } + +// K8sNodeOption configures a corev1.Node. +type K8sNodeOption func(*corev1.Node) + +// NewK8sNode creates a corev1.Node with the given name and labels. This is +// strictly used by envtests since there are no nodes there. +func NewK8sNode(name string, labels map[string]string, opts ...K8sNodeOption) *corev1.Node { + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: labels, + }, + } + for _, o := range opts { + o(node) + } + return node +}