diff --git a/api/hypershift/v1beta1/nodepool_types.go b/api/hypershift/v1beta1/nodepool_types.go index 750360bffee5..5472fd818993 100644 --- a/api/hypershift/v1beta1/nodepool_types.go +++ b/api/hypershift/v1beta1/nodepool_types.go @@ -264,8 +264,10 @@ type NodePoolSpec struct { const ( // OSImageStreamRHEL9 is the OS image stream name for RHEL 9. + // This value must match support/releaseinfo.StreamRHEL9. OSImageStreamRHEL9 = "rhel-9" // OSImageStreamRHEL10 is the OS image stream name for RHEL 10. + // This value must match support/releaseinfo.StreamRHEL10. OSImageStreamRHEL10 = "rhel-10" ) diff --git a/hypershift-operator/controllers/nodepool/aws.go b/hypershift-operator/controllers/nodepool/aws.go index f3cde2061fde..15d933a03d75 100644 --- a/hypershift-operator/controllers/nodepool/aws.go +++ b/hypershift-operator/controllers/nodepool/aws.go @@ -60,8 +60,8 @@ func isSpotEnabled(nodePool *hyperv1.NodePool) bool { return false } -func awsMachineTemplateSpec(infraName string, hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodePool, defaultSG bool, releaseImage *releaseinfo.ReleaseImage) (*capiaws.AWSMachineTemplateSpec, error) { - ami, err := resolveAWSAMI(hostedCluster, nodePool, releaseImage) +func awsMachineTemplateSpec(infraName string, hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodePool, defaultSG bool, releaseImage *releaseinfo.ReleaseImage, rhelStream string) (*capiaws.AWSMachineTemplateSpec, error) { + ami, err := resolveAWSAMI(hostedCluster, nodePool, releaseImage, rhelStream) if err != nil { return nil, err } @@ -118,7 +118,7 @@ func awsMachineTemplateSpec(infraName string, hostedCluster *hyperv1.HostedClust return awsMachineTemplateSpec, nil } -func resolveAWSAMI(hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodePool, releaseImage *releaseinfo.ReleaseImage) (string, error) { +func resolveAWSAMI(hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodePool, releaseImage *releaseinfo.ReleaseImage, rhelStream string) (string, error) { // TODO: Should the region be included in the NodePool platform information? region := hostedCluster.Spec.Platform.AWS.Region arch := nodePool.Spec.Arch @@ -134,8 +134,7 @@ func resolveAWSAMI(hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodeP return ami, nil } // Default behavior for Linux/RHCOS AMIs - // TODO(CNTRLPLANE-3553): resolve streamName via GetRHELStream once osImageStream API field is available - ami, err := defaultNodePoolAMI(region, arch, "", releaseImage) + ami, err := defaultNodePoolAMI(region, arch, rhelStream, releaseImage) if err != nil { return "", fmt.Errorf("couldn't discover an AMI for release image: %w", err) } @@ -271,7 +270,7 @@ func awsAdditionalTags(nodePool *hyperv1.NodePool, hostedCluster *hyperv1.Hosted } func (c *CAPI) awsMachineTemplate(ctx context.Context, templateNameGenerator func(spec any) (string, error)) (*capiaws.AWSMachineTemplate, error) { - desiredSpec, err := awsMachineTemplateSpec(c.capiClusterName, c.hostedCluster, c.nodePool, c.cpoCapabilities.CreateDefaultAWSSecurityGroup, c.releaseImage) + desiredSpec, err := awsMachineTemplateSpec(c.capiClusterName, c.hostedCluster, c.nodePool, c.cpoCapabilities.CreateDefaultAWSSecurityGroup, c.releaseImage, c.resolvedRHELStreamForBootImage) if err != nil { return nil, fmt.Errorf("failed to generate AWSMachineTemplateSpec: %w", err) } @@ -361,9 +360,23 @@ func (r *NodePoolReconciler) setAWSConditions(_ context.Context, nodePool *hyper ObservedGeneration: nodePool.Generation, }) } else { - // Default behavior for Linux/RHCOS AMIs - // TODO(CNTRLPLANE-3553): resolve streamName via GetRHELStream once osImageStream API field is available - ami, err := defaultNodePoolAMI(hcluster.Spec.Platform.AWS.Region, nodePool.Spec.Arch, "", releaseImage) + // Default behavior for Linux/RHCOS AMIs. + // Use getRHELStreamForBootImage so that the AMI lookup is consistent + // with the CAPI path: version-derived default (rhel-9 for OCP < 5.0, + // rhel-10 for OCP >= 5.0) for unset osImageStream, or a validated + // stream name for explicit osImageStream. + rhelStream, err := getRHELStreamForBootImage(nodePool, releaseImage) + if err != nil { + SetStatusCondition(&nodePool.Status.Conditions, hyperv1.NodePoolCondition{ + Type: hyperv1.NodePoolValidPlatformImageType, + Status: corev1.ConditionFalse, + Reason: hyperv1.NodePoolValidationFailedReason, + Message: fmt.Sprintf("Couldn't resolve RHEL stream for release image %q: %s", nodePool.Spec.Release.Image, err.Error()), + ObservedGeneration: nodePool.Generation, + }) + return fmt.Errorf("couldn't resolve RHEL stream for release image: %w", err) + } + ami, err := defaultNodePoolAMI(hcluster.Spec.Platform.AWS.Region, nodePool.Spec.Arch, rhelStream, releaseImage) if err != nil { SetStatusCondition(&nodePool.Status.Conditions, hyperv1.NodePoolCondition{ Type: hyperv1.NodePoolValidPlatformImageType, diff --git a/hypershift-operator/controllers/nodepool/aws_test.go b/hypershift-operator/controllers/nodepool/aws_test.go index fc38aeaab01a..3eaff3a40073 100644 --- a/hypershift-operator/controllers/nodepool/aws_test.go +++ b/hypershift-operator/controllers/nodepool/aws_test.go @@ -298,6 +298,7 @@ func TestAWSMachineTemplateSpec(t *testing.T) { }, true, releaseImage, + "", ) if tc.checkError != nil { tc.checkError(t, err) @@ -1090,19 +1091,7 @@ func TestSetAWSConditions(t *testing.T) { ImageStream: &v1.ImageStream{ ObjectMeta: metav1.ObjectMeta{Name: "4.17.0"}, }, - StreamMetadata: &stream.Stream{ - Architectures: map[string]stream.Arch{ - "x86_64": { - Images: stream.Images{ - Aws: &stream.AwsImage{ - Regions: map[string]stream.SingleImage{ - "us-east-1": {Release: "4.17.0", Image: "ami-linux-us-east-1"}, - }, - }, - }, - }, - }, - }, + StreamMetadata: testAWSStreamWithRelease("x86_64", "us-east-1", "ami-linux-us-east-1", "4.17.0"), } testCases := []struct { @@ -1175,6 +1164,28 @@ func TestSetAWSConditions(t *testing.T) { expectedCondType: string(hyperv1.NodePoolValidPlatformImageType), expectedCondValue: corev1.ConditionFalse, }, + { + name: "When osImageStream is invalid for the release version it should set ValidPlatformImage to false", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + Arch: hyperv1.ArchitectureAMD64, + Platform: hyperv1.NodePoolPlatform{Type: hyperv1.AWSPlatform, AWS: &hyperv1.AWSNodePoolPlatform{}}, + OSImageStream: hyperv1.OSImageStreamReference{Name: "rhel-10"}, + }, + }, + hostedCluster: &hyperv1.HostedCluster{ + Spec: hyperv1.HostedClusterSpec{ + Platform: hyperv1.PlatformSpec{AWS: &hyperv1.AWSPlatformSpec{Region: "us-east-1"}}, + }, + Status: hyperv1.HostedClusterStatus{ + Platform: &hyperv1.PlatformStatus{AWS: &hyperv1.AWSPlatformStatus{DefaultWorkerSecurityGroupID: "sg-123"}}, + }, + }, + releaseImage: releaseImageWithStreams, // 4.17.0 — rhel-10 is not valid + expectError: true, + expectedCondType: string(hyperv1.NodePoolValidPlatformImageType), + expectedCondValue: corev1.ConditionFalse, + }, { name: "When HostedCluster has no AWS platform it should return error", nodePool: &hyperv1.NodePool{ @@ -1250,6 +1261,7 @@ func TestResolveAWSAMI(t *testing.T) { hostedCluster *hyperv1.HostedCluster nodePool *hyperv1.NodePool releaseImage *releaseinfo.ReleaseImage + rhelStream string expectedAMI string expectError bool }{ @@ -1335,12 +1347,100 @@ func TestResolveAWSAMI(t *testing.T) { }, expectError: true, }, + { + name: "When rhelStream is rhel-9 with single-stream payload (OSStreams nil), it should fall back to StreamMetadata", + hostedCluster: &hyperv1.HostedCluster{ + Spec: hyperv1.HostedClusterSpec{ + Platform: hyperv1.PlatformSpec{AWS: &hyperv1.AWSPlatformSpec{Region: "us-east-1"}}, + }, + }, + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + Arch: hyperv1.ArchitectureAMD64, + Platform: hyperv1.NodePoolPlatform{AWS: &hyperv1.AWSNodePoolPlatform{}}, + }, + }, + rhelStream: "rhel-9", + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &v1.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "4.17.0"}}, + StreamMetadata: testAWSStreamWithRelease("x86_64", "us-east-1", "ami-fallback-stream-metadata", "4.17.0"), + OSStreams: nil, + }, + expectedAMI: "ami-fallback-stream-metadata", + }, + { + name: "When rhelStream is rhel-9 with multi-stream payload, it should use OSStreams rhel-9", + hostedCluster: &hyperv1.HostedCluster{ + Spec: hyperv1.HostedClusterSpec{ + Platform: hyperv1.PlatformSpec{AWS: &hyperv1.AWSPlatformSpec{Region: "us-east-1"}}, + }, + }, + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + Arch: hyperv1.ArchitectureAMD64, + Platform: hyperv1.NodePoolPlatform{AWS: &hyperv1.AWSNodePoolPlatform{}}, + }, + }, + rhelStream: "rhel-9", + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &v1.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "5.0.0"}}, + StreamMetadata: testAWSStreamWithRelease("x86_64", "us-east-1", "ami-default-stream", "5.0.0"), + OSStreams: map[string]*stream.Stream{ + "rhel-9": testAWSStreamWithRelease("x86_64", "us-east-1", "ami-rhel9-osstreams", "5.0.0"), + }, + }, + expectedAMI: "ami-rhel9-osstreams", + }, + { + name: "When rhelStream is rhel-10 with multi-stream payload, it should use OSStreams rhel-10", + hostedCluster: &hyperv1.HostedCluster{ + Spec: hyperv1.HostedClusterSpec{ + Platform: hyperv1.PlatformSpec{AWS: &hyperv1.AWSPlatformSpec{Region: "us-east-1"}}, + }, + }, + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + Arch: hyperv1.ArchitectureAMD64, + Platform: hyperv1.NodePoolPlatform{AWS: &hyperv1.AWSNodePoolPlatform{}}, + }, + }, + rhelStream: "rhel-10", + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &v1.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "5.0.0"}}, + StreamMetadata: testAWSStreamWithRelease("x86_64", "us-east-1", "ami-default-stream", "5.0.0"), + OSStreams: map[string]*stream.Stream{ + "rhel-10": testAWSStreamWithRelease("x86_64", "us-east-1", "ami-rhel10-osstreams", "5.0.0"), + }, + }, + expectedAMI: "ami-rhel10-osstreams", + }, + { + name: "When rhelStream is rhel-10 with single-stream payload (OSStreams nil), it should fall back to StreamMetadata", + hostedCluster: &hyperv1.HostedCluster{ + Spec: hyperv1.HostedClusterSpec{ + Platform: hyperv1.PlatformSpec{AWS: &hyperv1.AWSPlatformSpec{Region: "us-east-1"}}, + }, + }, + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + Arch: hyperv1.ArchitectureAMD64, + Platform: hyperv1.NodePoolPlatform{AWS: &hyperv1.AWSNodePoolPlatform{}}, + }, + }, + rhelStream: "rhel-10", + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &v1.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "4.18.0"}}, + StreamMetadata: testAWSStreamWithRelease("x86_64", "us-east-1", "ami-legacy-fallback", "4.18.0"), + OSStreams: nil, + }, + expectedAMI: "ami-legacy-fallback", + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { g := NewWithT(t) - ami, err := resolveAWSAMI(tc.hostedCluster, tc.nodePool, tc.releaseImage) + ami, err := resolveAWSAMI(tc.hostedCluster, tc.nodePool, tc.releaseImage, tc.rhelStream) if tc.expectError { g.Expect(err).To(HaveOccurred()) } else { @@ -1864,3 +1964,71 @@ func TestApplyAWSPlacementOptions(t *testing.T) { }) } } + +// TestAWSMachineTemplateSpec_StreamSelection verifies that on a multi-stream +// OCP 5.0+ payload, passing different rhelStream values to awsMachineTemplateSpec +// selects different AMIs and produces different machine template name hashes, +// confirming that a stream switch triggers a CAPI node rollout. +func TestAWSMachineTemplateSpec_StreamSelection(t *testing.T) { + g := NewWithT(t) + + const ( + legacyAMI = "ami-legacy-rhel9" + rhel10AMI = "ami-rhel10-new" + region = "us-east-1" + ) + + releaseImage := &releaseinfo.ReleaseImage{ + ImageStream: &v1.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "5.0.0"}}, + StreamMetadata: testAWSStreamWithRelease("x86_64", region, legacyAMI, "5.0.0"), + OSStreams: map[string]*stream.Stream{ + "rhel-9": testAWSStreamWithRelease("x86_64", region, legacyAMI, "5.0.0"), + "rhel-10": testAWSStreamWithRelease("x86_64", region, rhel10AMI, "5.0.0"), + }, + } + + hostedCluster := &hyperv1.HostedCluster{ + Spec: hyperv1.HostedClusterSpec{ + Platform: hyperv1.PlatformSpec{ + AWS: &hyperv1.AWSPlatformSpec{Region: region}, + }, + }, + Status: hyperv1.HostedClusterStatus{ + Platform: &hyperv1.PlatformStatus{ + AWS: &hyperv1.AWSPlatformStatus{DefaultWorkerSecurityGroupID: "sg-default"}, + }, + }, + } + + nodePool := &hyperv1.NodePool{ + ObjectMeta: metav1.ObjectMeta{Name: "test-nodepool"}, + Spec: hyperv1.NodePoolSpec{ + Arch: hyperv1.ArchitectureAMD64, + Platform: hyperv1.NodePoolPlatform{ + Type: hyperv1.AWSPlatform, + AWS: &hyperv1.AWSNodePoolPlatform{}, + }, + }, + } + + // Legacy path (empty stream) should select the RHEL-9 AMI from StreamMetadata. + legacySpec, err := awsMachineTemplateSpec(infraName, hostedCluster, nodePool, true, releaseImage, "") + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(*legacySpec.Template.Spec.AMI.ID).To(Equal(legacyAMI)) + + // Concrete "rhel-10" stream should select the RHEL-10 AMI from OSStreams. + rhel10Spec, err := awsMachineTemplateSpec(infraName, hostedCluster, nodePool, true, releaseImage, "rhel-10") + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(*rhel10Spec.Template.Spec.AMI.ID).To(Equal(rhel10AMI)) + + // Different AMIs should produce different machine template name hashes, + // causing CAPI to create a new infrastructure ref and trigger node replacement. + legacyJSON, err := json.Marshal(legacySpec) + g.Expect(err).ToNot(HaveOccurred()) + rhel10JSON, err := json.Marshal(rhel10Spec) + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(generateMachineTemplateName(nodePool, legacyJSON)). + ToNot(Equal(generateMachineTemplateName(nodePool, rhel10JSON)), + "different streams should produce different machine template names") +} diff --git a/hypershift-operator/controllers/nodepool/azure.go b/hypershift-operator/controllers/nodepool/azure.go index 655c9754269b..0419a4af8576 100644 --- a/hypershift-operator/controllers/nodepool/azure.go +++ b/hypershift-operator/controllers/nodepool/azure.go @@ -122,6 +122,8 @@ func defaultAzureNodePoolImage(nodePool *hyperv1.NodePool, releaseImage *release // getAzureMarketplaceMetadata extracts Azure Marketplace metadata from the release payload func getAzureMarketplaceMetadata(releaseImage *releaseinfo.ReleaseImage, arch string) (*azureMarketplaceMetadata, error) { + // TODO(CNTRLPLANE-3553): use releaseImage.StreamForName(rhelStream) instead of + // accessing StreamMetadata directly, to support dual-stream payloads. if releaseImage.StreamMetadata == nil { return nil, nil // No stream metadata available } diff --git a/hypershift-operator/controllers/nodepool/conditions.go b/hypershift-operator/controllers/nodepool/conditions.go index cff009e3590e..878e2a69d9af 100644 --- a/hypershift-operator/controllers/nodepool/conditions.go +++ b/hypershift-operator/controllers/nodepool/conditions.go @@ -368,6 +368,20 @@ func (r *NodePoolReconciler) validMachineConfigCondition(ctx context.Context, no return &ctrl.Result{}, nil } + // Validate osImageStream before expensive config generation to fail fast. + // TODO(CNTRLPLANE-3553): add integration test covering this condition path + // (invalid osImageStream.Name → ValidMachineConfig condition False + error return). + if err := validateOSImageStream(nodePool, releaseImage); err != nil { + SetStatusCondition(&nodePool.Status.Conditions, hyperv1.NodePoolCondition{ + Type: hyperv1.NodePoolValidMachineConfigConditionType, + Status: corev1.ConditionFalse, + Reason: hyperv1.NodePoolValidationFailedReason, + Message: err.Error(), + ObservedGeneration: nodePool.Generation, + }) + return &ctrl.Result{}, fmt.Errorf("failed to validate osImageStream: %w", err) + } + haproxyRawConfig, err := r.generateHAProxyRawConfig(ctx, nodePool, hcluster, releaseImage) if err != nil { return &ctrl.Result{}, fmt.Errorf("failed to generate HAProxy raw config: %w", err) @@ -385,6 +399,7 @@ func (r *NodePoolReconciler) validMachineConfigCondition(ctx context.Context, no }) return &ctrl.Result{}, fmt.Errorf("failed to generate config: %w", err) } + SetStatusCondition(&nodePool.Status.Conditions, hyperv1.NodePoolCondition{ Type: hyperv1.NodePoolValidMachineConfigConditionType, Status: corev1.ConditionTrue, diff --git a/hypershift-operator/controllers/nodepool/config.go b/hypershift-operator/controllers/nodepool/config.go index 8771f45ea83c..4550a732463f 100644 --- a/hypershift-operator/controllers/nodepool/config.go +++ b/hypershift-operator/controllers/nodepool/config.go @@ -31,6 +31,8 @@ import ( "k8s.io/apimachinery/pkg/util/yaml" "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/blang/semver" ) // ConfigGenerator knows how to: @@ -42,6 +44,18 @@ type ConfigGenerator struct { hostedCluster *hyperv1.HostedCluster nodePool *hyperv1.NodePool controlplaneNamespace string + // resolvedRHELStreamForBootImage is the RHEL stream name used for boot + // image resolution via StreamForName. It is set by + // getRHELStreamForBootImage, which delegates to GetRHELStream for + // version-aware default resolution: rhel-9 for OCP < 5.0, rhel-10 + // for OCP >= 5.0 when spec.osImageStream.Name is unset. When the + // field is explicitly set, the user's choice is validated and returned. + // See the dual-stream RHEL NodePool enhancement: + // https://github.com/openshift/enhancements/blob/master/enhancements/hypershift/dual-stream-rhel-nodepool.md + // + // This field is intentionally outside rolloutConfig because it does not + // participate in the config hash that drives rollouts. + resolvedRHELStreamForBootImage string *rolloutConfig } @@ -60,6 +74,13 @@ type rolloutConfig struct { // TODO(alberto): consider let haproxyRawConfig be an implementation detail of ConfigGenerator. // For now, it's a required input to keep the haproxy business logic and files outside the scope of this initial refactor. haproxyRawConfig string + // rhelStream is the OS image stream name used for hash computation. + // It is set from spec.osImageStream.Name but normalized: if the explicit + // value matches the version-derived default it is kept empty so that + // setting the default stream does not change the hash. + // Only a non-default stream (e.g. "rhel-9" on a ≥5.0 release) produces + // a non-empty value here and triggers a rollout. + rhelStream string } // NewConfigGenerator is the contract to create a new ConfigGenerator. @@ -77,16 +98,43 @@ func NewConfigGenerator(ctx context.Context, client client.Client, hostedCluster return nil, err } + // Resolve the RHEL stream for boot image lookup (AMI, VHD, etc.). + resolvedRHELStreamForBootImage, err := getRHELStreamForBootImage(nodePool, releaseImage) + if err != nil { + return nil, fmt.Errorf("failed to resolve RHEL stream for boot image: %w", err) + } + + // Normalize rhelStream for the config hash: when the resolved stream + // matches the version-derived default, keep it empty so that setting the + // default explicitly does not change the hash and trigger a spurious rollout. + rhelStream := nodePool.Spec.OSImageStream.Name + if rhelStream != "" { + version, err := semver.Parse(releaseImage.Version()) + if err != nil { + return nil, fmt.Errorf("failed to parse release image version %q: %w", releaseImage.Version(), err) + } + // TODO(CNTRLPLANE-3553): pass actual usesRunc once container runtime detection is wired in. + defaultStream, err := GetRHELStream("", version, false) + if err != nil { + return nil, fmt.Errorf("failed to resolve default RHEL stream: %w", err) + } + if rhelStream == defaultStream { + rhelStream = "" + } + } + cg := &ConfigGenerator{ - Client: client, - hostedCluster: hostedCluster, - nodePool: nodePool, - controlplaneNamespace: controlPlaneNamespace, + Client: client, + hostedCluster: hostedCluster, + nodePool: nodePool, + controlplaneNamespace: controlPlaneNamespace, + resolvedRHELStreamForBootImage: resolvedRHELStreamForBootImage, rolloutConfig: &rolloutConfig{ releaseImage: releaseImage, pullSecretName: hostedCluster.Spec.PullSecret.Name, globalConfig: globalConfig, haproxyRawConfig: haproxyRawConfig, + rhelStream: rhelStream, }, } @@ -118,7 +166,7 @@ func (cg *ConfigGenerator) CompressedAndEncoded() (*bytes.Buffer, error) { // TODO(alberto): hash the struct directly instead of the string representation field by field. // This is kept like this for now to contain the scope of the refactor and avoid backward compatibility issues. func (cg *ConfigGenerator) Hash() string { - return supportutil.HashSimple(cg.mcoRawConfig + cg.releaseImage.Version() + cg.pullSecretName + cg.additionalTrustBundleName + cg.globalConfig) + return supportutil.HashSimple(cg.mcoRawConfig + cg.releaseImage.Version() + cg.pullSecretName + cg.additionalTrustBundleName + cg.globalConfig + cg.rhelStream) } // HashWithOutVersion is like Hash but doesn't compute the release version. @@ -126,7 +174,7 @@ func (cg *ConfigGenerator) Hash() string { // TODO(alberto): This was left inconsistent in https://github.com/openshift/hypershift/pull/3795/files. It should also contain cg.globalConfig. // This is kept like this for now to contain the scope of the refactor and avoid backward compatibility issues. func (cg *ConfigGenerator) HashWithoutVersion() string { - return supportutil.HashSimple(cg.mcoRawConfig + cg.pullSecretName + cg.additionalTrustBundleName) + return supportutil.HashSimple(cg.mcoRawConfig + cg.pullSecretName + cg.additionalTrustBundleName + cg.rhelStream) } func (cg *ConfigGenerator) Version() string { diff --git a/hypershift-operator/controllers/nodepool/config_test.go b/hypershift-operator/controllers/nodepool/config_test.go index ef9096ab218c..493e956f8b21 100644 --- a/hypershift-operator/controllers/nodepool/config_test.go +++ b/hypershift-operator/controllers/nodepool/config_test.go @@ -152,13 +152,13 @@ spec: }{ { name: "When all input is given it should not return an error", - expectedHash: "e1d8d58e", + expectedHash: "83935368", expectedHashWithoutVersion: "0db5756d", nodePool: &hyperv1.NodePool{}, releaseImage: &releaseinfo.ReleaseImage{ ImageStream: &imageapi.ImageStream{ ObjectMeta: metav1.ObjectMeta{ - Name: "latest", + Name: "4.18.0", }, }, }, @@ -184,7 +184,7 @@ spec: }, { name: "When nodepool has configs it should populate mcoRawConfig ", - expectedHash: "801aff6a", + expectedHash: "af67f27c", expectedHashWithoutVersion: "fef02451", nodePool: &hyperv1.NodePool{ ObjectMeta: metav1.ObjectMeta{ @@ -213,7 +213,7 @@ spec: releaseImage: &releaseinfo.ReleaseImage{ ImageStream: &imageapi.ImageStream{ ObjectMeta: metav1.ObjectMeta{ - Name: "latest", + Name: "4.18.0", }, }, }, @@ -236,14 +236,112 @@ spec: }, }, expectedMCORawConfig: machineConfigDefaulted, - releaseImage: &releaseinfo.ReleaseImage{}, - hostedCluster: hostedCluster, - client: true, - error: fmt.Errorf("configmaps \"does-not-exist\" not found"), + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ + ObjectMeta: metav1.ObjectMeta{ + Name: "4.18.0", + }, + }, + }, + hostedCluster: hostedCluster, + client: true, + error: fmt.Errorf("configmaps \"does-not-exist\" not found"), + }, + { + name: "When release version is 4.18.0 with no osImageStream it should produce baseline hash", + expectedHash: "83935368", + expectedHashWithoutVersion: "0db5756d", + nodePool: &hyperv1.NodePool{}, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ + ObjectMeta: metav1.ObjectMeta{ + Name: "4.18.0", + }, + }, + }, + hostedCluster: hostedCluster, + client: true, + error: nil, + }, + { + name: "When osImageStream is set to version-derived default it should produce the same hash as no stream", + expectedHash: "83935368", + expectedHashWithoutVersion: "0db5756d", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + OSImageStream: hyperv1.OSImageStreamReference{Name: "rhel-9"}, + }, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ + ObjectMeta: metav1.ObjectMeta{ + Name: "4.18.0", + }, + }, + }, + hostedCluster: hostedCluster, + client: true, + error: nil, + }, + { + name: "When osImageStream is set to non-default it should produce a different hash", + expectedHash: "ccd46cc1", + expectedHashWithoutVersion: "3a158178", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + OSImageStream: hyperv1.OSImageStreamReference{Name: "rhel-9"}, + }, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ + ObjectMeta: metav1.ObjectMeta{ + Name: "5.0.0", + }, + }, + }, + hostedCluster: hostedCluster, + client: true, + error: nil, + }, + { + name: "When release version is 5.0.0 with no osImageStream it should normalize rhelStream to empty", + expectedHash: "ff80e2c8", + expectedHashWithoutVersion: "0db5756d", + nodePool: &hyperv1.NodePool{}, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ + ObjectMeta: metav1.ObjectMeta{ + Name: "5.0.0", + }, + }, + }, + hostedCluster: hostedCluster, + client: true, + error: nil, + }, + { + name: "When osImageStream is rhel-10 on 5.0.0 it should normalize to empty and match unset hash", + expectedHash: "ff80e2c8", + expectedHashWithoutVersion: "0db5756d", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + OSImageStream: hyperv1.OSImageStreamReference{Name: "rhel-10"}, + }, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ + ObjectMeta: metav1.ObjectMeta{ + Name: "5.0.0", + }, + }, + }, + hostedCluster: hostedCluster, + client: true, + error: nil, }, { name: "When additionalTrustBundle is specified it should be included in rolloutConfig", - expectedHash: "dc74976e", + expectedHash: "632801f8", expectedHashWithoutVersion: "71375893", nodePool: &hyperv1.NodePool{ ObjectMeta: metav1.ObjectMeta{ @@ -272,7 +370,7 @@ spec: releaseImage: &releaseinfo.ReleaseImage{ ImageStream: &imageapi.ImageStream{ ObjectMeta: metav1.ObjectMeta{ - Name: "latest", + Name: "4.18.0", }, }, }, @@ -434,6 +532,7 @@ func TestHash(t *testing.T) { pullSecretName string additionalTrustBundleName string globalConfig string + rhelStream string expected string }{ { @@ -490,6 +589,16 @@ func TestHash(t *testing.T) { globalConfig: "different", expected: "e916ddfe", }, + { + name: "When rhelStream is a non-default stream, it should change the hash", + mcoRawConfig: baseCaseMCORawConfig, + releaseVersion: baseCaseReleaseVersion, + pullSecretName: baseCasePullSecretName, + additionalTrustBundleName: baseCaseAdditionalTrustBundleName, + globalConfig: baseCaseGlobalConfig, + rhelStream: "rhel-10", + expected: "2dbbd41b", + }, } for _, tc := range testCases { @@ -508,6 +617,7 @@ func TestHash(t *testing.T) { pullSecretName: tc.pullSecretName, additionalTrustBundleName: tc.additionalTrustBundleName, globalConfig: tc.globalConfig, + rhelStream: tc.rhelStream, releaseImage: releaseImage, }, } @@ -536,6 +646,7 @@ func TestHashWithoutVersion(t *testing.T) { pullSecretName string additionalTrustBundleName string globalConfig string + rhelStream string expected string }{ { @@ -594,6 +705,16 @@ func TestHashWithoutVersion(t *testing.T) { globalConfig: "different", expected: baseCaseHash, }, + { + name: "When rhelStream is a non-default stream, it should change the hash", + mcoRawConfig: baseCaseMCORawConfig, + releaseVersion: baseCaseReleaseVersion, + pullSecretName: baseCasePullSecretName, + additionalTrustBundleName: baseCaseAdditionalTrustBundleName, + globalConfig: baseCaseGlobalConfig, + rhelStream: "rhel-10", + expected: "671fe083", + }, } for _, tc := range testCases { @@ -612,6 +733,7 @@ func TestHashWithoutVersion(t *testing.T) { pullSecretName: tc.pullSecretName, additionalTrustBundleName: tc.additionalTrustBundleName, globalConfig: tc.globalConfig, + rhelStream: tc.rhelStream, releaseImage: releaseImage, }, } diff --git a/hypershift-operator/controllers/nodepool/gcp.go b/hypershift-operator/controllers/nodepool/gcp.go index 50a107b210b4..18773cfd8a01 100644 --- a/hypershift-operator/controllers/nodepool/gcp.go +++ b/hypershift-operator/controllers/nodepool/gcp.go @@ -37,6 +37,7 @@ func (c *CAPI) gcpMachineTemplate(_ context.Context, templateNameGenerator func( hc, nodePool, c.releaseImage, + c.resolvedRHELStreamForBootImage, ) if err != nil { return nil, fmt.Errorf("failed to generate GCP machine template spec: %w", err) @@ -81,12 +82,13 @@ func gcpMachineTemplateSpec( hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodePool, releaseImage *releaseinfo.ReleaseImage, + rhelStream string, ) (*capigcp.GCPMachineSpec, error) { gcpPlatform := nodePool.Spec.Platform.GCP hcGCPPlatform := hostedCluster.Spec.Platform.GCP // Resolve image - image, err := resolveGCPImage(nodePool, releaseImage) + image, err := resolveGCPImage(nodePool, releaseImage, rhelStream) if err != nil { return nil, fmt.Errorf("failed to resolve GCP image: %w", err) } @@ -159,7 +161,7 @@ func gcpMachineTemplateSpec( } // resolveGCPImage determines the correct image to use based on NodePool configuration and release info. -func resolveGCPImage(nodePool *hyperv1.NodePool, releaseImage *releaseinfo.ReleaseImage) (string, error) { +func resolveGCPImage(nodePool *hyperv1.NodePool, releaseImage *releaseinfo.ReleaseImage, rhelStream string) (string, error) { gcpPlatform := nodePool.Spec.Platform.GCP // If user specified a custom image, use it @@ -168,7 +170,7 @@ func resolveGCPImage(nodePool *hyperv1.NodePool, releaseImage *releaseinfo.Relea } // Resolve image from release metadata - image, err := defaultNodePoolGCPImage(nodePool.Spec.Arch, releaseImage) + image, err := defaultNodePoolGCPImage(nodePool.Spec.Arch, releaseImage, rhelStream) if err != nil { return "", fmt.Errorf("couldn't discover a GCP image for release image: %w", err) } diff --git a/hypershift-operator/controllers/nodepool/gcp_test.go b/hypershift-operator/controllers/nodepool/gcp_test.go index 14a3623acf82..2611e6a6035e 100644 --- a/hypershift-operator/controllers/nodepool/gcp_test.go +++ b/hypershift-operator/controllers/nodepool/gcp_test.go @@ -514,6 +514,7 @@ func TestGcpMachineTemplateSpec(t *testing.T) { tc.hc, tc.nodePool, releaseImage, + "", ) if tc.expectedErr { @@ -535,6 +536,7 @@ func TestDefaultNodePoolGCPImage(t *testing.T) { testCases := []struct { name string arch string + rhelStream string releaseImage *releaseinfo.ReleaseImage expectedImage string expectedErr bool @@ -655,13 +657,56 @@ func TestDefaultNodePoolGCPImage(t *testing.T) { expectedErr: true, expectedErrMsg: "release image metadata has no GCP image for architecture \"amd64\"", }, + { + name: "When named stream is set, it should look up OSStreams map", + arch: hyperv1.ArchitectureAMD64, + rhelStream: "rhel-10", + releaseImage: &releaseinfo.ReleaseImage{ + StreamMetadata: &stream.Stream{ + Architectures: map[string]stream.Arch{ + "x86_64": { + Images: stream.Images{ + Gcp: &stream.GcpImage{ + Project: "rhcos-cloud", + Name: "rhcos-9-6-default-gcp-x86-64", + }, + }, + }, + }, + }, + OSStreams: map[string]*stream.Stream{ + "rhel-10": { + Architectures: map[string]stream.Arch{ + "x86_64": { + Images: stream.Images{ + Gcp: &stream.GcpImage{ + Project: "rhcos-cloud", + Name: "rhcos-10-0-20251023-0-gcp-x86-64", + }, + }, + }, + }, + }, + }, + }, + expectedImage: "projects/rhcos-cloud/global/images/rhcos-10-0-20251023-0-gcp-x86-64", + expectedErr: false, + }, + { + name: "When stream metadata is nil with empty stream name, it should return error", + arch: hyperv1.ArchitectureAMD64, + rhelStream: "", + releaseImage: &releaseinfo.ReleaseImage{}, + expectedErr: true, + expectedErrMsg: "couldn't resolve stream metadata", + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { g := NewWithT(t) - image, err := defaultNodePoolGCPImage(tc.arch, tc.releaseImage) + image, err := defaultNodePoolGCPImage(tc.arch, tc.releaseImage, tc.rhelStream) if tc.expectedErr { g.Expect(err).To(HaveOccurred()) @@ -787,3 +832,187 @@ func TestConfigureGCPNetworkTags(t *testing.T) { }) } } + +func TestGcpMachineTemplateSpecWithRHELStream(t *testing.T) { + baseHC := &hyperv1.HostedCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-namespace", + }, + Spec: hyperv1.HostedClusterSpec{ + InfraID: "test-infra-id", + Platform: hyperv1.PlatformSpec{ + Type: hyperv1.GCPPlatform, + GCP: &hyperv1.GCPPlatformSpec{ + Project: "test-project", + Region: "us-central1", + NetworkConfig: hyperv1.GCPNetworkConfig{ + PrivateServiceConnectSubnet: hyperv1.GCPResourceReference{ + Name: "test-psc-subnet", + }, + }, + }, + }, + }, + } + + baseNodePool := &hyperv1.NodePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-nodepool", + Namespace: "test-namespace", + }, + Spec: hyperv1.NodePoolSpec{ + Arch: hyperv1.ArchitectureAMD64, + Platform: hyperv1.NodePoolPlatform{ + Type: hyperv1.GCPPlatform, + GCP: &hyperv1.GCPNodePoolPlatform{ + MachineType: "n1-standard-4", + Zone: "us-central1-a", + }, + }, + }, + } + + testCases := []struct { + name string + rhelStream string + releaseImage *releaseinfo.ReleaseImage + expectedImage string + }{ + { + name: "When rhelStream is empty with valid StreamMetadata, it should resolve GCP image", + rhelStream: "", + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ + ObjectMeta: metav1.ObjectMeta{Name: "4.18.0"}, + }, + StreamMetadata: &stream.Stream{ + Architectures: map[string]stream.Arch{ + "x86_64": { + Images: stream.Images{ + Gcp: &stream.GcpImage{ + Project: "rhcos-cloud", + Name: "rhcos-418-x86-64", + }, + }, + }, + }, + }, + }, + expectedImage: "projects/rhcos-cloud/global/images/rhcos-418-x86-64", + }, + { + name: "When rhelStream is rhel-9 with single-stream payload, it should fall back to StreamMetadata", + rhelStream: "rhel-9", + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ + ObjectMeta: metav1.ObjectMeta{Name: "4.17.0"}, + }, + StreamMetadata: &stream.Stream{ + Architectures: map[string]stream.Arch{ + "x86_64": { + Images: stream.Images{ + Gcp: &stream.GcpImage{ + Project: "rhcos-cloud", + Name: "rhcos-417-fallback-x86-64", + }, + }, + }, + }, + }, + OSStreams: nil, + }, + expectedImage: "projects/rhcos-cloud/global/images/rhcos-417-fallback-x86-64", + }, + { + name: "When rhelStream is rhel-9 with multi-stream payload, it should use OSStreams rhel-9 image", + rhelStream: "rhel-9", + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ + ObjectMeta: metav1.ObjectMeta{Name: "5.0.0"}, + }, + StreamMetadata: &stream.Stream{ + Architectures: map[string]stream.Arch{ + "x86_64": { + Images: stream.Images{ + Gcp: &stream.GcpImage{ + Project: "rhcos-cloud", + Name: "rhcos-default-x86-64", + }, + }, + }, + }, + }, + OSStreams: map[string]*stream.Stream{ + "rhel-9": { + Architectures: map[string]stream.Arch{ + "x86_64": { + Images: stream.Images{ + Gcp: &stream.GcpImage{ + Project: "rhcos-cloud", + Name: "rhcos-96-rhel9-x86-64", + }, + }, + }, + }, + }, + }, + }, + expectedImage: "projects/rhcos-cloud/global/images/rhcos-96-rhel9-x86-64", + }, + { + name: "When rhelStream is rhel-10 with multi-stream payload, it should use OSStreams rhel-10 image", + rhelStream: "rhel-10", + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ + ObjectMeta: metav1.ObjectMeta{Name: "5.0.0"}, + }, + StreamMetadata: &stream.Stream{ + Architectures: map[string]stream.Arch{ + "x86_64": { + Images: stream.Images{ + Gcp: &stream.GcpImage{ + Project: "rhcos-cloud", + Name: "rhcos-default-x86-64", + }, + }, + }, + }, + }, + OSStreams: map[string]*stream.Stream{ + "rhel-10": { + Architectures: map[string]stream.Arch{ + "x86_64": { + Images: stream.Images{ + Gcp: &stream.GcpImage{ + Project: "rhcos-cloud", + Name: "rhcos-100-rhel10-x86-64", + }, + }, + }, + }, + }, + }, + }, + expectedImage: "projects/rhcos-cloud/global/images/rhcos-100-rhel10-x86-64", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + + spec, err := gcpMachineTemplateSpec( + baseHC.Spec.InfraID, + baseHC, + baseNodePool, + tc.releaseImage, + tc.rhelStream, + ) + + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(spec).ToNot(BeNil()) + g.Expect(*spec.Image).To(Equal(tc.expectedImage)) + }) + } +} diff --git a/hypershift-operator/controllers/nodepool/nodepool_controller.go b/hypershift-operator/controllers/nodepool/nodepool_controller.go index b13329126bbb..ab728d3e2bb5 100644 --- a/hypershift-operator/controllers/nodepool/nodepool_controller.go +++ b/hypershift-operator/controllers/nodepool/nodepool_controller.go @@ -279,11 +279,19 @@ func (r *NodePoolReconciler) reconcile(ctx context.Context, hcluster *hyperv1.Ho controlPlaneNamespace := manifests.HostedControlPlaneNamespace(hcluster.Namespace, hcluster.Name) infraID := hcluster.Spec.InfraID - // Aggregate node version and health information into NodesInfo status. - // This is done before the conditions loop so that nodesInfo stays accurate - // even when later validations (e.g. release image) short-circuit the reconcile. - if err := r.setNodesInfoStatus(ctx, nodePool); err != nil { - log.Error(err, "Failed to set NodesInfo status") + // Fetch machines once for all status aggregations that need them. + machines, err := r.getMachinesForNodePool(ctx, nodePool) + if err != nil { + log.Error(err, "Failed to get Machines for status aggregation") + } else { + // Aggregate node version and health information into NodesInfo status. + // This is done before the conditions loop so that nodesInfo stays accurate + // even when later validations (e.g. release image) short-circuit the reconcile. + r.setNodesInfoStatus(nodePool, machines) + + // Infer the observed RHEL stream from Machine NodeInfo.OSImage and set + // status.osImageStream when a majority of machines report a consistent stream. + r.setOSImageStreamStatus(nodePool, machines) } // Loop over all conditions. @@ -332,7 +340,7 @@ func (r *NodePoolReconciler) reconcile(ctx context.Context, hcluster *hyperv1.Ho return ctrl.Result{}, nil } // Retrieve pull secret name to check for changes when config is checked for updates - _, err := r.getPullSecretName(ctx, hcluster) + _, err = r.getPullSecretName(ctx, hcluster) if err != nil { return ctrl.Result{}, err } @@ -740,13 +748,11 @@ func isAutoscalingEnabled(nodePool *hyperv1.NodePool) bool { } // defaultNodePoolAMI resolves the default AWS AMI for a NodePool from release image stream metadata. -// TODO(CNTRLPLANE-3553): once the osImageStream API field is available, callers should resolve -// streamName via GetRHELStream and pass it here instead of hardcoding "". -func defaultNodePoolAMI(region string, specifiedArch string, streamName string, releaseImage *releaseinfo.ReleaseImage) (string, error) { +func defaultNodePoolAMI(region string, specifiedArch string, rhelStream string, releaseImage *releaseinfo.ReleaseImage) (string, error) { if releaseImage == nil { return "", fmt.Errorf("release image is nil") } - streamMeta, err := releaseImage.StreamForName(streamName) + streamMeta, err := releaseImage.StreamForName(rhelStream) if err != nil { return "", fmt.Errorf("couldn't resolve stream metadata: %w", err) } @@ -769,15 +775,16 @@ func defaultNodePoolAMI(region string, specifiedArch string, streamName string, } // defaultNodePoolGCPImage returns the default GCP image for a given architecture from release metadata. -func defaultNodePoolGCPImage(specifiedArch string, releaseImage *releaseinfo.ReleaseImage) (string, error) { +func defaultNodePoolGCPImage(specifiedArch string, releaseImage *releaseinfo.ReleaseImage, rhelStream string) (string, error) { if releaseImage == nil { return "", fmt.Errorf("release image is nil, cannot determine GCP image") } - if releaseImage.StreamMetadata == nil { - return "", fmt.Errorf("release image stream metadata is nil, cannot determine GCP image for architecture %q", specifiedArch) + streamMeta, err := releaseImage.StreamForName(rhelStream) + if err != nil { + return "", fmt.Errorf("couldn't resolve stream metadata: %w", err) } - arch, foundArch := releaseImage.StreamMetadata.Architectures[hyperv1.ArchAliases[specifiedArch]] + arch, foundArch := streamMeta.Architectures[hyperv1.ArchAliases[specifiedArch]] if !foundArch { return "", fmt.Errorf("couldn't find OS metadata for architecture %q", specifiedArch) } diff --git a/hypershift-operator/controllers/nodepool/nodepool_controller_test.go b/hypershift-operator/controllers/nodepool/nodepool_controller_test.go index cc560748d6e9..6bd147256270 100644 --- a/hypershift-operator/controllers/nodepool/nodepool_controller_test.go +++ b/hypershift-operator/controllers/nodepool/nodepool_controller_test.go @@ -578,7 +578,7 @@ func TestDefaultNodePoolAMI(t *testing.T) { name string region string specifiedArch string - streamName string + rhelStream string releaseImage *releaseinfo.ReleaseImage expectedImage string expectedErr string @@ -602,7 +602,7 @@ func TestDefaultNodePoolAMI(t *testing.T) { name: "When resolving rhel-9 stream it should return the rhel-9 AMI", region: "us-east-1", specifiedArch: "amd64", - streamName: "rhel-9", + rhelStream: "rhel-9", releaseImage: multiStreamReleaseImage, expectedImage: "ami-06a6b025350ff1e23", }, @@ -610,7 +610,7 @@ func TestDefaultNodePoolAMI(t *testing.T) { name: "When resolving rhel-10 stream it should return the rhel-10 AMI", region: "us-east-1", specifiedArch: "amd64", - streamName: "rhel-10", + rhelStream: "rhel-10", releaseImage: multiStreamReleaseImage, expectedImage: "ami-04b3d999e39d62c5b", }, @@ -618,7 +618,7 @@ func TestDefaultNodePoolAMI(t *testing.T) { name: "When resolving rhel-10 arm64 stream it should return the rhel-10 arm64 AMI", region: "us-east-1", specifiedArch: "arm64", - streamName: "rhel-10", + rhelStream: "rhel-10", releaseImage: multiStreamReleaseImage, expectedImage: "ami-0d7237e6b04d9a9e1", }, @@ -672,7 +672,7 @@ func TestDefaultNodePoolAMI(t *testing.T) { t.Parallel() g := NewWithT(t) - image, err := defaultNodePoolAMI(tc.region, tc.specifiedArch, tc.streamName, tc.releaseImage) + image, err := defaultNodePoolAMI(tc.region, tc.specifiedArch, tc.rhelStream, tc.releaseImage) if tc.expectedErr != "" { g.Expect(err).To(HaveOccurred()) g.Expect(err.Error()).To(Equal(tc.expectedErr)) diff --git a/hypershift-operator/controllers/nodepool/openstack/openstack.go b/hypershift-operator/controllers/nodepool/openstack/openstack.go index dada4b8ffa63..75878b5bd515 100644 --- a/hypershift-operator/controllers/nodepool/openstack/openstack.go +++ b/hypershift-operator/controllers/nodepool/openstack/openstack.go @@ -132,6 +132,8 @@ func ReconcileOpenStackImageSpec(hcluster *hyperv1.HostedCluster, openStackImage // OpenstackDefaultImage returns the default RHCOS image for the given release. // The image URL and SHA256 hash are returned. func OpenstackDefaultImage(releaseImage *releaseinfo.ReleaseImage) (string, string, error) { + // TODO(CNTRLPLANE-3553): use releaseImage.StreamForName(rhelStream) instead of + // accessing StreamMetadata directly, to support dual-stream payloads. arch, foundArch := releaseImage.StreamMetadata.Architectures["x86_64"] if !foundArch { return "", "", fmt.Errorf("couldn't find OS metadata for architecture %q", "x86_64") @@ -154,6 +156,8 @@ func OpenstackDefaultImage(releaseImage *releaseinfo.ReleaseImage) (string, stri // OpenStackReleaseImage returns the release version for the OpenStack image. // The release version is extracted from the release metadata. func OpenStackReleaseImage(releaseImage *releaseinfo.ReleaseImage) (string, error) { + // TODO(CNTRLPLANE-3553): use releaseImage.StreamForName(rhelStream) instead of + // accessing StreamMetadata directly, to support dual-stream payloads. arch, foundArch := releaseImage.StreamMetadata.Architectures["x86_64"] if !foundArch { return "", fmt.Errorf("couldn't find OS metadata for architecture %q", "x86_64") diff --git a/hypershift-operator/controllers/nodepool/osstream.go b/hypershift-operator/controllers/nodepool/osstream.go new file mode 100644 index 000000000000..700b08a57fb1 --- /dev/null +++ b/hypershift-operator/controllers/nodepool/osstream.go @@ -0,0 +1,47 @@ +package nodepool + +import ( + "fmt" + + hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" + "github.com/openshift/hypershift/support/releaseinfo" + + "github.com/blang/semver" +) + +// getRHELStreamForBootImage returns the RHEL stream name to pass to +// StreamForName when resolving platform-specific boot images (AMIs, VHDs, +// GCE images, etc.). +// +// It always delegates to GetRHELStream for version-aware default +// resolution, validation, and runc constraint checking. When +// spec.osImageStream.Name is unset, GetRHELStream derives the default +// from the release version: rhel-9 for OCP < 5.0, rhel-10 for +// OCP >= 5.0. This matches the dual-stream RHEL NodePool enhancement: +// https://github.com/openshift/enhancements/blob/master/enhancements/hypershift/dual-stream-rhel-nodepool.md +// +// On upgrade to OCP 5.0+, existing NodePools with unset +// spec.osImageStream will transition from rhel-9 to rhel-10 boot +// images. This is the intended behavior per the enhancement: +// implicit-stream NodePools automatically adopt the new default. +func getRHELStreamForBootImage(nodePool *hyperv1.NodePool, releaseImage *releaseinfo.ReleaseImage) (string, error) { + version, err := semver.Parse(releaseImage.Version()) + if err != nil { + return "", fmt.Errorf("failed to parse release image version %q: %w", releaseImage.Version(), err) + } + + // TODO(CNTRLPLANE-3553): pass actual usesRunc once container runtime detection is wired in. + return GetRHELStream(nodePool.Spec.OSImageStream.Name, version, false) +} + +// validateOSImageStream checks that spec.osImageStream.Name, if set, is a +// valid stream for the given release version. Returns an error describing the +// problem or nil. It delegates to getRHELStreamForBootImage for version-aware +// validation. +func validateOSImageStream(nodePool *hyperv1.NodePool, releaseImage *releaseinfo.ReleaseImage) error { + if nodePool.Spec.OSImageStream.Name == "" { + return nil + } + _, err := getRHELStreamForBootImage(nodePool, releaseImage) + return err +} diff --git a/hypershift-operator/controllers/nodepool/osstream_test.go b/hypershift-operator/controllers/nodepool/osstream_test.go new file mode 100644 index 000000000000..04406a2d098c --- /dev/null +++ b/hypershift-operator/controllers/nodepool/osstream_test.go @@ -0,0 +1,229 @@ +package nodepool + +import ( + "testing" + + . "github.com/onsi/gomega" + + hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" + "github.com/openshift/hypershift/support/releaseinfo" + + imageapi "github.com/openshift/api/image/v1" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestGetRHELStreamForBootImage(t *testing.T) { + testCases := []struct { + name string + nodePool *hyperv1.NodePool + releaseImage *releaseinfo.ReleaseImage + expectedStream string + expectErr bool + }{ + { + name: "When spec.osImageStream.Name is rhel-10 and version is 5.x, it should return rhel-10", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + OSImageStream: hyperv1.OSImageStreamReference{Name: "rhel-10"}, + }, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "5.0.0"}}, + }, + expectedStream: "rhel-10", + }, + { + name: "When spec.osImageStream.Name is rhel-9 and version is 4.x, it should return rhel-9", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + OSImageStream: hyperv1.OSImageStreamReference{Name: "rhel-9"}, + }, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "4.18.0"}}, + }, + expectedStream: "rhel-9", + }, + { + name: "When spec.osImageStream.Name is rhel-9 and version is 5.x, it should return rhel-9", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + OSImageStream: hyperv1.OSImageStreamReference{Name: "rhel-9"}, + }, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "5.0.0"}}, + }, + expectedStream: "rhel-9", + }, + { + name: "When spec.osImageStream.Name is rhel-10 and version is 4.x, it should return an error", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + OSImageStream: hyperv1.OSImageStreamReference{Name: "rhel-10"}, + }, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "4.18.0"}}, + }, + expectErr: true, + }, + { + name: "When spec.osImageStream.Name is invalid, it should return an error", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + OSImageStream: hyperv1.OSImageStreamReference{Name: "rhel-8"}, + }, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "5.0.0"}}, + }, + expectErr: true, + }, + { + name: "When spec.osImageStream.Name is empty and version is 4.x, it should return rhel-9", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{}, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "4.18.0"}}, + }, + expectedStream: "rhel-9", + }, + { + name: "When spec.osImageStream.Name is empty and version is 5.x, it should return rhel-10", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{}, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "5.0.0"}}, + }, + expectedStream: "rhel-10", + }, + { + name: "When spec.osImageStream.Name is empty and version is 6.x, it should return rhel-10", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{}, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "6.1.0"}}, + }, + expectedStream: "rhel-10", + }, + { + name: "When spec.osImageStream.Name is empty and version is unparsable, it should return an error", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{}, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "not-a-version"}}, + }, + expectErr: true, + }, + { + name: "When spec.osImageStream.Name is set and version is unparsable, it should return an error", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + OSImageStream: hyperv1.OSImageStreamReference{Name: "rhel-9"}, + }, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "not-a-version"}}, + }, + expectErr: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + + stream, err := getRHELStreamForBootImage(tc.nodePool, tc.releaseImage) + if tc.expectErr { + g.Expect(err).To(HaveOccurred()) + return + } + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(stream).To(Equal(tc.expectedStream)) + }) + } +} + +func TestValidateOSImageStream(t *testing.T) { + testCases := []struct { + name string + nodePool *hyperv1.NodePool + releaseImage *releaseinfo.ReleaseImage + expectErr bool + }{ + { + name: "When osImageStream.Name is empty, it should succeed", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{}, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "5.0.0"}}, + }, + }, + { + name: "When osImageStream.Name is rhel-9 and version is 4.x, it should succeed", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + OSImageStream: hyperv1.OSImageStreamReference{Name: "rhel-9"}, + }, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "4.18.0"}}, + }, + }, + { + name: "When osImageStream.Name is rhel-10 and version is 5.x, it should succeed", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + OSImageStream: hyperv1.OSImageStreamReference{Name: "rhel-10"}, + }, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "5.0.0"}}, + }, + }, + { + name: "When osImageStream.Name is rhel-10 and version is 4.x, it should return an error", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + OSImageStream: hyperv1.OSImageStreamReference{Name: "rhel-10"}, + }, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "4.18.0"}}, + }, + expectErr: true, + }, + { + name: "When osImageStream.Name is invalid, it should return an error", + nodePool: &hyperv1.NodePool{ + Spec: hyperv1.NodePoolSpec{ + OSImageStream: hyperv1.OSImageStreamReference{Name: "rhel-8"}, + }, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "5.0.0"}}, + }, + expectErr: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + + err := validateOSImageStream(tc.nodePool, tc.releaseImage) + if tc.expectErr { + g.Expect(err).To(HaveOccurred()) + } else { + g.Expect(err).ToNot(HaveOccurred()) + } + }) + } +} diff --git a/hypershift-operator/controllers/nodepool/powervs.go b/hypershift-operator/controllers/nodepool/powervs.go index 7f2bf8ee9fb4..af06b9254442 100644 --- a/hypershift-operator/controllers/nodepool/powervs.go +++ b/hypershift-operator/controllers/nodepool/powervs.go @@ -111,6 +111,8 @@ func (c *CAPI) ibmPowerVSMachineTemplate(templateNameGenerator func(spec any) (s } func getPowerVSImage(region string, releaseImage *releaseinfo.ReleaseImage) (*stream.SingleObject, string, error) { + // TODO(CNTRLPLANE-3553): use releaseImage.StreamForName(rhelStream) instead of + // accessing StreamMetadata directly, to support dual-stream payloads. arch, foundArch := releaseImage.StreamMetadata.Architectures["ppc64le"] if !foundArch { return nil, "", fmt.Errorf("couldn't find OS metadata for architecture %q", "ppc64le") diff --git a/hypershift-operator/controllers/nodepool/stream.go b/hypershift-operator/controllers/nodepool/stream.go index 83e73af564c4..f0524dddfec1 100644 --- a/hypershift-operator/controllers/nodepool/stream.go +++ b/hypershift-operator/controllers/nodepool/stream.go @@ -15,7 +15,8 @@ const ( // GetRHELStream resolves which RHEL CoreOS stream a NodePool should use. // Returns the resolved stream name, or an error for invalid combinations. -// An empty return means "use legacy single-stream behavior" (OCP 4.x). +// For OCP < 5.0 with no explicit stream it returns StreamRHEL9 (the only +// stream available on those releases). // Exported for use by integration tests and future Phase 2 consumers // (token secret plumbing, validMachineConfigCondition). func GetRHELStream(explicitStream string, releaseVersion semver.Version, usesRunc bool) (string, error) { @@ -39,7 +40,7 @@ func GetRHELStream(explicitStream string, releaseVersion semver.Version, usesRun } if !isOCP5Plus { - return "", nil + return StreamRHEL9, nil } if usesRunc { diff --git a/hypershift-operator/controllers/nodepool/stream_test.go b/hypershift-operator/controllers/nodepool/stream_test.go index 202057cabb73..b6ebd2b6e45f 100644 --- a/hypershift-operator/controllers/nodepool/stream_test.go +++ b/hypershift-operator/controllers/nodepool/stream_test.go @@ -19,17 +19,17 @@ func TestGetRHELStream(t *testing.T) { }{ // --- Implicit stream (explicitStream = "") --- { - name: "When no explicit stream and release is 4.x it should return empty string", + name: "When no explicit stream and release is 4.x it should return rhel-9", explicitStream: "", releaseVersion: semver.MustParse("4.18.0"), - expectResult: "", + expectResult: "rhel-9", }, { - name: "When no explicit stream and release is 4.x with runc it should return empty string", + name: "When no explicit stream and release is 4.x with runc it should return rhel-9", explicitStream: "", releaseVersion: semver.MustParse("4.19.0"), usesRunc: true, - expectResult: "", + expectResult: "rhel-9", }, { name: "When no explicit stream and release is 5.0 it should return rhel-10", diff --git a/hypershift-operator/controllers/nodepool/stream_test_helpers_test.go b/hypershift-operator/controllers/nodepool/stream_test_helpers_test.go new file mode 100644 index 000000000000..a1895393fc6c --- /dev/null +++ b/hypershift-operator/controllers/nodepool/stream_test_helpers_test.go @@ -0,0 +1,41 @@ +package nodepool + +import ( + "github.com/coreos/stream-metadata-go/stream" +) + +// testAWSStream returns a minimal stream.Stream with a single AWS region/arch/AMI entry. +// Use this to reduce boilerplate in test cases that only need a simple AWS image lookup. +func testAWSStream(arch, region, ami string) *stream.Stream { + return &stream.Stream{ + Architectures: map[string]stream.Arch{ + arch: { + Images: stream.Images{ + Aws: &stream.AwsImage{ + Regions: map[string]stream.SingleImage{ + region: {Image: ami}, + }, + }, + }, + }, + }, + } +} + +// testAWSStreamWithRelease returns a minimal stream.Stream with a single AWS +// region/arch/AMI entry plus the Release field set. +func testAWSStreamWithRelease(arch, region, ami, release string) *stream.Stream { + return &stream.Stream{ + Architectures: map[string]stream.Arch{ + arch: { + Images: stream.Images{ + Aws: &stream.AwsImage{ + Regions: map[string]stream.SingleImage{ + region: {Release: release, Image: ami}, + }, + }, + }, + }, + }, + } +} diff --git a/hypershift-operator/controllers/nodepool/token.go b/hypershift-operator/controllers/nodepool/token.go index 4a39caffba7d..4c8daa216fbf 100644 --- a/hypershift-operator/controllers/nodepool/token.go +++ b/hypershift-operator/controllers/nodepool/token.go @@ -45,6 +45,7 @@ const ( TokenSecretAnnotation = "hypershift.openshift.io/ignition-config" TokenSecretIgnitionReachedAnnotation = "hypershift.openshift.io/ignition-reached" TokenSecretNodePoolUpgradeType = "hypershift.openshift.io/node-pool-upgrade-type" + TokenSecretOSStreamKey = "os-stream" ) // Token knows how to create an UUUID token for a unique configGenerator Hash. @@ -354,6 +355,9 @@ func (t *Token) reconcileTokenSecret(tokenSecret *corev1.Secret) error { tokenSecret.Data[TokenSecretPullSecretHashKey] = t.pullSecretHash tokenSecret.Data[TokenSecretAdditionalTrustBundleKey] = t.additionalTrustBundleHash tokenSecret.Data[TokenSecretHCConfigurationHashKey] = t.globalConfigHash + // TODO(CNTRLPLANE-3553): consumed by the ignition-server's TokenSecretReconciler once + // multi-stream ignition support lands. Until then this key is written but not read downstream. + tokenSecret.Data[TokenSecretOSStreamKey] = []byte(t.resolvedRHELStreamForBootImage) } // TODO (alberto): Only apply this on creation and change the hash generation to only use triggering upgrade fields. // We let this change to happen inplace now as the tokenSecret and the mcs config use the whole spec.Config for the comparing hash. @@ -381,7 +385,7 @@ func (t *Token) reconcileUserDataSecret(log logr.Logger, userDataSecret *corev1. if karpenterutil.IsKarpenterEnabled(t.hostedCluster.Spec.AutoNode) { npLabels := t.nodePool.GetLabels() if npLabels != nil && npLabels[karpenterutil.ManagedByKarpenterLabel] == "true" { - err := setKarpenterAMILabels(log, userDataSecret, t.hostedCluster.Spec.Platform.AWS.Region, t.releaseImage, t.hostedCluster.Spec.Platform.Type) + err := setKarpenterAMILabels(log, userDataSecret, t.hostedCluster.Spec.Platform.AWS.Region, t.releaseImage, t.hostedCluster.Spec.Platform.Type, t.resolvedRHELStreamForBootImage) if err != nil { return err } @@ -403,15 +407,14 @@ func (t *Token) reconcileUserDataSecret(log logr.Logger, userDataSecret *corev1. return nil } -func setKarpenterAMILabels(log logr.Logger, userDataSecret *corev1.Secret, region string, releaseImage *releaseinfo.ReleaseImage, platform hyperv1.PlatformType) error { - // TODO(CNTRLPLANE-3553): resolve streamName via GetRHELStream once osImageStream API field is available +func setKarpenterAMILabels(log logr.Logger, userDataSecret *corev1.Secret, region string, releaseImage *releaseinfo.ReleaseImage, platform hyperv1.PlatformType, rhelStream string) error { supportedArchitectures, err := karpenterutil.SupportedArchitectures(platform) if err != nil { return fmt.Errorf("failed to get supported architectures: %w", err) } supported := 0 for _, arch := range supportedArchitectures { - ami, err := defaultNodePoolAMI(region, arch, "", releaseImage) + ami, err := defaultNodePoolAMI(region, arch, rhelStream, releaseImage) if err != nil { // skip unavailable architectures gracefully log.Error(err, "failed to get default NodePool AMI for architecture", "architecture", arch) diff --git a/hypershift-operator/controllers/nodepool/token_test.go b/hypershift-operator/controllers/nodepool/token_test.go index 5266d08331cb..33df617a04ef 100644 --- a/hypershift-operator/controllers/nodepool/token_test.go +++ b/hypershift-operator/controllers/nodepool/token_test.go @@ -759,6 +759,9 @@ func TestTokenReconcile(t *testing.T) { g.Expect(gotTokenSecret.Data[TokenSecretAdditionalTrustBundleKey]).To(Equal(expectedAdditionalTrustBundleHash)) g.Expect(gotTokenSecret.Data[TokenSecretHCConfigurationHashKey]).To(Equal([]byte(expectedGlobalConfig))) + // Validate the os-stream key is set to the resolved RHEL stream. + g.Expect(gotTokenSecret.Data[TokenSecretOSStreamKey]).To(Equal([]byte(tc.configGenerator.resolvedRHELStreamForBootImage))) + // Validate the user data secret has all the expected annotations. // Start Generation Here gotUserDataSecret := &corev1.Secret{} @@ -1086,6 +1089,7 @@ func TestSetKarpenterAMILabels(t *testing.T) { userDataSecret *corev1.Secret releaseImage *releaseinfo.ReleaseImage region string + rhelStream string expectedError string expectedLabels map[string]string }{ @@ -1139,28 +1143,79 @@ func TestSetKarpenterAMILabels(t *testing.T) { ImageStream: &imageapi.ImageStream{ ObjectMeta: metav1.ObjectMeta{Name: "test-release"}, }, + StreamMetadata: testAWSStream("x86_64", "us-east-1", "ami-amd64-only"), + }, + expectedLabels: map[string]string{ + karpenterutil.ArchToAMILabelKey(hyperv1.ArchitectureAMD64): "ami-amd64-only", + }, + }, + { + name: "when the user data secret is created for unsupported platform it should return an error", + platform: hyperv1.AzurePlatform, + region: "us-east-1", + userDataSecret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "user-data-secret", + Namespace: "test-namespace", + Labels: map[string]string{ + karpenterutil.ManagedByKarpenterLabel: "true", + }, + }, + }, + expectedError: "failed to get supported architectures: unsupported platform: Azure", + }, + { + name: "When rhelStream is rhel-9 with single-stream payload, it should set AMI labels from StreamMetadata fallback", + platform: hyperv1.AWSPlatform, + region: "us-east-1", + rhelStream: "rhel-9", + userDataSecret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "user-data-secret", + Namespace: "test-namespace", + Labels: map[string]string{ + karpenterutil.ManagedByKarpenterLabel: "true", + }, + }, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ + ObjectMeta: metav1.ObjectMeta{Name: "4.17.0"}, + }, StreamMetadata: &stream.Stream{ Architectures: map[string]stream.Arch{ "x86_64": { Images: stream.Images{ Aws: &stream.AwsImage{ Regions: map[string]stream.SingleImage{ - "us-east-1": {Image: "ami-amd64-only"}, + "us-east-1": {Image: "ami-rhel9-fallback-amd64"}, + }, + }, + }, + }, + "aarch64": { + Images: stream.Images{ + Aws: &stream.AwsImage{ + Regions: map[string]stream.SingleImage{ + "us-east-1": {Image: "ami-rhel9-fallback-arm64"}, }, }, }, }, }, }, + OSStreams: nil, }, expectedLabels: map[string]string{ - karpenterutil.ArchToAMILabelKey(hyperv1.ArchitectureAMD64): "ami-amd64-only", + karpenterutil.ArchToAMILabelKey(hyperv1.ArchitectureAMD64): "ami-rhel9-fallback-amd64", + karpenterutil.ArchToAMILabelKey(hyperv1.ArchitectureARM64): "ami-rhel9-fallback-arm64", }, }, { - name: "when the user data secret is created for unsupported platform it should return an error", - platform: hyperv1.AzurePlatform, - region: "us-east-1", + name: "When rhelStream is rhel-9 with multi-stream payload, it should use OSStreams rhel-9 AMI", + platform: hyperv1.AWSPlatform, + region: "us-east-1", + rhelStream: "rhel-9", userDataSecret: &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Name: "user-data-secret", @@ -1170,7 +1225,45 @@ func TestSetKarpenterAMILabels(t *testing.T) { }, }, }, - expectedError: "failed to get supported architectures: unsupported platform: Azure", + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ + ObjectMeta: metav1.ObjectMeta{Name: "5.0.0"}, + }, + StreamMetadata: testAWSStream("x86_64", "us-east-1", "ami-default-amd64"), + OSStreams: map[string]*stream.Stream{ + "rhel-9": testAWSStream("x86_64", "us-east-1", "ami-rhel9-osstream-amd64"), + }, + }, + expectedLabels: map[string]string{ + karpenterutil.ArchToAMILabelKey(hyperv1.ArchitectureAMD64): "ami-rhel9-osstream-amd64", + }, + }, + { + name: "When rhelStream is rhel-10 with multi-stream payload, it should use OSStreams rhel-10 AMI", + platform: hyperv1.AWSPlatform, + region: "us-east-1", + rhelStream: "rhel-10", + userDataSecret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "user-data-secret", + Namespace: "test-namespace", + Labels: map[string]string{ + karpenterutil.ManagedByKarpenterLabel: "true", + }, + }, + }, + releaseImage: &releaseinfo.ReleaseImage{ + ImageStream: &imageapi.ImageStream{ + ObjectMeta: metav1.ObjectMeta{Name: "5.0.0"}, + }, + StreamMetadata: testAWSStream("x86_64", "us-east-1", "ami-default-amd64"), + OSStreams: map[string]*stream.Stream{ + "rhel-10": testAWSStream("x86_64", "us-east-1", "ami-rhel10-osstream-amd64"), + }, + }, + expectedLabels: map[string]string{ + karpenterutil.ArchToAMILabelKey(hyperv1.ArchitectureAMD64): "ami-rhel10-osstream-amd64", + }, }, } log := testr.New(t) @@ -1181,7 +1274,7 @@ func TestSetKarpenterAMILabels(t *testing.T) { if ri == nil { ri = testutils.InitReleaseImageOrDie("test-release") } - err := setKarpenterAMILabels(log, tc.userDataSecret, tc.region, ri, tc.platform) + err := setKarpenterAMILabels(log, tc.userDataSecret, tc.region, ri, tc.platform, tc.rhelStream) if tc.expectedError != "" { g.Expect(err).To(HaveOccurred()) g.Expect(err.Error()).To(Equal(tc.expectedError)) diff --git a/hypershift-operator/controllers/nodepool/version.go b/hypershift-operator/controllers/nodepool/version.go index d63ca3417daa..e440aeb79a28 100644 --- a/hypershift-operator/controllers/nodepool/version.go +++ b/hypershift-operator/controllers/nodepool/version.go @@ -1,8 +1,7 @@ package nodepool import ( - "context" - "fmt" + "regexp" "sort" hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" @@ -21,7 +20,7 @@ type versionKey struct { // nodeVersionsFromMachines aggregates version and health information from CAPI Machines. // It groups machines by (ocpVersion, kubeletVersion) and counts ready/unready nodes // based on the CAPI NodeHealthy condition. -func (r *NodePoolReconciler) nodeVersionsFromMachines(_ context.Context, machines []*capiv1.Machine, nodePool *hyperv1.NodePool) []hyperv1.NodeVersion { +func (r *NodePoolReconciler) nodeVersionsFromMachines(machines []*capiv1.Machine, nodePool *hyperv1.NodePool) []hyperv1.NodeVersion { type counts struct { ready int32 unready int32 @@ -87,16 +86,80 @@ func (r *NodePoolReconciler) nodeVersionsFromMachines(_ context.Context, machine // setNodesInfoStatus aggregates node version and health information from CAPI Machines // and sets it on nodePool.Status.NodesInfo. -func (r *NodePoolReconciler) setNodesInfoStatus(ctx context.Context, nodePool *hyperv1.NodePool) error { - machines, err := r.getMachinesForNodePool(ctx, nodePool) - if err != nil { - return fmt.Errorf("failed to get Machines for NodesInfo: %w", err) - } - - nodeVersions := r.nodeVersionsFromMachines(ctx, machines, nodePool) +func (r *NodePoolReconciler) setNodesInfoStatus(nodePool *hyperv1.NodePool, machines []*capiv1.Machine) { + nodeVersions := r.nodeVersionsFromMachines(machines, nodePool) nodePool.Status.NodesInfo = hyperv1.NodePoolNodesInfo{ NodeVersions: nodeVersions, } +} + +// rhcosOSImageRe matches the RHCOS version from the NodeInfo.OSImage string. +// The first capture group is the leading digit of the RHCOS version (e.g. "4" +// in "419.97…"), which determines the RHEL generation (4xx → RHEL 9, 5xx → RHEL 10). +var rhcosOSImageRe = regexp.MustCompile(`Red Hat Enterprise Linux CoreOS (\d)\d{2}\.`) + +// rhcosStreamFromOSImage parses a Machine's NodeInfo.OSImage string and +// returns the corresponding RHEL stream name. RHCOS versions starting with +// 4xx map to RHEL 9 and 5xx to RHEL 10. +// Returns empty string if the OS image string is unrecognized. +func rhcosStreamFromOSImage(osImage string) string { + matches := rhcosOSImageRe.FindStringSubmatch(osImage) + if len(matches) < 2 { + return "" + } + switch matches[1] { + case "4": + return StreamRHEL9 + case "5": + return StreamRHEL10 + default: + return "" + } +} - return nil +// osImageStreamFromMachines determines the observed RHEL stream by examining +// Machine NodeInfo.OSImage across the pool. Returns the stream name when a +// majority of observed machines report the same stream, or empty string when +// no majority exists or no machines have reported yet. +func osImageStreamFromMachines(machines []*capiv1.Machine) string { + streamCounts := make(map[string]int) + total := 0 + for _, machine := range machines { + if machine.Status.NodeInfo == nil { + continue + } + stream := rhcosStreamFromOSImage(machine.Status.NodeInfo.OSImage) + if stream == "" { + continue + } + streamCounts[stream]++ + total++ + } + + if total == 0 { + return "" + } + + // Set status when a strict majority (> N/2) of observed nodes agree. + for stream, count := range streamCounts { + if count > total/2 { + return stream + } + } + + return "" +} + +// setOSImageStreamStatus infers the RHEL stream from observed Machine +// NodeInfo.OSImage and sets nodePool.Status.OSImageStream when a majority +// of machines report a consistent stream. +// When no majority exists (e.g. during rolling upgrades, scale-to-zero, or +// unrecognized OS images), the status retains its previous value to avoid +// flapping. It reflects the "last-known majority" and is intentionally never +// reset to empty. +func (r *NodePoolReconciler) setOSImageStreamStatus(nodePool *hyperv1.NodePool, machines []*capiv1.Machine) { + stream := osImageStreamFromMachines(machines) + if stream != "" { + nodePool.Status.OSImageStream = hyperv1.OSImageStreamReference{Name: stream} + } } diff --git a/hypershift-operator/controllers/nodepool/version_test.go b/hypershift-operator/controllers/nodepool/version_test.go index 581ad975c5ab..c6fa2b50df6b 100644 --- a/hypershift-operator/controllers/nodepool/version_test.go +++ b/hypershift-operator/controllers/nodepool/version_test.go @@ -1,13 +1,11 @@ package nodepool import ( - "context" "testing" . "github.com/onsi/gomega" hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" - "github.com/openshift/hypershift/support/api" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -15,7 +13,6 @@ import ( "sigs.k8s.io/cluster-api/api/core/v1beta1" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" ) func TestNodeVersionsFromMachines(t *testing.T) { @@ -169,13 +166,8 @@ func TestNodeVersionsFromMachines(t *testing.T) { t.Run(tc.name, func(t *testing.T) { g := NewGomegaWithT(t) - fakeClient := fake.NewClientBuilder().WithScheme(api.Scheme).Build() - r := &NodePoolReconciler{ - Client: fakeClient, - } - - ctx := context.Background() - result := r.nodeVersionsFromMachines(ctx, tc.machines, tc.nodePool) + r := &NodePoolReconciler{} + result := r.nodeVersionsFromMachines(tc.machines, tc.nodePool) g.Expect(result).To(Equal(tc.expected)) }) } @@ -288,21 +280,329 @@ func TestSetNodesInfoStatus(t *testing.T) { t.Run(tc.name, func(t *testing.T) { g := NewGomegaWithT(t) - objs := make([]client.Object, 0, len(tc.machines)) - objs = append(objs, tc.machines...) - - fakeClient := fake.NewClientBuilder().WithScheme(api.Scheme).WithObjects(objs...).Build() - r := &NodePoolReconciler{ - Client: fakeClient, + machines := make([]*v1beta1.Machine, 0, len(tc.machines)) + for _, obj := range tc.machines { + machines = append(machines, obj.(*v1beta1.Machine)) } - err := r.setNodesInfoStatus(t.Context(), tc.nodePool) - g.Expect(err).ToNot(HaveOccurred()) + r := &NodePoolReconciler{} + r.setNodesInfoStatus(tc.nodePool, machines) g.Expect(tc.nodePool.Status.NodesInfo).To(Equal(tc.expectedNodesInfo)) }) } } +func TestRhcosStreamFromOSImage(t *testing.T) { + testCases := []struct { + name string + osImage string + expected string + }{ + { + name: "When OSImage is RHCOS 4xx it should return rhel-9", + osImage: "Red Hat Enterprise Linux CoreOS 419.97.202505081234-0 (Plow)", + expected: StreamRHEL9, + }, + { + name: "When OSImage is RHCOS 5xx it should return rhel-10", + osImage: "Red Hat Enterprise Linux CoreOS 510.97.202506011200-0 (Plow)", + expected: StreamRHEL10, + }, + { + name: "When OSImage has different 4xx version it should return rhel-9", + osImage: "Red Hat Enterprise Linux CoreOS 418.94.202501011200-0 (Plow)", + expected: StreamRHEL9, + }, + { + name: "When OSImage is empty it should return empty string", + osImage: "", + expected: "", + }, + { + name: "When OSImage is unrecognized it should return empty string", + osImage: "Ubuntu 22.04 LTS", + expected: "", + }, + { + name: "When OSImage has unknown major version it should return empty string", + osImage: "Red Hat Enterprise Linux CoreOS 300.97.202505081234-0 (Plow)", + expected: "", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + g := NewGomegaWithT(t) + g.Expect(rhcosStreamFromOSImage(tc.osImage)).To(Equal(tc.expected)) + }) + } +} + +func TestOsImageStreamFromMachines(t *testing.T) { + testCases := []struct { + name string + machines []*v1beta1.Machine + expected string + }{ + { + name: "When there are no machines it should return empty string", + machines: nil, + expected: "", + }, + { + name: "When a single machine reports RHEL 9 it should return rhel-9", + machines: []*v1beta1.Machine{ + machineWithOSImage("m1", "Red Hat Enterprise Linux CoreOS 419.97.202505081234-0 (Plow)"), + }, + expected: StreamRHEL9, + }, + { + name: "When all machines report RHEL 9 it should return rhel-9", + machines: []*v1beta1.Machine{ + machineWithOSImage("m1", "Red Hat Enterprise Linux CoreOS 419.97.202505081234-0 (Plow)"), + machineWithOSImage("m2", "Red Hat Enterprise Linux CoreOS 419.97.202505081234-0 (Plow)"), + machineWithOSImage("m3", "Red Hat Enterprise Linux CoreOS 419.97.202505081234-0 (Plow)"), + }, + expected: StreamRHEL9, + }, + { + name: "When all machines report RHEL 10 it should return rhel-10", + machines: []*v1beta1.Machine{ + machineWithOSImage("m1", "Red Hat Enterprise Linux CoreOS 510.97.202506011200-0 (Plow)"), + machineWithOSImage("m2", "Red Hat Enterprise Linux CoreOS 510.97.202506011200-0 (Plow)"), + }, + expected: StreamRHEL10, + }, + { + name: "When a majority reports RHEL 10 during rolling upgrade it should return rhel-10", + machines: []*v1beta1.Machine{ + machineWithOSImage("m1", "Red Hat Enterprise Linux CoreOS 419.97.202505081234-0 (Plow)"), + machineWithOSImage("m2", "Red Hat Enterprise Linux CoreOS 510.97.202506011200-0 (Plow)"), + machineWithOSImage("m3", "Red Hat Enterprise Linux CoreOS 510.97.202506011200-0 (Plow)"), + }, + expected: StreamRHEL10, + }, + { + name: "When streams are evenly split it should return empty string", + machines: []*v1beta1.Machine{ + machineWithOSImage("m1", "Red Hat Enterprise Linux CoreOS 419.97.202505081234-0 (Plow)"), + machineWithOSImage("m2", "Red Hat Enterprise Linux CoreOS 510.97.202506011200-0 (Plow)"), + }, + expected: "", + }, + { + name: "When machines have no NodeInfo it should return empty string", + machines: []*v1beta1.Machine{ + {ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: v1beta1.MachineStatus{}}, + }, + expected: "", + }, + { + name: "When machines have unrecognized OSImage it should return empty string", + machines: []*v1beta1.Machine{ + machineWithOSImage("m1", "Ubuntu 22.04 LTS"), + }, + expected: "", + }, + { + name: "When some machines have no NodeInfo it should count only those with NodeInfo", + machines: []*v1beta1.Machine{ + machineWithOSImage("m1", "Red Hat Enterprise Linux CoreOS 510.97.202506011200-0 (Plow)"), + {ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: v1beta1.MachineStatus{}}, + {ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: v1beta1.MachineStatus{}}, + }, + expected: StreamRHEL10, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + g := NewGomegaWithT(t) + g.Expect(osImageStreamFromMachines(tc.machines)).To(Equal(tc.expected)) + }) + } +} + +func TestSetOSImageStreamStatus(t *testing.T) { + testCases := []struct { + name string + machines []client.Object + nodePool *hyperv1.NodePool + expectedOSImageStream hyperv1.OSImageStreamReference + }{ + { + name: "When no machines exist it should not change OSImageStream status", + machines: nil, + nodePool: &hyperv1.NodePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-nodepool", + Namespace: "clusters", + }, + Spec: hyperv1.NodePoolSpec{ + ClusterName: "test-cluster", + }, + }, + expectedOSImageStream: hyperv1.OSImageStreamReference{}, + }, + { + name: "When all machines report RHEL 9 it should set status to rhel-9", + machines: []client.Object{ + &v1beta1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "m1", + Namespace: "clusters-test-cluster", + Annotations: map[string]string{ + nodePoolAnnotation: "clusters/test-nodepool", + }, + }, + Status: v1beta1.MachineStatus{ + NodeInfo: &corev1.NodeSystemInfo{ + OSImage: "Red Hat Enterprise Linux CoreOS 419.97.202505081234-0 (Plow)", + }, + }, + }, + &v1beta1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "m2", + Namespace: "clusters-test-cluster", + Annotations: map[string]string{ + nodePoolAnnotation: "clusters/test-nodepool", + }, + }, + Status: v1beta1.MachineStatus{ + NodeInfo: &corev1.NodeSystemInfo{ + OSImage: "Red Hat Enterprise Linux CoreOS 419.97.202505081234-0 (Plow)", + }, + }, + }, + }, + nodePool: &hyperv1.NodePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-nodepool", + Namespace: "clusters", + }, + Spec: hyperv1.NodePoolSpec{ + ClusterName: "test-cluster", + }, + }, + expectedOSImageStream: hyperv1.OSImageStreamReference{Name: StreamRHEL9}, + }, + { + name: "When majority reports RHEL 10 it should set status to rhel-10", + machines: []client.Object{ + &v1beta1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "m1", + Namespace: "clusters-test-cluster", + Annotations: map[string]string{ + nodePoolAnnotation: "clusters/test-nodepool", + }, + }, + Status: v1beta1.MachineStatus{ + NodeInfo: &corev1.NodeSystemInfo{ + OSImage: "Red Hat Enterprise Linux CoreOS 419.97.202505081234-0 (Plow)", + }, + }, + }, + &v1beta1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "m2", + Namespace: "clusters-test-cluster", + Annotations: map[string]string{ + nodePoolAnnotation: "clusters/test-nodepool", + }, + }, + Status: v1beta1.MachineStatus{ + NodeInfo: &corev1.NodeSystemInfo{ + OSImage: "Red Hat Enterprise Linux CoreOS 510.97.202506011200-0 (Plow)", + }, + }, + }, + &v1beta1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "m3", + Namespace: "clusters-test-cluster", + Annotations: map[string]string{ + nodePoolAnnotation: "clusters/test-nodepool", + }, + }, + Status: v1beta1.MachineStatus{ + NodeInfo: &corev1.NodeSystemInfo{ + OSImage: "Red Hat Enterprise Linux CoreOS 510.97.202506011200-0 (Plow)", + }, + }, + }, + }, + nodePool: &hyperv1.NodePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-nodepool", + Namespace: "clusters", + }, + Spec: hyperv1.NodePoolSpec{ + ClusterName: "test-cluster", + }, + }, + expectedOSImageStream: hyperv1.OSImageStreamReference{Name: StreamRHEL10}, + }, + { + name: "When previous status exists and no machines have NodeInfo it should preserve previous status", + machines: []client.Object{ + &v1beta1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "m1", + Namespace: "clusters-test-cluster", + Annotations: map[string]string{ + nodePoolAnnotation: "clusters/test-nodepool", + }, + }, + Status: v1beta1.MachineStatus{}, + }, + }, + nodePool: &hyperv1.NodePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-nodepool", + Namespace: "clusters", + }, + Spec: hyperv1.NodePoolSpec{ + ClusterName: "test-cluster", + }, + Status: hyperv1.NodePoolStatus{ + OSImageStream: hyperv1.OSImageStreamReference{Name: StreamRHEL9}, + }, + }, + expectedOSImageStream: hyperv1.OSImageStreamReference{Name: StreamRHEL9}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + g := NewGomegaWithT(t) + + machines := make([]*v1beta1.Machine, 0, len(tc.machines)) + for _, obj := range tc.machines { + machines = append(machines, obj.(*v1beta1.Machine)) + } + + r := &NodePoolReconciler{} + r.setOSImageStreamStatus(tc.nodePool, machines) + g.Expect(tc.nodePool.Status.OSImageStream).To(Equal(tc.expectedOSImageStream)) + }) + } +} + +func machineWithOSImage(name, osImage string) *v1beta1.Machine { + return &v1beta1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Status: v1beta1.MachineStatus{ + NodeInfo: &corev1.NodeSystemInfo{ + OSImage: osImage, + }, + }, + } +} + func machineWithVersionAndHealth(name, kubeletVersion string, healthy bool, annotations map[string]string) *v1beta1.Machine { healthStatus := corev1.ConditionTrue if !healthy { diff --git a/support/releaseinfo/releaseinfo.go b/support/releaseinfo/releaseinfo.go index 2bd482698cd1..a42b6bef14b0 100644 --- a/support/releaseinfo/releaseinfo.go +++ b/support/releaseinfo/releaseinfo.go @@ -36,7 +36,11 @@ type ProviderWithOpenShiftImageRegistryOverrides interface { } const ( - StreamRHEL9 = "rhel-9" + // StreamRHEL9 is the canonical stream name for RHEL 9. + // This value must match api/hypershift/v1beta1.OSImageStreamRHEL9. + StreamRHEL9 = "rhel-9" + // StreamRHEL10 is the canonical stream name for RHEL 10. + // This value must match api/hypershift/v1beta1.OSImageStreamRHEL10. StreamRHEL10 = "rhel-10" ) diff --git a/support/releaseinfo/releaseinfo_test.go b/support/releaseinfo/releaseinfo_test.go index 3038817e6d21..a826add9fa1f 100644 --- a/support/releaseinfo/releaseinfo_test.go +++ b/support/releaseinfo/releaseinfo_test.go @@ -356,6 +356,30 @@ func TestStreamForName(t *testing.T) { expectError: true, expectContains: "rhel-9", }, + { + name: "When it is a pre-5.0 release image (has StreamMetadata, OSStreams empty) it should return that stream", + releaseImage: &ReleaseImage{ + ImageStream: &imageapi.ImageStream{}, + StreamMetadata: &stream.Stream{ + Stream: "rhcos-4.18", + Architectures: map[string]stream.Arch{"x86_64": {}}, + }, + }, + streamName: "", + expectStream: "rhcos-4.18", + }, + { + name: "When it is a 5.0+ release image (has OSStreams, StreamMetadata nil) it should return that stream", + releaseImage: &ReleaseImage{ + ImageStream: &imageapi.ImageStream{}, + OSStreams: map[string]*stream.Stream{ + "rhel-9": rhel9Stream, + "rhel-10": rhel10Stream, + }, + }, + streamName: "rhel-10", + expectStream: "rhcos-5.0", + }, } for _, tt := range tests { diff --git a/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/nodepool_types.go b/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/nodepool_types.go index 750360bffee5..5472fd818993 100644 --- a/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/nodepool_types.go +++ b/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/nodepool_types.go @@ -264,8 +264,10 @@ type NodePoolSpec struct { const ( // OSImageStreamRHEL9 is the OS image stream name for RHEL 9. + // This value must match support/releaseinfo.StreamRHEL9. OSImageStreamRHEL9 = "rhel-9" // OSImageStreamRHEL10 is the OS image stream name for RHEL 10. + // This value must match support/releaseinfo.StreamRHEL10. OSImageStreamRHEL10 = "rhel-10" )