Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 65 additions & 37 deletions pkg/manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,67 @@ var HypershiftSupportedVersions = HypershiftSupportedVersionsType{}
var reBranchVersion = regexp.MustCompile(`^(openshift-|release-)(\d+\.\d+)$`)
var reMajorMinorVersion = regexp.MustCompile(`^(\d+)\.(\d+)$`)

// platformQuotaSlices maps each cloud platform to its available quota-slice
// accounts. The first entry is the primary (default) account. Subsequent entries
// are alternates that can be selected when they have more free resources.
var platformQuotaSlices = map[string][]CloudAccountProfile{
"aws": {
{QuotaSlice: "aws-quota-slice"},
{
QuotaSlice: "aws-2-quota-slice",
ProfileName: "aws-2",
ProfileSecret: "cluster-secrets-aws-2",
AccountDomain: "aws-2.ci.openshift.org",
},
},
"azure": {
{QuotaSlice: "azure4-quota-slice"},
{
QuotaSlice: "azure-2-quota-slice",
ProfileName: "azure-2",
ProfileSecret: "cluster-secrets-azure-2",
AccountDomain: "ci2.azure.devcluster.openshift.com",
},
},
"gcp": {
{QuotaSlice: "gcp-quota-slice"},
{
QuotaSlice: "gcp-openshift-gce-devel-ci-2-quota-slice",
ProfileName: "gcp-openshift-gce-devel-ci-2",
ProfileSecret: "cluster-secrets-gcp-openshift-gce-devel-ci-2",
},
},
}

// selectCloudAccountProfile queries Boskos metrics for each quota-slice
// candidate for the given platform and returns the profile with the most free
// resources. Returns nil if the platform has no configured accounts, if the
// primary (index 0) has the most free resources (no conversion needed), or if
// Boskos metrics are unavailable (falls back to the default account).
func selectCloudAccountProfile(platform string, lClient LeaseClient) (*CloudAccountProfile, error) {
accounts, ok := platformQuotaSlices[platform]
if !ok || len(accounts) < 2 {
return nil, nil
}
bestIdx := 0
bestFree := -1
for i := range accounts {
metrics, err := lClient.Metrics(accounts[i].QuotaSlice)
if err != nil {
klog.Warningf("Failed to get metrics for %q leases, falling back to default account: %v", accounts[i].QuotaSlice, err)
return nil, nil
}
if metrics.Free > bestFree {
bestIdx = i
bestFree = metrics.Free
}
}
if bestIdx == 0 {
return nil, nil
}
return &accounts[bestIdx], nil
}

func (j Job) IsComplete() bool {
return j.Complete || len(j.Credentials) > 0 || (len(j.State) > 0 && j.State != prowapiv1.PendingState)
}
Expand Down Expand Up @@ -2170,44 +2231,11 @@ func (m *jobManager) LaunchJobForUser(req *JobRequest) (string, error) {

// check what leases are available for platform
if req.Architecture == "amd64" && m.lClient != nil {
switch req.Platform {
case "aws":
metrics1, err := m.lClient.Metrics("aws-quota-slice")
if err != nil {
return "", fmt.Errorf("failed to get metrics for `aws` leases: %v", err)
}
metrics2, err := m.lClient.Metrics("aws-2-quota-slice")
if err != nil {
return "", fmt.Errorf("failed to get metrics for `aws-2` leases: %v", err)
}
if metrics2.Free > metrics1.Free {
job.UseSecondaryAccount = true
}
case "azure":
metrics1, err := m.lClient.Metrics("azure4-quota-slice")
if err != nil {
return "", fmt.Errorf("failed to get metrics for `azure` leases: %v", err)
}
metrics2, err := m.lClient.Metrics("azure-2-quota-slice")
if err != nil {
return "", fmt.Errorf("failed to get metrics for `azure-2` leases: %v", err)
}
if metrics2.Free > metrics1.Free {
job.UseSecondaryAccount = true
}
case "gcp":
metrics1, err := m.lClient.Metrics("gcp-quota-slice")
if err != nil {
return "", fmt.Errorf("failed to get metrics for `gcp` leases: %v", err)
}
metrics2, err := m.lClient.Metrics("gcp-openshift-gce-devel-ci-2-quota-slice")
if err != nil {
return "", fmt.Errorf("failed to get metrics for `gcp-openshift-gce-devel-ci-2` leases: %v", err)
}
if metrics2.Free > metrics1.Free {
job.UseSecondaryAccount = true
}
profile, err := selectCloudAccountProfile(req.Platform, m.lClient)
if err != nil {
return "", err
}
job.CloudAccountProfile = profile
}

msg, err := func() (string, error) {
Expand Down
120 changes: 119 additions & 1 deletion pkg/manager/manager_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,124 @@
package manager

import "testing"
import (
"fmt"
"testing"

"github.com/openshift/ci-tools/pkg/lease"
)

type mockLeaseClient struct {
metrics map[string]lease.Metrics
}

func (m *mockLeaseClient) Metrics(rtype string) (lease.Metrics, error) {
if metrics, ok := m.metrics[rtype]; ok {
return metrics, nil
}
return lease.Metrics{}, fmt.Errorf("resource type %q not found", rtype)
}

func Test_selectCloudAccountProfile(t *testing.T) {
tests := []struct {
name string
platform string
metrics map[string]lease.Metrics
wantNil bool
wantProfile string
wantErr bool
}{
{
name: "platform not in map returns nil",
platform: "metal",
metrics: map[string]lease.Metrics{},
wantNil: true,
},
{
name: "primary has more free resources returns nil",
platform: "aws",
metrics: map[string]lease.Metrics{
"aws-quota-slice": {Free: 10, Leased: 5},
"aws-2-quota-slice": {Free: 3, Leased: 12},
},
wantNil: true,
},
{
name: "secondary has more free resources returns that profile",
platform: "aws",
metrics: map[string]lease.Metrics{
"aws-quota-slice": {Free: 2, Leased: 13},
"aws-2-quota-slice": {Free: 8, Leased: 7},
},
wantProfile: "aws-2",
},
{
name: "equal free counts returns nil (primary wins)",
platform: "aws",
metrics: map[string]lease.Metrics{
"aws-quota-slice": {Free: 5, Leased: 10},
"aws-2-quota-slice": {Free: 5, Leased: 10},
},
wantNil: true,
},
{
name: "all zero free returns nil",
platform: "aws",
metrics: map[string]lease.Metrics{
"aws-quota-slice": {Free: 0, Leased: 15},
"aws-2-quota-slice": {Free: 0, Leased: 15},
},
wantNil: true,
},
{
name: "gcp secondary wins",
platform: "gcp",
metrics: map[string]lease.Metrics{
"gcp-quota-slice": {Free: 1, Leased: 14},
"gcp-openshift-gce-devel-ci-2-quota-slice": {Free: 7, Leased: 8},
},
wantProfile: "gcp-openshift-gce-devel-ci-2",
},
{
name: "azure secondary wins",
platform: "azure",
metrics: map[string]lease.Metrics{
"azure4-quota-slice": {Free: 3, Leased: 12},
"azure-2-quota-slice": {Free: 9, Leased: 6},
},
wantProfile: "azure-2",
},
{
name: "metrics error falls back to default account",
platform: "aws",
metrics: map[string]lease.Metrics{},
wantNil: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
client := &mockLeaseClient{metrics: tt.metrics}
got, err := selectCloudAccountProfile(tt.platform, client)
if (err != nil) != tt.wantErr {
t.Errorf("selectCloudAccountProfile() error = %v, wantErr %v", err, tt.wantErr)
return
}
if tt.wantErr {
return
}
if tt.wantNil && got != nil {
t.Errorf("selectCloudAccountProfile() = %+v, want nil", got)
return
}
if !tt.wantNil && got == nil {
t.Errorf("selectCloudAccountProfile() = nil, want profile %q", tt.wantProfile)
return
}
if !tt.wantNil && got.ProfileName != tt.wantProfile {
t.Errorf("selectCloudAccountProfile() ProfileName = %q, want %q", got.ProfileName, tt.wantProfile)
}
})
}
}

func Test_containsValidVersion(t *testing.T) {
type args struct {
Expand Down
34 changes: 6 additions & 28 deletions pkg/manager/prow.go
Original file line number Diff line number Diff line change
Expand Up @@ -568,21 +568,11 @@ func (m *jobManager) newJob(job *Job) (string, error) {
}
}

// if a step based config, launch should now be the test config we will run; time to update the config for lease balancing
if job.UseSecondaryAccount {
switch job.Platform {
case "aws":
if err := convertAWSToAWS2(pj, sourceConfig); err != nil {
return "", fmt.Errorf("failed updating aws job to aws-2: %w", err)
}
case "gcp":
if err := convertGCPToGCP2(pj, sourceConfig); err != nil {
return "", fmt.Errorf("failed updating gcp job to gcp-openshift-gce-devel-ci-2: %w", err)
}
case "azure":
if err := convertAzureToAzure2(pj, sourceConfig); err != nil {
return "", fmt.Errorf("failed updating azure job to azure-2: %w", err)
}
// if an alternate cloud account was selected for lease balancing, apply it
if job.CloudAccountProfile != nil {
p := job.CloudAccountProfile
if err := applyClusterProfile(pj, sourceConfig, p.ProfileName, p.ProfileSecret, p.AccountDomain); err != nil {
return "", fmt.Errorf("failed applying cluster profile %q: %w", p.ProfileName, err)
}
}

Expand Down Expand Up @@ -1718,7 +1708,7 @@ func (e *resolvedEnvironment) Lookup(name string) string {
return ""
}

func convertToAccount2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration, profileName, profileSecret, accountDomain string) error {
func applyClusterProfile(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration, profileName, profileSecret, accountDomain string) error {
job.Labels["ci-operator.openshift.io/cloud-cluster-profile"] = profileName
for index, volume := range job.Spec.PodSpec.Volumes {
// TODO: only some ci-chat-bot jobs have this; check if they can all be removed
Expand Down Expand Up @@ -1749,15 +1739,3 @@ func convertToAccount2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuil
}
return nil
}

func convertAWSToAWS2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration) error {
return convertToAccount2(job, sourceConfig, "aws-2", "cluster-secrets-aws-2", "aws-2.ci.openshift.org")
}

func convertAzureToAzure2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration) error {
return convertToAccount2(job, sourceConfig, "azure-2", "cluster-secrets-azure-2", "ci2.azure.devcluster.openshift.com")
}

func convertGCPToGCP2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration) error {
return convertToAccount2(job, sourceConfig, "gcp-openshift-gce-devel-ci-2", "cluster-secrets-gcp-openshift-gce-devel-ci-2", "")
}
11 changes: 10 additions & 1 deletion pkg/manager/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,15 @@ type JobInput struct {
Refs []prowapiv1.Refs
}

// CloudAccountProfile holds the parameters needed to redirect a ProwJob to an
// alternate cloud account (e.g. aws-2 instead of the default aws account).
type CloudAccountProfile struct {
QuotaSlice string // boskos resource type, e.g. "aws-2-quota-slice"
ProfileName string // cluster profile name, e.g. "aws-2"
ProfileSecret string // k8s secret name, e.g. "cluster-secrets-aws-2"
AccountDomain string // base domain override (optional), e.g. "aws-2.ci.openshift.org"
}

// Job responds to user requests and tracks the state of the launched
// jobs. This object must be recreatable from a ProwJob, but the RequestedChannel
// field may be empty to indicate the user has already been notified.
Expand Down Expand Up @@ -486,7 +495,7 @@ type Job struct {

WorkflowName string

UseSecondaryAccount bool
CloudAccountProfile *CloudAccountProfile

Operator OperatorInfo
CatalogComplete bool
Expand Down