diff --git a/addons/ai-platform/operator/values-dev.yaml b/addons/ai-platform/operator/values-dev.yaml
new file mode 100644
index 0000000..b3df1e7
--- /dev/null
+++ b/addons/ai-platform/operator/values-dev.yaml
@@ -0,0 +1,3 @@
+# eks-agent-platform operator — dev deltas only (base is values.yaml).
+# config.environment is injected from the cluster Secret label by the
+# ApplicationSet, so it is intentionally not set here.
diff --git a/addons/ai-platform/operator/values-production.yaml b/addons/ai-platform/operator/values-production.yaml
new file mode 100644
index 0000000..07b9569
--- /dev/null
+++ b/addons/ai-platform/operator/values-production.yaml
@@ -0,0 +1,3 @@
+# eks-agent-platform operator — production deltas only (base is values.yaml).
+# config.environment is injected from the cluster Secret label by the
+# ApplicationSet, so it is intentionally not set here.
diff --git a/addons/ai-platform/operator/values-staging.yaml b/addons/ai-platform/operator/values-staging.yaml
new file mode 100644
index 0000000..e9f7372
--- /dev/null
+++ b/addons/ai-platform/operator/values-staging.yaml
@@ -0,0 +1,3 @@
+# eks-agent-platform operator — staging deltas only (base is values.yaml).
+# config.environment is injected from the cluster Secret label by the
+# ApplicationSet, so it is intentionally not set here.
diff --git a/addons/ai-platform/operator/values.yaml b/addons/ai-platform/operator/values.yaml
new file mode 100644
index 0000000..a091539
--- /dev/null
+++ b/addons/ai-platform/operator/values.yaml
@@ -0,0 +1,19 @@
+# eks-agent-platform operator — base Helm values (all environments).
+#
+# Per-cluster IRSA wiring (config.oidc.providerArn/issuerHost, the
+# serviceAccount eks.amazonaws.com/role-arn annotation) and config.environment /
+# config.region are injected by the addons-agent-operator ApplicationSet from
+# the in-cluster ArgoCD Secret (labels + annotations cluster-bootstrap sets).
+# Do NOT set them here — they embed the AWS account ID, which must never be
+# committed to this public repo.
+#
+# The operator's admission webhooks need a cert-manager Issuer. The chart
+# provisions a self-signed ClusterIssuer so the addon is self-contained; point
+# at a real issuer (Vault / ACM Private CA) per-env if preferred.
+webhooks:
+  certManager:
+    installSelfSignedIssuer: true
+
+# networkPolicy.engine defaults to cilium (the cluster CNI). The operator's
+# egress to the kube-apiserver uses a Cilium reserved identity that a vanilla
+# Kubernetes NetworkPolicy cannot match, so leave it on cilium.
diff --git a/applicationsets/addons-agent-operator.yaml b/applicationsets/addons-agent-operator.yaml
new file mode 100644
index 0000000..fc46593
--- /dev/null
+++ b/applicationsets/addons-agent-operator.yaml
@@ -0,0 +1,83 @@
+# Installs the eks-agent-platform operator on every cluster opted in via the
+# `eks-agent-platform/enabled=true` label that cluster-bootstrap sets on the
+# in-cluster ArgoCD Secret. The operator needs per-cluster IRSA wiring (its OIDC
+# provider + a role-arn ServiceAccount annotation) that embeds the AWS account
+# ID — so rather than committing it, cluster-bootstrap publishes those values as
+# Secret ANNOTATIONS and this ApplicationSet reads them through ArgoCD's cluster
+# generator and injects them as Helm values via valuesObject. The account ID
+# never lands in this (public) repo.
+#
+# The chart is sourced from git (the public eks-agent-platform repo's
+# charts/operator) rather than the OCI registry, so no chart release is required;
+# switch repoURL/chart to oci://ghcr.io/nanohype/eks-agent-platform/charts once
+# the operator chart is published there to match the other ai-platform addons.
+# NOTE: the operator container image (chart default
+# ghcr.io/nanohype/eks-agent-platform/operator, tag = chart version) must be
+# published multi-arch (incl. arm64 for Graviton) for the pods to start; pin
+# image.repository/tag in addons/ai-platform/operator/values.yaml otherwise.
+apiVersion: argoproj.io/v1alpha1
+kind: ApplicationSet
+metadata:
+  name: addons-agent-operator
+  namespace: argocd
+  annotations:
+    argocd.argoproj.io/sync-wave: "21"
+spec:
+  goTemplate: true
+  goTemplateOptions: ["missingkey=error"]
+  generators:
+    - clusters:
+        selector:
+          matchLabels:
+            argocd.argoproj.io/secret-type: cluster
+            eks-agent-platform/enabled: "true"
+  template:
+    metadata:
+      name: eks-agent-platform-operator
+      annotations:
+        argocd.argoproj.io/sync-wave: "21"
+    spec:
+      project: platform
+      sources:
+        - repoURL: https://github.com/nanohype/eks-agent-platform.git
+          targetRevision: main
+          path: charts/operator
+          helm:
+            releaseName: operator
+            # Per-cluster identity from the cluster Secret. Labels use dot access
+            # so missingkey=error fails fast when one is absent; annotation keys
+            # contain "/" and need `index`, which does NOT raise on a missing
+            # key, so cluster-bootstrap must set them. valuesObject overrides
+            # valueFiles, keeping the account-specific bits out of git.
+            valuesObject:
+              config:
+                environment: '{{ .metadata.labels.environment }}'
+                region: '{{ .metadata.labels.region }}'
+                oidc:
+                  providerArn: '{{ index .metadata.annotations "eks-agent-platform/oidc-provider-arn" }}'
+                  issuerHost: '{{ index .metadata.annotations "eks-agent-platform/oidc-issuer-host" }}'
+              serviceAccount:
+                annotations:
+                  eks.amazonaws.com/role-arn: '{{ index .metadata.annotations "eks-agent-platform/operator-role-arn" }}'
+            valueFiles:
+              - $values/addons/ai-platform/operator/values.yaml
+              - '$values/addons/ai-platform/operator/values-{{ .metadata.labels.environment }}.yaml'
+        - repoURL: https://github.com/nanohype/eks-gitops.git
+          targetRevision: main
+          ref: values
+      destination:
+        server: '{{ .server }}'
+        namespace: eks-agent-platform
+      syncPolicy:
+        automated:
+          prune: true
+          selfHeal: true
+        syncOptions:
+          - CreateNamespace=true
+          - ServerSideApply=true
+        retry:
+          limit: 5
+          backoff:
+            duration: 5s
+            factor: 2
+            maxDuration: 3m