diff --git a/README.md b/README.md index 970643cdef7..7b1c9a76d06 100644 --- a/README.md +++ b/README.md @@ -186,6 +186,9 @@ Goreman exposes services on the following ports: | Redis | 6379 | Cache & events | | PostgreSQL | 5432 | Database | | Pulsar | 6650 | Message broker | +| OTEL Collector gRPC | 4317 | OTLP ingest | +| OTEL Collector HTTP | 4318 | OTLP ingest | +| Jaeger UI | 16686 | Trace visualization | ## Documentation diff --git a/_local/binoculars/config-auth.yaml b/_local/binoculars/config-auth.yaml index 58aac40dbac..59c1fa8361b 100644 --- a/_local/binoculars/config-auth.yaml +++ b/_local/binoculars/config-auth.yaml @@ -1,6 +1,14 @@ httpPort: 8084 grpcPort: 50053 metricsPort: 9007 +observability: + enabled: true + exporter: + endpoint: "http://localhost:4318" + protocol: "http/protobuf" + traces: + sampler: "parent_based_trace_id_ratio" + samplerArg: 1.0 application: clusterId: local-cluster kubernetes: @@ -24,4 +32,4 @@ auth: cancel_any_jobs: ["admins"] reprioritize_any_jobs: ["admins"] watch_all_events: ["admins"] - execute_jobs: ["admins", "executors"] \ No newline at end of file + execute_jobs: ["admins", "executors"] diff --git a/_local/binoculars/config.yaml b/_local/binoculars/config.yaml index ea659fbf9c2..dcd6973e9e8 100644 --- a/_local/binoculars/config.yaml +++ b/_local/binoculars/config.yaml @@ -1,6 +1,14 @@ httpPort: 8084 grpcPort: 50053 metricsPort: 9007 +observability: + enabled: true + exporter: + endpoint: "http://localhost:4318" + protocol: "http/protobuf" + traces: + sampler: "parent_based_trace_id_ratio" + samplerArg: 1.0 application: clusterId: local-cluster kubernetes: diff --git a/_local/compose/full.yaml b/_local/compose/full.yaml index ec4fb276646..6a0e5b4d9a9 100644 --- a/_local/compose/full.yaml +++ b/_local/compose/full.yaml @@ -75,7 +75,8 @@ services: PULSAR_PREFIX_allowAutoTopicCreation: "true" PULSAR_PREFIX_allowAutoTopicCreationType: non-partitioned PULSAR_PREFIX_autoSkipNonRecoverableData: "true" - entrypoint: sh -c 
"bin/apply-config-from-env.py conf/standalone.conf && bin/pulsar standalone" + entrypoint: sh -c "bin/apply-config-from-env.py conf/standalone.conf && + bin/pulsar standalone" ports: - "6650:6650" - "8090:8080" @@ -88,6 +89,28 @@ services: retries: 10 start_period: 30s + jaeger: + container_name: jaeger + image: ${JAEGER_IMAGE:-jaegertracing/all-in-one:1.76.0} + environment: + - COLLECTOR_OTLP_ENABLED=true + ports: + - "16686:16686" # Jaeger UI + restart: unless-stopped + + otel-collector: + container_name: otel-collector + image: ${OTEL_IMAGE:-otel/opentelemetry-collector-contrib:0.154.0} + command: ["--config=/etc/otelcol-contrib/config.yaml"] + volumes: + - ../otel/collector-config.yaml:/etc/otelcol-contrib/config.yaml:ro + ports: + - "4317:4317" # OTLP gRPC + - "4318:4318" # OTLP HTTP + restart: unless-stopped + depends_on: + - jaeger + # ========================================================= # Database migrations (run once, then exit) # ========================================================= @@ -96,7 +119,7 @@ services: container_name: scheduler-migration image: ${ARMADA_IMAGE:-gresearch/armada-bundle}:${ARMADA_IMAGE_TAG:-latest} depends_on: - postgres: { condition: service_healthy } + postgres: {condition: service_healthy} volumes: - ../scheduler/config.yaml:/config/config.yaml:ro environment: @@ -108,7 +131,7 @@ services: container_name: lookout-migration image: ${ARMADA_IMAGE:-gresearch/armada-lookout-bundle}:${ARMADA_IMAGE_TAG:-latest} depends_on: - postgres: { condition: service_healthy } + postgres: {condition: service_healthy} volumes: - ../lookout/config.yaml:/config/config.yaml:ro environment: @@ -123,7 +146,7 @@ services: container_name: scheduler image: ${ARMADA_IMAGE:-gresearch/armada-bundle}:${ARMADA_IMAGE_TAG:-latest} depends_on: - scheduler-migration: { condition: service_completed_successfully } + scheduler-migration: {condition: service_completed_successfully} volumes: - ../scheduler/config.yaml:/config/config.yaml:ro environment: @@ 
-140,8 +163,8 @@ services: container_name: scheduleringester image: ${ARMADA_IMAGE:-gresearch/armada-bundle}:${ARMADA_IMAGE_TAG:-latest} depends_on: - scheduler-migration: { condition: service_completed_successfully } - pulsar: { condition: service_healthy } + scheduler-migration: {condition: service_completed_successfully} + pulsar: {condition: service_healthy} volumes: - ../scheduleringester/config.yaml:/config/config.yaml:ro environment: @@ -161,9 +184,9 @@ services: - "50051:50051" - "8081:8081" depends_on: - lookout-migration: { condition: service_completed_successfully } - pulsar: { condition: service_healthy } - redis: { condition: service_healthy } + lookout-migration: {condition: service_completed_successfully} + pulsar: {condition: service_healthy} + redis: {condition: service_healthy} volumes: - ../server/config.yaml:/config/config.yaml:ro environment: @@ -182,7 +205,7 @@ services: extra_hosts: - "host.docker.internal:host-gateway" depends_on: - scheduler: { condition: service_started } + scheduler: {condition: service_started} volumes: - ../executor/config.yaml:/config/config.yaml:ro - ../../.kube/internal:/.kube:ro @@ -196,8 +219,8 @@ services: container_name: eventingester image: ${ARMADA_IMAGE:-gresearch/armada-bundle}:${ARMADA_IMAGE_TAG:-latest} depends_on: - pulsar: { condition: service_healthy } - redis: { condition: service_healthy } + pulsar: {condition: service_healthy} + redis: {condition: service_healthy} volumes: - ../eventingester/config.yaml:/config/config.yaml:ro environment: @@ -211,8 +234,8 @@ services: container_name: lookoutingester image: ${ARMADA_IMAGE:-gresearch/armada-lookout-bundle}:${ARMADA_IMAGE_TAG:-latest} depends_on: - lookout-migration: { condition: service_completed_successfully } - pulsar: { condition: service_healthy } + lookout-migration: {condition: service_completed_successfully} + pulsar: {condition: service_healthy} volumes: - ../lookoutingester/config.yaml:/config/config.yaml:ro environment: @@ -229,7 +252,7 @@ 
services: ports: - "8089:8089" depends_on: - lookout-migration: { condition: service_completed_successfully } + lookout-migration: {condition: service_completed_successfully} volumes: - ../lookout/config.yaml:/config/config.yaml:ro environment: diff --git a/_local/compose/stack.yaml b/_local/compose/stack.yaml index 4ed542cf61c..c2398a03dae 100644 --- a/_local/compose/stack.yaml +++ b/_local/compose/stack.yaml @@ -61,7 +61,8 @@ services: PULSAR_PREFIX_allowAutoTopicCreation: "true" PULSAR_PREFIX_allowAutoTopicCreationType: non-partitioned PULSAR_PREFIX_autoSkipNonRecoverableData: "true" - entrypoint: sh -c "bin/apply-config-from-env.py conf/standalone.conf && bin/pulsar standalone" + entrypoint: sh -c "bin/apply-config-from-env.py conf/standalone.conf && + bin/pulsar standalone" ports: - "6650:6650" - "8090:8080" @@ -94,3 +95,25 @@ services: # No healthcheck on purpose: keycloak's realm import + warmup is slower than # `docker compose up --wait` is willing to block for. `mage dev:up auth` polls the # realm endpoint via waitForKeycloak before starting goreman. 
+ + jaeger: + container_name: jaeger + image: ${JAEGER_IMAGE:-jaegertracing/all-in-one:1.76.0} + environment: + - COLLECTOR_OTLP_ENABLED=true + ports: + - "16686:16686" # Jaeger UI + restart: unless-stopped + + otel-collector: + container_name: otel-collector + image: ${OTEL_IMAGE:-otel/opentelemetry-collector-contrib:0.154.0} + command: ["--config=/etc/otelcol-contrib/config.yaml"] + volumes: + - ../otel/collector-config.yaml:/etc/otelcol-contrib/config.yaml:ro + ports: + - "4317:4317" # OTLP gRPC + - "4318:4318" # OTLP HTTP + restart: unless-stopped + depends_on: + - jaeger diff --git a/_local/eventingester/config.yaml b/_local/eventingester/config.yaml index fecd76092aa..22eb2f0b2ab 100644 --- a/_local/eventingester/config.yaml +++ b/_local/eventingester/config.yaml @@ -9,6 +9,14 @@ pulsar: jobsetEventsTopic: "events" subscriptionName: "events-ingester" metricsPort: 9004 +observability: + enabled: true + exporter: + endpoint: "http://localhost:4318" + protocol: "http/protobuf" + traces: + sampler: "parent_based_trace_id_ratio" + samplerArg: 1.0 metrics: redis: enabled: true diff --git a/_local/executor/config-auth.yaml b/_local/executor/config-auth.yaml index de2f10a1d38..98da8547fc5 100644 --- a/_local/executor/config-auth.yaml +++ b/_local/executor/config-auth.yaml @@ -7,6 +7,14 @@ executorApiConnection: clientId: "armada-executor" clientSecret: "executor-secret" scopes: ["profile", "email"] +observability: + enabled: true + exporter: + endpoint: "http://localhost:4318" + protocol: "http/protobuf" + traces: + sampler: "parent_based_trace_id_ratio" + samplerArg: 1.0 metric: port: 9002 application: diff --git a/_local/executor/config.yaml b/_local/executor/config.yaml index ac38aad66e4..9fd905fc424 100644 --- a/_local/executor/config.yaml +++ b/_local/executor/config.yaml @@ -2,6 +2,14 @@ httpPort: 8082 executorApiConnection: armadaUrl: "localhost:50052" forceNoTls: true +observability: + enabled: true + exporter: + endpoint: "http://localhost:4318" + protocol: 
"http/protobuf" + traces: + sampler: "parent_based_trace_id_ratio" + samplerArg: 1.0 metric: port: 9002 application: diff --git a/_local/lookout/config-auth.yaml b/_local/lookout/config-auth.yaml index b8ac6dfa349..67682a080da 100644 --- a/_local/lookout/config-auth.yaml +++ b/_local/lookout/config-auth.yaml @@ -1,5 +1,13 @@ apiPort: 8089 metricsPort: 9003 +observability: + enabled: true + exporter: + endpoint: "http://localhost:4318" + protocol: "http/protobuf" + traces: + sampler: "parent_based_trace_id_ratio" + samplerArg: 1.0 corsAllowedOrigins: - "http://localhost:3000" - "http://localhost:8089" diff --git a/_local/lookout/config.yaml b/_local/lookout/config.yaml index c7e374ad73d..028b12f39e2 100644 --- a/_local/lookout/config.yaml +++ b/_local/lookout/config.yaml @@ -1,5 +1,13 @@ apiPort: 8089 metricsPort: 9003 +observability: + enabled: true + exporter: + endpoint: "http://localhost:4318" + protocol: "http/protobuf" + traces: + sampler: "parent_based_trace_id_ratio" + samplerArg: 1.0 corsAllowedOrigins: - "http://localhost:3000" - "http://localhost:8089" diff --git a/_local/lookoutingester/config.yaml b/_local/lookoutingester/config.yaml index fba04adaa74..fffb575434d 100644 --- a/_local/lookoutingester/config.yaml +++ b/_local/lookoutingester/config.yaml @@ -1,4 +1,12 @@ metricsPort: 9005 +observability: + enabled: true + exporter: + endpoint: "http://localhost:4318" + protocol: "http/protobuf" + traces: + sampler: "parent_based_trace_id_ratio" + samplerArg: 1.0 postgres: connection: host: localhost diff --git a/_local/otel/collector-config.yaml b/_local/otel/collector-config.yaml new file mode 100644 index 00000000000..a3b7316f796 --- /dev/null +++ b/_local/otel/collector-config.yaml @@ -0,0 +1,23 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + batch: + +exporters: + otlp/jaeger: + endpoint: jaeger:4317 + tls: + insecure: true + +service: + pipelines: + traces: + receivers: [otlp] + 
processors: [batch] + exporters: [otlp/jaeger] diff --git a/_local/scheduler/config-auth.yaml b/_local/scheduler/config-auth.yaml index 1a3be09fe48..cf7050199fb 100644 --- a/_local/scheduler/config-auth.yaml +++ b/_local/scheduler/config-auth.yaml @@ -2,6 +2,14 @@ grpc: port: 50052 tls: enabled: false +observability: + enabled: true + exporter: + endpoint: "http://localhost:4318" + protocol: "http/protobuf" + traces: + sampler: "parent_based_trace_id_ratio" + samplerArg: 1.0 auth: anonymousAuth: true permissionGroupMapping: diff --git a/_local/scheduler/config.yaml b/_local/scheduler/config.yaml index f9fd62026aa..8db93987075 100644 --- a/_local/scheduler/config.yaml +++ b/_local/scheduler/config.yaml @@ -2,6 +2,14 @@ grpc: port: 50052 tls: enabled: false +observability: + enabled: true + exporter: + endpoint: "http://localhost:4318" + protocol: "http/protobuf" + traces: + sampler: "parent_based_trace_id_ratio" + samplerArg: 1.0 auth: anonymousAuth: true permissionGroupMapping: diff --git a/_local/scheduleringester/config.yaml b/_local/scheduleringester/config.yaml index 2c767454531..bdc4f1b7292 100644 --- a/_local/scheduleringester/config.yaml +++ b/_local/scheduleringester/config.yaml @@ -1,5 +1,13 @@ metrics: port: 9006 +observability: + enabled: true + exporter: + endpoint: "http://localhost:4318" + protocol: "http/protobuf" + traces: + sampler: "parent_based_trace_id_ratio" + samplerArg: 1.0 postgres: connection: host: localhost diff --git a/_local/server/config-auth.yaml b/_local/server/config-auth.yaml index cbf9705c425..0a6f1f38f81 100644 --- a/_local/server/config-auth.yaml +++ b/_local/server/config-auth.yaml @@ -1,6 +1,14 @@ httpPort: 8081 grpcPort: 50051 metricsPort: 9000 +observability: + enabled: true + exporter: + endpoint: "http://localhost:4318" + protocol: "http/protobuf" + traces: + sampler: "parent_based_trace_id_ratio" + samplerArg: 1.0 corsAllowedOrigins: - "http://localhost:3000" - "http://localhost:8089" diff --git 
a/_local/server/config.yaml b/_local/server/config.yaml index b947686416d..212b54066a9 100644 --- a/_local/server/config.yaml +++ b/_local/server/config.yaml @@ -1,6 +1,14 @@ httpPort: 8081 grpcPort: 50051 metricsPort: 9000 +observability: + enabled: true + exporter: + endpoint: "http://localhost:4318" + protocol: "http/protobuf" + traces: + sampler: "parent_based_trace_id_ratio" + samplerArg: 1.0 corsAllowedOrigins: - "http://localhost:3000" - "http://localhost:8089" diff --git a/cmd/binoculars/main.go b/cmd/binoculars/main.go index cd4e0221766..969b7802de8 100644 --- a/cmd/binoculars/main.go +++ b/cmd/binoculars/main.go @@ -6,6 +6,7 @@ import ( "os" "os/signal" "syscall" + "time" "github.com/grpc-ecosystem/grpc-gateway/runtime" "github.com/spf13/pflag" @@ -19,6 +20,7 @@ import ( gateway "github.com/armadaproject/armada/internal/common/grpc" "github.com/armadaproject/armada/internal/common/health" log "github.com/armadaproject/armada/internal/common/logging" + "github.com/armadaproject/armada/internal/common/observability" "github.com/armadaproject/armada/internal/common/profiling" api "github.com/armadaproject/armada/pkg/api/binoculars" ) @@ -44,6 +46,18 @@ func main() { log.Info("Starting...") + // Initialize OpenTelemetry + if err := observability.InitOTel(config.Observability); err != nil { + log.Warnf("Failed to initialize OTel: %v", err) + } + defer func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := observability.ShutdownOTel(ctx); err != nil { + log.Warnf("Failed to shutdown OTel: %v", err) + } + }() + // Expose profiling endpoints if enabled. 
err := profiling.SetupPprof(config.Profiling, armadacontext.Background(), nil) if err != nil { diff --git a/cmd/eventingester/main.go b/cmd/eventingester/main.go index c6cca9d2791..6141fc081ab 100644 --- a/cmd/eventingester/main.go +++ b/cmd/eventingester/main.go @@ -1,13 +1,16 @@ package main import ( - "github.com/armadaproject/armada/internal/common/logging" - "github.com/armadaproject/armada/internal/eventingester" + "context" + "time" "github.com/spf13/pflag" "github.com/spf13/viper" "github.com/armadaproject/armada/internal/common" + "github.com/armadaproject/armada/internal/common/logging" + "github.com/armadaproject/armada/internal/common/observability" + "github.com/armadaproject/armada/internal/eventingester" "github.com/armadaproject/armada/internal/eventingester/configuration" ) @@ -32,5 +35,18 @@ func main() { userSpecifiedConfigs := viper.GetStringSlice(CustomConfigLocation) common.LoadConfig(&config, "./config/eventingester", userSpecifiedConfigs) + + // Initialize OpenTelemetry + if err := observability.InitOTel(config.Observability); err != nil { + logging.Warnf("Failed to initialize OTel: %v", err) + } + defer func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := observability.ShutdownOTel(ctx); err != nil { + logging.Warnf("Failed to shutdown OTel: %v", err) + } + }() + eventingester.Run(&config) } diff --git a/cmd/executor/main.go b/cmd/executor/main.go index 75567416a1f..16f4a0f2707 100644 --- a/cmd/executor/main.go +++ b/cmd/executor/main.go @@ -1,10 +1,12 @@ package main import ( + "context" "net/http" "os" "os/signal" "syscall" + "time" "github.com/prometheus/client_golang/prometheus" "github.com/spf13/pflag" @@ -14,6 +16,7 @@ import ( "github.com/armadaproject/armada/internal/common/armadacontext" "github.com/armadaproject/armada/internal/common/health" log "github.com/armadaproject/armada/internal/common/logging" + "github.com/armadaproject/armada/internal/common/observability" 
"github.com/armadaproject/armada/internal/common/profiling" "github.com/armadaproject/armada/internal/executor" "github.com/armadaproject/armada/internal/executor/configuration" @@ -38,6 +41,17 @@ func main() { userSpecifiedConfigs := viper.GetStringSlice(CustomConfigLocation) common.LoadConfig(&config, "./config/executor", userSpecifiedConfigs) + if err := observability.InitOTel(config.Observability); err != nil { + log.Warnf("Failed to initialize OTel: %v", err) + } + defer func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := observability.ShutdownOTel(ctx); err != nil { + log.Warnf("Failed to shutdown OTel: %v", err) + } + }() + // Expose profiling endpoints if enabled. err := profiling.SetupPprof(config.Profiling, armadacontext.Background(), nil) if err != nil { diff --git a/cmd/lookout/main.go b/cmd/lookout/main.go index af1eaaa6962..52f53fdee35 100644 --- a/cmd/lookout/main.go +++ b/cmd/lookout/main.go @@ -1,6 +1,7 @@ package main import ( + "context" "os" "os/signal" "syscall" @@ -14,6 +15,7 @@ import ( "github.com/armadaproject/armada/internal/common/armadacontext" "github.com/armadaproject/armada/internal/common/database" log "github.com/armadaproject/armada/internal/common/logging" + "github.com/armadaproject/armada/internal/common/observability" "github.com/armadaproject/armada/internal/common/profiling" "github.com/armadaproject/armada/internal/lookout" "github.com/armadaproject/armada/internal/lookout/configuration" @@ -163,6 +165,18 @@ func main() { userSpecifiedConfigs := viper.GetStringSlice(CustomConfigLocation) common.LoadConfig(&config, "./config/lookout", userSpecifiedConfigs) + // Initialize OpenTelemetry + if err := observability.InitOTel(config.Observability); err != nil { + log.Warnf("Failed to initialize OTel: %v", err) + } + defer func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := observability.ShutdownOTel(ctx); err != 
nil { + log.Warnf("Failed to shutdown OTel: %v", err) + } + }() + // Expose profiling endpoints if enabled. err := profiling.SetupPprof(config.Profiling, armadacontext.Background(), nil) if err != nil { diff --git a/cmd/lookoutingester/main.go b/cmd/lookoutingester/main.go index 5f35380d8d8..08d400c1302 100644 --- a/cmd/lookoutingester/main.go +++ b/cmd/lookoutingester/main.go @@ -1,11 +1,15 @@ package main import ( + "context" + "time" + "github.com/spf13/pflag" "github.com/spf13/viper" "github.com/armadaproject/armada/internal/common" log "github.com/armadaproject/armada/internal/common/logging" + "github.com/armadaproject/armada/internal/common/observability" "github.com/armadaproject/armada/internal/lookoutingester" "github.com/armadaproject/armada/internal/lookoutingester/benchmark" "github.com/armadaproject/armada/internal/lookoutingester/configuration" @@ -35,6 +39,18 @@ func main() { common.LoadConfig(&config, "./config/lookoutingester", userSpecifiedConfigs) + // Initialize OpenTelemetry + if err := observability.InitOTel(config.Observability); err != nil { + log.Warnf("Failed to initialize OTel: %v", err) + } + defer func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := observability.ShutdownOTel(ctx); err != nil { + log.Warnf("Failed to shutdown OTel: %v", err) + } + }() + runBenchmarks := viper.GetBool(Benchmark) if runBenchmarks { log.Info("Running Lookout Ingester benchmarks") diff --git a/cmd/scheduleringester/main.go b/cmd/scheduleringester/main.go index 625b2a97da6..1adf1241519 100644 --- a/cmd/scheduleringester/main.go +++ b/cmd/scheduleringester/main.go @@ -1,14 +1,17 @@ package main import ( + "context" "fmt" "os" + "time" "github.com/spf13/pflag" "github.com/spf13/viper" "github.com/armadaproject/armada/internal/common" "github.com/armadaproject/armada/internal/common/logging" + "github.com/armadaproject/armada/internal/common/observability" 
"github.com/armadaproject/armada/internal/scheduleringester" ) @@ -32,6 +35,18 @@ func main() { common.LoadConfig(&config, "./config/scheduleringester", userSpecifiedConfigs) + // Initialize OpenTelemetry + if err := observability.InitOTel(config.Observability); err != nil { + logging.Warnf("Failed to initialize OTel: %v", err) + } + defer func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := observability.ShutdownOTel(ctx); err != nil { + logging.Warnf("Failed to shutdown OTel: %v", err) + } + }() + if err := scheduleringester.Run(config); err != nil { fmt.Println(err) os.Exit(-1) diff --git a/cmd/server/main.go b/cmd/server/main.go index 38286f393b8..c20d4cc7398 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -1,6 +1,7 @@ package main import ( + "context" "fmt" "net/http" "os" @@ -17,6 +18,7 @@ import ( "github.com/armadaproject/armada/internal/common/health" "github.com/armadaproject/armada/internal/common/logging" log "github.com/armadaproject/armada/internal/common/logging" + "github.com/armadaproject/armada/internal/common/observability" "github.com/armadaproject/armada/internal/common/profiling" "github.com/armadaproject/armada/internal/server" "github.com/armadaproject/armada/internal/server/configuration" @@ -44,6 +46,18 @@ func main() { userSpecifiedConfigs := viper.GetStringSlice(CustomConfigLocation) common.LoadConfig(&config, "./config/server", userSpecifiedConfigs) + // Initialize OpenTelemetry + if err := observability.InitOTel(config.Observability); err != nil { + log.Warnf("Failed to initialize OTel: %v", err) + } + defer func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := observability.ShutdownOTel(ctx); err != nil { + log.Warnf("Failed to shutdown OTel: %v", err) + } + }() + log.Info("Starting...") // Run services within an errgroup to propagate errors between services. 
diff --git a/go.mod b/go.mod index 295fca8526a..d73f4ee5fcd 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/armadaproject/armada go 1.26.1 require ( - github.com/apache/pulsar-client-go v0.18.0 + github.com/apache/pulsar-client-go v0.15.1-candidate-1 github.com/coreos/go-oidc/v3 v3.17.0 github.com/go-openapi/analysis v0.24.2 github.com/go-openapi/jsonreference v0.21.4 @@ -32,11 +32,11 @@ require ( github.com/spf13/viper v1.21.0 github.com/stretchr/testify v1.11.1 golang.org/x/exp v0.0.0-20260112195511-716be5621a96 - golang.org/x/net v0.49.0 - golang.org/x/oauth2 v0.34.0 - golang.org/x/sync v0.19.0 + golang.org/x/net v0.55.0 + golang.org/x/oauth2 v0.36.0 + golang.org/x/sync v0.20.0 google.golang.org/genproto v0.0.0-20260122232226-8e98ce8d340d // indirect - google.golang.org/grpc v1.78.0 + google.golang.org/grpc v1.81.1 gopkg.in/yaml.v2 v2.4.0 k8s.io/api v0.32.11 k8s.io/apimachinery v0.32.11 @@ -59,7 +59,7 @@ require ( github.com/go-playground/validator/v10 v10.30.1 github.com/go-viper/mapstructure/v2 v2.5.0 github.com/gogo/status v1.1.1 - github.com/goreleaser/goreleaser/v2 v2.13.3 + github.com/goreleaser/goreleaser/v2 v2.8.2 github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.1.0 github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.3 github.com/jackc/pgx/v5 v5.8.0 @@ -75,12 +75,20 @@ require ( github.com/segmentio/fasthash v1.0.3 github.com/xitongsys/parquet-go v1.6.2 github.com/zalando/go-keyring v0.2.6 + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.69.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 + go.opentelemetry.io/otel v1.44.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0 + go.opentelemetry.io/otel/metric v1.44.0 + go.opentelemetry.io/otel/sdk v1.44.0 + go.opentelemetry.io/otel/trace v1.44.0 go.uber.org/atomic v1.11.0 go.uber.org/mock v0.6.0 - golang.org/x/term 
v0.39.0 - golang.org/x/text v0.33.0 + golang.org/x/term v0.43.0 + golang.org/x/text v0.37.0 golang.org/x/time v0.14.0 - google.golang.org/genproto/googleapis/api v0.0.0-20260122232226-8e98ce8d340d + google.golang.org/genproto/googleapis/api v0.0.0-20260226221140-a57be14db171 gopkg.in/inf.v0 v0.9.1 gopkg.in/natefinch/lumberjack.v2 v2.2.1 gopkg.in/yaml.v3 v3.0.1 @@ -91,15 +99,16 @@ require ( al.essio.dev/pkg/shellescape v1.6.0 // indirect charm.land/lipgloss/v2 v2.0.0-beta.3.0.20251120230642-dcccabe2cd63 // indirect dario.cat/mergo v1.0.2 // indirect + github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect + github.com/99designs/keyring v1.2.1 // indirect github.com/AlekSi/pointer v1.2.0 // indirect - github.com/AthenZ/athenz v1.12.14 // indirect + github.com/AthenZ/athenz v1.12.12 // indirect github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect github.com/DataDog/zstd v1.5.7 // indirect github.com/Masterminds/goutils v1.1.1 // indirect github.com/Masterminds/sprig/v3 v3.3.0 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/ProtonMail/go-crypto v1.3.0 // indirect - github.com/RoaringBitmap/roaring/v2 v2.14.4 // indirect github.com/alecthomas/chroma/v2 v2.23.1 // indirect github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect github.com/apache/thrift v0.22.0 // indirect @@ -112,6 +121,7 @@ require ( github.com/blang/semver/v4 v4.0.0 // indirect github.com/buger/jsonparser v1.1.1 // indirect github.com/caarlos0/log v0.5.4 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/charmbracelet/colorprofile v0.4.1 // indirect github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834 // indirect @@ -132,9 +142,11 @@ require ( github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/dlclark/regexp2 v1.11.5 // indirect github.com/docker/go-units v0.5.0 // indirect + 
github.com/dvsekhvalnov/jose2go v1.6.0 // indirect github.com/emicklei/go-restful/v3 v3.13.0 // indirect github.com/emirpasic/gods v1.18.1 // indirect github.com/fatih/camelcase v1.0.0 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/fxamacker/cbor/v2 v2.9.0 // indirect github.com/gabriel-vasile/mimetype v1.4.12 // indirect @@ -142,8 +154,9 @@ require ( github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect github.com/go-git/go-billy/v5 v5.7.0 // indirect github.com/go-git/go-git/v5 v5.16.4 // indirect - github.com/go-jose/go-jose/v4 v4.1.3 // indirect + github.com/go-jose/go-jose/v4 v4.1.4 // indirect github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-openapi/jsonpointer v0.22.4 // indirect github.com/go-openapi/swag/cmdutils v0.25.4 // indirect github.com/go-openapi/swag/conv v0.25.4 // indirect @@ -159,6 +172,7 @@ require ( github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect github.com/gobwas/glob v0.2.3 // indirect + github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 // indirect github.com/godbus/dbus/v5 v5.2.2 // indirect github.com/gogo/googleapis v1.4.1 // indirect github.com/golang-jwt/jwt/v5 v5.3.0 // indirect @@ -173,6 +187,8 @@ require ( github.com/goreleaser/nfpm/v2 v2.44.1 // indirect github.com/gorilla/css v1.0.1 // indirect github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 // indirect + github.com/gsterjov/go-libsecret v0.0.0-20161001094733-a6f4afe4910c // indirect github.com/hamba/avro/v2 v2.31.0 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-immutable-radix v1.3.1 // indirect @@ -199,12 +215,13 @@ require ( github.com/microcosm-cc/bluemonday v1.0.27 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect 
github.com/mitchellh/reflectwalk v1.0.2 // indirect - github.com/moby/go-archive v0.2.0 // indirect + github.com/moby/sys/sequential v0.6.0 // indirect + github.com/moby/sys/user v0.4.0 // indirect github.com/moby/term v0.5.2 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect - github.com/mschoch/smat v0.2.0 // indirect + github.com/mtibben/percent v0.2.1 // indirect github.com/muesli/cancelreader v0.2.2 // indirect github.com/muesli/reflow v0.3.0 // indirect github.com/muesli/termenv v0.16.0 // indirect @@ -235,13 +252,16 @@ require ( github.com/yuin/goldmark-emoji v1.0.6 // indirect gitlab.com/digitalxero/go-conventional-commit v1.0.7 // indirect go.mongodb.org/mongo-driver v1.17.7 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0 // indirect + go.opentelemetry.io/proto/otlp v1.9.0 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect - golang.org/x/crypto v0.47.0 // indirect - golang.org/x/mod v0.32.0 // indirect - golang.org/x/sys v0.40.0 // indirect + golang.org/x/crypto v0.51.0 // indirect + golang.org/x/mod v0.35.0 // indirect + golang.org/x/sys v0.45.0 // indirect golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20260122232226-8e98ce8d340d // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260526163538-3dc84a4a5aaa // indirect google.golang.org/protobuf v1.36.11 // indirect gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect diff --git a/go.sum b/go.sum index f45e1a98563..75356616c05 100644 --- a/go.sum +++ b/go.sum @@ -32,7 +32,7 @@ cloud.google.com/go v0.97.0/go.mod h1:GF7l59pYBVlXQIBLx3a761cZ41F9bBH3JUlihCt2Ud 
cloud.google.com/go v0.99.0/go.mod h1:w0Xx2nLzqWJPuozYQX+hFfCSI8WioryfRDzkoI/Y2ZA= cloud.google.com/go v0.100.1/go.mod h1:fs4QogzfH5n2pBXBP9vRiU+eCny7lD2vmFZy79Iuw1U= cloud.google.com/go v0.100.2/go.mod h1:4Xra9TjzAeYHrl5+oeLlzbM2k3mjVhZh4UqTZ//w99A= -cloud.google.com/go v0.123.0 h1:2NAUJwPR47q+E35uaJeYoNhuNEM9kM8SjgRgdeOJUSE= +cloud.google.com/go v0.115.1 h1:Jo0SM9cQnSkYfp44+v+NQXHpcHqlnRJk2qxh6yvxxxQ= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= @@ -78,10 +78,14 @@ dario.cat/mergo v1.0.2 h1:85+piFYR1tMbRrLcDwR18y4UKJ3aH1Tbzi24VRW1TK8= dario.cat/mergo v1.0.2/go.mod h1:E/hbnu0NxMFBjpMIE34DRGLWqDy0g5FuKDhCb31ngxA= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8= +github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 h1:/vQbFIOMbk2FiG/kXiLl8BRyzTWDw7gX/Hz7Dd5eDMs= +github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4/go.mod h1:hN7oaIRCjzsZ2dE+yG5k+rsdt3qcwykqK6HVGcKwsw4= +github.com/99designs/keyring v1.2.1 h1:tYLp1ULvO7i3fI5vE21ReQuj99QFSs7lGm0xWyJo87o= +github.com/99designs/keyring v1.2.1/go.mod h1:fc+wB5KTk9wQ9sDx0kFXB3A0MaeGHM9AwRStKOQ5vOA= github.com/AlekSi/pointer v1.2.0 h1:glcy/gc4h8HnG2Z3ZECSzZ1IX1x2JxRVuDzaJwQE0+w= github.com/AlekSi/pointer v1.2.0/go.mod h1:gZGfd3dpW4vEc/UlyfKKi1roIqcCgwOIvb0tSNSBle0= -github.com/AthenZ/athenz v1.12.14 h1:y/SbWMBU1CejnkLSWgGOJuQEBEcCGDn9bsPGDEPAqDc= -github.com/AthenZ/athenz v1.12.14/go.mod h1:syp1M8L/dB9KimW+VKgpAWZIart3HVTbWgm0smRXLVI= +github.com/AthenZ/athenz v1.12.12 h1:Upf5Zx96GAgOGRwnGZN2YdgNGd52p+yyLZ85WvHpdC8= +github.com/AthenZ/athenz v1.12.12/go.mod 
h1:tepNDlRtQPpJ0f8C1WNx8T/L/C/D3fbA7FrGVI2fbFc= github.com/Azure/azure-amqp-common-go/v3 v3.2.1/go.mod h1:O6X1iYHP7s2x7NjUKsXVhkwWrQhxrd+d8/3rRadj4CI= github.com/Azure/azure-amqp-common-go/v3 v3.2.2/go.mod h1:O6X1iYHP7s2x7NjUKsXVhkwWrQhxrd+d8/3rRadj4CI= github.com/Azure/azure-pipeline-go v0.2.3/go.mod h1:x841ezTBIMG6O3lAcl8ATHnsOPVl2bqk7S3ta6S6u4k= @@ -144,8 +148,6 @@ github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/ProtonMail/go-crypto v1.3.0 h1:ILq8+Sf5If5DCpHQp4PbZdS1J7HDFRXz/+xKBiRGFrw= github.com/ProtonMail/go-crypto v1.3.0/go.mod h1:9whxjD8Rbs29b4XWbB8irEcE8KHMqaR2e7GWU1R+/PE= -github.com/RoaringBitmap/roaring/v2 v2.14.4 h1:4aKySrrg9G/5oRtJ3TrZLObVqxgQ9f1znCRBwEwjuVw= -github.com/RoaringBitmap/roaring/v2 v2.14.4/go.mod h1:oMvV6omPWr+2ifRdeZvVJyaz+aoEUopyv5iH0u/+wbY= github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0= github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= @@ -159,8 +161,8 @@ github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kd github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516/go.mod h1:QNYViu/X0HXDHw7m3KXzWSVXIbfUvJqBFe6Gj8/pYA0= github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 h1:q4dksr6ICHXqG5hm0ZW5IHyeEJXoIJSOZeBLmWPNeIQ= github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40/go.mod h1:Q7yQnSMnLvcXlZ8RV+jwz/6y1rQTqbX6C82SndT52Zs= -github.com/apache/pulsar-client-go v0.18.0 h1:YsySoOds7WCXkRcOKHb85gk/v1Jndp+2oCkkRQEowUA= -github.com/apache/pulsar-client-go v0.18.0/go.mod h1:GKmTD1u5YLuhUnoVTNGdhdGNAYhoglWNWgwLJZTljAw= +github.com/apache/pulsar-client-go v0.15.1-candidate-1 h1:5LFEXv7goIO3XbQHpNWXjxp7xlduOVascUY4s90JNmc= 
+github.com/apache/pulsar-client-go v0.15.1-candidate-1/go.mod h1:HyzPvgO7Nc48/Mzk7Coo1YaZY+SN63F+nNwkHmjXkSI= github.com/apache/thrift v0.0.0-20181112125854-24918abba929/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/apache/thrift v0.14.2/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/apache/thrift v0.22.0 h1:r7mTJdj51TMDe6RtcmNdQxgn9XcyfGDOzegMDRg47uc= @@ -247,7 +249,6 @@ github.com/caarlos0/testfs v0.4.4 h1:3PHvzHi5Lt+g332CiShwS8ogTgS3HjrmzZxCm6JCDr8 github.com/caarlos0/testfs v0.4.4/go.mod h1:bRN55zgG4XCUVVHZCeU+/Tz1Q6AxEJOEJTliBy+1DMk= github.com/cavaliergopher/cpio v1.0.1 h1:KQFSeKmZhv0cr+kawA3a0xTQCU4QxXF1vhU7P7av2KM= github.com/cavaliergopher/cpio v1.0.1/go.mod h1:pBdaqQjnvXxdS/6CvNDwIANIFSP0xRKI16PX4xejRQc= -github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= @@ -302,10 +303,6 @@ github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWH github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ= github.com/colinmarc/hdfs/v2 v2.1.1/go.mod h1:M3x+k8UKKmxtFu++uAZ0OtDU8jR3jnaZIAc6yK4Ue0c= -github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI= -github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= -github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE= -github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= 
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A= @@ -344,13 +341,15 @@ github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZ github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/dnaeon/go-vcr v1.1.0/go.mod h1:M7tiix8f0r6mKKJ3Yq/kqU1OYf3MnfmBWVbPx/yU9ko= github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= -github.com/docker/docker v28.5.2+incompatible h1:DBX0Y0zAjZbSrm1uzOkdr1onVghKaftjlSWt4AFexzM= -github.com/docker/docker v28.5.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= -github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94= -github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE= +github.com/docker/docker v27.5.0+incompatible h1:um++2NcQtGRTz5eEgO6aJimo6/JxrTXC941hd05JO6U= +github.com/docker/docker v27.5.0+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c= +github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/dvsekhvalnov/jose2go v1.6.0 h1:Y9gnSnP4qEI0+/uQkHvFXeD2PLPJeXEL+ySMEA2EjTY= +github.com/dvsekhvalnov/jose2go v1.6.0/go.mod h1:QsHjhyTlD/lAVqn/NSbVZmSCGeDehTB/mPZadG+mhXU= github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o= github.com/elazarl/goproxy v1.7.2/go.mod h1:82vkLNir0ALaW14Rc399OTTjyNREgmdL2cVoIbS6XaE= github.com/emicklei/go-restful/v3 v3.13.0 
h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes= @@ -408,11 +407,12 @@ github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-ini/ini v1.25.4/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= -github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs= -github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= +github.com/go-jose/go-jose/v4 v4.1.4 h1:moDMcTHmvE6Groj34emNPLs/qtYXRVcd6S7NHbHz3kA= +github.com/go-jose/go-jose/v4 v4.1.4/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= @@ -492,6 +492,8 @@ github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJA github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo= github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= +github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 h1:ZpnhV/YsD2/4cESfV5+Hoeu/iUR3ruzNvZ+yQfO03a0= +github.com/godbus/dbus 
v0.0.0-20190726142602-4481cbc300e2/go.mod h1:bBOAhwG1umN6/6ZUMtDFBMQR8jRg9O75tm9K00oMsK4= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.2.2 h1:TUR3TgtSVDmjiXOgAAyaZbYmIeP3DPkld3jgKGV8mXQ= github.com/godbus/dbus/v5 v5.2.2/go.mod h1:3AAv2+hPq5rdnr5txxxRwiGjPXamgoIHgz9FPBfOp3c= @@ -633,8 +635,8 @@ github.com/goreleaser/chglog v0.7.4 h1:3pnNt/XCrUcAOq+KC91Azlgp5CRv4GHo1nl8Aws7O github.com/goreleaser/chglog v0.7.4/go.mod h1:dTVoZZagTz7hHdWaZ9OshHntKiF44HbWIHWxYJQ/h0Y= github.com/goreleaser/fileglob v1.4.0 h1:Y7zcUnzQjT1gbntacGAkIIfLv+OwojxTXBFxjSFoBBs= github.com/goreleaser/fileglob v1.4.0/go.mod h1:1pbHx7hhmJIxNZvm6fi6WVrnP0tndq6p3ayWdLn1Yf8= -github.com/goreleaser/goreleaser/v2 v2.13.3 h1:S8d13YgzzFXxoUJ9NJInuyq3lPNCXTcuW8wSvM+rXnQ= -github.com/goreleaser/goreleaser/v2 v2.13.3/go.mod h1:Rj+yhhXrO6WHc6cNh1GggpxzhhHXv9lczL5M4cSV3oA= +github.com/goreleaser/goreleaser/v2 v2.8.2 h1:S7fQyaumFjJKkUKQ2yHLKanfs2Uc1JK+P9mzDAc5hsE= +github.com/goreleaser/goreleaser/v2 v2.8.2/go.mod h1:dqm6yLhjxeROOrM+Y9LvBToheVcgJSd1oqShSJcR+dQ= github.com/goreleaser/nfpm/v2 v2.44.1 h1:g+QNjkEx+C2Zu8dB48t9da/VfV0CWS5TMjxT8HG1APY= github.com/goreleaser/nfpm/v2 v2.44.1/go.mod h1:drIYLqkla9SaOLbSnaFOmSIv5LXGfhHcbK54st97b4s= github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8= @@ -648,6 +650,10 @@ github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.3 h1:B+8ClL/kCQkRiU82d9xajR github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.3/go.mod h1:NbCUVmiS4foBGBHOYlCT25+YmGpJ32dZPi75pGEUpj4= github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 h1:NmZ1PKzSTQbuGHw9DGPFomqkkLWMC+vZCkfs+FHv1Vg= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3/go.mod h1:zQrxl1YP88HQlA6i9c63DSVPFklWpGX4OWAc9bFuaH4= 
+github.com/gsterjov/go-libsecret v0.0.0-20161001094733-a6f4afe4910c h1:6rhixN/i8ZofjG1Y75iExal34USq5p+wiN1tpie8IrU= +github.com/gsterjov/go-libsecret v0.0.0-20161001094733-a6f4afe4910c/go.mod h1:NMPJylDgVpX0MLRlPy15sqSwOFv/U1GZ2m21JhFfek0= github.com/hamba/avro/v2 v2.31.0 h1:wv3nmua7lCEIwWsb6vqsTS3pXktTxcKg5eoyNu0VhrU= github.com/hamba/avro/v2 v2.31.0/go.mod h1:t6lJYAGE5Mswfn17zjtyQsssRQgnqO6TXLBCHHWRqrw= github.com/hanwen/go-fuse v1.0.0/go.mod h1:unqXarDXqzAk0rt98O2tVndEPIpUgLD9+rwFisZH3Ok= @@ -839,8 +845,6 @@ github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zx github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= -github.com/moby/go-archive v0.2.0 h1:zg5QDUM2mi0JIM9fdQZWC7U8+2ZfixfTYoHL7rWUcP8= -github.com/moby/go-archive v0.2.0/go.mod h1:mNeivT14o8xU+5q1YnNrkQVpK+dnNe/K6fHqnTg4qPU= github.com/moby/patternmatcher v0.6.0 h1:GmP9lR19aU5GqSSFko+5pRqHi+Ohk1O69aFiKkVGiPk= github.com/moby/patternmatcher v0.6.0/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc= github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU= @@ -865,8 +869,8 @@ github.com/montanaflynn/stats v0.6.6/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt github.com/montanaflynn/stats v0.7.0/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow= github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= -github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= -github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= +github.com/mtibben/percent v0.2.1 h1:5gssi8Nqo8QU/r2pynCm+hBQHpkB/uNK7BJCFogWdzs= +github.com/mtibben/percent v0.2.1/go.mod 
h1:KG9uO+SZkUp+VkRHsCdYQV3XSZrrSpR3O9ibNBTZrns= github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s= @@ -877,6 +881,7 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/ncw/swift v1.0.52/go.mod h1:23YIA4yWVnGwv2dQlN4bB7egfYX6YLn0Yo/S6zZO/ZM= github.com/nhooyr/websocket v1.8.7/go.mod h1:B70DZP8IakI65RVQ51MsWP/8jndNma26DVA/nFSCgW0= +github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= @@ -885,14 +890,14 @@ github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= github.com/onsi/ginkgo/v2 v2.21.0 h1:7rg/4f3rB88pb5obDgNZrNHrQ4e6WpjonchcpuBRnZM= github.com/onsi/ginkgo/v2 v2.21.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= -github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y= -github.com/onsi/gomega v1.37.0/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0= +github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4= +github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= github.com/openconfig/goyang v1.6.3 h1:9nWXBwd6b4+nZr8ni7O4zUXVhrVMXCLFz8os5YWFuo4= github.com/openconfig/goyang v1.6.3/go.mod h1:5WolITjek1NF8yrNERyVZ7jqjOClJTpO8p/+OwmETM4= github.com/opencontainers/go-digest v1.0.0 
h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= -github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= -github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= +github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= +github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= github.com/pborman/getopt v0.0.0-20180729010549-6fdd0a2c7117/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= @@ -1065,19 +1070,29 @@ go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.64.0 h1:ssfIgGNANqpVFCndZvcuyKbl0g+UAVcbBcqGkG28H0Y= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.64.0/go.mod h1:GQ/474YrbE4Jx8gZ4q5I4hrhUzM6UPzyrqJYV2AqPoQ= -go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48= -go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8= -go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0= -go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs= -go.opentelemetry.io/otel/sdk v1.39.0 h1:nMLYcjVsvdui1B/4FRkwjzoRVsMK8uL/cj0OyhKzt18= -go.opentelemetry.io/otel/sdk v1.39.0/go.mod h1:vDojkC4/jsTJsE+kh+LXYQlbL8CgrEcwmt1ENZszdJE= 
-go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2WKg+sEJTtB8= -go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew= -go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI= -go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.69.0 h1:2yEATaop1/a1I4psnSLgWVPLWwCzkqWakgJy7xTDVy0= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.69.0/go.mod h1:D7J12YRapIekYyPWgGPlA/23pRmpSEZC5xJC/TTLI9U= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 h1:yd02MEjBdJkG3uabWP9apV+OuWRIXGDuJEUJbOHmCFU= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0/go.mod h1:umTcuxiv1n/s/S6/c2AT/g2CQ7u5C59sHDNmfSwgz7Q= +go.opentelemetry.io/otel v1.44.0 h1:JjwHmHpA4iZ3wBxluu2fbbE7j4kqlE8jXyAyPXH7HqU= +go.opentelemetry.io/otel v1.44.0/go.mod h1:BMgjTHL9WPRlRjL2oZCBTL4whCGtXch2H4BhOPIAyYc= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0 h1:f0cb2XPmrqn4XMy9PNliTgRKJgS5WcL/u0/WRYGz4t0= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0/go.mod h1:vnakAaFckOMiMtOIhFI2MNH4FYrZzXCYxmb1LlhoGz8= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 h1:5pojmb1U1AogINhN3SurB+zm/nIcusopeBNp42f45QM= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0/go.mod h1:57gTHJSE5S1tqg+EKsLPlTWhpHMsWlVmer+LA926XiA= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0 h1:wpMfgF8E1rkrT1Z6meFh1NDtownE9Ii3n3X2GJYjsaU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0/go.mod h1:wAy0T/dUbs468uOlkT31xjvqQgEVXv58BRFWEgn5v/0= +go.opentelemetry.io/otel/metric v1.44.0 h1:1w0gILTcHdr3YI+ixLyjemwrVnsMURbTZFrSYCdDdmc= +go.opentelemetry.io/otel/metric v1.44.0/go.mod h1:8O7hanEPBNgEMmybD3s2VBKcgWOCsA6tzHBPODAiquo= +go.opentelemetry.io/otel/sdk 
v1.44.0 h1:nHYwb9lK+fJPU/dnT6s7W7Z8itMWyqrnVfbheVYrZ58= +go.opentelemetry.io/otel/sdk v1.44.0/go.mod h1:Osuydd3Se74nqjAKxid74N5eC+jfEqfTegHRnq58oK0= +go.opentelemetry.io/otel/sdk/metric v1.44.0 h1:3LlKgI+VjbVsjNRFZJZAJ30WjXC5VkNRks6si09iEfI= +go.opentelemetry.io/otel/sdk/metric v1.44.0/go.mod h1:5B5pMARnXxKhltooO4xUuCBorl65a4EpnTalObqOigA= +go.opentelemetry.io/otel/trace v1.44.0 h1:jxF5CsGYCe74MCRx2X4g7WsY/VBKRqqpNvXlX/6gtIk= +go.opentelemetry.io/otel/trace v1.44.0/go.mod h1:oLl1jrMQAVo6v3GAggN+1VH9VIz9iUSvW53sW1Q8PIE= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= +go.opentelemetry.io/proto/otlp v1.9.0 h1:l706jCMITVouPOqEnii2fIAuO3IVGBRPV5ICjceRb/A= +go.opentelemetry.io/proto/otlp v1.9.0/go.mod h1:xE+Cx5E/eEHw+ISFkwPLwCZefwVjY+pqKg1qcK03+/4= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= @@ -1127,8 +1142,8 @@ golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0 golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU= golang.org/x/crypto v0.9.0/go.mod h1:yrmDGqONDYtNj3tH8X9dzUun2m2lzPa9ngI6/RUPGR0= -golang.org/x/crypto v0.47.0 h1:V6e3FRj+n4dbpw86FJ8Fv7XVOql7TEwpHapKoMJ/GO8= -golang.org/x/crypto v0.47.0/go.mod h1:ff3Y9VzzKbwSSEzWqJsJVBnWmRwRSHt/6Op5n9bQc4A= +golang.org/x/crypto v0.51.0 h1:IBPXwPfKxY7cWQZ38ZCIRPI50YLeevDLlLnyC5wRGTI= +golang.org/x/crypto v0.51.0/go.mod h1:8AdwkbraGNABw2kOX6YFPs3WM22XqI4EXEd8g+x7Oc8= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp 
v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -1180,8 +1195,8 @@ golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.32.0 h1:9F4d3PHLljb6x//jOyokMv3eX+YDeepZSEo3mFJy93c= -golang.org/x/mod v0.32.0/go.mod h1:SgipZ/3h2Ci89DlEtEXWUk/HteuRin+HHhN+WbNhguU= +golang.org/x/mod v0.35.0 h1:Ww1D637e6Pg+Zb2KrWfHQUnH2dQRLBQyAtpr/haaJeM= +golang.org/x/mod v0.35.0/go.mod h1:+GwiRhIInF8wPm+4AoT6L0FA1QWAad3OMdTRx4tFYlU= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -1235,8 +1250,8 @@ golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= -golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= -golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= +golang.org/x/net v0.55.0 h1:bcvxaJn3e1U6InsFWt1JUq1aSjnRxLzT2rtD2KfkDF8= +golang.org/x/net v0.55.0/go.mod h1:L5U2KuzuOe1lY7Z+aWVIKK6qEeJXnXV9yzGA+WCHJww= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 
v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -1257,8 +1272,8 @@ golang.org/x/oauth2 v0.0.0-20211005180243-6b3c2da341f1/go.mod h1:KelEdhl1UZF7XfJ golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b/go.mod h1:DAh4E804XQdzx2j+YRIaUnCqCV2RuMz24cGBJ5QYIrc= golang.org/x/oauth2 v0.0.0-20220309155454-6242fa91716a/go.mod h1:DAh4E804XQdzx2j+YRIaUnCqCV2RuMz24cGBJ5QYIrc= -golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= -golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/oauth2 v0.36.0 h1:peZ/1z27fi9hUOFCAZaHyrpWG5lwe0RJEEEeH0ThlIs= +golang.org/x/oauth2 v0.36.0/go.mod h1:YDBUJMTkDnJS+A4BP4eZBjCqtokkg1hODuPjwiGPO7Q= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -1272,8 +1287,8 @@ golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= -golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -1356,16 +1371,16 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= -golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.45.0 h1:dO4czNzziLiiXplLQgBCEpCvXQ3dnkn0SdaZSYdQ+FY= +golang.org/x/sys v0.45.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= -golang.org/x/term v0.39.0 h1:RclSuaJf32jOqZz74CkPA9qFuVTX7vhLlpfj/IGWlqY= -golang.org/x/term v0.39.0/go.mod h1:yxzUCTP/U+FzoxfdKmLaA0RV1WgE0VY7hXBwKtY/4ww= +golang.org/x/term v0.43.0 h1:S4RLU2sB31O/NCl+zFN9Aru9A/Cq2aqKpTZJ6B+DwT4= +golang.org/x/term v0.43.0/go.mod h1:lrhlHNdQJHO+1qVYiHfFKVuVioJIheAc3fBSMFYEIsk= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod 
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1378,8 +1393,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= -golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= +golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc= +golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -1454,8 +1469,8 @@ golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.41.0 h1:a9b8iMweWG+S0OBnlU36rzLp20z1Rp10w+IY2czHTQc= -golang.org/x/tools v0.41.0/go.mod h1:XSY6eDqxVNiYgezAVqqCeihT4j1U2CCsqvH3WhQpnlg= +golang.org/x/tools v0.44.0 h1:UP4ajHPIcuMjT1GqzDWRlalUEoY+uzoZKnhOjbIPD2c= +golang.org/x/tools v0.44.0/go.mod h1:KA0AfVErSdxRZIsOVipbv3rQhVXTnlU6UhKxHd1seDI= golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190513163551-3ee3066db522/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -1467,8 +1482,8 @@ golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6f gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0= -gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= -gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= +gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= +gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY= @@ -1612,10 +1627,10 @@ google.golang.org/genproto v0.0.0-20220324131243-acbaeb5b85eb/go.mod h1:hAL49I2I google.golang.org/genproto v0.0.0-20220401170504-314d38edb7de/go.mod h1:8w6bsBMX6yCPbAVTeqQHvzxW0EIFigd5lZyahWgyfDo= google.golang.org/genproto v0.0.0-20260122232226-8e98ce8d340d h1:hUplc9kLwH374NIY3PreRUK3Unc0xLm/W7MDsm0gCNo= google.golang.org/genproto v0.0.0-20260122232226-8e98ce8d340d/go.mod h1:SpjiK7gGN2j/djoQMxLl3QOe/J/XxNzC5M+YLecVVWU= -google.golang.org/genproto/googleapis/api v0.0.0-20260122232226-8e98ce8d340d h1:tUKoKfdZnSjTf5LW7xpG4c6SZ3Ozisn5eumcoTuMEN4= -google.golang.org/genproto/googleapis/api v0.0.0-20260122232226-8e98ce8d340d/go.mod h1:p3MLuOwURrGBRoEyFHBT3GjUwaCQVKeNqqWxlcISGdw= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260122232226-8e98ce8d340d h1:xXzuihhT3gL/ntduUZwHECzAn57E8dA6l8SOtYWdD8Q= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260122232226-8e98ce8d340d/go.mod 
h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ= +google.golang.org/genproto/googleapis/api v0.0.0-20260226221140-a57be14db171 h1:tu/dtnW1o3wfaxCOjSLn5IRX4YDcJrtlpzYkhHhGaC4= +google.golang.org/genproto/googleapis/api v0.0.0-20260226221140-a57be14db171/go.mod h1:M5krXqk4GhBKvB596udGL3UyjL4I1+cTbK0orROM9ng= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260526163538-3dc84a4a5aaa h1:mZHHdPZl0dbGHCflZgAq/Q468DWVFcU2whhB2KAo8fk= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260526163538-3dc84a4a5aaa/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= google.golang.org/grpc v1.12.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= @@ -1646,8 +1661,8 @@ google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9K google.golang.org/grpc v1.40.1/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= google.golang.org/grpc v1.44.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/grpc v1.45.0/go.mod h1:lN7owxKUQEqMfSyQikvvk5tf/6zMPsrK+ONuO11+0rQ= -google.golang.org/grpc v1.78.0 h1:K1XZG/yGDJnzMdd/uZHAkVqJE+xIDOcmdSFZkBUicNc= -google.golang.org/grpc v1.78.0/go.mod h1:I47qjTo4OKbMkjA/aOOwxDIiPSBofUtQUI5EfpWvW7U= +google.golang.org/grpc v1.81.1 h1:VnnIIZ88UzOOKLukQi+ImGz8O1Wdp8nAGGnvOfEIWQQ= +google.golang.org/grpc v1.81.1/go.mod h1:xGH9GfzOyMTGIOXBJmXt+BX/V0kcdQbdcuwQ/zNw42I= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= @@ -1668,6 +1683,7 @@ google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j gopkg.in/check.v1 
v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= diff --git a/internal/binoculars/configuration/types.go b/internal/binoculars/configuration/types.go index a10c81f59c4..6ba8c1b1c18 100644 --- a/internal/binoculars/configuration/types.go +++ b/internal/binoculars/configuration/types.go @@ -3,6 +3,7 @@ package configuration import ( "github.com/armadaproject/armada/internal/common/auth/configuration" grpcconfig "github.com/armadaproject/armada/internal/common/grpc/configuration" + "github.com/armadaproject/armada/internal/common/observability" profilingconfig "github.com/armadaproject/armada/internal/common/profiling/configuration" ) @@ -10,10 +11,11 @@ type BinocularsConfig struct { Cordon CordonConfiguration Auth configuration.AuthConfig - GrpcPort uint16 - HttpPort uint16 - MetricsPort uint16 - Profiling *profilingconfig.ProfilingConfig + GrpcPort uint16 + HttpPort uint16 + MetricsPort uint16 + Profiling *profilingconfig.ProfilingConfig + Observability observability.ObservabilityConfig CorsAllowedOrigins []string diff --git a/internal/binoculars/configuration/validation.go b/internal/binoculars/configuration/validation.go index cd0d321ffd2..77b6eb316fb 100644 --- a/internal/binoculars/configuration/validation.go +++ b/internal/binoculars/configuration/validation.go @@ -1,9 +1,14 @@ package configuration import ( + "os" + 
"github.com/go-playground/validator/v10" + "github.com/google/uuid" commonconfig "github.com/armadaproject/armada/internal/common/config" + "github.com/armadaproject/armada/internal/common/observability" + "github.com/armadaproject/armada/internal/lookout/version" ) func (c BinocularsConfig) Validate() error { @@ -11,6 +16,19 @@ func (c BinocularsConfig) Validate() error { return validate.Struct(c) } -func (c BinocularsConfig) Mutate() (commonconfig.Config, error) { +func (c *BinocularsConfig) Mutate() (commonconfig.Config, error) { + serviceInstance, err := os.Hostname() + if err != nil { + serviceInstance = uuid.New().String() + } + observabilityConfig, err := c.Observability.WithDefaults(observability.ResourceAttributes{ + ServiceName: "binoculars", + ServiceVersion: version.Version, + ServiceInstance: serviceInstance, + }) + if err != nil { + return nil, err + } + c.Observability = observabilityConfig return c, nil } diff --git a/internal/common/grpc/gateway.go b/internal/common/grpc/gateway.go index 9ae6aeddd09..233b0fd28a6 100644 --- a/internal/common/grpc/gateway.go +++ b/internal/common/grpc/gateway.go @@ -10,6 +10,7 @@ import ( "github.com/go-openapi/runtime/middleware" "github.com/grpc-ecosystem/grpc-gateway/runtime" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "golang.org/x/exp/slices" "google.golang.org/grpc" "google.golang.org/grpc/credentials" @@ -59,12 +60,15 @@ func CreateGatewayHandler( } } + handler := otelhttp.NewHandler( + logRestRequests(allowCORS(gw, corsAllowedOrigins)), + "grpc-gateway", + ) + if stripPrefix { - prefixToStrip := strings.TrimSuffix(apiBasePath, "/") - mux.Handle(apiBasePath, http.StripPrefix(prefixToStrip, logRestRequests(allowCORS(gw, corsAllowedOrigins)))) - } else { - mux.Handle(apiBasePath, logRestRequests(allowCORS(gw, corsAllowedOrigins))) + handler = http.StripPrefix(strings.TrimSuffix(apiBasePath, "/"), handler) } + mux.Handle(apiBasePath, handler) mux.Handle(path.Join(apiBasePath, 
"swagger.json"), middleware.Spec(apiBasePath, []byte(spec), nil)) return func() { diff --git a/internal/common/grpc/grpc.go b/internal/common/grpc/grpc.go index d678580ccf5..2bd5c400afd 100644 --- a/internal/common/grpc/grpc.go +++ b/internal/common/grpc/grpc.go @@ -15,6 +15,7 @@ import ( grpc_recovery "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/recovery" "github.com/prometheus/client_golang/prometheus" + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials" @@ -51,6 +52,7 @@ func CreateGrpcServer( grpc.KeepaliveParams(keepaliveParams), grpc.KeepaliveEnforcementPolicy(keepaliveEnforcementPolicy), setupTls(tlsConfig), + grpc.StatsHandler(otelgrpc.NewServerHandler()), grpc.ChainUnaryInterceptor( requestid.UnaryServerInterceptor(false), grpc_auth.UnaryServerInterceptor(authFunction), @@ -81,7 +83,7 @@ func setupPromMetrics() *grpc_prometheus.ServerMetrics { ), grpc_prometheus.WithContextLabels("user"), ) - prometheus.MustRegister(srvMetrics) + prometheus.DefaultRegisterer.MustRegister(srvMetrics) return srvMetrics } diff --git a/internal/common/logging/logger.go b/internal/common/logging/logger.go index 3763fe02c47..427032c7ca3 100644 --- a/internal/common/logging/logger.go +++ b/internal/common/logging/logger.go @@ -1,10 +1,14 @@ package logging import ( + "context" "fmt" "github.com/pkg/errors" "github.com/rs/zerolog" + "go.opentelemetry.io/otel/trace" + + "github.com/armadaproject/armada/internal/common/requestid" ) // Logger wraps a zerolog.Logger so that the rest of the code doesn't depend directly on zerolog @@ -148,6 +152,37 @@ func (l *Logger) WithFields(args map[string]any) *Logger { } } +// WithContext returns a new Logger that extracts and adds trace context fields (trace_id, span_id) +// from the provided context. It also preserves the x-request-id if present. 
+// If no trace context is available or the span is invalid, returns the logger unchanged. +func (l *Logger) WithContext(ctx context.Context) *Logger { + if ctx == nil { + return l + } + + fields := make(map[string]any) + + // Extract trace context from OTel + span := trace.SpanFromContext(ctx) + spanCtx := span.SpanContext() + if spanCtx.IsValid() { + fields["trace_id"] = spanCtx.TraceID().String() + fields["span_id"] = spanCtx.SpanID().String() + } + + // Preserve existing x-request-id if present + if reqID, ok := requestid.FromContext(ctx); ok { + fields["x-request-id"] = reqID + } + + // If no fields were extracted, return the logger unchanged + if len(fields) == 0 { + return l + } + + return l.WithFields(fields) +} + // WithCallerSkip returns a new Logger with the number of callers skipped increased by the skip amount. // This is needed when building wrappers around the Logger so as to prevent us from always reporting the // wrapper code as the caller. diff --git a/internal/common/logging/logger_test.go b/internal/common/logging/logger_test.go index 290e43d7a1a..c5dda35f372 100644 --- a/internal/common/logging/logger_test.go +++ b/internal/common/logging/logger_test.go @@ -2,6 +2,7 @@ package logging import ( "bytes" + "context" "encoding/json" "fmt" "testing" @@ -10,6 +11,10 @@ import ( "github.com/rs/zerolog" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/sdk/trace" + "google.golang.org/grpc/metadata" + + "github.com/armadaproject/armada/internal/common/requestid" ) type testLogEntry struct { @@ -19,6 +24,9 @@ type testLogEntry struct { CustomField2 string `json:"customField2,omitempty"` Error string `json:"error,omitempty"` Stacktrace string `json:"stacktrace,omitempty"` + TraceID string `json:"trace_id,omitempty"` + SpanID string `json:"span_id,omitempty"` + RequestID string `json:"x-request-id,omitempty"` } func TestWithField(t *testing.T) { @@ -246,4 +254,141 @@ func assertLogLineExpected(t 
*testing.T, expected *testLogEntry, logOutput *byte assert.Equal(t, expected.CustomField2, entry.CustomField2) assert.Equal(t, expected.Error, entry.Error) assert.Equal(t, expected.Stacktrace, entry.Stacktrace) + assert.Equal(t, expected.TraceID, entry.TraceID) + assert.Equal(t, expected.SpanID, entry.SpanID) + assert.Equal(t, expected.RequestID, entry.RequestID) +} + +// TestLogIncludesTraceAndSpanIds verifies that when logging from a traced request context, +// the log includes trace_id and span_id fields. +func TestLogIncludesTraceAndSpanIds(t *testing.T) { + logger, buf := testLogger() + + // Create a test tracer and span + tp := trace.NewTracerProvider() + tracer := tp.Tracer("test") + ctx, span := tracer.Start(context.Background(), "test-operation") + defer span.End() + + // Extract trace and span IDs + spanCtx := span.SpanContext() + expectedTraceID := spanCtx.TraceID().String() + expectedSpanID := spanCtx.SpanID().String() + + // Log from the traced context + contextLogger := logger.WithContext(ctx) + contextLogger.Info("Request processed") + + assertLogLineExpected( + t, + &testLogEntry{ + Level: "info", + Message: "Request processed", + TraceID: expectedTraceID, + SpanID: expectedSpanID, + RequestID: "", + }, + buf, + ) +} + +// TestLogRetainsRequestId verifies that x-request-id is preserved alongside trace fields. 
+func TestLogRetainsRequestId(t *testing.T) { + logger, buf := testLogger() + + // Create a context with trace span and request ID + tp := trace.NewTracerProvider() + tracer := tp.Tracer("test") + ctx, span := tracer.Start(context.Background(), "test-operation") + defer span.End() + + // Add gRPC metadata and request ID to context + ctx = metadata.NewIncomingContext(ctx, metadata.New(map[string]string{})) + ctx, _ = requestid.AddToIncomingContext(ctx, "test-request-123") + + // Extract expected values + spanCtx := span.SpanContext() + expectedTraceID := spanCtx.TraceID().String() + expectedSpanID := spanCtx.SpanID().String() + + // Log from the context + contextLogger := logger.WithContext(ctx) + contextLogger.Info("Request with ID") + + assertLogLineExpected( + t, + &testLogEntry{ + Level: "info", + Message: "Request with ID", + TraceID: expectedTraceID, + SpanID: expectedSpanID, + RequestID: "test-request-123", + }, + buf, + ) +} + +// TestBackgroundLogsWithoutSpanContext verifies that logging without trace context +// doesn't panic and works normally. +func TestBackgroundLogsWithoutSpanContext(t *testing.T) { + logger, buf := testLogger() + + // Log without any trace context + backgroundLogger := logger.WithContext(context.Background()) + backgroundLogger.Info("Background operation") + + assertLogLineExpected( + t, + &testLogEntry{ + Level: "info", + Message: "Background operation", + TraceID: "", + SpanID: "", + }, + buf, + ) +} + +// TestLogWithNilContext verifies that WithContext handles nil context gracefully. +func TestLogWithNilContext(t *testing.T) { + logger, buf := testLogger() + + // Log with nil context + nilContextLogger := logger.WithContext(nil) + nilContextLogger.Info("Nil context log") + + assertLogLineExpected( + t, + &testLogEntry{ + Level: "info", + Message: "Nil context log", + TraceID: "", + SpanID: "", + }, + buf, + ) +} + +// TestLogWithOnlyRequestIdNoTrace verifies that request ID alone is logged without trace fields. 
+func TestLogWithOnlyRequestIdNoTrace(t *testing.T) { + logger, buf := testLogger() + + ctx := context.Background() + ctx = metadata.NewIncomingContext(ctx, metadata.New(map[string]string{})) + ctx, _ = requestid.AddToIncomingContext(ctx, "request-only-456") + + contextLogger := logger.WithContext(ctx) + contextLogger.Info("Request ID only") + + assertLogLineExpected( + t, + &testLogEntry{ + Level: "info", + Message: "Request ID only", + TraceID: "", + SpanID: "", + RequestID: "request-only-456", + }, + buf, + ) } diff --git a/internal/common/observability/attribute_policy.go b/internal/common/observability/attribute_policy.go new file mode 100644 index 00000000000..5fd3dbf08eb --- /dev/null +++ b/internal/common/observability/attribute_policy.go @@ -0,0 +1,304 @@ +package observability + +import ( + "context" + "fmt" + "strings" + "sync" + + "go.opentelemetry.io/otel/attribute" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + + "github.com/armadaproject/armada/internal/common/logging" +) + +const ( + // DefaultAttributeCardinalityLimit is the maximum number of unique values + // allowed per custom span attribute key before values are redacted. + DefaultAttributeCardinalityLimit = 1000 + + // AttributeRedactedValue is used when an attribute key is denied for PII/sensitive content. + AttributeRedactedValue = "[REDACTED]" + // AttributeDisallowedValue is used when an attribute key is not in the allow-list. + AttributeDisallowedValue = "[DISALLOWED]" + // AttributeHighCardinalityValue is used once cardinality guardrails are exceeded. + AttributeHighCardinalityValue = "[HIGH_CARDINALITY]" +) + +// SpanAttributePolicy defines guardrails for span attribute keys and values. 
+// +// Policy summary: +// - Allow-list: rpc.*, http.*, net.*, server.*, service.*, armada.*, trace_id, span_id +// - Deny-list: explicit sensitive keys and key-name patterns (password/secret/token/key) +// - Cardinality: custom (non-standard) keys capped at DefaultAttributeCardinalityLimit unique values +// +// Important: OTel SDK span processors cannot delete already-set attributes from an active span. +// To prevent raw PII leakage, denied/disallowed keys are overwritten with marker values. +type SpanAttributePolicy struct { + allowedPrefixes []string + allowedExact map[string]struct{} + deniedExact map[string]struct{} + deniedContains []string + + cardinalityExemptPrefixes []string + cardinalityExemptExact map[string]struct{} + cardinality *attributeCardinalityTracker +} + +type SpanAttributeViolationReason string + +const ( + SpanAttributeViolationDenied SpanAttributeViolationReason = "denied" + SpanAttributeViolationDisallowed SpanAttributeViolationReason = "disallowed" + SpanAttributeViolationHighCardinality SpanAttributeViolationReason = "high_cardinality" +) + +type SpanAttributeViolation struct { + Key string + Reason SpanAttributeViolationReason +} + +// NewDefaultSpanAttributePolicy returns the default attribute policy for Armada traces. +func NewDefaultSpanAttributePolicy() *SpanAttributePolicy { + return &SpanAttributePolicy{ + allowedPrefixes: []string{"rpc.", "http.", "net.", "server.", "service.", "armada."}, + allowedExact: map[string]struct{}{ + "trace_id": {}, + "span_id": {}, + }, + // Explicit deny-list for common PII/sensitive payload fields. 
+ deniedExact: map[string]struct{}{ + "user_id": {}, + "user_email": {}, + "user_name": {}, + "password": {}, + "api_key": {}, + "token": {}, + "secret": {}, + "job_payload": {}, + "request_body": {}, + "response_body": {}, + }, + deniedContains: []string{"password", "secret", "token", "api_key", "apikey", "key"}, + cardinalityExemptPrefixes: []string{"rpc.", "http.", "net.", "server.", "service."}, + cardinalityExemptExact: map[string]struct{}{ + "trace_id": {}, + "span_id": {}, + "http.user_agent": {}, + }, + cardinality: newAttributeCardinalityTracker(DefaultAttributeCardinalityLimit), + } +} + +// SanitizeForSpan returns attributes safe for emission by applying deny-list, +// allow-list, and cardinality guardrails. +// +// This function only sanitizes the attributes passed to it. In the OTel +// SDK processor model, OnStart can sanitize initial attributes, but attributes added +// later via span.SetAttributes(...) cannot be rewritten at OnEnd. See +// ViolationsForSpan/OnEnd guardrails for post-start detection. +func (p *SpanAttributePolicy) SanitizeForSpan(attrs []attribute.KeyValue) []attribute.KeyValue { + out := make([]attribute.KeyValue, 0, len(attrs)) + for _, kv := range attrs { + key := string(kv.Key) + keyLower := strings.ToLower(key) + + switch { + case p.isDenied(keyLower): + out = append(out, attribute.String(key, AttributeRedactedValue)) + continue + case !p.isAllowed(keyLower): + out = append(out, attribute.String(key, AttributeDisallowedValue)) + continue + } + + if p.shouldGuardCardinality(keyLower) && p.cardinality.ExceedsLimit(keyLower, attributeValueFingerprint(kv.Value)) { + out = append(out, attribute.String(key, AttributeHighCardinalityValue)) + continue + } + + out = append(out, kv) + } + return out +} + +// ViolationsForSpan returns policy violations present in the provided attributes. +// +// Use this as a guardrail for ended spans where mutation is no longer possible, +// e.g. 
to detect attributes that were added after OnStart and therefore bypassed +// processor-time sanitization. +func (p *SpanAttributePolicy) ViolationsForSpan(attrs []attribute.KeyValue) []SpanAttributeViolation { + violations := make([]SpanAttributeViolation, 0) + for _, kv := range attrs { + if isSanitizedMarkerValue(kv.Value) { + continue + } + + key := string(kv.Key) + keyLower := strings.ToLower(key) + + switch { + case p.isDenied(keyLower): + violations = append(violations, SpanAttributeViolation{Key: key, Reason: SpanAttributeViolationDenied}) + case !p.isAllowed(keyLower): + violations = append(violations, SpanAttributeViolation{Key: key, Reason: SpanAttributeViolationDisallowed}) + case p.shouldGuardCardinality(keyLower) && p.cardinality.IsOverLimit(keyLower, attributeValueFingerprint(kv.Value)): + violations = append(violations, SpanAttributeViolation{Key: key, Reason: SpanAttributeViolationHighCardinality}) + } + } + return violations +} + +func isSanitizedMarkerValue(value attribute.Value) bool { + if value.Type() != attribute.STRING { + return false + } + + s := value.AsString() + return s == AttributeRedactedValue || s == AttributeDisallowedValue || s == AttributeHighCardinalityValue +} + +func (p *SpanAttributePolicy) isAllowed(key string) bool { + if _, ok := p.allowedExact[key]; ok { + return true + } + for _, prefix := range p.allowedPrefixes { + if strings.HasPrefix(key, prefix) { + return true + } + } + return false +} + +func (p *SpanAttributePolicy) isDenied(key string) bool { + if _, ok := p.deniedExact[key]; ok { + return true + } + for _, disallowedPart := range p.deniedContains { + if strings.Contains(key, disallowedPart) { + return true + } + } + return false +} + +func (p *SpanAttributePolicy) shouldGuardCardinality(key string) bool { + if _, ok := p.cardinalityExemptExact[key]; ok { + return false + } + for _, prefix := range p.cardinalityExemptPrefixes { + if strings.HasPrefix(key, prefix) { + return false + } + } + return true +} + +func 
// attributeCardinalityTracker records the distinct values seen per attribute key
// and reports when a key has reached its limit of distinct values.
//
// All methods take the internal mutex. Recorded values are never evicted.
type attributeCardinalityTracker struct {
	// limit is the maximum number of distinct values recorded per key.
	limit int

	mu sync.Mutex
	// seen maps key -> set of recorded value fingerprints.
	seen map[string]map[string]struct{}
	// overLimit marks keys for which at least one new value has been rejected.
	overLimit map[string]bool
}

// newAttributeCardinalityTracker returns an empty tracker that records at most
// limit distinct values per key.
func newAttributeCardinalityTracker(limit int) *attributeCardinalityTracker {
	return &attributeCardinalityTracker{
		limit:     limit,
		seen:      make(map[string]map[string]struct{}),
		overLimit: make(map[string]bool),
	}
}

// ExceedsLimit records value for key and reports whether it had to be rejected.
//
// A value that was recorded earlier is always accepted (false). A new value is
// recorded and accepted while fewer than limit values are known for key; once
// limit values are known, a new value is rejected (true) and key is marked as
// over its limit.
func (c *attributeCardinalityTracker) ExceedsLimit(key, value string) bool {
	c.mu.Lock()
	defer c.mu.Unlock()

	knownValues, ok := c.seen[key]
	if !ok {
		knownValues = make(map[string]struct{})
		c.seen[key] = knownValues
	}

	if _, seen := knownValues[value]; seen {
		return false
	}

	if len(knownValues) >= c.limit {
		c.overLimit[key] = true
		return true
	}

	knownValues[value] = struct{}{}
	return false
}

// IsOverLimit reports, without recording anything, whether value would be
// rejected for key by ExceedsLimit.
//
// A value that has already been recorded is never over the limit, even after key
// has been marked over-limit: ExceedsLimit keeps accepting such values, so
// reporting them here would flag attributes that were legitimately emitted.
// A key that has never been recorded is reported as not over the limit.
func (c *attributeCardinalityTracker) IsOverLimit(key, value string) bool {
	c.mu.Lock()
	defer c.mu.Unlock()

	knownValues, ok := c.seen[key]
	if !ok {
		return false
	}

	// Known values stay valid regardless of the over-limit marker.
	if _, seen := knownValues[value]; seen {
		return false
	}

	return c.overLimit[key] || len(knownValues) >= c.limit
}
+func NewSpanAttributePolicyProcessor(policy *SpanAttributePolicy) sdktrace.SpanProcessor { + if policy == nil { + policy = NewDefaultSpanAttributePolicy() + } + return &spanAttributePolicyProcessor{policy: policy} +} + +func (p *spanAttributePolicyProcessor) OnStart(_ context.Context, span sdktrace.ReadWriteSpan) { + attrs := span.Attributes() + if len(attrs) == 0 { + return + } + sanitized := p.policy.SanitizeForSpan(attrs) + span.SetAttributes(sanitized...) +} + +func (p *spanAttributePolicyProcessor) OnEnd(span sdktrace.ReadOnlySpan) { + violations := p.policy.ViolationsForSpan(span.Attributes()) + if len(violations) == 0 { + return + } + + keys := make([]string, 0, len(violations)) + for _, violation := range violations { + keys = append(keys, fmt.Sprintf("%s(%s)", violation.Key, violation.Reason)) + } + + logging.Warnf( + "Span %q contains policy-violating attributes that could not be rewritten after span start: %s", + span.Name(), + strings.Join(keys, ", "), + ) +} + +func (p *spanAttributePolicyProcessor) Shutdown(context.Context) error { return nil } + +func (p *spanAttributePolicyProcessor) ForceFlush(context.Context) error { return nil } diff --git a/internal/common/observability/attribute_policy_test.go b/internal/common/observability/attribute_policy_test.go new file mode 100644 index 00000000000..171918b41b6 --- /dev/null +++ b/internal/common/observability/attribute_policy_test.go @@ -0,0 +1,173 @@ +package observability + +import ( + "context" + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/sdk/trace/tracetest" + semconv "go.opentelemetry.io/otel/semconv/v1.26.0" + oteltrace "go.opentelemetry.io/otel/trace" +) + +func TestSpanAttributeAllowListPreservesStandardKeys(t *testing.T) { + policy := 
NewDefaultSpanAttributePolicy() + + attrs := []attribute.KeyValue{ + attribute.String("rpc.system.name", "grpc"), + attribute.String("http.method", "GET"), + attribute.Int("http.status_code", 200), + attribute.String("net.peer.name", "localhost"), + attribute.String("server.address", "armada.local"), + attribute.String("service.name", "server"), + attribute.String("trace_id", "abc"), + attribute.String("span_id", "def"), + } + + sanitized := policy.SanitizeForSpan(attrs) + require.Len(t, sanitized, len(attrs)) + + for i := range attrs { + assert.Equal(t, attrs[i], sanitized[i]) + } +} + +func TestSpanAttributePolicyDropsPII(t *testing.T) { + policy := NewDefaultSpanAttributePolicy() + + attrs := []attribute.KeyValue{ + attribute.String("user_email", "user@example.com"), + attribute.String("password", "super-secret"), + attribute.String("request_body", `{"sensitive":true}`), + attribute.String("armada.custom_token_field", "token-value"), + attribute.String("rpc.system.name", "grpc"), + } + + sanitized := policy.SanitizeForSpan(attrs) + + assert.Equal(t, attribute.String("user_email", AttributeRedactedValue), sanitized[0]) + assert.Equal(t, attribute.String("password", AttributeRedactedValue), sanitized[1]) + assert.Equal(t, attribute.String("request_body", AttributeRedactedValue), sanitized[2]) + assert.Equal(t, attribute.String("armada.custom_token_field", AttributeRedactedValue), sanitized[3]) + assert.Equal(t, attribute.String("rpc.system.name", "grpc"), sanitized[4]) +} + +func TestSpanAttributePolicyBoundsCardinality(t *testing.T) { + policy := NewDefaultSpanAttributePolicy() + + customKey := "armada.user_agent_variant" + for i := range DefaultAttributeCardinalityLimit { + in := []attribute.KeyValue{attribute.String(customKey, fmt.Sprintf("ua-%d", i))} + out := policy.SanitizeForSpan(in) + require.Len(t, out, 1) + assert.Equal(t, in[0], out[0], "attribute should be preserved before limit") + } + + overLimit := []attribute.KeyValue{attribute.String(customKey, 
"ua-over-limit")} + sanitized := policy.SanitizeForSpan(overLimit) + require.Len(t, sanitized, 1) + assert.Equal(t, attribute.String(customKey, AttributeHighCardinalityValue), sanitized[0]) + + httpUserAgent := []attribute.KeyValue{attribute.String("http.user_agent", "ua-allowed")} + httpUserAgentSanitized := policy.SanitizeForSpan(httpUserAgent) + assert.Equal(t, httpUserAgent[0], httpUserAgentSanitized[0], "http.user_agent should be cardinality-exempt") +} + +func TestResourceAttributesSet(t *testing.T) { + spanRecorder := tracetest.NewSpanRecorder() + + res, err := resource.New(context.Background(), + resource.WithAttributes( + semconv.ServiceName("resource-service"), + semconv.ServiceNamespace("armada"), + semconv.ServiceVersion("1.2.3"), + ), + ) + require.NoError(t, err) + + tp := sdktrace.NewTracerProvider( + sdktrace.WithResource(res), + sdktrace.WithSpanProcessor(NewSpanAttributePolicyProcessor(NewDefaultSpanAttributePolicy())), + sdktrace.WithSpanProcessor(spanRecorder), + ) + defer func() { + _ = tp.Shutdown(context.Background()) + }() + + otel.SetTracerProvider(tp) + + tracer := tp.Tracer("resource-test") + _, span := tracer.Start(context.Background(), "resource-span") + span.SetAttributes(attribute.String("user_id", "pii-user")) + span.End() + + spans := spanRecorder.Ended() + require.Len(t, spans, 1) + + resourceAttrs := spans[0].Resource().Attributes() + assert.Contains(t, resourceAttrs, semconv.ServiceName("resource-service")) + assert.Contains(t, resourceAttrs, semconv.ServiceNamespace("armada")) + assert.Contains(t, resourceAttrs, semconv.ServiceVersion("1.2.3")) + + // Policy processor must not mutate resource attributes. 
+ for _, kv := range resourceAttrs { + assert.NotEqual(t, "user_id", string(kv.Key)) + } +} + +func TestSpanAttributePolicySanitizesAttributesProvidedAtSpanStart(t *testing.T) { + spanRecorder := tracetest.NewSpanRecorder() + tp := sdktrace.NewTracerProvider( + sdktrace.WithSpanProcessor(NewSpanAttributePolicyProcessor(NewDefaultSpanAttributePolicy())), + sdktrace.WithSpanProcessor(spanRecorder), + ) + defer func() { + _ = tp.Shutdown(context.Background()) + }() + + tracer := tp.Tracer("start-attrs-test") + _, span := tracer.Start(context.Background(), "start-attrs", + oteltrace.WithAttributes(attribute.String("user_id", "pii-user-at-start")), + ) + span.End() + + spans := spanRecorder.Ended() + require.Len(t, spans, 1) + endedAttrs := spans[0].Attributes() + assert.Contains(t, endedAttrs, attribute.String("user_id", AttributeRedactedValue)) + + violations := NewDefaultSpanAttributePolicy().ViolationsForSpan(endedAttrs) + assert.NotContains(t, violations, SpanAttributeViolation{Key: "user_id", Reason: SpanAttributeViolationDenied}) +} + +func TestSpanAttributePolicyGuardrailDetectsDeniedAttributesSetAfterStart(t *testing.T) { + policy := NewDefaultSpanAttributePolicy() + spanRecorder := tracetest.NewSpanRecorder() + tp := sdktrace.NewTracerProvider( + sdktrace.WithSpanProcessor(NewSpanAttributePolicyProcessor(policy)), + sdktrace.WithSpanProcessor(spanRecorder), + ) + defer func() { + _ = tp.Shutdown(context.Background()) + }() + + tracer := tp.Tracer("late-attrs-test") + _, span := tracer.Start(context.Background(), "late-attrs") + span.SetAttributes(attribute.String("user_id", "late-pii-user")) + span.End() + + spans := spanRecorder.Ended() + require.Len(t, spans, 1) + + endedAttrs := spans[0].Attributes() + assert.Contains(t, endedAttrs, attribute.String("user_id", "late-pii-user"), "OTel processor cannot rewrite post-start attributes") + + violations := policy.ViolationsForSpan(endedAttrs) + assert.Contains(t, violations, SpanAttributeViolation{Key: "user_id", 
// Defaults applied when the corresponding configuration value is blank.
const (
	DefaultOtlpHTTPEndpoint      = "http://localhost:4318"
	DefaultOtlpHTTPProtocol      = "http/protobuf"
	SamplerParentBasedTraceRatio = "parent_based_trace_id_ratio"
)

// Configuration paths, used to name the offending setting in validation errors.
const (
	ConfigOtelExporterOtlpEndpoint = "observability.exporter.endpoint"
	ConfigOtelExporterOtlpProtocol = "observability.exporter.protocol"
	ConfigOtelTracesSampler        = "observability.traces.sampler"
	ConfigOtelTracesSamplerArg     = "observability.traces.samplerArg"
)

// Resource attribute keys, used to name the offending attribute in validation errors.
const (
	ResourceAttributeServiceName     = "service.name"
	ResourceAttributeServiceVersion  = "service.version"
	ResourceAttributeServiceInstance = "service.instance.id"
)

// validSamplers is the set of sampler names accepted by Validate.
var validSamplers = map[string]bool{
	SamplerParentBasedTraceRatio: true,
	"trace_id_ratio":             true,
	"always_on":                  true,
	"always_off":                 true,
}

// validOTLPProtocols is the set of exporter protocols accepted by Validate.
var validOTLPProtocols = map[string]struct{}{
	"http/protobuf": {},
	"grpc":          {},
}

// ResourceAttributes define the required OpenTelemetry service identity contract.
type ResourceAttributes struct {
	// Service name is the name of the service.
	// Required attribute, with key `service.name`.
	ServiceName string
	// Service version is the version of the service.
	// Required attribute, with key `service.version`.
	ServiceVersion string
	// Service instance is a unique identifier for the service instance.
	// Required attribute, with key `service.instance.id`.
	ServiceInstance string
	// Extra attributes if specified will be added to the resource attributes.
	// These can be used to add additional metadata about the service instance.
	Extra map[string]string
}

// OTLPExporterConfig selects where and how traces are exported.
type OTLPExporterConfig struct {
	// Endpoint is an absolute http or https URL (enforced by Validate).
	Endpoint string
	// Protocol is one of the keys of validOTLPProtocols.
	Protocol string
}

// TracesConfig controls trace sampling.
type TracesConfig struct {
	// Sampler controls root trace sampling policy.
	// Supported values:
	// - always_on
	// - always_off
	// - trace_id_ratio
	// - parent_based_trace_id_ratio
	//
	// Default is parent_based_trace_id_ratio to respect upstream sampling decisions
	// while still allowing root-span ratio sampling in this service.
	Sampler string
	// SamplerArg is the sampler parameter used by ratio samplers.
	// Valid range for ratio samplers is 0.0 to 1.0.
	SamplerArg float64
}

// ObservabilityConfig is the observability section of a service configuration.
type ObservabilityConfig struct {
	Enabled  bool
	Exporter OTLPExporterConfig
	Traces   TracesConfig
	Resource ResourceAttributes
}

// DefaultObservabilityConfig returns a disabled configuration that exports over
// OTLP/HTTP to DefaultOtlpHTTPEndpoint, samples with
// parent_based_trace_id_ratio at ratio 1.0, and identifies the service with
// defaultResource.
func DefaultObservabilityConfig(defaultResource ResourceAttributes) ObservabilityConfig {
	return ObservabilityConfig{
		Enabled: false,
		Exporter: OTLPExporterConfig{
			Endpoint: DefaultOtlpHTTPEndpoint,
			Protocol: DefaultOtlpHTTPProtocol,
		},
		Traces: TracesConfig{
			Sampler:    SamplerParentBasedTraceRatio,
			SamplerArg: 1.0,
		},
		Resource: defaultResource,
	}
}

// ReadObservabilityConfig returns the zero configuration with all defaults
// applied and validated; see WithDefaults.
func ReadObservabilityConfig(defaultResource ResourceAttributes) (ObservabilityConfig, error) {
	return ObservabilityConfig{}.WithDefaults(defaultResource)
}

// WithDefaults returns a copy of c in which blank settings are replaced by the
// values of DefaultObservabilityConfig(defaultResource), then validates it.
//
// Normalisation applied to non-blank values: the endpoint is trimmed; protocol
// and sampler are trimmed and lower-cased. When Traces.Sampler is blank both the
// sampler and SamplerArg are replaced by their defaults, so a SamplerArg supplied
// without a sampler is ignored. Resource fields are defaulted individually when
// blank; Enabled is never changed.
//
// On validation failure the zero ObservabilityConfig and the error are returned.
func (c ObservabilityConfig) WithDefaults(defaultResource ResourceAttributes) (ObservabilityConfig, error) {
	defaults := DefaultObservabilityConfig(defaultResource)

	if endpoint := strings.TrimSpace(c.Exporter.Endpoint); endpoint != "" {
		c.Exporter.Endpoint = endpoint
	} else {
		c.Exporter.Endpoint = defaults.Exporter.Endpoint
	}

	if protocol := strings.TrimSpace(c.Exporter.Protocol); protocol != "" {
		c.Exporter.Protocol = strings.ToLower(protocol)
	} else {
		c.Exporter.Protocol = defaults.Exporter.Protocol
	}

	if sampler := strings.TrimSpace(c.Traces.Sampler); sampler != "" {
		c.Traces.Sampler = strings.ToLower(sampler)
	} else {
		c.Traces.Sampler = defaults.Traces.Sampler
		c.Traces.SamplerArg = defaults.Traces.SamplerArg
	}

	if strings.TrimSpace(c.Resource.ServiceName) == "" {
		c.Resource.ServiceName = defaults.Resource.ServiceName
	}
	if strings.TrimSpace(c.Resource.ServiceVersion) == "" {
		c.Resource.ServiceVersion = defaults.Resource.ServiceVersion
	}
	if strings.TrimSpace(c.Resource.ServiceInstance) == "" {
		c.Resource.ServiceInstance = defaults.Resource.ServiceInstance
	}

	if err := c.Validate(); err != nil {
		return ObservabilityConfig{}, err
	}

	return c, nil
}

// Validate reports the first problem found in c, or nil when c is usable.
//
// Checks, in order: the endpoint is a non-empty absolute http/https URL; the
// protocol and sampler are supported values; for the ratio samplers
// (parent_based_trace_id_ratio, trace_id_ratio) SamplerArg lies in [0, 1]; the
// three required resource attributes are non-blank.
func (c ObservabilityConfig) Validate() error {
	if strings.TrimSpace(c.Exporter.Endpoint) == "" {
		return fmt.Errorf("%s must not be empty", ConfigOtelExporterOtlpEndpoint)
	}

	parsedURL, err := url.Parse(c.Exporter.Endpoint)
	if err != nil || parsedURL.Scheme == "" || parsedURL.Host == "" {
		return fmt.Errorf("%s must be a valid absolute URL", ConfigOtelExporterOtlpEndpoint)
	}
	if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" {
		return fmt.Errorf("%s scheme must be http or https", ConfigOtelExporterOtlpEndpoint)
	}

	if _, ok := validOTLPProtocols[c.Exporter.Protocol]; !ok {
		return fmt.Errorf("%s=%q is invalid: supported values are %v", ConfigOtelExporterOtlpProtocol, c.Exporter.Protocol, sortedKeys(validOTLPProtocols))
	}

	if _, ok := validSamplers[c.Traces.Sampler]; !ok {
		return fmt.Errorf("%s=%q is invalid: supported values are %v", ConfigOtelTracesSampler, c.Traces.Sampler, sortedKeys(validSamplers))
	}

	// Only the ratio samplers consume SamplerArg. Any other spelling of these
	// names has already been rejected by the validSamplers check above.
	switch c.Traces.Sampler {
	case SamplerParentBasedTraceRatio, "trace_id_ratio":
		// Written as a negated range test on purpose: NaN fails every ordered
		// comparison, so "arg < 0 || arg > 1" would let it through.
		if !(c.Traces.SamplerArg >= 0 && c.Traces.SamplerArg <= 1) {
			return fmt.Errorf("%s must be between 0 and 1 for sampler %q", ConfigOtelTracesSamplerArg, c.Traces.Sampler)
		}
	}

	if strings.TrimSpace(c.Resource.ServiceName) == "" {
		return fmt.Errorf("resource attribute %q must not be empty", ResourceAttributeServiceName)
	}
	if strings.TrimSpace(c.Resource.ServiceVersion) == "" {
		return fmt.Errorf("resource attribute %q must not be empty", ResourceAttributeServiceVersion)
	}
	if strings.TrimSpace(c.Resource.ServiceInstance) == "" {
		return fmt.Errorf("resource attribute %q must not be empty", ResourceAttributeServiceInstance)
	}

	return nil
}

// sortedKeys returns the keys of m in ascending order, giving error messages a
// deterministic list of supported values.
func sortedKeys[T any](m map[string]T) []string {
	keys := make([]string, 0, len(m))
	for key := range m {
		keys = append(keys, key)
	}
	sort.Strings(keys)
	return keys
}
"pod-xyz", + }, + }).WithDefaults(defaults) + require.NoError(t, err) + + assert.True(t, cfg.Enabled) + assert.Equal(t, "http://otel-collector:4318", cfg.Exporter.Endpoint) + assert.Equal(t, "http/protobuf", cfg.Exporter.Protocol) + assert.Equal(t, SamplerParentBasedTraceRatio, cfg.Traces.Sampler) + assert.Equal(t, 0.25, cfg.Traces.SamplerArg) + assert.Equal(t, "armada-executor", cfg.Resource.ServiceName) + assert.Equal(t, "v2.0.0", cfg.Resource.ServiceVersion) + assert.Equal(t, "pod-xyz", cfg.Resource.ServiceInstance) + }) +} + +func TestObservabilityConfigRejectsInvalidSampler(t *testing.T) { + defaults := ResourceAttributes{ + ServiceName: "armada-server", + ServiceVersion: "1.2.3", + ServiceInstance: "instance-1", + } + + _, err := (ObservabilityConfig{ + Traces: TracesConfig{Sampler: "invalid_sampler_name"}, + }).WithDefaults(defaults) + require.Error(t, err) + assert.ErrorContains(t, err, ConfigOtelTracesSampler) + assert.ErrorContains(t, err, "invalid_sampler_name") +} + +func TestObservabilityConfigRejectsUnsupportedEndpointScheme(t *testing.T) { + defaults := ResourceAttributes{ + ServiceName: "armada-server", + ServiceVersion: "1.2.3", + ServiceInstance: "instance-1", + } + + _, err := (ObservabilityConfig{ + Exporter: OTLPExporterConfig{ + Endpoint: "grpc://otel-collector:4317", + }, + }).WithDefaults(defaults) + require.Error(t, err) + assert.ErrorContains(t, err, ConfigOtelExporterOtlpEndpoint) + assert.ErrorContains(t, err, "http or https") +} diff --git a/internal/common/observability/lifecycle.go b/internal/common/observability/lifecycle.go new file mode 100644 index 00000000000..7ab23d5ba0f --- /dev/null +++ b/internal/common/observability/lifecycle.go @@ -0,0 +1,195 @@ +package observability + +import ( + "context" + "fmt" + "net/url" + "sync" + "time" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + 
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" + metricnoop "go.opentelemetry.io/otel/metric/noop" + "go.opentelemetry.io/otel/propagation" + "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + tracenoop "go.opentelemetry.io/otel/trace/noop" + + "github.com/armadaproject/armada/internal/common/logging" +) + +const ( + exportTimeout = 10 * time.Second + batchTimeout = 5 * time.Second + maxExportBatch = 512 + maxBatchQueue = 2048 + shutdownTimeout = 5 * time.Second +) + +var ( + globalTracerProvider *sdktrace.TracerProvider + globalTracerProviderMu sync.RWMutex +) + +func setNoopOTelLocked() { + otel.SetTracerProvider(tracenoop.NewTracerProvider()) + otel.SetMeterProvider(metricnoop.NewMeterProvider()) + otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator()) + globalTracerProvider = nil +} + +func setNoopOTel() { + globalTracerProviderMu.Lock() + defer globalTracerProviderMu.Unlock() + setNoopOTelLocked() +} + +func init() { + setNoopOTel() +} + +// InitOTel initializes the global OpenTelemetry tracer provider with the given configuration. +// This function is fail-open: if the OTLP collector is unreachable, it logs the error but +// returns success to allow the service to start. The tracer provider is set globally via +// otel.SetTracerProvider() and W3C propagators are registered via otel.SetTextMapPropagator(). +// +// Returns an error only if configuration is invalid or critical setup fails (not collector reachability). 
+func InitOTel(cfg ObservabilityConfig) error { + if !cfg.Enabled { + setNoopOTel() + logging.Info("OpenTelemetry disabled by config") + return nil + } + + // Create resource with required semantic attributes + res, err := resource.New( + context.Background(), + resource.WithAttributes( + attribute.String(ResourceAttributeServiceName, cfg.Resource.ServiceName), + attribute.String(ResourceAttributeServiceVersion, cfg.Resource.ServiceVersion), + attribute.String(ResourceAttributeServiceInstance, cfg.Resource.ServiceInstance), + ), + ) + if err != nil { + return fmt.Errorf("failed to create OTel resource: %w", err) + } + + // Create OTLP exporter with bounded timeout + ctx, cancel := context.WithTimeout(context.Background(), exportTimeout) + defer cancel() + + exporter, err := newTraceExporter(ctx, cfg) + if err != nil { + // Fail-open: log error but continue with noop provider + logging.WithError(err).Warnf( + "Failed to create OTLP trace exporter (endpoint=%s, protocol=%s). Service will start without tracing.", + cfg.Exporter.Endpoint, + cfg.Exporter.Protocol, + ) + setNoopOTel() + return nil + } + + // Create sampler based on config + var sampler sdktrace.Sampler + switch cfg.Traces.Sampler { + case SamplerParentBasedTraceRatio: + sampler = sdktrace.ParentBased(sdktrace.TraceIDRatioBased(cfg.Traces.SamplerArg)) + case "trace_id_ratio": + sampler = sdktrace.TraceIDRatioBased(cfg.Traces.SamplerArg) + case "always_on": + sampler = sdktrace.AlwaysSample() + case "always_off": + sampler = sdktrace.NeverSample() + default: + return fmt.Errorf("unsupported sampler: %s", cfg.Traces.Sampler) + } + + // Create tracer provider with bounded batch processing guardrails: + // - max export batch size: 512 spans + // - max queue size: 2048 spans (drop-on-backpressure above this bound) + // - batch timeout: 5s + // - export timeout: 10s + tp := sdktrace.NewTracerProvider( + sdktrace.WithSpanProcessor(NewSpanAttributePolicyProcessor(NewDefaultSpanAttributePolicy())), + 
sdktrace.WithBatcher( + exporter, + sdktrace.WithMaxExportBatchSize(maxExportBatch), + sdktrace.WithMaxQueueSize(maxBatchQueue), + sdktrace.WithExportTimeout(exportTimeout), + sdktrace.WithBatchTimeout(batchTimeout), + ), + sdktrace.WithResource(res), + sdktrace.WithSampler(sampler), + ) + + // Set global tracer provider and propagators. + globalTracerProviderMu.Lock() + otel.SetTracerProvider(tp) + otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator( + propagation.TraceContext{}, + propagation.Baggage{}, + )) + globalTracerProvider = tp + globalTracerProviderMu.Unlock() + + logging.Infof( + "OpenTelemetry initialized: endpoint=%s, sampler=%s, ratio=%.2f, service=%s", + cfg.Exporter.Endpoint, + cfg.Traces.Sampler, + cfg.Traces.SamplerArg, + cfg.Resource.ServiceName, + ) + + return nil +} + +// ShutdownOTel gracefully shuts down the global tracer provider, flushing any +// pending spans to the collector. +// or the shutdown timeout (5s) is reached. Returns an error if shutdown fails or times out. 
+func ShutdownOTel(ctx context.Context) error { + globalTracerProviderMu.Lock() + tp := globalTracerProvider + setNoopOTelLocked() + globalTracerProviderMu.Unlock() + + if tp == nil { + return nil + } + + if err := tp.Shutdown(ctx); err != nil { + return fmt.Errorf("failed to shutdown OTel tracer provider: %w", err) + } + + logging.Info("OpenTelemetry tracer provider shut down successfully") + return nil +} + +func newTraceExporter(ctx context.Context, cfg ObservabilityConfig) (sdktrace.SpanExporter, error) { + parsedURL, err := url.Parse(cfg.Exporter.Endpoint) + if err != nil { + return nil, fmt.Errorf("failed to parse exporter endpoint: %w", err) + } + if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" { + return nil, fmt.Errorf("unsupported endpoint scheme %q: must be http or https", parsedURL.Scheme) + } + + switch cfg.Exporter.Protocol { + case "http/protobuf": + exporterOpts := []otlptracehttp.Option{ + otlptracehttp.WithEndpointURL(cfg.Exporter.Endpoint), + otlptracehttp.WithTimeout(exportTimeout), + } + return otlptracehttp.New(ctx, exporterOpts...) + case "grpc": + exporterOpts := []otlptracegrpc.Option{ + otlptracegrpc.WithEndpointURL(cfg.Exporter.Endpoint), + otlptracegrpc.WithTimeout(exportTimeout), + } + return otlptracegrpc.New(ctx, exporterOpts...) 
+ default: + return nil, fmt.Errorf("unsupported OTLP protocol: %s", cfg.Exporter.Protocol) + } +} diff --git a/internal/common/observability/lifecycle_bootstrap_test.go b/internal/common/observability/lifecycle_bootstrap_test.go new file mode 100644 index 00000000000..633190a975d --- /dev/null +++ b/internal/common/observability/lifecycle_bootstrap_test.go @@ -0,0 +1,147 @@ +package observability + +import ( + "context" + "testing" + + "github.com/google/uuid" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel" + + "github.com/armadaproject/armada/internal/lookout/version" +) + +func TestServiceBootstrapPatternServerConfig(t *testing.T) { + cfg, err := ReadObservabilityConfig(ResourceAttributes{ + ServiceName: "server", + ServiceVersion: version.Version, + ServiceInstance: uuid.New().String(), + }) + require.NoError(t, err) + require.Equal(t, "server", cfg.Resource.ServiceName) + require.NotEmpty(t, cfg.Resource.ServiceInstance) +} + +func TestServiceBootstrapPatternExecutorConfig(t *testing.T) { + cfg, err := ReadObservabilityConfig(ResourceAttributes{ + ServiceName: "executor", + ServiceVersion: version.Version, + ServiceInstance: uuid.New().String(), + }) + require.NoError(t, err) + require.Equal(t, "executor", cfg.Resource.ServiceName) +} + +func TestServiceBootstrapPatternSchedulerConfig(t *testing.T) { + cfg, err := ReadObservabilityConfig(ResourceAttributes{ + ServiceName: "scheduler", + ServiceVersion: version.Version, + ServiceInstance: uuid.New().String(), + }) + require.NoError(t, err) + require.Equal(t, "scheduler", cfg.Resource.ServiceName) +} + +func TestBootstrapWithDisabledOTel(t *testing.T) { + cfg, err := ReadObservabilityConfig(ResourceAttributes{ + ServiceName: "test-service", + ServiceVersion: "test", + ServiceInstance: uuid.New().String(), + }) + require.NoError(t, err) + require.False(t, cfg.Enabled) + + err = InitOTel(cfg) + require.NoError(t, err) + + tp := otel.GetTracerProvider() + tracer := tp.Tracer("test") + _, span 
:= tracer.Start(context.Background(), "test-span") + defer span.End() + + spanCtx := span.SpanContext() + require.False(t, spanCtx.IsValid(), "Span should not be valid when OTel is disabled") + + err = ShutdownOTel(context.Background()) + require.NoError(t, err) +} + +func TestBootstrapWithEnabledOTelAndInvalidCollector(t *testing.T) { + defaults := ResourceAttributes{ + ServiceName: "test-service", + ServiceVersion: "test", + ServiceInstance: uuid.New().String(), + } + cfg, err := (ObservabilityConfig{ + Enabled: true, + Exporter: OTLPExporterConfig{ + Endpoint: "http://localhost:19999", + }, + }).WithDefaults(defaults) + require.NoError(t, err) + require.True(t, cfg.Enabled) + + err = InitOTel(cfg) + require.NoError(t, err) + + tp := otel.GetTracerProvider() + require.NotNil(t, tp) + + tracer := tp.Tracer("test-bootstrap") + _, span := tracer.Start(context.Background(), "bootstrap-test-span") + + spanCtx := span.SpanContext() + require.True(t, spanCtx.IsValid(), "Span should be valid even with unreachable collector (fail-open)") + require.NotEmpty(t, spanCtx.TraceID().String()) + require.NotEmpty(t, spanCtx.SpanID().String()) + + span.End() + + err = ShutdownOTel(context.Background()) + require.NoError(t, err) +} + +func TestBootstrapResourceAttributesFromConfig(t *testing.T) { + cfg, err := (ObservabilityConfig{ + Resource: ResourceAttributes{ + ServiceName: "configured-service", + ServiceVersion: "configured-v1", + }, + }).WithDefaults(ResourceAttributes{ + ServiceName: "default-service", + ServiceVersion: "default-version", + ServiceInstance: uuid.New().String(), + }) + require.NoError(t, err) + require.Equal(t, "configured-service", cfg.Resource.ServiceName) + require.Equal(t, "configured-v1", cfg.Resource.ServiceVersion) +} + +func TestBootstrapInitShutdownMultipleTimes(t *testing.T) { + for i := 0; i < 3; i++ { + defaults := ResourceAttributes{ + ServiceName: "test-service", + ServiceVersion: "test", + ServiceInstance: uuid.New().String(), + } + cfg, err := 
(ObservabilityConfig{ + Enabled: true, + Exporter: OTLPExporterConfig{ + Endpoint: "http://localhost:19999", + }, + }).WithDefaults(defaults) + require.NoError(t, err) + + err = InitOTel(cfg) + require.NoError(t, err) + + tp := otel.GetTracerProvider() + tracer := tp.Tracer("test") + _, span := tracer.Start(context.Background(), "test-span") + require.True(t, span.SpanContext().IsValid()) + span.End() + + err = ShutdownOTel(context.Background()) + require.NoError(t, err) + } +} diff --git a/internal/common/observability/lifecycle_test.go b/internal/common/observability/lifecycle_test.go new file mode 100644 index 00000000000..3a1f1c72c00 --- /dev/null +++ b/internal/common/observability/lifecycle_test.go @@ -0,0 +1,600 @@ +package observability + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel" + metricnoop "go.opentelemetry.io/otel/metric/noop" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/sdk/trace/tracetest" + tracenoop "go.opentelemetry.io/otel/trace/noop" +) + +func TestOtelLifecycleCollectorReachable(t *testing.T) { + spanRecorder := tracetest.NewSpanRecorder() + exportedSpans := make(chan []sdktrace.ReadOnlySpan, 1) + + mockCollector := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + spans := spanRecorder.Ended() + if len(spans) > 0 { + exportedSpans <- spans + } + w.WriteHeader(http.StatusOK) + })) + defer mockCollector.Close() + + cfg := ObservabilityConfig{ + Enabled: true, + Exporter: OTLPExporterConfig{ + Endpoint: mockCollector.URL, + Protocol: DefaultOtlpHTTPProtocol, + }, + Traces: TracesConfig{ + Sampler: SamplerParentBasedTraceRatio, + SamplerArg: 1.0, + }, + Resource: ResourceAttributes{ + ServiceName: "test-service", + ServiceVersion: "1.0.0", + ServiceInstance: "test-instance-1", + }, + } + + err := InitOTel(cfg) + require.NoError(t, err, 
"InitOTel should succeed with reachable collector") + + tp := otel.GetTracerProvider() + assert.NotNil(t, tp, "Global tracer provider should be set") + + tracer := tp.Tracer("test-tracer") + assert.NotNil(t, tracer, "Tracer should not be nil") + + ctx := context.Background() + _, span := tracer.Start(ctx, "test-span") + span.End() + + err = ShutdownOTel(context.Background()) + assert.NoError(t, err, "ShutdownOTel should succeed") +} + +func TestOtelAPIsAreSafeBeforeInit(t *testing.T) { + setNoopOTel() + + tracer := otel.Tracer("pre-init-tracer") + meter := otel.Meter("pre-init-meter") + require.NotNil(t, tracer) + require.NotNil(t, meter) + + _, span := tracer.Start(context.Background(), "pre-init-span") + require.NotPanics(t, func() { span.End() }) + + _, err := meter.Int64Counter("pre_init_counter") + require.NoError(t, err) +} + +func TestOtelLifecycleFailOpenWhenCollectorDown(t *testing.T) { + cfg := ObservabilityConfig{ + Enabled: true, + Exporter: OTLPExporterConfig{ + Endpoint: "http://localhost:19999", + Protocol: DefaultOtlpHTTPProtocol, + }, + Traces: TracesConfig{ + Sampler: SamplerParentBasedTraceRatio, + SamplerArg: 1.0, + }, + Resource: ResourceAttributes{ + ServiceName: "test-service", + ServiceVersion: "1.0.0", + ServiceInstance: "test-instance-2", + }, + } + + err := InitOTel(cfg) + require.NoError(t, err, "InitOTel should succeed even when collector is unreachable (fail-open)") + + tp := otel.GetTracerProvider() + assert.NotNil(t, tp, "Global tracer provider should be set even with unreachable collector") + + tracer := tp.Tracer("test-tracer") + assert.NotNil(t, tracer, "Tracer should not be nil") + + ctx := context.Background() + _, span := tracer.Start(ctx, "test-span") + span.End() + + err = ShutdownOTel(context.Background()) + assert.NoError(t, err, "ShutdownOTel should succeed") +} + +func TestOtelShutdownFlushes(t *testing.T) { + mockCollector := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + 
w.WriteHeader(http.StatusOK) + })) + defer mockCollector.Close() + + cfg := ObservabilityConfig{ + Enabled: true, + Exporter: OTLPExporterConfig{ + Endpoint: mockCollector.URL, + Protocol: DefaultOtlpHTTPProtocol, + }, + Traces: TracesConfig{ + Sampler: "always_on", + SamplerArg: 1.0, + }, + Resource: ResourceAttributes{ + ServiceName: "test-service", + ServiceVersion: "1.0.0", + ServiceInstance: "test-instance-3", + }, + } + + err := InitOTel(cfg) + require.NoError(t, err) + + tracer := otel.GetTracerProvider().Tracer("test-tracer") + ctx := context.Background() + _, span := tracer.Start(ctx, "test-span") + span.End() + + start := time.Now() + err = ShutdownOTel(context.Background()) + elapsed := time.Since(start) + + assert.NoError(t, err, "ShutdownOTel should succeed") + assert.LessOrEqual(t, elapsed, shutdownTimeout+2*time.Second, "Shutdown should complete within timeout bounds") +} + +func TestOtelDisabledWhenConfigDisabled(t *testing.T) { + cfg := ObservabilityConfig{ + Enabled: false, + Exporter: OTLPExporterConfig{ + Endpoint: "http://localhost:4318", + Protocol: DefaultOtlpHTTPProtocol, + }, + Traces: TracesConfig{ + Sampler: SamplerParentBasedTraceRatio, + SamplerArg: 1.0, + }, + Resource: ResourceAttributes{ + ServiceName: "test-service", + ServiceVersion: "1.0.0", + ServiceInstance: "test-instance-4", + }, + } + + err := InitOTel(cfg) + require.NoError(t, err, "InitOTel should succeed when disabled") + assert.IsType(t, tracenoop.TracerProvider{}, otel.GetTracerProvider()) + assert.IsType(t, metricnoop.MeterProvider{}, otel.GetMeterProvider()) + + err = ShutdownOTel(context.Background()) + assert.NoError(t, err, "ShutdownOTel should succeed when OTel was disabled") +} + +func TestOtelShutdownResetsToNoopProviders(t *testing.T) { + mockCollector := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer mockCollector.Close() + + cfg := ObservabilityConfig{ + Enabled: true, + Exporter: 
OTLPExporterConfig{ + Endpoint: mockCollector.URL, + Protocol: DefaultOtlpHTTPProtocol, + }, + Traces: TracesConfig{ + Sampler: SamplerParentBasedTraceRatio, + SamplerArg: 1.0, + }, + Resource: ResourceAttributes{ + ServiceName: "test-service", + ServiceVersion: "1.0.0", + ServiceInstance: "test-instance-noop-reset", + }, + } + + require.NoError(t, InitOTel(cfg)) + require.NoError(t, ShutdownOTel(context.Background())) + assert.IsType(t, tracenoop.TracerProvider{}, otel.GetTracerProvider()) + assert.IsType(t, metricnoop.MeterProvider{}, otel.GetMeterProvider()) +} + +func TestOtelPropagatorSetup(t *testing.T) { + mockCollector := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer mockCollector.Close() + + cfg := ObservabilityConfig{ + Enabled: true, + Exporter: OTLPExporterConfig{ + Endpoint: mockCollector.URL, + Protocol: DefaultOtlpHTTPProtocol, + }, + Traces: TracesConfig{ + Sampler: SamplerParentBasedTraceRatio, + SamplerArg: 1.0, + }, + Resource: ResourceAttributes{ + ServiceName: "test-service", + ServiceVersion: "1.0.0", + ServiceInstance: "test-instance-5", + }, + } + + err := InitOTel(cfg) + require.NoError(t, err) + + propagator := otel.GetTextMapPropagator() + assert.NotNil(t, propagator, "Global propagator should be set") + + carrier := make(map[string]string) + ctx := context.Background() + + tracer := otel.GetTracerProvider().Tracer("test-tracer") + ctx, span := tracer.Start(ctx, "test-span") + defer span.End() + + propagator.Inject(ctx, &testCarrier{data: carrier}) + + assert.Contains(t, carrier, "traceparent", "Propagator should inject W3C traceparent header") + + err = ShutdownOTel(context.Background()) + assert.NoError(t, err) +} + +func TestOtelSamplerConfiguration(t *testing.T) { + tests := []struct { + name string + sampler string + samplerArg float64 + shouldFail bool + }{ + { + name: "parent_based_trace_id_ratio", + sampler: "parent_based_trace_id_ratio", + 
samplerArg: 0.5, + shouldFail: false, + }, + { + name: "always_on", + sampler: "always_on", + samplerArg: 1.0, + shouldFail: false, + }, + { + name: "always_off", + sampler: "always_off", + samplerArg: 0.0, + shouldFail: false, + }, + { + name: "trace_id_ratio", + sampler: "trace_id_ratio", + samplerArg: 0.1, + shouldFail: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mockCollector := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer mockCollector.Close() + + cfg := ObservabilityConfig{ + Enabled: true, + Exporter: OTLPExporterConfig{ + Endpoint: mockCollector.URL, + Protocol: DefaultOtlpHTTPProtocol, + }, + Traces: TracesConfig{ + Sampler: tt.sampler, + SamplerArg: tt.samplerArg, + }, + Resource: ResourceAttributes{ + ServiceName: "test-service", + ServiceVersion: "1.0.0", + ServiceInstance: "test-instance-sampler", + }, + } + + err := InitOTel(cfg) + if tt.shouldFail { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + + if err == nil { + tp := otel.GetTracerProvider() + assert.NotNil(t, tp) + _ = ShutdownOTel(context.Background()) + } + }) + } +} + +func TestSamplerDefaultsByConfig(t *testing.T) { + defaults := ResourceAttributes{ + ServiceName: "server", + ServiceVersion: "1.0.0", + ServiceInstance: "test-defaults", + } + + cfg, err := ReadObservabilityConfig(defaults) + require.NoError(t, err) + + assert.Equal(t, SamplerParentBasedTraceRatio, cfg.Traces.Sampler) + assert.Equal(t, 1.0, cfg.Traces.SamplerArg) +} + +func TestSamplerOverrideByConfig(t *testing.T) { + tests := []struct { + name string + sampler string + samplerArg float64 + wantArg float64 + }{ + {name: "always_on", sampler: "always_on", samplerArg: 0.5, wantArg: 0.5}, + {name: "always_off", sampler: "always_off", samplerArg: 0.5, wantArg: 0.5}, + {name: "trace_id_ratio", sampler: "trace_id_ratio", samplerArg: 0.10, wantArg: 0.10}, + {name: 
"parent_based_trace_id_ratio", sampler: SamplerParentBasedTraceRatio, samplerArg: 0.25, wantArg: 0.25}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + defaults := ResourceAttributes{ + ServiceName: "server", + ServiceVersion: "1.0.0", + ServiceInstance: "test-overrides", + } + + cfg, err := (ObservabilityConfig{ + Traces: TracesConfig{ + Sampler: tt.sampler, + SamplerArg: tt.samplerArg, + }, + }).WithDefaults(defaults) + require.NoError(t, err) + + assert.Equal(t, tt.sampler, cfg.Traces.Sampler) + assert.Equal(t, tt.wantArg, cfg.Traces.SamplerArg) + }) + } +} + +func TestSamplingRatioEnforcesValidRange(t *testing.T) { + tests := []struct { + name string + sampler string + samplerArg float64 + shouldFail bool + }{ + {name: "trace_id_ratio below 0", sampler: "trace_id_ratio", samplerArg: -0.01, shouldFail: true}, + {name: "trace_id_ratio above 1", sampler: "trace_id_ratio", samplerArg: 1.01, shouldFail: true}, + {name: "parent_based_trace_id_ratio below 0", sampler: SamplerParentBasedTraceRatio, samplerArg: -0.5, shouldFail: true}, + {name: "parent_based_trace_id_ratio above 1", sampler: SamplerParentBasedTraceRatio, samplerArg: 2.0, shouldFail: true}, + {name: "trace_id_ratio at lower bound", sampler: "trace_id_ratio", samplerArg: 0.0, shouldFail: false}, + {name: "trace_id_ratio at upper bound", sampler: "trace_id_ratio", samplerArg: 1.0, shouldFail: false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + defaults := ResourceAttributes{ + ServiceName: "server", + ServiceVersion: "1.0.0", + ServiceInstance: "test-ratio-range", + } + + cfg, err := (ObservabilityConfig{ + Traces: TracesConfig{ + Sampler: tt.sampler, + SamplerArg: tt.samplerArg, + }, + }).WithDefaults(defaults) + if tt.shouldFail { + require.Error(t, err) + assert.ErrorContains(t, err, ConfigOtelTracesSamplerArg) + return + } + + require.NoError(t, err) + assert.Equal(t, tt.sampler, cfg.Traces.Sampler) + assert.Equal(t, tt.samplerArg, 
cfg.Traces.SamplerArg) + }) + } +} + +func TestExporterBackpressureSafetyBounds(t *testing.T) { + assert.Equal(t, 512, maxExportBatch) + assert.Equal(t, 2048, maxBatchQueue) + assert.Equal(t, 10*time.Second, exportTimeout) + assert.Equal(t, 5*time.Second, batchTimeout) + + // Slow collector to induce exporter pressure without failing InitOTel. + mockCollector := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(200 * time.Millisecond) + w.WriteHeader(http.StatusOK) + })) + defer mockCollector.Close() + + cfg := ObservabilityConfig{ + Enabled: true, + Exporter: OTLPExporterConfig{ + Endpoint: mockCollector.URL, + Protocol: DefaultOtlpHTTPProtocol, + }, + Traces: TracesConfig{ + Sampler: "always_on", + SamplerArg: 1.0, + }, + Resource: ResourceAttributes{ + ServiceName: "test-service", + ServiceVersion: "1.0.0", + ServiceInstance: "test-backpressure", + }, + } + + err := InitOTel(cfg) + require.NoError(t, err) + t.Cleanup(func() { + _ = ShutdownOTel(context.Background()) + }) + + tracer := otel.GetTracerProvider().Tracer("backpressure-test") + start := time.Now() + for range maxBatchQueue * 2 { + _, span := tracer.Start(context.Background(), "backpressure-span") + span.End() + } + elapsed := time.Since(start) + + // If queue is bounded and drop-on-backpressure is active, span creation/end should remain fast. 
+ assert.Less(t, elapsed, 3*time.Second) +} + +type testCarrier struct { + data map[string]string +} + +func (c *testCarrier) Get(key string) string { + return c.data[key] +} + +func (c *testCarrier) Set(key, value string) { + c.data[key] = value +} + +func (c *testCarrier) Keys() []string { + keys := make([]string, 0, len(c.data)) + for k := range c.data { + keys = append(keys, k) + } + return keys +} + +func TestOtelMultipleInitShutdownCycles(t *testing.T) { + mockCollector := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer mockCollector.Close() + + cfg := ObservabilityConfig{ + Enabled: true, + Exporter: OTLPExporterConfig{ + Endpoint: mockCollector.URL, + Protocol: DefaultOtlpHTTPProtocol, + }, + Traces: TracesConfig{ + Sampler: SamplerParentBasedTraceRatio, + SamplerArg: 1.0, + }, + Resource: ResourceAttributes{ + ServiceName: "test-service", + ServiceVersion: "1.0.0", + ServiceInstance: "test-instance-multi", + }, + } + + for i := range 3 { + err := InitOTel(cfg) + require.NoError(t, err, "InitOTel cycle %d should succeed", i) + + tracer := otel.GetTracerProvider().Tracer("test-tracer") + ctx := context.Background() + _, span := tracer.Start(ctx, "test-span") + span.End() + + err = ShutdownOTel(context.Background()) + assert.NoError(t, err, "ShutdownOTel cycle %d should succeed", i) + } +} + +func TestOtelInitWithInvalidResourceAttributes(t *testing.T) { + mockCollector := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer mockCollector.Close() + + cfg := ObservabilityConfig{ + Enabled: true, + Exporter: OTLPExporterConfig{ + Endpoint: mockCollector.URL, + Protocol: DefaultOtlpHTTPProtocol, + }, + Traces: TracesConfig{ + Sampler: SamplerParentBasedTraceRatio, + SamplerArg: 1.0, + }, + Resource: ResourceAttributes{ + ServiceName: "", + ServiceVersion: "1.0.0", + ServiceInstance: "test-instance-invalid", + 
}, + } + + err := cfg.Validate() + assert.Error(t, err, "Config validation should fail with empty service name") +} + +func TestOtelShutdownWithoutInit(t *testing.T) { + setNoopOTel() + err := ShutdownOTel(context.Background()) + assert.NoError(t, err, "ShutdownOTel should handle nil provider gracefully") +} + +func TestOtelTracerProviderIsGloballySet(t *testing.T) { + mockCollector := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer mockCollector.Close() + + cfg := ObservabilityConfig{ + Enabled: true, + Exporter: OTLPExporterConfig{ + Endpoint: mockCollector.URL, + Protocol: DefaultOtlpHTTPProtocol, + }, + Traces: TracesConfig{ + Sampler: SamplerParentBasedTraceRatio, + SamplerArg: 1.0, + }, + Resource: ResourceAttributes{ + ServiceName: "test-service", + ServiceVersion: "1.0.0", + ServiceInstance: "test-instance-global", + }, + } + + err := InitOTel(cfg) + require.NoError(t, err) + + tp := otel.GetTracerProvider() + _, ok := tp.(*sdktrace.TracerProvider) + assert.True(t, ok, "Global tracer provider should be of type *sdktrace.TracerProvider") + + tracer := tp.Tracer("integration-test") + ctx := context.Background() + _, span := tracer.Start(ctx, "global-span-test") + + spanContext := span.SpanContext() + assert.True(t, spanContext.IsValid(), "Span context should be valid") + assert.True(t, spanContext.TraceID().IsValid(), "Trace ID should be valid") + assert.True(t, spanContext.SpanID().IsValid(), "Span ID should be valid") + + span.End() + + err = ShutdownOTel(context.Background()) + assert.NoError(t, err) +} diff --git a/internal/eventingester/configuration/types.go b/internal/eventingester/configuration/types.go index c7fa60c77dd..d0100fbb8c8 100644 --- a/internal/eventingester/configuration/types.go +++ b/internal/eventingester/configuration/types.go @@ -6,6 +6,7 @@ import ( "github.com/redis/go-redis/v9" commonconfig "github.com/armadaproject/armada/internal/common/config" + 
"github.com/armadaproject/armada/internal/common/observability" profilingconfig "github.com/armadaproject/armada/internal/common/profiling/configuration" "github.com/armadaproject/armada/internal/leaderelection" ) @@ -20,6 +21,8 @@ type EventIngesterConfiguration struct { MetricsPort uint16 // Metrics configuration for Redis memory metrics collection Metrics MetricsConfig + // Configuration controlling OpenTelemetry observability + Observability observability.ObservabilityConfig // General Pulsar configuration Pulsar commonconfig.PulsarConfig // Pulsar subscription name diff --git a/internal/eventingester/configuration/validation.go b/internal/eventingester/configuration/validation.go index cc347cb1f9a..e05f07f74b5 100644 --- a/internal/eventingester/configuration/validation.go +++ b/internal/eventingester/configuration/validation.go @@ -1,9 +1,14 @@ package configuration import ( + "os" + "github.com/go-playground/validator/v10" + "github.com/google/uuid" commonconfig "github.com/armadaproject/armada/internal/common/config" + "github.com/armadaproject/armada/internal/common/observability" + "github.com/armadaproject/armada/internal/lookout/version" ) func (c EventIngesterConfiguration) Validate() error { @@ -11,6 +16,19 @@ func (c EventIngesterConfiguration) Validate() error { return validate.Struct(c) } -func (c EventIngesterConfiguration) Mutate() (commonconfig.Config, error) { +func (c *EventIngesterConfiguration) Mutate() (commonconfig.Config, error) { + serviceInstance, err := os.Hostname() + if err != nil { + serviceInstance = uuid.New().String() + } + observabilityConfig, err := c.Observability.WithDefaults(observability.ResourceAttributes{ + ServiceName: "eventingester", + ServiceVersion: version.Version, + ServiceInstance: serviceInstance, + }) + if err != nil { + return nil, err + } + c.Observability = observabilityConfig return c, nil } diff --git a/internal/executor/configuration/types.go b/internal/executor/configuration/types.go index 
c8665aae75d..be0bf128e9d 100644 --- a/internal/executor/configuration/types.go +++ b/internal/executor/configuration/types.go @@ -5,6 +5,7 @@ import ( "google.golang.org/grpc/keepalive" + "github.com/armadaproject/armada/internal/common/observability" profilingconfig "github.com/armadaproject/armada/internal/common/profiling/configuration" armadaresource "github.com/armadaproject/armada/internal/common/resource" "github.com/armadaproject/armada/internal/executor/categorizer" @@ -178,6 +179,7 @@ type ExecutorConfiguration struct { HttpPort uint16 // If non-nil, net/http/pprof endpoints are exposed on localhost on this port. Profiling *profilingconfig.ProfilingConfig + Observability observability.ObservabilityConfig Metric MetricConfiguration Application ApplicationConfiguration ExecutorApiConnection client.ApiConnectionDetails diff --git a/internal/executor/configuration/validation.go b/internal/executor/configuration/validation.go index 8106459223e..26bb477d80e 100644 --- a/internal/executor/configuration/validation.go +++ b/internal/executor/configuration/validation.go @@ -1,9 +1,14 @@ package configuration import ( + "os" + "github.com/go-playground/validator/v10" + "github.com/google/uuid" commonconfig "github.com/armadaproject/armada/internal/common/config" + "github.com/armadaproject/armada/internal/common/observability" + "github.com/armadaproject/armada/internal/lookout/version" ) func (c ExecutorConfiguration) Validate() error { @@ -11,6 +16,19 @@ func (c ExecutorConfiguration) Validate() error { return validate.Struct(c) } -func (c ExecutorConfiguration) Mutate() (commonconfig.Config, error) { +func (c *ExecutorConfiguration) Mutate() (commonconfig.Config, error) { + serviceInstance, err := os.Hostname() + if err != nil { + serviceInstance = uuid.New().String() + } + observabilityConfig, err := c.Observability.WithDefaults(observability.ResourceAttributes{ + ServiceName: "executor", + ServiceVersion: version.Version, + ServiceInstance: serviceInstance, + 
}) + if err != nil { + return nil, err + } + c.Observability = observabilityConfig return c, nil } diff --git a/internal/lookout/configuration/types.go b/internal/lookout/configuration/types.go index 4dff8a42fec..77ba9a13baf 100644 --- a/internal/lookout/configuration/types.go +++ b/internal/lookout/configuration/types.go @@ -5,6 +5,7 @@ import ( authconfig "github.com/armadaproject/armada/internal/common/auth/configuration" "github.com/armadaproject/armada/internal/common/database" + "github.com/armadaproject/armada/internal/common/observability" profilingconfig "github.com/armadaproject/armada/internal/common/profiling/configuration" "github.com/armadaproject/armada/internal/server/configuration" ) @@ -12,9 +13,10 @@ import ( type LookoutConfig struct { Auth authconfig.AuthConfig - ApiPort int - Profiling *profilingconfig.ProfilingConfig - MetricsPort int + ApiPort int + Profiling *profilingconfig.ProfilingConfig + MetricsPort int + Observability observability.ObservabilityConfig CorsAllowedOrigins []string Tls TlsConfig diff --git a/internal/lookout/configuration/validation.go b/internal/lookout/configuration/validation.go index a8060d5ed31..e8725d06703 100644 --- a/internal/lookout/configuration/validation.go +++ b/internal/lookout/configuration/validation.go @@ -1,9 +1,14 @@ package configuration import ( + "os" + "github.com/go-playground/validator/v10" + "github.com/google/uuid" commonconfig "github.com/armadaproject/armada/internal/common/config" + "github.com/armadaproject/armada/internal/common/observability" + "github.com/armadaproject/armada/internal/lookout/version" ) func (c LookoutConfig) Validate() error { @@ -11,6 +16,19 @@ func (c LookoutConfig) Validate() error { return validate.Struct(c) } -func (c LookoutConfig) Mutate() (commonconfig.Config, error) { +func (c *LookoutConfig) Mutate() (commonconfig.Config, error) { + serviceInstance, err := os.Hostname() + if err != nil { + serviceInstance = uuid.New().String() + } + observabilityConfig, err := 
c.Observability.WithDefaults(observability.ResourceAttributes{ + ServiceName: "lookout", + ServiceVersion: version.Version, + ServiceInstance: serviceInstance, + }) + if err != nil { + return nil, err + } + c.Observability = observabilityConfig return c, nil } diff --git a/internal/lookoutingester/configuration/types.go b/internal/lookoutingester/configuration/types.go index 1402454c513..7bebcadf7c6 100644 --- a/internal/lookoutingester/configuration/types.go +++ b/internal/lookoutingester/configuration/types.go @@ -6,6 +6,7 @@ import ( log "github.com/armadaproject/armada/internal/common/logging" commonconfig "github.com/armadaproject/armada/internal/common/config" + "github.com/armadaproject/armada/internal/common/observability" profilingconfig "github.com/armadaproject/armada/internal/common/profiling/configuration" "github.com/armadaproject/armada/internal/server/configuration" ) @@ -15,6 +16,8 @@ type LookoutIngesterConfiguration struct { Postgres configuration.PostgresConfig // Metrics configuration MetricsPort uint16 + // Configuration controlling OpenTelemetry observability + Observability observability.ObservabilityConfig // General Pulsar configuration Pulsar commonconfig.PulsarConfig // Pulsar subscription name diff --git a/internal/lookoutingester/configuration/validation.go b/internal/lookoutingester/configuration/validation.go index 5fe6ace8da3..215f6efcf98 100644 --- a/internal/lookoutingester/configuration/validation.go +++ b/internal/lookoutingester/configuration/validation.go @@ -1,9 +1,14 @@ package configuration import ( + "os" + "github.com/go-playground/validator/v10" + "github.com/google/uuid" commonconfig "github.com/armadaproject/armada/internal/common/config" + "github.com/armadaproject/armada/internal/common/observability" + "github.com/armadaproject/armada/internal/lookout/version" ) func (c LookoutIngesterConfiguration) Validate() error { @@ -11,6 +16,19 @@ func (c LookoutIngesterConfiguration) Validate() error { return 
validate.Struct(c) } -func (c LookoutIngesterConfiguration) Mutate() (commonconfig.Config, error) { +func (c *LookoutIngesterConfiguration) Mutate() (commonconfig.Config, error) { + serviceInstance, err := os.Hostname() + if err != nil { + serviceInstance = uuid.New().String() + } + observabilityConfig, err := c.Observability.WithDefaults(observability.ResourceAttributes{ + ServiceName: "lookoutingester", + ServiceVersion: version.Version, + ServiceInstance: serviceInstance, + }) + if err != nil { + return nil, err + } + c.Observability = observabilityConfig return c, nil } diff --git a/internal/scheduler/configuration/configuration.go b/internal/scheduler/configuration/configuration.go index 665f074aec1..9711375558b 100644 --- a/internal/scheduler/configuration/configuration.go +++ b/internal/scheduler/configuration/configuration.go @@ -10,6 +10,7 @@ import ( commonconfig "github.com/armadaproject/armada/internal/common/config" "github.com/armadaproject/armada/internal/common/database" grpcconfig "github.com/armadaproject/armada/internal/common/grpc/configuration" + "github.com/armadaproject/armada/internal/common/observability" profilingconfig "github.com/armadaproject/armada/internal/common/profiling/configuration" armadaresource "github.com/armadaproject/armada/internal/common/resource" "github.com/armadaproject/armada/internal/common/types" @@ -38,6 +39,8 @@ type Configuration struct { Leader leaderelection.Config // Configuration controlling metrics Metrics MetricsConfig + // Configuration controlling OpenTelemetry observability + Observability observability.ObservabilityConfig // Scheduler configuration (this is shared with the old scheduler) Scheduling SchedulingConfig Auth authconfig.AuthConfig diff --git a/internal/scheduler/configuration/validation.go b/internal/scheduler/configuration/validation.go index a6f8268cf6f..e3301e55134 100644 --- a/internal/scheduler/configuration/validation.go +++ b/internal/scheduler/configuration/validation.go @@ -2,11 
+2,15 @@ package configuration import ( "fmt" + "os" "github.com/go-playground/validator/v10" + "github.com/google/uuid" "github.com/armadaproject/armada/internal/common/config" log "github.com/armadaproject/armada/internal/common/logging" + "github.com/armadaproject/armada/internal/common/observability" + "github.com/armadaproject/armada/internal/lookout/version" ) func (c *Configuration) Mutate() (config.Config, error) { @@ -20,6 +24,20 @@ func (c *Configuration) Mutate() (config.Config, error) { c.Scheduling.MaxNewJobSchedulingDuration = c.NewJobsSchedulingTimeout } + serviceInstance, err := os.Hostname() + if err != nil { + serviceInstance = uuid.New().String() + } + observabilityConfig, err := c.Observability.WithDefaults(observability.ResourceAttributes{ + ServiceName: "scheduler", + ServiceVersion: version.Version, + ServiceInstance: serviceInstance, + }) + if err != nil { + return nil, err + } + c.Observability = observabilityConfig + return c, nil } diff --git a/internal/scheduler/configuration/validation_test.go b/internal/scheduler/configuration/validation_test.go index c42e3026345..24d91906db7 100644 --- a/internal/scheduler/configuration/validation_test.go +++ b/internal/scheduler/configuration/validation_test.go @@ -5,9 +5,11 @@ import ( "time" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" commonconfig "github.com/armadaproject/armada/internal/common/config" + "github.com/armadaproject/armada/internal/common/observability" "github.com/armadaproject/armada/internal/common/types" "github.com/armadaproject/armada/internal/leaderelection" schedulerdb "github.com/armadaproject/armada/internal/scheduler/database" @@ -74,11 +76,29 @@ func TestMutate(t *testing.T) { t.Run(name, func(t *testing.T) { result, err := tc.input.Mutate() assert.NoError(t, err) + + resultConfig, ok := result.(*Configuration) + require.True(t, ok) + assertSchedulerObservabilityConfig(t, resultConfig.Observability) + 
tc.expected.Observability = resultConfig.Observability + assert.Equal(t, tc.expected, result) }) } } +func assertSchedulerObservabilityConfig(t *testing.T, config observability.ObservabilityConfig) { + t.Helper() + assert.False(t, config.Enabled) + assert.Equal(t, observability.DefaultOtlpHTTPEndpoint, config.Exporter.Endpoint) + assert.Equal(t, observability.DefaultOtlpHTTPProtocol, config.Exporter.Protocol) + assert.Equal(t, observability.SamplerParentBasedTraceRatio, config.Traces.Sampler) + assert.Equal(t, 1.0, config.Traces.SamplerArg) + assert.Equal(t, "scheduler", config.Resource.ServiceName) + assert.Equal(t, "dev", config.Resource.ServiceVersion) + assert.NotEmpty(t, config.Resource.ServiceInstance) +} + func TestValidate_SchedulingTimeoutConfig(t *testing.T) { tests := map[string]struct { config func(c Configuration) Configuration diff --git a/internal/scheduler/schedulerapp.go b/internal/scheduler/schedulerapp.go index daaf793a0d8..56dc81a9b97 100644 --- a/internal/scheduler/schedulerapp.go +++ b/internal/scheduler/schedulerapp.go @@ -1,6 +1,7 @@ package scheduler import ( + "context" "fmt" "net" "net/http" @@ -25,6 +26,7 @@ import ( grpcCommon "github.com/armadaproject/armada/internal/common/grpc" "github.com/armadaproject/armada/internal/common/health" log "github.com/armadaproject/armada/internal/common/logging" + "github.com/armadaproject/armada/internal/common/observability" "github.com/armadaproject/armada/internal/common/profiling" "github.com/armadaproject/armada/internal/common/pulsarutils" "github.com/armadaproject/armada/internal/common/pulsarutils/jobsetevents" @@ -56,6 +58,20 @@ import ( func Run(config schedulerconfig.Configuration) error { g, ctx := armadacontext.ErrGroup(app.CreateContextWithShutdown()) + // //////////////////////////////////////////////////////////////////////// + // OpenTelemetry + // //////////////////////////////////////////////////////////////////////// + if err := observability.InitOTel(config.Observability); err != 
nil { + log.Warnf("Failed to initialize OTel: %v", err) + } + defer func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := observability.ShutdownOTel(ctx); err != nil { + log.Warnf("Failed to shutdown OTel: %v", err) + } + }() + // //////////////////////////////////////////////////////////////////////// // Expose profiling endpoints if enabled. // //////////////////////////////////////////////////////////////////////// diff --git a/internal/scheduleringester/config.go b/internal/scheduleringester/config.go index 7ae94a45c44..bd0bfc18d7d 100644 --- a/internal/scheduleringester/config.go +++ b/internal/scheduleringester/config.go @@ -1,12 +1,16 @@ package scheduleringester import ( + "os" "time" "github.com/go-playground/validator/v10" + "github.com/google/uuid" commonconfig "github.com/armadaproject/armada/internal/common/config" + "github.com/armadaproject/armada/internal/common/observability" profilingconfig "github.com/armadaproject/armada/internal/common/profiling/configuration" + "github.com/armadaproject/armada/internal/lookout/version" schedulerdb "github.com/armadaproject/armada/internal/scheduler/database" "github.com/armadaproject/armada/internal/server/configuration" ) @@ -16,6 +20,8 @@ type Configuration struct { Postgres configuration.PostgresConfig // Metrics Port MetricsPort uint16 + // Configuration controlling OpenTelemetry observability + Observability observability.ObservabilityConfig // General Pulsar configuration Pulsar commonconfig.PulsarConfig // Pulsar subscription name @@ -32,7 +38,20 @@ type Configuration struct { JobMetadataMigrationPhase schedulerdb.JobMetadataMigrationPhase `validate:"required,oneof=legacy dualWrite cutover"` } -func (c Configuration) Mutate() (commonconfig.Config, error) { +func (c *Configuration) Mutate() (commonconfig.Config, error) { + serviceInstance, err := os.Hostname() + if err != nil { + serviceInstance = uuid.New().String() + } + observabilityConfig, err 
:= c.Observability.WithDefaults(observability.ResourceAttributes{ + ServiceName: "scheduleringester", + ServiceVersion: version.Version, + ServiceInstance: serviceInstance, + }) + if err != nil { + return nil, err + } + c.Observability = observabilityConfig return c, nil } diff --git a/internal/server/configuration/types.go b/internal/server/configuration/types.go index 509c0ed330e..c76f654d008 100644 --- a/internal/server/configuration/types.go +++ b/internal/server/configuration/types.go @@ -9,6 +9,7 @@ import ( authconfig "github.com/armadaproject/armada/internal/common/auth/configuration" commonconfig "github.com/armadaproject/armada/internal/common/config" grpcconfig "github.com/armadaproject/armada/internal/common/grpc/configuration" + "github.com/armadaproject/armada/internal/common/observability" profilingconfig "github.com/armadaproject/armada/internal/common/profiling/configuration" armadaresource "github.com/armadaproject/armada/internal/common/resource" "github.com/armadaproject/armada/pkg/client" @@ -17,10 +18,11 @@ import ( type ArmadaConfig struct { Auth authconfig.AuthConfig - GrpcPort uint16 - HttpPort uint16 - MetricsPort uint16 - Profiling *profilingconfig.ProfilingConfig + GrpcPort uint16 + HttpPort uint16 + MetricsPort uint16 + Profiling *profilingconfig.ProfilingConfig + Observability observability.ObservabilityConfig CorsAllowedOrigins []string GrpcGatewayPath string diff --git a/internal/server/configuration/validation.go b/internal/server/configuration/validation.go index ca7134db28c..c55f668ad5d 100644 --- a/internal/server/configuration/validation.go +++ b/internal/server/configuration/validation.go @@ -1,9 +1,14 @@ package configuration import ( + "os" + "github.com/go-playground/validator/v10" + "github.com/google/uuid" commonconfig "github.com/armadaproject/armada/internal/common/config" + "github.com/armadaproject/armada/internal/common/observability" + "github.com/armadaproject/armada/internal/lookout/version" ) func (c ArmadaConfig) 
Validate() error { @@ -11,6 +16,19 @@ func (c ArmadaConfig) Validate() error { return validate.Struct(c) } -func (c ArmadaConfig) Mutate() (commonconfig.Config, error) { +func (c *ArmadaConfig) Mutate() (commonconfig.Config, error) { + serviceInstance, err := os.Hostname() + if err != nil { + serviceInstance = uuid.New().String() + } + observabilityConfig, err := c.Observability.WithDefaults(observability.ResourceAttributes{ + ServiceName: "server", + ServiceVersion: version.Version, + ServiceInstance: serviceInstance, + }) + if err != nil { + return nil, err + } + c.Observability = observabilityConfig return c, nil } diff --git a/internal/testsuite/app.go b/internal/testsuite/app.go index e90a2a00c24..e01aa6d067b 100644 --- a/internal/testsuite/app.go +++ b/internal/testsuite/app.go @@ -20,10 +20,12 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/push" "github.com/renstrom/shortuuid" + "go.opentelemetry.io/otel" "golang.org/x/sync/errgroup" apimachineryYaml "k8s.io/apimachinery/pkg/util/yaml" "sigs.k8s.io/yaml" +
"github.com/armadaproject/armada/internal/common/logging" "github.com/armadaproject/armada/internal/testsuite/build" "github.com/armadaproject/armada/internal/testsuite/eventbenchmark" "github.com/armadaproject/armada/internal/testsuite/eventlogger" @@ -302,6 +304,13 @@ func (a *App) RunTests(ctx context.Context, testSpecs []*api.TestSpec) (*TestSui rv.Finish = time.Now() }() + traceCollector := NewInMemoryTraceCollector() + defer func(ctx context.Context) { + _ = traceCollector.Shutdown(ctx) + }(ctx) + + otel.SetTracerProvider(traceCollector.TracerProvider()) + ctx, cancel := context.WithCancel(ctx) defer cancel() g, ctx := errgroup.WithContext(ctx) @@ -335,7 +344,27 @@ func (a *App) RunTests(ctx context.Context, testSpecs []*api.TestSpec) (*TestSui return nil, err } - // Optionally push metrics. + evidence, err := traceCollector.CollectEvidence() + if err != nil { + logging.WithError(err).Warn("Failed to collect trace evidence") + } else { + evidencePath := filepath.Join(DefaultEvidenceDir, "testsuite-traces.json") + if err := WriteTraceEvidenceFile(evidence, evidencePath); err != nil { + logging.WithError(err).Warnf("Failed to write trace evidence file: %s", evidencePath) + } else { + fmt.Fprintf(a.Out, "Trace evidence written to: %s\n", evidencePath) + fmt.Fprintf(a.Out, "Collected %d spans across %d traces from %d services\n", + evidence.TotalSpans, len(evidence.TraceIDs), len(evidence.ServiceNames)) + } + + if IsStrictModeEnabled() { + if err := ValidateTraceEvidence(evidence); err != nil { + return nil, fmt.Errorf("strict mode enabled: %w", err) + } + fmt.Fprintf(a.Out, "Strict mode: trace evidence validation passed\n") + } + } + if a.Params.PrometheusPushGatewayUrl != "" { if err := pushTestSuiteReportMetrics(rv, a.Params.PrometheusPushGatewayUrl, a.Params.PrometheusPushGatewayJobName); err != nil { return nil, err diff --git a/internal/testsuite/trace_evidence.go b/internal/testsuite/trace_evidence.go new file mode 100644 index 00000000000..f4b97e99eda --- 
/dev/null
+++ b/internal/testsuite/trace_evidence.go
@@ -0,0 +1,239 @@
+package testsuite
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/sdk/resource"
+	sdktrace "go.opentelemetry.io/otel/sdk/trace"
+	"go.opentelemetry.io/otel/sdk/trace/tracetest"
+	semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
+)
+
+const (
+	// DefaultEvidenceDir is the relative directory that trace evidence files are written under.
+	DefaultEvidenceDir = ".sisyphus/evidence"
+	// traceEvidenceStrictModeEnv names the environment variable read by IsStrictModeEnabled.
+	traceEvidenceStrictModeEnv = "OTEL_TRACE_EVIDENCE_STRICT"
+)
+
+// TraceEvidence is the JSON-serializable summary of the spans recorded during a run.
+type TraceEvidence struct {
+	TraceIDs     []string       `json:"trace_ids"`
+	Spans        []SpanEvidence `json:"spans"`
+	ServiceNames []string       `json:"service_names"`
+	TotalSpans   int            `json:"total_spans"`
+}
+
+// SpanEvidence is the JSON-serializable record of a single ended span.
+type SpanEvidence struct {
+	TraceID     string            `json:"trace_id"`
+	SpanID      string            `json:"span_id"`
+	SpanName    string            `json:"span_name"`
+	ServiceName string            `json:"service_name"`
+	Status      string            `json:"status"`
+	Attributes  map[string]string `json:"attributes"`
+}
+
+// InMemoryTracerCollector pairs a tracer provider with the in-memory span
+// recorder that captures the spans it produces.
+type InMemoryTracerCollector struct {
+	spanRecorder   *tracetest.SpanRecorder
+	tracerProvider *sdktrace.TracerProvider
+}
+
+// NewInMemoryTracerCollector returns a collector whose tracer provider samples
+// every span and hands it to an in-memory span recorder.
+func NewInMemoryTracerCollector() *InMemoryTracerCollector {
+	spanRecorder := tracetest.NewSpanRecorder()
+	tracerProvider := sdktrace.NewTracerProvider(
+		sdktrace.WithSampler(sdktrace.AlwaysSample()),
+		sdktrace.WithSpanProcessor(spanRecorder),
+	)
+
+	return &InMemoryTracerCollector{
+		spanRecorder:   spanRecorder,
+		tracerProvider: tracerProvider,
+	}
+}
+
+// NewInMemoryTraceCollector is retained as a backward-compatible alias.
+//
+// Deprecated: use NewInMemoryTracerCollector instead.
+func NewInMemoryTraceCollector() *InMemoryTracerCollector {
+	return NewInMemoryTracerCollector()
+}
+
+// TracerProvider returns the tracer provider whose spans the collector records.
+func (c *InMemoryTracerCollector) TracerProvider() *sdktrace.TracerProvider {
+	return c.tracerProvider
+}
+
+// Shutdown shuts down the collector's tracer provider.
+func (c *InMemoryTracerCollector) Shutdown(ctx context.Context) error {
+	return c.tracerProvider.Shutdown(ctx)
+}
+
+// CollectEvidence summarizes every span the recorder reports as ended. Trace
+// IDs and service names are de-duplicated and sorted; spans keep the order the
+// recorder returned them in. The returned error is always nil.
+func (c *InMemoryTracerCollector) CollectEvidence() (*TraceEvidence, error) {
+	spans := c.spanRecorder.Ended()
+	traceIDs := make(map[string]struct{})
+	serviceNames := make(map[string]struct{})
+	spanEvidence := make([]SpanEvidence, 0, len(spans))
+
+	for _, span := range spans {
+		traceID := span.SpanContext().TraceID().String()
+		spanID := span.SpanContext().SpanID().String()
+		serviceName := serviceNameForSpan(span)
+
+		traceIDs[traceID] = struct{}{}
+		if serviceName != "" {
+			serviceNames[serviceName] = struct{}{}
+		}
+
+		spanEvidence = append(spanEvidence, SpanEvidence{
+			TraceID:     traceID,
+			SpanID:      spanID,
+			SpanName:    span.Name(),
+			ServiceName: serviceName,
+			Status:      spanStatus(span.Status().Code),
+			Attributes:  evidenceAttributes(span.Attributes()),
+		})
+	}
+
+	return &TraceEvidence{
+		TraceIDs:     sortedStringSet(traceIDs),
+		Spans:        spanEvidence,
+		ServiceNames: sortedStringSet(serviceNames),
+		TotalSpans:   len(spans),
+	}, nil
+}
+
+// WriteTraceEvidenceFile writes evidence to path as indented JSON with a
+// trailing newline, creating missing parent directories first. It returns an
+// error when evidence is nil or when directory creation, encoding, or the
+// write fails.
+func WriteTraceEvidenceFile(evidence *TraceEvidence, path string) error {
+	if evidence == nil {
+		return fmt.Errorf("trace evidence must not be nil")
+	}
+
+	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
+		return err
+	}
+
+	data, err := json.MarshalIndent(evidence, "", " ")
+	if err != nil {
+		return err
+	}
+
+	return os.WriteFile(path, append(data, '\n'), 0o644)
+}
+
+// ValidateTraceEvidence returns an error unless evidence is non-nil and holds
+// at least one span, one trace ID, and one service name.
+func ValidateTraceEvidence(evidence *TraceEvidence) error {
+	if evidence == nil {
+		return fmt.Errorf("trace evidence must not be nil")
+	}
+	if evidence.TotalSpans == 0 {
+		return fmt.Errorf("trace evidence must contain at least one span")
+	}
+	if len(evidence.TraceIDs) == 0 {
+		return fmt.Errorf("trace evidence must contain at least one trace id")
+	}
+	if len(evidence.ServiceNames) == 0 {
+		return fmt.Errorf("trace evidence must contain at least one service name")
+	}
+	return nil
+}
+
+// IsStrictModeEnabled reports whether OTEL_TRACE_EVIDENCE_STRICT is set to
+// "true", compared case-insensitively.
+func IsStrictModeEnabled() bool {
+	return strings.EqualFold(os.Getenv(traceEvidenceStrictModeEnv), "true")
+}
+
+// serviceNameForSpan returns the service.name from the span's resource, else
+// from the span's own attributes, else the instrumentation scope name.
+func serviceNameForSpan(span sdktrace.ReadOnlySpan) string {
+	if serviceName := serviceNameFromResource(span.Resource()); serviceName != "" {
+		return serviceName
+	}
+
+	for _, attr := range span.Attributes() {
+		if attr.Key == semconv.ServiceNameKey {
+			return attr.Value.AsString()
+		}
+	}
+
+	return span.InstrumentationScope().Name
+}
+
+// serviceNameFromResource returns the resource's service.name attribute, or ""
+// when res is nil or has no such attribute.
+func serviceNameFromResource(res *resource.Resource) string {
+	if res == nil {
+		return ""
+	}
+
+	for _, attr := range res.Attributes() {
+		if attr.Key == semconv.ServiceNameKey {
+			return attr.Value.AsString()
+		}
+	}
+
+	return ""
+}
+
+// evidenceAttributes returns the rpc.*, http.* and service.* attributes as
+// strings. Values are rendered with Emit rather than AsString because AsString
+// yields "" for non-string values such as integer status codes and ports.
+func evidenceAttributes(attributes []attribute.KeyValue) map[string]string {
+	result := make(map[string]string)
+	for _, attr := range attributes {
+		key := string(attr.Key)
+		if isEvidenceAttribute(key) {
+			result[key] = attr.Value.Emit()
+		}
+	}
+	return result
+}
+
+// isEvidenceAttribute reports whether key has an rpc., http. or service. prefix.
+func isEvidenceAttribute(key string) bool {
+	return strings.HasPrefix(key, "rpc.") ||
+		strings.HasPrefix(key, "http.") ||
+		strings.HasPrefix(key, "service.")
+}
+
+// spanStatus maps a span status code to "ok", "error" or "unset".
+func spanStatus(code codes.Code) string {
+	switch code {
+	case codes.Ok:
+		return "ok"
+	case codes.Error:
+		return "error"
+	default:
+		return "unset"
+	}
+}
+
+// sortedStringSet returns the keys of values in ascending order.
+func sortedStringSet(values map[string]struct{}) []string {
+	result := make([]string, 0, len(values))
+	for value := range values {
+		result = append(result, value)
+	}
+	sort.Strings(result)
+	return result
+}
diff --git a/pkg/client/connection.go b/pkg/client/connection.go index 2b9dacc244b..a7ff9c80b2f 100644 --- a/pkg/client/connection.go +++ b/pkg/client/connection.go @@ -8,6 +8,7 @@ import ( grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/retry"
"github.com/pkg/errors" + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "google.golang.org/grpc" "google.golang.org/grpc/credentials" "google.golang.org/grpc/credentials/insecure" @@ -69,6 +70,7 @@ func CreateApiConnectionWithCallOptions( unuaryInterceptors := grpc.WithChainUnaryInterceptor(grpc_retry.UnaryClientInterceptor(retryOpts...)) streamInterceptors := grpc.WithChainStreamInterceptor(grpc_retry.StreamClientInterceptor(retryOpts...)) dialOpts := append(additionalDialOptions, + grpc.WithStatsHandler(otelgrpc.NewClientHandler()), defaultCallOptions, unuaryInterceptors, streamInterceptors, diff --git a/website/content/developer-guide.mdx b/website/content/developer-guide.mdx index 22a2473bda0..98f6a8d0235 100644 --- a/website/content/developer-guide.mdx +++ b/website/content/developer-guide.mdx @@ -68,7 +68,7 @@ mage dev:fullDown # stop the containerized stack and tear down Kind (after `ma docker compose -f _local/compose/stack.yaml up -d ``` - **Note:** Images can be overridden using environment variables: `REDIS_IMAGE`, `POSTGRES_IMAGE`, `PULSAR_IMAGE`, `KEYCLOAK_IMAGE` + **Note:** Images can be overridden using environment variables: `REDIS_IMAGE`, `POSTGRES_IMAGE`, `PULSAR_IMAGE`, `KEYCLOAK_IMAGE`, `OTEL_IMAGE`, `JAEGER_IMAGE`, `GRAFANA_IMAGE` 3. Initialize databases and Kubernetes resources: