diff --git a/README.md b/README.md index 17b75dd..147ddeb 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ Note: the CLI only works with RunsOn >= v2.6.3. ### Other - [Installation](#installation) - Download and install the CLI +- [Resource Discovery](#resource-discovery) - How the CLI discovers resources - [Contributing](#contributing) - Ideas for future improvements - [License](#license) - Project license information @@ -75,6 +76,23 @@ jobs: run: roc lint .github/runs-on.yml ``` +## Resource Discovery + +The CLI discovers RunsOn resources using the AWS Resource Groups Tagging API (RGTA): + +1. **Primary**: `runs-on-stack-name` tag (all new CF/TF deployments) +2. **Fallback**: Dynamic discovery via AppRunner service tags (older stacks) + +Resources are identified by their `runs-on-resource` tag (Terraform) or ARN pattern matching (CloudFormation fallback): + +| Resource | Tag Value | CF Fallback | +|----------|-----------|-------------| +| AppRunner Service | `apprunner-service` | ARN pattern | +| Config S3 Bucket | `config-bucket` | `runs-on/purpose=config` tag or name contains `-config` | +| EC2 Log Group | `ec2-log-group` | Name contains `{stack}/ec2/instances` or `{stack}-EC2InstanceLogGroup-` | + +Tags are automatically applied when deploying RunsOn via Terraform/OpenTofu or CloudFormation. + ## Core Commands ### `roc connect` @@ -93,7 +111,7 @@ Flags: --watch Wait for instance ID if not found Global Flags: - --stack string CloudFormation stack name (default "runs-on") + --stack string Stack name (default "runs-on") ``` Example: @@ -120,7 +138,7 @@ Flags: -w, --watch string[="5s"] Watch for new logs with optional interval (e.g. 
--watch 2s) Global Flags: - --stack string CloudFormation stack name (default "runs-on") + --stack string Stack name (default "runs-on") ``` Examples: @@ -156,7 +174,7 @@ Flags: -w, --wait Wait for instance ID if not found Global Flags: - --stack string CloudFormation stack name (default "runs-on") + --stack string Stack name (default "runs-on") ``` **Requirements:** @@ -200,7 +218,7 @@ Flags: -h, --help help for lint Global Flags: - --stack string CloudFormation stack name (default "runs-on") + --stack string Stack name (default "runs-on") ``` **What it validates:** @@ -282,10 +300,9 @@ Now `roc lint` will automatically run on staged `runs-on.yml` files before each Diagnose RunsOn stack health and export troubleshooting information. -This command performs comprehensive health checks on your RunsOn CloudFormation stack: -- Verifies CloudFormation stack status -- Checks AppRunner service health and version -- Tests endpoint accessibility +This command performs comprehensive health checks on your RunsOn stack: +- Checks AppRunner service health +- Tests endpoint accessibility - Validates service configuration - Fetches application logs @@ -300,7 +317,7 @@ Flags: --since string Fetch logs since duration (e.g. 30m, 2h, 24h) (default "24h") Global Flags: - --stack string CloudFormation stack name (default "runs-on") + --stack string Stack name (default "runs-on") ``` Example: @@ -312,8 +329,7 @@ AWS_PROFILE=runs-on-admin roc stack doctor --since 2h Output: ``` -Checking CloudFormation stack health (https://console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/stackinfo?stackId=runs-on-test)... ✅ (status: UPDATE_COMPLETE) -Checking AppRunner service (https://console.aws.amazon.com/apprunner/home?region=us-east-1#/services/RunsOnService-4rHCauYu4m23)... ✅ (version: v2.8.4) +Checking AppRunner service (https://console.aws.amazon.com/apprunner/home?region=us-east-1#/services/RunsOnService-4rHCauYu4m23)... 
✅ (status: RUNNING) Checking AppRunner service endpoint (https://wxrwksit5a.us-east-1.awsapprunner.com)... ✅ Checking for 'Congrats' response... ✅ Fetching AppRunner application logs (since 24h0m0s)... ✅ (5419 lines) @@ -341,7 +357,7 @@ Flags: -w, --watch string[="5s"] Watch for new logs with optional interval (e.g. --watch 2s) Global Flags: - --stack string CloudFormation stack name (default "runs-on") + --stack string Stack name (default "runs-on") ``` Examples: diff --git a/go.mod b/go.mod index 2c9e160..5a118c5 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module roc go 1.24.2 require ( - github.com/aws/aws-sdk-go-v2 v1.38.3 + github.com/aws/aws-sdk-go-v2 v1.40.1 github.com/aws/aws-sdk-go-v2/config v1.31.6 github.com/aws/aws-sdk-go-v2/service/apprunner v1.38.3 github.com/aws/aws-sdk-go-v2/service/cloudformation v1.66.0 @@ -11,6 +11,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/ec2 v1.251.0 github.com/aws/aws-sdk-go-v2/service/fis v1.37.1 github.com/aws/aws-sdk-go-v2/service/iam v1.47.3 + github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi v1.31.3 github.com/aws/aws-sdk-go-v2/service/s3 v1.87.3 github.com/aws/aws-sdk-go-v2/service/ssm v1.64.2 github.com/aws/aws-sdk-go-v2/service/sts v1.38.2 @@ -24,8 +25,8 @@ require ( github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.1 // indirect github.com/aws/aws-sdk-go-v2/credentials v1.18.10 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.6 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.6 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.6 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.15 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.15 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.6 // indirect github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1 // indirect @@ -34,7 +35,7 @@ require ( 
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.6 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.29.1 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.34.2 // indirect - github.com/aws/smithy-go v1.23.0 // indirect + github.com/aws/smithy-go v1.24.0 // indirect github.com/cockroachdb/apd/v3 v3.2.1 // indirect github.com/emicklei/proto v1.14.2 // indirect github.com/google/go-querystring v1.1.0 // indirect diff --git a/go.sum b/go.sum index d58da58..644bcb7 100644 --- a/go.sum +++ b/go.sum @@ -2,8 +2,8 @@ cuelabs.dev/go/oci/ociregistry v0.0.0-20250722084951-074d06050084 h1:4k1yAtPvZJZ cuelabs.dev/go/oci/ociregistry v0.0.0-20250722084951-074d06050084/go.mod h1:4WWeZNxUO1vRoZWAHIG0KZOd6dA25ypyWuwD3ti0Tdc= cuelang.org/go v0.15.0 h1:0jlWNxLp1In6dWJtywTXei7w0cqfHSTiCk/6Z+FUvxI= cuelang.org/go v0.15.0/go.mod h1:NYw6n4akZcTjA7QQwJ1/gqWrrhsN4aZwhcAL0jv9rZE= -github.com/aws/aws-sdk-go-v2 v1.38.3 h1:B6cV4oxnMs45fql4yRH+/Po/YU+597zgWqvDpYMturk= -github.com/aws/aws-sdk-go-v2 v1.38.3/go.mod h1:sDioUELIUO9Znk23YVmIk86/9DOpkbyyVb1i/gUNFXY= +github.com/aws/aws-sdk-go-v2 v1.40.1 h1:difXb4maDZkRH0x//Qkwcfpdg1XQVXEAEs2DdXldFFc= +github.com/aws/aws-sdk-go-v2 v1.40.1/go.mod h1:MayyLB8y+buD9hZqkCW3kX1AKq07Y5pXxtgB+rRFhz0= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.1 h1:i8p8P4diljCr60PpJp6qZXNlgX4m2yQFpYk+9ZT+J4E= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.1/go.mod h1:ddqbooRZYNoJ2dsTwOty16rM+/Aqmk/GOXrK8cg7V00= github.com/aws/aws-sdk-go-v2/config v1.31.6 h1:a1t8fXY4GT4xjyJExz4knbuoxSCacB5hT/WgtfPyLjo= @@ -12,10 +12,10 @@ github.com/aws/aws-sdk-go-v2/credentials v1.18.10 h1:xdJnXCouCx8Y0NncgoptztUocIY github.com/aws/aws-sdk-go-v2/credentials v1.18.10/go.mod h1:7tQk08ntj914F/5i9jC4+2HQTAuJirq7m1vZVIhEkWs= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.6 h1:wbjnrrMnKew78/juW7I2BtKQwa1qlf6EjQgS69uYY14= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.6/go.mod h1:AtiqqNrDioJXuUgz3+3T0mBWN7Hro2n9wll2zRUc0ww= 
-github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.6 h1:uF68eJA6+S9iVr9WgX1NaRGyQ/6MdIyc4JNUo6TN1FA= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.6/go.mod h1:qlPeVZCGPiobx8wb1ft0GHT5l+dc6ldnwInDFaMvC7Y= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.6 h1:pa1DEC6JoI0zduhZePp3zmhWvk/xxm4NB8Hy/Tlsgos= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.6/go.mod h1:gxEjPebnhWGJoaDdtDkA0JX46VRg1wcTHYe63OfX5pE= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.15 h1:Y5YXgygXwDI5P4RkteB5yF7v35neH7LfJKBG+hzIons= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.15/go.mod h1:K+/1EpG42dFSY7CBj+Fruzm8PsCGWTXJ3jdeJ659oGQ= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.15 h1:AvltKnW9ewxX2hFmQS0FyJH93aSvJVUEFvXfU+HWtSE= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.15/go.mod h1:3I4oCdZdmgrREhU74qS1dK9yZ62yumob+58AbFR4cQA= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo= github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.6 h1:R0tNFJqfjHL3900cqhXuwQ+1K4G0xc9Yf8EDbFXCKEw= @@ -40,6 +40,8 @@ github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.6 h1:LHS1YAIJX github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.6/go.mod h1:c9PCiTEuh0wQID5/KqA32J+HAgZxN9tOGXKCiYJjTZI= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.6 h1:nEXUSAwyUfLTgnc9cxlDWy637qsq4UWwp3sNAfl0Z3Y= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.6/go.mod h1:HGzIULx4Ge3Do2V0FaiYKcyKzOqwrhUZgCI77NisswQ= +github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi v1.31.3 h1:5IPVHY5tKT9sE1ncDErey9oz23PbcIqygEMxmM6AArw= +github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi v1.31.3/go.mod h1:M+X29LLlSyiVZ4mRTpDSIbw/v71npcOO62FlYh7pBbM= github.com/aws/aws-sdk-go-v2/service/s3 v1.87.3 h1:ETkfWcXP2KNPLecaDa++5bsQhCRa5M5sLUJa5DWYIIg= 
github.com/aws/aws-sdk-go-v2/service/s3 v1.87.3/go.mod h1:+/3ZTqoYb3Ur7DObD00tarKMLMuKg8iqz5CHEanqTnw= github.com/aws/aws-sdk-go-v2/service/ssm v1.64.2 h1:6P4W42RUTZixRG6TgfRB8KlsqNzHtvBhs6sTbkVPZvk= @@ -50,8 +52,8 @@ github.com/aws/aws-sdk-go-v2/service/ssooidc v1.34.2 h1:gKWSTnqudpo8dAxqBqZnDoDW github.com/aws/aws-sdk-go-v2/service/ssooidc v1.34.2/go.mod h1:x7+rkNmRoEN1U13A6JE2fXne9EWyJy54o3n6d4mGaXQ= github.com/aws/aws-sdk-go-v2/service/sts v1.38.2 h1:YZPjhyaGzhDQEvsffDEcpycq49nl7fiGcfJTIo8BszI= github.com/aws/aws-sdk-go-v2/service/sts v1.38.2/go.mod h1:2dIN8qhQfv37BdUYGgEC8Q3tteM3zFxTI1MLO2O3J3c= -github.com/aws/smithy-go v1.23.0 h1:8n6I3gXzWJB2DxBDnfxgBaSX6oe0d/t10qGz7OKqMCE= -github.com/aws/smithy-go v1.23.0/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= +github.com/aws/smithy-go v1.24.0 h1:LpilSUItNPFr1eY85RYgTIg5eIEPtvFbskaFcmmIUnk= +github.com/aws/smithy-go v1.24.0/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0= github.com/cockroachdb/apd/v3 v3.2.1 h1:U+8j7t0axsIgvQUqthuNm82HIrYXodOV2iWLWtEaIwg= github.com/cockroachdb/apd/v3 v3.2.1/go.mod h1:klXJcjp+FffLTHlhIG69tezTDvdP065naDsHzKhYSqc= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= diff --git a/internal/cli/discovery.go b/internal/cli/discovery.go new file mode 100644 index 0000000..8d280ad --- /dev/null +++ b/internal/cli/discovery.go @@ -0,0 +1,195 @@ +package cli + +import ( + "context" + "fmt" + "strings" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/apprunner" + "github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi" + "github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi/types" + "github.com/spf13/cobra" +) + +// discoverResources finds RunsOn resources using a 2-tier RGTA strategy +func (s *Stack) discoverResources(cmd *cobra.Command) (*RunsOnConfig, error) { + stackName := cmd.Flag("stack").Value.String() + ctx := cmd.Context() + + // Tier 1: Try fixed "runs-on-stack-name" tag (new 
deployments) + if config, _ := s.discoverByTag(ctx, "runs-on-stack-name", stackName); config.isComplete() { + return config, nil + } + + // Tier 2: Discover tag key from AppRunner service (older stacks) + // TODO: Remove this fallback once all users have upgraded to stacks with runs-on-stack-name tag + tagKey, tagErr := s.discoverTagKeyFromAppRunner(ctx, stackName) + if tagErr == nil && tagKey != "" { + if config, _ := s.discoverByTag(ctx, tagKey, stackName); config.isComplete() { + return config, nil + } + } + + return nil, fmt.Errorf("could not discover resources for stack %q", stackName) +} + +// discoverByTag queries RGTA for resources with the given tag key=value +func (s *Stack) discoverByTag(ctx context.Context, tagKey, stackName string) (*RunsOnConfig, error) { + client := resourcegroupstaggingapi.NewFromConfig(s.cfg) + + paginator := resourcegroupstaggingapi.NewGetResourcesPaginator(client, &resourcegroupstaggingapi.GetResourcesInput{ + TagFilters: []types.TagFilter{{ + Key: aws.String(tagKey), + Values: []string{stackName}, + }}, + }) + + config := &RunsOnConfig{ + StackName: stackName, + AWSConfig: s.cfg, + } + + for paginator.HasMorePages() { + page, err := paginator.NextPage(ctx) + if err != nil { + return config, fmt.Errorf("failed to query resources: %w", err) + } + + for _, resource := range page.ResourceTagMappingList { + arn := *resource.ResourceARN + classifyResource(config, arn, resource.Tags, stackName) + } + } + + return config, nil +} + +// classifyResource determines resource type from runs-on-resource tag (TF) or ARN pattern (CF fallback) +func classifyResource(config *RunsOnConfig, arn string, tags []types.Tag, stackName string) { + resourceType := getTagValue(tags, "runs-on-resource") + + switch resourceType { + // TF deployments have runs-on-resource tag + case "apprunner-service": + config.AppRunnerServiceArn = arn + case "config-bucket": + config.BucketConfig = extractBucketName(arn) + case "ec2-log-group": + config.EC2LogGroupArn = 
arn + default: + // CF fallback: detect by ARN pattern + switch { + case isAppRunnerService(arn): + config.AppRunnerServiceArn = arn + case isS3Bucket(arn) && isConfigBucket(arn, tags): + config.BucketConfig = extractBucketName(arn) + case isCloudWatchLogGroup(arn) && isEC2LogGroup(arn, stackName): + config.EC2LogGroupArn = arn + } + } +} + +// discoverTagKeyFromAppRunner finds the tag key used for stack identification +// by searching all AppRunner services for one with a tag value matching stackName +func (s *Stack) discoverTagKeyFromAppRunner(ctx context.Context, stackName string) (string, error) { + arClient := apprunner.NewFromConfig(s.cfg) + + // List all AppRunner services + paginator := apprunner.NewListServicesPaginator(arClient, &apprunner.ListServicesInput{}) + + pageCount := 0 + serviceCount := 0 + for paginator.HasMorePages() { + page, err := paginator.NextPage(ctx) + if err != nil { + return "", fmt.Errorf("failed to list AppRunner services (page %d): %w", pageCount, err) + } + pageCount++ + serviceCount += len(page.ServiceSummaryList) + + // Check each service's tags for a value matching stackName + for _, svc := range page.ServiceSummaryList { + tagsResult, err := arClient.ListTagsForResource(ctx, &apprunner.ListTagsForResourceInput{ + ResourceArn: svc.ServiceArn, + }) + if err != nil { + continue // Skip services we can't get tags for + } + + // Find which tag key has value = stackName + for _, tag := range tagsResult.Tags { + if tag.Key != nil && tag.Value != nil && *tag.Value == stackName { + return *tag.Key, nil + } + } + } + } + + return "", fmt.Errorf("no AppRunner service found with tag value %s (searched %d pages, %d services)", stackName, pageCount, serviceCount) +} + +// isComplete checks if all required resources were discovered +func (c *RunsOnConfig) isComplete() bool { + return c.AppRunnerServiceArn != "" && c.BucketConfig != "" && c.EC2LogGroupArn != "" +} + +// ARN pattern detection helpers +func isAppRunnerService(arn string) bool { 
+ return strings.Contains(arn, ":apprunner:") && strings.Contains(arn, ":service/") +} + +func isS3Bucket(arn string) bool { + return strings.HasPrefix(arn, "arn:aws:s3:::") +} + +func isCloudWatchLogGroup(arn string) bool { + return strings.Contains(arn, ":logs:") && strings.Contains(arn, ":log-group:") +} + +// isConfigBucket identifies config bucket by tag or naming convention +func isConfigBucket(arn string, tags []types.Tag) bool { + // Check for runs-on/purpose=config tag (CF has this) + for _, tag := range tags { + if tag.Key != nil && *tag.Key == "runs-on/purpose" && + tag.Value != nil && *tag.Value == "config" { + return true + } + } + // Fall back to naming convention + bucketName := extractBucketName(arn) + return strings.Contains(bucketName, "-config") +} + +// isEC2LogGroup identifies EC2 log group by naming convention +func isEC2LogGroup(arn string, stackName string) bool { + // TF naming: {stackName}/ec2/instances + if strings.Contains(arn, stackName+"/ec2/instances") { + return true + } + // CF naming: {stackName}-EC2InstanceLogGroup-{suffix} + if strings.Contains(arn, stackName+"-EC2InstanceLogGroup-") { + return true + } + return false +} + +// extractBucketName extracts the bucket name from an S3 ARN +// arn:aws:s3:::bucket-name -> bucket-name +func extractBucketName(arn string) string { + parts := strings.Split(arn, ":::") + if len(parts) == 2 { + return parts[1] + } + return "" +} + +// getTagValue finds a tag value by key from a list of tags +func getTagValue(tags []types.Tag, key string) string { + for _, tag := range tags { + if tag.Key != nil && *tag.Key == key && tag.Value != nil { + return *tag.Value + } + } + return "" +} diff --git a/internal/cli/doctor.go b/internal/cli/doctor.go index 7f10c71..b8ff0f8 100644 --- a/internal/cli/doctor.go +++ b/internal/cli/doctor.go @@ -14,7 +14,6 @@ import ( "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/apprunner" - "github.com/aws/aws-sdk-go-v2/service/cloudformation" 
"github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs" "github.com/spf13/cobra" ) @@ -34,23 +33,20 @@ type DoctorResult struct { type StackDoctor struct { cfg aws.Config - cfn *cloudformation.Client apprunner *apprunner.Client cwl *cloudwatchlogs.Client - stackName string + config *RunsOnConfig // Discovered resources httpClient *http.Client result *DoctorResult - outputs map[string]string // Cache stack outputs - workDir string // Temporary workspace directory + workDir string // Temporary workspace directory } func NewStackDoctor(config *RunsOnConfig) *StackDoctor { return &StackDoctor{ cfg: config.AWSConfig, - cfn: cloudformation.NewFromConfig(config.AWSConfig), apprunner: apprunner.NewFromConfig(config.AWSConfig), cwl: cloudwatchlogs.NewFromConfig(config.AWSConfig), - stackName: config.StackName, + config: config, httpClient: &http.Client{ Timeout: 30 * time.Second, }, @@ -88,60 +84,35 @@ func (d *StackDoctor) failCheck(name, message string, err error) error { return err } -func (d *StackDoctor) loadStackOutputs(ctx context.Context) error { - if d.outputs != nil { - return nil // Already loaded +// getServiceURL gets the AppRunner service URL by calling DescribeService +func (d *StackDoctor) getServiceURL(ctx context.Context) (string, error) { + serviceArn := d.config.AppRunnerServiceArn + if serviceArn == "" { + return "", fmt.Errorf("AppRunner service ARN not found") } - out, err := d.cfn.DescribeStacks(ctx, &cloudformation.DescribeStacksInput{ - StackName: &d.stackName, - }) - if err != nil { - return err - } - if len(out.Stacks) == 0 { - return fmt.Errorf("stack %s not found", d.stackName) - } - - d.outputs = make(map[string]string) - for _, output := range out.Stacks[0].Outputs { - d.outputs[*output.OutputKey] = *output.OutputValue - } - return nil -} - -func (d *StackDoctor) checkStackHealth(ctx context.Context) error { - region := d.cfg.Region - cfnURL := 
fmt.Sprintf("https://console.aws.amazon.com/cloudformation/home?region=%s#/stacks/stackinfo?stackId=%s", region, d.stackName) - fmt.Printf("Checking CloudFormation stack health (%s)...", cfnURL) - - // Get stack status from the same API call - out, err := d.cfn.DescribeStacks(ctx, &cloudformation.DescribeStacksInput{ - StackName: &d.stackName, + out, err := d.apprunner.DescribeService(ctx, &apprunner.DescribeServiceInput{ + ServiceArn: &serviceArn, }) if err != nil { - return d.failCheck("CloudFormation stack health", "Failed to describe stack", err) + return "", err } - stack := out.Stacks[0] - status := string(stack.StackStatus) - - if strings.Contains(status, "COMPLETE") && !strings.Contains(status, "ROLLBACK") { - d.addCheck("CloudFormation stack health", "✅", fmt.Sprintf("Status: %s", status), nil) - d.printCheckResult("", "✅", fmt.Sprintf("status: %s", status)) - return nil - } else { - d.addCheck("CloudFormation stack health", "❌", fmt.Sprintf("Status: %s", status), nil) - d.printCheckResult("", "❌", fmt.Sprintf("status: %s", status)) - return fmt.Errorf("stack is in unhealthy state: %s", status) + if out.Service != nil && out.Service.ServiceUrl != nil { + url := *out.Service.ServiceUrl + if !strings.HasPrefix(url, "https://") { + url = "https://" + url + } + return url, nil } + return "", fmt.Errorf("service URL not available") } func (d *StackDoctor) checkAppRunnerService(ctx context.Context) error { - serviceArn, ok := d.outputs["RunsOnServiceArn"] - if !ok { + serviceArn := d.config.AppRunnerServiceArn + if serviceArn == "" { fmt.Print("Checking AppRunner service...") - err := fmt.Errorf("RunsOnServiceArn not found in stack outputs") + err := fmt.Errorf("AppRunner service ARN not found in discovered resources") return d.failCheck("AppRunner service running", "Service ARN not found", err) } @@ -157,8 +128,6 @@ func (d *StackDoctor) checkAppRunnerService(ctx context.Context) error { appRunnerURL := 
fmt.Sprintf("https://console.aws.amazon.com/apprunner/home?region=%s#/services/%s", region, serviceName) fmt.Printf("Checking AppRunner service (%s)...", appRunnerURL) - expectedTag := d.outputs["RunsOnAppTag"] - out, err := d.apprunner.DescribeService(ctx, &apprunner.DescribeServiceInput{ ServiceArn: &serviceArn, }) @@ -166,25 +135,16 @@ func (d *StackDoctor) checkAppRunnerService(ctx context.Context) error { return d.failCheck("AppRunner service running", "Failed to describe service", err) } + if out.Service == nil { + return d.failCheck("AppRunner service running", "Service not found in response", fmt.Errorf("DescribeService returned nil service")) + } + service := out.Service status := string(service.Status) if status == "RUNNING" { - // Extract image tag from the service configuration - imageUri := *service.SourceConfiguration.ImageRepository.ImageIdentifier - parts := strings.Split(imageUri, ":") - var actualTag string - if len(parts) > 1 { - actualTag = parts[len(parts)-1] - } - - if actualTag == expectedTag { - d.addCheck("AppRunner service running", "✅", fmt.Sprintf("Version: %s", actualTag), nil) - d.printCheckResult("", "✅", fmt.Sprintf("version: %s", actualTag)) - } else { - d.addCheck("AppRunner service running", "⚠️", fmt.Sprintf("Version mismatch - running: %s, expected: %s", actualTag, expectedTag), nil) - d.printCheckResult("", "⚠️", fmt.Sprintf("version mismatch - running: %s, expected: %s", actualTag, expectedTag)) - } + d.addCheck("AppRunner service running", "✅", fmt.Sprintf("Status: %s", status), nil) + d.printCheckResult("", "✅", fmt.Sprintf("status: %s", status)) return nil } else { d.addCheck("AppRunner service running", "❌", fmt.Sprintf("Status: %s", status), nil) @@ -194,16 +154,10 @@ func (d *StackDoctor) checkAppRunnerService(ctx context.Context) error { } func (d *StackDoctor) checkEndpointAccessibility(ctx context.Context) error { - entryPoint, ok := d.outputs["RunsOnEntryPoint"] - if !ok { + entryPoint, err := d.getServiceURL(ctx) + 
if err != nil { fmt.Print("Checking AppRunner service endpoint...") - err := fmt.Errorf("RunsOnEntryPoint not found in stack outputs") - return d.failCheck("AppRunner service endpoint accessible", "Entry point not found", err) - } - - // Ensure https:// prefix - if !strings.HasPrefix(entryPoint, "http://") && !strings.HasPrefix(entryPoint, "https://") { - entryPoint = "https://" + entryPoint + return d.failCheck("AppRunner service endpoint accessible", "Failed to get service URL", err) } fmt.Printf("Checking AppRunner service endpoint (%s)...", entryPoint) @@ -232,15 +186,9 @@ func (d *StackDoctor) checkEndpointAccessibility(ctx context.Context) error { func (d *StackDoctor) checkCongratsResponse(ctx context.Context) error { fmt.Print("Checking for 'Congrats' response...") - entryPoint, ok := d.outputs["RunsOnEntryPoint"] - if !ok { - err := fmt.Errorf("RunsOnEntryPoint not found in stack outputs") - return d.failCheck("AppRunner service returns 'Congrats'", "Entry point not found", err) - } - - // Ensure https:// prefix - if !strings.HasPrefix(entryPoint, "http://") && !strings.HasPrefix(entryPoint, "https://") { - entryPoint = "https://" + entryPoint + entryPoint, err := d.getServiceURL(ctx) + if err != nil { + return d.failCheck("AppRunner service returns 'Congrats'", "Failed to get service URL", err) } resp, err := d.httpClient.Get(entryPoint) @@ -312,8 +260,8 @@ func (d *StackDoctor) fetchLogs(ctx context.Context, since time.Duration) (int, return 0, fmt.Errorf("failed to create logs directory: %w", err) } - serviceArn, ok := d.outputs["RunsOnServiceArn"] - if !ok { + serviceArn := d.config.AppRunnerServiceArn + if serviceArn == "" { // Skip logs fetching for failed stacks - this is expected d.addCheck("Logs fetched", "⏭️", "Skipped - service not available", nil) return 0, nil @@ -471,13 +419,7 @@ func (d *StackDoctor) Run(ctx context.Context, since time.Duration) error { } defer d.cleanup() - // Load stack outputs once at the beginning - if err := 
d.loadStackOutputs(ctx); err != nil { - return fmt.Errorf("failed to load stack outputs: %w", err) - } - // Run all checks - d.checkStackHealth(ctx) d.checkAppRunnerService(ctx) d.checkEndpointAccessibility(ctx) d.checkCongratsResponse(ctx) @@ -514,10 +456,9 @@ func NewDoctorCmd(stack *Stack) *cobra.Command { Short: "Diagnose RunsOn stack health and export troubleshooting information", Long: `Diagnose RunsOn stack health and export troubleshooting information. -This command performs comprehensive health checks on your RunsOn CloudFormation stack: -- Verifies CloudFormation stack status -- Checks AppRunner service health and version -- Tests endpoint accessibility +This command performs comprehensive health checks on your RunsOn stack: +- Checks AppRunner service health +- Tests endpoint accessibility - Validates service configuration - Fetches application logs diff --git a/internal/cli/stack.go b/internal/cli/stack.go index 6bf6ed1..31327c4 100644 --- a/internal/cli/stack.go +++ b/internal/cli/stack.go @@ -1,10 +1,7 @@ package cli import ( - "fmt" - "github.com/aws/aws-sdk-go-v2/aws" - "github.com/aws/aws-sdk-go-v2/service/cloudformation" "github.com/spf13/cobra" ) @@ -12,37 +9,10 @@ type Stack struct { cfg aws.Config } +// getStackOutputs discovers RunsOn resources using the Resource Groups Tagging API. +// Resources are identified by the 'runs-on-stack-name' and 'runs-on-resource' tags. 
func (s *Stack) getStackOutputs(cmd *cobra.Command) (*RunsOnConfig, error) { - stackName := cmd.Flag("stack").Value.String() - cfg := s.cfg - - cfn := cloudformation.NewFromConfig(cfg) - out, err := cfn.DescribeStacks(cmd.Context(), &cloudformation.DescribeStacksInput{ - StackName: &stackName, - }) - if err != nil { - return nil, fmt.Errorf("failed to describe stack: %w", err) - } - if len(out.Stacks) == 0 { - return nil, fmt.Errorf("stack %s not found", stackName) - } - - config := &RunsOnConfig{ - StackName: stackName, - AWSConfig: cfg, - } - - for _, output := range out.Stacks[0].Outputs { - switch *output.OutputKey { - case "RunsOnServiceArn": - config.AppRunnerServiceArn = *output.OutputValue - case "RunsOnEC2InstanceLogGroupArn": - config.EC2LogGroupArn = *output.OutputValue - case "RunsOnBucketConfig": - config.BucketConfig = *output.OutputValue - } - } - return config, nil + return s.discoverResources(cmd) } func NewStack(cfg aws.Config) *Stack {