diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 63d0c970..a90ced29 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -158,6 +158,13 @@ If your contribution includes skill test fixtures, also run: ruby scripts/test_skill_fixtures.rb ``` +If your contribution adds or changes an auto-fix candidate, include remediation +fixture expectations and run: + +```bash +ruby scripts/test_remediation_fixtures.rb +``` + If your contribution changes CI/CD examples, update [docs/ci-cd-examples.md](docs/ci-cd-examples.md) and run: diff --git a/README.md b/README.md index fd3b4cd1..541e7236 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,12 @@ Validate skill fixture manifests and expected evidence strings with: ruby scripts/test_skill_fixtures.rb ``` +Validate remediation regression manifests and expected auto-fix diffs with: + +```bash +ruby scripts/test_remediation_fixtures.rb +``` + CI/CD examples for GitHub Actions, GitLab CI, Azure DevOps, Jenkins, pre-commit, and local agent usage are available in [`docs/ci-cd-examples.md`](docs/ci-cd-examples.md). Validate those examples diff --git a/scripts/test_remediation_fixtures.rb b/scripts/test_remediation_fixtures.rb new file mode 100644 index 00000000..84ce47c6 --- /dev/null +++ b/scripts/test_remediation_fixtures.rb @@ -0,0 +1,5 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +ARGV.replace(["--remediation-only"]) +load File.expand_path("test_skill_fixtures.rb", __dir__) diff --git a/scripts/test_skill_fixtures.rb b/scripts/test_skill_fixtures.rb index 98871452..f6d5e43b 100644 --- a/scripts/test_skill_fixtures.rb +++ b/scripts/test_skill_fixtures.rb @@ -2,10 +2,13 @@ # frozen_string_literal: true require "yaml" +require "open3" +require "tempfile" ROOT = File.expand_path("..", __dir__) FIXTURE_ROOT = File.join(ROOT, "tests", "fixtures") KINDS = %w[vulnerable benign].freeze +FIXER_CATEGORIES = %w[auto-fix assisted-fix guidance-only human-review-required].freeze REQUIRED_MANIFEST_FIELDS = %w[skill case_id kind target expected_findings].freeze REQUIRED_FINDING_FIELDS = %w[id severity evidence_contains].freeze @@ -123,6 +126,111 @@ def validate_evidence(manifest, manifest_path, target_path, errors) end end +def validate_string_list(value, prefix, errors) + unless value.is_a?(Array) && !value.empty? + errors << "#{prefix}: must be a non-empty array" + return + end + + value.each_with_index do |item, index| + errors << "#{prefix}[#{index}]: must be a non-empty string" unless item.is_a?(String) && !item.empty? + end +end + +def path_inside_case?(path, case_dir) + absolute_case_dir = File.expand_path(case_dir) + absolute_path = File.expand_path(path, case_dir) + + absolute_path == absolute_case_dir || absolute_path.start_with?("#{absolute_case_dir}#{File::SEPARATOR}") +end + +def validate_relative_case_file(value, field, manifest_path, case_dir, errors, must_exist: true) + unless value.is_a?(String) && !value.empty? + errors << "#{rel(manifest_path)}: #{field} must be a non-empty relative path" + return nil + end + + if value.start_with?("/") || value.split(File::SEPARATOR).include?("..") || !path_inside_case?(value, case_dir) + errors << "#{rel(manifest_path)}: #{field} must stay inside the case directory" + return nil + end + + absolute_path = File.expand_path(value, case_dir) + errors << "#{rel(manifest_path)}: #{field} does not exist: #{value}" if must_exist && !File.file?(absolute_path) + absolute_path +end + +def unified_diff(before_path, after_path, display_path) + Tempfile.create("empty-remediation-before") do |empty| + actual_before = File.file?(before_path) ? before_path : empty.path + before_label = File.file?(before_path) ? "before/#{display_path}" : "/dev/null" + after_label = "after/#{display_path}" + stdout, _stderr, status = Open3.capture3("diff", "-u", "--label", before_label, "--label", after_label, actual_before, after_path) + + return "" if status.success? + return stdout if [0, 1].include?(status.exitstatus) + + raise "diff failed for #{display_path}" + end +end + +def validate_remediation(manifest, manifest_path, errors) + remediation = manifest["remediation"] + return if remediation.nil? + + prefix = rel(manifest_path) + case_dir = File.dirname(manifest_path) + + unless remediation.is_a?(Hash) + errors << "#{prefix}: remediation must be an object" + return + end + + category = remediation["category"] + errors << "#{prefix}: remediation.category must be one of #{FIXER_CATEGORIES.join(', ')}" unless FIXER_CATEGORIES.include?(category) + errors << "#{prefix}: remediation.category must be auto-fix for expected diff regression cases" if category && category != "auto-fix" + + expected_files = remediation["expected_files"] + unless expected_files.is_a?(Array) && !expected_files.empty? + errors << "#{prefix}: remediation.expected_files must be a non-empty array" + return + end + + expected_files.each_with_index do |expected_file, index| + file_prefix = "#{prefix}: remediation.expected_files[#{index}]" + unless expected_file.is_a?(Hash) + errors << "#{file_prefix} must be an object" + next + end + + path = expected_file["path"] + before_path = validate_relative_case_file(path, "#{file_prefix}.path", manifest_path, case_dir, errors, must_exist: false) + after_path = validate_relative_case_file(expected_file["after"], "#{file_prefix}.after", manifest_path, case_dir, errors) + validate_string_list(expected_file["expected_diff_contains"], "#{file_prefix}.expected_diff_contains", errors) + validate_string_list(expected_file["expected_after_contains"], "#{file_prefix}.expected_after_contains", errors) if expected_file.key?("expected_after_contains") + next unless before_path && after_path && File.file?(after_path) + + diff_text = unified_diff(before_path, after_path, path) + if diff_text.empty? + errors << "#{file_prefix}: expected diff is empty" + else + expected_file["expected_diff_contains"].to_a.each do |snippet| + next unless snippet.is_a?(String) + + errors << "#{file_prefix}: expected_diff_contains not found in generated diff: #{snippet.inspect}" unless diff_text.include?(snippet) + end + end + + after_text = File.read(after_path) + expected_file["expected_after_contains"].to_a.each do |snippet| + next unless snippet.is_a?(String) + + errors << "#{file_prefix}: expected_after_contains not found in #{rel(after_path)}: #{snippet.inspect}" unless after_text.include?(snippet) + end + end +end + +remediation_only = ARGV.include?("--remediation-only") manifests = fixture_manifests if manifests.empty? @@ -131,19 +239,32 @@ def validate_evidence(manifest, manifest_path, target_path, errors) end errors = [] +remediation_count = 0 manifests.each do |manifest_path| begin manifest = load_manifest(manifest_path) + next if remediation_only && !manifest.key?("remediation") + validate_manifest_shape(manifest, manifest_path, errors) target_path = validate_case_paths(manifest, manifest_path, errors) validate_evidence(manifest, manifest_path, target_path, errors) + validate_remediation(manifest, manifest_path, errors) + remediation_count += 1 if manifest.key?("remediation") rescue StandardError => e errors << "#{rel(manifest_path)}: #{e.message}" end end +if remediation_only && remediation_count.zero? + errors << "no remediation regression manifests found" +end + if errors.empty? - puts "OK: validated #{manifests.size} skill fixture manifest(s)." + if remediation_only + puts "OK: validated #{remediation_count} remediation regression fixture manifest(s)." + else + puts "OK: validated #{manifests.size} skill fixture manifest(s), including #{remediation_count} remediation regression fixture(s)." + end else puts "FAIL: skill fixture validation failed." errors.each { |error| puts " - #{error}" } diff --git a/tests/fixtures/README.md b/tests/fixtures/README.md index 7bdb19f4..7ba43fad 100644 --- a/tests/fixtures/README.md +++ b/tests/fixtures/README.md @@ -31,11 +31,39 @@ Required manifest fields: Each expected finding must include `id`, `severity`, `evidence_contains`, and either `cwe` or `framework`. Benign cases must use `expected_findings: []`. +Vulnerable cases that document an auto-fix candidate may include an optional +`remediation` block. The harness generates unified diffs from the case files to +the listed `after` files and checks that the expected diff snippets are present: + +```yaml +remediation: + category: auto-fix + expected_files: + - path: app.js + after: expected/app.js + expected_diff_contains: + - "- const sql = unsafeInput" + - "+ const rows = await db.query(query, [input]);" + expected_after_contains: + - "db.query(query, [input])" +``` + +Use one `expected_files` entry per changed or generated file. For generated +files, `path` is the intended output path and does not need to exist in the +vulnerable fixture; `after` must point to the expected file under the case +directory. + Run the fixture harness locally with: ```bash ruby scripts/test_skill_fixtures.rb ``` +Run only remediation regression fixtures with: + +```bash +ruby scripts/test_remediation_fixtures.rb +``` + The `_example` fixture is intentionally tiny. It only keeps the harness and CI executable until real per-skill fixtures are added. diff --git a/tests/fixtures/secrets-management/hardcoded-test-secret-vulnerable/expected/settings.env b/tests/fixtures/secrets-management/hardcoded-test-secret-vulnerable/expected/settings.env new file mode 100644 index 00000000..1c362b66 --- /dev/null +++ b/tests/fixtures/secrets-management/hardcoded-test-secret-vulnerable/expected/settings.env @@ -0,0 +1,2 @@ +PAYMENT_API_KEY= +PAYMENT_API_URL=https://payments.example.test diff --git a/tests/fixtures/secrets-management/hardcoded-test-secret-vulnerable/manifest.yaml b/tests/fixtures/secrets-management/hardcoded-test-secret-vulnerable/manifest.yaml index 198b64d0..72a0424a 100644 --- a/tests/fixtures/secrets-management/hardcoded-test-secret-vulnerable/manifest.yaml +++ b/tests/fixtures/secrets-management/hardcoded-test-secret-vulnerable/manifest.yaml @@ -7,3 +7,14 @@ expected_findings: severity: critical cwe: CWE-798 evidence_contains: "sk_test_FAKE_DO_NOT_USE_1234567890" +remediation: + category: auto-fix + expected_files: + - path: settings.env + after: expected/settings.env + expected_diff_contains: + - "-PAYMENT_API_KEY=sk_test_FAKE_DO_NOT_USE_1234567890" + - "+PAYMENT_API_KEY=" + - " PAYMENT_API_URL=https://payments.example.test" + expected_after_contains: + - "PAYMENT_API_URL=https://payments.example.test" diff --git a/tests/fixtures/secure-code-review/sql-injection-vulnerable/expected/app.js b/tests/fixtures/secure-code-review/sql-injection-vulnerable/expected/app.js new file mode 100644 index 00000000..1c4d8da7 --- /dev/null +++ b/tests/fixtures/secure-code-review/sql-injection-vulnerable/expected/app.js @@ -0,0 +1,8 @@ +async function usersHandler(req, res, dbClient = db) { + const rows = await dbClient.query("SELECT * FROM users WHERE email = ?", [req.query.email]); + res.json(rows); +} + +app.get("/users", usersHandler); + +module.exports = { usersHandler }; diff --git a/tests/fixtures/secure-code-review/sql-injection-vulnerable/expected/app.test.js b/tests/fixtures/secure-code-review/sql-injection-vulnerable/expected/app.test.js new file mode 100644 index 00000000..bc5e7116 --- /dev/null +++ b/tests/fixtures/secure-code-review/sql-injection-vulnerable/expected/app.test.js @@ -0,0 +1,19 @@ +const { usersHandler } = require("./app"); + +describe("GET /users remediation", () => { + it("preserves the user lookup response while parameterizing email input", async () => { + const req = { query: { email: "alice@example.test" } }; + const res = { json: jest.fn() }; + const db = { + query: jest.fn().mockResolvedValue([{ id: 1, email: "alice@example.test" }]) + }; + + await usersHandler(req, res, db); + + expect(db.query).toHaveBeenCalledWith( + "SELECT * FROM users WHERE email = ?", + ["alice@example.test"] + ); + expect(res.json).toHaveBeenCalledWith([{ id: 1, email: "alice@example.test" }]); + }); +}); diff --git a/tests/fixtures/secure-code-review/sql-injection-vulnerable/manifest.yaml b/tests/fixtures/secure-code-review/sql-injection-vulnerable/manifest.yaml index 3b19dce7..a49e0759 100644 --- a/tests/fixtures/secure-code-review/sql-injection-vulnerable/manifest.yaml +++ b/tests/fixtures/secure-code-review/sql-injection-vulnerable/manifest.yaml @@ -7,3 +7,25 @@ expected_findings: severity: high cwe: CWE-89 evidence_contains: "SELECT * FROM users WHERE email = '" +remediation: + category: auto-fix + expected_files: + - path: app.js + after: expected/app.js + expected_diff_contains: + - "- const sql = \"SELECT * FROM users WHERE email = '\" + req.query.email + \"'\";" + - '+ const rows = await dbClient.query("SELECT * FROM users WHERE email = ?", [req.query.email]);' + - "+module.exports = { usersHandler };" + expected_after_contains: + - 'dbClient.query("SELECT * FROM users WHERE email = ?", [req.query.email])' + - path: app.test.js + after: expected/app.test.js + expected_diff_contains: + - "+++ after/app.test.js" + - '+const { usersHandler } = require("./app");' + - "+ expect(db.query).toHaveBeenCalledWith(" + - '+ "SELECT * FROM users WHERE email = ?",' + - '+ ["alice@example.test"]' + - '+ expect(res.json).toHaveBeenCalledWith([{ id: 1, email: "alice@example.test" }]);' + expected_after_contains: + - "preserves the user lookup response while parameterizing email input"