Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,13 @@ If your contribution includes skill test fixtures, also run:
ruby scripts/test_skill_fixtures.rb
```

If your contribution adds or changes an auto-fix candidate, include remediation
fixture expectations and run:

```bash
ruby scripts/test_remediation_fixtures.rb
```

If your contribution changes CI/CD examples, update
[docs/ci-cd-examples.md](docs/ci-cd-examples.md) and run:

Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,12 @@ Validate skill fixture manifests and expected evidence strings with:
ruby scripts/test_skill_fixtures.rb
```

Validate remediation regression manifests and expected auto-fix diffs with:

```bash
ruby scripts/test_remediation_fixtures.rb
```

CI/CD examples for GitHub Actions, GitLab CI, Azure DevOps, Jenkins,
pre-commit, and local agent usage are available in
[`docs/ci-cd-examples.md`](docs/ci-cd-examples.md). Validate those examples
Expand Down
5 changes: 5 additions & 0 deletions scripts/test_remediation_fixtures.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

ARGV.replace(["--remediation-only"])
load File.expand_path("test_skill_fixtures.rb", __dir__)
123 changes: 122 additions & 1 deletion scripts/test_skill_fixtures.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
# frozen_string_literal: true

require "yaml"
require "open3"
require "tempfile"

ROOT = File.expand_path("..", __dir__)
FIXTURE_ROOT = File.join(ROOT, "tests", "fixtures")
KINDS = %w[vulnerable benign].freeze
FIXER_CATEGORIES = %w[auto-fix assisted-fix guidance-only human-review-required].freeze
REQUIRED_MANIFEST_FIELDS = %w[skill case_id kind target expected_findings].freeze
REQUIRED_FINDING_FIELDS = %w[id severity evidence_contains].freeze

Expand Down Expand Up @@ -123,6 +126,111 @@ def validate_evidence(manifest, manifest_path, target_path, errors)
end
end

def validate_string_list(value, prefix, errors)
unless value.is_a?(Array) && !value.empty?
errors << "#{prefix}: must be a non-empty array"
return
end

value.each_with_index do |item, index|
errors << "#{prefix}[#{index}]: must be a non-empty string" unless item.is_a?(String) && !item.empty?
end
end

def path_inside_case?(path, case_dir)
absolute_case_dir = File.expand_path(case_dir)
absolute_path = File.expand_path(path, case_dir)

absolute_path == absolute_case_dir || absolute_path.start_with?("#{absolute_case_dir}#{File::SEPARATOR}")
end

def validate_relative_case_file(value, field, manifest_path, case_dir, errors, must_exist: true)
unless value.is_a?(String) && !value.empty?
errors << "#{rel(manifest_path)}: #{field} must be a non-empty relative path"
return nil
end

if value.start_with?("/") || value.split(File::SEPARATOR).include?("..") || !path_inside_case?(value, case_dir)
errors << "#{rel(manifest_path)}: #{field} must stay inside the case directory"
return nil
end

absolute_path = File.expand_path(value, case_dir)
errors << "#{rel(manifest_path)}: #{field} does not exist: #{value}" if must_exist && !File.file?(absolute_path)
absolute_path
end

def unified_diff(before_path, after_path, display_path)
Tempfile.create("empty-remediation-before") do |empty|
actual_before = File.file?(before_path) ? before_path : empty.path
before_label = File.file?(before_path) ? "before/#{display_path}" : "/dev/null"
after_label = "after/#{display_path}"
stdout, _stderr, status = Open3.capture3("diff", "-u", "--label", before_label, "--label", after_label, actual_before, after_path)

return "" if status.success?
return stdout if [0, 1].include?(status.exitstatus)

raise "diff failed for #{display_path}"
end
end

def validate_remediation(manifest, manifest_path, errors)
remediation = manifest["remediation"]
return if remediation.nil?

prefix = rel(manifest_path)
case_dir = File.dirname(manifest_path)

unless remediation.is_a?(Hash)
errors << "#{prefix}: remediation must be an object"
return
end

category = remediation["category"]
errors << "#{prefix}: remediation.category must be one of #{FIXER_CATEGORIES.join(', ')}" unless FIXER_CATEGORIES.include?(category)
errors << "#{prefix}: remediation.category must be auto-fix for expected diff regression cases" if category && category != "auto-fix"

expected_files = remediation["expected_files"]
unless expected_files.is_a?(Array) && !expected_files.empty?
errors << "#{prefix}: remediation.expected_files must be a non-empty array"
return
end

expected_files.each_with_index do |expected_file, index|
file_prefix = "#{prefix}: remediation.expected_files[#{index}]"
unless expected_file.is_a?(Hash)
errors << "#{file_prefix} must be an object"
next
end

path = expected_file["path"]
before_path = validate_relative_case_file(path, "#{file_prefix}.path", manifest_path, case_dir, errors, must_exist: false)
after_path = validate_relative_case_file(expected_file["after"], "#{file_prefix}.after", manifest_path, case_dir, errors)
validate_string_list(expected_file["expected_diff_contains"], "#{file_prefix}.expected_diff_contains", errors)
validate_string_list(expected_file["expected_after_contains"], "#{file_prefix}.expected_after_contains", errors) if expected_file.key?("expected_after_contains")
next unless before_path && after_path && File.file?(after_path)

diff_text = unified_diff(before_path, after_path, path)
if diff_text.empty?
errors << "#{file_prefix}: expected diff is empty"
else
expected_file["expected_diff_contains"].to_a.each do |snippet|
next unless snippet.is_a?(String)

errors << "#{file_prefix}: expected_diff_contains not found in generated diff: #{snippet.inspect}" unless diff_text.include?(snippet)
end
end

after_text = File.read(after_path)
expected_file["expected_after_contains"].to_a.each do |snippet|
next unless snippet.is_a?(String)

errors << "#{file_prefix}: expected_after_contains not found in #{rel(after_path)}: #{snippet.inspect}" unless after_text.include?(snippet)
end
end
end

remediation_only = ARGV.include?("--remediation-only")
manifests = fixture_manifests

if manifests.empty?
Expand All @@ -131,19 +239,32 @@ def validate_evidence(manifest, manifest_path, target_path, errors)
end

errors = []
remediation_count = 0
manifests.each do |manifest_path|
begin
manifest = load_manifest(manifest_path)
next if remediation_only && !manifest.key?("remediation")

validate_manifest_shape(manifest, manifest_path, errors)
target_path = validate_case_paths(manifest, manifest_path, errors)
validate_evidence(manifest, manifest_path, target_path, errors)
validate_remediation(manifest, manifest_path, errors)
remediation_count += 1 if manifest.key?("remediation")
rescue StandardError => e
errors << "#{rel(manifest_path)}: #{e.message}"
end
end

if remediation_only && remediation_count.zero?
errors << "no remediation regression manifests found"
end

if errors.empty?
puts "OK: validated #{manifests.size} skill fixture manifest(s)."
if remediation_only
puts "OK: validated #{remediation_count} remediation regression fixture manifest(s)."
else
puts "OK: validated #{manifests.size} skill fixture manifest(s), including #{remediation_count} remediation regression fixture(s)."
end
else
puts "FAIL: skill fixture validation failed."
errors.each { |error| puts " - #{error}" }
Expand Down
28 changes: 28 additions & 0 deletions tests/fixtures/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,39 @@ Required manifest fields:
Each expected finding must include `id`, `severity`, `evidence_contains`, and
either `cwe` or `framework`. Benign cases must use `expected_findings: []`.

Vulnerable cases that document an auto-fix candidate may include an optional
`remediation` block. The harness generates unified diffs from the case files to
the listed `after` files and checks that the expected diff snippets are present:

```yaml
remediation:
category: auto-fix
expected_files:
- path: app.js
after: expected/app.js
expected_diff_contains:
- "- const sql = unsafeInput"
- "+ const rows = await db.query(query, [input]);"
expected_after_contains:
- "db.query(query, [input])"
```

Use one `expected_files` entry per changed or generated file. For generated
files, `path` is the intended output path and does not need to exist in the
vulnerable fixture; `after` must point to the expected file under the case
directory.

Run the fixture harness locally with:

```bash
ruby scripts/test_skill_fixtures.rb
```

Run only remediation regression fixtures with:

```bash
ruby scripts/test_remediation_fixtures.rb
```

The `_example` fixture is intentionally tiny. It only keeps the harness and CI
executable until real per-skill fixtures are added.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
PAYMENT_API_KEY=
PAYMENT_API_URL=https://payments.example.test
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,14 @@ expected_findings:
severity: critical
cwe: CWE-798
evidence_contains: "sk_test_FAKE_DO_NOT_USE_1234567890"
remediation:
category: auto-fix
expected_files:
- path: settings.env
after: expected/settings.env
expected_diff_contains:
- "-PAYMENT_API_KEY=sk_test_FAKE_DO_NOT_USE_1234567890"
- "+PAYMENT_API_KEY="
- " PAYMENT_API_URL=https://payments.example.test"
expected_after_contains:
- "PAYMENT_API_URL=https://payments.example.test"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
async function usersHandler(req, res, dbClient = db) {
const rows = await dbClient.query("SELECT * FROM users WHERE email = ?", [req.query.email]);
res.json(rows);
}

app.get("/users", usersHandler);

module.exports = { usersHandler };
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
const { usersHandler } = require("./app");

describe("GET /users remediation", () => {
it("preserves the user lookup response while parameterizing email input", async () => {
const req = { query: { email: "alice@example.test" } };
const res = { json: jest.fn() };
const db = {
query: jest.fn().mockResolvedValue([{ id: 1, email: "alice@example.test" }])
};

await usersHandler(req, res, db);

expect(db.query).toHaveBeenCalledWith(
"SELECT * FROM users WHERE email = ?",
["alice@example.test"]
);
expect(res.json).toHaveBeenCalledWith([{ id: 1, email: "alice@example.test" }]);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,25 @@ expected_findings:
severity: high
cwe: CWE-89
evidence_contains: "SELECT * FROM users WHERE email = '"
remediation:
category: auto-fix
expected_files:
- path: app.js
after: expected/app.js
expected_diff_contains:
- "- const sql = \"SELECT * FROM users WHERE email = '\" + req.query.email + \"'\";"
- '+ const rows = await dbClient.query("SELECT * FROM users WHERE email = ?", [req.query.email]);'
- "+module.exports = { usersHandler };"
expected_after_contains:
- 'dbClient.query("SELECT * FROM users WHERE email = ?", [req.query.email])'
- path: app.test.js
after: expected/app.test.js
expected_diff_contains:
- "+++ after/app.test.js"
- '+const { usersHandler } = require("./app");'
- "+ expect(db.query).toHaveBeenCalledWith("
- '+ "SELECT * FROM users WHERE email = ?",'
- '+ ["alice@example.test"]'
- '+ expect(res.json).toHaveBeenCalledWith([{ id: 1, email: "alice@example.test" }]);'
expected_after_contains:
- "preserves the user lookup response while parameterizing email input"
Loading