From fc603c47b4f7f53bea1ed48e8b05ae0e15dda360 Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Sat, 6 Jun 2026 21:53:39 +1000 Subject: [PATCH 1/4] ci: make Kani verification fail loud and fix its toolchain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Kani Formal Verification job was failing open: every command had `|| echo "...skipping"` and the verify step had `continue-on-error: true`, so the job stayed green while verifying nothing. Combined with a stale toolchain pin, it had silently stopped running entirely: cargo install --locked kani-verifier -> rustc 1.85.1 is not supported (home 0.5.12 requires rustc 1.88) There are 11 real proof harnesses (byte_storage.rs, encryption/core.rs), so this is meaningful verification that was providing false assurance. - toolchain "1.85" -> stable (Kani's installer needs >=1.88; Kani downloads its own pinned verification toolchain in `cargo kani setup`). - cargo install ... --force (idempotent on the runner's persistent cargo cache). - Drop all `|| echo` swallows and `continue-on-error` — a failed install, setup, or proof now turns CI red. - Add workflow_dispatch so the schedule-only job can be run on demand (and so this fix can actually be verified now). --- .github/workflows/security.yml | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index a548a8b..3fc54c7 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -14,6 +14,9 @@ on: - cron: '7 11 * * 6' release: types: [published] + # On-demand: lets the schedule-only jobs (e.g. Kani) be run and verified + # without waiting for the weekly cron. + workflow_dispatch: {} concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -207,7 +210,7 @@ jobs: kani: name: Kani Formal Verification runs-on: cachekit - if: github.event_name == 'schedule' + if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' steps: - name: Checkout code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 @@ -215,7 +218,11 @@ jobs: - name: Install Rust toolchain uses: dtolnay/rust-toolchain@3c5f7ea28cd621ae0bf5283f0e981fb97b8a7af9 # master with: - toolchain: "1.85" + # Host toolchain for the kani-verifier installer only — Kani downloads its + # own pinned verification toolchain in `cargo kani setup`. Must be current + # stable: kani-verifier's deps (e.g. home 0.5.12) now require rustc >= 1.88, + # so the old "1.85" pin failed to compile the installer. + toolchain: stable - name: Cache Rust dependencies uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5 @@ -231,13 +238,16 @@ jobs: ${{ runner.os }}-cargo- - name: Install Kani + # No `|| echo` swallowing: a failed install/setup MUST fail the job. Silently + # skipping verification gives false green-CI assurance. --force makes the + # install idempotent on the runner's persistent cargo cache. run: | - cargo install --locked kani-verifier || echo "Kani install failed, skipping verification" - cargo kani setup || echo "Kani setup failed, skipping verification" + cargo install --locked --force kani-verifier + cargo kani setup - name: Run Kani verification - run: cargo kani --all-features || echo "Kani verification failed or not supported" - continue-on-error: true + # No continue-on-error, no `|| echo`: a failed proof must turn CI red. + run: cargo kani --all-features cargo-vet: name: Cargo Vet (Supply Chain) From c37cf98dbf1269c9e27d023ebfcfaa2472e0b564 Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Sat, 6 Jun 2026 21:58:50 +1000 Subject: [PATCH 2/4] ci: add least-privilege permissions to Kani job Per CodeRabbit/zizmor review on #44: the kani job relied on default permissions. It only checks out and runs verification, so scope it to contents: read. --- .github/workflows/security.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 3fc54c7..d5e63e5 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -211,6 +211,8 @@ jobs: name: Kani Formal Verification runs-on: cachekit if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' + permissions: + contents: read # least-privilege: the job only checks out and verifies steps: - name: Checkout code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 From 37687660c88b1d0fd635409ff824cd6dae47343d Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Sat, 6 Jun 2026 22:15:28 +1000 Subject: [PATCH 3/4] ci: shrink scheduled deep-fuzz to 1h/target, add on-demand 8h dispatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The weekly deep-fuzz matrix (16 targets × 8h) shared the `cachekit` ARC pool with required PR CI and, at ~4 concurrent, monopolised it for ~32h every Saturday — starving the required `security` and `stable / ubuntu-latest` checks so no PR could merge until it drained. - Scheduled run: 1h/target (~16 runner-hours, drains in a few hours overnight). - On-demand: `workflow_dispatch` with `run_deep_fuzz=true` runs the full matrix at `fuzz_seconds` per target (default 8h). A plain dispatch does NOT trigger deep-fuzz, so verifying Kani on demand stays cheap. - Duration is driven by FUZZ_SECONDS (schedule=3600, dispatch=input); the timeout buffer logic is unchanged. --- .github/workflows/security.yml | 40 +++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index d5e63e5..47b45ca 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -7,7 +7,9 @@ on: branches: [main] schedule: # Saturday 11:07 UTC = Sat 21:07 AEST / 22:07 AEDT (Sydney night, year-round). - # Weekly cadence: deep fuzz at 8h/target × 16 targets is ~128 runner-hours. + # Weekly cadence: deep fuzz at 1h/target × 16 targets is ~16 runner-hours, which + # drains in a few hours overnight instead of monopolising the shared ARC pool for + # ~32h. The full 8h/target run is opt-in via workflow_dispatch (run_deep_fuzz). # PR-time coverage (cargo audit/deny, Cargo Vet, Quick Fuzz, CodeQL) catches # regressions promptly; deep fuzz is for finding bugs, not gating merges. # Off-minute (:07) avoids the cron pile-up that GitHub schedules at :00. @@ -15,8 +17,18 @@ on: release: types: [published] # On-demand: lets the schedule-only jobs (e.g. Kani) be run and verified - # without waiting for the weekly cron. - workflow_dispatch: {} + # without waiting for the weekly cron. A plain dispatch does NOT trigger the + # heavy deep-fuzz matrix — set run_deep_fuzz=true to opt into that. + workflow_dispatch: + inputs: + run_deep_fuzz: + description: "Run the full deep-fuzz matrix (heavy — occupies the ARC pool)" + type: boolean + default: false + fuzz_seconds: + description: "Seconds per target for an on-demand deep fuzz (default 8h)" + type: string + default: "28800" concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -135,9 +147,13 @@ jobs: timeout 150 cargo fuzz run ${{ matrix.target }} -- -runs=0 -max_total_time=120 || [ $? -eq 124 ] deep-fuzz: - name: Deep Fuzzing (8 hours) + name: Deep Fuzzing runs-on: cachekit - if: github.event_name == 'schedule' + # Scheduled weekly run is light (1h/target) so it can't monopolise the shared + # ARC pool. The full 8h/target run is opt-in via workflow_dispatch (run_deep_fuzz). + if: github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && inputs.run_deep_fuzz) + # 540min cap accommodates the on-demand 8h path; the inner timeout governs the + # actual scheduled (1h) vs dispatched (configurable) duration. timeout-minutes: 540 strategy: fail-fast: false @@ -185,19 +201,23 @@ jobs: # nightly rustc rejects. Let cargo resolve fresh deps. run: cargo install cargo-fuzz - - name: Run deep fuzz (8 hours per target) + - name: Run deep fuzz + # Scheduled runs use 1h/target (keeps the ARC pool free for PR CI); a manual + # workflow_dispatch can request the full 8h (or any duration) via fuzz_seconds. + env: + FUZZ_SECONDS: ${{ (github.event_name == 'workflow_dispatch' && inputs.fuzz_seconds) || '3600' }} run: | cd fuzz # Build first - fail fast on compile errors cargo fuzz build ${{ matrix.target }} - # libFuzzer self-exits at -max_total_time=8h with a graceful "Done N runs ..." + # libFuzzer self-exits at -max_total_time with a graceful "Done N runs ..." # (final stats + corpus consolidation). The outer `timeout` is +3min slack so # it only fires on a genuine hang — NOT as the normal end-of-budget stop. An # equal `timeout` would always win the race (libFuzzer's clock starts later, # after build/corpus-load) and every run would log "run interrupted" instead, - # making a real hang indistinguishable from normal completion. Still inside the - # 540min job cap. Exit 124 (hang killed by timeout) remains tolerated. - timeout 28980 cargo fuzz run ${{ matrix.target }} -- -max_total_time=28800 || [ $? -eq 124 ] + # making a real hang indistinguishable from normal completion. Exit 124 (hang + # killed by timeout) remains tolerated. + timeout "$((FUZZ_SECONDS + 180))" cargo fuzz run ${{ matrix.target }} -- -max_total_time="$FUZZ_SECONDS" || [ $? -eq 124 ] - name: Upload crash artifacts if: always() From 4c49c7b77e73ac4e72d49c109fb31014a143b8bc Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Sat, 6 Jun 2026 22:21:16 +1000 Subject: [PATCH 4/4] ci: validate fuzz_seconds dispatch input before shell arithmetic Per CodeRabbit review on #44: fuzz_seconds (workflow_dispatch string input) flowed unguarded into $((FUZZ_SECONDS + 180)) and -max_total_time. Reject non-positive-integer values and cap at 32400s (the 540min job timeout) before use, so a bad input fails fast with a clear message instead of breaking the arithmetic or the fuzzer. --- .github/workflows/security.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 47b45ca..6bdd914 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -207,6 +207,14 @@ jobs: env: FUZZ_SECONDS: ${{ (github.event_name == 'workflow_dispatch' && inputs.fuzz_seconds) || '3600' }} run: | + # Validate the dispatch-supplied duration before it reaches shell arithmetic + # and the fuzzer. Must be a positive integer and within the 540min job cap. + case "$FUZZ_SECONDS" in + ''|*[!0-9]*) echo "::error::fuzz_seconds must be a positive integer (got '$FUZZ_SECONDS')"; exit 1 ;; + esac + if [ "$FUZZ_SECONDS" -lt 1 ] || [ "$FUZZ_SECONDS" -gt 32400 ]; then + echo "::error::fuzz_seconds must be 1..32400 (<= 540min job cap), got $FUZZ_SECONDS"; exit 1 + fi cd fuzz # Build first - fail fast on compile errors cargo fuzz build ${{ matrix.target }}