From fc603c47b4f7f53bea1ed48e8b05ae0e15dda360 Mon Sep 17 00:00:00 2001
From: Ray Walker <ray@27b.io>
Date: Sat, 6 Jun 2026 21:53:39 +1000
Subject: [PATCH 1/4] ci: make Kani verification fail loud and fix its
 toolchain
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Kani Formal Verification job was failing open: every command had
`|| echo "...skipping"` and the verify step had `continue-on-error: true`,
so the job stayed green while verifying nothing. Combined with a stale
toolchain pin, it had silently stopped running entirely:

  cargo install --locked kani-verifier
  -> rustc 1.85.1 is not supported (home 0.5.12 requires rustc 1.88)

There are 11 real proof harnesses (byte_storage.rs, encryption/core.rs), so
this is meaningful verification that was providing false assurance.

- toolchain "1.85" -> stable (Kani's installer needs >=1.88; Kani downloads
  its own pinned verification toolchain in `cargo kani setup`).
- cargo install ... --force (idempotent on the runner's persistent cargo cache).
- Drop all `|| echo` swallows and `continue-on-error` — a failed install,
  setup, or proof now turns CI red.
- Add workflow_dispatch so the schedule-only job can be run on demand
  (and so this fix can actually be verified now).
---
 .github/workflows/security.yml | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml
index a548a8b..3fc54c7 100644
--- a/.github/workflows/security.yml
+++ b/.github/workflows/security.yml
@@ -14,6 +14,9 @@ on:
     - cron: '7 11 * * 6'
   release:
     types: [published]
+  # On-demand: lets the schedule-only jobs (e.g. Kani) be run and verified
+  # without waiting for the weekly cron.
+  workflow_dispatch: {}
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
@@ -207,7 +210,7 @@ jobs:
   kani:
     name: Kani Formal Verification
     runs-on: cachekit
-    if: github.event_name == 'schedule'
+    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
     steps:
       - name: Checkout code
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
@@ -215,7 +218,11 @@ jobs:
       - name: Install Rust toolchain
         uses: dtolnay/rust-toolchain@3c5f7ea28cd621ae0bf5283f0e981fb97b8a7af9 # master
         with:
-          toolchain: "1.85"
+          # Host toolchain for the kani-verifier installer only — Kani downloads its
+          # own pinned verification toolchain in `cargo kani setup`. Must be current
+          # stable: kani-verifier's deps (e.g. home 0.5.12) now require rustc >= 1.88,
+          # so the old "1.85" pin failed to compile the installer.
+          toolchain: stable
 
       - name: Cache Rust dependencies
         uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5
@@ -231,13 +238,16 @@ jobs:
             ${{ runner.os }}-cargo-
 
       - name: Install Kani
+        # No `|| echo` swallowing: a failed install/setup MUST fail the job. Silently
+        # skipping verification gives false green-CI assurance. --force makes the
+        # install idempotent on the runner's persistent cargo cache.
         run: |
-          cargo install --locked kani-verifier || echo "Kani install failed, skipping verification"
-          cargo kani setup || echo "Kani setup failed, skipping verification"
+          cargo install --locked --force kani-verifier
+          cargo kani setup
 
       - name: Run Kani verification
-        run: cargo kani --all-features || echo "Kani verification failed or not supported"
-        continue-on-error: true
+        # No continue-on-error, no `|| echo`: a failed proof must turn CI red.
+        run: cargo kani --all-features
 
   cargo-vet:
     name: Cargo Vet (Supply Chain)

From c37cf98dbf1269c9e27d023ebfcfaa2472e0b564 Mon Sep 17 00:00:00 2001
From: Ray Walker <ray@27b.io>
Date: Sat, 6 Jun 2026 21:58:50 +1000
Subject: [PATCH 2/4] ci: add least-privilege permissions to Kani job

Per CodeRabbit/zizmor review on #44: the kani job relied on default
permissions. It only checks out and runs verification, so scope it to
contents: read.
---
 .github/workflows/security.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml
index 3fc54c7..d5e63e5 100644
--- a/.github/workflows/security.yml
+++ b/.github/workflows/security.yml
@@ -211,6 +211,8 @@ jobs:
     name: Kani Formal Verification
     runs-on: cachekit
     if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
+    permissions:
+      contents: read # least-privilege: the job only checks out and verifies
     steps:
       - name: Checkout code
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

From 37687660c88b1d0fd635409ff824cd6dae47343d Mon Sep 17 00:00:00 2001
From: Ray Walker <ray@27b.io>
Date: Sat, 6 Jun 2026 22:15:28 +1000
Subject: [PATCH 3/4] ci: shrink scheduled deep-fuzz to 1h/target, add
 on-demand 8h dispatch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The weekly deep-fuzz matrix (16 targets × 8h) shared the `cachekit` ARC pool
with required PR CI and, at ~4 concurrent, monopolised it for ~32h every
Saturday — starving the required `security` and `stable / ubuntu-latest`
checks so no PR could merge until it drained.

- Scheduled run: 1h/target (~16 runner-hours, drains in a few hours overnight).
- On-demand: `workflow_dispatch` with `run_deep_fuzz=true` runs the full matrix
  at `fuzz_seconds` per target (default 8h). A plain dispatch does NOT trigger
  deep-fuzz, so verifying Kani on demand stays cheap.
- Duration is driven by FUZZ_SECONDS (schedule=3600, dispatch=input); the
  timeout buffer logic is unchanged.
---
 .github/workflows/security.yml | 40 +++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml
index d5e63e5..47b45ca 100644
--- a/.github/workflows/security.yml
+++ b/.github/workflows/security.yml
@@ -7,7 +7,9 @@ on:
     branches: [main]
   schedule:
     # Saturday 11:07 UTC = Sat 21:07 AEST / 22:07 AEDT (Sydney night, year-round).
-    # Weekly cadence: deep fuzz at 8h/target × 16 targets is ~128 runner-hours.
+    # Weekly cadence: deep fuzz at 1h/target × 16 targets is ~16 runner-hours, which
+    # drains in a few hours overnight instead of monopolising the shared ARC pool for
+    # ~32h. The full 8h/target run is opt-in via workflow_dispatch (run_deep_fuzz).
     # PR-time coverage (cargo audit/deny, Cargo Vet, Quick Fuzz, CodeQL) catches
     # regressions promptly; deep fuzz is for finding bugs, not gating merges.
     # Off-minute (:07) avoids the cron pile-up that GitHub schedules at :00.
@@ -15,8 +17,18 @@ on:
   release:
     types: [published]
   # On-demand: lets the schedule-only jobs (e.g. Kani) be run and verified
-  # without waiting for the weekly cron.
-  workflow_dispatch: {}
+  # without waiting for the weekly cron. A plain dispatch does NOT trigger the
+  # heavy deep-fuzz matrix — set run_deep_fuzz=true to opt into that.
+  workflow_dispatch:
+    inputs:
+      run_deep_fuzz:
+        description: "Run the full deep-fuzz matrix (heavy — occupies the ARC pool)"
+        type: boolean
+        default: false
+      fuzz_seconds:
+        description: "Seconds per target for an on-demand deep fuzz (default 8h)"
+        type: string
+        default: "28800"
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
@@ -135,9 +147,13 @@ jobs:
           timeout 150 cargo fuzz run ${{ matrix.target }} -- -runs=0 -max_total_time=120 || [ $? -eq 124 ]
 
   deep-fuzz:
-    name: Deep Fuzzing (8 hours)
+    name: Deep Fuzzing
     runs-on: cachekit
-    if: github.event_name == 'schedule'
+    # Scheduled weekly run is light (1h/target) so it can't monopolise the shared
+    # ARC pool. The full 8h/target run is opt-in via workflow_dispatch (run_deep_fuzz).
+    if: github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && inputs.run_deep_fuzz)
+    # 540min cap accommodates the on-demand 8h path; the inner timeout governs the
+    # actual scheduled (1h) vs dispatched (configurable) duration.
     timeout-minutes: 540
     strategy:
       fail-fast: false
@@ -185,19 +201,23 @@ jobs:
         # nightly rustc rejects. Let cargo resolve fresh deps.
         run: cargo install cargo-fuzz
 
-      - name: Run deep fuzz (8 hours per target)
+      - name: Run deep fuzz
+        # Scheduled runs use 1h/target (keeps the ARC pool free for PR CI); a manual
+        # workflow_dispatch can request the full 8h (or any duration) via fuzz_seconds.
+        env:
+          FUZZ_SECONDS: ${{ (github.event_name == 'workflow_dispatch' && inputs.fuzz_seconds) || '3600' }}
         run: |
           cd fuzz
           # Build first - fail fast on compile errors
           cargo fuzz build ${{ matrix.target }}
-          # libFuzzer self-exits at -max_total_time=8h with a graceful "Done N runs ..."
+          # libFuzzer self-exits at -max_total_time with a graceful "Done N runs ..."
           # (final stats + corpus consolidation). The outer `timeout` is +3min slack so
           # it only fires on a genuine hang — NOT as the normal end-of-budget stop. An
           # equal `timeout` would always win the race (libFuzzer's clock starts later,
           # after build/corpus-load) and every run would log "run interrupted" instead,
-          # making a real hang indistinguishable from normal completion. Still inside the
-          # 540min job cap. Exit 124 (hang killed by timeout) remains tolerated.
-          timeout 28980 cargo fuzz run ${{ matrix.target }} -- -max_total_time=28800 || [ $? -eq 124 ]
+          # making a real hang indistinguishable from normal completion. Exit 124 (hang
+          # killed by timeout) remains tolerated.
+          timeout "$((FUZZ_SECONDS + 180))" cargo fuzz run ${{ matrix.target }} -- -max_total_time="$FUZZ_SECONDS" || [ $? -eq 124 ]
 
       - name: Upload crash artifacts
         if: always()

From 4c49c7b77e73ac4e72d49c109fb31014a143b8bc Mon Sep 17 00:00:00 2001
From: Ray Walker <ray@27b.io>
Date: Sat, 6 Jun 2026 22:21:16 +1000
Subject: [PATCH 4/4] ci: validate fuzz_seconds dispatch input before shell
 arithmetic

Per CodeRabbit review on #44: fuzz_seconds (workflow_dispatch string input)
flowed unguarded into $((FUZZ_SECONDS + 180)) and -max_total_time. Reject
non-positive-integer values and cap at 32400s (the 540min job timeout) before
use, so a bad input fails fast with a clear message instead of breaking the
arithmetic or the fuzzer.
---
 .github/workflows/security.yml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml
index 47b45ca..6bdd914 100644
--- a/.github/workflows/security.yml
+++ b/.github/workflows/security.yml
@@ -207,6 +207,14 @@ jobs:
         env:
           FUZZ_SECONDS: ${{ (github.event_name == 'workflow_dispatch' && inputs.fuzz_seconds) || '3600' }}
         run: |
+          # Validate the dispatch-supplied duration before it reaches shell arithmetic
+          # and the fuzzer. Must be a positive integer and within the 540min job cap.
+          case "$FUZZ_SECONDS" in
+            ''|*[!0-9]*) echo "::error::fuzz_seconds must be a positive integer (got '$FUZZ_SECONDS')"; exit 1 ;;
+          esac
+          if [ "$FUZZ_SECONDS" -lt 1 ] || [ "$FUZZ_SECONDS" -gt 32400 ]; then
+            echo "::error::fuzz_seconds must be 1..32400 (<= 540min job cap), got $FUZZ_SECONDS"; exit 1
+          fi
           cd fuzz
           # Build first - fail fast on compile errors
           cargo fuzz build ${{ matrix.target }}