cachekit-io · 27Bslash6 · May 30, 2026 · May 29, 2026 · May 29, 2026 · May 29, 2026
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -72,6 +72,7 @@ serde_json = "1.0"
 blake2 = "0.10"
 hex = "0.4"
 aes-gcm = { version = "0.10", features = ["zeroize"] }
+criterion = { version = "0.5", features = ["html_reports"] }
 
 [features]
 default = ["compression", "checksum", "messagepack"]
@@ -120,3 +121,30 @@ stubbing = false
 [lints.rust]
 # Suppress benign warnings about cfg(kani) which is set by Kani verifier
 unexpected_cfgs = { level = "warn", check-cfg = ['cfg(kani)'] }
+
+# Release profile for cachekit-core's own release builds.
+#
+# Cargo only honors the ROOT package's [profile.release]. Workspace consumers
+# (cachekit-rs, cachekit-py via PyO3, cachekit-ts via NAPI) define their own
+# [profile.release] and use it instead — this profile does NOT propagate to
+# them. The value of setting it here is:
+#
+#   1. `cargo bench` from inside cachekit-core runs under a stable, fully
+#      optimized profile (Phase 3 perf measurement depends on this).
+#   2. `cargo build --release` and `cargo publish` produce a consistently
+#      optimized artifact.
+#   3. It serves as a reference configuration to copy: consumers WITHOUT their
+#      own [profile.release] get Cargo defaults (codegen-units=16, lto=false), not this.
+#
+# Mirrors cachekit-py/rust/Cargo.toml for consistency.
+[profile.release]
+opt-level = 3
+lto = "fat"
+codegen-units = 1
+panic = "abort"
+strip = "symbols"
+debug = "line-tables-only"
+
+[[bench]]
+name = "hot_path"
+harness = false
+# The bench is documented to run with `--features encryption`, which is not part
+# of `default`. Declaring the requirement lets Cargo skip this target (instead of
+# failing the build) when the feature is off, e.g. `cargo bench` or `--all-targets`.
+required-features = ["encryption"]
diff --git a/Makefile b/Makefile
@@ -1,6 +1,6 @@
 # cachekit-core - Development Makefile
 
-.PHONY: help check test lint clippy audit deny vet fmt fmt-check fuzz-quick fuzz-deep sbom clean
+.PHONY: help check test lint clippy audit deny vet fmt fmt-check bench bench-quick fuzz-quick fuzz-deep sbom clean
 .DEFAULT_GOAL := help
 
 # Colors for output
@@ -76,6 +76,14 @@ fmt-check: ## Check code formatting
 	@cargo fmt --check
 	@echo "$(GREEN)✓ Code formatting OK$(RESET)"
 
+# Full run of the hot_path bench target; no harness flags are passed, so
+# Criterion uses its default warm-up, measurement time and sample size.
+bench: ## Run Criterion benches (uses --features encryption)
+	$(call require_binary,cargo,Install Rust: https://rustup.rs)
+	@cargo bench -p cachekit-core --features encryption --bench hot_path
+
+# Shortened run of the same bench target: everything after the bare `--` is
+# handed to the Criterion harness rather than interpreted by cargo.
+bench-quick: ## Quick bench run for CI (1s warmup, 2s measurement, 10 samples)
+	$(call require_binary,cargo,Install Rust: https://rustup.rs)
+	@cargo bench -p cachekit-core --features encryption --bench hot_path -- --warm-up-time 1 --measurement-time 2 --sample-size 10
+
 fuzz-quick: ## Quick corpus-only fuzz run (2 min per target)
 	@echo "$(BLUE)Running quick fuzzing (2 min per target)...$(RESET)"
 	$(call require_binary,cargo-fuzz,Install: cargo install cargo-fuzz)

diff --git a/benches/hot_path.rs b/benches/hot_path.rs
@@ -0,0 +1,58 @@
+//! Criterion benchmark suite for cachekit-core hot paths.
+//!
+//! Run with: `cargo bench -p cachekit-core --features encryption`
+//! Output: `target/criterion/<bench_id>/report/index.html`
+//!
+//! This is the PGO training workload — extend with new groups as hot
+//! paths are identified. Sizes chosen to span the realistic cache-payload
+//! distribution (64B keys, 1KB values, 64KB large objects).
+
+use cachekit_core::{ByteStorage, ZeroKnowledgeEncryptor};
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+
+const SIZES: &[usize] = &[64, 256, 1024, 4 * 1024, 16 * 1024, 64 * 1024];
+
+/// Builds a deterministic payload of `size` bytes that cycles through 0..=255.
+fn make_payload(size: usize) -> Vec<u8> {
+    (0u8..=u8::MAX).cycle().take(size).collect()
+}
+
+/// Benchmarks a `ByteStorage` store → retrieve round trip for every size in `SIZES`.
+///
+/// One storage instance is shared by all sizes; throughput is reported in
+/// payload bytes per iteration.
+fn bench_byte_storage_roundtrip(c: &mut Criterion) {
+    let storage = ByteStorage::new(None);
+    let mut group = c.benchmark_group("byte_storage/roundtrip");
+    for payload in SIZES.iter().map(|&size| make_payload(size)) {
+        let len = payload.len();
+        group.throughput(Throughput::Bytes(len as u64));
+        group.bench_with_input(BenchmarkId::from_parameter(len), &payload, |bencher, input| {
+            bencher.iter(|| {
+                let stored = storage.store(black_box(input), None).unwrap();
+                let (restored, _format) = storage.retrieve(black_box(&stored)).unwrap();
+                // Keep the decoded bytes observable so the round trip is not optimized out.
+                black_box(restored);
+            });
+        });
+    }
+    group.finish();
+}
+
+/// Benchmarks an `encrypt_aes_gcm` → `decrypt_aes_gcm` round trip for every size in `SIZES`.
+///
+/// One encryptor, a fixed 32-byte key and a constant AAD are shared by all
+/// sizes; throughput is reported in plaintext bytes per iteration.
+fn bench_encrypt_decrypt(c: &mut Criterion) {
+    let encryptor = ZeroKnowledgeEncryptor::new().unwrap();
+    let key = [0x42u8; 32];
+    let aad = b"bench-aad";
+    let mut group = c.benchmark_group("encryption/aes_gcm_roundtrip");
+    for plaintext in SIZES.iter().copied().map(make_payload) {
+        let len = plaintext.len();
+        group.throughput(Throughput::Bytes(len as u64));
+        group.bench_with_input(BenchmarkId::from_parameter(len), &plaintext, |bencher, msg| {
+            bencher.iter(|| {
+                let sealed = encryptor
+                    .encrypt_aes_gcm(black_box(msg), &key, aad)
+                    .unwrap();
+                let opened = encryptor
+                    .decrypt_aes_gcm(black_box(&sealed), &key, aad)
+                    .unwrap();
+                // Keep the recovered plaintext observable so decryption is not optimized out.
+                black_box(opened);
+            });
+        });
+    }
+    group.finish();
+}
+
+criterion_group!(benches, bench_byte_storage_roundtrip, bench_encrypt_decrypt);
+criterion_main!(benches);
diff --git a/src/encryption/core.rs b/src/encryption/core.rs
@@ -59,21 +59,22 @@ use thiserror::Error;
 ///
 /// # Why randomized start?
 /// If a process restarts, the counter would start at 0 again, potentially
-/// reusing instance IDs from the previous run. By starting with a random
-/// 32-bit offset, we get ~2^32 cross-process collision resistance while
-/// maintaining deterministic uniqueness within a single process.
+/// reusing instance IDs from the previous run. By seeding with a full 8-byte
+/// random value we get ~2^64 cross-process collision resistance — well below
+/// 2^-32 probability for any realistic fleet size (e.g. Cloudflare Workers).
 #[cfg(not(target_arch = "wasm32"))]
 static GLOBAL_INSTANCE_COUNTER: LazyLock<AtomicU64> = LazyLock::new(|| {
-    // Initialize with random 32-bit value in upper bits for cross-process uniqueness
-    // Lower 32 bits start at 0 for deterministic ordering
+    // Seed with 8 bytes of randomness so the full u64 space is covered.
+    // A 4-byte seed (legacy) gave only 2^32 cross-process collision resistance,
+    // which is reachable on Cloudflare Workers where isolates churn constantly.
+    // 8 bytes brings collision probability below 2^-32 for any realistic fleet.
     let rng = SystemRandom::new();
-    let mut random_seed = [0u8; 4];
+    let mut random_seed = [0u8; 8];
     // RNG failure is a hard error — silently falling back to 0 is a security risk
-    // because multiple restarts would produce the same instance IDs
+    // because multiple restarts would produce the same instance IDs.
     rng.fill(&mut random_seed)
         .expect("SystemRandom::fill failed during GLOBAL_INSTANCE_COUNTER initialization");
-    let seed = u32::from_be_bytes(random_seed) as u64;
-    AtomicU64::new(seed << 32)
+    AtomicU64::new(u64::from_be_bytes(random_seed))
 });
 
 // ── wasm32: thread_local Cell<u64> seeded from getrandom ────────────────────
@@ -85,11 +86,12 @@ static GLOBAL_INSTANCE_COUNTER: LazyLock<AtomicU64> = LazyLock::new(|| {
 #[cfg(target_arch = "wasm32")]
 thread_local! {
     static WASM_INSTANCE_COUNTER: std::cell::Cell<u64> = {
-        let mut seed_bytes = [0u8; 4];
+        // Seed with 8 bytes for full u64 entropy. See native path above for
+        // rationale. On wasm32, getrandom routes to the JS crypto API.
+        let mut seed_bytes = [0u8; 8];
         getrandom::getrandom(&mut seed_bytes)
             .expect("getrandom failed during WASM_INSTANCE_COUNTER initialization");
-        let seed = u32::from_be_bytes(seed_bytes) as u64;
-        std::cell::Cell::new(seed << 32)
+        std::cell::Cell::new(u64::from_be_bytes(seed_bytes))
     };
 }
 
@@ -1103,6 +1105,40 @@ mod tests {
         );
     }
 
+    /// HIGH-2 regression: the instance counter must be seeded with 8 random bytes.
+    ///
+    /// Before the fix, the seed was `(rand_u32 << 32)`, so the lower 32 bits of the
+    /// counter started at 0 and grew by 1 per encryptor created in the process —
+    /// a small integer (typically < 100) in a fresh test process.
+    ///
+    /// After the fix, the lower 32 bits start uniformly distributed over the u32
+    /// space, so the probability that they are below 1000 is ~1000/2^32 ≈ 2.3e-7.
+    /// That is also the (accepted) probability of a spurious failure of this test.
+    ///
+    /// Test strategy: create one encryptor, read `GLOBAL_INSTANCE_COUNTER`, and
+    /// assert that its lower 32 bits are ≥ 1000.
+    ///
+    /// NOTE: detection of the pre-fix behavior is order-dependent. If earlier tests
+    /// in the same process had already created ≥ 1000 encryptors, the pre-fix code
+    /// would pass this check too. `--test-threads=1` only serializes tests; for a
+    /// clean check run this test alone by name:
+    ///   cargo test -p cachekit-core --features encryption test_instance_seed_uses_8_random_bytes
+    ///
+    /// Only compiled off wasm32: `GLOBAL_INSTANCE_COUNTER` is defined under
+    /// `#[cfg(not(target_arch = "wasm32"))]`; wasm32 uses `WASM_INSTANCE_COUNTER`.
+    #[test]
+    #[cfg(not(target_arch = "wasm32"))]
+    fn test_instance_seed_uses_8_random_bytes() {
+        let _e = ZeroKnowledgeEncryptor::new().unwrap();
+        let counter = GLOBAL_INSTANCE_COUNTER.load(Ordering::SeqCst);
+        // Intentional truncation: only the lower 32 bits are under test.
+        let low32 = counter as u32;
+        assert!(
+            low32 >= 1000,
+            "instance counter lower 32 bits = {low32}; pre-fix this would be a small \
+             integer (count of encryptors created in this process). Post-fix this is \
+             randomly seeded across the full u64 space."
+        );
+    }
+
     #[test]
     #[cfg(not(target_arch = "wasm32"))]
     fn test_concurrent_nonce_exhaustion() {

diff --git a/supply-chain/audits.toml b/supply-chain/audits.toml
@@ -2,3 +2,57 @@
 # cargo-vet audits file
 
 [audits]
+
+[[trusted.half]]
+criteria = "safe-to-run"
+user-id = 3416 # Kathryn Long (starkat99)
+start = "2019-10-04"
+end = "2027-05-30"
+
+[[trusted.is-terminal]]
+criteria = "safe-to-deploy"
+user-id = 6825 # Dan Gohman (sunfishcode)
+start = "2022-01-22"
+end = "2027-05-30"
+
+[[trusted.linux-raw-sys]]
+criteria = "safe-to-deploy"
+user-id = 6825 # Dan Gohman (sunfishcode)
+start = "2021-06-12"
+end = "2027-05-30"
+
+[[trusted.memchr]]
+criteria = "safe-to-deploy"
+user-id = 189 # Andrew Gallant (BurntSushi)
+start = "2019-07-07"
+end = "2027-05-30"
+
+[[trusted.rayon]]
+criteria = "safe-to-run"
+user-id = 539 # Josh Stone (cuviper)
+start = "2019-06-13"
+end = "2027-05-30"
+
+[[trusted.regex-syntax]]
+criteria = "safe-to-deploy"
+user-id = 189 # Andrew Gallant (BurntSushi)
+start = "2019-03-30"
+end = "2027-05-30"
+
+[[trusted.rustix]]
+criteria = "safe-to-deploy"
+user-id = 6825 # Dan Gohman (sunfishcode)
+start = "2021-10-29"
+end = "2027-05-30"
+
+[[trusted.walkdir]]
+criteria = "safe-to-deploy"
+user-id = 189 # Andrew Gallant (BurntSushi)
+start = "2019-06-09"
+end = "2027-05-30"
+
+[[trusted.web-sys]]
+criteria = "safe-to-deploy"
+user-id = 1 # Alex Crichton (alexcrichton)
+start = "2019-03-04"
+end = "2027-05-30"