diff --git a/src/chunking.ts b/src/chunking.ts index afed7ac..9d58b76 100644 --- a/src/chunking.ts +++ b/src/chunking.ts @@ -1,26 +1,54 @@ /** * Chunking for memories that don't fit in a single Algorand note. * - * Algorand caps tx notes at 1024 bytes. Subtract the JSON envelope - * (~150 bytes with book/page/total fields) and the encryption layer - * (envelope adds ~40 bytes, base64 inflates by 4/3) and you have - * ~600 bytes of plaintext per chunk that reliably fit. - * - * On save: the caller decides whether to chunk. `chunkValue` splits on - * a fixed byte boundary; chunks are reassembled in `joinChunks` by - * sorting on `page` ascending. - * - * On recall: callers collect all txs/ASAs for a given key, group by the - * save's `created` timestamp (one save = one book), require all pages - * to be present, and concatenate. + * Algorand caps tx notes at 1024 bytes. The on-chain envelope is: + * + * `{"type":"permanent-memory","key":"K","value":"","user":"<58>","created":"<24>","book":"K","page":N,"total":M}` + * + * JSON syntax + fixed field names take ~100 bytes. The Algorand + * address is 58 chars. The ISO-8601 `created` timestamp is 24 chars. + * The `book` field duplicates `key`, so its length is counted twice. + * `page` + `total` integer fields take up to ~12 chars. That leaves + * the encrypted base64 blob with: + * + * 1024 - 100 - 58 - 24 - (2 * keyLen) - 12 ≈ 830 - 2 * keyLen + * + * Base64 expands 4/3 and the crypto envelope adds 40 bytes (24-byte + * nonce + 16-byte MAC), so for a 30-char key: + * + * max base64 ≈ 830 - 60 = 770 chars + * max binary = 770 * 3/4 = 577 bytes + * max plaintext = 577 - 40 = 537 bytes + * + * `MAX_CLEARTEXT_PER_CHUNK = 400` is set conservatively below this + * threshold because (a) keys can run up to ~100 chars in practice + * (which matters since `book` doubles them), and (b) UTF-8 multi-byte + * codepoints inflate byte count over JS string length. 
+ * + * **The prior value of 600 was too generous**: real-world keys around + * 30 chars produced envelopes that landed at ~1235 bytes — over the + * 1024 cap — and `permanentSave`'s post-chunking assertion fired on + * every multi-chunk write during a re-import of long memories from + * the corvid-agent migration. + * + * On save: the caller decides whether to chunk. `chunkValue` splits + * on a fixed byte boundary; chunks are reassembled in `joinChunks` + * by sorting on `page` ascending. + * + * On recall: callers collect all txs/ASAs for a given key, group by + * the save's `created` timestamp (one save = one book), require all + * pages to be present, and concatenate. */ /** - * Max plaintext bytes per chunk. Conservative — leaves headroom for - * UTF-8 multi-byte expansion, envelope JSON, and the encryption - * envelope overhead in `@corvidlabs/ts-algochat`. + * Max plaintext bytes per chunk. See module docstring for the + * derivation. Sized to keep the post-encryption envelope under 1024 + * bytes for keys up to ~120 chars (`book` field doubles the key). + * `validateKey` caps at 256, so very long keys may still trigger + * `permanentSave`'s 1024-byte assertion — but in practice we see + * keys under 60 chars across corvid-agent's 1,000+ memory keyspace. */ -export const MAX_CLEARTEXT_PER_CHUNK = 600; +export const MAX_CLEARTEXT_PER_CHUNK = 400; /** * Split `value` into N chunks of at most `MAX_CLEARTEXT_PER_CHUNK` diff --git a/test/chunking.test.ts b/test/chunking.test.ts index f4d735c..d518716 100644 --- a/test/chunking.test.ts +++ b/test/chunking.test.ts @@ -23,13 +23,19 @@ describe("chunkValue", () => { expect(chunks[1].length).toBe(1); }); - test("3000-byte input produces 5 chunks of 600 bytes", () => { + test("3000-byte input chunks all fit under the cap", () => { + // Don't hard-code the chunk count — it depends on MAX_CLEARTEXT_PER_CHUNK + // which moves over time as we tune envelope headroom. 
The invariant + // is that (a) every chunk fits, (b) the count is the ceiling of + // total / MAX, and (c) round-trip preserves content. const v = "a".repeat(3000); const chunks = chunkValue(v); - expect(chunks.length).toBe(5); + const expectedCount = Math.ceil(3000 / MAX_CLEARTEXT_PER_CHUNK); + expect(chunks.length).toBe(expectedCount); for (const c of chunks) { expect(Buffer.byteLength(c, "utf-8")).toBeLessThanOrEqual(MAX_CLEARTEXT_PER_CHUNK); } + expect(joinChunks(chunks)).toBe(v); }); test("round-trip: chunkValue → joinChunks preserves ASCII content", () => { @@ -81,3 +87,51 @@ describe("needsChunking", () => { expect("🌟".repeat(200).length).toBe(400); }); }); + +describe("envelope-fits invariant", () => { + /** + * Regression: the previous MAX_CLEARTEXT_PER_CHUNK = 600 produced + * envelopes that exceeded Algorand's 1024-byte note cap. This + * test simulates the actual envelope shape used in `permanentSave` + * with realistic key lengths and asserts each chunked envelope + * fits within the 1024-byte cap. + * + * The simulated envelope uses an inflated base64 length that + * approximates `@corvidlabs/ts-algochat`'s encryption: each + * chunk's plaintext expands to roughly `4 * ceil((plaintext + 40) / 3)` + * base64 chars (padded base64). 
+ */ + function simulateEnvelopeBytes(key: string, chunkPlaintextBytes: number): number { + const encryptedBinary = chunkPlaintextBytes + 40; // 24 nonce + 16 MAC + const base64Len = Math.ceil(encryptedBinary / 3) * 4; + const envelope = JSON.stringify({ + type: "permanent-memory", + key, + value: "X".repeat(base64Len), + user: "X".repeat(58), + created: "2026-05-18T23:55:34.123Z", + book: key, + page: 999, + total: 999, + }); + return Buffer.byteLength(envelope, "utf-8"); + } + + test("envelope fits 1024 bytes for typical 30-char key", () => { + const key = "x".repeat(30); + const envBytes = simulateEnvelopeBytes(key, MAX_CLEARTEXT_PER_CHUNK); + expect(envBytes).toBeLessThanOrEqual(1024); + }); + + test("envelope fits 1024 bytes for 60-char key", () => { + const key = "x".repeat(60); + const envBytes = simulateEnvelopeBytes(key, MAX_CLEARTEXT_PER_CHUNK); + expect(envBytes).toBeLessThanOrEqual(1024); + }); + + test("envelope fits 1024 bytes for 100-char key", () => { + const key = "x".repeat(100); + const envBytes = simulateEnvelopeBytes(key, MAX_CLEARTEXT_PER_CHUNK); + expect(envBytes).toBeLessThanOrEqual(1024); + }); +});