From b991c44bc22c3be4a0216d971d58cc9d3878c004 Mon Sep 17 00:00:00 2001 From: zhangchi47 Date: Mon, 22 Jun 2026 23:04:38 +0800 Subject: [PATCH] fix(claude-code): segment retains from PreCompact Claude Code transcripts are append-only across compaction. Add the PreCompact hook so full-session retain writes the pre-compact transcript before recording a boundary for the next compact segment. Remove shrink-based compaction detection from retention state. The next full-session retain now writes appended compact summary, retained tail, and new turns into a session_id-cN document after the checkpoint. Make run_retain return a success flag and only mark the PreCompact checkpoint after the retain API call succeeds. This avoids losing the pre-compact window when the daemon or retain request fails. --- .../claude-code/hooks/hooks.json | 12 ++ .../claude-code/scripts/lib/state.py | 61 +++++-- .../claude-code/scripts/pre_compact.py | 61 +++++++ .../claude-code/scripts/retain.py | 60 +++---- .../claude-code/scripts/setup_hooks.py | 12 ++ .../claude-code/tests/test_hooks.py | 157 +++++++++++++++++- .../claude-code/tests/test_state.py | 84 ++++------ 7 files changed, 341 insertions(+), 106 deletions(-) create mode 100644 hindsight-integrations/claude-code/scripts/pre_compact.py diff --git a/hindsight-integrations/claude-code/hooks/hooks.json b/hindsight-integrations/claude-code/hooks/hooks.json index ec8b68bad..e739671f1 100644 --- a/hindsight-integrations/claude-code/hooks/hooks.json +++ b/hindsight-integrations/claude-code/hooks/hooks.json @@ -34,6 +34,18 @@ ] } ], + "PreCompact": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "python3 \"${CLAUDE_PLUGIN_ROOT}/scripts/pre_compact.py\" || python \"${CLAUDE_PLUGIN_ROOT}/scripts/pre_compact.py\"", + "timeout": 15 + } + ] + } + ], "SessionEnd": [ { "hooks": [ diff --git a/hindsight-integrations/claude-code/scripts/lib/state.py b/hindsight-integrations/claude-code/scripts/lib/state.py index 963353c23..463ae945b 100644 --- a/hindsight-integrations/claude-code/scripts/lib/state.py +++ b/hindsight-integrations/claude-code/scripts/lib/state.py @@ -157,32 +157,61 @@ def _locked_read_modify_write(state_name: str, lock_name: str, modify_fn): return result -def track_retention(session_id: str, message_count: int) -> tuple: - """Track retention state and detect compaction. +def mark_precompact(session_id: str, message_count: int) -> tuple: + """Record the transcript position before Claude Code compacts a session. - Compares the current message count against the last retained count for this - session. When the transcript shrinks (compaction), increments a chunk counter - so the caller can use a distinct document_id, preserving the pre-compaction - document. + Claude Code transcript files are append-only across compaction. PreCompact is + therefore the reliable signal for starting a new retained document segment: + everything appended after ``message_count`` becomes overlap/new context for + the next ``session_id-cN`` document. Returns: - (chunk_index, compacted) — chunk_index for building document_id, - compacted is True if compaction was detected this call. + (chunk_index, start_index) for the next compact segment. """ def _update(data): entry = data.get(session_id, {"message_count": 0, "chunk": 0}) - last_count = entry["message_count"] - chunk = entry["chunk"] - compacted = False + chunk = entry.get("chunk", 0) + 1 + + entry["message_count"] = max(message_count, entry.get("message_count", 0)) + entry["chunk"] = chunk + entry["compact_start"] = message_count + data[session_id] = entry + + # Cap tracked sessions + if len(data) > 10000: + sorted_keys = sorted(data.keys()) + for k in sorted_keys[: len(sorted_keys) // 2]: + del data[k] + + return data, (chunk, message_count) - if message_count < last_count: - # Transcript shrank — compaction happened - chunk += 1 - compacted = True + return _locked_read_modify_write("retention_tracking.json", "retention_tracking.lock", _update) + + +def track_retention(session_id: str, message_count: int) -> tuple: + """Track retention state and return the active document segment. + + Normal Claude Code transcripts only grow, including across compaction. Real + compaction segmentation is created by ``mark_precompact``; this function + does not infer compaction from transcript size changes. + + Returns: + (chunk_index, start_index) — use ``start_index`` to slice the current + transcript before retaining and ``chunk_index`` for document_id. + """ + + def _update(data): + entry = data.get(session_id, {"message_count": 0, "chunk": 0}) + chunk = entry.get("chunk", 0) + start_index = entry.get("compact_start", 0) entry["message_count"] = message_count entry["chunk"] = chunk + if start_index: + entry["compact_start"] = start_index + else: + entry.pop("compact_start", None) data[session_id] = entry # Cap tracked sessions @@ -191,6 +220,6 @@ def _update(data): for k in sorted_keys[: len(sorted_keys) // 2]: del data[k] - return data, (chunk, compacted) + return data, (chunk, start_index) return _locked_read_modify_write("retention_tracking.json", "retention_tracking.lock", _update) diff --git a/hindsight-integrations/claude-code/scripts/pre_compact.py b/hindsight-integrations/claude-code/scripts/pre_compact.py new file mode 100644 index 000000000..1de878c37 --- /dev/null +++ b/hindsight-integrations/claude-code/scripts/pre_compact.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +"""PreCompact hook: retain and mark the boundary before Claude Code compacts. + +Claude Code keeps transcript JSONL files append-only. Compaction appends a +boundary plus summary and preserved tail; it does not shrink the file. This hook +forces a retain of the current pre-compact transcript, then records the +pre-compact message count only if that retain succeeds so the next retain writes +only the appended summary/tail/new messages into a fresh ``session_id-cN`` +document. +""" + +import json +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from lib.config import debug_log, load_config +from lib.state import mark_precompact +from retain import read_transcript, run_retain + + +def main() -> None: + config = load_config() + + try: + hook_input = json.load(sys.stdin) + except (json.JSONDecodeError, EOFError): + hook_input = {} + + session_id = hook_input.get("session_id", "unknown") + transcript_path = hook_input.get("transcript_path", "") + trigger = hook_input.get("trigger", "") + + if not config.get("autoRetain"): + debug_log(config, f"Auto-retain disabled; skipping PreCompact retain for session {session_id}") + return + + message_count = len(read_transcript(transcript_path)) + retained = run_retain(hook_input, force=True) + if not retained: + debug_log(config, f"PreCompact retain did not complete for session {session_id}; checkpoint not marked") + return + + if config.get("retainMode", "full-session") == "chunked": + debug_log(config, f"PreCompact retained chunked session {session_id}; no full-session checkpoint needed") + return + + chunk, start_index = mark_precompact(session_id, message_count) + debug_log( + config, + f"PreCompact marked session {session_id}: chunk={chunk}, start_index={start_index}, trigger={trigger}", + ) + + +if __name__ == "__main__": + try: + main() + except Exception as e: + print(f"[Hindsight] PreCompact error: {e}", file=sys.stderr) + sys.exit(0) diff --git a/hindsight-integrations/claude-code/scripts/retain.py b/hindsight-integrations/claude-code/scripts/retain.py index 37900883a..fae0d315d 100755 --- a/hindsight-integrations/claude-code/scripts/retain.py +++ b/hindsight-integrations/claude-code/scripts/retain.py @@ -69,12 +69,12 @@ def read_transcript(transcript_path: str) -> list: return messages -def run_retain(hook_input: dict, force: bool = False) -> None: +def run_retain(hook_input: dict, force: bool = False) -> bool: config = load_config() if not config.get("autoRetain"): debug_log(config, "Auto-retain disabled, exiting") - return + return False debug_log(config, f"Retain hook_input keys: {list(hook_input.keys())} force={force}") @@ -85,7 +85,7 @@ def run_retain(hook_input: dict, force: bool = False) -> None: all_messages = read_transcript(transcript_path) if not all_messages: debug_log(config, "No messages in transcript, skipping retain") - return + return False debug_log(config, f"Read {len(all_messages)} messages from transcript") @@ -94,6 +94,7 @@ def run_retain(hook_input: dict, force: bool = False) -> None: retain_every_n = max(1, config.get("retainEveryNTurns", 1)) retain_full_window = False messages_to_retain = all_messages + document_id = session_id # Respect retainEveryNTurns in both modes, unless force=True (SessionEnd final retain) if retain_every_n > 1 and not force: @@ -101,7 +102,7 @@ def run_retain(hook_input: dict, force: bool = False) -> None: if turn_count % retain_every_n != 0: next_at = ((turn_count // retain_every_n) + 1) * retain_every_n debug_log(config, f"Turn {turn_count}/{retain_every_n}, skipping retain (next at turn {next_at})") - return + return False if retain_mode == "chunked" and retain_every_n > 1: # Sliding window: N turns + configured overlap @@ -109,14 +110,33 @@ def run_retain(hook_input: dict, force: bool = False) -> None: window_turns = retain_every_n + overlap_turns messages_to_retain = slice_last_turns_by_user_boundary(all_messages, window_turns) retain_full_window = True + document_id = f"{session_id}-{int(time.time() * 1000)}" debug_log( config, f"Chunked retain firing (window: {window_turns} turns, {len(messages_to_retain)} messages)", ) else: - # Full session mode: retain all messages, always as full window + # Full-session mode normally upserts the whole transcript into the + # session document. After PreCompact, Claude Code appends summary/tail + # records to the same JSONL file, so retain only the appended segment in + # a new cN document and keep the pre-compact document intact. + chunk_index, compact_start = track_retention(session_id, len(all_messages)) + if compact_start > 0: + messages_to_retain = all_messages[compact_start:] + document_id = f"{session_id}-c{chunk_index}" + debug_log( + config, + f"Compact segment retain: doc '{document_id}', messages {compact_start}:{len(all_messages)}", + ) + else: + document_id = session_id if chunk_index == 0 else f"{session_id}-c{chunk_index}" + + if not messages_to_retain: + debug_log(config, "No new messages after compact checkpoint, skipping retain") + return False + retain_full_window = True - debug_log(config, f"Full session retain: {len(all_messages)} messages") + debug_log(config, f"Full session retain: {len(messages_to_retain)} of {len(all_messages)} messages") # Format transcript retain_roles = config.get("retainRoles", ["user", "assistant"]) @@ -127,7 +147,7 @@ def run_retain(hook_input: dict, force: bool = False) -> None: if not transcript: debug_log(config, "Empty transcript after formatting, skipping retain") - return + return False # Resolve API URL def _dbg(*a): @@ -137,7 +157,7 @@ def _dbg(*a): api_url = get_api_url(config, debug_fn=_dbg, allow_daemon_start=True) except RuntimeError as e: print(f"[Hindsight] {e}", file=sys.stderr) - return + return False api_token = config.get("hindsightApiToken") try: @@ -148,32 +168,12 @@ def _dbg(*a): ) except ValueError as e: print(f"[Hindsight] Invalid API URL: {e}", file=sys.stderr) - return + return False # Derive bank ID and ensure mission bank_id = derive_bank_id(hook_input, config) ensure_bank_mission(client, bank_id, config, debug_fn=_dbg) - # Document ID strategy: - # - Chunked mode: each chunk gets a timestamped document_id. - # - Full-session mode: uses session_id as base, but tracks message count - # to detect compaction. When Claude Code compacts the conversation the - # transcript shrinks — if we kept the same document_id we'd overwrite the - # pre-compaction document with a shorter one, losing context. Instead we - # increment a chunk counter so the old document is preserved. - if retain_mode == "chunked" and retain_every_n > 1: - document_id = f"{session_id}-{int(time.time() * 1000)}" - else: - chunk_index, compacted = track_retention(session_id, len(all_messages)) - if compacted: - debug_log( - config, - f"Compaction detected for session {session_id}: transcript shrank, " - f"advancing to chunk {chunk_index} to preserve prior document", - ) - # chunk 0 → plain session_id (backwards compatible with existing docs) - document_id = session_id if chunk_index == 0 else f"{session_id}-c{chunk_index}" - # Resolve template variables in tags and metadata. # Supported variables: {session_id}, {bank_id}, {timestamp}, {user_id} template_vars = { @@ -232,8 +232,10 @@ def _resolve_template(value: str) -> str: timeout=15, ) debug_log(config, f"Retain response: {json.dumps(response)[:200]}") + return True except Exception as e: print(f"[Hindsight] Retain failed: {e}", file=sys.stderr) + return False def main(): diff --git a/hindsight-integrations/claude-code/scripts/setup_hooks.py b/hindsight-integrations/claude-code/scripts/setup_hooks.py index 114756240..967a9f1e4 100644 --- a/hindsight-integrations/claude-code/scripts/setup_hooks.py +++ b/hindsight-integrations/claude-code/scripts/setup_hooks.py @@ -66,6 +66,18 @@ def build_hooks(plugin_root: str) -> dict: ] } ], + "PreCompact": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": f'python3 "{plugin_root}/scripts/pre_compact.py"', + "timeout": 15, + } + ], + } + ], "SessionEnd": [ { "hooks": [ diff --git a/hindsight-integrations/claude-code/tests/test_hooks.py b/hindsight-integrations/claude-code/tests/test_hooks.py index 22cfc810f..7fc32cf82 100644 --- a/hindsight-integrations/claude-code/tests/test_hooks.py +++ b/hindsight-integrations/claude-code/tests/test_hooks.py @@ -424,9 +424,8 @@ def capture(req, timeout=None): assert "first question" in item["content"] assert "second question" in item["content"] - def test_full_session_new_document_after_compaction(self, monkeypatch, tmp_path): - """After compaction shrinks the transcript, retain should use a new document_id - to avoid overwriting the pre-compaction document.""" + def test_full_session_new_document_after_precompact_checkpoint(self, monkeypatch, tmp_path): + """PreCompact marks the append-only boundary for the next cN document.""" # First retain: 4 messages messages_full = [ {"role": "user", "content": "first question"}, @@ -449,8 +448,35 @@ def capture(req, timeout=None): assert captured_calls[0]["items"][0]["document_id"] == "sess-compact-test" assert "first question" in captured_calls[0]["items"][0]["content"] - # Second retain: compaction happened — transcript now has only 2 messages - messages_compacted = [ + # PreCompact fires before Claude Code appends compact_boundary and the + # compact summary to the same transcript file. + _run_hook( + "pre_compact", + { + "session_id": "sess-compact-test", + "transcript_path": transcript, + "trigger": "manual", + }, + monkeypatch, + tmp_path, + urlopen_side_effect=capture, + ) + + # Second retain: transcript is append-only. The c1 document should only + # include content appended after the PreCompact checkpoint. + messages_compacted = messages_full + [ + { + "type": "system", + "subtype": "compact_boundary", + "content": "Conversation compacted", + "compactMetadata": {"trigger": "manual"}, + }, + { + "type": "user", + "isCompactSummary": True, + "isVisibleInTranscriptOnly": True, + "message": {"role": "user", "content": "Summary: earlier work focused on first and second questions."}, + }, {"role": "user", "content": "third question"}, {"role": "assistant", "content": "third answer"}, ] @@ -459,10 +485,125 @@ def capture(req, timeout=None): _run_hook("retain", hook_input, monkeypatch, tmp_path, urlopen_side_effect=capture) - assert len(captured_calls) == 2 + assert len(captured_calls) == 3 + # PreCompact forces one final pre-compact retain before marking c1. + assert captured_calls[1]["items"][0]["document_id"] == "sess-compact-test" + assert "second question" in captured_calls[1]["items"][0]["content"] # Should use a new document_id with chunk suffix - assert captured_calls[1]["items"][0]["document_id"] == "sess-compact-test-c1" - assert "third question" in captured_calls[1]["items"][0]["content"] + assert captured_calls[2]["items"][0]["document_id"] == "sess-compact-test-c1" + assert "first question" not in captured_calls[2]["items"][0]["content"] + assert "Summary: earlier work" in captured_calls[2]["items"][0]["content"] + assert "third question" in captured_calls[2]["items"][0]["content"] + + def test_precompact_does_not_checkpoint_when_retain_fails(self, monkeypatch, tmp_path): + messages_full = [ + {"role": "user", "content": "first question"}, + {"role": "assistant", "content": "first answer"}, + {"role": "user", "content": "second question"}, + {"role": "assistant", "content": "second answer"}, + ] + transcript = make_transcript_file(tmp_path, messages_full) + + def fail_retain(req, timeout=None): + if "/memories" in req.full_url and "/recall" not in req.full_url: + raise OSError("connection refused") + return FakeHTTPResponse({}) + + _run_hook( + "pre_compact", + { + "session_id": "sess-precompact-fail", + "transcript_path": transcript, + "trigger": "auto", + }, + monkeypatch, + tmp_path, + urlopen_side_effect=fail_retain, + ) + + messages_after = messages_full + [ + { + "type": "user", + "isCompactSummary": True, + "message": {"role": "user", "content": "Summary: compact happened after failed retain."}, + }, + {"role": "user", "content": "third question"}, + {"role": "assistant", "content": "third answer"}, + ] + transcript = make_transcript_file(tmp_path, messages_after) + captured = {} + + def capture(req, timeout=None): + if "/memories" in req.full_url and "/recall" not in req.full_url: + captured["body"] = json.loads(req.data.decode()) + return FakeHTTPResponse({}) + + _run_hook( + "retain", + make_hook_input(transcript_path=transcript, session_id="sess-precompact-fail"), + monkeypatch, + tmp_path, + urlopen_side_effect=capture, + ) + + assert "body" in captured, "retain API was not called" + item = captured["body"]["items"][0] + assert item["document_id"] == "sess-precompact-fail" + assert "first question" in item["content"] + assert "third question" in item["content"] + + def test_precompact_chunked_mode_does_not_create_full_session_checkpoint(self, monkeypatch, tmp_path): + messages = [ + {"role": "user", "content": "first question"}, + {"role": "assistant", "content": "first answer"}, + ] + transcript = make_transcript_file(tmp_path, messages) + captured_calls = [] + + def capture(req, timeout=None): + if "/memories" in req.full_url and "/recall" not in req.full_url: + captured_calls.append(json.loads(req.data.decode())) + return FakeHTTPResponse({}) + + _run_hook( + "pre_compact", + { + "session_id": "sess-chunked-compact", + "transcript_path": transcript, + "trigger": "manual", + }, + monkeypatch, + tmp_path, + urlopen_side_effect=capture, + extra_settings={"retainMode": "chunked", "retainEveryNTurns": 2}, + ) + + assert len(captured_calls) == 1 + assert captured_calls[0]["items"][0]["document_id"].startswith("sess-chunked-compact-") + + messages_after = messages + [ + { + "type": "user", + "isCompactSummary": True, + "message": {"role": "user", "content": "Summary: prior chunked work."}, + }, + {"role": "user", "content": "after compact"}, + {"role": "assistant", "content": "after answer"}, + ] + transcript = make_transcript_file(tmp_path, messages_after) + + _run_hook( + "retain", + make_hook_input(transcript_path=transcript, session_id="sess-chunked-compact"), + monkeypatch, + tmp_path, + urlopen_side_effect=capture, + extra_settings={"retainMode": "full-session", "retainEveryNTurns": 1}, + ) + + assert len(captured_calls) == 2 + assert captured_calls[1]["items"][0]["document_id"] == "sess-chunked-compact" + assert "first question" in captured_calls[1]["items"][0]["content"] def test_full_session_same_document_when_growing(self, monkeypatch, tmp_path): """When transcript grows (no compaction), retain should keep the same document_id.""" diff --git a/hindsight-integrations/claude-code/tests/test_state.py b/hindsight-integrations/claude-code/tests/test_state.py index 48b3330f8..c8eb1791a 100644 --- a/hindsight-integrations/claude-code/tests/test_state.py +++ b/hindsight-integrations/claude-code/tests/test_state.py @@ -1,10 +1,10 @@ -"""Unit tests for lib/state.py — retention tracking and compaction detection.""" +"""Unit tests for lib/state.py — retention tracking and compact segmentation.""" import json import pytest -from lib.state import read_state, track_retention, write_state +from lib.state import mark_precompact, read_state, track_retention, write_state @pytest.fixture(autouse=True) @@ -14,69 +14,40 @@ def _isolated_state(monkeypatch, tmp_path): # --------------------------------------------------------------------------- -# track_retention — core compaction detection +# track_retention / mark_precompact — retention document segmentation # --------------------------------------------------------------------------- class TestTrackRetention: def test_first_call_returns_chunk_zero(self): - chunk, compacted = track_retention("sess-1", 10) + chunk, start_index = track_retention("sess-1", 10) assert chunk == 0 - assert compacted is False + assert start_index == 0 def test_growing_transcript_keeps_same_chunk(self): track_retention("sess-1", 4) - chunk, compacted = track_retention("sess-1", 8) + chunk, start_index = track_retention("sess-1", 8) assert chunk == 0 - assert compacted is False + assert start_index == 0 def test_equal_count_keeps_same_chunk(self): track_retention("sess-1", 5) - chunk, compacted = track_retention("sess-1", 5) + chunk, start_index = track_retention("sess-1", 5) assert chunk == 0 - assert compacted is False - - def test_shrinking_transcript_triggers_compaction(self): - track_retention("sess-1", 10) - chunk, compacted = track_retention("sess-1", 3) - assert chunk == 1 - assert compacted is True - - def test_multiple_compactions_increment_chunk(self): - track_retention("sess-1", 10) - - chunk, compacted = track_retention("sess-1", 3) - assert chunk == 1 - assert compacted is True - - # Grow again after compaction - track_retention("sess-1", 8) - - # Second compaction - chunk, compacted = track_retention("sess-1", 2) - assert chunk == 2 - assert compacted is True - - def test_growth_after_compaction_stays_on_same_chunk(self): - track_retention("sess-1", 10) - track_retention("sess-1", 3) # compaction → chunk 1 - - chunk, compacted = track_retention("sess-1", 6) - assert chunk == 1 - assert compacted is False + assert start_index == 0 def test_sessions_are_independent(self): track_retention("sess-a", 10) track_retention("sess-b", 20) - # Compaction on sess-a only - chunk_a, compacted_a = track_retention("sess-a", 3) - chunk_b, compacted_b = track_retention("sess-b", 25) + mark_precompact("sess-a", 10) + chunk_a, start_a = track_retention("sess-a", 12) + chunk_b, start_b = track_retention("sess-b", 25) assert chunk_a == 1 - assert compacted_a is True + assert start_a == 10 assert chunk_b == 0 - assert compacted_b is False + assert start_b == 0 def test_persists_across_calls(self, tmp_path): """State file is written to disk and survives between calls.""" @@ -90,19 +61,26 @@ def test_persists_across_calls(self, tmp_path): assert data["sess-1"]["message_count"] == 10 assert data["sess-1"]["chunk"] == 0 - def test_compaction_from_one_message(self): - """Edge case: transcript shrinks to a single message.""" - track_retention("sess-1", 50) - chunk, compacted = track_retention("sess-1", 1) + def test_precompact_marks_next_segment_start(self): + chunk, start_index = mark_precompact("sess-1", 10) assert chunk == 1 - assert compacted is True + assert start_index == 10 - def test_shrink_by_one_triggers_compaction(self): - """Even shrinking by a single message counts as compaction.""" - track_retention("sess-1", 10) - chunk, compacted = track_retention("sess-1", 9) + chunk, start_index = track_retention("sess-1", 14) assert chunk == 1 - assert compacted is True + assert start_index == 10 + + def test_multiple_precompacts_increment_chunks(self): + mark_precompact("sess-1", 10) + track_retention("sess-1", 14) + + chunk, start_index = mark_precompact("sess-1", 14) + assert chunk == 2 + assert start_index == 14 + + chunk, start_index = track_retention("sess-1", 17) + assert chunk == 2 + assert start_index == 14 # ---------------------------------------------------------------------------