Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions hindsight-integrations/claude-code/hooks/hooks.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@
]
}
],
"PreCompact": [
{
"matcher": "*",
"hooks": [
{
"type": "command",
"command": "python3 \"${CLAUDE_PLUGIN_ROOT}/scripts/pre_compact.py\" || python \"${CLAUDE_PLUGIN_ROOT}/scripts/pre_compact.py\"",
"timeout": 15
}
]
}
],
"SessionEnd": [
{
"hooks": [
Expand Down
61 changes: 45 additions & 16 deletions hindsight-integrations/claude-code/scripts/lib/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,32 +157,61 @@ def _locked_read_modify_write(state_name: str, lock_name: str, modify_fn):
return result


def track_retention(session_id: str, message_count: int) -> tuple:
"""Track retention state and detect compaction.
def mark_precompact(session_id: str, message_count: int) -> tuple:
"""Record the transcript position before Claude Code compacts a session.

Compares the current message count against the last retained count for this
session. When the transcript shrinks (compaction), increments a chunk counter
so the caller can use a distinct document_id, preserving the pre-compaction
document.
Claude Code transcript files are append-only across compaction. PreCompact is
therefore the reliable signal for starting a new retained document segment:
everything appended after ``message_count`` becomes overlap/new context for
the next ``session_id-cN`` document.

Returns:
(chunk_index, compacted) — chunk_index for building document_id,
compacted is True if compaction was detected this call.
(chunk_index, start_index) for the next compact segment.
"""

def _update(data):
entry = data.get(session_id, {"message_count": 0, "chunk": 0})
last_count = entry["message_count"]
chunk = entry["chunk"]
compacted = False
chunk = entry.get("chunk", 0) + 1

entry["message_count"] = max(message_count, entry.get("message_count", 0))
entry["chunk"] = chunk
entry["compact_start"] = message_count
data[session_id] = entry

# Cap tracked sessions
if len(data) > 10000:
sorted_keys = sorted(data.keys())
for k in sorted_keys[: len(sorted_keys) // 2]:
del data[k]

return data, (chunk, message_count)

if message_count < last_count:
# Transcript shrank — compaction happened
chunk += 1
compacted = True
return _locked_read_modify_write("retention_tracking.json", "retention_tracking.lock", _update)


def track_retention(session_id: str, message_count: int) -> tuple:
"""Track retention state and return the active document segment.

Normal Claude Code transcripts only grow, including across compaction. Real
compaction segmentation is created by ``mark_precompact``; this function
does not infer compaction from transcript size changes.

Returns:
(chunk_index, start_index) — use ``start_index`` to slice the current
transcript before retaining and ``chunk_index`` for document_id.
"""

def _update(data):
entry = data.get(session_id, {"message_count": 0, "chunk": 0})
chunk = entry.get("chunk", 0)
start_index = entry.get("compact_start", 0)

entry["message_count"] = message_count
entry["chunk"] = chunk
if start_index:
entry["compact_start"] = start_index
else:
entry.pop("compact_start", None)
data[session_id] = entry

# Cap tracked sessions
Expand All @@ -191,6 +220,6 @@ def _update(data):
for k in sorted_keys[: len(sorted_keys) // 2]:
del data[k]

return data, (chunk, compacted)
return data, (chunk, start_index)

return _locked_read_modify_write("retention_tracking.json", "retention_tracking.lock", _update)
61 changes: 61 additions & 0 deletions hindsight-integrations/claude-code/scripts/pre_compact.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env python3
"""PreCompact hook: retain and mark the boundary before Claude Code compacts.

Claude Code keeps transcript JSONL files append-only. Compaction appends a
boundary plus summary and preserved tail; it does not shrink the file. This hook
forces a retain of the current pre-compact transcript, then records the
pre-compact message count only if that retain succeeds so the next retain writes
only the appended summary/tail/new messages into a fresh ``session_id-cN``
document.
"""

import json
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from lib.config import debug_log, load_config
from lib.state import mark_precompact
from retain import read_transcript, run_retain


def main() -> None:
    """Retain the pre-compact transcript and checkpoint the next segment start.

    Reads the PreCompact hook payload (JSON) from stdin, forces a retain of the
    current transcript and, unless ``retainMode`` is ``"chunked"``, records the
    pre-compact message count through ``mark_precompact``. Nothing is marked
    when auto-retain is disabled or when the forced retain does not report
    success.
    """
    config = load_config()

    try:
        hook_input = json.load(sys.stdin)
    except (json.JSONDecodeError, EOFError):
        hook_input = {}
    # Valid JSON that is not an object (null, a list, a bare string) would make
    # the ``.get`` lookups below raise AttributeError; treat it like a missing
    # payload instead.
    if not isinstance(hook_input, dict):
        hook_input = {}

    session_id = hook_input.get("session_id", "unknown")
    transcript_path = hook_input.get("transcript_path", "")
    trigger = hook_input.get("trigger", "")

    if not config.get("autoRetain"):
        debug_log(config, f"Auto-retain disabled; skipping PreCompact retain for session {session_id}")
        return

    # Count messages before retaining so the checkpoint reflects the transcript
    # as it was when this hook started.
    message_count = len(read_transcript(transcript_path))
    retained = run_retain(hook_input, force=True)
    if not retained:
        debug_log(config, f"PreCompact retain did not complete for session {session_id}; checkpoint not marked")
        return

    if config.get("retainMode", "full-session") == "chunked":
        debug_log(config, f"PreCompact retained chunked session {session_id}; no full-session checkpoint needed")
        return

    chunk, start_index = mark_precompact(session_id, message_count)
    debug_log(
        config,
        f"PreCompact marked session {session_id}: chunk={chunk}, start_index={start_index}, trigger={trigger}",
    )


if __name__ == "__main__":
    # Script entry point: any error raised by main() is reported on stderr and
    # the process still exits with status 0.
    try:
        main()
    except Exception as e:
        print(f"[Hindsight] PreCompact error: {e}", file=sys.stderr)
        sys.exit(0)
60 changes: 31 additions & 29 deletions hindsight-integrations/claude-code/scripts/retain.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ def read_transcript(transcript_path: str) -> list:
return messages


def run_retain(hook_input: dict, force: bool = False) -> None:
def run_retain(hook_input: dict, force: bool = False) -> bool:
config = load_config()

if not config.get("autoRetain"):
debug_log(config, "Auto-retain disabled, exiting")
return
return False

debug_log(config, f"Retain hook_input keys: {list(hook_input.keys())} force={force}")

Expand All @@ -85,7 +85,7 @@ def run_retain(hook_input: dict, force: bool = False) -> None:
all_messages = read_transcript(transcript_path)
if not all_messages:
debug_log(config, "No messages in transcript, skipping retain")
return
return False

debug_log(config, f"Read {len(all_messages)} messages from transcript")

Expand All @@ -94,29 +94,49 @@ def run_retain(hook_input: dict, force: bool = False) -> None:
retain_every_n = max(1, config.get("retainEveryNTurns", 1))
retain_full_window = False
messages_to_retain = all_messages
document_id = session_id

# Respect retainEveryNTurns in both modes, unless force=True (SessionEnd final retain)
if retain_every_n > 1 and not force:
turn_count = increment_turn_count(session_id)
if turn_count % retain_every_n != 0:
next_at = ((turn_count // retain_every_n) + 1) * retain_every_n
debug_log(config, f"Turn {turn_count}/{retain_every_n}, skipping retain (next at turn {next_at})")
return
return False

if retain_mode == "chunked" and retain_every_n > 1:
# Sliding window: N turns + configured overlap
overlap_turns = config.get("retainOverlapTurns", 0)
window_turns = retain_every_n + overlap_turns
messages_to_retain = slice_last_turns_by_user_boundary(all_messages, window_turns)
retain_full_window = True
document_id = f"{session_id}-{int(time.time() * 1000)}"
debug_log(
config,
f"Chunked retain firing (window: {window_turns} turns, {len(messages_to_retain)} messages)",
)
else:
# Full session mode: retain all messages, always as full window
# Full-session mode normally upserts the whole transcript into the
# session document. After PreCompact, Claude Code appends summary/tail
# records to the same JSONL file, so retain only the appended segment in
# a new cN document and keep the pre-compact document intact.
chunk_index, compact_start = track_retention(session_id, len(all_messages))
if compact_start > 0:
messages_to_retain = all_messages[compact_start:]
document_id = f"{session_id}-c{chunk_index}"
debug_log(
config,
f"Compact segment retain: doc '{document_id}', messages {compact_start}:{len(all_messages)}",
)
else:
document_id = session_id if chunk_index == 0 else f"{session_id}-c{chunk_index}"

if not messages_to_retain:
debug_log(config, "No new messages after compact checkpoint, skipping retain")
return False

retain_full_window = True
debug_log(config, f"Full session retain: {len(all_messages)} messages")
debug_log(config, f"Full session retain: {len(messages_to_retain)} of {len(all_messages)} messages")

# Format transcript
retain_roles = config.get("retainRoles", ["user", "assistant"])
Expand All @@ -127,7 +147,7 @@ def run_retain(hook_input: dict, force: bool = False) -> None:

if not transcript:
debug_log(config, "Empty transcript after formatting, skipping retain")
return
return False

# Resolve API URL
def _dbg(*a):
Expand All @@ -137,7 +157,7 @@ def _dbg(*a):
api_url = get_api_url(config, debug_fn=_dbg, allow_daemon_start=True)
except RuntimeError as e:
print(f"[Hindsight] {e}", file=sys.stderr)
return
return False

api_token = config.get("hindsightApiToken")
try:
Expand All @@ -148,32 +168,12 @@ def _dbg(*a):
)
except ValueError as e:
print(f"[Hindsight] Invalid API URL: {e}", file=sys.stderr)
return
return False

# Derive bank ID and ensure mission
bank_id = derive_bank_id(hook_input, config)
ensure_bank_mission(client, bank_id, config, debug_fn=_dbg)

# Document ID strategy:
# - Chunked mode: each chunk gets a timestamped document_id.
# - Full-session mode: uses session_id as base, but tracks message count
# to detect compaction. When Claude Code compacts the conversation the
# transcript shrinks — if we kept the same document_id we'd overwrite the
# pre-compaction document with a shorter one, losing context. Instead we
# increment a chunk counter so the old document is preserved.
if retain_mode == "chunked" and retain_every_n > 1:
document_id = f"{session_id}-{int(time.time() * 1000)}"
else:
chunk_index, compacted = track_retention(session_id, len(all_messages))
if compacted:
debug_log(
config,
f"Compaction detected for session {session_id}: transcript shrank, "
f"advancing to chunk {chunk_index} to preserve prior document",
)
# chunk 0 → plain session_id (backwards compatible with existing docs)
document_id = session_id if chunk_index == 0 else f"{session_id}-c{chunk_index}"

# Resolve template variables in tags and metadata.
# Supported variables: {session_id}, {bank_id}, {timestamp}, {user_id}
template_vars = {
Expand Down Expand Up @@ -232,8 +232,10 @@ def _resolve_template(value: str) -> str:
timeout=15,
)
debug_log(config, f"Retain response: {json.dumps(response)[:200]}")
return True
except Exception as e:
print(f"[Hindsight] Retain failed: {e}", file=sys.stderr)
return False


def main():
Expand Down
12 changes: 12 additions & 0 deletions hindsight-integrations/claude-code/scripts/setup_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,18 @@ def build_hooks(plugin_root: str) -> dict:
]
}
],
"PreCompact": [
{
"matcher": "*",
"hooks": [
{
"type": "command",
"command": f'python3 "{plugin_root}/scripts/pre_compact.py"',
"timeout": 15,
}
],
}
],
"SessionEnd": [
{
"hooks": [
Expand Down
Loading