diff --git a/.autoresearch/autoresearch.jsonl b/.autoresearch/autoresearch.jsonl index af9a7866..e99ad71b 100644 --- a/.autoresearch/autoresearch.jsonl +++ b/.autoresearch/autoresearch.jsonl @@ -1,28 +1,25 @@ -{"type":"config","name":"create-pr-optimize","metricName":"total_bytes","metricUnit":"bytes","bestDirection":"lower"} -{"run":1,"commit":"d018e0f","metric":9073,"metrics":{"line_count":284,"file_count":6,"word_count":1375},"status":"keep","description":"baseline","timestamp":1775130716,"segment":0} -{"run":2,"commit":"9e5692e","metric":6268,"metrics":{"line_count":183,"file_count":6,"word_count":917},"status":"keep","description":"compress scripts - remove verbose messages and redundant comments","timestamp":1775130811,"segment":0} -{"run":3,"commit":"17e69a5","metric":5836,"metrics":{"line_count":169,"file_count":6,"word_count":844},"status":"keep","description":"compress SKILL.md prose","timestamp":1775130856,"segment":0} -{"run":4,"commit":"6265fd9","metric":4534,"metrics":{"line_count":141,"file_count":5,"word_count":666},"status":"keep","description":"remove unused verify-pr-status.sh","timestamp":1775130894,"segment":0} -{"run":5,"commit":"3eb5c55","metric":4019,"metrics":{"line_count":122,"file_count":4,"word_count":601},"status":"keep","description":"merge sync-with-base into preflight-check","timestamp":1775130966,"segment":0} -{"run":6,"commit":"d790d50","metric":3558,"metrics":{"line_count":107,"file_count":3,"word_count":541},"status":"keep","description":"inline lib.sh into preflight, remove lib.sh","timestamp":1775131007,"segment":0} -{"run":7,"commit":"9706617","metric":3202,"metrics":{"line_count":85,"file_count":3,"word_count":490},"status":"keep","description":"further compress SKILL.md","timestamp":1775131034,"segment":0} -{"run":8,"commit":"9725788","metric":3103,"metrics":{"line_count":82,"file_count":3,"word_count":471},"status":"keep","description":"compact wait-for-merge.sh","timestamp":1775131065,"segment":0} 
-{"run":9,"commit":"7f78174","metric":2884,"metrics":{"line_count":68,"file_count":3,"word_count":448},"status":"keep","description":"further compress preflight-check.sh","timestamp":1775131088,"segment":0} -{"type":"config","name":"create-pr-skill-tokens","metricName":"skill_bytes","metricUnit":"bytes","bestDirection":"lower"} -{"run":10,"commit":"1b650ac","metric":1081,"metrics":{"skill_lines":27,"skill_words":151,"script_bytes":1803},"status":"keep","description":"baseline (segment 1 — skill_bytes only)","timestamp":1775131168,"segment":1} -{"run":11,"commit":"6ba0c3d","metric":802,"metrics":{"skill_lines":23,"skill_words":110,"script_bytes":1803},"status":"keep","description":"compress SKILL.md — remove redundant sections","timestamp":1775131195,"segment":1} -{"run":12,"commit":"ec416bc","metric":732,"metrics":{"skill_lines":19,"skill_words":100,"script_bytes":1803},"status":"keep","description":"extract script path variable S=","timestamp":1775131213,"segment":1} -{"run":13,"commit":"9bb6f1e","metric":675,"metrics":{"skill_lines":18,"skill_words":93,"script_bytes":1803},"status":"keep","description":"merge inline comments, remove bold","timestamp":1775131235,"segment":1} -{"run":14,"commit":"563874d","metric":635,"metrics":{"skill_lines":18,"skill_words":82,"script_bytes":1803},"status":"keep","description":"micro-compress wording","timestamp":1775131249,"segment":1} -{"run":15,"commit":"059de59","metric":605,"metrics":{"skill_lines":17,"skill_words":82,"script_bytes":1803},"status":"keep","description":"remove explicit template path","timestamp":1775131290,"segment":1} -{"run":16,"commit":"059de59","metric":608,"metrics":{"skill_lines":16,"skill_words":83,"script_bytes":1803},"status":"discard","description":"merge pr create+merge into one line (bytes increased)","timestamp":1775131309,"segment":1} -{"run":17,"commit":"96b1a8f","metric":665,"metrics":{"skill_lines":17,"skill_words":92,"script_bytes":1818},"status":"keep","description":"add auto-merge 
re-enable after CI fix + push -u","timestamp":1775132341,"segment":1} -{"run":18,"commit":"17c7ef7","metric":665,"metrics":{"skill_lines":17,"skill_words":92,"script_bytes":1818},"status":"keep","description":"edge test: main branch — agent skipped script, did manual logic","timestamp":1775132900,"segment":1} -{"run":19,"commit":"b2e1f87","metric":794,"metrics":{"skill_lines":20,"skill_words":109,"script_bytes":1818},"status":"keep","description":"clarify scripts MUST be run (test-driven fix)","timestamp":1775132948,"segment":1} -{"run":20,"commit":"b2e1f87","metric":794,"metrics":{"skill_lines":20,"skill_words":109,"script_bytes":1818},"status":"keep","description":"edge test: nothing-to-commit — agent stopped correctly but preflight ran unnecessarily","timestamp":1775133044,"segment":1} -{"run":22,"commit":"b2e1f87","metric":794,"metrics":{"skill_lines":20,"skill_words":109,"script_bytes":1818},"status":"keep","description":"must-run test passed, found broken tests referencing deleted scripts","timestamp":1775133359,"segment":1} -{"run":23,"commit":"dece665","metric":794,"metrics":{"skill_lines":20,"skill_words":109,"script_bytes":1818},"status":"keep","description":"fix broken tests for deleted scripts — 63/63 pass","timestamp":1775133491,"segment":1} -{"run":25,"commit":"dece665","metric":794,"metrics":{"skill_lines":20,"skill_words":109,"script_bytes":1818},"status":"keep","description":"final validation — both scripts executed, 14 tool calls, PR #607 merged","timestamp":1775133750,"segment":1} -{"run":26,"commit":"6a0be38","metric":794,"metrics":{"skill_lines":20,"skill_words":109,"script_bytes":1802},"status":"keep","description":"remove --delete-branch from fallback merge","timestamp":1775133769,"segment":1} -{"run":27,"commit":"494dc0b","metric":776,"metrics":{"skill_lines":20,"skill_words":107,"script_bytes":1802},"status":"keep","description":"remove re-run preflight, reorder main check","timestamp":1775133796,"segment":1} 
-{"run":28,"commit":"b792e67","metric":776,"metrics":{"skill_lines":21,"skill_words":107,"script_bytes":1802},"status":"keep","description":"wrap CI fail line for markdownlint","timestamp":1775133928,"segment":1} +{"type":"config","name":"ralph-optimize","metricName":"skill_bytes","metricUnit":"bytes","bestDirection":"lower"} +{"run":1,"commit":"c8c2fa1","metric":2914,"metrics":{"skill_lines":90,"cancel_bytes":696,"hook_bytes":3747,"hook_lines":138,"total_bytes":7357},"status":"keep","description":"baseline","timestamp":1775210437,"segment":0} +{"run":2,"commit":"6934219","metric":1710,"metrics":{"skill_lines":52,"cancel_bytes":696,"hook_bytes":3747,"hook_lines":138,"total_bytes":6153},"status":"keep","description":"compress SKILL.md — flatten JSON, remove redundant prose","timestamp":1775210484,"segment":0} +{"run":3,"commit":"afeef81","metric":1364,"metrics":{"skill_lines":41,"cancel_bytes":696,"hook_bytes":3747,"hook_lines":138,"total_bytes":5807},"status":"keep","description":"further compress SKILL.md — shorten sections, tighten wording","timestamp":1775210509,"segment":0} +{"run":4,"commit":"c4b4aba","metric":1364,"metrics":{"skill_lines":41,"cancel_bytes":431,"hook_bytes":3747,"hook_lines":138,"total_bytes":5542},"status":"keep","description":"compress ralph-cancel SKILL.md","timestamp":1775210530,"segment":0} +{"run":5,"commit":"d70b5f9","metric":1364,"metrics":{"skill_lines":41,"cancel_bytes":431,"hook_bytes":2725,"hook_lines":74,"total_bytes":4520},"status":"keep","description":"simplify ralph-persist.ts — remove interfaces, inline functions","timestamp":1775210565,"segment":0} +{"run":6,"commit":"01d96d4","metric":1260,"metrics":{"skill_lines":40,"cancel_bytes":431,"hook_bytes":2725,"hook_lines":74,"total_bytes":4416},"status":"keep","description":"micro-compress SKILL.md — merge steps, shorten JSON","timestamp":1775210587,"segment":0} 
+{"run":7,"commit":"974f67c","metric":1260,"metrics":{"skill_lines":40,"cancel_bytes":431,"hook_bytes":2157,"hook_lines":52,"total_bytes":3848},"status":"keep","description":"compress ralph-persist.ts — extract helpers, shorten vars","timestamp":1775210616,"segment":0} +{"run":8,"commit":"0405ab8","metric":1237,"metrics":{"skill_lines":36,"cancel_bytes":431,"hook_bytes":2157,"hook_lines":52,"total_bytes":3825},"status":"keep","description":"replace JSON code block with inline field description","timestamp":1775210641,"segment":0} +{"run":9,"commit":"465de49","metric":1197,"metrics":{"skill_lines":34,"cancel_bytes":431,"hook_bytes":2157,"hook_lines":52,"total_bytes":3785},"status":"keep","description":"extract $S path var, merge verification+done sections","timestamp":1775210664,"segment":0} +{"run":10,"commit":"570f903","metric":1193,"metrics":{"skill_lines":36,"cancel_bytes":431,"hook_bytes":2157,"hook_lines":52,"total_bytes":3781},"status":"keep","description":"consolidate flags into single line at bottom","timestamp":1775210689,"segment":0} +{"run":11,"commit":"fe1ab49","metric":1193,"metrics":{"skill_lines":36,"cancel_bytes":431,"hook_bytes":1962,"hook_lines":48,"total_bytes":3586},"status":"keep","description":"shorten block message, compress variable names","timestamp":1775210722,"segment":0} +{"run":13,"commit":"823b62e","metric":1249,"metrics":{"skill_lines":32,"cancel_bytes":431,"hook_bytes":1962,"hook_lines":48,"total_bytes":3642},"status":"keep","description":"clarify SKILL.md based on agent test — progress.txt, deslop, review criteria (+123 bytes for clarity)","timestamp":1775211110,"segment":0} +{"run":14,"commit":"97d4d0b","metric":1272,"metrics":{"skill_lines":31,"cancel_bytes":431,"hook_bytes":1962,"hook_lines":48,"total_bytes":3665},"status":"keep","description":"clarify progress.txt timing, remove redundant parse step","timestamp":1775211256,"segment":0} 
+{"run":15,"commit":"59d1f66","metric":1359,"metrics":{"skill_lines":31,"cancel_bytes":431,"hook_bytes":1962,"hook_lines":48,"total_bytes":3752},"status":"keep","description":"define flag behaviors — --no-prd creates minimal prd.json (agent test fix)","timestamp":1775211369,"segment":0} +{"run":16,"commit":"4ea8184","metric":1396,"metrics":{"skill_lines":31,"cancel_bytes":431,"hook_bytes":1962,"hook_lines":48,"total_bytes":3789},"status":"keep","description":"clarify TDD with existing tests, fix --no-prd wording (agent test fix)","timestamp":1775211475,"segment":0} +{"run":17,"commit":"0201a7a","metric":1396,"metrics":{"skill_lines":31,"cancel_bytes":525,"hook_bytes":1962,"hook_lines":48,"total_bytes":3883},"status":"keep","description":"add CWD context and hook explanation to ralph-cancel (agent test fix)","timestamp":1775211579,"segment":0} +{"run":18,"commit":"e6ea5dd","metric":1438,"metrics":{"skill_lines":32,"cancel_bytes":525,"hook_bytes":1962,"hook_lines":48,"total_bytes":3925},"status":"keep","description":"add INSIGHT format to progress.txt, clarify per-story update (agent test)","timestamp":1775211830,"segment":0} +{"run":19,"commit":"fb6ca04","metric":1438,"metrics":{"skill_lines":32,"cancel_bytes":525,"hook_bytes":2286,"hook_lines":55,"total_bytes":4249},"status":"keep","description":"add story progress to block message (N/M stories done)","timestamp":1775211874,"segment":0} +{"run":20,"commit":"60ca33c","metric":1098,"metrics":{"skill_lines":29,"cancel_bytes":525,"hook_bytes":2286,"hook_lines":55,"total_bytes":3909},"status":"keep","description":"simplify for universal use — remove $S, add one-at-a-time + slop def. 
Agent: 7/10","timestamp":1775212030,"segment":0} +{"run":21,"commit":"52ff4aa","metric":1098,"metrics":{"skill_lines":29,"cancel_bytes":525,"hook_bytes":2286,"hook_lines":55,"total_bytes":3909},"status":"keep","description":"e2e test: /ralph actual invocation — 3 stories, 1 iteration, all pass","timestamp":1775212340,"segment":0} +{"run":22,"commit":"52ff4aa","metric":1098,"metrics":{"skill_lines":29,"cancel_bytes":525,"hook_bytes":2286,"hook_lines":55,"total_bytes":3909},"status":"keep","description":"e2e test: /ralph --no-prd — single story auto-generated, no confusion, all pass","timestamp":1775212411,"segment":0} +{"run":23,"commit":"52ff4aa","metric":1098,"metrics":{"skill_lines":29,"cancel_bytes":525,"hook_bytes":2286,"hook_lines":55,"total_bytes":3909},"status":"keep","description":"e2e test: /ralph-cancel — active loop cancelled cleanly, no issues","timestamp":1775212452,"segment":0} +{"run":24,"commit":"2af754f","metric":1098,"metrics":{"skill_lines":29,"cancel_bytes":525,"hook_bytes":2487,"hook_lines":60,"total_bytes":4110},"status":"keep","description":"include last progress.txt failure in block message","timestamp":1775212490,"segment":0} +{"run":25,"commit":"72eb631","metric":1098,"metrics":{"skill_lines":29,"cancel_bytes":525,"hook_bytes":2390,"hook_lines":58,"total_bytes":4013},"status":"keep","description":"use Bun.stdin.text() for simpler stdin reading","timestamp":1775212517,"segment":0} diff --git a/.autoresearch/autoresearch.md b/.autoresearch/autoresearch.md index b505d00a..284057d5 100644 --- a/.autoresearch/autoresearch.md +++ b/.autoresearch/autoresearch.md @@ -1,47 +1,36 @@ -# Autoresearch: create-pr token efficiency +# Autoresearch: Ralph Plugin Optimization ## Objective -Optimize the `plugins/me/skills/create-pr/` skill for token efficiency and correctness. SKILL.md is loaded into LLM context when invoked — fewer bytes = less cost. Scripts run at execution time and don't affect token cost, but must be correct. 
+Optimize the `plugins/ralph/` plugin for simplicity and token efficiency. SKILL.md files are loaded into LLM context when invoked — fewer bytes = less cost. The hook (ralph-persist.ts) runs at OS level but should also be minimal and clean. ## Metrics -- **Primary**: skill_bytes (bytes, lower is better) — SKILL.md byte count -- **Secondary**: skill_lines, skill_words, script_bytes +- **Primary**: skill_bytes (bytes, lower is better) — ralph/SKILL.md byte count +- **Secondary**: skill_lines, cancel_bytes, hook_bytes, hook_lines, total_bytes ## How to Run -`./.autoresearch/run.sh` — outputs `METRIC name=number` lines. +`./.autoresearch/run.sh` — outputs `METRIC name=number` lines. Validates frontmatter, hooks.json, TS compilation, and BATS tests. ## Files in Scope | File | Purpose | |------|---------| -| `plugins/me/skills/create-pr/SKILL.md` | Main skill definition (loaded into LLM context) | -| `plugins/me/skills/create-pr/scripts/preflight-check.sh` | Pre-push checks + auto-sync | -| `plugins/me/skills/create-pr/scripts/wait-for-merge.sh` | Wait for CI + merge | +| `plugins/ralph/skills/ralph/SKILL.md` | Main skill (LLM context — primary target) | +| `plugins/ralph/skills/ralph-cancel/SKILL.md` | Cancel skill (LLM context) | +| `plugins/ralph/hooks/ralph-persist.ts` | Stop hook engine (Bun runtime) | +| `plugins/ralph/hooks/hooks.json` | Hook registration config | ## Off Limits -- Do not break the PR workflow -- Exit codes must be preserved +- Do not break the persistence loop workflow (activate → iterate → complete) +- Cancel signal mechanism must work +- Session isolation must be preserved +- Stale state recovery must work +- Always exit 0 (never crash Claude) +- plugin.json metadata ## Constraints -- Scripts must pass shellcheck +- Tests must pass (ralph_persist.bats + ralph_hooks_json.bats) - SKILL.md must have valid frontmatter -- Tests must pass (63/63) +- hooks.json must be valid JSON +- TypeScript must compile with bun ## What's Been Tried -### Structural 
changes (big wins) -- Removed unused verify-pr-status.sh (-1302 bytes) -- Merged sync-with-base.sh into preflight-check.sh (-515 bytes) -- Inlined lib.sh into preflight-check.sh (-461 bytes) - -### SKILL.md compression (medium wins) -- Removed Overview, When to Use, Stop Conditions sections -- Extracted S= path variable for script paths -- Removed bold markdown markers, flattened sections - -### Test-driven fixes (increased bytes for correctness) -- "scripts MUST be run" directive (+129 bytes) — agents were skipping scripts -- auto-merge re-enable after CI fix (+60 bytes) — tested on PR #604 -- push -u in preflight — new branches had no upstream - -### Dead ends -- Merging gh pr create + merge into one line — bytes increased -- Further compression below ~700 bytes — losing essential information +(Starting fresh — no experiments yet) diff --git a/.autoresearch/dashboard.md b/.autoresearch/dashboard.md index f59dae12..9bd6e40d 100644 --- a/.autoresearch/dashboard.md +++ b/.autoresearch/dashboard.md @@ -1,31 +1,29 @@ -# Autoresearch Dashboard: create-pr-optimize +# Autoresearch Dashboard: ralph-optimize -## Segment 1: skill_bytes (SKILL.md only) -**Runs:** 14 | **Kept:** 12 | **Discarded:** 1 | **Tests:** 1 -**Baseline:** 1081 bytes (#10) -**Best pure:** 605 bytes (#15, -44.0%) -**Current:** 794 bytes (#19, -26.5%) — includes test-driven fixes +**Runs:** 20 | **Kept:** 17 | **Discarded:** 0 | **Crashed:** 0 +**Baseline:** skill=2914, total=7357 bytes +**Final:** skill=1098 (-62.3%), total=3909 (-46.9%) +**Agent clarity:** 7/10 -| # | commit | skill_bytes | status | description | -|---|--------|-------------|--------|-------------| -| 10 | 1b650ac | 1081 | keep | baseline | -| 11 | 6ba0c3d | 802 (-25.8%) | keep | remove redundant sections | -| 12 | ec416bc | 732 (-32.3%) | keep | extract S= path variable | -| 13 | 9bb6f1e | 675 (-37.6%) | keep | merge comments, remove bold | -| 14 | 563874d | 635 (-41.3%) | keep | micro-compress wording | -| 15 | 059de59 | 605 (-44.0%) 
| keep | remove template path | -| 16 | 059de59 | 608 | discard | merge create+merge lines | -| 17 | 96b1a8f | 665 | keep | add auto-merge re-enable (test fix) | -| 18 | - | - | test | edge: main branch — agent skipped scripts | -| 19 | b2e1f87 | 794 | keep | "scripts MUST be run" directive | -| 20 | - | - | test | edge: nothing-to-commit — agent handled correctly | -| 22 | - | - | test | must-run directive confirmed working | -| 23 | dece665 | 794 | keep | fix broken tests — 63/63 pass | +| # | skill | cancel | hook | total | description | +|---|-------|--------|------|-------|-------------| +| 1 | 2914 | 696 | 3747 | 7357 | baseline | +| 2 | 1710 | 696 | 3747 | 6153 | compress SKILL.md | +| 3 | 1364 | 696 | 3747 | 5807 | further compress | +| 5 | 1364 | 431 | 2725 | 4520 | simplify hook | +| 7 | 1260 | 431 | 2157 | 3848 | compress hook | +| 11 | 1193 | 431 | 1962 | 3586 | best pure compression | +| 15 | 1359 | 431 | 1962 | 3752 | +clarity (agent fixes) | +| 19 | 1438 | 525 | 2286 | 4249 | +story progress in hook | +| 20 | 1098 | 525 | 2286 | 3909 | final: simplified for universal use | -## Subagent Test Results -| PR | Scenario | Result | Finding | -|----|----------|--------|---------| -| #601-602 | basic flow (main SKILL) | pass | - | -| #604 | optimized SKILL | pass | auto-merge disabled after push, push -u needed | -| #605 | main branch edge | pass | agent skipped scripts (fixed with MUST directive) | -| #606 | MUST directive test | pass | scripts executed correctly, CI failed on stale tests | +## Agent Tests (7 total, all pass) +| Scenario | Clarity Finding | +|----------|----------------| +| Simple task | progress.txt init, deslop vague | +| Multi-story | one-at-a-time unclear | +| --no-prd | "skip PRD" contradictory | +| Edge cases | retry flow works | +| ralph-cancel | CWD + hook explanation needed | +| Calculator | 7/10 — clean | +| Calculator --no-prd | 7/10 — auto-generate rule wanted | diff --git a/.autoresearch/run.sh b/.autoresearch/run.sh index 
32c9c6a7..e0ecdcae 100755 --- a/.autoresearch/run.sh +++ b/.autoresearch/run.sh @@ -1,33 +1,43 @@ #!/usr/bin/env bash set -euo pipefail -SKILL="plugins/me/skills/create-pr/SKILL.md" -SCRIPTS_DIR="plugins/me/skills/create-pr/scripts" +ROOT="plugins/ralph" -# Primary: SKILL.md bytes (this is what loads into LLM context) -SKILL_BYTES=$(wc -c < "$SKILL" | tr -d ' ') -echo "METRIC skill_bytes=$SKILL_BYTES" +# Pre-check: files exist +for f in "$ROOT/skills/ralph/SKILL.md" "$ROOT/skills/ralph-cancel/SKILL.md" "$ROOT/hooks/ralph-persist.ts" "$ROOT/hooks/hooks.json" "$ROOT/.claude-plugin/plugin.json"; do + [[ -f "$f" ]] || { echo "MISSING: $f" >&2; exit 1; } +done + +# Skill bytes (loaded into LLM context — primary optimization target) +SKILL_BYTES=$(wc -c < "$ROOT/skills/ralph/SKILL.md" | tr -d ' ') +SKILL_LINES=$(wc -l < "$ROOT/skills/ralph/SKILL.md" | tr -d ' ') + +# Cancel skill bytes +CANCEL_BYTES=$(wc -c < "$ROOT/skills/ralph-cancel/SKILL.md" | tr -d ' ') + +# Hook bytes (runtime, not LLM context) +HOOK_BYTES=$(wc -c < "$ROOT/hooks/ralph-persist.ts" | tr -d ' ') +HOOK_LINES=$(wc -l < "$ROOT/hooks/ralph-persist.ts" | tr -d ' ') + +# Total plugin bytes +TOTAL=$((SKILL_BYTES + CANCEL_BYTES + HOOK_BYTES)) -# Secondary -SKILL_LINES=$(wc -l < "$SKILL" | tr -d ' ') +# Validate: SKILL.md has frontmatter +head -1 "$ROOT/skills/ralph/SKILL.md" | grep -q '^---' || { echo "ERROR: SKILL.md missing frontmatter" >&2; exit 1; } + +# Validate: hooks.json is valid JSON +jq empty "$ROOT/hooks/hooks.json" 2>/dev/null || { echo "ERROR: hooks.json invalid" >&2; exit 1; } + +# Validate: hook loads and runs under bun +# (smoke test: pipe an empty JSON object on stdin; any non-zero exit fails the run) +echo '{}' | bun run "$ROOT/hooks/ralph-persist.ts" >/dev/null 2>&1 || { echo "ERROR: TS runtime error" >&2; exit 1; } + +# Run tests +bats tests/ralph_persist.bats tests/ralph_hooks_json.bats >/dev/null 2>&1 || { echo "ERROR: tests failed" >&2; exit 1; } + +echo "METRIC skill_bytes=$SKILL_BYTES" echo "METRIC 
skill_lines=$SKILL_LINES" -SKILL_WORDS=$(wc -w < "$SKILL" | tr -d ' ') -echo "METRIC skill_words=$SKILL_WORDS" -SCRIPT_BYTES=$(cat "$SCRIPTS_DIR"/*.sh 2>/dev/null | wc -c | tr -d ' ') -echo "METRIC script_bytes=$SCRIPT_BYTES" - -# Validity -echo "--- Validity Checks ---" -head -1 "$SKILL" | grep -q '^---' || { echo "FAIL: missing frontmatter" >&2; exit 1; } -echo "OK: frontmatter" - -FAIL=0 -for f in "$SCRIPTS_DIR"/*.sh; do - if ! (cd "$SCRIPTS_DIR" && shellcheck -x "$(basename "$f")") >/dev/null 2>&1; then - echo "FAIL: shellcheck $(basename "$f")" >&2 - (cd "$SCRIPTS_DIR" && shellcheck -x "$(basename "$f")") >&2 || true - FAIL=1 - fi -done -[[ $FAIL -eq 0 ]] && echo "OK: shellcheck" || exit 1 -echo "--- Done ---" +echo "METRIC cancel_bytes=$CANCEL_BYTES" +echo "METRIC hook_bytes=$HOOK_BYTES" +echo "METRIC hook_lines=$HOOK_LINES" +echo "METRIC total_bytes=$TOTAL" diff --git a/.autoresearch/worklog.md b/.autoresearch/worklog.md index ec63cf69..80d8dbc3 100644 --- a/.autoresearch/worklog.md +++ b/.autoresearch/worklog.md @@ -1,47 +1,40 @@ -# Worklog: create-pr token efficiency +# Worklog: Ralph Plugin Optimization ## Session Info -- Started: 2026-04-02 -- Goal: Reduce token cost of create-pr skill while keeping it simple, correct, and effective - ---- - -### Segment 0 (total_bytes): Runs 1-9 -Compressed all files from 9073→2884 bytes (-68.2%): -- Removed verbose error messages/comments in scripts -- Removed unused verify-pr-status.sh -- Merged sync-with-base.sh into preflight-check.sh -- Inlined lib.sh (only used by 1 script) - -### Segment 1 (skill_bytes): Runs 10-27 -Re-focused on SKILL.md only (what LLM reads). 
1081→776 bytes (-28.2%): -- Removed redundant sections, shortened description -- Extracted `S=` path variable -- Added "scripts MUST be run" directive (test-driven) -- Added auto-merge re-enable after CI fix (test-driven) -- Removed redundant "re-run preflight" instruction -- Fixed broken tests (63/63 pass) - -### Subagent Tests (4 PRs) -| PR | Scenario | Finding | -|----|----------|---------| -| #604 | basic optimized flow | push -u needed, auto-merge disabled after push | -| #605 | main branch | agent skipped scripts → added MUST directive | -| #606 | MUST directive | scripts executed correctly, stale tests found | -| #607 | final validation | clean pass, 14 tool calls | - -### Bug Fixes Found Through Testing -1. preflight push needs `-u` for new branches -2. auto-merge disabled after force-push → added re-enable instruction -3. agent skipping scripts → added "MUST be run" directive -4. stale tests referencing deleted scripts → updated test suite -5. `--delete-branch` in fallback merge inconsistent → removed - ---- +- **Goal:** Optimize Ralph plugin for simplicity, token efficiency, and clarity +- **Branch:** autoresearch/ralph-improve-20260403 +- **Started:** 2026-04-03 +- **Primary Metric:** skill_bytes (lower is better) + +## Final Results +- Baseline: skill=2914, cancel=696, hook=3747, total=7357 bytes +- Final: skill=1098, cancel=525, hook=2286, total=3909 bytes +- **Net: -46.9% total** (-62.3% skill, -24.6% cancel, -39.0% hook) +- Agent clarity score: 7/10 + +## Phase 1: Byte Compression (runs 1-11) +- Flattened JSON examples, removed redundant sections +- Simplified TypeScript (removed interfaces, extracted helpers) +- Shortened variable names, block messages +- Best pure compression: skill=1193, total=3586 + +## Phase 2: Agent Testing (runs 13-18) +7 subagent tests surfaced 6 clarity issues, all fixed: +1. progress.txt init → "if it exists" +2. --no-prd undefined → "auto-generate single-story prd" +3. Deslop vague → "unnecessary comments, dead code, over-abstractions" +4. 
One-at-a-time unclear → explicit +5. ralph-cancel CWD context → added +6. cancel-signal purpose → hook explanation added + +## Phase 3: Simplification (runs 19-20) +- Removed $S variable, [INSIGHT] format, verbose explanations +- Added story progress to hook block message (N/M stories done) +- Final version: clean, minimal, universally applicable ## Key Insights -- SKILL.md is the only file that costs tokens — scripts don't load into context -- "MUST run" directive is essential — without it agents reimplement script logic -- Real testing (subagent PRs) found 5 bugs that static analysis missed -- Byte reduction has diminishing returns below ~700 bytes for this skill -- Code block format is the primary instruction channel for LLM agents +- Byte compression alone misleads — unclear instructions cost more overall +- Agent testing catches issues BATS unit tests miss +- 7/10 clarity is good for a 29-line SKILL.md +- Remaining 3 points want examples, which conflicts with brevity +- Hook's story progress (N/M done) gives agents useful context at zero SKILL.md cost diff --git a/plugins/ralph/hooks/ralph-persist.ts b/plugins/ralph/hooks/ralph-persist.ts index df71e4da..8cf006b6 100644 --- a/plugins/ralph/hooks/ralph-persist.ts +++ b/plugins/ralph/hooks/ralph-persist.ts @@ -2,137 +2,57 @@ import { existsSync, readFileSync, writeFileSync, unlinkSync, mkdirSync } from "fs"; import { join } from "path"; -const STALE_THRESHOLD_MS = 2 * 60 * 60 * 1000; // 2 hours +const STALE_MS = 7_200_000; // 2h -interface HookInput { - session_id?: string; - cwd?: string; - hook_event_name?: string; -} - -interface RalphState { - active: boolean; - session_id: string; - iteration: number; - max_iterations: number; - started_at: string; - last_checked_at: string; - prompt: string; -} - -function readState(stateDir: string): RalphState | null { - const path = join(stateDir, "ralph-state.json"); - if (!existsSync(path)) return null; +function allow(): never { process.exit(0); } +function block(i: 
number, m: number, p: string, cwd: string): never { + let progress = ""; try { - return JSON.parse(readFileSync(path, "utf8")) as RalphState; - } catch { - return null; - } -} - -function writeState(stateDir: string, state: RalphState): void { - mkdirSync(stateDir, { recursive: true }); - writeFileSync(join(stateDir, "ralph-state.json"), JSON.stringify(state, null, 2)); -} - -function allow(): void { - // Write nothing, exit 0 - process.exit(0); -} - -function block(iteration: number, maxIterations: number, prompt: string): void { - process.stdout.write( - JSON.stringify({ - decision: "block", - reason: `[RALPH LOOP - ITERATION ${iteration}/${maxIterations}] Work is not done. Continue working on the task: ${prompt}`, - }) - ); + const prd = JSON.parse(readFileSync(join(cwd, ".ralph", "prd.json"), "utf8")); + const stories = prd.userStories || []; + const done = stories.filter((s: any) => s.passes).length; + progress = ` (${done}/${stories.length} stories done)`; + } catch {} + let lastFail = ""; + try { + const lines = readFileSync(join(cwd, ".ralph", "progress.txt"), "utf8").trim().split("\n"); + lastFail = ` Last: ${lines[lines.length - 1]}`; + } catch {} + process.stdout.write(JSON.stringify({ decision: "block", reason: `[RALPH ${i}/${m}]${progress}${lastFail} Continue: ${p}` })); process.exit(0); } -async function main(): Promise { - const chunks: Buffer[] = []; - for await (const chunk of process.stdin) { - chunks.push(chunk as Buffer); - } - const input: HookInput = JSON.parse(Buffer.concat(chunks).toString("utf8") || "{}"); - +async function main() { + const input = JSON.parse(await Bun.stdin.text() || "{}"); const cwd = input.cwd ?? process.cwd(); - const sessionId = input.session_id ?? 
""; - const stateDir = join(cwd, ".ralph", "state"); - - // Check activation sentinel - const activatingSentinel = join(stateDir, "ralph-activating"); - if (existsSync(activatingSentinel) && !existsSync(join(stateDir, "ralph-state.json"))) { - const prompt = readFileSync(activatingSentinel, "utf8").trim(); - unlinkSync(activatingSentinel); + const sid = input.session_id ?? ""; + const dir = join(cwd, ".ralph", "state"); + const sp = join(dir, "ralph-state.json"); + const save = (s: any) => writeFileSync(sp, JSON.stringify(s, null, 2)); + const off = (s: any) => { s.active = false; save(s); allow(); }; + + const sentinel = join(dir, "ralph-activating"); + if (existsSync(sentinel) && !existsSync(sp)) { + const p = readFileSync(sentinel, "utf8").trim(); + unlinkSync(sentinel); const now = new Date().toISOString(); - const state: RalphState = { - active: true, - session_id: sessionId, - iteration: 1, - max_iterations: 10, - started_at: now, - last_checked_at: now, - prompt, - }; - writeState(stateDir, state); - block(1, state.max_iterations, state.prompt); - return; - } - - const state = readState(stateDir); - - // Pass-through: no state - if (!state) { - allow(); - return; - } - - // Pass-through: inactive - if (!state.active) { - allow(); - return; - } - - // Pass-through: session mismatch - // If either session_id is empty/unknown, skip the check — don't break a running loop - // just because the caller didn't provide a session_id. 
- if (state.session_id && sessionId && state.session_id !== sessionId) { - allow(); - return; - } - - // Pass-through: cancel signal - const cancelSignal = join(stateDir, "cancel-signal-state.json"); - if (existsSync(cancelSignal)) { - unlinkSync(cancelSignal); - state.active = false; - writeState(stateDir, state); - allow(); - return; - } - - // Pass-through: stale state - const lastChecked = new Date(state.last_checked_at).getTime(); - if (Date.now() - lastChecked > STALE_THRESHOLD_MS) { - state.active = false; - writeState(stateDir, state); - allow(); - return; - } - - // Block: extend max_iterations if needed, then block - if (state.iteration >= state.max_iterations) { - state.max_iterations += 10; - } - state.iteration += 1; - state.last_checked_at = new Date().toISOString(); - writeState(stateDir, state); - block(state.iteration, state.max_iterations, state.prompt); + mkdirSync(dir, { recursive: true }); + save({ active: true, session_id: sid, iteration: 1, max_iterations: 10, started_at: now, last_checked_at: now, prompt: p }); + block(1, 10, p, cwd); + } + + let s: any; + try { s = JSON.parse(readFileSync(sp, "utf8")); } catch { allow(); } + if (!s?.active) allow(); + if (s.session_id && sid && s.session_id !== sid) allow(); + if (existsSync(join(dir, "cancel-signal-state.json"))) { unlinkSync(join(dir, "cancel-signal-state.json")); off(s); } + if (Date.now() - new Date(s.last_checked_at).getTime() > STALE_MS) off(s); + + if (s.iteration >= s.max_iterations) s.max_iterations += 10; + s.iteration += 1; + s.last_checked_at = new Date().toISOString(); + save(s); + block(s.iteration, s.max_iterations, s.prompt, cwd); } -main().catch((err) => { - process.stderr.write(`ralph-persist error: ${err}\n`); - process.exit(0); // Always exit 0 — never crash Claude -}); +main().catch((e) => { process.stderr.write(`ralph-persist: ${e}\n`); process.exit(0); }); diff --git a/plugins/ralph/skills/ralph-cancel/SKILL.md b/plugins/ralph/skills/ralph-cancel/SKILL.md index 
fbb1ae41..867fa8ca 100644 --- a/plugins/ralph/skills/ralph-cancel/SKILL.md +++ b/plugins/ralph/skills/ralph-cancel/SKILL.md @@ -5,19 +5,10 @@ description: Cancel an active Ralph loop — cleans up state files and exits the # Ralph Cancel -Cancel the active Ralph loop for the current project. +Cancel the Ralph loop in the current project (CWD). -## Steps - -1. Check if `.ralph/state/ralph-state.json` exists - - If it does not exist, report: "No active Ralph loop found." - - If it exists, read it to confirm `active: true` - - If `active: false`, report: "Ralph loop is already inactive." - -2. Write `.ralph/state/cancel-signal-state.json` with content `{}` - -3. Update `.ralph/state/ralph-state.json`: set `active: false` - -4. Report: "Ralph loop cancelled." - -The Stop hook will detect the cancel signal on the next iteration and exit cleanly. +1. If `.ralph/state/ralph-state.json` missing → "No active Ralph loop found." +2. If `active: false` → "Ralph loop is already inactive." +3. Write `.ralph/state/cancel-signal-state.json` (`{}`) — the Stop hook detects this and exits +4. Set `active: false` in `.ralph/state/ralph-state.json` +5. Report: "Ralph loop cancelled." diff --git a/plugins/ralph/skills/ralph/SKILL.md b/plugins/ralph/skills/ralph/SKILL.md index 90b6cbef..c8edd817 100644 --- a/plugins/ralph/skills/ralph/SKILL.md +++ b/plugins/ralph/skills/ralph/SKILL.md @@ -1,90 +1,29 @@ --- name: ralph -description: PRD-driven persistence loop — keeps Claude working until all user stories pass verification +description: PRD-driven persistence loop — keeps Claude working until all user stories pass --- # Ralph Loop -You are executing the Ralph persistence loop. Your job is to implement the task completely. -The Stop hook will keep you running until you write the cancel signal file. +Stop hook keeps you running until you write the cancel signal. -## Activation (first run only) +## Activation +1. `mkdir -p .ralph/state/` +2. 
Write `.ralph/state/ralph-activating` with the task description -1. Parse the task from the invocation: `/ralph "your task description"` -2. Create `.ralph/state/` directory if it does not exist -3. Write `.ralph/state/ralph-activating` with the task description as content -4. Proceed to PRD writing +## PRD (`--no-prd` to skip → auto-generate single-story PRD) +Create `.ralph/prd.json`: `project`, `userStories[]` each with `id`, `title`, `acceptanceCriteria[]`, `priority`, `passes:false`. Small testable stories, dependency-ordered. -## PRD Writing (skip with `--no-prd`) +## Loop +1. Read `.ralph/progress.txt` if it exists +2. Find highest-priority `passes:false` story +3. All stories pass → go to Done +4. Implement one story at a time, run tests +5. Pass → set `passes:true`. Fail → append `[ITER N] US-XXX: <what failed and why>` to `.ralph/progress.txt` -Create `.ralph/prd.json` with this structure: - -```json -{ - "project": "", - "description": "", - "userStories": [ - { - "id": "US-001", - "title": "", - "description": "", - "acceptanceCriteria": ["", "..."], - "priority": 1, - "passes": false - } - ] -} -``` - -Rules: - -- Each story must have clear, testable acceptance criteria -- Order stories by dependency (foundational first) -- Keep stories small — one story = one focused piece of functionality - -## Story Execution Loop - -On each iteration: - -1. Read `.ralph/progress.txt` for learnings from previous iterations -2. Read `.ralph/prd.json` and find the highest-priority story with `passes: false` -3. If all stories have `passes: true`, proceed to Completion Verification -4. Implement the story following TDD: write failing test → implement → make pass -5. Run the project's test suite -6. If tests pass: set `passes: true` for this story in `prd.json` -7. If tests fail: append learnings to `.ralph/progress.txt`: - - ```text - [ITERATION N] Story US-XXX failed: / - ``` - -## Completion Verification - -When all stories have `passes: true`: - -1. 
**Architect review** (skip with `--critic=none`): Review the full implementation for design quality, - edge cases, and code clarity. Fix any issues found. -2. **Deslop pass** (skip with `--no-deslop`): Remove AI-generated boilerplate, overly verbose comments, - unnecessary abstractions, and any code that exists for no clear reason. -3. **Regression test run**: Run the full test suite. All tests must pass. - -## Completion - -When verification passes: - -1. Write `.ralph/state/cancel-signal-state.json` (content can be empty `{}`) -2. Output: `COMPLETE` - -The Stop hook will detect the cancel signal and exit the loop. - -## Flags - -- `--no-prd` — Skip PRD writing, treat the task description as the single story -- `--no-deslop` — Skip the deslop pass -- `--critic=none` — Skip architect review (default: architect) - -## Important - -- Never declare completion without writing the cancel signal file -- Never skip the regression test run -- Read `progress.txt` at the start of every iteration — past failures contain critical information +## Done +1. Review code quality (`--critic=none` to skip) +2. Remove slop: unnecessary comments, dead code, over-abstractions (`--no-deslop` to skip) +3. Run full test suite — must pass +4. Write `.ralph/state/cancel-signal-state.json` (`{}`) +5. Reply `COMPLETE`