diff --git a/src/designdoc/stages/s5_mermaid.py b/src/designdoc/stages/s5_mermaid.py index b84e94d..a141ee3 100644 --- a/src/designdoc/stages/s5_mermaid.py +++ b/src/designdoc/stages/s5_mermaid.py @@ -23,9 +23,11 @@ from designdoc.hil import inline_comment from designdoc.io_utils import atomic_write +from designdoc.loop import ArtifactResult from designdoc.mermaid.loop import generate_validated_mermaid, strip_fence from designdoc.mermaid.mmdc import preflight, validate from designdoc.state import PipelineState, StageStatus, state_lock +from designdoc.verdict import CheckerVerdict, MermaidIssue STAGE_NAME = "mermaid" @@ -90,14 +92,8 @@ async def run( ) mermaid_src = strip_fence(result.text) - section = f"\n\n## Diagram\n\n```mermaid\n{mermaid_src}\n```\n" - if result.status == "shipped_with_hil": - hil_id = state.hil_issues[-1]["id"] - section = ( - f"\n\n## Diagram\n\n" - f"{inline_comment(hil_id, 'mermaid diagram disputed')}\n\n" - f"```mermaid\n{mermaid_src}\n```\n" - ) + hil_id = state.hil_issues[-1]["id"] if result.status == "shipped_with_hil" else None + section = _build_diagram_section(result, mermaid_src, hil_id) # Write body (without any prior Diagram) plus the fresh section — # avoids stacking Diagram sections on re-run. @@ -236,6 +232,51 @@ def _merge_class_diagrams(blocks: list[str]) -> str: return "\n".join(lines) +def _is_mmdc_syntax_failure(verdict: CheckerVerdict) -> bool: + """True if the verdict's terminal failure was an mmdc syntax error. + + The composite mermaid checker (mermaid/loop.py) emits issues with + category='syntax' for mmdc parser failures and other categories + (hallucinated_node, missing_edge, etc.) for LLM-semantic failures. + A mixed verdict containing any syntax issue is still treated as + syntax-failing — the diagram won't render either way. + """ + return any( + isinstance(issue, MermaidIssue) and issue.category == "syntax" for issue in verdict.issues + ) + + +def _build_diagram_section(result: ArtifactResult, mermaid_src: str, hil_id: str | None) -> str: + """Build the '## Diagram' section for a class doc. + + Three paths: + + * ``pass``: fence around mermaid_src. The happy path. + * ``shipped_with_hil`` + mmdc syntax failure: drop the fence entirely. + A syntax-broken mermaid block renders as a blank/error in every viewer + (GitHub, IDE, mkdocs), so shipping one with a HIL marker is strictly + worse than no diagram at all. Replace with a 'diagram unavailable' + line pointing at hil-issues.yaml. See issue #61. + * ``shipped_with_hil`` + LLM-semantic failure: keep the fence. mmdc + said the mermaid parses; the dispute is about content quality + (hallucinated node, wrong direction, etc.). The diagram still renders. + """ + if result.status == "pass": + return f"\n\n## Diagram\n\n```mermaid\n{mermaid_src}\n```\n" + + assert hil_id is not None, "shipped_with_hil requires a hil_id" + marker = inline_comment(hil_id, "mermaid diagram disputed") + + if _is_mmdc_syntax_failure(result.verdict): + return ( + f"\n\n## Diagram\n\n" + f"{marker}\n\n" + f"> Diagram unavailable; see {hil_id} in hil-issues.yaml.\n" + ) + + return f"\n\n## Diagram\n\n{marker}\n\n```mermaid\n{mermaid_src}\n```\n" + + def _strip_arrow_labels(diagram_text: str) -> str: """Drop the `: label` suffix from every relationship-arrow line in a mermaid classDiagram. Used as a one-shot retry escape hatch when the diff --git a/tests/unit/test_stage5_hil_section.py b/tests/unit/test_stage5_hil_section.py new file mode 100644 index 0000000..e64da1d --- /dev/null +++ b/tests/unit/test_stage5_hil_section.py @@ -0,0 +1,176 @@ +"""Unit tests for Stage 5's '## Diagram' section builder under HIL fallback. + +Background: when mermaid generation exhausts MAX_ATTEMPTS, the artifact ships +with a `` marker. v1 kept the ```mermaid fence around +the (failing) source regardless of WHY it failed. That works for LLM-semantic +disputes (the diagram still parses; the dispute is about content quality), +but it's actively harmful for mmdc syntax failures: the fence renders as a +blank/error in every viewer, so the reader sees a code block that produces +nothing AND a TODO marker with no actionable content visible. + +Issue #61: when the terminal failure was an mmdc syntax error, drop the +fence entirely and ship only the HIL marker + a 'diagram unavailable' line. +LLM-semantic disputes (mmdc passed, checker rejected for content reasons) +still keep the fence. +""" + +from __future__ import annotations + +from designdoc.loop import ArtifactResult +from designdoc.stages.s5_mermaid import _build_diagram_section, _is_mmdc_syntax_failure +from designdoc.verdict import CheckerVerdict, MermaidIssue + + +def _pass_verdict() -> CheckerVerdict: + return CheckerVerdict( + status="pass", + attempt=1, + artifact_id="mermaid:Foo", + summary="ok", + ) + + +def _syntax_fail_verdict() -> CheckerVerdict: + return CheckerVerdict( + status="fail", + attempt=3, + artifact_id="mermaid:Foo", + summary="mmdc syntax check failed", + issues=[ + MermaidIssue( + severity="critical", + location="", + current_text="A --> B : calls foo(); raises Err", + suggested_fix="fix syntax error: Lexical error on line 4", + category="syntax", + ) + ], + ) + + +def _semantic_fail_verdict() -> CheckerVerdict: + return CheckerVerdict( + status="fail", + attempt=3, + artifact_id="mermaid:Foo", + summary="hallucinated node", + issues=[ + MermaidIssue( + severity="major", + location="class Bogus", + current_text="class Bogus", + suggested_fix="remove Bogus — not present in source", + category="hallucinated_node", + ) + ], + ) + + +# ----- _is_mmdc_syntax_failure ------------------------------------------- + + +def test_is_mmdc_syntax_failure_true_when_category_syntax(): + assert _is_mmdc_syntax_failure(_syntax_fail_verdict()) is True + + +def test_is_mmdc_syntax_failure_false_for_semantic_failure(): + """LLM-semantic categories (hallucinated_node, missing_edge, etc.) are + NOT syntax failures — mmdc said yes, the LLM checker had content concerns.""" + assert _is_mmdc_syntax_failure(_semantic_fail_verdict()) is False + + +def test_is_mmdc_syntax_failure_false_for_pass(): + """A passing verdict has no issues; nothing to classify as syntax.""" + assert _is_mmdc_syntax_failure(_pass_verdict()) is False + + +def test_is_mmdc_syntax_failure_true_when_any_issue_is_syntax(): + """Mixed-category verdict: if even one issue is a syntax issue, treat + the whole verdict as syntax-failing (the diagram won't render).""" + verdict = CheckerVerdict( + status="fail", + attempt=3, + artifact_id="mermaid:Foo", + summary="multiple problems", + issues=[ + MermaidIssue( + severity="major", + location="edge", + current_text="A --> B", + suggested_fix="rephrase", + category="hallucinated_node", + ), + MermaidIssue( + severity="critical", + location="", + current_text="A --> B : x; y", + suggested_fix="fix lexical error", + category="syntax", + ), + ], + ) + assert _is_mmdc_syntax_failure(verdict) is True + + +# ----- _build_diagram_section -------------------------------------------- + + +def _result(status: str, verdict: CheckerVerdict) -> ArtifactResult: + return ArtifactResult( + artifact_id="mermaid:Foo", + status=status, # type: ignore[arg-type] + text="classDiagram\n class Foo\n", + attempt=verdict.attempt, + verdict=verdict, + ) + + +def test_build_diagram_section_pass_emits_fence(): + """The happy path is unchanged: ## Diagram heading + fenced mermaid block.""" + src = "classDiagram\n class Foo\n class Bar\n" + section = _build_diagram_section(_result("pass", _pass_verdict()), src, hil_id=None) + + assert "## Diagram" in section + assert "```mermaid" in section + assert "class Foo" in section + assert "class Bar" in section + assert " B : foo(); bar\n" + section = _build_diagram_section( + _result("shipped_with_hil", _syntax_fail_verdict()), + src, + hil_id="HIL-003", + ) + + assert "## Diagram" in section + assert "