Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 49 additions & 8 deletions src/designdoc/stages/s5_mermaid.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@

from designdoc.hil import inline_comment
from designdoc.io_utils import atomic_write
from designdoc.loop import ArtifactResult
from designdoc.mermaid.loop import generate_validated_mermaid, strip_fence
from designdoc.mermaid.mmdc import preflight, validate
from designdoc.state import PipelineState, StageStatus, state_lock
from designdoc.verdict import CheckerVerdict, MermaidIssue

STAGE_NAME = "mermaid"

Expand Down Expand Up @@ -90,14 +92,8 @@ async def run(
)

mermaid_src = strip_fence(result.text)
section = f"\n\n## Diagram\n\n```mermaid\n{mermaid_src}\n```\n"
if result.status == "shipped_with_hil":
hil_id = state.hil_issues[-1]["id"]
section = (
f"\n\n## Diagram\n\n"
f"{inline_comment(hil_id, 'mermaid diagram disputed')}\n\n"
f"```mermaid\n{mermaid_src}\n```\n"
)
hil_id = state.hil_issues[-1]["id"] if result.status == "shipped_with_hil" else None
section = _build_diagram_section(result, mermaid_src, hil_id)

# Write body (without any prior Diagram) plus the fresh section —
# avoids stacking Diagram sections on re-run.
Expand Down Expand Up @@ -236,6 +232,51 @@ def _merge_class_diagrams(blocks: list[str]) -> str:
return "\n".join(lines)


def _is_mmdc_syntax_failure(verdict: CheckerVerdict) -> bool:
"""True if the verdict's terminal failure was an mmdc syntax error.

The composite mermaid checker (mermaid/loop.py) emits issues with
category='syntax' for mmdc parser failures and other categories
(hallucinated_node, missing_edge, etc.) for LLM-semantic failures.
A mixed verdict containing any syntax issue is still treated as
syntax-failing — the diagram won't render either way.
"""
return any(
isinstance(issue, MermaidIssue) and issue.category == "syntax" for issue in verdict.issues
)


def _build_diagram_section(result: ArtifactResult, mermaid_src: str, hil_id: str | None) -> str:
"""Build the '## Diagram' section for a class doc.

Three paths:

* ``pass``: fence around mermaid_src. The happy path.
* ``shipped_with_hil`` + mmdc syntax failure: drop the fence entirely.
A syntax-broken mermaid block renders as a blank/error in every viewer
(GitHub, IDE, mkdocs), so shipping one with a HIL marker is strictly
worse than no diagram at all. Replace with a 'diagram unavailable'
line pointing at hil-issues.yaml. See issue #61.
* ``shipped_with_hil`` + LLM-semantic failure: keep the fence. mmdc
said the mermaid parses; the dispute is about content quality
(hallucinated node, wrong direction, etc.). The diagram still renders.
"""
if result.status == "pass":
return f"\n\n## Diagram\n\n```mermaid\n{mermaid_src}\n```\n"

assert hil_id is not None, "shipped_with_hil requires a hil_id"
marker = inline_comment(hil_id, "mermaid diagram disputed")

if _is_mmdc_syntax_failure(result.verdict):
return (
f"\n\n## Diagram\n\n"
f"{marker}\n\n"
f"> Diagram unavailable; see {hil_id} in hil-issues.yaml.\n"
)

return f"\n\n## Diagram\n\n{marker}\n\n```mermaid\n{mermaid_src}\n```\n"


def _strip_arrow_labels(diagram_text: str) -> str:
"""Drop the `: label` suffix from every relationship-arrow line in a
mermaid classDiagram. Used as a one-shot retry escape hatch when the
Expand Down
176 changes: 176 additions & 0 deletions tests/unit/test_stage5_hil_section.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
"""Unit tests for Stage 5's '## Diagram' section builder under HIL fallback.

Background: when mermaid generation exhausts MAX_ATTEMPTS, the artifact ships
with a `<!-- HIL: HIL-NNN -->` marker. v1 kept the ```mermaid fence around
the (failing) source regardless of WHY it failed. That works for LLM-semantic
disputes (the diagram still parses; the dispute is about content quality),
but it's actively harmful for mmdc syntax failures: the fence renders as a
blank/error in every viewer, so the reader sees a code block that produces
nothing AND a TODO marker with no actionable content visible.

Issue #61: when the terminal failure was an mmdc syntax error, drop the
fence entirely and ship only the HIL marker + a 'diagram unavailable' line.
LLM-semantic disputes (mmdc passed, checker rejected for content reasons)
still keep the fence.
"""

from __future__ import annotations

from designdoc.loop import ArtifactResult
from designdoc.stages.s5_mermaid import _build_diagram_section, _is_mmdc_syntax_failure
from designdoc.verdict import CheckerVerdict, MermaidIssue


def _pass_verdict() -> CheckerVerdict:
return CheckerVerdict(
status="pass",
attempt=1,
artifact_id="mermaid:Foo",
summary="ok",
)


def _syntax_fail_verdict() -> CheckerVerdict:
return CheckerVerdict(
status="fail",
attempt=3,
artifact_id="mermaid:Foo",
summary="mmdc syntax check failed",
issues=[
MermaidIssue(
severity="critical",
location="<mermaid source>",
current_text="A --> B : calls foo(); raises Err",
suggested_fix="fix syntax error: Lexical error on line 4",
category="syntax",
)
],
)


def _semantic_fail_verdict() -> CheckerVerdict:
return CheckerVerdict(
status="fail",
attempt=3,
artifact_id="mermaid:Foo",
summary="hallucinated node",
issues=[
MermaidIssue(
severity="major",
location="class Bogus",
current_text="class Bogus",
suggested_fix="remove Bogus — not present in source",
category="hallucinated_node",
)
],
)


# ----- _is_mmdc_syntax_failure -------------------------------------------


def test_is_mmdc_syntax_failure_true_when_category_syntax():
assert _is_mmdc_syntax_failure(_syntax_fail_verdict()) is True


def test_is_mmdc_syntax_failure_false_for_semantic_failure():
"""LLM-semantic categories (hallucinated_node, missing_edge, etc.) are
NOT syntax failures — mmdc said yes, the LLM checker had content concerns."""
assert _is_mmdc_syntax_failure(_semantic_fail_verdict()) is False


def test_is_mmdc_syntax_failure_false_for_pass():
"""A passing verdict has no issues; nothing to classify as syntax."""
assert _is_mmdc_syntax_failure(_pass_verdict()) is False


def test_is_mmdc_syntax_failure_true_when_any_issue_is_syntax():
"""Mixed-category verdict: if even one issue is a syntax issue, treat
the whole verdict as syntax-failing (the diagram won't render)."""
verdict = CheckerVerdict(
status="fail",
attempt=3,
artifact_id="mermaid:Foo",
summary="multiple problems",
issues=[
MermaidIssue(
severity="major",
location="edge",
current_text="A --> B",
suggested_fix="rephrase",
category="hallucinated_node",
),
MermaidIssue(
severity="critical",
location="<mermaid source>",
current_text="A --> B : x; y",
suggested_fix="fix lexical error",
category="syntax",
),
],
)
assert _is_mmdc_syntax_failure(verdict) is True


# ----- _build_diagram_section --------------------------------------------


def _result(status: str, verdict: CheckerVerdict) -> ArtifactResult:
return ArtifactResult(
artifact_id="mermaid:Foo",
status=status, # type: ignore[arg-type]
text="classDiagram\n class Foo\n",
attempt=verdict.attempt,
verdict=verdict,
)


def test_build_diagram_section_pass_emits_fence():
"""The happy path is unchanged: ## Diagram heading + fenced mermaid block."""
src = "classDiagram\n class Foo\n class Bar\n"
section = _build_diagram_section(_result("pass", _pass_verdict()), src, hil_id=None)

assert "## Diagram" in section
assert "```mermaid" in section
assert "class Foo" in section
assert "class Bar" in section
assert "<!-- HIL:" not in section


def test_build_diagram_section_hil_syntax_drops_fence():
"""Terminal mmdc syntax failure → no ```mermaid fence (broken markdown
in every viewer); just the HIL marker + a 'diagram unavailable' line
pointing the reader at hil-issues.yaml."""
src = "classDiagram\n A --> B : foo(); bar\n"
section = _build_diagram_section(
_result("shipped_with_hil", _syntax_fail_verdict()),
src,
hil_id="HIL-003",
)

assert "## Diagram" in section
assert "<!-- HIL: HIL-003" in section
assert "```mermaid" not in section, (
"syntax-failed diagrams must not ship a broken fence — that renders "
"as a blank/error block in every viewer"
)
assert "HIL-003" in section
assert "hil-issues.yaml" in section.lower()


def test_build_diagram_section_hil_semantic_keeps_fence():
"""Terminal LLM-semantic failure (mmdc passed, content disputed) → keep
the fence. The diagram renders fine; the dispute is about quality, not
parseability."""
src = "classDiagram\n class Foo\n class Bogus\n"
section = _build_diagram_section(
_result("shipped_with_hil", _semantic_fail_verdict()),
src,
hil_id="HIL-004",
)

assert "## Diagram" in section
assert "<!-- HIL: HIL-004" in section
assert "```mermaid" in section
assert "class Foo" in section
assert "class Bogus" in section
Loading