Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 41 additions & 3 deletions src/designdoc/stages/s5_mermaid.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,12 @@ def _merge_class_diagrams(blocks: list[str]) -> str:
Returns an empty string if no class names were found.
"""
class_names: set[str] = set()
arrows: set[str] = set()
# Issue #62: dedupe arrows by (src, op, dst) tuple, not full line string,
# so labelled variants like `A --> B : foo` and `A --> B : bar` collapse
# to one canonical arrow. First label seen wins (deterministic given
# stable input order — blocks come from sorted class_docs).
arrow_keys: set[tuple[str, str, str]] = set()
arrow_lines: list[str] = []

for block in blocks:
# Identify classDiagram blocks by the directive at the head. We do
Expand All @@ -221,17 +226,50 @@ def _merge_class_diagrams(blocks: list[str]) -> str:
# does NOT start with `class ` (otherwise we'd grab class-block
# openers with `class Foo --> Bar` style inline declarations).
if any(op in stripped for op in _ARROW_OPS) and not stripped.startswith("class "):
arrows.add(stripped)
parsed = _parse_arrow(stripped)
if parsed is None:
continue
key = parsed[:3]
if key not in arrow_keys:
arrow_keys.add(key)
arrow_lines.append(stripped)

if not class_names:
return ""

lines = ["classDiagram"]
lines.extend(f" class {name}" for name in sorted(class_names))
lines.extend(f" {arrow}" for arrow in sorted(arrows))
lines.extend(f" {arrow}" for arrow in sorted(arrow_lines))
return "\n".join(lines)


def _parse_arrow(line: str) -> tuple[str, str, str, str] | None:
"""Parse a mermaid classDiagram relationship line into (src, op, dst, label).

Returns None if no arrow op is found or the surrounding parts are empty
(malformed line). label is "" when no `: label` is present — callers
can then dedupe-by-tuple uniformly without juggling None.

Sorted longest-first so multi-character ops like `<|--` don't get
mis-matched as `<--` substrings even though no op is currently a
substring of another (defensive against future _ARROW_OPS additions).
"""
stripped = line.strip()
if not stripped:
return None
for op in sorted(_ARROW_OPS, key=len, reverse=True):
idx = stripped.find(op)
if idx == -1:
continue
left = stripped[:idx].strip()
right = stripped[idx + len(op) :].strip()
if not left or not right:
return None
target, sep, label = right.partition(":")
return (left, op, target.strip(), label.strip() if sep else "")
return None


def _is_mmdc_syntax_failure(verdict: CheckerVerdict) -> bool:
"""True if the verdict's terminal failure was an mmdc syntax error.

Expand Down
99 changes: 98 additions & 1 deletion tests/unit/test_stage5_package_diagrams.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@
import pytest

from designdoc.stages import s5_mermaid
from designdoc.stages.s5_mermaid import _merge_class_diagrams, _strip_arrow_labels
from designdoc.stages.s5_mermaid import (
_merge_class_diagrams,
_parse_arrow,
_strip_arrow_labels,
)


def test_merge_extracts_class_names_from_inputs():
Expand Down Expand Up @@ -273,3 +277,96 @@ async def test_emit_package_diagrams_stays_failsoft_when_retry_also_fails(
assert (pkg_dir / "README.md").read_text() == pre_readme, (
"README must be untouched when both validate attempts fail"
)


# ----- arrow dedupe by (src, op, dst) tuple (issue #62) --------------------


def test_parse_arrow_extracts_src_op_dst_label():
"""The basic shape: indented `A --> B : label` → ('A', '-->', 'B', 'label')."""
assert _parse_arrow(" A --> B : uses") == ("A", "-->", "B", "uses")


def test_parse_arrow_no_label_means_empty_label():
"""An unlabelled arrow returns '' for the label — not None — so callers
can dedupe-by-tuple uniformly."""
assert _parse_arrow(" A --> B") == ("A", "-->", "B", "")


def test_parse_arrow_recognizes_all_relationship_ops():
"""Every classDiagram relationship op in _ARROW_OPS must parse cleanly.
Catches regressions if someone adds a new op to _ARROW_OPS without
updating the parser."""
cases = [
(" Animal <|-- Dog", "<|--"),
(" Dog --|> Animal", "--|>"),
(" Dog ..> Bone", "..>"),
(" Bone <.. Dog", "<.."),
(" Engine *-- Car", "*--"),
(" Car --* Engine", "--*"),
(" Listener o-- Source", "o--"),
(" Source --o Listener", "--o"),
(" A --> B", "-->"),
(" B <-- A", "<--"),
]
for line, expected_op in cases:
parsed = _parse_arrow(line)
assert parsed is not None, f"failed to parse {line!r}"
assert parsed[1] == expected_op, f"wrong op for {line!r}: got {parsed[1]}"


def test_parse_arrow_returns_none_for_non_arrow_line():
"""`class Foo` and similar non-arrow lines should return None so the
merger can skip them without misclassifying."""
assert _parse_arrow(" class Foo") is None
assert _parse_arrow("classDiagram") is None
assert _parse_arrow(" ") is None


def test_merge_dedupes_arrows_with_different_labels():
"""The agent-brain dogfood found `BaseModel <|-- GraphQueryContext : extends`
and `BaseModel <|-- GraphQueryContext : inherits` declared back-to-back
in models. Set-based dedupe keyed on full line string lets them both
survive. After this fix, dedupe-by-(src, op, dst) tuple collapses them
to one arrow (first label wins, deterministic given stable input order)."""
a = (
"classDiagram\n"
" class BaseModel\n"
" class GraphQueryContext\n"
" BaseModel <|-- GraphQueryContext : extends\n"
" BaseModel <|-- GraphQueryContext : inherits\n"
)

merged = _merge_class_diagrams([a])

arrow_lines = [
line for line in merged.splitlines() if "BaseModel <|-- GraphQueryContext" in line
]
assert len(arrow_lines) == 1, (
f"expected 1 arrow after tuple-dedupe, got {len(arrow_lines)}: {arrow_lines!r}"
)
# The first label seen wins — deterministic, given that block iteration
# order is stable (sorted class_docs in the caller).
assert "extends" in arrow_lines[0]


def test_merge_keeps_distinct_edges_with_different_ops():
"""Same (src, dst) but different op = genuinely distinct edge. Both must
survive — e.g. `A --> B` (dependency) and `A ..> B` (transient use)
convey different meaning and shouldn't be collapsed."""
a = "classDiagram\n class A\n class B\n A --> B\n A ..> B\n"

merged = _merge_class_diagrams([a])

assert "A --> B" in merged
assert "A ..> B" in merged


def test_merge_keeps_distinct_edges_with_different_targets():
"""Same (src, op) but different dst = different edge. Both survive."""
a = "classDiagram\n class A\n class B\n class C\n A --> B\n A --> C\n"

merged = _merge_class_diagrams([a])

assert "A --> B" in merged
assert "A --> C" in merged
Loading