diff --git a/src/designdoc/stages/s5_mermaid.py b/src/designdoc/stages/s5_mermaid.py index a141ee3..a75872a 100644 --- a/src/designdoc/stages/s5_mermaid.py +++ b/src/designdoc/stages/s5_mermaid.py @@ -198,7 +198,12 @@ def _merge_class_diagrams(blocks: list[str]) -> str: Returns an empty string if no class names were found. """ class_names: set[str] = set() - arrows: set[str] = set() + # Issue #62: dedupe arrows by (src, op, dst) tuple, not full line string, + # so labelled variants like `A --> B : foo` and `A --> B : bar` collapse + # to one canonical arrow. First label seen wins (deterministic given + # stable input order — blocks come from sorted class_docs). + arrow_keys: set[tuple[str, str, str]] = set() + arrow_lines: list[str] = [] for block in blocks: # Identify classDiagram blocks by the directive at the head. We do @@ -221,17 +226,50 @@ def _merge_class_diagrams(blocks: list[str]) -> str: # does NOT start with `class ` (otherwise we'd grab class-block # openers with `class Foo --> Bar` style inline declarations). if any(op in stripped for op in _ARROW_OPS) and not stripped.startswith("class "): - arrows.add(stripped) + parsed = _parse_arrow(stripped) + if parsed is None: + continue + key = parsed[:3] + if key not in arrow_keys: + arrow_keys.add(key) + arrow_lines.append(stripped) if not class_names: return "" lines = ["classDiagram"] lines.extend(f" class {name}" for name in sorted(class_names)) - lines.extend(f" {arrow}" for arrow in sorted(arrows)) + lines.extend(f" {arrow}" for arrow in sorted(arrow_lines)) return "\n".join(lines) +def _parse_arrow(line: str) -> tuple[str, str, str, str] | None: + """Parse a mermaid classDiagram relationship line into (src, op, dst, label). + + Returns None if no arrow op is found or the surrounding parts are empty + (malformed line). label is "" when no `: label` is present — callers + can then dedupe-by-tuple uniformly without juggling None. + + Sorted longest-first so multi-character ops like `<|--` don't get + mis-matched as `<--` substrings even though no op is currently a + substring of another (defensive against future _ARROW_OPS additions). + """ + stripped = line.strip() + if not stripped: + return None + for op in sorted(_ARROW_OPS, key=len, reverse=True): + idx = stripped.find(op) + if idx == -1: + continue + left = stripped[:idx].strip() + right = stripped[idx + len(op) :].strip() + if not left or not right: + return None + target, sep, label = right.partition(":") + return (left, op, target.strip(), label.strip() if sep else "") + return None + + def _is_mmdc_syntax_failure(verdict: CheckerVerdict) -> bool: """True if the verdict's terminal failure was an mmdc syntax error. diff --git a/tests/unit/test_stage5_package_diagrams.py b/tests/unit/test_stage5_package_diagrams.py index 3b3d8ae..0d28843 100644 --- a/tests/unit/test_stage5_package_diagrams.py +++ b/tests/unit/test_stage5_package_diagrams.py @@ -17,7 +17,11 @@ import pytest from designdoc.stages import s5_mermaid -from designdoc.stages.s5_mermaid import _merge_class_diagrams, _strip_arrow_labels +from designdoc.stages.s5_mermaid import ( + _merge_class_diagrams, + _parse_arrow, + _strip_arrow_labels, +) def test_merge_extracts_class_names_from_inputs(): @@ -273,3 +277,96 @@ async def test_emit_package_diagrams_stays_failsoft_when_retry_also_fails( assert (pkg_dir / "README.md").read_text() == pre_readme, ( "README must be untouched when both validate attempts fail" ) + + +# ----- arrow dedupe by (src, op, dst) tuple (issue #62) -------------------- + + +def test_parse_arrow_extracts_src_op_dst_label(): + """The basic shape: indented `A --> B : label` → ('A', '-->', 'B', 'label').""" + assert _parse_arrow(" A --> B : uses") == ("A", "-->", "B", "uses") + + +def test_parse_arrow_no_label_means_empty_label(): + """An unlabelled arrow returns '' for the label — not None — so callers + can dedupe-by-tuple uniformly.""" + assert _parse_arrow(" A --> B") == ("A", "-->", "B", "") + + +def test_parse_arrow_recognizes_all_relationship_ops(): + """Every classDiagram relationship op in _ARROW_OPS must parse cleanly. + Catches regressions if someone adds a new op to _ARROW_OPS without + updating the parser.""" + cases = [ + (" Animal <|-- Dog", "<|--"), + (" Dog --|> Animal", "--|>"), + (" Dog ..> Bone", "..>"), + (" Bone <.. Dog", "<.."), + (" Engine *-- Car", "*--"), + (" Car --* Engine", "--*"), + (" Listener o-- Source", "o--"), + (" Source --o Listener", "--o"), + (" A --> B", "-->"), + (" B <-- A", "<--"), + ] + for line, expected_op in cases: + parsed = _parse_arrow(line) + assert parsed is not None, f"failed to parse {line!r}" + assert parsed[1] == expected_op, f"wrong op for {line!r}: got {parsed[1]}" + + +def test_parse_arrow_returns_none_for_non_arrow_line(): + """`class Foo` and similar non-arrow lines should return None so the + merger can skip them without misclassifying.""" + assert _parse_arrow(" class Foo") is None + assert _parse_arrow("classDiagram") is None + assert _parse_arrow(" ") is None + + +def test_merge_dedupes_arrows_with_different_labels(): + """The agent-brain dogfood found `BaseModel <|-- GraphQueryContext : extends` + and `BaseModel <|-- GraphQueryContext : inherits` declared back-to-back + in models. Set-based dedupe keyed on full line string lets them both + survive. After this fix, dedupe-by-(src, op, dst) tuple collapses them + to one arrow (first label wins, deterministic given stable input order).""" + a = ( + "classDiagram\n" + " class BaseModel\n" + " class GraphQueryContext\n" + " BaseModel <|-- GraphQueryContext : extends\n" + " BaseModel <|-- GraphQueryContext : inherits\n" + ) + + merged = _merge_class_diagrams([a]) + + arrow_lines = [ + line for line in merged.splitlines() if "BaseModel <|-- GraphQueryContext" in line + ] + assert len(arrow_lines) == 1, ( + f"expected 1 arrow after tuple-dedupe, got {len(arrow_lines)}: {arrow_lines!r}" + ) + # The first label seen wins — deterministic, given that block iteration + # order is stable (sorted class_docs in the caller). + assert "extends" in arrow_lines[0] + + +def test_merge_keeps_distinct_edges_with_different_ops(): + """Same (src, dst) but different op = genuinely distinct edge. Both must + survive — e.g. `A --> B` (dependency) and `A ..> B` (transient use) + convey different meaning and shouldn't be collapsed.""" + a = "classDiagram\n class A\n class B\n A --> B\n A ..> B\n" + + merged = _merge_class_diagrams([a]) + + assert "A --> B" in merged + assert "A ..> B" in merged + + +def test_merge_keeps_distinct_edges_with_different_targets(): + """Same (src, op) but different dst = different edge. Both survive.""" + a = "classDiagram\n class A\n class B\n class C\n A --> B\n A --> C\n" + + merged = _merge_class_diagrams([a]) + + assert "A --> B" in merged + assert "A --> C" in merged