From 3198383f2e48fa3f11dd8480ac81b5aaffd17f8c Mon Sep 17 00:00:00 2001 From: Luigi Corsaro <5324491+drake69@users.noreply.github.com> Date: Tue, 23 Jun 2026 18:59:51 +0200 Subject: [PATCH 1/2] feat(ui+db): counterparts grid, CategoryCorrection table, cat_select widget MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - CategoryCorrection table: live implicit benchmark — logga ogni correzione utente con source, confidence e consistency_at_correction per calcolare accuracy per modello senza dataset etichettato separato (AI-164) - counterparts_page.py: griglia per-vendor con # tx, source_mode, variabilità categoria modale, flag human-checked; inline rule creation da riga (AI-164) - cat_select widget: selector categoria/sottocategoria riutilizzabile - i18n: chiavi counterparts aggiunte in IT/EN/FR/DE/ES --- app.py | 5 + core/_build_info.py | 3 + db/models.py | 55 +++++++ db/repository.py | 188 ++++++++++++++++++++++- services/transaction_service.py | 18 ++- ui/bulk_edit_page.py | 4 +- ui/counterparts_page.py | 254 ++++++++++++++++++++++++++++++++ ui/i18n/de.json | 50 ++++++- ui/i18n/en.json | 50 ++++++- ui/i18n/es.json | 50 ++++++- ui/i18n/fr.json | 50 ++++++- ui/i18n/it.json | 50 ++++++- ui/llm_models_page.py | 52 +++++++ ui/registry_page.py | 2 +- ui/review_page.py | 4 +- ui/rules_page.py | 4 +- ui/sidebar.py | 1 + ui/widgets/cat_select.py | 58 ++++++++ 18 files changed, 877 insertions(+), 21 deletions(-) create mode 100644 core/_build_info.py create mode 100644 ui/counterparts_page.py create mode 100644 ui/widgets/cat_select.py diff --git a/app.py b/app.py index 800f282..6672638 100644 --- a/app.py +++ b/app.py @@ -11,6 +11,7 @@ 📊 Budget vs Actual – compare actual spending vs budget targets 🔍 Review – manual review of low-confidence items 📏 Rules – manage category rules (edit / delete / create) + 🏪 Counterparts – per-vendor stats grid with inline rule creation 🗂️ Taxonomy – manage categories and subcategories ⚙️ Settings – 
locale, language, LLM backend preferences ✅ Checklist – monthly tx presence per account (pivot table) @@ -199,6 +200,10 @@ from ui.rules_page import render_rules_page render_rules_page(engine) +elif page == "counterparts": + from ui.counterparts_page import render_counterparts_page + render_counterparts_page(engine) + elif page == "taxonomy": from ui.taxonomy_page import render_taxonomy_page render_taxonomy_page(engine) diff --git a/core/_build_info.py b/core/_build_info.py new file mode 100644 index 0000000..2d66aa1 --- /dev/null +++ b/core/_build_info.py @@ -0,0 +1,3 @@ +# Generated at build time — do not edit manually. +BUILD_TIME = "2026-06-23 15:48" +BUILD_VERSION = "0.1.0" diff --git a/db/models.py b/db/models.py index fe86cdd..094ea54 100644 --- a/db/models.py +++ b/db/models.py @@ -226,6 +226,7 @@ def create_tables(engine=None): _migrate_add_nsi_tag_mapping(engine) _migrate_add_category_model(engine) _migrate_add_llm_usage_log(engine) + _migrate_add_category_correction(engine) _migrate_set_onboarding_done_for_existing_users(engine) # must run last _migrate_purge_orphan_schemas(engine) # cleanup: remove schemas without header_sha256 @@ -583,6 +584,34 @@ class NsiTagMapping(Base): updated_at = Column(DateTime, default=lambda: datetime.now(timezone.utc)) +class CategoryCorrection(Base): + """User correction log — one row every time the user changes a category. + + Captures the original LLM/rule assignment alongside two quality signals: + - original_confidence: the model's self-reported certainty on that tx + - consistency_at_correction: % of same-description txs that agreed on + the same category at the moment of correction (vendor-level coherence) + + Together these let us compute a live implicit benchmark: + accuracy ≈ 1 - (corrections / total_llm_categorizations) per model + and diagnose failure modes (high-confidence errors, inconsistent vendors). 
+ """ + __tablename__ = "category_correction" + + id = Column(Integer, primary_key=True, autoincrement=True) + tx_id = Column(String(64), nullable=False, index=True) + original_category = Column(String(128)) + original_subcategory = Column(String(128)) + original_source = Column(String(10)) # llm | rule | history + original_model = Column(String(128)) # category_model at correction time + original_confidence = Column(String(10)) # high | medium | low + new_category = Column(String(128)) + new_subcategory = Column(String(128)) + consistency_at_correction = Column(Float, nullable=True) # % modal cat, same description + correction_origin = Column(String(20)) # ledger | counterparts | review | bulk_edit + corrected_at = Column(DateTime, default=lambda: datetime.now(timezone.utc)) + + def _migrate_add_import_job(engine) -> None: """Create import_job table if not present (idempotent) and add the AI-88 ms_* phase-timing columns when missing. @@ -1126,6 +1155,32 @@ def _migrate_add_category_model(engine) -> None: raise +def _migrate_add_category_correction(engine) -> None: + """Create category_correction table for live implicit benchmark (idempotent).""" + from sqlalchemy import text as _text + with engine.connect() as conn: + conn.execute(_text( + 'CREATE TABLE IF NOT EXISTS category_correction (' + 'id INTEGER PRIMARY KEY AUTOINCREMENT, ' + 'tx_id VARCHAR(64) NOT NULL, ' + 'original_category VARCHAR(128), ' + 'original_subcategory VARCHAR(128), ' + 'original_source VARCHAR(10), ' + 'original_model VARCHAR(128), ' + 'original_confidence VARCHAR(10), ' + 'new_category VARCHAR(128), ' + 'new_subcategory VARCHAR(128), ' + 'consistency_at_correction FLOAT, ' + 'correction_origin VARCHAR(20), ' + 'corrected_at DATETIME)' + )) + conn.execute(_text( + 'CREATE INDEX IF NOT EXISTS ix_category_correction_tx_id ' + 'ON category_correction (tx_id)' + )) + conn.commit() + + def _migrate_add_llm_usage_log(engine) -> None: """Create llm_usage_log table if not present (idempotent).""" 
from sqlalchemy import text as _text diff --git a/db/repository.py b/db/repository.py index e980d54..7b131a6 100644 --- a/db/repository.py +++ b/db/repository.py @@ -13,6 +13,7 @@ from core.categorizer import CategoryRule as CoreCategoryRule from core.schemas import DocumentSchema from db.models import ( + CategoryCorrection, CategoryRule, DEFAULT_USER_SETTINGS, DescriptionRule, @@ -288,7 +289,7 @@ def upsert_transaction(session: Session, tx: dict, batch_id: Optional[int] = Non date_accounting=tx.get("date_accounting").isoformat() if tx.get("date_accounting") and hasattr(tx["date_accounting"], "isoformat") else tx.get("date_accounting"), amount=amount_val, currency=tx.get("currency", "EUR"), - description=tx.get("description", ""), + description=(tx.get("description") or "").strip().upper() or None, source_file=tx.get("source_file", ""), doc_type=tx.get("doc_type", ""), account_label=tx.get("account_label", ""), @@ -318,16 +319,57 @@ def get_existing_tx_ids(session: Session, tx_ids: list[str]) -> set[str]: return {row.id for row in rows} +def _compute_consistency(session: Session, description: str) -> float | None: + """% of categorized transactions with same description that agree on modal category.""" + from collections import Counter + rows = ( + session.query(Transaction.category) + .filter( + Transaction.description == description, + Transaction.category.isnot(None), + ) + .all() + ) + if not rows: + return None + counts = Counter(r[0] for r in rows) + modal_count = counts.most_common(1)[0][1] + return round(modal_count / len(rows) * 100, 1) + + def update_transaction_category( session: Session, tx_id: str, category: str, subcategory: str, + origin: str = "unknown", ) -> bool: from datetime import datetime, timezone tx = session.get(Transaction, tx_id) if tx is None: return False + old_cat = tx.category + old_sub = tx.subcategory + old_src = tx.category_source + old_model = tx.category_model + old_conf = tx.category_confidence + category_changed = old_cat != 
category or old_sub != subcategory + if category_changed and old_src in ("llm", "rule", "history"): + consistency = _compute_consistency(session, tx.description or "") + correction = CategoryCorrection( + tx_id=tx_id, + original_category=old_cat, + original_subcategory=old_sub, + original_source=old_src, + original_model=old_model, + original_confidence=old_conf, + new_category=category, + new_subcategory=subcategory, + consistency_at_correction=consistency, + correction_origin=origin, + corrected_at=datetime.now(timezone.utc), + ) + session.add(correction) tx.category = category tx.subcategory = subcategory tx.category_confidence = "high" @@ -339,6 +381,66 @@ def update_transaction_category( return True +def get_correction_benchmark(session: Session) -> list[dict]: + """Live implicit benchmark: per-model corrections vs total LLM categorizations. + + Returns one dict per model with: + model, total_categorized, total_corrections, implicit_accuracy, + high_conf_errors, avg_consistency_at_error + """ + from collections import Counter, defaultdict + + # Still-LLM categorizations (not yet corrected by user) + still_llm_rows = ( + session.query(Transaction.category_model) + .filter( + Transaction.category_source == "llm", + Transaction.category_model.isnot(None), + ) + .all() + ) + total_by_model: Counter = Counter(r[0] for r in still_llm_rows) + + # Corrections where original source was llm + corr_rows = ( + session.query(CategoryCorrection) + .filter(CategoryCorrection.original_source == "llm") + .all() + ) + + corrections: dict[str, list] = defaultdict(list) + for c in corr_rows: + if c.original_model: + corrections[c.original_model].append(c) + + all_models = set(total_by_model.keys()) | set(corrections.keys()) + results = [] + for model in sorted(all_models): + corr_list = corrections.get(model, []) + n_corr = len(corr_list) + # total = still-LLM + already-corrected (corrected txs left the 'llm' source) + n_total = total_by_model.get(model, 0) + n_corr + high_conf = 
sum(1 for c in corr_list if c.original_confidence == "high") + consistency_vals = [ + c.consistency_at_correction + for c in corr_list + if c.consistency_at_correction is not None + ] + avg_cons = round(sum(consistency_vals) / len(consistency_vals), 1) if consistency_vals else None + implicit_acc = round((1 - n_corr / n_total) * 100, 1) if n_total > 0 else None + results.append( + { + "model": model, + "total_categorized": n_total, + "total_corrections": n_corr, + "implicit_accuracy": implicit_acc, + "high_conf_errors": high_conf, + "avg_consistency_at_error": avg_cons, + } + ) + return results + + def toggle_transaction_giroconto(session: Session, tx_id: str) -> tuple[bool, str]: """Toggle a transaction's tx_type between giroconto and expense/income. @@ -655,6 +757,10 @@ def create_category_rule( Returns (rule, created) where created=False means an existing rule was updated. """ + # Normalize pattern casing: contains/exact match against uppercase descriptions + if match_type in ("contains", "exact"): + pattern = pattern.upper() + existing = ( session.query(CategoryRule) .filter(CategoryRule.pattern == pattern, CategoryRule.match_type == match_type) @@ -696,7 +802,8 @@ def update_category_rule( if rule is None: return False if pattern is not None: - rule.pattern = pattern + _mt = match_type or rule.match_type + rule.pattern = pattern.upper() if _mt in ("contains", "exact") else pattern if match_type is not None: rule.match_type = match_type if category is not None: @@ -1908,3 +2015,80 @@ def get_adaptive_n_ctx_cap( # Round up to next 1024 multiple, enforce floor of 2048 cap = max(int(math.ceil(max_upper / 1024)) * 1024, 2048) return cap + + +def get_counterpart_stats( + session: Session, + tx_types: tuple[str, ...] = ("expense", "income"), +) -> list[dict]: + """Aggregate transactions by description to produce per-counterpart stats. 
+ + Returns a list of dicts with keys: + description, tx_count, avg_amount, modal_category, modal_subcategory, + variability_pct, source_mode, human_checked + """ + from collections import Counter, defaultdict + + rows = ( + session.query( + Transaction.description, + Transaction.amount, + Transaction.category, + Transaction.subcategory, + Transaction.category_source, + Transaction.validated_at, + ) + .filter( + Transaction.description.isnot(None), + Transaction.description != "", + Transaction.tx_type.in_(tx_types), + ) + .all() + ) + + groups: dict[str, list] = defaultdict(list) + for row in rows: + groups[row.description].append(row) + + stats = [] + for desc, txs in groups.items(): + tx_count = len(txs) + avg_amount = sum(abs(float(t.amount or 0)) for t in txs) / tx_count + + cat_counts: Counter = Counter(t.category for t in txs if t.category) + if cat_counts: + modal_cat, modal_count = cat_counts.most_common(1)[0] + else: + modal_cat, modal_count = "", 0 + sub_counts: Counter = Counter( + t.subcategory for t in txs if t.category == modal_cat and t.subcategory + ) + modal_sub = sub_counts.most_common(1)[0][0] if sub_counts else "" + variability_pct = (modal_count / tx_count * 100) if tx_count else 0.0 + + sources = {t.category_source for t in txs if t.category_source} + if len(sources) == 1: + source_mode = next(iter(sources)) + elif sources: + source_mode = "mixed" + else: + source_mode = "unknown" + + human_checked = any( + t.validated_at is not None or t.category_source == "manual" for t in txs + ) + + stats.append( + { + "description": desc, + "tx_count": tx_count, + "avg_amount": avg_amount, + "modal_category": modal_cat, + "modal_subcategory": modal_sub, + "variability_pct": variability_pct, + "source_mode": source_mode, + "human_checked": human_checked, + } + ) + + return stats diff --git a/services/transaction_service.py b/services/transaction_service.py index 36545b8..f297717 100644 --- a/services/transaction_service.py +++ 
b/services/transaction_service.py @@ -56,12 +56,20 @@ def get_recent_for_home(self, since_iso: str) -> list[tuple]: .all() ) - def update_category(self, tx_id: str, category: str, subcategory: str) -> bool: + def update_category( + self, tx_id: str, category: str, subcategory: str, origin: str = "unknown" + ) -> bool: with self._session() as s: - result = repository.update_transaction_category(s, tx_id, category, subcategory) + result = repository.update_transaction_category( + s, tx_id, category, subcategory, origin=origin + ) s.commit() return result + def get_correction_benchmark(self) -> list[dict]: + with self._session() as s: + return repository.get_correction_benchmark(s) + def update_context(self, tx_id: str, context: str | None) -> bool: with self._session() as s: result = repository.update_transaction_context(s, tx_id, context) @@ -290,6 +298,12 @@ def update_context_bulk(self, ids: list[str], context: str | None) -> int: s.commit() return updated + def get_counterpart_stats( + self, tx_types: tuple[str, ...] = ("expense", "income") + ) -> list[dict]: + with self._session() as s: + return repository.get_counterpart_stats(s, tx_types=tx_types) + def delete_duplicate_groups(self, groups: list[list]) -> int: """Delete all but the first transaction in each duplicate group. 
diff --git a/ui/bulk_edit_page.py b/ui/bulk_edit_page.py index 8fc57d1..50f2892 100644 --- a/ui/bulk_edit_page.py +++ b/ui/bulk_edit_page.py @@ -327,7 +327,7 @@ def _cat_progress_cb(p: float): ) if st.button(t("bulk_edit.apply_category_btn"), type="primary", key="bulk_cat_save"): - ok = tx_svc.update_category(sel.id, new_cat, new_sub) + ok = tx_svc.update_category(sel.id, new_cat, new_sub, origin="bulk_edit") if ok: rule_msg = "" n_similar = 0 @@ -346,7 +346,7 @@ def _cat_progress_cb(p: float): similar = tx_svc.get_by_rule_pattern(sel.description, "contains") for stx in similar: if stx.id != sel.id: - tx_svc.update_category(stx.id, new_cat, new_sub) + tx_svc.update_category(stx.id, new_cat, new_sub, origin="bulk_edit") n_similar += 1 if n_similar: rule_msg += t("bulk_edit.similar_tx_updated", n=n_similar) diff --git a/ui/counterparts_page.py b/ui/counterparts_page.py new file mode 100644 index 0000000..dbb299e --- /dev/null +++ b/ui/counterparts_page.py @@ -0,0 +1,254 @@ +"""Counterparts page — per-vendor stats grid with inline rule creation.""" +from __future__ import annotations + +import pandas as pd +import streamlit as st + +from services.rule_service import RuleService +from services.settings_service import SettingsService +from services.transaction_service import TransactionService +from support.logging import setup_logging +from ui.i18n import t +from ui.widgets.cat_select import build_cat_options, join_cat_sub, split_cat_sub + +logger = setup_logging() + +_SOURCE_EMOJI = { + "llm": "🤖", + "rule": "📏", + "manual": "✋", + "mixed": "🔀", + "unknown": "❓", +} + +_VARIABILITY_WARN = 80.0 + + +def _build_df(stats: list[dict], cat_options: list[str]) -> pd.DataFrame: + rows = [] + for s in stats: + src = s["source_mode"] + src_label = f"{_SOURCE_EMOJI.get(src, '')} {src}" + var = s["variability_pct"] + combined = join_cat_sub(s["modal_category"], s["modal_subcategory"]) + rows.append( + { + t("counterparts.col_counterpart"): s["description"], + 
t("counterparts.col_tx_count"): s["tx_count"], + t("counterparts.col_avg_amount"): round(s["avg_amount"], 2), + t("counterparts.col_cat_sub"): combined, + t("counterparts.col_source"): src_label, + t("counterparts.col_variability"): f"{var:.0f}%", + t("counterparts.col_checked"): s["human_checked"], + "_description": s["description"], + "_orig_cat_sub": combined, + } + ) + return pd.DataFrame(rows) + + +_DROPDOWN_CSS = """ + +""" + + +def render_counterparts_page(engine) -> None: + st.markdown(_DROPDOWN_CSS, unsafe_allow_html=True) + st.header(t("counterparts.title")) + st.caption(t("counterparts.caption")) + + tx_svc = TransactionService(engine) + rule_svc = RuleService(engine) + cfg_svc = SettingsService(engine) + + taxonomy = cfg_svc.get_taxonomy() + cat_options = build_cat_options(taxonomy, include_empty=True) + + stats = tx_svc.get_counterpart_stats() + if not stats: + st.info(t("counterparts.empty")) + return + + # ── Filters ─────────────────────────────────────────────────────────────── + f1, f2, f3, f4, f5 = st.columns([2, 2, 2, 1, 1]) + with f1: + sort_opts = { + t("counterparts.sort_tx_count"): "tx_count", + t("counterparts.sort_avg_amount"): "avg_amount", + t("counterparts.sort_variability"): "variability_pct", + t("counterparts.sort_name"): "description", + } + sort_label = st.selectbox( + t("counterparts.sort_by"), list(sort_opts.keys()), key="cp_sort" + ) + sort_key = sort_opts[sort_label] + with f2: + sort_asc = st.toggle(t("counterparts.sort_asc"), value=False, key="cp_sort_asc") + with f3: + source_filter_opts = { + t("counterparts.filter_source_all"): None, + t("counterparts.filter_source_rule"): "rule", + t("counterparts.filter_source_llm"): "llm", + t("counterparts.filter_source_mixed"): "mixed", + t("counterparts.filter_source_manual"): "manual", + } + source_label = st.selectbox( + t("counterparts.filter_source"), list(source_filter_opts.keys()), key="cp_filter_src" + ) + filter_source = source_filter_opts[source_label] + with f4: + 
filter_low_var = st.toggle( + t("counterparts.filter_low_var"), value=False, key="cp_filter_var" + ) + with f5: + filter_unchecked = st.toggle( + t("counterparts.filter_unchecked"), value=False, key="cp_filter_unc" + ) + + # ── Apply filters & sort ────────────────────────────────────────────────── + filtered = stats + if filter_source is not None: + filtered = [s for s in filtered if s["source_mode"] == filter_source] + if filter_low_var: + filtered = [s for s in filtered if s["variability_pct"] < _VARIABILITY_WARN] + if filter_unchecked: + filtered = [s for s in filtered if not s["human_checked"]] + filtered.sort(key=lambda s: s[sort_key], reverse=not sort_asc) + + df = _build_df(filtered, cat_options) + + _col_counterpart = t("counterparts.col_counterpart") + _col_tx = t("counterparts.col_tx_count") + _col_avg = t("counterparts.col_avg_amount") + _col_cat_sub = t("counterparts.col_cat_sub") + _col_src = t("counterparts.col_source") + _col_var = t("counterparts.col_variability") + _col_chk = t("counterparts.col_checked") + + display_cols = [ + _col_counterpart, _col_tx, _col_avg, + _col_cat_sub, + _col_src, _col_var, _col_chk, + ] + + column_config = { + _col_counterpart: st.column_config.TextColumn( + _col_counterpart, disabled=True, width="large" + ), + _col_tx: st.column_config.NumberColumn( + _col_tx, disabled=True, width="small" + ), + _col_avg: st.column_config.NumberColumn( + _col_avg, disabled=True, format="€ %.2f", width="small" + ), + _col_cat_sub: st.column_config.SelectboxColumn( + _col_cat_sub, + options=cat_options, + required=False, + width="large", + ), + _col_src: st.column_config.TextColumn( + _col_src, disabled=True, width="small" + ), + _col_var: st.column_config.TextColumn( + _col_var, disabled=True, width="small" + ), + _col_chk: st.column_config.CheckboxColumn( + _col_chk, disabled=True, width="small" + ), + } + + st.caption(t("counterparts.grid_hint", n=len(filtered))) + + edited = st.data_editor( + df[display_cols], + 
column_config=column_config, + use_container_width=True, + hide_index=True, + key="cp_editor", + num_rows="fixed", + ) + + # ── Detect changes ──────────────────────────────────────────────────────── + changed_rows = [] + for idx in range(len(df)): + orig = df.at[idx, "_orig_cat_sub"] + new_val = edited.at[idx, _col_cat_sub] + if new_val and new_val != orig: + new_cat, new_sub = split_cat_sub(new_val) + changed_rows.append( + { + "description": df.at[idx, "_description"], + "new_cat": new_cat, + "new_sub": new_sub, + "orig": orig, + } + ) + + if changed_rows: + st.info(t("counterparts.changes_pending", n=len(changed_rows))) + + n_affected = sum( + len(tx_svc.get_by_rule_pattern(r["description"], "exact")) + for r in changed_rows + ) + retroapply = st.checkbox( + t("counterparts.retroapply", n=n_affected), + value=True, + key="cp_retroapply", + disabled=n_affected == 0, + ) + + if st.button(t("counterparts.save_btn"), type="primary", key="cp_save"): + saved = 0 + applied = 0 + for r in changed_rows: + _, created = rule_svc.create_rule( + pattern=r["description"], + match_type="exact", + category=r["new_cat"], + subcategory=r["new_sub"], + priority=10, + ) + saved += 1 + logger.info( + f"counterparts_page: {'created' if created else 'updated'} rule" + f" pattern={r['description']!r} → {r['new_cat']!r}/{r['new_sub']!r}" + ) + if retroapply: + txs = tx_svc.get_by_rule_pattern(r["description"], "exact") + for tx in txs: + tx_svc.update_category( + tx.id, r["new_cat"], r["new_sub"], origin="counterparts" + ) + applied += len(txs) + + msg = t("counterparts.saved_ok", n=saved) + if retroapply and applied: + msg += " " + t("counterparts.retroapplied", n=applied) + st.success(msg) + st.rerun() + else: + st.caption(t("counterparts.no_changes")) diff --git a/ui/i18n/de.json b/ui/i18n/de.json index f345540..934a1f8 100644 --- a/ui/i18n/de.json +++ b/ui/i18n/de.json @@ -984,5 +984,51 @@ "llm_models.stats.col.s_per_tx": "s/Tx", "llm_models.stats.col.s_per_tx_help": "Mittlere Zeit 
pro Einzeltransaktion = mittlere Aufrufdauer ÷ batch_size. Leer für Single-Shot-Phasen (Classifier, Footer). Nützlich zur Schätzung der Gesamt-Importzeit.", "llm_models.stats.col.mean_s_help": "Mittlere Latenz eines einzelnen LLM-Aufrufs (ein Aufruf kann N Transaktionen im Batch enthalten). Pro-Transaktion siehe Spalte s/Tx.", - "upload.error_backend_load": "❌ **{filename}** nicht importiert — LLM-Modell konnte nicht geladen werden.\n\n{error}\n\n👉 Öffnen Sie **🤖 LLM-Modelle** und prüfen Sie die Modelldatei (Test 🧪). Bei Beschädigung erneut herunterladen." -} + "upload.error_backend_load": "❌ **{filename}** nicht importiert — LLM-Modell konnte nicht geladen werden.\n\n{error}\n\n👉 Öffnen Sie **🤖 LLM-Modelle** und prüfen Sie die Modelldatei (Test 🧪). Bei Beschädigung erneut herunterladen.", + "nav.counterparts": "🏪 Gegenparteien", + "nav.counterparts.desc": "Anbieter-Statistiken mit schneller Regelerfassung", + "counterparts.title": "🏪 Gegenparteien", + "counterparts.caption": "Händler/Gegenpartei-Liste mit Kategorisierungsstatistiken. 
Kategorie direkt im Raster bearbeiten und speichern, um eine automatische Regel zu erstellen.", + "counterparts.empty": "Keine kategorisierten Transaktionen gefunden.", + "counterparts.col_counterpart": "Gegenpartei", + "counterparts.col_tx_count": "# Tx", + "counterparts.col_avg_amount": "Ø Betrag", + "counterparts.col_category": "Kategorie", + "counterparts.col_subcategory": "Unterkategorie", + "counterparts.col_source": "Quelle", + "counterparts.col_variability": "Konsistenz", + "counterparts.col_checked": "✓", + "counterparts.sort_by": "Sortieren nach", + "counterparts.sort_asc": "Aufsteigend", + "counterparts.sort_tx_count": "# Transaktionen", + "counterparts.sort_avg_amount": "Ø Betrag", + "counterparts.sort_variability": "Konsistenz", + "counterparts.sort_name": "Name", + "counterparts.filter_low_var": "Nur niedrige Konsistenz", + "counterparts.filter_unchecked": "Nur nicht geprüft", + "counterparts.grid_hint": "{n} Gegenparteien — Kategorie oder Unterkategorie bearbeiten, dann speichern", + "counterparts.changes_pending": "{n} Zeile(n) geändert — bereit als Regel(n)", + "counterparts.retroapply": "Auch auf {n} bestehende Transaktionen anwenden", + "counterparts.save_btn": "💾 Als Regeln speichern", + "counterparts.saved_ok": "✅ {n} Regel(n) erstellt oder aktualisiert.", + "counterparts.retroapplied": "{n} Transaktionen aktualisiert.", + "counterparts.no_changes": "Keine ausstehenden Änderungen.", + "counterparts.filter_source": "Quelle", + "counterparts.filter_source_all": "Alle", + "counterparts.filter_source_rule": "📏 Regel", + "counterparts.filter_source_llm": "🤖 LLM", + "counterparts.filter_source_mixed": "🔀 Gemischt", + "counterparts.filter_source_manual": "✋ Manuell", + "llm_models.benchmark.title": "📊 Implizites Benutzer-Benchmark", + "llm_models.benchmark.caption": "Geschätzte Genauigkeit pro Modell basierend auf Benutzerkorrekturen.", + "llm_models.benchmark.unavailable": "Benchmark-Tabelle noch nicht verfügbar.", + "llm_models.benchmark.empty": 
"Noch keine Korrekturen aufgezeichnet.", + "llm_models.benchmark.col_model": "Modell", + "llm_models.benchmark.col_total": "LLM-Kategorisierungen", + "llm_models.benchmark.col_corrections": "Benutzerkorrekturen", + "llm_models.benchmark.col_accuracy": "Implizite Genauigkeit", + "llm_models.benchmark.col_hce": "Fehler hoher Konfidenz", + "llm_models.benchmark.col_consistency": "Anbieter-Konsistenz (Ø)", + "llm_models.benchmark.note": "⚠️ Precision-when-reviewed: nicht berührte Transaktionen werden nicht gezählt.", + "counterparts.col_cat_sub": "Kategorie / Unterkategorie" +} \ No newline at end of file diff --git a/ui/i18n/en.json b/ui/i18n/en.json index b6699c6..066bffb 100644 --- a/ui/i18n/en.json +++ b/ui/i18n/en.json @@ -984,5 +984,51 @@ "llm_models.stats.col.s_per_tx": "s/tx", "llm_models.stats.col.s_per_tx_help": "Mean time per single transaction = mean call duration ÷ batch_size. Blank for single-shot phases (classifier, footer) where 1 call ≠ N transactions. Useful for estimating total import time.", "llm_models.stats.col.mean_s_help": "Mean latency of a single LLM call (one call may contain N transactions in a batch). For per-transaction time see the s/tx column.", - "upload.error_backend_load": "❌ **{filename}** not imported — failed to load the LLM model.\n\n{error}\n\n👉 Open **🤖 LLM Models** and verify the model file is valid (Test 🧪). If the file is corrupted, re-download it from the Download section." -} + "upload.error_backend_load": "❌ **{filename}** not imported — failed to load the LLM model.\n\n{error}\n\n👉 Open **🤖 LLM Models** and verify the model file is valid (Test 🧪). If the file is corrupted, re-download it from the Download section.", + "nav.counterparts": "🏪 Counterparts", + "nav.counterparts.desc": "Per-vendor statistics with quick rule creation", + "counterparts.title": "🏪 Counterparts", + "counterparts.caption": "Vendor/counterpart list with categorization statistics. 
Edit category directly in the grid and save to create or update an automatic rule.", + "counterparts.empty": "No categorized transactions found. Import and categorize transactions before using this page.", + "counterparts.col_counterpart": "Counterpart", + "counterparts.col_tx_count": "# Tx", + "counterparts.col_avg_amount": "Avg amount", + "counterparts.col_category": "Category", + "counterparts.col_subcategory": "Subcategory", + "counterparts.col_source": "Source", + "counterparts.col_variability": "Consistency", + "counterparts.col_checked": "✓", + "counterparts.sort_by": "Sort by", + "counterparts.sort_asc": "Ascending", + "counterparts.sort_tx_count": "# Transactions", + "counterparts.sort_avg_amount": "Avg amount", + "counterparts.sort_variability": "Consistency", + "counterparts.sort_name": "Name", + "counterparts.filter_low_var": "Low consistency only", + "counterparts.filter_unchecked": "Unchecked only", + "counterparts.grid_hint": "{n} counterparts — edit Category or Subcategory in the highlighted cells, then save", + "counterparts.changes_pending": "{n} row(s) modified — ready to become rule(s)", + "counterparts.retroapply": "Also apply to {n} existing transactions", + "counterparts.save_btn": "💾 Save as rules", + "counterparts.saved_ok": "✅ {n} rule(s) created or updated.", + "counterparts.retroapplied": "{n} transactions updated.", + "counterparts.no_changes": "No pending changes. Edit Category or Subcategory in a row to create a rule.", + "counterparts.filter_source": "Source", + "counterparts.filter_source_all": "All", + "counterparts.filter_source_rule": "📏 Rule", + "counterparts.filter_source_llm": "🤖 LLM", + "counterparts.filter_source_mixed": "🔀 Mixed", + "counterparts.filter_source_manual": "✋ Manual", + "llm_models.benchmark.title": "📊 Implicit user benchmark", + "llm_models.benchmark.caption": "Estimated per-model accuracy based on user category corrections. One correction = model was wrong. 
Only LLM-tracked categorizations included.", + "llm_models.benchmark.unavailable": "Benchmark table not yet available.", + "llm_models.benchmark.empty": "No corrections recorded yet. The benchmark fills in as the user corrects categories in Ledger, Review or Counterparts.", + "llm_models.benchmark.col_model": "Model", + "llm_models.benchmark.col_total": "LLM categorizations", + "llm_models.benchmark.col_corrections": "User corrections", + "llm_models.benchmark.col_accuracy": "Implicit accuracy", + "llm_models.benchmark.col_hce": "High-confidence errors", + "llm_models.benchmark.col_consistency": "Vendor consistency (avg)", + "llm_models.benchmark.note": "⚠️ Precision-when-reviewed benchmark: untouched transactions are not counted. More corrections → more reliable estimate.", + "counterparts.col_cat_sub": "Category / Subcategory" +} \ No newline at end of file diff --git a/ui/i18n/es.json b/ui/i18n/es.json index a0b2f3f..b30b996 100644 --- a/ui/i18n/es.json +++ b/ui/i18n/es.json @@ -984,5 +984,51 @@ "llm_models.stats.col.s_per_tx": "s/tx", "llm_models.stats.col.s_per_tx_help": "Tiempo medio por transacción = duración media de la llamada ÷ batch_size. Vacío para fases single-shot (classifier, footer). Útil para estimar el tiempo total de un import.", "llm_models.stats.col.mean_s_help": "Latencia media de una sola llamada LLM (puede contener N transacciones en el lote). Para el tiempo por transacción ver la columna s/tx.", - "upload.error_backend_load": "❌ **{filename}** no importado — error al cargar el modelo LLM.\n\n{error}\n\n👉 Abre **🤖 Modelos LLM** y verifica el archivo del modelo (Test 🧪). Si está dañado, descárgalo de nuevo." -} + "upload.error_backend_load": "❌ **{filename}** no importado — error al cargar el modelo LLM.\n\n{error}\n\n👉 Abre **🤖 Modelos LLM** y verifica el archivo del modelo (Test 🧪). 
Si está dañado, descárgalo de nuevo.", + "nav.counterparts": "🏪 Contrapartes", + "nav.counterparts.desc": "Estadísticas por proveedor con creación rápida de reglas", + "counterparts.title": "🏪 Contrapartes", + "counterparts.caption": "Lista de proveedores/contrapartes con estadísticas de categorización.", + "counterparts.empty": "No se encontraron transacciones categorizadas.", + "counterparts.col_counterpart": "Contraparte", + "counterparts.col_tx_count": "# Tx", + "counterparts.col_avg_amount": "Importe medio", + "counterparts.col_category": "Categoría", + "counterparts.col_subcategory": "Subcategoría", + "counterparts.col_source": "Fuente", + "counterparts.col_variability": "Consistencia", + "counterparts.col_checked": "✓", + "counterparts.sort_by": "Ordenar por", + "counterparts.sort_asc": "Ascendente", + "counterparts.sort_tx_count": "# Transacciones", + "counterparts.sort_avg_amount": "Importe medio", + "counterparts.sort_variability": "Consistencia", + "counterparts.sort_name": "Nombre", + "counterparts.filter_low_var": "Solo baja consistencia", + "counterparts.filter_unchecked": "Solo no validadas", + "counterparts.grid_hint": "{n} contrapartes — edita Categoría o Subcategoría, luego guarda", + "counterparts.changes_pending": "{n} fila(s) modificada(s) — lista(s) para convertirse en regla(s)", + "counterparts.retroapply": "Aplicar también a {n} transacciones existentes", + "counterparts.save_btn": "💾 Guardar como reglas", + "counterparts.saved_ok": "✅ {n} regla(s) creada(s) o actualizada(s).", + "counterparts.retroapplied": "{n} transacciones actualizadas.", + "counterparts.no_changes": "Sin cambios pendientes.", + "counterparts.filter_source": "Fuente", + "counterparts.filter_source_all": "Todas", + "counterparts.filter_source_rule": "📏 Regla", + "counterparts.filter_source_llm": "🤖 LLM", + "counterparts.filter_source_mixed": "🔀 Mixto", + "counterparts.filter_source_manual": "✋ Manual", + "llm_models.benchmark.title": "📊 Benchmark implícito de usuario", + 
"llm_models.benchmark.caption": "Precisión estimada por modelo basada en correcciones del usuario.", + "llm_models.benchmark.unavailable": "Tabla de benchmark no disponible aún.", + "llm_models.benchmark.empty": "Aún no hay correcciones registradas.", + "llm_models.benchmark.col_model": "Modelo", + "llm_models.benchmark.col_total": "Categorizaciones LLM", + "llm_models.benchmark.col_corrections": "Correcciones usuario", + "llm_models.benchmark.col_accuracy": "Precisión implícita", + "llm_models.benchmark.col_hce": "Errores alta confianza", + "llm_models.benchmark.col_consistency": "Consistencia por proveedor (media)", + "llm_models.benchmark.note": "⚠️ Benchmark precision-when-reviewed: las transacciones no tocadas no se cuentan.", + "counterparts.col_cat_sub": "Categoría / Subcategoría" +} \ No newline at end of file diff --git a/ui/i18n/fr.json b/ui/i18n/fr.json index e13ef94..8254e45 100644 --- a/ui/i18n/fr.json +++ b/ui/i18n/fr.json @@ -984,5 +984,51 @@ "llm_models.stats.col.s_per_tx": "s/tx", "llm_models.stats.col.s_per_tx_help": "Temps moyen par transaction = durée moyenne de l'appel ÷ batch_size. Vide pour les phases single-shot (classifier, footer). Utile pour estimer le temps total d'un import.", "llm_models.stats.col.mean_s_help": "Latence moyenne d'un seul appel LLM (un appel peut contenir N transactions dans un lot). Pour le temps par transaction voir la colonne s/tx.", - "upload.error_backend_load": "❌ **{filename}** non importé — échec du chargement du modèle LLM.\n\n{error}\n\n👉 Ouvrez **🤖 Modèles LLM** et vérifiez la validité du fichier (Test 🧪). Si corrompu, retéléchargez-le." -} + "upload.error_backend_load": "❌ **{filename}** non importé — échec du chargement du modèle LLM.\n\n{error}\n\n👉 Ouvrez **🤖 Modèles LLM** et vérifiez la validité du fichier (Test 🧪). 
Si corrompu, retéléchargez-le.", + "nav.counterparts": "🏪 Contreparties", + "nav.counterparts.desc": "Statistiques par fournisseur avec création rapide de règles", + "counterparts.title": "🏪 Contreparties", + "counterparts.caption": "Liste des fournisseurs/contreparties avec statistiques de catégorisation.", + "counterparts.empty": "Aucune transaction catégorisée trouvée.", + "counterparts.col_counterpart": "Contrepartie", + "counterparts.col_tx_count": "# Tx", + "counterparts.col_avg_amount": "Montant moyen", + "counterparts.col_category": "Catégorie", + "counterparts.col_subcategory": "Sous-catégorie", + "counterparts.col_source": "Source", + "counterparts.col_variability": "Cohérence", + "counterparts.col_checked": "✓", + "counterparts.sort_by": "Trier par", + "counterparts.sort_asc": "Croissant", + "counterparts.sort_tx_count": "# Transactions", + "counterparts.sort_avg_amount": "Montant moyen", + "counterparts.sort_variability": "Cohérence", + "counterparts.sort_name": "Nom", + "counterparts.filter_low_var": "Faible cohérence uniquement", + "counterparts.filter_unchecked": "Non validées uniquement", + "counterparts.grid_hint": "{n} contreparties — modifiez Catégorie ou Sous-catégorie, puis sauvegardez", + "counterparts.changes_pending": "{n} ligne(s) modifiée(s) — prête(s) à devenir règle(s)", + "counterparts.retroapply": "Appliquer aussi à {n} transactions existantes", + "counterparts.save_btn": "💾 Enregistrer comme règles", + "counterparts.saved_ok": "✅ {n} règle(s) créée(s) ou mise(s) à jour.", + "counterparts.retroapplied": "{n} transactions mises à jour.", + "counterparts.no_changes": "Aucune modification en attente.", + "counterparts.filter_source": "Source", + "counterparts.filter_source_all": "Toutes", + "counterparts.filter_source_rule": "📏 Règle", + "counterparts.filter_source_llm": "🤖 LLM", + "counterparts.filter_source_mixed": "🔀 Mixte", + "counterparts.filter_source_manual": "✋ Manuel", + "llm_models.benchmark.title": "📊 Benchmark utilisateur 
implicite", + "llm_models.benchmark.caption": "Précision estimée par modèle basée sur les corrections de l'utilisateur.", + "llm_models.benchmark.unavailable": "Table de benchmark non encore disponible.", + "llm_models.benchmark.empty": "Aucune correction enregistrée pour l'instant.", + "llm_models.benchmark.col_model": "Modèle", + "llm_models.benchmark.col_total": "Catégorisations LLM", + "llm_models.benchmark.col_corrections": "Corrections utilisateur", + "llm_models.benchmark.col_accuracy": "Précision implicite", + "llm_models.benchmark.col_hce": "Erreurs haute confiance", + "llm_models.benchmark.col_consistency": "Cohérence fournisseur (moy.)", + "llm_models.benchmark.note": "⚠️ Benchmark precision-when-reviewed: les transactions non touchées ne sont pas comptées.", + "counterparts.col_cat_sub": "Catégorie / Sous-catégorie" +} \ No newline at end of file diff --git a/ui/i18n/it.json b/ui/i18n/it.json index f840e28..d146d22 100644 --- a/ui/i18n/it.json +++ b/ui/i18n/it.json @@ -984,5 +984,51 @@ "llm_models.stats.col.s_per_tx": "s/tx", "llm_models.stats.col.s_per_tx_help": "Tempo medio per singola transazione = durata media della call ÷ batch_size. Vuoto per le fasi single-shot (classifier, footer) dove 1 call ≠ N transazioni. Utile per stimare il tempo totale di un import.", "llm_models.stats.col.mean_s_help": "Latenza media per una singola chiamata LLM (può contenere N transazioni nel batch). Per il tempo per tx vedi la colonna s/tx.", - "upload.error_backend_load": "❌ **{filename}** non importato — errore nel caricamento del modello LLM.\n\n{error}\n\n👉 Apri **🤖 Modelli LLM** e verifica che il file del modello sia valido (Test 🧪). Se il file è danneggiato, ri-scaricalo dalla sezione Download." -} + "upload.error_backend_load": "❌ **{filename}** non importato — errore nel caricamento del modello LLM.\n\n{error}\n\n👉 Apri **🤖 Modelli LLM** e verifica che il file del modello sia valido (Test 🧪). 
Se il file è danneggiato, ri-scaricalo dalla sezione Download.", + "nav.counterparts": "🏪 Controparti", + "nav.counterparts.desc": "Statistiche per controparte/vendor con creazione rapida di regole", + "counterparts.title": "🏪 Controparti", + "counterparts.caption": "Elenco vendor/controparti con statistiche di categorizzazione. Modifica la categoria direttamente nella griglia e salva per creare o aggiornare una regola automatica.", + "counterparts.empty": "Nessuna transazione categorizzata trovata. Importa e categorizza le transazioni prima di usare questa pagina.", + "counterparts.col_counterpart": "Controparte", + "counterparts.col_tx_count": "# Tx", + "counterparts.col_avg_amount": "Valore medio", + "counterparts.col_category": "Categoria", + "counterparts.col_subcategory": "Sottocategoria", + "counterparts.col_source": "Modalità", + "counterparts.col_variability": "Variabilità", + "counterparts.col_checked": "✓", + "counterparts.sort_by": "Ordina per", + "counterparts.sort_asc": "Crescente", + "counterparts.sort_tx_count": "# Transazioni", + "counterparts.sort_avg_amount": "Valore medio", + "counterparts.sort_variability": "Variabilità", + "counterparts.sort_name": "Nome", + "counterparts.filter_low_var": "Solo bassa variabilità", + "counterparts.filter_unchecked": "Solo non validate", + "counterparts.grid_hint": "{n} controparti — modifica Categoria o Sottocategoria nelle celle evidenziate, poi salva", + "counterparts.changes_pending": "{n} riga/righe modificata/e — pronta/e per diventare regola", + "counterparts.retroapply": "Applica anche alle {n} transazioni esistenti", + "counterparts.save_btn": "💾 Salva come regole", + "counterparts.saved_ok": "✅ {n} regola/e creata/e o aggiornata/e.", + "counterparts.retroapplied": "{n} transazioni aggiornate.", + "counterparts.no_changes": "Nessuna modifica in sospeso. 
Cambia Categoria o Sottocategoria in una riga per creare una regola.", + "counterparts.filter_source": "Modalità", + "counterparts.filter_source_all": "Tutte", + "counterparts.filter_source_rule": "📏 Regola", + "counterparts.filter_source_llm": "🤖 LLM", + "counterparts.filter_source_mixed": "🔀 Misto", + "counterparts.filter_source_manual": "✋ Manuale", + "llm_models.benchmark.title": "📊 Benchmark implicito utente", + "llm_models.benchmark.caption": "Accuratezza stimata per modello basata sulle correzioni di categoria effettuate dall'utente. Una correzione = il modello aveva sbagliato. Solo categorizzazioni LLM tracciate.", + "llm_models.benchmark.unavailable": "Tabella benchmark non ancora disponibile.", + "llm_models.benchmark.empty": "Nessuna correzione registrata. Il benchmark si popola man mano che l'utente corregge le categorie nel Ledger, Review o Controparti.", + "llm_models.benchmark.col_model": "Modello", + "llm_models.benchmark.col_total": "Categorizzazioni LLM", + "llm_models.benchmark.col_corrections": "Correzioni utente", + "llm_models.benchmark.col_accuracy": "Accuratezza implicita", + "llm_models.benchmark.col_hce": "Errori alta confidence", + "llm_models.benchmark.col_consistency": "Coerenza vendor (media)", + "llm_models.benchmark.note": "⚠️ Benchmark su \"precision-when-reviewed\": le transazioni mai toccate non entrano nel calcolo. 
Più correzioni → stima più affidabile.", + "counterparts.col_cat_sub": "Categoria / Sottocategoria" +} \ No newline at end of file diff --git a/ui/llm_models_page.py b/ui/llm_models_page.py index 7f92d8b..6d34eb5 100644 --- a/ui/llm_models_page.py +++ b/ui/llm_models_page.py @@ -619,11 +619,63 @@ def render_llm_models_page(engine) -> None: st.subheader(t("llm_models.operations.title")) _render_stats_7d(engine) st.divider() + _render_correction_benchmark(engine) + st.divider() _render_calibrate_stub() st.divider() _render_download() +def _render_correction_benchmark(engine) -> None: + """Live implicit benchmark derived from user category corrections.""" + import pandas as pd + from db import repository + from sqlalchemy.orm import sessionmaker + + st.markdown(f"**{t('llm_models.benchmark.title')}**") + st.caption(t("llm_models.benchmark.caption")) + + try: + _Session = sessionmaker(bind=engine, expire_on_commit=False) + s = _Session() + try: + rows = repository.get_correction_benchmark(s) + finally: + s.close() + except Exception: + st.caption(t("llm_models.benchmark.unavailable")) + return + + if not rows: + st.caption(t("llm_models.benchmark.empty")) + return + + df = pd.DataFrame(rows) + df.rename(columns={ + "model": t("llm_models.benchmark.col_model"), + "total_categorized": t("llm_models.benchmark.col_total"), + "total_corrections": t("llm_models.benchmark.col_corrections"), + "implicit_accuracy": t("llm_models.benchmark.col_accuracy"), + "high_conf_errors": t("llm_models.benchmark.col_hce"), + "avg_consistency_at_error": t("llm_models.benchmark.col_consistency"), + }, inplace=True) + + col_acc = t("llm_models.benchmark.col_accuracy") + col_cons = t("llm_models.benchmark.col_consistency") + + st.dataframe( + df.style.format( + { + col_acc: lambda v: f"{v:.1f}%" if v is not None else "—", + col_cons: lambda v: f"{v:.1f}%" if v is not None else "—", + } + ).background_gradient(subset=[col_acc], cmap="RdYlGn", vmin=0, vmax=100), + use_container_width=True, + 
hide_index=True, + ) + st.caption(t("llm_models.benchmark.note")) + + def _render_stats_7d(engine) -> None: """Aggregate llm_usage_log over the last 7 days, group by caller × model.""" from sqlalchemy import text as _sql diff --git a/ui/registry_page.py b/ui/registry_page.py index 444c43a..75992ff 100644 --- a/ui/registry_page.py +++ b/ui/registry_page.py @@ -373,7 +373,7 @@ def render_registry_page(engine): f"a «{_new_cat}». Sottocategorie valide: {', '.join(_valid_subs)}" ) continue - tx_svc.update_category(tx_id, _new_cat, _new_sub) + tx_svc.update_category(tx_id, _new_cat, _new_sub, origin="ledger") n_cat += 1 _desc = str(orig["Descrizione"]).strip() if _desc: diff --git a/ui/review_page.py b/ui/review_page.py index 68719e5..b26f009 100644 --- a/ui/review_page.py +++ b/ui/review_page.py @@ -300,7 +300,7 @@ def render_review_page(engine): review_retroactive = False if st.button(t("review.apply_btn"), type="primary"): - ok = tx_svc.update_category(selected_tx.id, new_cat, new_sub) + ok = tx_svc.update_category(selected_tx.id, new_cat, new_sub, origin="review") if ok: rule_msg = "" if save_rule and selected_tx.description: @@ -321,7 +321,7 @@ def render_review_page(engine): n_similar = 0 for stx in similar: if stx.id != selected_tx.id: - tx_svc.update_category(stx.id, new_cat, new_sub) + tx_svc.update_category(stx.id, new_cat, new_sub, origin="review") n_similar += 1 if n_similar: rule_msg += f" · {n_similar} transazioni simili aggiornate." 
diff --git a/ui/rules_page.py b/ui/rules_page.py index 050404a..fd07804 100644 --- a/ui/rules_page.py +++ b/ui/rules_page.py @@ -200,7 +200,7 @@ def render_rules_page(engine): ) if ok and also_fix_txs and n_affected > 0: for tx in affected: - tx_svc.update_category(tx.id, new_cat, new_sub) + tx_svc.update_category(tx.id, new_cat, new_sub, origin="rule_apply") if new_ctx: tx_svc.update_context(tx.id, new_ctx) if ok: @@ -301,7 +301,7 @@ def render_rules_page(engine): logger.info(f"rules_page: updated existing rule pattern={nr_pattern!r} cat={nr_cat!r} ctx={nr_ctx!r}") if nr_also_apply and _nr_preview_txs: for _tx in _nr_preview_txs: - tx_svc.update_category(_tx.id, nr_cat, nr_sub) + tx_svc.update_category(_tx.id, nr_cat, nr_sub, origin="rule_apply") if nr_ctx: tx_svc.update_context(_tx.id, nr_ctx) logger.info( diff --git a/ui/sidebar.py b/ui/sidebar.py index 3c2014e..baf9def 100644 --- a/ui/sidebar.py +++ b/ui/sidebar.py @@ -16,6 +16,7 @@ ("budget_vs_actual","budget_vs_actual"), ("review", "review"), ("rules", "rules"), + ("counterparts", "counterparts"), ("taxonomy", "taxonomy"), ("llm_models", "llm_models"), ("settings", "settings"), diff --git a/ui/widgets/cat_select.py b/ui/widgets/cat_select.py new file mode 100644 index 0000000..7770f70 --- /dev/null +++ b/ui/widgets/cat_select.py @@ -0,0 +1,58 @@ +"""Category + subcategory as a single combined string. + +Pattern: "Categoria / Sottocategoria" (separator " / "). +When the category has no subcategories the string is just "Categoria". + +Public API: + build_cat_options(taxonomy) -> list[str] + Full flat list of valid combined strings, suitable for SelectboxColumn. + + join_cat_sub(category, subcategory) -> str + Build the combined string from separate fields. + + split_cat_sub(value) -> (category, subcategory) + Parse back to separate fields. Returns ("", "") for empty/None. 
+""" +from __future__ import annotations + +from core.categorizer import TaxonomyConfig + +SEP = " / " + + +def build_cat_options(taxonomy: TaxonomyConfig, *, include_empty: bool = False) -> list[str]: + """Return all valid category+subcategory combinations as combined strings. + + For categories with subcategories: one entry per subcategory ("Cat / Sub"). + For categories without subcategories: one entry for the category alone ("Cat"). + """ + options: list[str] = [] + if include_empty: + options.append("") + for cat in taxonomy.all_expense_categories + taxonomy.all_income_categories: + subs = taxonomy.valid_subcategories(cat) + if subs: + for sub in subs: + options.append(f"{cat}{SEP}{sub}") + else: + options.append(cat) + return options + + +def join_cat_sub(category: str | None, subcategory: str | None) -> str: + """Combine category and subcategory into one display string.""" + cat = (category or "").strip() + sub = (subcategory or "").strip() + if cat and sub: + return f"{cat}{SEP}{sub}" + return cat + + +def split_cat_sub(value: str | None) -> tuple[str, str]: + """Parse a combined string back to (category, subcategory).""" + if not value: + return "", "" + if SEP in value: + cat, sub = value.split(SEP, 1) + return cat.strip(), sub.strip() + return value.strip(), "" From f54ac8819b84dd224efe5561b1c52c85a38dc00c Mon Sep 17 00:00:00 2001 From: Luigi Corsaro <5324491+drake69@users.noreply.github.com> Date: Thu, 25 Jun 2026 21:37:23 +0200 Subject: [PATCH 2/2] fix(rules+counterparts): case-insensitive rule upsert and counterpart grouping - store rule pattern verbatim (matching already case-insensitive at compare time) - case-insensitive upsert dedup for contains/exact rules - group counterparts case-insensitively, keep first-seen casing for display - add regression tests Fixes the 4 failing rule tests on #140. 
--- db/repository.py | 42 ++++++++++----- tests/test_repository_counterparts.py | 78 +++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 12 deletions(-) create mode 100644 tests/test_repository_counterparts.py diff --git a/db/repository.py b/db/repository.py index 7b131a6..177872a 100644 --- a/db/repository.py +++ b/db/repository.py @@ -757,15 +757,26 @@ def create_category_rule( Returns (rule, created) where created=False means an existing rule was updated. """ - # Normalize pattern casing: contains/exact match against uppercase descriptions - if match_type in ("contains", "exact"): - pattern = pattern.upper() + # Pattern is stored verbatim (matching is case-insensitive at compare time: + # see categorizer.matches / get_transactions_by_rule_pattern). The upsert + # lookup is case-insensitive for contains/exact so "coop"/"COOP" dedup to one. + from sqlalchemy import func - existing = ( - session.query(CategoryRule) - .filter(CategoryRule.pattern == pattern, CategoryRule.match_type == match_type) - .first() - ) + if match_type in ("contains", "exact"): + existing = ( + session.query(CategoryRule) + .filter( + func.upper(CategoryRule.pattern) == pattern.upper(), + CategoryRule.match_type == match_type, + ) + .first() + ) + else: + existing = ( + session.query(CategoryRule) + .filter(CategoryRule.pattern == pattern, CategoryRule.match_type == match_type) + .first() + ) if existing is not None: existing.category = category existing.subcategory = subcategory @@ -802,8 +813,8 @@ def update_category_rule( if rule is None: return False if pattern is not None: - _mt = match_type or rule.match_type - rule.pattern = pattern.upper() if _mt in ("contains", "exact") else pattern + # Stored verbatim; rule matching is case-insensitive at compare time. 
+ rule.pattern = pattern if match_type is not None: rule.match_type = match_type if category is not None: @@ -2046,12 +2057,19 @@ def get_counterpart_stats( .all() ) + # Group case-insensitively so "Coop"/"COOP" collapse into one counterpart, + # regardless of how the description casing was stored. The first-seen + # original casing is kept for display. groups: dict[str, list] = defaultdict(list) + display: dict[str, str] = {} for row in rows: - groups[row.description].append(row) + key = (row.description or "").upper() + groups[key].append(row) + display.setdefault(key, row.description) stats = [] - for desc, txs in groups.items(): + for key, txs in groups.items(): + desc = display[key] tx_count = len(txs) avg_amount = sum(abs(float(t.amount or 0)) for t in txs) / tx_count diff --git a/tests/test_repository_counterparts.py b/tests/test_repository_counterparts.py new file mode 100644 index 0000000..b617a7e --- /dev/null +++ b/tests/test_repository_counterparts.py @@ -0,0 +1,78 @@ +"""Case-insensitive behaviour of counterpart grouping and rule upsert. + +These lock the contract that storage casing no longer matters: descriptions +that differ only by case collapse into a single counterpart, and rule patterns +are stored verbatim while the upsert dedup is case-insensitive. 
+""" +from __future__ import annotations + +import pytest +from sqlalchemy import create_engine + +from db.models import Base, Transaction, get_session +from db.repository import create_category_rule, get_counterpart_stats + + +@pytest.fixture +def engine(): + eng = create_engine("sqlite:///:memory:", connect_args={"check_same_thread": False}) + Base.metadata.create_all(eng) + return eng + + +@pytest.fixture +def session(engine): + with get_session(engine) as s: + yield s + + +def _tx(session, *, tx_id: str, description: str, amount: float = -10.0) -> None: + session.add( + Transaction( + id=tx_id, + date="2025-01-01", + description=description, + amount=amount, + currency="EUR", + tx_type="expense", + category="Spesa", + subcategory="Supermercato", + category_source="llm", + category_confidence="medium", + account_label="test", + ) + ) + session.flush() + + +def test_counterpart_grouping_is_case_insensitive(session): + _tx(session, tx_id="t1", description="Coop Roma") + _tx(session, tx_id="t2", description="COOP ROMA") + _tx(session, tx_id="t3", description="coop roma") + + stats = get_counterpart_stats(session) + + assert len(stats) == 1 + group = stats[0] + assert group["tx_count"] == 3 + # First-seen original casing is preserved for display. + assert group["description"] == "Coop Roma" + + +def test_rule_upsert_is_case_insensitive_and_keeps_verbatim_pattern(session): + rule, created = create_category_rule( + session, pattern="coop", match_type="contains", + category="Spesa", subcategory="Supermercato", + ) + assert created is True + assert rule.pattern == "coop" + + # Same pattern, different casing → updates the existing rule, no duplicate. + rule2, created2 = create_category_rule( + session, pattern="COOP", match_type="contains", + category="Spesa", subcategory="Altro", + ) + assert created2 is False + assert rule2.id == rule.id + # Stored pattern stays as originally entered. + assert rule2.pattern == "coop"