Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
📊 Budget vs Actual – compare actual spending vs budget targets
🔍 Review – manual review of low-confidence items
📏 Rules – manage category rules (edit / delete / create)
🏪 Counterparts – per-vendor stats grid with inline rule creation
🗂️ Taxonomy – manage categories and subcategories
⚙️ Settings – locale, language, LLM backend preferences
✅ Checklist – monthly tx presence per account (pivot table)
Expand Down Expand Up @@ -199,6 +200,10 @@
from ui.rules_page import render_rules_page
render_rules_page(engine)

elif page == "counterparts":
from ui.counterparts_page import render_counterparts_page
render_counterparts_page(engine)

elif page == "taxonomy":
from ui.taxonomy_page import render_taxonomy_page
render_taxonomy_page(engine)
Expand Down
2 changes: 1 addition & 1 deletion core/_build_info.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# Generated at build time — do not edit manually.
BUILD_TIME = "2026-06-22 19:52"
BUILD_TIME = "2026-06-23 15:48"
BUILD_VERSION = "0.1.0"
55 changes: 55 additions & 0 deletions db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ def create_tables(engine=None):
_migrate_add_nsi_tag_mapping(engine)
_migrate_add_category_model(engine)
_migrate_add_llm_usage_log(engine)
_migrate_add_category_correction(engine)
_migrate_set_onboarding_done_for_existing_users(engine) # must run last
_migrate_purge_orphan_schemas(engine) # cleanup: remove schemas without header_sha256

Expand Down Expand Up @@ -583,6 +584,34 @@ class NsiTagMapping(Base):
updated_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))


class CategoryCorrection(Base):
    """User correction log for categories that were assigned automatically.

    ``update_transaction_category`` (db/repository.py) adds one row when the
    category or subcategory of a transaction actually changes and the previous
    assignment came from an automatic source (``llm``, ``rule`` or
    ``history``). Changes to manually categorized transactions are not logged.

    Captures the original LLM/rule assignment alongside two quality signals:
    - original_confidence: the model's self-reported certainty on that tx
    - consistency_at_correction: % of same-description txs that agreed on
      the same category at the moment of correction (vendor-level coherence)

    Together these let us compute a live implicit benchmark:
        accuracy ≈ 1 - (corrections / total_llm_categorizations)  per model
    and diagnose failure modes (high-confidence errors, inconsistent vendors).
    """
    __tablename__ = "category_correction"

    # Surrogate key; rows are append-only log entries.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Id of the corrected transaction (indexed for per-transaction lookups).
    # NOTE(review): no ForeignKey is declared, so the link is by value only.
    tx_id = Column(String(64), nullable=False, index=True)

    # --- Snapshot of the assignment that the user overrode -----------------
    original_category = Column(String(128))
    original_subcategory = Column(String(128))
    original_source = Column(String(10))  # llm | rule | history
    original_model = Column(String(128))  # category_model at correction time
    original_confidence = Column(String(10))  # high | medium | low

    # --- Values chosen by the user -----------------------------------------
    new_category = Column(String(128))
    new_subcategory = Column(String(128))

    # Percentage (0-100, one decimal) of categorized transactions sharing the
    # description that agreed on the modal category; NULL when it could not be
    # computed (no categorized transaction with that description).
    consistency_at_correction = Column(Float, nullable=True)
    # UI surface that triggered the correction:
    # ledger | counterparts | review | bulk_edit
    # ("unknown" is the default used by update_transaction_category).
    correction_origin = Column(String(20))
    # Timezone-aware UTC timestamp, evaluated per row at insert time.
    corrected_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))


def _migrate_add_import_job(engine) -> None:
"""Create import_job table if not present (idempotent) and add the
AI-88 ms_* phase-timing columns when missing.
Expand Down Expand Up @@ -1126,6 +1155,32 @@ def _migrate_add_category_model(engine) -> None:
raise


def _migrate_add_category_correction(engine) -> None:
    """Ensure the ``category_correction`` table and its ``tx_id`` index exist.

    Both statements use ``IF NOT EXISTS``, so running the migration again on
    an already-migrated database is a no-op.

    Args:
        engine: SQLAlchemy engine bound to the target database.
    """
    from sqlalchemy import text as _text

    # Column list mirrors the CategoryCorrection ORM model.
    create_table_ddl = (
        'CREATE TABLE IF NOT EXISTS category_correction ('
        'id INTEGER PRIMARY KEY AUTOINCREMENT, '
        'tx_id VARCHAR(64) NOT NULL, '
        'original_category VARCHAR(128), '
        'original_subcategory VARCHAR(128), '
        'original_source VARCHAR(10), '
        'original_model VARCHAR(128), '
        'original_confidence VARCHAR(10), '
        'new_category VARCHAR(128), '
        'new_subcategory VARCHAR(128), '
        'consistency_at_correction FLOAT, '
        'correction_origin VARCHAR(20), '
        'corrected_at DATETIME)'
    )
    create_index_ddl = (
        'CREATE INDEX IF NOT EXISTS ix_category_correction_tx_id '
        'ON category_correction (tx_id)'
    )

    with engine.connect() as connection:
        # Table first: the index statement references it.
        for ddl in (create_table_ddl, create_index_ddl):
            connection.execute(_text(ddl))
        connection.commit()


def _migrate_add_llm_usage_log(engine) -> None:
"""Create llm_usage_log table if not present (idempotent)."""
from sqlalchemy import text as _text
Expand Down
214 changes: 208 additions & 6 deletions db/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from core.categorizer import CategoryRule as CoreCategoryRule
from core.schemas import DocumentSchema
from db.models import (
CategoryCorrection,
CategoryRule,
DEFAULT_USER_SETTINGS,
DescriptionRule,
Expand Down Expand Up @@ -288,7 +289,7 @@ def upsert_transaction(session: Session, tx: dict, batch_id: Optional[int] = Non
date_accounting=tx.get("date_accounting").isoformat() if tx.get("date_accounting") and hasattr(tx["date_accounting"], "isoformat") else tx.get("date_accounting"),
amount=amount_val,
currency=tx.get("currency", "EUR"),
description=tx.get("description", ""),
description=(tx.get("description") or "").strip().upper() or None,
source_file=tx.get("source_file", ""),
doc_type=tx.get("doc_type", ""),
account_label=tx.get("account_label", ""),
Expand Down Expand Up @@ -318,16 +319,57 @@ def get_existing_tx_ids(session: Session, tx_ids: list[str]) -> set[str]:
return {row.id for row in rows}


def _compute_consistency(session: Session, description: str) -> float | None:
    """Return how strongly same-description transactions agree on one category.

    The result is the percentage (0-100, one decimal) of categorized
    transactions sharing ``description`` whose category equals the modal
    (most frequent) category for that description.

    Descriptions are compared case-insensitively so that legacy rows stored
    with their original casing ("Coop") count towards the same vendor as rows
    stored upper-cased ("COOP"), matching how ``get_counterpart_stats`` groups
    counterparts.

    Args:
        session: Open SQLAlchemy session.
        description: Description of the transaction being corrected.

    Returns:
        The agreement percentage, or ``None`` when ``description`` is blank
        (a blank description does not identify a vendor) or when no
        categorized transaction carries that description.
    """
    from collections import Counter

    from sqlalchemy import func, or_

    if not description:
        return None

    # Exact equality is kept alongside the upper-cased comparison: the
    # database-side upper() may only fold ASCII (e.g. SQLite), whereas
    # str.upper() folds Unicode, so the exact match guarantees rows with the
    # very same text are never lost.
    same_description = or_(
        Transaction.description == description,
        func.upper(Transaction.description) == description.upper(),
    )
    rows = (
        session.query(Transaction.category)
        .filter(same_description, Transaction.category.isnot(None))
        .all()
    )
    if not rows:
        return None

    counts = Counter(r[0] for r in rows)
    modal_count = counts.most_common(1)[0][1]
    return round(modal_count / len(rows) * 100, 1)


def update_transaction_category(
session: Session,
tx_id: str,
category: str,
subcategory: str,
origin: str = "unknown",
) -> bool:
from datetime import datetime, timezone
tx = session.get(Transaction, tx_id)
if tx is None:
return False
old_cat = tx.category
old_sub = tx.subcategory
old_src = tx.category_source
old_model = tx.category_model
old_conf = tx.category_confidence
category_changed = old_cat != category or old_sub != subcategory
if category_changed and old_src in ("llm", "rule", "history"):
consistency = _compute_consistency(session, tx.description or "")
correction = CategoryCorrection(
tx_id=tx_id,
original_category=old_cat,
original_subcategory=old_sub,
original_source=old_src,
original_model=old_model,
original_confidence=old_conf,
new_category=category,
new_subcategory=subcategory,
consistency_at_correction=consistency,
correction_origin=origin,
corrected_at=datetime.now(timezone.utc),
)
session.add(correction)
tx.category = category
tx.subcategory = subcategory
tx.category_confidence = "high"
Expand All @@ -339,6 +381,66 @@ def update_transaction_category(
return True


def get_correction_benchmark(session: Session) -> list[dict]:
    """Build the live implicit benchmark of LLM categorization quality per model.

    A model's total number of categorizations is reconstructed as the
    transactions still marked with an ``llm`` source plus the logged
    corrections whose original source was ``llm`` (a corrected transaction no
    longer counts as ``llm``-sourced).

    Args:
        session: Open SQLAlchemy session.

    Returns:
        One dict per model, sorted by model name, with the keys:
        ``model``, ``total_categorized``, ``total_corrections``,
        ``implicit_accuracy`` (percentage with one decimal, ``None`` when the
        total is zero), ``high_conf_errors`` (corrections whose original
        confidence was ``"high"``) and ``avg_consistency_at_error`` (mean of
        the recorded consistency values, ``None`` when none was recorded).
    """
    from collections import Counter, defaultdict

    # Transactions the user has not corrected: still attributed to an LLM.
    uncorrected_query = session.query(Transaction.category_model).filter(
        Transaction.category_source == "llm",
        Transaction.category_model.isnot(None),
    )
    uncorrected_per_model: Counter = Counter()
    for row in uncorrected_query.all():
        uncorrected_per_model[row[0]] += 1

    # Logged corrections of LLM assignments, bucketed by the model that erred.
    llm_corrections_query = session.query(CategoryCorrection).filter(
        CategoryCorrection.original_source == "llm"
    )
    corrected_per_model: dict[str, list] = defaultdict(list)
    for entry in llm_corrections_query.all():
        if not entry.original_model:
            continue  # cannot attribute the correction to any model
        corrected_per_model[entry.original_model].append(entry)

    benchmark = []
    model_names = uncorrected_per_model.keys() | corrected_per_model.keys()
    for model in sorted(model_names):
        entries = corrected_per_model.get(model, [])
        correction_count = len(entries)
        categorized_count = uncorrected_per_model.get(model, 0) + correction_count

        high_confidence_errors = 0
        consistencies = []
        for entry in entries:
            if entry.original_confidence == "high":
                high_confidence_errors += 1
            if entry.consistency_at_correction is not None:
                consistencies.append(entry.consistency_at_correction)

        if consistencies:
            mean_consistency = round(sum(consistencies) / len(consistencies), 1)
        else:
            mean_consistency = None

        if categorized_count > 0:
            accuracy = round((1 - correction_count / categorized_count) * 100, 1)
        else:
            accuracy = None

        benchmark.append(
            {
                "model": model,
                "total_categorized": categorized_count,
                "total_corrections": correction_count,
                "implicit_accuracy": accuracy,
                "high_conf_errors": high_confidence_errors,
                "avg_consistency_at_error": mean_consistency,
            }
        )
    return benchmark


def toggle_transaction_giroconto(session: Session, tx_id: str) -> tuple[bool, str]:
"""Toggle a transaction's tx_type between giroconto and expense/income.

Expand Down Expand Up @@ -655,11 +757,26 @@ def create_category_rule(

Returns (rule, created) where created=False means an existing rule was updated.
"""
existing = (
session.query(CategoryRule)
.filter(CategoryRule.pattern == pattern, CategoryRule.match_type == match_type)
.first()
)
# Pattern is stored verbatim (matching is case-insensitive at compare time:
# see categorizer.matches / get_transactions_by_rule_pattern). The upsert
# lookup is case-insensitive for contains/exact so "coop"/"COOP" dedup to one.
from sqlalchemy import func

if match_type in ("contains", "exact"):
existing = (
session.query(CategoryRule)
.filter(
func.upper(CategoryRule.pattern) == pattern.upper(),
CategoryRule.match_type == match_type,
)
.first()
)
else:
existing = (
session.query(CategoryRule)
.filter(CategoryRule.pattern == pattern, CategoryRule.match_type == match_type)
.first()
)
if existing is not None:
existing.category = category
existing.subcategory = subcategory
Expand Down Expand Up @@ -696,6 +813,7 @@ def update_category_rule(
if rule is None:
return False
if pattern is not None:
# Stored verbatim; rule matching is case-insensitive at compare time.
rule.pattern = pattern
if match_type is not None:
rule.match_type = match_type
Expand Down Expand Up @@ -1908,3 +2026,87 @@ def get_adaptive_n_ctx_cap(
# Round up to next 1024 multiple, enforce floor of 2048
cap = max(int(math.ceil(max_upper / 1024)) * 1024, 2048)
return cap


def get_counterpart_stats(
    session: Session,
    tx_types: tuple[str, ...] = ("expense", "income"),
) -> list[dict]:
    """Summarize transactions per counterpart, a counterpart being a description.

    Transactions with a NULL or empty description are ignored, as are those
    whose ``tx_type`` is not listed in ``tx_types``. Descriptions that differ
    only by letter case are merged; the first spelling encountered is the one
    reported.

    Args:
        session: Open SQLAlchemy session.
        tx_types: Transaction types to include.

    Returns:
        One dict per counterpart with the keys:
        ``description``, ``tx_count``, ``avg_amount`` (mean of absolute
        amounts), ``modal_category``, ``modal_subcategory`` (most frequent
        subcategory within the modal category), ``variability_pct`` (share of
        the counterpart's transactions that carry the modal category, as a
        percentage), ``source_mode`` (the single category source, ``"mixed"``
        or ``"unknown"``) and ``human_checked`` (any transaction validated or
        manually categorized).
    """
    from collections import Counter, defaultdict

    query = session.query(
        Transaction.description,
        Transaction.amount,
        Transaction.category,
        Transaction.subcategory,
        Transaction.category_source,
        Transaction.validated_at,
    ).filter(
        Transaction.description.isnot(None),
        Transaction.description != "",
        Transaction.tx_type.in_(tx_types),
    )

    # Bucket on the upper-cased description; remember the first original
    # spelling of each bucket for display purposes.
    by_counterpart: dict[str, list] = defaultdict(list)
    labels: dict[str, str] = {}
    for record in query.all():
        folded = (record.description or "").upper()
        by_counterpart[folded].append(record)
        if folded not in labels:
            labels[folded] = record.description

    summaries = []
    for folded, members in by_counterpart.items():
        member_count = len(members)
        absolute_total = sum(abs(float(m.amount or 0)) for m in members)

        # Most frequent category; uncategorized transactions do not vote.
        top_category = Counter(
            m.category for m in members if m.category
        ).most_common(1)
        modal_cat, modal_hits = top_category[0] if top_category else ("", 0)

        # Most frequent subcategory among transactions in the modal category.
        top_subcategory = Counter(
            m.subcategory
            for m in members
            if m.category == modal_cat and m.subcategory
        ).most_common(1)
        modal_sub = top_subcategory[0][0] if top_subcategory else ""

        distinct_sources = {m.category_source for m in members if m.category_source}
        if not distinct_sources:
            source_mode = "unknown"
        elif len(distinct_sources) == 1:
            (source_mode,) = distinct_sources
        else:
            source_mode = "mixed"

        summaries.append(
            {
                "description": labels[folded],
                "tx_count": member_count,
                "avg_amount": absolute_total / member_count,
                "modal_category": modal_cat,
                "modal_subcategory": modal_sub,
                "variability_pct": (
                    (modal_hits / member_count * 100) if member_count else 0.0
                ),
                "source_mode": source_mode,
                "human_checked": any(
                    m.category_source == "manual" or m.validated_at is not None
                    for m in members
                ),
            }
        )

    return summaries
Loading
Loading