ylab-hi · riasc · May 25, 2026 · May 25, 2026 · May 25, 2026 · May 25, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Changed
+
+- **Improve per-vartype readability of the prioritization log**: `BindingAffinities.start` / `collect_binding_affinities` in `workflow/scripts/prioritization/prediction.py` now print a banner header at the start of each vartype (the vartype name, e.g. `somatic.snvs`, framed by two 70-character `=` rules), throttle the per-unit progress line to ~10 messages per vartype (`step = max(1, total // 10)`, always emitting the final `[total/total]` line), and replace the bare `Done` with a closing summary line `[<vartype>] done in X.X min`. Reduces a ~2000-line prioritization log to ~30 useful lines plus 3 clean section boundaries. Also removes two leftover debug `print()` calls in `reference.py:Counts.__init__` that were dumping the count-table header columns and group slice on every run. Closes #121. ([#121](https://github.com/ylab-hi/ScanNeo2/issues/121), [#140](https://github.com/ylab-hi/ScanNeo2/pull/140))
+
 ### Fixed
 
 - **Make multi-line `params: extra=...` blocks snakefmt-cross-version compatible**: the Snakemake Workflow Catalog pins `snakefmt 0.11.5` while our CI pins `2.0.0`, and the two versions indent backslash-continued multi-line strings inside `params:` differently — so a tree clean under one version fails the other's `--check`. Three rules (`star_align_fastq`, `star_align_bamfile`, `filter_short_indels_m2`) rewritten as adjacent string literals inside `(...)`, eliminating the multi-line continuation entirely; both snakefmt versions now agree the file is clean. The argument string passed to each wrapper is identical (modulo single-spacing between args). ([#139](https://github.com/ylab-hi/ScanNeo2/pull/139))

diff --git a/workflow/scripts/prioritization/prediction.py b/workflow/scripts/prioritization/prediction.py
@@ -6,6 +6,7 @@
 """
 
 import tempfile
+import time
 import os
 import contextlib
 import concurrent.futures
@@ -23,6 +24,7 @@ def __init__(self, threads):
 
     def start(self, allele_file, epitope_lengths, output_dir, mhc_class, vartype):
         # create temorary_directory
+        t_start = time.time()
         with tempfile.TemporaryDirectory() as tmp_seqs:
             self.get_alleles(allele_file)
 
@@ -112,7 +114,11 @@ def start(self, allele_file, epitope_lengths, output_dir, mhc_class, vartype):
 
             total_seqs = max((wt_cnt.get(epilens[0], 1),
                               mt_cnt.get(epilens[0], 1))) - 1
-            print(f"calculate binding affinities for {total_seqs} sequences "
+            bar = "=" * 70
+            print(bar, flush=True)
+            print(f"  {vartype}", flush=True)
+            print(bar, flush=True)
+            print(f"  calculate binding affinities for {total_seqs} sequences "
                   f"({len(self.alleles)} alleles, epitope lengths: "
                   f"{','.join(map(str, epilens))})...", flush=True)
 
@@ -122,8 +128,10 @@ def start(self, allele_file, epitope_lengths, output_dir, mhc_class, vartype):
                 epilens,
                 mhc_class,
                 self.threads)
-            print("Done", flush=True)
-
+            elapsed = (time.time() - t_start) / 60
+            print(f"  [{vartype}] done in {elapsed:.1f} min", flush=True)
+            print("", flush=True)
+
             with open(os.path.join(output_dir,
                                    f"{vartype}_{mhc_class}_neoepitopes.txt"), "w") as outfile:
                 BindingAffinities.write_header(outfile)
@@ -283,6 +291,9 @@ def collect_binding_affinities(alleles, fnames, epilens, mhc_class, threads):
             # one pool over all units -- the prediction tool numbers each batch
             # file from 1, so offset translates that to a global seqnum
             completed = 0
+            total = len(units)
+            # emit ~10 progress lines per vartype regardless of unit count
+            step = max(1, total // 10)
             with concurrent.futures.ThreadPoolExecutor(
                     max_workers=int(threads)) as executor:
                 futures = {}
@@ -296,7 +307,8 @@ def collect_binding_affinities(alleles, fnames, epilens, mhc_class, threads):
                 for future in concurrent.futures.as_completed(futures):
                     group, epilen, offset = futures[future]
                     completed += 1
-                    print(f"  [{completed}/{len(units)}] completed", flush=True)
+                    if completed % step == 0 or completed == total:
+                        print(f"  [{completed}/{total}] completed", flush=True)
                     dest = affinities[group][epilen]
                     for seqnum, epitopes in future.result().items():
                         global_seqnum = offset + seqnum

diff --git a/workflow/scripts/prioritization/reference.py b/workflow/scripts/prioritization/reference.py
@@ -70,9 +70,7 @@ def __init__(self, countFile):
         if countFile is not None and countFile != "":
             with open(countFile, 'r') as count_fh:
                 lines = count_fh.readlines()
-            print(lines[0].rstrip().split('\t'))
             groups = lines[0].rstrip().split('\t')[3:]
-            print(groups)
             for line in lines[1:]:
                 cols = line.rstrip().split('\t')