diff --git a/CHANGELOG.md b/CHANGELOG.md index 707da11..50fd3fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- **Improve prioritization log readability per-vartype**: `BindingAffinities.start` / `collect_binding_affinities` in `workflow/scripts/prioritization/prediction.py` now print a banner header (`=== somatic.snvs ===`) at the start of each vartype, throttle the per-unit progress line to ~10 messages per vartype (`step = max(1, total // 10)`, always emits the final), and replace the bare `Done` with a closing summary line `[<vartype>] done in X.X min`. Reduces a ~2000-line prioritization log to ~30 useful lines plus 3 clean section boundaries. Also removes two leftover debug `print()` calls in `reference.py:Counts.__init__` that were dumping the count-table header columns and group slice on every run. Closes #121. ([#121](https://github.com/ylab-hi/ScanNeo2/issues/121), [#140](https://github.com/ylab-hi/ScanNeo2/pull/140)) + ### Fixed - **Make multi-line `params: extra=...` blocks snakefmt-cross-version compatible**: the Snakemake Workflow Catalog pins `snakefmt 0.11.5` while our CI pins `2.0.0`, and the two versions indent backslash-continued multi-line strings inside `params:` differently — so a tree clean under one version fails the other's `--check`. Three rules (`star_align_fastq`, `star_align_bamfile`, `filter_short_indels_m2`) rewritten as adjacent string literals inside `(...)`, eliminating the multi-line continuation entirely; both snakefmt versions now agree the file is clean. The argument string passed to each wrapper is identical (modulo single-spacing between args). 
([#139](https://github.com/ylab-hi/ScanNeo2/pull/139)) diff --git a/workflow/scripts/prioritization/prediction.py b/workflow/scripts/prioritization/prediction.py index 62b636f..e8a7cf9 100644 --- a/workflow/scripts/prioritization/prediction.py +++ b/workflow/scripts/prioritization/prediction.py @@ -6,6 +6,7 @@ """ import tempfile +import time import os import contextlib import concurrent.futures @@ -23,6 +24,7 @@ def __init__(self, threads): def start(self, allele_file, epitope_lengths, output_dir, mhc_class, vartype): # create temorary_directory + t_start = time.time() with tempfile.TemporaryDirectory() as tmp_seqs: self.get_alleles(allele_file) @@ -112,7 +114,11 @@ def start(self, allele_file, epitope_lengths, output_dir, mhc_class, vartype): total_seqs = max((wt_cnt.get(epilens[0], 1), mt_cnt.get(epilens[0], 1))) - 1 - print(f"calculate binding affinities for {total_seqs} sequences " + bar = "=" * 70 + print(bar, flush=True) + print(f" {vartype}", flush=True) + print(bar, flush=True) + print(f" calculate binding affinities for {total_seqs} sequences " f"({len(self.alleles)} alleles, epitope lengths: " f"{','.join(map(str, epilens))})...", flush=True) @@ -122,8 +128,10 @@ def start(self, allele_file, epitope_lengths, output_dir, mhc_class, vartype): epilens, mhc_class, self.threads) - print("Done", flush=True) - + elapsed = (time.time() - t_start) / 60 + print(f" [{vartype}] done in {elapsed:.1f} min", flush=True) + print("", flush=True) + with open(os.path.join(output_dir, f"{vartype}_{mhc_class}_neoepitopes.txt"), "w") as outfile: BindingAffinities.write_header(outfile) @@ -283,6 +291,9 @@ def collect_binding_affinities(alleles, fnames, epilens, mhc_class, threads): # one pool over all units -- the prediction tool numbers each batch # file from 1, so offset translates that to a global seqnum completed = 0 + total = len(units) + # emit ~10 progress lines per vartype regardless of unit count + step = max(1, total // 10) with concurrent.futures.ThreadPoolExecutor( 
max_workers=int(threads)) as executor: futures = {} @@ -296,7 +307,8 @@ def collect_binding_affinities(alleles, fnames, epilens, mhc_class, threads): for future in concurrent.futures.as_completed(futures): group, epilen, offset = futures[future] completed += 1 - print(f" [{completed}/{len(units)}] completed", flush=True) + if completed % step == 0 or completed == total: + print(f" [{completed}/{total}] completed", flush=True) dest = affinities[group][epilen] for seqnum, epitopes in future.result().items(): global_seqnum = offset + seqnum diff --git a/workflow/scripts/prioritization/reference.py b/workflow/scripts/prioritization/reference.py index 9ad3257..65f5515 100644 --- a/workflow/scripts/prioritization/reference.py +++ b/workflow/scripts/prioritization/reference.py @@ -70,9 +70,7 @@ def __init__(self, countFile): if countFile is not None and countFile != "": with open(countFile, 'r') as count_fh: lines = count_fh.readlines() - print(lines[0].rstrip().split('\t')) groups = lines[0].rstrip().split('\t')[3:] - print(groups) for line in lines[1:]: cols = line.rstrip().split('\t')