Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Changed

- **Improve prioritization log readability per-vartype**: `BindingAffinities.start` / `collect_binding_affinities` in `workflow/scripts/prioritization/prediction.py` now print a banner header (`=== somatic.snvs ===`) at the start of each vartype, throttle the per-unit progress line to ~10 messages per vartype (`step = max(1, total // 10)`, while always emitting the final `[total/total]` line), and replace the bare `Done` with a closing summary line `[<vartype>] done in X.X min`. Reduces a ~2000-line prioritization log to ~30 useful lines plus 3 clean section boundaries. Also removes two leftover debug `print()` calls in `reference.py:Counts.__init__` that were dumping the count-table header columns and group slice on every run. Closes #121. ([#121](https://github.com/ylab-hi/ScanNeo2/issues/121), [#140](https://github.com/ylab-hi/ScanNeo2/pull/140))

### Fixed

- **Make multi-line `params: extra=...` blocks snakefmt-cross-version compatible**: the Snakemake Workflow Catalog pins `snakefmt 0.11.5` while our CI pins `2.0.0`, and the two versions indent backslash-continued multi-line strings inside `params:` differently — so a tree clean under one version fails the other's `--check`. Three rules (`star_align_fastq`, `star_align_bamfile`, `filter_short_indels_m2`) rewritten as adjacent string literals inside `(...)`, eliminating the multi-line continuation entirely; both snakefmt versions now agree the file is clean. The argument string passed to each wrapper is identical (modulo single-spacing between args). ([#139](https://github.com/ylab-hi/ScanNeo2/pull/139))
Expand Down
20 changes: 16 additions & 4 deletions workflow/scripts/prioritization/prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

import tempfile
import time
import os
import contextlib
import concurrent.futures
Expand All @@ -23,6 +24,7 @@ def __init__(self, threads):

def start(self, allele_file, epitope_lengths, output_dir, mhc_class, vartype):
# create temporary directory
t_start = time.time()
with tempfile.TemporaryDirectory() as tmp_seqs:
self.get_alleles(allele_file)

Expand Down Expand Up @@ -112,7 +114,11 @@ def start(self, allele_file, epitope_lengths, output_dir, mhc_class, vartype):

total_seqs = max((wt_cnt.get(epilens[0], 1),
mt_cnt.get(epilens[0], 1))) - 1
print(f"calculate binding affinities for {total_seqs} sequences "
bar = "=" * 70
print(bar, flush=True)
print(f" {vartype}", flush=True)
print(bar, flush=True)
print(f" calculate binding affinities for {total_seqs} sequences "
f"({len(self.alleles)} alleles, epitope lengths: "
f"{','.join(map(str, epilens))})...", flush=True)

Expand All @@ -122,8 +128,10 @@ def start(self, allele_file, epitope_lengths, output_dir, mhc_class, vartype):
epilens,
mhc_class,
self.threads)
print("Done", flush=True)

elapsed = (time.time() - t_start) / 60
print(f" [{vartype}] done in {elapsed:.1f} min", flush=True)
print("", flush=True)

with open(os.path.join(output_dir,
f"{vartype}_{mhc_class}_neoepitopes.txt"), "w") as outfile:
BindingAffinities.write_header(outfile)
Expand Down Expand Up @@ -283,6 +291,9 @@ def collect_binding_affinities(alleles, fnames, epilens, mhc_class, threads):
# one pool over all units -- the prediction tool numbers each batch
# file from 1, so offset translates that to a global seqnum
completed = 0
total = len(units)
# emit ~10 progress lines per vartype regardless of unit count
step = max(1, total // 10)
with concurrent.futures.ThreadPoolExecutor(
max_workers=int(threads)) as executor:
futures = {}
Expand All @@ -296,7 +307,8 @@ def collect_binding_affinities(alleles, fnames, epilens, mhc_class, threads):
for future in concurrent.futures.as_completed(futures):
group, epilen, offset = futures[future]
completed += 1
print(f" [{completed}/{len(units)}] completed", flush=True)
if completed % step == 0 or completed == total:
print(f" [{completed}/{total}] completed", flush=True)
dest = affinities[group][epilen]
for seqnum, epitopes in future.result().items():
global_seqnum = offset + seqnum
Expand Down
2 changes: 0 additions & 2 deletions workflow/scripts/prioritization/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,7 @@ def __init__(self, countFile):
if countFile is not None and countFile != "":
with open(countFile, 'r') as count_fh:
lines = count_fh.readlines()
print(lines[0].rstrip().split('\t'))
groups = lines[0].rstrip().split('\t')[3:]
print(groups)
for line in lines[1:]:
cols = line.rstrip().split('\t')

Expand Down
Loading