diff --git a/lib/BlockedFlags.groovy b/lib/BlockedFlags.groovy
new file mode 100644
index 0000000..c11d297
--- /dev/null
+++ b/lib/BlockedFlags.groovy
@@ -0,0 +1,149 @@
+/**
+ * Centralised registry of DIA-NN flags managed by the pipeline.
+ *
+ * WHY THIS EXISTS:
+ * The pipeline controls certain DIA-NN flags directly (e.g. --threads, --out, --qvalue)
+ * based on pipeline parameters, SDRF metadata, or step-specific logic. If a user also
+ * passes these via --extra_args, the flag would appear twice in the DIA-NN command —
+ * causing silent conflicts or undefined behaviour. This registry strips managed flags
+ * from extra_args with a warning, so the pipeline's values always take precedence.
+ *
+ * WHY A GROOVY CLASS AND NOT CONFIG FILES:
+ * Blocked flags are a safety mechanism. Defining them in Nextflow config (e.g. via
+ * ext.blocked_flags) would allow users to accidentally override or disable them in
+ * custom configs. A compiled Groovy class in lib/ cannot be overridden by user configs,
+ * ensuring the safety net is always active. It also provides a single file to audit
+ * and edit when adding new managed flags.
+ *
+ * HOW TO ADD A NEW BLOCKED FLAG:
+ * - If the flag applies to ALL DIA-NN steps, add it to the COMMON list below.
+ * - If it applies to specific steps only, add it to the relevant MODULE_FLAGS entry.
+ * - No changes needed in the module .nf files — they call BlockedFlags.strip() which
+ *   reads from this registry automatically.
+ *
+ * Nextflow auto-loads all classes in lib/, so this is available in all modules.
+ */
+class BlockedFlags {
+
+    // Flags common to ALL DIA-NN steps.
+    // These are always set by the pipeline from config/SDRF, so user overrides would conflict.
+    // --temp/--threads/--verbose: controlled by Nextflow task resources and debug_level param
+    // --lib/--f/--fasta: set from workflow inputs (speclib, ms_files, database)
+    // --var-mod/--fixed-mod/--monitor-mod/--channels/--lib-fixed-mod/--original-mods: injected from diann_config.cfg
+    // --dda: auto-detected from SDRF or set via --dda param
+    // --proteoforms/--peptidoforms/--no-peptidoforms: controlled by scoring_mode param
+    private static final List<String> COMMON = [
+        '--temp', '--threads', '--verbose', '--lib', '--f', '--fasta',
+        '--monitor-mod', '--var-mod', '--fixed-mod', '--dda',
+        '--channels', '--lib-fixed-mod', '--original-mods',
+        '--proteoforms', '--peptidoforms', '--no-peptidoforms',
+    ]
+
+    // Per-module additional blocked flags (on top of COMMON).
+    //
+    // Flags are blocked for two reasons:
+    //   1. "Pipeline-managed": the pipeline sets them explicitly from params/SDRF/metadata.
+    //      Allowing user overrides would create duplicate/conflicting flags.
+    //   2. "No-effect guard": the flag has no effect in this step (e.g., protein inference
+    //      during library assembly). Blocking prevents users from passing them and wrongly
+    //      believing they had an effect.
+    //
+    // When reason is non-obvious, a comment explains why the flag is blocked.
+    private static final Map<String, List<String>> MODULE_FLAGS = [
+        INSILICO_LIBRARY_GENERATION: [
+            // Pipeline-managed: set from params (library generation settings)
+            '--use-quant', '--no-main-report', '--matrices', '--out',
+            '--fasta-search', '--predictor', '--gen-spec-lib',
+            '--missed-cleavages', '--min-pep-len', '--max-pep-len',
+            '--min-pr-charge', '--max-pr-charge', '--var-mods',
+            '--min-pr-mz', '--max-pr-mz', '--min-fr-mz', '--max-fr-mz',
+            '--met-excision', '--light-models',
+            '--infin-dia', '--pre-select',
+        ],
+        PRELIMINARY_ANALYSIS: [
+            // Pipeline-managed: set from params and SDRF calibration metadata
+            '--use-quant', '--gen-spec-lib', '--out-lib', '--matrices', '--out',
+            '--mass-acc', '--mass-acc-ms1', '--window',
+            '--quick-mass-acc', '--min-corr', '--corr-diff', '--time-corr-only',
+            '--min-pr-mz', '--max-pr-mz', '--min-fr-mz', '--max-fr-mz',
+            // Pipeline-managed: preliminary step disables protein inference (--no-prot-inf)
+            '--no-prot-inf',
+        ],
+        ASSEMBLE_EMPIRICAL_LIBRARY: [
+            // Pipeline-managed: set from params and calibration results
+            '--no-main-report', '--no-ifs-removal', '--matrices', '--out',
+            '--mass-acc', '--mass-acc-ms1', '--window',
+            '--individual-mass-acc', '--individual-windows',
+            '--out-lib', '--use-quant', '--gen-spec-lib', '--rt-profiling',
+            // No-effect guard: protein inference flags have no effect during library assembly
+            // (--gen-spec-lib produces a spectral library, not a quantified report).
+            // Blocked to prevent users from thinking they affect this step.
+            '--no-prot-inf', '--relaxed-prot-inf', '--pg-level',
+        ],
+        INDIVIDUAL_ANALYSIS: [
+            // Pipeline-managed: set from params and calibrated values from assembly step
+            '--use-quant', '--gen-spec-lib', '--out-lib', '--matrices', '--out', '--rt-profiling',
+            '--mass-acc', '--mass-acc-ms1', '--window',
+            '--no-ifs-removal', '--no-main-report',
+            // Pipeline-managed: protein inference set by pipeline (--relaxed-prot-inf --pg-level)
+            '--relaxed-prot-inf', '--pg-level',
+            '--min-pr-mz', '--max-pr-mz', '--min-fr-mz', '--max-fr-mz',
+            '--no-prot-inf',
+        ],
+        FINAL_QUANTIFICATION: [
+            // Pipeline-managed: set from params for final report generation
+            '--no-main-report', '--gen-spec-lib', '--out-lib', '--no-ifs-removal',
+            '--use-quant', '--matrices', '--out',
+            // Pipeline-managed: protein inference (--relaxed-prot-inf --pg-level)
+            '--relaxed-prot-inf', '--pg-level',
+            // Pipeline-managed: FDR controls (precursor_qvalue, matrix_qvalue, matrix_spec_q)
+            '--qvalue', '--matrix-qvalue', '--matrix-spec-q',
+            '--window', '--individual-windows',
+            '--species-genes', '--report-decoys', '--xic', '--no-norm',
+            '--export-quant', '--site-ms1-quant',
+            '--channel-run-norm', '--channel-spec-norm',
+            '--no-prot-inf',
+        ],
+    ]
+
+    /**
+     * Get the full blocked flags list for a module (COMMON + module-specific).
+     *
+     * @param moduleName process name used as the MODULE_FLAGS key, e.g. 'FINAL_QUANTIFICATION';
+     *                   names without an entry get the COMMON flags only
+     * @return a new, de-duplicated list of blocked flags (safe for the caller to mutate)
+     */
+    static List<String> forModule(String moduleName) {
+        def moduleFlags = MODULE_FLAGS[moduleName] ?: []
+        return (COMMON + moduleFlags).unique()
+    }
+
+    /**
+     * Strip blocked flags from an args string, logging a warning for each.
+     * Returns the cleaned args string.
+     *
+     * A blocked flag is removed together with the value tokens that follow it, i.e. every
+     * subsequent whitespace-separated token that does not itself look like a flag
+     * (one or two dashes followed by a letter). Flags are matched as whole tokens only,
+     * so the '--flag=value' form is left untouched.
+     *
+     * @param moduleName  e.g. 'FINAL_QUANTIFICATION'
+     * @param args        the raw args string from task.ext.args
+     * @param log         the Nextflow log object
+     * @return cleaned args string ('' when args is null or empty)
+     */
+    static String strip(String moduleName, String args, def log) {
+        if (!args) return ''
+        def blocked = forModule(moduleName)
+        // Sort by length descending so longer flags (e.g. --mass-acc-ms1) match before shorter prefixes (--mass-acc)
+        // forModule() returns a fresh list, so the in-place sort cannot disturb the registry.
+        blocked.sort { a -> -a.length() }.each { flag ->
+            def flagPattern = '(?<=^|\\s)' + java.util.regex.Pattern.quote(flag) + '(?=\\s|\$)(\\s+(?!-{1,2}[a-zA-Z])\\S+)*'
+            if (args =~ flagPattern) {
+                log.warn "DIA-NN: '${flag}' is managed by the pipeline for ${moduleName} and will be stripped."
+                args = args.replaceAll(flagPattern, '').trim()
+            }
+        }
+        return args
+    }
+}
diff --git a/modules/local/diann/assemble_empirical_library/main.nf b/modules/local/diann/assemble_empirical_library/main.nf index 2323cb2..70ef8aa 100644 --- a/modules/local/diann/assemble_empirical_library/main.nf +++ b/modules/local/diann/assemble_empirical_library/main.nf @@ -26,23 +26,9 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { script: def args = task.ext.args ?: '' - // Strip flags that are managed by the pipeline to prevent silent conflicts - def blocked = ['--no-main-report', '--no-ifs-removal', '--matrices', '--out', - '--temp', '--threads', '--verbose', '--lib', '--f', '--fasta', - '--mass-acc', '--mass-acc-ms1', '--window', - '--individual-mass-acc', '--individual-windows', - '--out-lib', '--use-quant', '--gen-spec-lib', '--rt-profiling', - '--monitor-mod', '--var-mod', '--fixed-mod', '--dda', - '--channels', '--lib-fixed-mod', '--original-mods', - '--proteoforms', '--peptidoforms', '--no-peptidoforms'] - // Sort by length descending so longer flags (e.g. --mass-acc-ms1) are matched before shorter prefixes (--mass-acc) - blocked.sort { a -> -a.length() }.each { flag -> - def flagPattern = '(?<=^|\\s)' + java.util.regex.Pattern.quote(flag) + '(?=\\s|\$)(\\s+(?!-{1,2}[a-zA-Z])\\S+)*' - if (args =~ flagPattern) { - log.warn "DIA-NN: '${flag}' is managed by the pipeline for ASSEMBLE_EMPIRICAL_LIBRARY and will be stripped." - args = args.replaceAll(flagPattern, '').trim() - } - } + // Strip flags managed by the pipeline from extra_args to prevent silent conflicts.
+ // Blocked flags are defined centrally in lib/BlockedFlags.groovy — edit there, not here. + args = BlockedFlags.strip('ASSEMBLE_EMPIRICAL_LIBRARY', args, log) if (params.mass_acc_automatic) { mass_acc = '--individual-mass-acc' diff --git a/modules/local/diann/final_quantification/main.nf b/modules/local/diann/final_quantification/main.nf index 8f517c1..48404b2 100644 --- a/modules/local/diann/final_quantification/main.nf +++ b/modules/local/diann/final_quantification/main.nf @@ -45,23 +45,9 @@ process FINAL_QUANTIFICATION { script: def args = task.ext.args ?: '' - // Strip flags that are managed by the pipeline to prevent silent conflicts - def blocked = ['--no-main-report', '--gen-spec-lib', '--out-lib', '--no-ifs-removal', - '--temp', '--threads', '--verbose', '--lib', '--f', '--fasta', - '--use-quant', '--matrices', '--out', '--relaxed-prot-inf', '--pg-level', - '--qvalue', '--matrix-qvalue', '--matrix-spec-q', '--window', '--individual-windows', - '--species-genes', '--report-decoys', '--xic', '--no-norm', - '--monitor-mod', '--var-mod', '--fixed-mod', '--dda', '--export-quant', '--site-ms1-quant', - '--channels', '--lib-fixed-mod', '--original-mods', - '--proteoforms', '--peptidoforms', '--no-peptidoforms'] - // Sort by length descending so longer flags (e.g. --individual-windows) are matched before shorter prefixes (--window) - blocked.sort { a -> -a.length() }.each { flag -> - def flagPattern = '(?<=^|\\s)' + java.util.regex.Pattern.quote(flag) + '(?=\\s|\$)(\\s+(?!-{1,2}[a-zA-Z])\\S+)*' - if (args =~ flagPattern) { - log.warn "DIA-NN: '${flag}' is managed by the pipeline for FINAL_QUANTIFICATION and will be stripped." - args = args.replaceAll(flagPattern, '').trim() - } - } + // Strip flags managed by the pipeline from extra_args to prevent silent conflicts. + // Blocked flags are defined centrally in lib/BlockedFlags.groovy — edit there, not here. + args = BlockedFlags.strip('FINAL_QUANTIFICATION', args, log) scan_window = params.scan_window_automatic ? 
"--individual-windows" : "--window $params.scan_window" species_genes = params.species_genes ? "--species-genes": "" diff --git a/modules/local/diann/individual_analysis/main.nf b/modules/local/diann/individual_analysis/main.nf index e53676c..6f2027d 100644 --- a/modules/local/diann/individual_analysis/main.nf +++ b/modules/local/diann/individual_analysis/main.nf @@ -22,23 +22,9 @@ process INDIVIDUAL_ANALYSIS { script: def args = task.ext.args ?: '' - // Strip flags that are managed by the pipeline to prevent silent conflicts - def blocked = ['--use-quant', '--gen-spec-lib', '--out-lib', '--matrices', '--out', '--rt-profiling', - '--temp', '--threads', '--verbose', '--lib', '--f', '--fasta', - '--mass-acc', '--mass-acc-ms1', '--window', - '--no-ifs-removal', '--no-main-report', '--relaxed-prot-inf', '--pg-level', - '--min-pr-mz', '--max-pr-mz', '--min-fr-mz', '--max-fr-mz', - '--monitor-mod', '--var-mod', '--fixed-mod', '--dda', - '--channels', '--lib-fixed-mod', '--original-mods', - '--proteoforms', '--peptidoforms', '--no-peptidoforms'] - // Sort by length descending so longer flags (e.g. --mass-acc-ms1) are matched before shorter prefixes (--mass-acc) - blocked.sort { a -> -a.length() }.each { flag -> - def flagPattern = '(?<=^|\\s)' + java.util.regex.Pattern.quote(flag) + '(?=\\s|\$)(\\s+(?!-{1,2}[a-zA-Z])\\S+)*' - if (args =~ flagPattern) { - log.warn "DIA-NN: '${flag}' is managed by the pipeline for INDIVIDUAL_ANALYSIS and will be stripped." - args = args.replaceAll(flagPattern, '').trim() - } - } + // Strip flags managed by the pipeline from extra_args to prevent silent conflicts. + // Blocked flags are defined centrally in lib/BlockedFlags.groovy — edit there, not here. 
+ args = BlockedFlags.strip('INDIVIDUAL_ANALYSIS', args, log) // Warn about flags that override pipeline-computed calibration values (not blocked, but may change behaviour) ['--individual-windows', '--individual-mass-acc'].each { flag -> diff --git a/modules/local/diann/insilico_library_generation/main.nf b/modules/local/diann/insilico_library_generation/main.nf index 7ca1723..d7e695e 100644 --- a/modules/local/diann/insilico_library_generation/main.nf +++ b/modules/local/diann/insilico_library_generation/main.nf @@ -24,23 +24,9 @@ process INSILICO_LIBRARY_GENERATION { script: def args = task.ext.args ?: '' - // Strip flags that are managed by the pipeline to prevent silent conflicts - def blocked = ['--use-quant', '--no-main-report', '--matrices', '--out', - '--temp', '--threads', '--verbose', '--lib', '--f', '--fasta', - '--fasta-search', '--predictor', '--gen-spec-lib', - '--missed-cleavages', '--min-pep-len', '--max-pep-len', - '--min-pr-charge', '--max-pr-charge', '--var-mods', - '--min-pr-mz', '--max-pr-mz', '--min-fr-mz', '--max-fr-mz', - '--met-excision', '--monitor-mod', '--dda', '--light-models', - '--infin-dia', '--pre-select', '--proteoforms', '--peptidoforms', '--no-peptidoforms'] - // Sort by length descending so longer flags (e.g. --fasta-search) are matched before shorter prefixes (--fasta, --f) - blocked.sort { a -> -a.length() }.each { flag -> - def flagPattern = '(?<=^|\\s)' + java.util.regex.Pattern.quote(flag) + '(?=\\s|\$)(\\s+(?!-{1,2}[a-zA-Z])\\S+)*' - if (args =~ flagPattern) { - log.warn "DIA-NN: '${flag}' is managed by the pipeline for INSILICO_LIBRARY_GENERATION and will be stripped." - args = args.replaceAll(flagPattern, '').trim() - } - } + // Strip flags managed by the pipeline from extra_args to prevent silent conflicts. + // Blocked flags are defined centrally in lib/BlockedFlags.groovy — edit there, not here. + args = BlockedFlags.strip('INSILICO_LIBRARY_GENERATION', args, log) min_pr_mz = params.min_pr_mz ? 
"--min-pr-mz $params.min_pr_mz":"" max_pr_mz = params.max_pr_mz ? "--max-pr-mz $params.max_pr_mz":"" diff --git a/modules/local/diann/preliminary_analysis/main.nf b/modules/local/diann/preliminary_analysis/main.nf index ad44527..a4616ea 100644 --- a/modules/local/diann/preliminary_analysis/main.nf +++ b/modules/local/diann/preliminary_analysis/main.nf @@ -22,23 +22,9 @@ process PRELIMINARY_ANALYSIS { script: def args = task.ext.args ?: '' - // Strip flags that are managed by the pipeline to prevent silent conflicts - def blocked = ['--use-quant', '--gen-spec-lib', '--out-lib', '--matrices', '--out', - '--temp', '--threads', '--verbose', '--lib', '--f', '--fasta', - '--mass-acc', '--mass-acc-ms1', '--window', - '--quick-mass-acc', '--min-corr', '--corr-diff', '--time-corr-only', - '--min-pr-mz', '--max-pr-mz', '--min-fr-mz', '--max-fr-mz', - '--monitor-mod', '--var-mod', '--fixed-mod', '--no-prot-inf', '--dda', - '--channels', '--lib-fixed-mod', '--original-mods', - '--proteoforms', '--peptidoforms', '--no-peptidoforms'] - // Sort by length descending so longer flags (e.g. --mass-acc-ms1) are matched before shorter prefixes (--mass-acc) - blocked.sort { a -> -a.length() }.each { flag -> - def flagPattern = '(?<=^|\\s)' + java.util.regex.Pattern.quote(flag) + '(?=\\s|\$)(\\s+(?!-{1,2}[a-zA-Z])\\S+)*' - if (args =~ flagPattern) { - log.warn "DIA-NN: '${flag}' is managed by the pipeline for PRELIMINARY_ANALYSIS and will be stripped." - args = args.replaceAll(flagPattern, '').trim() - } - } + // Strip flags managed by the pipeline from extra_args to prevent silent conflicts. + // Blocked flags are defined centrally in lib/BlockedFlags.groovy — edit there, not here. + args = BlockedFlags.strip('PRELIMINARY_ANALYSIS', args, log) // Performance flags for preliminary analysis calibration step quick_mass_acc = params.quick_mass_acc ? 
"--quick-mass-acc" : "" diff --git a/workflows/dia.nf b/workflows/dia.nf index 028906e..6c53625 100644 --- a/workflows/dia.nf +++ b/workflows/dia.nf @@ -51,14 +51,22 @@ workflow DIA { } // Version guard for DIA-NN 2.0+ features - if ((params.light_models || params.export_quant || params.site_ms1_quant) && VersionUtils.versionLessThan(params.diann_version, '2.0')) { + if ((params.light_models || params.export_quant || params.site_ms1_quant || params.channel_run_norm || params.channel_spec_norm) && VersionUtils.versionLessThan(params.diann_version, '2.0')) { def enabled = [] if (params.light_models) enabled << '--light-models' if (params.export_quant) enabled << '--export-quant' if (params.site_ms1_quant) enabled << '--site-ms1-quant' + if (params.channel_run_norm) enabled << '--channel-run-norm' + if (params.channel_spec_norm) enabled << '--channel-spec-norm' error("${enabled.join(', ')} require DIA-NN >= 2.0. Current version: ${params.diann_version}. Use -profile diann_v2_1_0 or later") } + // Warn about contradictory normalization flags + if (!params.normalize && (params.channel_run_norm || params.channel_spec_norm)) { + log.warn "Both --normalize false (adds --no-norm) and channel normalization flags are set. " + + "These may conflict — --no-norm disables cross-run normalization while channel normalization requires it." + } + ch_searchdb = channel.fromPath(params.database, checkIfExists: true) .ifEmpty { error("No protein database found at '${params.database}'. Provide --database ") } .first()