Skip to content

Commit de817e1

Browse files
authored
fix(global-standards): Enable multiple precursors for the same reference peptide (#180)
1 parent c7d33c0 commit de817e1

2 files changed

Lines changed: 135 additions & 25 deletions

File tree

R/utils_normalize.R

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -192,32 +192,31 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s
192192
.normalizeGlobalStandards = function(input, peptides_dict, standards) {
193193
PeptideSequence = PEPTIDE = PROTEIN = median_by_fraction = NULL
194194
Standard = FRACTION = LABEL = ABUNDANCE = RUN = GROUP = NULL
195-
196-
proteins = as.character(unique(input$PROTEIN))
197-
means_by_standard = unique(input[, list(RUN)])
198-
for (standard_id in seq_along(standards)) {
199-
peptide_name = unlist(peptides_dict[PeptideSequence == standards[standard_id],
200-
as.character(PEPTIDE)], FALSE, FALSE)
201-
if (length(peptide_name) > 0) {
202-
standard = input[PEPTIDE == peptide_name, ]
203-
} else {
204-
if (standards[standard_id] %in% proteins) {
205-
standard = input[PROTEIN == standards[standard_id], ]
206-
} else {
207-
msg = paste("global standard peptides or proteins, ",
208-
standards[standard_id],", is not in dataset.",
209-
"Please check whether 'nameStandards' input is correct or not.")
210-
getOption("MSstatsLog")("ERROR", msg)
211-
stop(msg)
212-
}
213-
}
214-
mean_by_run = standard[GROUP != "0" & !is.na(ABUNDANCE),
215-
list(mean_abundance = mean(ABUNDANCE, na.rm = TRUE)),
216-
by = "RUN"]
217-
colnames(mean_by_run)[2] = paste0("meanStandard", standard_id)
218-
means_by_standard = merge(means_by_standard, mean_by_run,
219-
by = "RUN", all.x = TRUE)
195+
196+
input_with_peptides <- merge(input, peptides_dict, by = "PEPTIDE", all.x = TRUE)
197+
standards_data <- input_with_peptides[
198+
(PeptideSequence %in% standards | PROTEIN %in% standards) &
199+
GROUP != "0" &
200+
!is.na(ABUNDANCE)
201+
]
202+
missing_standards <- standards[!standards %in% c(standards_data$PeptideSequence, standards_data$PROTEIN)]
203+
if (length(missing_standards) > 0) {
204+
msg <- paste("Global standard peptides or proteins,",
205+
paste(missing_standards, collapse = ", "),
206+
"are not in dataset. Please check whether 'nameStandards' input is correct.")
207+
getOption("MSstatsLog")("ERROR", msg)
208+
stop(msg)
220209
}
210+
standards_data[, standard := ifelse(!is.na(PeptideSequence) & PeptideSequence %in% standards,
211+
PeptideSequence,
212+
PROTEIN)]
213+
means_by_standard <- standards_data[,
214+
list(mean_abundance = mean(ABUNDANCE, na.rm = TRUE)),
215+
by = .(RUN, standard)
216+
]
217+
means_by_standard <- dcast(means_by_standard,
218+
RUN ~ standard,
219+
value.var = "mean_abundance")
221220
means_by_standard = data.table::melt(means_by_standard, id.vars = "RUN",
222221
variable.name = "Standard", value.name = "ABUNDANCE")
223222
means_by_standard[, mean_by_run := mean(ABUNDANCE, na.rm = TRUE), by = "RUN"]
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
# Test for .normalizeGlobalStandards function ----------------------------------
2+
3+
# Setup test data ---------------------------------------------------------------
4+
5+
create_peptide_dictionary <- function() {
6+
data.table::data.table(
7+
PeptideSequence = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSRD"),
8+
PrecursorCharge = c(3, 2, 3),
9+
PEPTIDE = c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAAAGAGAGAK_2", "AAAAAAAAAAAAVSRD_3")
10+
)
11+
}
12+
13+
create_test_input <- function(standard_intensities, peptide2_intensities, peptide3_intensities) {
14+
# Constants
15+
n_proteins <- 3
16+
n_runs <- 48
17+
n_subjects <- 4
18+
n_fractions <- 6
19+
20+
# Create base structure
21+
input <- data.table::data.table(
22+
PROTEIN = rep(c("P1", "P1", "P3"), each = n_runs),
23+
PEPTIDE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAAAGAGAGAK_2", "AAAAAAAAAAAAVSRD_3"),
24+
each = n_runs),
25+
TRANSITION = rep("NA_NA", n_proteins * n_runs),
26+
FEATURE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3_NA_NA",
27+
"AAAAAAAAAAAAAAGAGAGAK_2_NA_NA",
28+
"AAAAAAAAAAAAVSRD_3_NA_NA"),
29+
each = n_runs),
30+
LABEL = rep("L", n_proteins * n_runs),
31+
GROUP_ORIGINAL = rep(rep(c("Control", "Treatment"), each = n_runs/2), n_proteins),
32+
SUBJECT_ORIGINAL = rep(paste0("Subject", rep(1:n_subjects, each = n_fractions)),
33+
n_proteins * 2),
34+
RUN = rep(1:n_runs, n_proteins),
35+
GROUP = rep(rep(c("Control", "Treatment"), each = n_runs/2), n_proteins),
36+
SUBJECT = rep(paste0("Subject", rep(1:n_subjects, each = n_fractions)),
37+
n_proteins * 2),
38+
FRACTION = rep(rep(1:n_fractions, n_subjects), n_proteins * 2),
39+
INTENSITY = c(standard_intensities, peptide2_intensities, peptide3_intensities),
40+
ANOMALYSCORES = rep(NA, n_proteins * n_runs),
41+
originalRUN = rep(paste0("Run", 1:n_runs), n_proteins)
42+
)
43+
44+
input[, ABUNDANCE := log2(INTENSITY)]
45+
return(input)
46+
}
47+
48+
# Test 1: Standards with different intensities between groups -------------------
49+
test_different_group_intensities <- function() {
50+
peptide_dict <- create_peptide_dictionary()
51+
standards <- c("AAAAAAAAAAAAAAGAGAGAK")
52+
53+
# Control group: 262144, Treatment group: 524288
54+
standard_intensities <- c(
55+
rep(262144, 24), # Control (runs 1-24)
56+
rep(524288, 24) # Treatment (runs 25-48)
57+
)
58+
59+
# Non-standard peptides: all 262144
60+
peptide3_intensities <- rep(262144, 48)
61+
62+
input <- create_test_input(standard_intensities, standard_intensities, peptide3_intensities)
63+
output <- MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards)
64+
65+
# Verify normalization: Control runs should be shifted up, Treatment runs shifted down
66+
control_runs <- 1:24
67+
treatment_runs <- 25:48
68+
69+
# Check Control group (shifted up to match treatment standard)
70+
control_abundance <- output[RUN %in% control_runs &
71+
!is.na(ABUNDANCE) &
72+
!grepl(standards, PEPTIDE)]$ABUNDANCE
73+
expect_true(all(abs(control_abundance - 18.5) < 1e-10),
74+
info = "Control group non-standard peptides should be normalized to 18.5")
75+
76+
# Check Treatment group (shifted down to match control standard)
77+
treatment_abundance <- output[RUN %in% treatment_runs &
78+
!is.na(ABUNDANCE) &
79+
!grepl(standards, PEPTIDE)]$ABUNDANCE
80+
expect_true(all(abs(treatment_abundance - 17.5) < 1e-10),
81+
info = "Treatment group non-standard peptides should be normalized to 17.5")
82+
}
83+
84+
# Test 2: Standards with alternating intensities within fractions ---------------
85+
test_alternating_intensities_within_fractions <- function() {
86+
peptide_dict <- create_peptide_dictionary()
87+
standards <- c("AAAAAAAAAAAAAAGAGAGAK")
88+
89+
# Standard alternates between 262144 and 524288 within each fraction
90+
standard_intensities <- rep(c(262144, 524288), 24)
91+
92+
# Non-standard peptides: all 262144
93+
peptide3_intensities <- rep(262144, 48)
94+
95+
input <- create_test_input(standard_intensities, standard_intensities, peptide3_intensities)
96+
output <- MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards)
97+
98+
# When standards vary within fractions but average to same level,
99+
# no net normalization should occur
100+
all_runs <- 1:48
101+
normalized_abundance <- output[RUN %in% all_runs &
102+
!is.na(ABUNDANCE) &
103+
!grepl(standards, PEPTIDE)]$ABUNDANCE
104+
105+
expect_true(all(abs(normalized_abundance - 18) < 1e-10),
106+
info = "No normalization should occur when standard averages are equal across fractions")
107+
}
108+
109+
# Run tests ---------------------------------------------------------------------
110+
test_different_group_intensities()
111+
test_alternating_intensities_within_fractions()

0 commit comments

Comments
 (0)