Skip to content

Commit 58084b9

Browse files
authored
Fix single feature bug in linear summarization
1 parent 2448662 commit 58084b9

1 file changed

Lines changed: 78 additions & 37 deletions

File tree

R/dataProcess.R

Lines changed: 78 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -364,75 +364,116 @@ MSstatsSummarizeSingleLinear = function(single_protein,
364364
equal_variances = TRUE) {
365365
ABUNDANCE = RUN = FEATURE = PROTEIN = LogIntensities = NULL
366366

367-
cols = intersect(colnames(single_protein), c("newABUNDANCE", "cen", "RUN",
368-
"FEATURE", "ref"))
369-
single_protein = single_protein[(n_obs > 1 & !is.na(n_obs)) &
370-
(n_obs_run > 0 & !is.na(n_obs_run))]
367+
cols = intersect(
368+
colnames(single_protein),
369+
c("newABUNDANCE", "cen", "RUN", "FEATURE", "ref")
370+
)
371+
372+
single_protein = single_protein[
373+
(n_obs > 1 & !is.na(n_obs)) &
374+
(n_obs_run > 0 & !is.na(n_obs_run))
375+
]
376+
371377
if (nrow(single_protein) == 0) {
372378
return(list(NULL, NULL))
373379
}
380+
374381
single_protein[, RUN := factor(RUN)]
375382
single_protein[, FEATURE := factor(FEATURE)]
383+
376384
if (impute & any(single_protein[["censored"]])) {
377-
survival_fit = .fitSurvival(single_protein[LABEL == "L", cols,
378-
with = FALSE],
379-
aft_iterations)
385+
survival_fit = .fitSurvival(
386+
single_protein[LABEL == "L", cols, with = FALSE],
387+
aft_iterations
388+
)
380389
sigma2 = survival_fit$scale^2
390+
381391
single_protein[, c("predicted", "imputation_var") := {
382392
pred = predict(survival_fit, newdata = .SD, se.fit = TRUE)
383393
list(pred$fit, pred$se.fit^2 + sigma2)
384394
}]
385-
single_protein[, predicted := ifelse(censored & (LABEL == "L"),
386-
predicted, NA)]
387-
single_protein[, newABUNDANCE := ifelse(censored & LABEL == "L",
388-
predicted, newABUNDANCE)]
395+
396+
single_protein[, predicted := ifelse(
397+
censored & (LABEL == "L"),
398+
predicted,
399+
NA
400+
)]
401+
single_protein[, newABUNDANCE := ifelse(
402+
censored & LABEL == "L",
403+
predicted,
404+
newABUNDANCE
405+
)]
406+
389407
survival = single_protein[, c(cols, "predicted"), with = FALSE]
390408
} else {
391409
survival = single_protein[, cols, with = FALSE]
392410
survival[, predicted := NA]
393411
}
394412

395-
if (all(!is.na(single_protein$ANOMALYSCORES))){
396-
# single_protein$weights = 1 / single_protein$ANOMALYSCORES
397-
# s: anomaly scores for a given precursor across runs
413+
if (all(!is.na(single_protein$ANOMALYSCORES))) {
398414
single_protein[, weights :=
399-
anomaly_weights_z_vec(ANOMALYSCORES),
400-
by = .(PROTEIN, PEPTIDE)]
415+
anomaly_weights_z_vec(ANOMALYSCORES),
416+
by = .(PROTEIN, PEPTIDE)]
401417
} else {
402-
single_protein$weights = NA
418+
single_protein[, weights := NA_real_]
403419
}
404420

405421
label = data.table::uniqueN(single_protein$LABEL) > 1
422+
406423
single_protein = single_protein[!is.na(newABUNDANCE)]
424+
425+
if (nrow(single_protein) == 0) {
426+
return(list(NULL, survival))
427+
}
428+
407429
single_protein[, RUN := factor(RUN)]
408430
single_protein[, FEATURE := factor(FEATURE)]
409431

410-
counts = xtabs(~ RUN + FEATURE,
411-
data = unique(single_protein[, list(FEATURE, RUN)]))
412-
counts = as.matrix(counts)
413432
is_single_feature = .checkSingleFeature(single_protein)
414433

415-
fit = try(.fitLinearModel(single_protein, is_single_feature,
416-
is_labeled = label, equal_variances), silent = TRUE)
417-
418-
if (inherits(fit, "try-error")) {
419-
msg = paste("*** error : can't fit the model for ",
420-
unique(single_protein$PROTEIN))
421-
getOption("MSstatsLog")("WARN", msg)
422-
getOption("MSstatsMsg")("WARN", msg)
423-
result = NULL
434+
if (is_single_feature) {
435+
result = single_protein[LABEL == "L",
436+
.(LogIntensities = mean(newABUNDANCE)),
437+
by = RUN]
438+
result[, Protein := unique(single_protein$PROTEIN)]
439+
result[, Variance := NA_real_]
440+
setcolorder(result, c("Protein", "RUN", "LogIntensities",
441+
"Variance"))
442+
443+
return(list(result, survival))
424444
} else {
425-
cf = summary(fit)$coefficients[, 1]
426-
cov_mat = vcov(fit)
445+
counts = xtabs(
446+
~ RUN + FEATURE,
447+
data = unique(single_protein[, .(FEATURE, RUN)])
448+
)
449+
counts = as.matrix(counts)
450+
451+
fit = try(
452+
.fitLinearModel(single_protein, is_single_feature,
453+
is_labeled = label,
454+
equal_variances),
455+
silent = TRUE
456+
)
427457

428-
result = unique(single_protein[, list(Protein = PROTEIN, RUN = RUN)])
429-
extracted_values = get_linear_summary(single_protein, cf,
430-
counts, label, cov_mat)
431-
# extracted_values = get_run_estimates_simple(fit, single_protein, counts)
458+
if (inherits(fit, "try-error")) {
459+
msg = paste("*** error : can't fit the model for",
460+
unique(single_protein$PROTEIN))
461+
getOption("MSstatsLog")("WARN", msg)
462+
getOption("MSstatsMsg")("WARN", msg)
463+
result = NULL
464+
} else {
465+
cf = summary(fit)$coefficients[, 1]
466+
cov_mat = vcov(fit)
467+
468+
result = unique(single_protein[, .(Protein = PROTEIN,
469+
RUN = RUN)])
470+
extracted_values = get_linear_summary(single_protein, cf,
471+
counts, label, cov_mat)
472+
result = cbind(result, extracted_values)
473+
}
432474

433-
result = cbind(result, extracted_values)
475+
return(list(result, survival))
434476
}
435-
list(result, survival)
436477
}
437478

438479

0 commit comments

Comments
 (0)