Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 29 additions & 8 deletions R/pairwise-comparisons.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,17 @@
#' given, then a scaled relative skill with respect to the baseline will be
#' returned. By default (`NULL`), relative skill will not be scaled with
#' respect to a baseline model.
#' @param ... Additional arguments for the comparison between two models. See
#' [compare_forecasts()] for more information.
#' @param test_type Character, either "non_parametric" (the default),
#' "permutation", or NULL. Determines which test is used to compute
#' p-values. "non_parametric" uses a paired Wilcoxon signed-rank test,
#' "permutation" uses a permutation test. If NULL, no test is conducted
#' and p-values will be `NA`. See [compare_forecasts()] for details.
#' @param one_sided Logical, default is `FALSE`. Whether to conduct a
#' one-sided instead of a two-sided test to determine significance in a
#' pairwise comparison.
#' @param n_permutations Numeric, the number of permutations for a
#' permutation test. Default is 999. Only used if
#' `test_type = "permutation"`.
#' @inheritParams summarise_scores
#' @returns A data.table with the results of pairwise comparisons
#' containing the mean score ratios (`mean_scores_ratio`),
Expand Down Expand Up @@ -118,7 +127,9 @@ get_pairwise_comparisons <- function(
by = NULL,
metric = intersect(c("wis", "crps", "brier_score"), names(scores)),
baseline = NULL,
...
test_type = c("non_parametric", "permutation"),
one_sided = FALSE,
n_permutations = 999
) {

# input checks ---------------------------------------------------------------
Expand Down Expand Up @@ -244,7 +255,9 @@ get_pairwise_comparisons <- function(
baseline = baseline,
compare = compare,
by = by,
...
test_type = test_type,
one_sided = one_sided,
n_permutations = n_permutations
)
}
)
Expand Down Expand Up @@ -275,7 +288,9 @@ pairwise_comparison_one_group <- function(scores,
baseline,
compare = "model",
by,
...) {
test_type = c("non_parametric", "permutation"),
one_sided = FALSE,
n_permutations = 999) {
if (!(compare %in% names(scores))) {
cli_abort(
"pairwise comparisons require a column as given by `compare`"
Expand Down Expand Up @@ -307,7 +322,9 @@ pairwise_comparison_one_group <- function(scores,
name_comparator1 = ..compare,
name_comparator2 = compare_against,
metric = metric,
...
test_type = test_type,
one_sided = one_sided,
n_permutations = n_permutations
),
by = seq_len(NROW(combinations))
]
Expand Down Expand Up @@ -588,7 +605,9 @@ add_relative_skill <- function(
by = NULL,
metric = intersect(c("wis", "crps", "brier_score"), names(scores)),
baseline = NULL,
...
test_type = c("non_parametric", "permutation"),
one_sided = FALSE,
n_permutations = 999
) {

# input checks are done in `get_pairwise_comparisons()`
Expand All @@ -599,7 +618,9 @@ add_relative_skill <- function(
baseline = baseline,
compare = compare,
by = by,
...
test_type = test_type,
one_sided = one_sided,
n_permutations = n_permutations
)

# store original metrics
Expand Down
19 changes: 16 additions & 3 deletions man/add_relative_skill.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 16 additions & 3 deletions man/get_pairwise_comparisons.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 16 additions & 3 deletions man/pairwise_comparison_one_group.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

106 changes: 106 additions & 0 deletions tests/testthat/test-pairwise_comparison.R
Original file line number Diff line number Diff line change
Expand Up @@ -595,3 +595,109 @@
expect_type(scores_w_rel_skill$ae_median_relative_skill, "double")
expect_false(anyNA(scores_w_rel_skill$ae_median_relative_skill))
})


# ==============================================================================
# Tests for explicit argument promotion (issue #769)
# ==============================================================================

test_that("get_pairwise_comparisons() accepts test_type as explicit argument", {
  # The argument must appear in the function signature (promoted from `...`).
  expect_true("test_type" %in% names(formals(get_pairwise_comparisons)))

  # Wilcoxon-based test: p-values should be computed (finite).
  res_nonparam <- get_pairwise_comparisons(
    scores_quantile,
    test_type = "non_parametric"
  )
  expect_s3_class(res_nonparam, "data.table")
  expect_true(all(is.finite(res_nonparam[model != compare_against]$pval)))

  # Permutation test: p-values should also be computed.
  res_permutation <- get_pairwise_comparisons(
    scores_quantile,
    test_type = "permutation",
    n_permutations = 50
  )
  expect_s3_class(res_permutation, "data.table")
  expect_true(all(is.finite(res_permutation[model != compare_against]$pval)))

  # No test requested: p-values are expected to be NA.
  res_none <- get_pairwise_comparisons(scores_quantile, test_type = NULL)
  expect_s3_class(res_none, "data.table")
  expect_true(all(is.na(res_none[model != compare_against]$pval)))
})

test_that("get_pairwise_comparisons() accepts one_sided as explicit argument", {
  # The argument must appear in the function signature (promoted from `...`).
  expect_true("one_sided" %in% names(formals(get_pairwise_comparisons)))

  # Both one- and two-sided variants should run and yield finite p-values.
  for (sided in c(TRUE, FALSE)) {
    res <- get_pairwise_comparisons(
      scores_quantile,
      one_sided = sided,
      test_type = "permutation",
      n_permutations = 50
    )
    expect_s3_class(res, "data.table")
    expect_true(all(is.finite(res[model != compare_against]$pval)))
  }
})

test_that("get_pairwise_comparisons() accepts n_permutations as explicit argument", {
  # The argument must appear in the function signature (promoted from `...`).
  expect_true("n_permutations" %in% names(formals(get_pairwise_comparisons)))

  # A small permutation count keeps the test fast while exercising the path.
  res <- get_pairwise_comparisons(
    scores_quantile,
    test_type = "permutation",
    n_permutations = 10
  )
  expect_s3_class(res, "data.table")
  expect_true(all(is.finite(res[model != compare_against]$pval)))
})

test_that("add_relative_skill() accepts test_type, one_sided, n_permutations as explicit arguments", {
  # All three promoted arguments must appear in the function signature.
  promoted_args <- c("test_type", "one_sided", "n_permutations")
  expect_true(all(promoted_args %in% names(formals(add_relative_skill))))

  # Passing the explicit argument must not trigger a "... ignored" warning.
  res_no_test <- expect_no_warning(
    add_relative_skill(scores_quantile, test_type = NULL)
  )
  expect_s3_class(res_no_test, "data.table")

  # All three arguments together should be forwarded without error.
  res_perm <- add_relative_skill(
    scores_quantile,
    test_type = "permutation",
    one_sided = TRUE,
    n_permutations = 50
  )
  expect_s3_class(res_perm, "data.table")
})

test_that("get_pairwise_comparisons() has correct default values for promoted arguments", {
  fmls <- formals(get_pairwise_comparisons)
  # expect_false() is preferred over expect_identical(x, FALSE)
  # (flagged by lintr's expect_true_false_linter in CI).
  expect_false(eval(fmls$one_sided))
  expect_identical(eval(fmls$n_permutations), 999)
  # test_type default should have "non_parametric" as first element
  test_type_default <- eval(fmls$test_type)
  expect_true("non_parametric" %in% test_type_default)
  expect_identical(test_type_default[[1]], "non_parametric")
})

test_that("get_pairwise_comparisons() results are unchanged after argument promotion", {
  # The default call should still produce all of the usual output columns.
  res_default <- get_pairwise_comparisons(scores_quantile)
  expected_cols <- c(
    "mean_scores_ratio", "pval", "adj_pval", "wis_relative_skill"
  )
  for (col in expected_cols) {
    expect_true(col %in% names(res_default))
  }

  # With no test, both raw and adjusted p-values are NA.
  res_no_test <- get_pairwise_comparisons(scores_quantile, test_type = NULL)
  expect_true(all(is.na(res_no_test[model != compare_against]$pval)))
  expect_true(all(is.na(res_no_test[model != compare_against]$adj_pval)))

  # Permutation test with a fixed seed should yield finite p-values.
  set.seed(42)
  res_perm <- get_pairwise_comparisons(
    scores_quantile,
    test_type = "permutation",
    n_permutations = 50
  )
  expect_true(all(is.finite(res_perm[model != compare_against]$pval)))
})

test_that("pairwise_comparison_one_group() passes test args through to compare_forecasts()", {
  # The promoted argument must appear in the helper's signature.
  expect_true("test_type" %in% names(formals(pairwise_comparison_one_group)))

  # test_type = NULL must propagate down: no test is run, p-values are NA.
  res_no_test <- pairwise_comparison_one_group(
    data.table::copy(scores_quantile),
    metric = "wis",
    baseline = NULL,
    compare = "model",
    by = character(0),
    test_type = NULL
  )
  expect_true(all(is.na(res_no_test[model != compare_against]$pval)))

  # The default Wilcoxon test must propagate down: p-values are finite.
  res_wilcoxon <- pairwise_comparison_one_group(
    data.table::copy(scores_quantile),
    metric = "wis",
    baseline = NULL,
    compare = "model",
    by = character(0),
    test_type = "non_parametric"
  )
  expect_true(all(is.finite(res_wilcoxon[model != compare_against]$pval)))
})
Loading