diff --git a/R/pairwise-comparisons.R b/R/pairwise-comparisons.R index 2e8b00b10..d9c898349 100644 --- a/R/pairwise-comparisons.R +++ b/R/pairwise-comparisons.R @@ -75,8 +75,17 @@ #' given, then a scaled relative skill with respect to the baseline will be #' returned. By default (`NULL`), relative skill will not be scaled with #' respect to a baseline model. -#' @param ... Additional arguments for the comparison between two models. See -#' [compare_forecasts()] for more information. +#' @param test_type Character, either "non_parametric" (the default), +#' "permutation", or NULL. Determines which test is used to compute +#' p-values. "non_parametric" uses a paired Wilcoxon signed-rank test, +#' "permutation" uses a permutation test. If NULL, no test is conducted +#' and p-values will be `NA`. See [compare_forecasts()] for details. +#' @param one_sided Boolean, default is `FALSE`. Whether to conduct a +#' one-sided instead of a two-sided test to determine significance in a +#' pairwise comparison. +#' @param n_permutations Numeric, the number of permutations for a +#' permutation test. Default is 999. Only used if +#' `test_type = "permutation"`. #' @inheritParams summarise_scores #' @returns A data.table with the results of pairwise comparisons #' containing the mean score ratios (`mean_scores_ratio`), @@ -118,7 +127,9 @@ get_pairwise_comparisons <- function( by = NULL, metric = intersect(c("wis", "crps", "brier_score"), names(scores)), baseline = NULL, - ... + test_type = c("non_parametric", "permutation"), + one_sided = FALSE, + n_permutations = 999 ) { # input checks --------------------------------------------------------------- @@ -244,7 +255,9 @@ get_pairwise_comparisons <- function( baseline = baseline, compare = compare, by = by, - ... + test_type = test_type, + one_sided = one_sided, + n_permutations = n_permutations ) } ) @@ -275,7 +288,9 @@ pairwise_comparison_one_group <- function(scores, baseline, compare = "model", by, - ...) { + test_type = c("non_parametric", "permutation"), + one_sided = FALSE, + n_permutations = 999) { if (!(compare %in% names(scores))) { cli_abort( "pairwise comparisons require a column as given by `compare`" @@ -307,7 +322,9 @@ pairwise_comparison_one_group <- function(scores, name_comparator1 = ..compare, name_comparator2 = compare_against, metric = metric, - ... + test_type = test_type, + one_sided = one_sided, + n_permutations = n_permutations ), by = seq_len(NROW(combinations)) ] @@ -588,7 +605,9 @@ add_relative_skill <- function( by = NULL, metric = intersect(c("wis", "crps", "brier_score"), names(scores)), baseline = NULL, - ... + test_type = c("non_parametric", "permutation"), + one_sided = FALSE, + n_permutations = 999 ) { # input checks are done in `get_pairwise_comparisons()` @@ -599,7 +618,9 @@ add_relative_skill <- function( baseline = baseline, compare = compare, by = by, - ... + test_type = test_type, + one_sided = one_sided, + n_permutations = n_permutations ) # store original metrics diff --git a/man/add_relative_skill.Rd b/man/add_relative_skill.Rd index c57ae7852..5628a60da 100644 --- a/man/add_relative_skill.Rd +++ b/man/add_relative_skill.Rd @@ -10,7 +10,9 @@ add_relative_skill( by = NULL, metric = intersect(c("wis", "crps", "brier_score"), names(scores)), baseline = NULL, - ... + test_type = c("non_parametric", "permutation"), + one_sided = FALSE, + n_permutations = 999 ) } \arguments{ @@ -37,8 +39,19 @@ given, then a scaled relative skill with respect to the baseline will be returned. By default (\code{NULL}), relative skill will not be scaled with respect to a baseline model.} -\item{...}{Additional arguments for the comparison between two models. See -\code{\link[=compare_forecasts]{compare_forecasts()}} for more information.} +\item{test_type}{Character, either "non_parametric" (the default), +"permutation", or NULL. Determines which test is used to compute +p-values. "non_parametric" uses a paired Wilcoxon signed-rank test, +"permutation" uses a permutation test. If NULL, no test is conducted +and p-values will be \code{NA}. See \code{\link[=compare_forecasts]{compare_forecasts()}} for details.} + +\item{one_sided}{Boolean, default is \code{FALSE}. Whether to conduct a +one-sided instead of a two-sided test to determine significance in a +pairwise comparison.} + +\item{n_permutations}{Numeric, the number of permutations for a +permutation test. Default is 999. Only used if +\code{test_type = "permutation"}.} } \description{ Adds a columns with relative skills computed by running diff --git a/man/get_pairwise_comparisons.Rd b/man/get_pairwise_comparisons.Rd index f842f1f47..1ed3bdd2d 100644 --- a/man/get_pairwise_comparisons.Rd +++ b/man/get_pairwise_comparisons.Rd @@ -10,7 +10,9 @@ get_pairwise_comparisons( by = NULL, metric = intersect(c("wis", "crps", "brier_score"), names(scores)), baseline = NULL, - ... + test_type = c("non_parametric", "permutation"), + one_sided = FALSE, + n_permutations = 999 ) } \arguments{ @@ -37,8 +39,19 @@ given, then a scaled relative skill with respect to the baseline will be returned. By default (\code{NULL}), relative skill will not be scaled with respect to a baseline model.} -\item{...}{Additional arguments for the comparison between two models. See -\code{\link[=compare_forecasts]{compare_forecasts()}} for more information.} +\item{test_type}{Character, either "non_parametric" (the default), +"permutation", or NULL. Determines which test is used to compute +p-values. "non_parametric" uses a paired Wilcoxon signed-rank test, +"permutation" uses a permutation test. If NULL, no test is conducted +and p-values will be \code{NA}. See \code{\link[=compare_forecasts]{compare_forecasts()}} for details.} + +\item{one_sided}{Boolean, default is \code{FALSE}. Whether to conduct a +one-sided instead of a two-sided test to determine significance in a +pairwise comparison.} + +\item{n_permutations}{Numeric, the number of permutations for a +permutation test. Default is 999. Only used if +\code{test_type = "permutation"}.} } \value{ A data.table with the results of pairwise comparisons diff --git a/man/pairwise_comparison_one_group.Rd b/man/pairwise_comparison_one_group.Rd index 2982a753f..3d62de3c0 100644 --- a/man/pairwise_comparison_one_group.Rd +++ b/man/pairwise_comparison_one_group.Rd @@ -10,7 +10,9 @@ pairwise_comparison_one_group( baseline, compare = "model", by, - ... + test_type = c("non_parametric", "permutation"), + one_sided = FALSE, + n_permutations = 999 ) } \arguments{ @@ -37,8 +39,19 @@ will be one relative skill score per distinct entry of the column selected in \code{compare}. If further columns are given here, for example, \code{by = "location"} with \code{compare = "model"}, then one separate relative skill score is calculated for every model in every location.} -\item{...}{Additional arguments for the comparison between two models. See -\code{\link[=compare_forecasts]{compare_forecasts()}} for more information.} +\item{test_type}{Character, either "non_parametric" (the default), +"permutation", or NULL. Determines which test is used to compute +p-values. "non_parametric" uses a paired Wilcoxon signed-rank test, +"permutation" uses a permutation test. If NULL, no test is conducted +and p-values will be \code{NA}. See \code{\link[=compare_forecasts]{compare_forecasts()}} for details.} + +\item{one_sided}{Boolean, default is \code{FALSE}. Whether to conduct a +one-sided instead of a two-sided test to determine significance in a +pairwise comparison.} + +\item{n_permutations}{Numeric, the number of permutations for a +permutation test. Default is 999. Only used if +\code{test_type = "permutation"}.} } \value{ A data.table with the results of pairwise comparisons diff --git a/tests/testthat/test-pairwise_comparison.R b/tests/testthat/test-pairwise_comparison.R index 6838f345e..f1f433036 100644 --- a/tests/testthat/test-pairwise_comparison.R +++ b/tests/testthat/test-pairwise_comparison.R @@ -595,3 +595,109 @@ test_that("add_relative_skill() works without warnings when not computing p-valu expect_type(scores_w_rel_skill$ae_median_relative_skill, "double") expect_false(anyNA(scores_w_rel_skill$ae_median_relative_skill)) }) + + +# ============================================================================== +# Tests for explicit argument promotion (issue #769) +# ============================================================================== + +test_that("get_pairwise_comparisons() accepts test_type as explicit argument", { + result_np <- get_pairwise_comparisons(scores_quantile, test_type = "non_parametric") + expect_s3_class(result_np, "data.table") + expect_true(all(is.finite(result_np[model != compare_against]$pval))) + + result_perm <- get_pairwise_comparisons( + scores_quantile, test_type = "permutation", n_permutations = 50 + ) + expect_s3_class(result_perm, "data.table") + expect_true(all(is.finite(result_perm[model != compare_against]$pval))) + + result_null <- get_pairwise_comparisons(scores_quantile, test_type = NULL) + expect_s3_class(result_null, "data.table") + expect_true(all(is.na(result_null[model != compare_against]$pval))) + + expect_true("test_type" %in% names(formals(get_pairwise_comparisons))) +}) + +test_that("get_pairwise_comparisons() accepts one_sided as explicit argument", { + result_one <- get_pairwise_comparisons( + scores_quantile, one_sided = TRUE, test_type = "permutation", n_permutations = 50 + ) + result_two <- get_pairwise_comparisons( + scores_quantile, one_sided = FALSE, test_type = "permutation", n_permutations = 50 + ) + expect_s3_class(result_one, "data.table") + expect_s3_class(result_two, "data.table") + expect_true(all(is.finite(result_one[model != compare_against]$pval))) + expect_true(all(is.finite(result_two[model != compare_against]$pval))) + expect_true("one_sided" %in% names(formals(get_pairwise_comparisons))) +}) + +test_that("get_pairwise_comparisons() accepts n_permutations as explicit argument", { + result <- get_pairwise_comparisons( + scores_quantile, test_type = "permutation", n_permutations = 10 + ) + expect_s3_class(result, "data.table") + expect_true(all(is.finite(result[model != compare_against]$pval))) + expect_true("n_permutations" %in% names(formals(get_pairwise_comparisons))) +}) + +test_that("add_relative_skill() accepts test_type, one_sided, n_permutations as explicit arguments", { + result_null <- expect_no_warning( + add_relative_skill(scores_quantile, test_type = NULL) + ) + expect_s3_class(result_null, "data.table") + + result_perm <- add_relative_skill( + scores_quantile, test_type = "permutation", one_sided = TRUE, n_permutations = 50 + ) + expect_s3_class(result_perm, "data.table") + + expect_true("test_type" %in% names(formals(add_relative_skill))) + expect_true("one_sided" %in% names(formals(add_relative_skill))) + expect_true("n_permutations" %in% names(formals(add_relative_skill))) +}) + +test_that("get_pairwise_comparisons() has correct default values for promoted arguments", { + fmls <- formals(get_pairwise_comparisons) + expect_identical(eval(fmls$one_sided), FALSE) + expect_identical(eval(fmls$n_permutations), 999) + # test_type default should have "non_parametric" as first element + test_type_default <- eval(fmls$test_type) + expect_true("non_parametric" %in% test_type_default) + expect_identical(test_type_default[[1]], "non_parametric") +}) + +test_that("get_pairwise_comparisons() results are unchanged after argument promotion", { + result_default <- get_pairwise_comparisons(scores_quantile) + expect_true("mean_scores_ratio" %in% names(result_default)) + expect_true("pval" %in% names(result_default)) + expect_true("adj_pval" %in% names(result_default)) + expect_true("wis_relative_skill" %in% names(result_default)) + + result_null <- get_pairwise_comparisons(scores_quantile, test_type = NULL) + expect_true(all(is.na(result_null[model != compare_against]$pval))) + expect_true(all(is.na(result_null[model != compare_against]$adj_pval))) + + set.seed(42) + result_perm <- get_pairwise_comparisons( + scores_quantile, test_type = "permutation", n_permutations = 50 + ) + expect_true(all(is.finite(result_perm[model != compare_against]$pval))) +}) + +test_that("pairwise_comparison_one_group() passes test args through to compare_forecasts()", { + result_null <- pairwise_comparison_one_group( + data.table::copy(scores_quantile), metric = "wis", baseline = NULL, + compare = "model", by = character(0), test_type = NULL + ) + expect_true(all(is.na(result_null[model != compare_against]$pval))) + + result_np <- pairwise_comparison_one_group( + data.table::copy(scores_quantile), metric = "wis", baseline = NULL, + compare = "model", by = character(0), test_type = "non_parametric" + ) + expect_true(all(is.finite(result_np[model != compare_against]$pval))) + + expect_true("test_type" %in% names(formals(pairwise_comparison_one_group))) +})