diff --git a/NAMESPACE b/NAMESPACE index 151aa6149..e2efa2a0d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -86,6 +86,8 @@ export(logs_binary) export(logs_categorical) export(logs_sample) export(mad_sample) +export(metric_adapt_binary_numeric) +export(metric_adapt_swap_args) export(new_forecast) export(overprediction_quantile) export(overprediction_sample) diff --git a/R/metric-adapters.R b/R/metric-adapters.R new file mode 100644 index 000000000..04b1c712f --- /dev/null +++ b/R/metric-adapters.R @@ -0,0 +1,72 @@ +#' @title Adapt external metrics for use with scoringutils +#' +#' @description +#' These adapter functions wrap external scoring functions so they can be +#' used with [score()]. They handle common incompatibilities between +#' scoringutils' calling conventions and those of external packages. +#' +#' - `metric_adapt_binary_numeric()` wraps a metric that expects numeric +#' 0/1 `observed` values, converting the factor input that scoringutils +#' uses for binary forecasts. +#' +#' - `metric_adapt_swap_args()` wraps a metric that takes +#' `function(predicted, observed)` instead of the +#' `function(observed, predicted)` convention used by scoringutils. +#' +#' The returned functions can be passed directly to [score()] via the +#' `metrics` argument, and are compatible with [purrr::partial()] for +#' further customization. +#' +#' @param fun A function to wrap. For `metric_adapt_binary_numeric()`, +#' `fun` should expect numeric `observed` (0/1) as its first argument. +#' For `metric_adapt_swap_args()`, `fun` should expect `predicted` as its +#' first argument and `observed` as its second. +#' +#' @returns A function with formals `(observed, predicted, ...)` that can be +#' used as a metric in [score()]. +#' +#' @keywords handle-metrics +#' @importFrom checkmate assert_function +#' @examples +#' # Wrap a metric that expects numeric 0/1 observed values +#' numeric_brier <- function(observed, predicted) { +#' (observed - predicted)^2 +#' } +#' adapted_brier <- metric_adapt_binary_numeric(numeric_brier) +#' +#' # Use inside score() +#' score(example_binary, +#' metrics = list(custom_brier = adapted_brier)) +#' +#' # Wrap a metric that takes (predicted, observed) order +#' swapped_ae <- function(predicted, observed) { +#' abs(predicted - observed) +#' } +#' adapted_ae <- metric_adapt_swap_args(swapped_ae) +#' score(example_point, +#' metrics = list(custom_ae = adapted_ae)) +#' @name metric-adapters +NULL + + +#' @rdname metric-adapters +#' @export +metric_adapt_binary_numeric <- function(fun) { + assert_function(fun) + adapted <- function(observed, predicted, ...) { + observed <- as.numeric(observed) - 1 + fun(observed, predicted, ...) + } + return(adapted) +} + + +#' @rdname metric-adapters +#' @export +metric_adapt_swap_args <- function(fun) { + assert_function(fun) + adapted <- function(observed, predicted, ...) { + fun(predicted, observed, ...) + } + return(adapted) +} diff --git a/man/metric-adapters.Rd b/man/metric-adapters.Rd new file mode 100644 index 000000000..513836344 --- /dev/null +++ b/man/metric-adapters.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/metric-adapters.R +\name{metric-adapters} +\alias{metric-adapters} +\alias{metric_adapt_binary_numeric} +\alias{metric_adapt_swap_args} +\title{Adapt external metrics for use with scoringutils} +\usage{ +metric_adapt_binary_numeric(fun) + +metric_adapt_swap_args(fun) +} +\arguments{ +\item{fun}{A function to wrap. For \code{metric_adapt_binary_numeric()}, +\code{fun} should expect numeric \code{observed} (0/1) as its first argument. +For \code{metric_adapt_swap_args()}, \code{fun} should expect \code{predicted} as its +first argument and \code{observed} as its second.} +} +\value{ +A function with formals \verb{(observed, predicted, ...)} that can be +used as a metric in \code{\link[=score]{score()}}. +} +\description{ +These adapter functions wrap external scoring functions so they can be +used with \code{\link[=score]{score()}}. They handle common incompatibilities between +scoringutils' calling conventions and those of external packages. +\itemize{ +\item \code{metric_adapt_binary_numeric()} wraps a metric that expects numeric +0/1 \code{observed} values, converting the factor input that scoringutils +uses for binary forecasts. +\item \code{metric_adapt_swap_args()} wraps a metric that takes +\verb{function(predicted, observed)} instead of the +\verb{function(observed, predicted)} convention used by scoringutils. +} + +The returned functions can be passed directly to \code{\link[=score]{score()}} via the +\code{metrics} argument, and are compatible with \code{\link[purrr:partial]{purrr::partial()}} for +further customization. +} +\examples{ +# Wrap a metric that expects numeric 0/1 observed values +numeric_brier <- function(observed, predicted) { + (observed - predicted)^2 +} +adapted_brier <- metric_adapt_binary_numeric(numeric_brier) + +# Use inside score() +score(example_binary, + metrics = list(custom_brier = adapted_brier)) + +# Wrap a metric that takes (predicted, observed) order +swapped_ae <- function(predicted, observed) { + abs(predicted - observed) +} +adapted_ae <- metric_adapt_swap_args(swapped_ae) +score(example_point, + metrics = list(custom_ae = adapted_ae)) +} +\keyword{handle-metrics} diff --git a/tests/testthat/test-metric-adapters.R b/tests/testthat/test-metric-adapters.R new file mode 100644 index 000000000..8588dc662 --- /dev/null +++ b/tests/testthat/test-metric-adapters.R @@ -0,0 +1,174 @@ +# ============================================================================== +# metric_adapt_binary_numeric() +# ============================================================================== +test_that("metric_adapt_binary_numeric() converts factor observed to numeric 0/1", { + observed <- factor(c(0, 1, 1, 0, 1)) + predicted <- c(0.2, 0.8, 0.6, 0.3, 0.9) + + # Define a simple external metric that requires numeric observed + external_brier <- function(observed, predicted) { + (observed - predicted)^2 + } + + adapted <- metric_adapt_binary_numeric(external_brier) + result <- adapted(observed, predicted) + + # Factor is converted to 0/1 numeric and the external function computes correctly + expect_equal(result, (c(0, 1, 1, 0, 1) - predicted)^2) + + # Without the adapter, factor arithmetic produces a warning and NAs + expect_warning( + raw_result <- external_brier(observed, predicted) + ) +}) + +test_that("metric_adapt_binary_numeric() works inside score() with binary forecasts", { + # Define an external metric expecting numeric 0/1 + numeric_brier <- function(observed, predicted) { + (observed - predicted)^2 + } + adapted_brier <- metric_adapt_binary_numeric(numeric_brier) + metrics <- list(custom_brier = adapted_brier) + + result <- score(example_binary, metrics = metrics) + + expect_s3_class(result, "scores") + expect_true("custom_brier" %in% names(result)) + + # Compare against the built-in brier_score + reference <- score(example_binary, metrics = list(brier_score = brier_score)) + expect_equal(result$custom_brier, reference$brier_score) +}) + +# ============================================================================== +# metric_adapt_swap_args() +# ============================================================================== +test_that("metric_adapt_swap_args() reverses observed/predicted argument order", { + observed <- c(1, 2, 3, 4, 5) + predicted <- c(1.1, 2.2, 2.8, 4.1, 5.3) + + # External metric with swapped arg order + swapped_ae <- function(predicted, observed) { + abs(predicted - observed) + } + normal_ae <- function(observed, predicted) { + abs(observed - predicted) + } + + adapted <- metric_adapt_swap_args(swapped_ae) + result <- adapted(observed, predicted) + + expect_equal(result, normal_ae(observed, predicted)) + + # Verify the swap actually happened + first_arg <- function(a, b) { a } + expect_equal(metric_adapt_swap_args(first_arg)(10, 20), 20) +}) + +test_that("metric_adapt_swap_args() works inside score() with point forecasts", { + # External absolute error function with swapped order + swapped_ae <- function(predicted, observed) { + abs(predicted - observed) + } + adapted_ae <- metric_adapt_swap_args(swapped_ae) + metrics <- list(ae_adapted = adapted_ae) + + result <- score(example_point, metrics = metrics) + + expect_s3_class(result, "scores") + expect_true("ae_adapted" %in% names(result)) + + reference <- score(example_point, metrics = list(ae_point = Metrics::ae)) + expect_equal(result$ae_adapted, reference$ae_point) +}) + +test_that("metric_adapt_swap_args() works inside score() with sample forecasts", { + # Define a function with swapped order that computes MAD + swapped_mad <- function(predicted, observed) { + apply(predicted, 1, mad) + } + adapted_mad <- metric_adapt_swap_args(swapped_mad) + metrics <- list(mad_adapted = adapted_mad) + + result <- score(example_sample_continuous, metrics = metrics) + + expect_s3_class(result, "scores") + expect_true("mad_adapted" %in% names(result)) + + reference <- score( + example_sample_continuous, metrics = list(mad = mad_sample) + ) + expect_equal(result$mad_adapted, reference$mad) +}) + +# ============================================================================== +# Adapters return proper function objects +# ============================================================================== +test_that("adapted metrics return a function object", { + dummy <- function(observed, predicted) { 0 } + + adapted_numeric <- metric_adapt_binary_numeric(dummy) + adapted_swap <- metric_adapt_swap_args(dummy) + + expect_true(is.function(adapted_numeric)) + expect_true(is.function(adapted_swap)) + + # Must have at least observed and predicted formals + expect_true(length(formals(adapted_numeric)) >= 2) + expect_true(length(formals(adapted_swap)) >= 2) + + # Must accept ... so run_safely() passes through all args + expect_true("..." %in% names(formals(adapted_numeric))) + expect_true("..." %in% names(formals(adapted_swap))) +}) + +# ============================================================================== +# Composability with purrr::partial() +# ============================================================================== +test_that("adapted metrics compose with purrr::partial()", { + # Metric with an extra parameter expecting numeric observed + weighted_brier <- function(observed, predicted, weight = 1) { + weight * (observed - predicted)^2 + } + adapted <- metric_adapt_binary_numeric(weighted_brier) + custom <- purrr::partial(adapted, weight = 2) + + observed <- factor(c(0, 1, 1, 0)) + predicted <- c(0.3, 0.7, 0.8, 0.2) + + result <- custom(observed, predicted) + expect_equal(result, 2 * (c(0, 1, 1, 0) - predicted)^2) +}) + +# ============================================================================== +# Input validation +# ============================================================================== +test_that("metric_adapt_binary_numeric() errors on non-function input", { + expect_error(metric_adapt_binary_numeric("not_a_function")) + expect_error(metric_adapt_binary_numeric(42)) + expect_error(metric_adapt_binary_numeric(NULL)) +}) + +test_that("metric_adapt_swap_args() errors on non-function input", { + expect_error(metric_adapt_swap_args("not_a_function")) + expect_error(metric_adapt_swap_args(42)) +}) + +# ============================================================================== +# Extra arguments pass through via ... +# ============================================================================== +test_that("metric_adapt_binary_numeric() preserves extra arguments via ...", { + # Metric with an extra param + my_metric <- function(observed, predicted, na.rm = FALSE) { + if (na.rm) observed[is.na(observed)] <- 0 + sum(observed != predicted) + } + adapted <- metric_adapt_binary_numeric(my_metric) + + observed <- factor(c(0, 1, NA, 0, 1)) + predicted <- c(0.3, 0.7, 0.5, 0.2, 0.8) + + # na.rm = TRUE should be forwarded to the inner function + result <- adapted(observed, predicted, na.rm = TRUE) + expect_true(is.numeric(result)) +})