Skip to content

Commit 860f92b

Browse files
committed
[R] Implement support for keepNA = FALSE in base::nchar()
1 parent 744f0ec commit 860f92b

File tree

2 files changed

+20
-4
lines changed

2 files changed

+20
-4
lines changed

r/R/dplyr-funcs-string.R

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -484,15 +484,21 @@ register_bindings_string_other <- function() {
484484
if (is.na(keepNA)) {
485485
keepNA <- !identical(type, "width")
486486
}
487-
if (!keepNA) {
488-
# TODO: I think there is a fill_null kernel we could use, set null to 2
487+
if (keepNA) {
489488
arrow_not_supported("keepNA = TRUE")
490489
}
491490
if (identical(type, "bytes")) {
492-
Expression$create("binary_length", x)
491+
result <- Expression$create("binary_length", x)
493492
} else {
494-
Expression$create("utf8_length", x)
493+
result <- Expression$create("utf8_length", x)
495494
}
495+
496+
if (!keepNA) {
497+
# With keepNA = FALSE, base::nchar() returns 2 for NA (the printed width of "NA")
498+
result <- Expression$create("coalesce", result, Expression$scalar(2L))
499+
}
500+
501+
result
496502
},
497503
notes = "`allowNA = TRUE` and `keepNA = TRUE` not supported"
498504
)

r/tests/testthat/test-dplyr-funcs-string.R

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1442,6 +1442,16 @@ test_that("nchar with namespacing", {
14421442
)
14431443
})
14441444

1445+
test_that("nchar with keepNA = FALSE", {
1446+
df <- tibble(x = c("foo", NA_character_, "bar"))
1447+
compare_dplyr_binding(
1448+
.input |>
1449+
mutate(n = nchar(x, keepNA = FALSE)) |>
1450+
collect(),
1451+
df
1452+
)
1453+
})
1454+
14451455
test_that("str_trim()", {
14461456
compare_dplyr_binding(
14471457
.input |>

0 commit comments

Comments
 (0)