From 5c15ada80a9b7115152ed0c6d8ed7c93bd5c133f Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 28 Dec 2025 01:21:21 -0800 Subject: [PATCH 1/3] Open up factor support --- NEWS.md | 2 +- inst/tests/nafill.Rraw | 17 +++++++++++++++++ man/nafill.Rd | 5 +++++ src/nafill.c | 4 ++-- 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1a6ca751d..20049e321 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,7 +14,7 @@ ### NEW FEATURES -1. `nafill()`, `setnafill()` extended to work on logical vectors (part of [#3992](https://github.com/Rdatatable/data.table/issues/3992)). Thanks @jangorecki for the request and @MichaelChirico for the PR. +1. `nafill()`, `setnafill()` extended to work on logical and factor vectors (part of [#3992](https://github.com/Rdatatable/data.table/issues/3992)). Thanks @jangorecki for the request and @MichaelChirico for the PR. ### Notes diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 16f84fa16..390ad9977 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -333,6 +333,23 @@ test(12.08, nafill(x, fill=NA_integer_), x) test(12.09, nafill(x, fill=NA_real_), x) test(12.10, nafill(x, fill=NaN), x) +## factor input +x = rep(NA_character_, 10L) +x[c(3:4, 7:8)] = c("a", "b", "a", "c") +x = as.factor(x) +test(13.01, nafill(x, "locf"), replace(replace(x, 5:6, "b"), 9:10, "c")) +test(13.02, nafill(x, "nocb"), replace(x, c(1:2, 5:6), "a")) +test(13.03, nafill(x, fill="a"), x_fill_a <- replace(x, c(1:2, 5:6, 9:10), "a")) +test(13.04, nafill(x, fill=1L), x_fill_a) +test(13.05, nafill(x, fill=1.0), x_fill_a) +test(13.06, nafill(x, fill=factor("a")), x_fill_a) +test(13.07, nafill(x, fill=factor("a", levels=levels(x))), x_fill_a) +test(13.08, nafill(x, fill=factor("a", levels=c("a", "b"))), x_fill_a) +test(13.09, nafill(x, fill=factor("a", levels=c("a", "d"))), x_fill_a) +test(13.10, nafill(x, fill=NA), x) +test(13.11, nafill(x, fill=NA_integer_), x) +test(13.12, nafill(x, fill=NA_character_), x) + ## logical ## character ## factor diff --git a/man/nafill.Rd b/man/nafill.Rd index 1304c8b67..e0d339702 100644 --- a/man/nafill.Rd +++ b/man/nafill.Rd @@ -37,6 +37,11 @@ x = c(1, NA, NaN, 3, NaN, NA, 4) nafill(x, "locf") nafill(x, "locf", nan=NaN) +# works for factors +x = gl(3, 2, 10) +is.na(x) = 1:2 +nafill(x, "nocb") + # fill= applies to any leftover NA nafill(c(NA, x), "locf") nafill(c(NA, x), "locf", fill=0) diff --git a/src/nafill.c b/src/nafill.c index 4187523c5..4b1e107e8 100644 --- a/src/nafill.c +++ b/src/nafill.c @@ -113,7 +113,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S if (obj_scalar) { if (binplace) error(_("'x' argument is atomic vector, in-place update is supported only for list/data.table")); - else if (!isReal(obj) && !isInteger(obj) && !isLogical(obj)) + else if (!isReal(obj) && TYPEOF(obj) != INTSXP && !isLogical(obj)) error(_("'x' argument must be logical/numeric type, or list/data.table of logical/numeric types")); SEXP obj1 = obj; obj = PROTECT(allocVector(VECSXP, 1)); protecti++; // wrap into list @@ -124,7 +124,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S int *icols = INTEGER(ricols); for (int i=0; i Date: Sun, 28 Dec 2025 02:04:23 -0800 Subject: [PATCH 2/3] a test of possibly valid factor filling --- inst/tests/nafill.Rraw | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 390ad9977..d9a4341c0 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -346,6 +346,7 @@ test(13.06, nafill(x, fill=factor("a")), x_fill_a) test(13.07, nafill(x, fill=factor("a", levels=levels(x))), x_fill_a) test(13.08, nafill(x, fill=factor("a", levels=c("a", "b"))), x_fill_a) test(13.09, nafill(x, fill=factor("a", levels=c("a", "d"))), x_fill_a) +test(13.09, nafill(x, fill=factor("d", levels=c("a", "b", "c", "d"))), x_fill_a) test(13.10, nafill(x, fill=NA), x) test(13.11, nafill(x, fill=NA_integer_), x) test(13.12, nafill(x, fill=NA_character_), x) From bc877755aae363062dbc0b4fae452ab77b40d3c8 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 28 Dec 2025 02:52:11 -0800 Subject: [PATCH 3/3] Reflect coerceAs update of levels in output --- inst/tests/nafill.Rraw | 18 ++++++++++++------ src/nafill.c | 13 +++++++++++-- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index d9a4341c0..0903d8896 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -339,17 +339,23 @@ x[c(3:4, 7:8)] = c("a", "b", "a", "c") x = as.factor(x) test(13.01, nafill(x, "locf"), replace(replace(x, 5:6, "b"), 9:10, "c")) test(13.02, nafill(x, "nocb"), replace(x, c(1:2, 5:6), "a")) -test(13.03, nafill(x, fill="a"), x_fill_a <- replace(x, c(1:2, 5:6, 9:10), "a")) +x_fill_a = replace(x, c(1:2, 5:6, 9:10), "a") +test(13.03, nafill(x, fill="a"), x_fill_a) test(13.04, nafill(x, fill=1L), x_fill_a) test(13.05, nafill(x, fill=1.0), x_fill_a) test(13.06, nafill(x, fill=factor("a")), x_fill_a) test(13.07, nafill(x, fill=factor("a", levels=levels(x))), x_fill_a) test(13.08, nafill(x, fill=factor("a", levels=c("a", "b"))), x_fill_a) -test(13.09, nafill(x, fill=factor("a", levels=c("a", "d"))), x_fill_a) -test(13.09, nafill(x, fill=factor("d", levels=c("a", "b", "c", "d"))), x_fill_a) -test(13.10, nafill(x, fill=NA), x) -test(13.11, nafill(x, fill=NA_integer_), x) -test(13.12, nafill(x, fill=NA_character_), x) +test(13.09, nafill(x, fill=factor("a", levels=c("a", "d"))), factor(x_fill_a, levels=c("a", "b", "c", "d"))) +x_fill_d = replace(factor(x, levels = c(levels(x), "d")), c(1:2, 5:6, 9:10), "d") +test(13.10, nafill(x, fill="d"), x_fill_d) +test(13.11, nafill(x, fill=factor("d", levels=c("a", "b", "c", "d"))), x_fill_d) +test(13.12, nafill(x, fill=factor("d", levels=c("d", "a", "b", "c"))), x_fill_d) +test(13.13, nafill(x, fill=factor("d", levels=c("d", "c", "b", "a"))), x_fill_d) +test(13.14, nafill(x, fill=factor("d", levels=c("b", "c", "d"))), x_fill_d) +test(13.15, nafill(x, fill=NA), x) +test(13.16, nafill(x, fill=NA_integer_), x) +test(13.17, nafill(x, fill=NA_character_), x) ## logical ## character diff --git a/src/nafill.c b/src/nafill.c index 4b1e107e8..0b663c0c7 100644 --- a/src/nafill.c +++ b/src/nafill.c @@ -218,8 +218,17 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S if (!binplace) { for (R_len_t i=0; i