diff --git a/NAMESPACE b/NAMESPACE index d712b318a..67993db4c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,7 +11,7 @@ export(setindex, setindexv, indices) export(as.data.table,is.data.table,test.data.table) export(last,first,like,"%like%","%ilike%","%flike%","%plike%",between,"%between%",inrange,"%inrange%", "%notin%") export(timetaken) -export(truelength, setalloccol, alloc.col, ":=", let) +export(truelength, setalloccol, setallocrow, alloc.col, ":=", let) export(setattr, setnames, setcolorder, set, setDT, setDF) export(setorder, setorderv) export(setNumericRounding, getNumericRounding) @@ -28,7 +28,7 @@ export(tstrsplit) export(frank) export(frankv) export(address) -export(.SD,.N,.I,.GRP,.NGRP,.BY,.EACHI, measure, measurev, patterns) +export(.SD,.N,.I,.GRP,.NGRP,.BY,.EACHI,.ROW, measure, measurev, patterns) # TODO(#6197): Export these. # export(., J) export(rleid) diff --git a/R/data.table.R b/R/data.table.R index f05220a62..42aeeeeeb 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -11,7 +11,7 @@ methods::setPackageName("data.table",.global) # (1) add to man/special-symbols.Rd # (2) export() in NAMESPACE # (3) add to vignettes/datatable-importing.Rmd#globals section -.SD = .N = .I = .GRP = .NGRP = .BY = .EACHI = NULL +.SD = .N = .I = .GRP = .NGRP = .BY = .EACHI = .ROW = NULL # These are exported to prevent NOTEs from R CMD check, and checkUsage via compiler. # But also exporting them makes it clear (to users and other packages) that data.table uses these as symbols. # And NULL makes it clear (to the R's mask check on loading) that they're variables not functions. @@ -1182,6 +1182,21 @@ replace_dot_alias = function(e) { names(jsub)="" jsub[[1L]]=as.name("list") } + + # Check for .ROW := NULL pattern (delete rows by reference) + if ((is.character(lhs) && length(lhs)==1L && lhs==".ROW") || + (is.name(lhs) && identical(lhs, quote(.ROW)))) { + if (is.null(jsub) || identical(jsub, quote(NULL))) { + if (is.null(irows)) + stopf(".ROW := NULL requires i= condition to specify rows to delete") + if (!missingby) + stopf(".ROW := NULL with 'by' or 'keyby' is not supported yet") + .Call(CdeleteRows, x, irows) + return(suppPrint(x)) + } else { + stopf(".ROW can only be used with := NULL to delete rows") + } + } av = all.vars(jsub,TRUE) if (!is.atomic(lhs)) stopf("LHS of := must be a symbol, or an atomic vector (column names or positions).") if (is.character(lhs)) { @@ -2719,6 +2734,10 @@ selfrefok = function(DT,verbose=getOption("datatable.verbose")) { .Call(Cselfrefokwrapper,DT,verbose) } +setallocrow = function(DT) { + .Call(Callocrow, DT) +} + truelength = function(x) .Call(Ctruelength,x) # deliberately no "truelength<-" method. setalloccol is the mechanism for that. # settruelength() no longer need (and so removed) now that data.table depends on R 2.14.0 diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c7205e52a..8c2de805f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21959,3 +21959,96 @@ test(2355.1, fread(txt, skip=0), data.table(V1 = c("b1", "c1"), a1 test(2355.2, fread(txt, skip=0, header=TRUE), data.table(V1 = c("b1", "c1"), a1 = c("b2", "c2"), a2 = c("b3", "c3")), warning="Added an extra default column name") test(2355.3, fread(txt, skip=0, header=FALSE), data.table(V1=character(), V2=character(), V3=character()), warning="Consider fill=TRUE") test(2355.4, fread(txt, skip=0, fill=TRUE), data.table(V1 = c("a1", "b1", "c1"), V2 = c("a2", "b2", "c2"), V3 = c("", "b3", "c3"))) + +# delete rows by reference #635 +# atomic types and list columns +dt = data.table( + int = 1:5, + real = c(1.1, 2.2, 3.3, 4.4, 5.5), + char = letters[1:5], + lgl = c(TRUE, FALSE, TRUE, FALSE, TRUE), + cplx = as.complex(1:5), + raw_col = as.raw(1:5), + list_col = list(1L, 1:2, 1:3, 1:4, 1:5) +) +test(2356.01, copy(dt)[1L, .ROW := NULL], dt[-1]) +test(2356.02, copy(dt)[1, .ROW := NULL], dt[-1]) +test(2356.03, copy(dt)[c(TRUE, FALSE, FALSE, TRUE, FALSE), .ROW := NULL], dt[-c(1,4)]) +test(2356.04, copy(dt)[int==1L, .ROW := NULL], dt[-1]) +test(2356.05, copy(dt)[int<2L, .ROW := NULL], dt[-1]) +test(2356.06, copy(dt)[-1, .ROW := NULL], dt[1]) +# zero row or empty data.tables +dt = data.table() +test(2356.07, dt[logical(0), .ROW := NULL], dt) +dt = data.table(a=integer(0), b=character(0)) +test(2356.08, dt[logical(0), .ROW := NULL], dt) +# multirow +dt = data.table(a=1:5, b=letters[1:5]) +test(2356.09, copy(dt)[c(1L, 3L), .ROW := NULL], dt[c(2,4,5)]) +test(2356.10, copy(dt)[c(TRUE, FALSE, TRUE, FALSE, TRUE), .ROW := NULL], dt[c(2,4)]) +test(2356.11, copy(dt)[1:2, .ROW := NULL], dt[3:5]) +test(2356.12, copy(dt)[1:5, .ROW := NULL], dt[0]) +# NA handling and edges case +dt = data.table(a=1:5, b=letters[1:5]) +test(2356.13, copy(dt)[c(1L, NA_integer_, 3L), .ROW := NULL], dt[c(2,4,5)]) +test(2356.14, copy(dt)[c(NA_integer_, NA_integer_), .ROW := NULL], dt) +test(2356.15, copy(dt)[c(TRUE, NA, FALSE, NA, TRUE), .ROW := NULL], dt[c(2,3,4)]) +test(2356.16, copy(dt)[integer(0), .ROW := NULL], dt) +test(2356.17, copy(dt)[logical(0), .ROW := NULL], dt) +test(2356.18, copy(dt)[c(FALSE, FALSE, FALSE, FALSE, FALSE), .ROW := NULL], dt) +test(2356.19, copy(dt)[a > 100, .ROW := NULL], dt) # no matches +# Duplicate indices +dt = data.table(a=1:5, b=letters[1:5]) +test(2356.20, copy(dt)[c(1L, 1L), .ROW := NULL], dt[-1]) +test(2356.21, copy(dt)[c(1L, 1L, 2L, 2L), .ROW := NULL], dt[3:5]) +test(2356.22, copy(dt)[c(3L, 1L, 3L, 1L), .ROW := NULL], dt[c(2,4,5)]) +# integer64 +if (test_bit64) { + dt = data.table(a=1:5, b=as.integer64(11:15)) + test(2356.23, copy(dt)[c(1L, 3L), .ROW := NULL], dt[-c(1L,3L)]) + test(2356.24, copy(dt)[1:5, .ROW := NULL], data.table(a=integer(0), b=integer64(0))) +} +# Date/IDate/ITime columns +dt = data.table(a=1:5, d=as.Date("2024-01-01") + 0:4, t=as.ITime(paste0(10:14, ":00:00")), dt=as.POSIXct("2024-01-01 12:00:00") + 3600*0:4) +test(2356.25, copy(dt)[c(1L, 3L), .ROW := NULL], dt[c(2,4,5)]) +test(2356.26, copy(dt)[c(2L, 4L), .ROW := NULL]$d, as.Date("2024-01-01") + c(0,2,4)) +# Factor columns +dt = data.table(a=1:5, f=factor(letters[1:5], levels=letters[1:10])) +test(2356.27, copy(dt)[c(1L, 3L), .ROW := NULL], dt[-c(1L,3L)]) +test(2356.28, levels(copy(dt)[c(1L, 3L), .ROW := NULL]$f), letters[1:10]) +dt = data.table(a=1:5, of=ordered(letters[1:5], levels=letters[5:1])) +test(2356.29, copy(dt)[c(2L, 4L), .ROW := NULL], dt[-c(2L,4L)]) +test(2356.30, is.ordered(copy(dt)[c(2, 4L), .ROW := NULL]$of)) +# Keys - should be cleared after deletion +dt = data.table(a=5:1, b=letters[1:5], key="a") +test(2356.31, key(copy(dt)[1L, .ROW := NULL]), NULL) +test(2356.32, haskey(copy(dt)[1L, .ROW := NULL]), FALSE) +# Indices - should be cleared after deletion +dt = data.table(a=1:5, b=letters[1:5], c=5:1) +setindex(dt, b) +test(2356.33, indices(copy(dt)[1L, .ROW := NULL]), NULL) +# row names +dt = data.table(a=1:5, b=letters[1:5]) +test(2356.34, attr(copy(dt)[c(1L, 3L), .ROW := NULL], "row.names"), 1:3) +# selfref check +test(2356.35, selfrefok(copy(dt)[1L, .ROW := NULL]), 1L) +# errors +dt = data.table(a=1:4, g=1:2) +test(2356.36, dt[1L, .ROW := 1L], error=".ROW can only be used with := NULL") +test(2356.37, dt[1L, .ROW := "delete"], error=".ROW can only be used with := NULL") +test(2356.38, dt[1L, .ROW := FALSE], error=".ROW can only be used with := NULL") +test(2356.39, dt[, .ROW := NULL], error=".ROW := NULL requires i= condition") +test(2356.40, dt[1L, .ROW := NULL, by=g], error=".ROW := NULL with 'by' or 'keyby' is not supported") +# large table +dt = data.table(a=1:20000, b=rep(letters, length.out=20000)) +idx = seq(1L, 20000L, by=2L) +test(2356.41, copy(dt)[idx, .ROW := NULL], dt[-idx]) +# Chaining and complexer i expressions +dt = data.table(a=1:10, b=letters[1:10]) +test(2356.42, copy(dt)[a>2, .ROW := NULL][b=="a"], data.table(a=1L, b="a")) +test(2356.43, copy(dt)[a %% 2 == 0, .ROW := NULL], dt[a %% 2 != 0]) +test(2356.44, copy(dt)[!(a < 5 & b != "d"), .ROW := NULL], dt[1:3]) +# make columns resizable +dt = data.table(a=1:3) +test(2356.91, truelength(dt$a), 0L) +test(2356.92, {setallocrow(dt); truelength(dt$a)}, 3L) diff --git a/man/truelength.Rd b/man/truelength.Rd index a85f78b1b..4b7901354 100644 --- a/man/truelength.Rd +++ b/man/truelength.Rd @@ -2,6 +2,7 @@ \alias{truelength} \alias{setalloccol} \alias{alloc.col} +\alias{setallocrow} \title{ Over-allocation access } \description{ These functions are experimental and somewhat advanced. By \emph{experimental} we mean their names might change and perhaps the syntax, argument names and types. So if you write a lot of code using them, you have been warned! They should work and be stable, though, so please report problems with them. \code{alloc.col} is just an alias to \code{setalloccol}. We recommend to use \code{setalloccol} (though \code{alloc.col} will continue to be supported) because the \code{set*} prefix in \code{setalloccol} makes it clear that its input argument is modified in-place. @@ -14,6 +15,7 @@ setalloccol(DT, alloc.col(DT, n = getOption("datatable.alloccol"), # default: 1024L verbose = getOption("datatable.verbose")) # default: FALSE +setallocrow(DT) } \arguments{ \item{x}{ Any type of vector, including \code{data.table} which is a \code{list} vector of column pointers. } @@ -34,6 +36,11 @@ alloc.col(DT, (perhaps in your .Rprofile); e.g., \code{options(datatable.alloccol=10000L)}. Please note: over-allocation of the column pointer vector is not for efficiency \emph{per se}; it is so that \code{:=} can add columns by reference without a shallow copy. + + \code{setallocrow} is a utility function that prepares columns for fast row operations (delete or insert (not implemented yet)) by reference. + Before deleting or inserting rows by reference, columns must be resizable. + \code{setallocrow} ensures all columns are in a resizable state by converting ALTREP columns to materialized form and duplicating + non-resizable columns as resizable vectors. This operation modifies \code{DT} by reference. } \value{ \code{truelength(x)} returns the length of the vector allocated in memory. \code{length(x)} of those items are in use. Currently, it is just the list vector of column @@ -43,6 +50,8 @@ alloc.col(DT, \code{setalloccol} \emph{reallocates} \code{DT} by reference. This may be useful for efficiency if you know you are about to going to add a lot of columns in a loop. It also returns the new \code{DT}, for convenience in compound queries. + + \code{setallocrow} modifies \code{DT} by reference to ensure all columns are resizable. } \seealso{ \code{\link{copy}} } \examples{ diff --git a/src/assign.c b/src/assign.c index 849cb08f2..f2fbcf9dd 100644 --- a/src/assign.c +++ b/src/assign.c @@ -496,7 +496,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) Rprintf(_("RHS for item %d has been duplicated because MAYBE_REFERENCED==%d MAYBE_SHARED==%d ALTREP==%d, but then is being plonked. length(values)==%d; length(cols)==%d\n"), i+1, MAYBE_REFERENCED(thisvalue), MAYBE_SHARED(thisvalue), ALTREP(thisvalue), length(values), length(cols)); } - thisvalue = copyAsPlain(thisvalue); // PROTECT not needed as assigned as element to protected list below. + thisvalue = copyAsPlain(thisvalue, false); // PROTECT not needed as assigned as element to protected list below. } else { if (verbose) Rprintf(_("Direct plonk of unnamed RHS, no copy. MAYBE_REFERENCED==%d, MAYBE_SHARED==%d\n"), MAYBE_REFERENCED(thisvalue), MAYBE_SHARED(thisvalue)); // e.g. DT[,a:=as.character(a)] as tested by 754.5 } diff --git a/src/coalesce.c b/src/coalesce.c index 10b7b7757..c34c1ff22 100644 --- a/src/coalesce.c +++ b/src/coalesce.c @@ -52,7 +52,7 @@ SEXP coalesce(SEXP x, SEXP inplaceArg, SEXP nan_is_na_arg) { error(_("Item %d is length %d but the first item is length %d. Only singletons are recycled."), i+2, length(item), nrow); } if (!inplace) { - first = PROTECT(copyAsPlain(first)); nprotect++; + first = PROTECT(copyAsPlain(first, false)); nprotect++; if (verbose) Rprintf(_("coalesce copied first item (inplace=FALSE)\n")); } const void **valP = (const void **)R_alloc(nval, sizeof(*valP)); diff --git a/src/data.table.h b/src/data.table.h index 434d0a340..b6138543b 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -183,6 +183,10 @@ void subsetVectorRaw(SEXP ans, SEXP source, SEXP idx, const bool anyNA); SEXP subsetVector(SEXP x, SEXP idx); const char *check_idx(SEXP idx, int max, bool *anyNA_out, bool *orderedSubset_out); +// deleterows.c +SEXP deleteRows(SEXP dt, SEXP rows_to_delete); +SEXP allocrow(SEXP dt); + // fcast.c SEXP int_vec_init(R_len_t n, int val); @@ -321,7 +325,7 @@ SEXP fitsInInt64R(SEXP x); bool allNA(SEXP x, bool errorForBadType); SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups, SEXP skip_absent); bool INHERITS(SEXP x, SEXP char_); -SEXP copyAsPlain(SEXP x); +SEXP copyAsPlain(SEXP x, bool resizable); void copySharedColumns(SEXP x); SEXP lock(SEXP x); SEXP unlock(SEXP x); diff --git a/src/deleterows.c b/src/deleterows.c new file mode 100644 index 000000000..30f6f465c --- /dev/null +++ b/src/deleterows.c @@ -0,0 +1,207 @@ +#include "data.table.h" + +static void computePrefixSum(const int *keep, int *dest, R_xlen_t n, int nthreads); +static void compactVectorRaw(SEXP col, const int *dest, const int *keep, R_xlen_t new_nrow, R_xlen_t old_nrow); + +// Helper function to make columns resizable for delete by reference +SEXP allocrow(SEXP dt) { + if (!INHERITS(dt, char_datatable)) + error(_("input to allocrow is not a data.table")); + + if (!xlength(dt)) return dt; // zero-column data.table + + for (R_xlen_t i = 0; i < length(dt); i++) { + SEXP col = VECTOR_ELT(dt, i); + if (!isVector(col)) + error("Cannot make non-vector column %lld resizable", (long long)(i + 1)); + + if (ALTREP(col)) SET_VECTOR_ELT(dt, i, copyAsPlain(col, true)); + + if (!R_isResizable(col)) { + SEXP newcol = R_duplicateAsResizable(col); + SET_VECTOR_ELT(dt, i, newcol); + } + } + + return dt; +} + +SEXP deleteRows(SEXP dt, SEXP rows_to_delete) { + if (!isNewList(dt)) + error("Internal error: deleteRows received non-list dt"); // #nocov + if (!xlength(dt)) return dt; // zero-column data.tabl + for (R_xlen_t i = 0; i < length(dt); i++) { + SEXP col = VECTOR_ELT(dt, i); + if (!isVector(col)) error("Cannot delete rows by reference from non-vector column %lld", (long long)(i + 1)); + if (ALTREP(col)) SET_VECTOR_ELT(dt, i, copyAsPlain(col, true)); + } + + const R_xlen_t ncol = length(dt); + const R_xlen_t old_nrow = length(VECTOR_ELT(dt, 0)); + int nprotect = 0; + + if (old_nrow == 0) return dt; + + if (!isInteger(rows_to_delete) && !isLogical(rows_to_delete)) + internal_error(__func__, "rows_to_delete must be logical, integer, or numeric"); // #nocov + + int *keep = (int *)R_alloc(old_nrow, sizeof(int)); + const R_xlen_t n = length(rows_to_delete); + for (R_xlen_t i = 0; i < old_nrow; i++) keep[i] = 1; + int *idx = INTEGER(rows_to_delete); + for (R_xlen_t j = 0; j < n; j++) { + if (idx[j] == NA_INTEGER) continue; + // should be checked from irows in [ + if (idx[j] < 1 || idx[j] > old_nrow) internal_error(__func__, "Row index %d out of range [1, %lld]", idx[j], (long long)old_nrow); //# nocov + keep[idx[j] - 1] = 0; + } + + R_xlen_t new_nrow = 0; + for (R_xlen_t i = 0; i < old_nrow; i++) new_nrow += keep[i]; + if (new_nrow == old_nrow) return dt; + + int *dest = (int *)R_alloc(old_nrow, sizeof(int)); + const int nthreads = getDTthreads(old_nrow, true); + computePrefixSum(keep, dest, old_nrow, nthreads); + + // Compact each column + for (R_xlen_t j = 0; j < ncol; j++) { + SEXP col = VECTOR_ELT(dt, j); + if (!R_isResizable(col)) { + // catered for ALTREP above + SEXP newcol = R_duplicateAsResizable(col); + SET_VECTOR_ELT(dt, j, newcol); + col = newcol; + } + compactVectorRaw(col, dest, keep, new_nrow, old_nrow); + R_resizeVector(col, new_nrow); + SET_VECTOR_ELT(dt, j, col); + } + + SEXP rownames = PROTECT(getAttrib(dt, R_RowNamesSymbol)); nprotect++; + if (!isNull(rownames)) { + // create them from scratch like in dogroups or subset to avoid R internal issues + SEXP rn = PROTECT(allocVector(INTSXP, 2)); nprotect++; + INTEGER(rn)[0] = NA_INTEGER; + INTEGER(rn)[1] = -(int)new_nrow; + setAttrib(dt, R_RowNamesSymbol, rn); + } + + // Clear key and indices + setAttrib(dt, install("sorted"), R_NilValue); + setAttrib(dt, install("index"), R_NilValue); + + UNPROTECT(nprotect); + return dt; +} + +// Parallel prefix sum (exclusive scan) +// Two-pass algorithm: first count per thread, then scan, then local prefix sum +static void computePrefixSum(const int *keep, int *dest, R_xlen_t n, int nthreads) { + if (nthreads == 1) { + // Sequential version + int sum = 0; + for (R_xlen_t i = 0; i < n; i++) { + dest[i] = sum; + sum += keep[i]; + } + return; + } + + // Parallel version with two passes + int *thread_counts = (int *)R_alloc(nthreads, sizeof(int)); + + // Pass 1: Count keeps per thread + #pragma omp parallel num_threads(nthreads) + { + const int tid = omp_get_thread_num(); + const R_xlen_t chunk_size = (n + nthreads - 1) / nthreads; + const R_xlen_t start = tid * chunk_size; + const R_xlen_t end = (start + chunk_size > n) ? n : start + chunk_size; + + int local_count = 0; + for (R_xlen_t i = start; i < end; i++) { + local_count += keep[i]; + } + thread_counts[tid] = local_count; + } + + // Sequential scan of thread counts to get offsets + int *thread_offsets = (int *)R_alloc(nthreads, sizeof(int)); + thread_offsets[0] = 0; + for (int t = 1; t < nthreads; t++) { + thread_offsets[t] = thread_offsets[t-1] + thread_counts[t-1]; + } + + // Pass 2: Compute local prefix sum with offset + #pragma omp parallel num_threads(nthreads) + { + const int tid = omp_get_thread_num(); + const R_xlen_t chunk_size = (n + nthreads - 1) / nthreads; + const R_xlen_t start = tid * chunk_size; + const R_xlen_t end = (start + chunk_size > n) ? n : start + chunk_size; + + int local_sum = thread_offsets[tid]; + for (R_xlen_t i = start; i < end; i++) { + dest[i] = local_sum; + local_sum += keep[i]; + } + } +} + +#define COMPACT(CTYPE, ACCESSOR) { \ + CTYPE *p = ACCESSOR(col); \ + R_xlen_t i = 0; \ + while (i < old_nrow) { \ + if (!keep[i]) { \ + i++; \ + continue; \ + } \ + R_xlen_t run_start = i; \ + int target_idx = dest[i]; \ + while (i < old_nrow && keep[i]) i++; \ + size_t run_len = i - run_start; \ + if (target_idx != run_start) { \ + memmove(p + target_idx, p + run_start, run_len * sizeof(CTYPE)); \ + } \ + } \ +} + + +// Type-specific stream compaction +static void compactVectorRaw(SEXP col, const int *dest, const int *keep, + R_xlen_t new_nrow, R_xlen_t old_nrow) { + switch(TYPEOF(col)) { + case INTSXP: + case LGLSXP: { + COMPACT(int, INTEGER); + break; + } + case REALSXP: { + COMPACT(double, REAL); + break; + } + case CPLXSXP: { + COMPACT(Rcomplex, COMPLEX); + break; + } + case RAWSXP: { + COMPACT(Rbyte, RAW); + break; + } + case STRSXP: { + for (R_xlen_t i = 0; i < old_nrow; i++) { + if (keep[i]) SET_STRING_ELT(col, dest[i], STRING_ELT(col, i)); + } + break; + } + case VECSXP: { + for (R_xlen_t i = 0; i < old_nrow; i++) { + if (keep[i]) SET_VECTOR_ELT(col, dest[i], VECTOR_ELT(col, i)); + } + break; + } + default: + error("Unsupported column type %s", type2char(TYPEOF(col))); + } +} diff --git a/src/dogroups.c b/src/dogroups.c index 5200d33f3..ce473a050 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -334,7 +334,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX target = VECTOR_ELT(dt, colj); bool copied = false; if (isNewList(target) && anySpecialStatic(RHS, specials)) { // see comments in anySpecialStatic() - RHS = PROTECT(copyAsPlain(RHS)); + RHS = PROTECT(copyAsPlain(RHS, false)); copied = true; } const char *warn = memrecycle(target, order, INTEGER(starts)[i]-1, grpn, RHS, 0, -1, 0, ""); @@ -440,7 +440,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX } bool copied = false; if (isNewList(target) && anySpecialStatic(source, specials)) { // see comments in anySpecialStatic() - source = PROTECT(copyAsPlain(source)); + source = PROTECT(copyAsPlain(source, false)); copied = true; } memrecycle(target, R_NilValue, thisansloc, maxn, source, 0, -1, 0, ""); diff --git a/src/fmelt.c b/src/fmelt.c index d6843c3ac..87e555911 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -811,7 +811,7 @@ SEXP fmelt(SEXP DT, SEXP id, SEXP measure, SEXP varfactor, SEXP valfactor, SEXP // edge case no measure.vars if (!data.lmax) { SEXP tt = PROTECT(shallowwrapper(DT, data.idcols)); protecti++; - ans = PROTECT(copyAsPlain(tt)); protecti++; + ans = PROTECT(copyAsPlain(tt, false)); protecti++; } else { ansvals = PROTECT(getvaluecols(DT, dtnames, LOGICAL(valfactor)[0], verbose, &data)); protecti++; ansvars = PROTECT(getvarcols(DT, dtnames, LOGICAL(varfactor)[0], verbose, &data)); protecti++; diff --git a/src/init.c b/src/init.c index 13421998b..f63f786c0 100644 --- a/src/init.c +++ b/src/init.c @@ -95,6 +95,8 @@ static const R_CallMethodDef callMethods[] = { {"CconvertNegAndZeroIdx", (DL_FUNC)&convertNegAndZeroIdx, -1}, {"Cfrank", (DL_FUNC)&frank, -1}, {"Cdt_na", (DL_FUNC)&dt_na, -1}, + {"CdeleteRows", (DL_FUNC)&deleteRows, 2}, + {"Callocrow", (DL_FUNC)&allocrow, 1}, {"Clookup", (DL_FUNC)&lookup, -1}, {"Coverlaps", (DL_FUNC)&overlaps, -1}, {"Cwhichwrapper", (DL_FUNC)&whichwrapper, -1}, diff --git a/src/reorder.c b/src/reorder.c index 8fe682e86..93a2a0a18 100644 --- a/src/reorder.c +++ b/src/reorder.c @@ -24,7 +24,7 @@ SEXP reorder(SEXP x, SEXP order) error(_("Column %d is length %d which differs from length of column 1 (%d). Invalid data.table."), i+1, length(v), nrow); if (RTYPE_SIZEOF(v) > maxSize) maxSize=RTYPE_SIZEOF(v); - if (ALTREP(v)) SET_VECTOR_ELT(x, i, copyAsPlain(v)); + if (ALTREP(v)) SET_VECTOR_ELT(x, i, copyAsPlain(v, false)); } copySharedColumns(x); // otherwise two columns which point to the same vector would be reordered and then re-reordered, issues linked in PR#3768 } else { @@ -40,7 +40,7 @@ SEXP reorder(SEXP x, SEXP order) if (length(order) != nrow) error("nrow(x)[%d]!=length(order)[%d]", nrow, length(order)); // # notranslate int nprotect = 0; - if (ALTREP(order)) { order=PROTECT(copyAsPlain(order)); nprotect++; } // TODO: if it's an ALTREP sequence some optimizations are possible rather than expand + if (ALTREP(order)) { order=PROTECT(copyAsPlain(order, false)); nprotect++; } // TODO: if it's an ALTREP sequence some optimizations are possible rather than expand const int *restrict idx = INTEGER_RO(order); int i=0; diff --git a/src/subset.c b/src/subset.c index b3372f057..4f1473c9d 100644 --- a/src/subset.c +++ b/src/subset.c @@ -313,7 +313,7 @@ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols) { // API change needs update NEWS.md for (int i=0; i