Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

1. `fread()` with `skip=0` and `(header=TRUE|FALSE)` no longer skips the first row when it has fewer fields than subsequent rows, [#7463](https://github.com/Rdatatable/data.table/issues/7463). Thanks @emayerhofer for the report and @ben-schwen for the fix.

2. `set()` now automatically pre-allocates new column slots if needed, similar to what `:=` already does, [#1831](https://github.com/Rdatatable/data.table/issues/1831) [#4100](https://github.com/Rdatatable/data.table/issues/4100). Thanks to @zachokeeffe and @tyner for the report and @ben-schwen for the fix.

## data.table [v1.18.0](https://github.com/Rdatatable/data.table/milestone/37?closed=1) 23 December 2025

### BREAKING CHANGE
Expand Down
3 changes: 2 additions & 1 deletion R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -2857,7 +2857,8 @@ setcolorder = function(x, neworder=key(x), before=NULL, after=NULL, skip_absent=
set = function(x,i=NULL,j,value) # low overhead, loopable
{
# If removing columns from a table that's not selfrefok, need to call setalloccol first, #7488
if ((is.null(value) || (is.list(value) && any(vapply_1b(value, is.null)))) && selfrefok(x, verbose=FALSE) < 1L) {
if (((is.null(value) || (is.list(value) && any(vapply_1b(value, is.null)))) && selfrefok(x, verbose=FALSE) < 1L) ||
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

too many parentheses :)

I propose this helper:

.set_needs_alloccol = function(x, value) {
  if (truelength(x) <= length(x)) return(TRUE)
  if (selfrefok(x, verbose=FALSE) >= 1L) return(FALSE)
  if (is.null(value)) return(TRUE)
  if (!is.list(value)) return(FALSE)
  any(vapply_1b(value, is.null))
}

truelength(x) <= length(x)) { # automatically allocate more space when tl <= ncol (either full or loaded from disk)
name = substitute(x)
setalloccol(x, verbose=FALSE)
if (is.name(name)) {
Expand Down
3 changes: 2 additions & 1 deletion inst/tests/froll.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -2084,7 +2084,8 @@ if (use.fork) {
test(6010.772, .selfref.ok(ans[[2L]]))
ans = frollapply(1:2, 2, function(x) list(data.table(x)), fill=list(data.table(NA)), simplify=FALSE)
test(6010.773, !.selfref.ok(ans[[2L]][[1L]]))
test(6010.7731, set(ans[[2L]][[1L]],, "newcol", 1L), error="data.table has either been loaded from disk")
# deactivated by #5443
# test(6010.7731, set(ans[[2L]][[1L]],, "newcol", 1L), error="data.table has either been loaded from disk")
ans = lapply(ans, lapply, setDT)
test(6010.774, .selfref.ok(ans[[2L]][[1L]])) ## fix after
ans = frollapply(1:2, 2, function(x) list(data.table(x)), fill=list(data.table(NA)), simplify=function(x) lapply(x, lapply, setDT))
Expand Down
13 changes: 11 additions & 2 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -14797,7 +14797,7 @@ test(2016.1, name, "DT")
test(2016.2, DT, data.table(a=1:3))
test(2016.3, DT[2,a:=4L], data.table(a=INT(1,4,3))) # no error for := when existing column
test(2016.4, set(DT,3L,1L,5L), data.table(a=INT(1,4,5))) # no error for set() when existing column
test(2016.5, set(DT,2L,"newCol",5L), error="either been loaded from disk.*or constructed manually.*Please run setDT.*setalloccol.*on it first") # just set()
test(2016.5, set(DT,2L,"newCol",5L), data.table(a=INT(1,4,5), newCol=INT(NA,5L,NA))) # works since set overallocates #4100
test(2016.6, DT[2,newCol:=6L], data.table(a=INT(1,4,5), newCol=INT(NA,6L,NA))) # := ok (it changes DT in caller)
unlink(tt)

Expand Down Expand Up @@ -19478,7 +19478,7 @@ test(2290.4, DT[, `:=`(a = 2, c := 3)], error="It looks like you re-used `:=` in
df = data.frame(a=1:3)
setDT(df)
attr(df, "att") = 1
test(2291.1, set(df, NULL, "new", "new"), error="either been loaded from disk.*or constructed manually.*Please run setDT.*setalloccol.*on it first")
test(2291.1, set(df, NULL, "new", "new"), setattr(data.table(a=1:3, new="new"), "att", 1)) # fixed when calling setalloccol before set #4100

# ns-qualified bysub error, #6493
DT = data.table(a = 1)
Expand Down Expand Up @@ -21959,3 +21959,12 @@ test(2355.1, fread(txt, skip=0), data.table(V1 = c("b1", "c1"), a1
test(2355.2, fread(txt, skip=0, header=TRUE), data.table(V1 = c("b1", "c1"), a1 = c("b2", "c2"), a2 = c("b3", "c3")), warning="Added an extra default column name")
test(2355.3, fread(txt, skip=0, header=FALSE), data.table(V1=character(), V2=character(), V3=character()), warning="Consider fill=TRUE")
test(2355.4, fread(txt, skip=0, fill=TRUE), data.table(V1 = c("a1", "b1", "c1"), V2 = c("a2", "b2", "c2"), V3 = c("", "b3", "c3")))

# re-overallocate in set if quota is reached #496 #1831 #4100
DT = data.table()
test(2356.1, options=c(datatable.alloccol=1L), {for (i in seq(10L)) set(DT, j = paste0("V",i), value = i); ncol(DT)}, 10L)
DT = structure(list(a = 1, b = 2), class = c("data.table", "data.frame"))
test(2356.2, options=c(datatable.alloccol=1L), set(DT, j="c", value=3), data.table(a=1, b=2, c=3))
# ensure := and set are consistent if they need to overallocate
DT = data.table(); DT2 = data.table()
test(2356.3, options=c(datatable.alloccol=1L), {for (i in seq(10L)) set(DT, j = paste0("V",i), value = i)}, {for (i in seq(10)) DT2[, sprintf("V%d",i) := i]})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need ; DT and ; DT2 here, a for loop returns NULL so now the test just checks the code runs:

x = for (i in 1:10) i
dput(x)
# NULL

Loading