Skip to content
Closed

Dev #29

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,22 @@ rticle
.Rhistory
src/ZSTD/LICENSCE.txt
aclocal.m4
^config\.log$
^config\.status$
^configure~$
Makefile
^.vscode
README.html
^.github
^qs2\.Rcheck$
^revdep$
^inst/analysis
^inst/standalone
^context
^inst/include/qdata-cpp/(\.git|\.github|benchmarks|docs|examples|tests)(/|$)
^inst/include/qdata-cpp/build(/|$)
^inst/include/qdata-cpp/(\.gitignore|CMakeLists\.txt|Makefile|README\.md|environment\.yml)$
^src/ZSTD/update\.sh$
^src/ZSTD/patches(/|$)
^inst/include/qdata-cpp/include/xxhash/update\.sh$
^inst/include/qdata-cpp/include/xxhash/patches(/|$)
44 changes: 29 additions & 15 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,32 +6,37 @@
# usethis::use_github_action("check-standard") will install it.
on:
push:
branches: [main, master]
branches: [main, master, dev]
pull_request:
branches: [main, master]
branches: [main, master, dev]

name: R-CMD-check

permissions:
contents: read

jobs:
R-CMD-check:
runs-on: ${{ matrix.config.os }}

name: ${{ matrix.config.os }} (${{ matrix.config.r }})
name: ${{ matrix.config.name }}

strategy:
fail-fast: false
matrix:
config:
- {os: macOS-latest, r: 'devel', http-user-agent: 'release'}
- {os: macOS-latest, r: 'release'}
- {os: windows-latest, r: 'devel', http-user-agent: 'release'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'oldrel-1'}
- {os: ubuntu-latest, r: 'oldrel-2'}
- {os: ubuntu-latest, r: 'oldrel-3'}
- {os: ubuntu-latest, r: 'oldrel-4'}
- {name: 'macOS-latest (devel, system zstd)', os: macOS-latest, r: 'devel', http-user-agent: 'release', install-args: ''}
- {name: 'macOS-latest (release, system zstd)', os: macOS-latest, r: 'release', install-args: ''}
- {name: 'macOS-latest (release, bundled zstd)', os: macOS-latest, r: 'release', install-args: '--configure-args=--with-zstd-force-compile'}
- {name: 'windows-latest (devel)', os: windows-latest, r: 'devel', http-user-agent: 'release', install-args: ''}
- {name: 'windows-latest (release)', os: windows-latest, r: 'release', install-args: ''}
- {name: 'ubuntu-latest (devel, system zstd)', os: ubuntu-latest, r: 'devel', http-user-agent: 'release', install-args: ''}
- {name: 'ubuntu-latest (release, system zstd)', os: ubuntu-latest, r: 'release', install-args: ''}
- {name: 'ubuntu-latest (release, bundled zstd)', os: ubuntu-latest, r: 'release', install-args: '--configure-args=--with-zstd-force-compile'}
- {name: 'ubuntu-latest (oldrel-1)', os: ubuntu-latest, r: 'oldrel-1', install-args: ''}
- {name: 'ubuntu-latest (oldrel-2)', os: ubuntu-latest, r: 'oldrel-2', install-args: ''}
- {name: 'ubuntu-latest (oldrel-3)', os: ubuntu-latest, r: 'oldrel-3', install-args: ''}
- {name: 'ubuntu-latest (oldrel-4)', os: ubuntu-latest, r: 'oldrel-4', install-args: ''}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
Expand All @@ -42,7 +47,9 @@ jobs:
- name: Windows CRLF fix
run: git config --global core.autocrlf false

- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
submodules: recursive

- uses: r-lib/actions/setup-pandoc@v2

Expand All @@ -61,7 +68,14 @@ jobs:
extra-packages: any::rcmdcheck
needs: check

- uses: r-lib/actions/check-r-package@v2
- if: matrix.config.install-args == ''
uses: r-lib/actions/check-r-package@v2
with:
args: 'c("--no-manual", "--as-cran")'
upload-snapshots: true

- if: matrix.config.install-args != ''
uses: r-lib/actions/check-r-package@v2
with:
args: 'c("--no-manual", "--as-cran", "--install-args=${{ matrix.config.install-args }}")'
upload-snapshots: true
7 changes: 7 additions & 0 deletions .github/workflows/rhub.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ on:
env:
QS_EXTENDED_TESTS: true

permissions:
contents: read

jobs:

setup:
Expand Down Expand Up @@ -54,6 +57,8 @@ jobs:

steps:
- uses: r-hub/actions/checkout@v1
with:
submodules: recursive
- uses: r-hub/actions/platform-info@v1
with:
token: ${{ secrets.RHUB_TOKEN }}
Expand All @@ -79,6 +84,8 @@ jobs:

steps:
- uses: r-hub/actions/checkout@v1
with:
submodules: recursive
- uses: r-hub/actions/setup-r@v1
with:
job-config: ${{ matrix.config.job-config }}
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ configure~
*.gz
*.json
context
context/*
4 changes: 4 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[submodule "inst/include/qdata-cpp"]
path = inst/include/qdata-cpp
url = https://github.com/qsbase/qdata-cpp.git
branch=main
18 changes: 16 additions & 2 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
Version 0.1.8 (2026-03-03)
* Add `xxhash` checks by and throw warning on mismatch by default
Version 0.1.8 (2026-04-08)
* `use_alt_rep` parameter temporarily disabled with warning for this version
* Add stored `xxhash` checks and warn on mismatch by default
* Replace the old custom unwind-jump handling with `Rcpp::unwindProtect()` and extend the same cleanup model to qdata read/write paths
* Add `R_CheckStack()` to qdata recursive read/write paths so deep nesting fails cleanly instead of crashing
* Harden malformed-input handling in `qs2`, `qdata`, `qx_dump()`, and `qs_to_rds()`: reject oversized compressed-block headers, oversized qdata lengths, short malformed blocks, invalid attribute counts, malformed attribute names, and semantically invalid R attributes
* Add max output size parameter in `zstd_decompress_file()` and `zstd_in()`, and delete partial output on failure
* Make `qs_save()` and `qd_save()` fail cleanly if file writes start successfully but later fail (e.g. due to full disk)
* Require actual raw vectors in `zstd_(de)compress_raw()`, `blosc_(un)shuffle_raw()`, and `xxhash_raw()` instead of allowing implicit coercion
* Harden `base85` / `base91` utilities and expand utility regression coverage for malformed and empty inputs
* Fix qdata delayed attribute replay ordering for malformed or canonicalized attribute subtrees
* Remove the old dynamic-blocksize plumbing and keep the fixed block-size path only
* Update bundled zstd from `1.5.6` to `1.5.7` and vendored qdata-cpp xxHash from `0.8.2` to `0.8.3`
* Add reproducible local vendoring helpers for bundled zstd and vendored xxHash
* Add deterministic malformed-input regressions and expand fuzz coverage in local hardening scripts
* Refactor qdata internals into a vendored standalone `qdata-cpp` package
* Fix usage of `Rf_error` error handling (https://github.com/qsbase/qs2/issues/26)
* Fix performance bug, use correct shuffle flag at high compress levels

Expand Down
10 changes: 5 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: qs2
Type: Package
Title: Efficient Serialization of R Objects
Version: 0.1.8
Date: 2026-03-03
Date: 2026-04-08
Authors@R: c(
person("Travers", "Ching", email = "traversc@gmail.com", role = c("aut", "cre", "cph")),
person("Yann", "Collet", role = c("ctb", "cph"), comment = "Yann Collet is the author of the bundled zstd"),
Expand All @@ -18,10 +18,10 @@ LazyData: true
Biarch: true
Depends: R (>= 3.5.0)
Imports:
Rcpp, stringfish (>= 0.18.0)
LinkingTo: Rcpp, stringfish, RcppParallel
Suggests: knitr, rmarkdown, dplyr, data.table, stringi
SystemRequirements: GNU make
Rcpp, RcppParallel
LinkingTo: Rcpp, RcppParallel
Suggests: knitr, rmarkdown, dplyr, data.table, stringi, stringfish (>= 0.18.0)
SystemRequirements: GNU make, C++17
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.3
Expand Down
30 changes: 12 additions & 18 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ PACKAGE := $(shell perl -aF: -ne 'print, exit if s/^Package:\s+//' DESCRIPTION)
VERSION := $(shell perl -aF: -ne 'print, exit if s/^Version:\s+//' DESCRIPTION)
BUILD := $(PACKAGE)_$(VERSION).tar.gz

.PHONY: doc build install test vignette $(BUILD)
.PHONY: doc build install test test-short vignette submodules $(BUILD)

submodules:
git submodule sync --recursive
git submodule update --remote --init --recursive --force

check: $(BUILD)
R CMD check --as-cran $<
Expand Down Expand Up @@ -88,30 +92,20 @@ install-compile-zstd:
R CMD build . --no-build-vignettes
R CMD INSTALL $(BUILD) --configure-args="--with-zstd-force-compile"

install-dynamic-blocksize:
find . -type f -exec chmod 644 {} \;
find . -type d -exec chmod 755 {} \;
chmod 755 cleanup
chmod 755 configure
# find src/ -type f -exec chmod 644 {} \;
# chmod 644 ChangeLog DESCRIPTION Makefile NAMESPACE README.md
./configure
./cleanup
Rscript -e "library(Rcpp); compileAttributes('.');"
Rscript -e "devtools::load_all(); roxygen2::roxygenise('.');"
find . -iname "*.a" -exec rm {} \;
find . -iname "*.o" -exec rm {} \;
find . -iname "*.so" -exec rm {} \;
R CMD build . --no-build-vignettes
R CMD INSTALL $(BUILD) --configure-args="--with-dynamic-blocksize"

vignette:
Rscript -e "rmarkdown::render(input='vignettes/vignette.rmd', output_format='html_vignette')"
IS_GITHUB=Yes Rscript -e "rmarkdown::render(input='vignettes/vignette.rmd', output_file='../README.md', output_format=rmarkdown::github_document(html_preview=FALSE))"; unset IS_GITHUB
# mv vignettes/vignette.md README.md
# sed -r -i 's/\((.+)\.png/\(vignettes\/\1\.png/' README.md

test-short:
Rscript tests/qs_savem_testing.R
Rscript tests/correctness_testing.R
Rscript tests/utility_testing.R
Rscript tests/qdata_cpp_external_testing.R

test:
Rscript tests/qs_savem_testing.R
QS_EXTENDED_TESTS=1 Rscript tests/correctness_testing.R; unset QS_EXTENDED_TESTS
Rscript tests/utility_testing.R
Rscript tests/qdata_cpp_external_testing.R
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,6 @@ export(encode_source)

export(qs_cache)

import(stringfish)
importFrom(Rcpp,evalCpp)
importFrom(RcppParallel, RcppParallelLibs)
useDynLib(qs2, .registration = TRUE)
12 changes: 2 additions & 10 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,6 @@ check_TBB <- function() {
.Call(`_qs2_check_TBB`)
}

check_internal_blocksize <- function() {
.Call(`_qs2_check_internal_blocksize`)
}

internal_set_blocksize <- function(size) {
.Call(`_qs2_internal_set_blocksize`, size)
}

internal_is_utf8_locale <- function(size) {
.Call(`_qs2_internal_is_utf8_locale`, size)
}
Expand Down Expand Up @@ -157,7 +149,7 @@ zstd_compress_file <- function(input_file, output_file, compress_level = qopt("c
invisible(.Call(`_qs2_zstd_compress_file`, input_file, output_file, compress_level))
}

zstd_decompress_file <- function(input_file, output_file) {
invisible(.Call(`_qs2_zstd_decompress_file`, input_file, output_file))
zstd_decompress_file <- function(input_file, output_file, max_output_bytes = NULL) {
invisible(.Call(`_qs2_zstd_decompress_file`, input_file, output_file, max_output_bytes))
}

9 changes: 6 additions & 3 deletions R/qopt.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#'
#' This function provides an interface to retrieve or update internal qs2 options
#' such as compression level, shuffle flag, number of threads, checksum validation,
#' warning for unsupported types, and ALTREP usage. It directly calls the underlying
#' warning for unsupported types, and requested ALTREP usage. It directly calls the underlying
#' C-level functions.
#'
#' @details The default settings are:
Expand All @@ -14,9 +14,12 @@
#' \item \code{nthreads}: 1L
#' \item \code{validate_checksum}: FALSE
#' \item \code{warn_unsupported_types}: TRUE (used only in \code{qd_save})
#' \item \code{use_alt_rep}: FALSE (used only in \code{qd_read})
#' \item \code{use_alt_rep}: FALSE (accepted by \code{qd_read} and \code{qd_deserialize}, but temporarily disabled)
#' }
#'
#' When \code{parameter = "use_alt_rep"} is set to \code{TRUE}, qdata reads currently
#' warn and fall back to ordinary character vectors.
#'
#' When \code{value} is \code{NULL}, the current value of the specified option is returned.
#' Otherwise, the option is set to \code{value} and the new value is returned invisibly.
#'
Expand All @@ -42,7 +45,7 @@
#' # Get the current setting for warn_unsupported_types (used in qd_save):
#' qopt("warn_unsupported_types")
#'
#' # Get the current setting for use_alt_rep (used in qd_read):
#' # Get the current setting for use_alt_rep:
#' qopt("use_alt_rep")
#'
#' @export
Expand Down
18 changes: 16 additions & 2 deletions R/qs_to_rds.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,25 @@ qs_to_rds <- function(input_file, output_file, compress_level = 6) {
if(dump$format != "qs2") {
stop("qs2 format not detected")
}
if(identical(dump$stored_hash, "0")) {
stop("qs2 file does not contain a stored hash")
}
if(!identical(dump$stored_hash, dump$computed_hash)) {
stop("qs2 file hash mismatch")
}
con <- gzfile(output_file, "wb", compression = compress_level)
for(i in 1:length(dump$blocks)) {
ok <- FALSE
on.exit({
try(close(con), silent = TRUE)
if(!ok && file.exists(output_file)) {
unlink(output_file)
}
}, add = TRUE)
for(i in seq_along(dump$blocks)) {
writeBin(dump$blocks[[i]], con)
}
close(con)
ok <- TRUE
}

#' RDS to qs2 format
Expand All @@ -56,7 +70,7 @@ qs_to_rds <- function(input_file, output_file, compress_level = 6) {
#' stopifnot(identical(x, x2))
#' @export
rds_to_qs <- function(input_file, output_file, compress_level = 3) {
MAX_BLOCKSIZE <- check_internal_blocksize() # defined in io/io_common.h
MAX_BLOCKSIZE <- 1048576L # defined in io/io_common.h
HEADER_SIZE <- 24 # defined in qx_file_headers.h
tmp_output <- tempfile()
qs_save(NULL, tmp_output, compress_level = compress_level, shuffle = FALSE)
Expand Down
4 changes: 2 additions & 2 deletions R/qx_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ shared_params_save <- function(file_output=TRUE, warn_unsupported_types=FALSE) {
'@param compress_level The compression level used (the initial value is 3L).',
'',
'The maximum and minimum possible values depend on the version of the ZSTD library used.',
'As of ZSTD 1.5.6 the maximum compression level is 22, and the minimum is -131072.',
'As of ZSTD 1.5.7 the maximum compression level is 22, and the minimum is -131072.',
'Usually, values in the low positive range offer very good performance in terms',
'of speed and compression.',
'@param shuffle Whether to allow byte shuffling when compressing data (the initial value is TRUE).',
Expand All @@ -16,7 +16,7 @@ shared_params_save <- function(file_output=TRUE, warn_unsupported_types=FALSE) {
shared_params_read <- function(file_input=TRUE, use_alt_rep=FALSE) {
c('@param file The file name/path.'[file_input],
'@param input The raw vector to deserialize.'[!file_input],
'@param use_alt_rep Use ALTREP when reading in string data (the initial value is FALSE).'[use_alt_rep],
'@param use_alt_rep Request ALTREP when reading qdata string data. This option is temporarily disabled; if TRUE, qs2 warns and falls back to ordinary character vectors (the initial value is FALSE).'[use_alt_rep],
'@param validate_checksum If TRUE, validate checksum before deserialization and error on mismatch (or missing checksum). If FALSE, checksum is computed during read and mismatches (or missing checksum) produce a warning after reading (the initial value is FALSE).',
'@param nthreads The number of threads to use when reading data (the initial value is 1L). When TBB is not available, values greater than 1 emit a warning and fall back to 1.'
)
Expand Down
11 changes: 8 additions & 3 deletions R/zstd_file_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,14 @@ NULL
#'
#' A utility function to decompress a zstd file to disk.
#'
#' @usage zstd_decompress_file(input_file, output_file)
#' @usage zstd_decompress_file(input_file, output_file, max_output_bytes = NULL)
#'
#' @name zstd_decompress_file
#' @param input_file Path to the input file.
#' @param output_file Path to the output file.
#' @param max_output_bytes Optional maximum number of decompressed output bytes.
#' When supplied, decompression stops with an error before writing a chunk that
#' would exceed this limit.
#'
#' @return No value is returned. The file is written to disk.
#' @export
Expand Down Expand Up @@ -68,6 +71,8 @@ NULL
#' @param envir Environment for `FUN` evaluation.
#' @param tmpfile Temporary file path. If not supplied, a temp file is created
#' and removed on exit.
#' @param max_output_bytes Optional maximum number of decompressed output bytes
#' passed through to [zstd_decompress_file()].
#'
#' @return The value returned by `FUN`.
#' @export
Expand All @@ -80,7 +85,7 @@ NULL
#' dt <- zstd_in(data.table::fread, file = zfile)
#' print(nrow(dt))
#' }
zstd_in <- function(FUN, ..., envir = parent.frame(), tmpfile = tempfile()) {
zstd_in <- function(FUN, ..., envir = parent.frame(), tmpfile = tempfile(), max_output_bytes = NULL) {
params <- list(...)
w <- which(names(params) != "")
if (length(w) == 0) stop("expecting at least one named parameter for file path")
Expand All @@ -93,7 +98,7 @@ zstd_in <- function(FUN, ..., envir = parent.frame(), tmpfile = tempfile()) {
stop("file path does not exist: ", input_path)
}
on.exit(unlink(tmpfile), add = TRUE)
zstd_decompress_file(input_path, tmpfile)
zstd_decompress_file(input_path, tmpfile, max_output_bytes = max_output_bytes)
params[[w]] <- tmpfile
do.call(FUN, params, envir = envir)
}
Expand Down
Loading
Loading