From 2af550d04e5dda39e1dea85a55b8bb4dd3e0f076 Mon Sep 17 00:00:00 2001 From: Andy Weiss Date: Tue, 10 Feb 2026 10:45:22 -0500 Subject: [PATCH 1/3] zlib: add support for brotli compression dictionary This change adds JS API support for custom compression dictionaries with Brotli in the zlib library. The underlying Brotli dependency already supports this and zstd exposes something similar. This follows the zstd approach for using a custom dictionary but for Brotli. Fixes: https://github.com/nodejs/node/issues/52250 --- lib/zlib.js | 7 +- src/node_zlib.cc | 100 +++++++++++++++++-- test/parallel/test-zlib-brotli-dictionary.js | 72 +++++++++++++ 3 files changed, 168 insertions(+), 11 deletions(-) create mode 100644 test/parallel/test-zlib-brotli-dictionary.js diff --git a/lib/zlib.js b/lib/zlib.js index 73eb8ff2120490..4aea60a4b6c425 100644 --- a/lib/zlib.js +++ b/lib/zlib.js @@ -834,7 +834,12 @@ function Brotli(opts, mode) { new binding.BrotliDecoder(mode) : new binding.BrotliEncoder(mode); this._writeState = new Uint32Array(2); - handle.init(brotliInitParamsArray, this._writeState, processCallback); + handle.init( + brotliInitParamsArray, + this._writeState, + processCallback, + opts?.dictionary && isArrayBufferView(opts.dictionary) ? opts.dictionary : undefined, + ); ZlibBase.call(this, opts, mode, handle, brotliDefaultOpts); } diff --git a/src/node_zlib.cc b/src/node_zlib.cc index d3bd0f6f6540b4..003618b1438a87 100644 --- a/src/node_zlib.cc +++ b/src/node_zlib.cc @@ -40,6 +40,7 @@ #include "brotli/decode.h" #include "brotli/encode.h" +#include "brotli/shared_dictionary.h" #include "zlib.h" #include "zstd.h" #include "zstd_errors.h" @@ -256,7 +257,7 @@ class BrotliEncoderContext final : public BrotliContext { public: void Close(); void DoThreadPoolWork(); - CompressionError Init(); + CompressionError Init(std::string_view dictionary = {}); CompressionError ResetStream(); CompressionError SetParams(int key, uint32_t value); CompressionError GetErrorInfo() const; @@ -268,13 +269,17 @@ class BrotliEncoderContext final : public BrotliContext { private: bool last_result_ = false; DeleteFnPtr state_; + DeleteFnPtr prepared_dictionary_; + // Dictionary data must remain valid while the prepared dictionary is alive. + std::vector dictionary_; }; class BrotliDecoderContext final : public BrotliContext { public: void Close(); void DoThreadPoolWork(); - CompressionError Init(); + CompressionError Init(std::string_view dictionary = {}); CompressionError ResetStream(); CompressionError SetParams(int key, uint32_t value); CompressionError GetErrorInfo() const; @@ -288,6 +293,8 @@ class BrotliDecoderContext final : public BrotliContext { BrotliDecoderErrorCode error_ = BROTLI_DECODER_NO_ERROR; std::string error_string_; DeleteFnPtr state_; + // Dictionary data must remain valid for the lifetime of the decoder. + std::vector dictionary_; }; class ZstdContext : public MemoryRetainer { @@ -830,7 +837,8 @@ class BrotliCompressionStream final : static void Init(const FunctionCallbackInfo& args) { BrotliCompressionStream* wrap; ASSIGN_OR_RETURN_UNWRAP(&wrap, args.This()); - CHECK(args.Length() == 3 && "init(params, writeResult, writeCallback)"); + CHECK((args.Length() == 3 || args.Length() == 4) && + "init(params, writeResult, writeCallback[, dictionary])"); CHECK(args[1]->IsUint32Array()); CHECK_GE(args[1].As()->Length(), 2); @@ -841,7 +849,19 @@ class BrotliCompressionStream final : wrap->InitStream(write_result, write_js_callback); AllocScope alloc_scope(wrap); - CompressionError err = wrap->context()->Init(); + std::string_view dictionary; + ArrayBufferViewContents contents; + if (args.Length() == 4 && !args[3]->IsUndefined()) { + if (!args[3]->IsArrayBufferView()) { + THROW_ERR_INVALID_ARG_TYPE( + wrap->env(), "dictionary must be an ArrayBufferView if provided"); + return; + } + contents.ReadValue(args[3]); + dictionary = std::string_view(contents.data(), contents.length()); + } + + CompressionError err = wrap->context()->Init(dictionary); if (err.IsError()) { wrap->EmitError(err); // TODO(addaleax): Sometimes we generate better error codes in C++ land, @@ -1387,23 +1407,60 @@ void BrotliEncoderContext::DoThreadPoolWork() { void BrotliEncoderContext::Close() { state_.reset(); + prepared_dictionary_.reset(); + dictionary_.clear(); mode_ = NONE; } -CompressionError BrotliEncoderContext::Init() { +CompressionError BrotliEncoderContext::Init(std::string_view dictionary) { brotli_alloc_func alloc = CompressionStreamMemoryOwner::AllocForBrotli; brotli_free_func free = CompressionStreamMemoryOwner::FreeForZlib; void* opaque = CompressionStream::AllocatorOpaquePointerForContext( this); + + // Clean up any previous dictionary state before re-initializing. + prepared_dictionary_.reset(); + dictionary_.clear(); + state_.reset(BrotliEncoderCreateInstance(alloc, free, opaque)); if (!state_) { return CompressionError("Could not initialize Brotli instance", "ERR_ZLIB_INITIALIZATION_FAILED", -1); - } else { - return CompressionError {}; } + + if (!dictionary.empty()) { + // The dictionary data must remain valid for the lifetime of the prepared + // dictionary, so copy it into a member vector. + dictionary_.assign( + reinterpret_cast(dictionary.data()), + reinterpret_cast(dictionary.data()) + + dictionary.size()); + + prepared_dictionary_.reset(BrotliEncoderPrepareDictionary( + BROTLI_SHARED_DICTIONARY_RAW, + dictionary_.size(), + dictionary_.data(), + BROTLI_MAX_QUALITY, + alloc, + free, + opaque)); + if (!prepared_dictionary_) { + return CompressionError("Failed to prepare brotli dictionary", + "ERR_ZLIB_DICTIONARY_LOAD_FAILED", + -1); + } + + if (!BrotliEncoderAttachPreparedDictionary(state_.get(), + prepared_dictionary_.get())) { + return CompressionError("Failed to attach brotli dictionary", + "ERR_ZLIB_DICTIONARY_LOAD_FAILED", + -1); + } + } + + return CompressionError {}; } CompressionError BrotliEncoderContext::ResetStream() { @@ -1435,6 +1492,7 @@ CompressionError BrotliEncoderContext::GetErrorInfo() const { void BrotliDecoderContext::Close() { state_.reset(); + dictionary_.clear(); mode_ = NONE; } @@ -1455,20 +1513,42 @@ void BrotliDecoderContext::DoThreadPoolWork() { } } -CompressionError BrotliDecoderContext::Init() { +CompressionError BrotliDecoderContext::Init(std::string_view dictionary) { brotli_alloc_func alloc = CompressionStreamMemoryOwner::AllocForBrotli; brotli_free_func free = CompressionStreamMemoryOwner::FreeForZlib; void* opaque = CompressionStream::AllocatorOpaquePointerForContext( this); + + // Clean up any previous dictionary state before re-initializing. + dictionary_.clear(); + state_.reset(BrotliDecoderCreateInstance(alloc, free, opaque)); if (!state_) { return CompressionError("Could not initialize Brotli instance", "ERR_ZLIB_INITIALIZATION_FAILED", -1); - } else { - return CompressionError {}; } + + if (!dictionary.empty()) { + // The dictionary data must remain valid for the lifetime of the decoder, + // so copy it into a member vector. + dictionary_.assign( + reinterpret_cast(dictionary.data()), + reinterpret_cast(dictionary.data()) + + dictionary.size()); + + if (!BrotliDecoderAttachDictionary(state_.get(), + BROTLI_SHARED_DICTIONARY_RAW, + dictionary_.size(), + dictionary_.data())) { + return CompressionError("Failed to attach brotli dictionary", + "ERR_ZLIB_DICTIONARY_LOAD_FAILED", + -1); + } + } + + return CompressionError {}; } CompressionError BrotliDecoderContext::ResetStream() { diff --git a/test/parallel/test-zlib-brotli-dictionary.js b/test/parallel/test-zlib-brotli-dictionary.js new file mode 100644 index 00000000000000..ecd7d70dd75909 --- /dev/null +++ b/test/parallel/test-zlib-brotli-dictionary.js @@ -0,0 +1,72 @@ +'use strict'; + +const common = require('../common'); +const assert = require('assert'); +const zlib = require('zlib'); + +const dictionary = Buffer.from( + `Lorem ipsum dolor sit amet, consectetur adipiscing elit. + Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. + Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.` +); + +const input = Buffer.from( + `Lorem ipsum dolor sit amet, consectetur adipiscing elit. + Lorem ipsum dolor sit amet, consectetur adipiscing elit. + Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. + Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. + Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.` +); + +// Test with convenience methods (async). +zlib.brotliCompress(input, { dictionary }, common.mustSucceed((compressed) => { + assert(compressed.length < input.length, + 'compressed data should be smaller with dictionary'); + zlib.brotliDecompress(compressed, { dictionary }, common.mustSucceed((decompressed) => { + assert.strictEqual(decompressed.toString(), input.toString()); + })); +})); + +// Test with streaming API. +{ + const encoder = zlib.createBrotliCompress({ dictionary }); + const decoder = zlib.createBrotliDecompress({ dictionary }); + + const chunks = []; + decoder.on('data', (chunk) => chunks.push(chunk)); + decoder.on('end', common.mustCall(() => { + const result = Buffer.concat(chunks); + assert.strictEqual(result.toString(), input.toString()); + })); + + encoder.pipe(decoder); + encoder.end(input); +} + +// Test that dictionary improves compression ratio. +{ + const withDict = zlib.brotliCompressSync(input, { dictionary }); + const withoutDict = zlib.brotliCompressSync(input); + + // Dictionary-based compression should be at least as good as without. + assert(withDict.length <= withoutDict.length, + `Dictionary compression (${withDict.length}) should not be ` + + `larger than non-dictionary compression (${withoutDict.length})`); + + // Verify decompression with dictionary works. + const decompressed = zlib.brotliDecompressSync(withDict, { dictionary }); + assert.strictEqual(decompressed.toString(), input.toString()); +} + +// Test that decompression without matching dictionary fails. +{ + const compressed = zlib.brotliCompressSync(input, { dictionary }); + assert.throws(() => { + zlib.brotliDecompressSync(compressed); + }, (err) => { + // The exact error may vary, but decoding should fail without the + // matching dictionary. + return err.code === 'ERR_BROTLI_COMPRESSION_FAILED' || + err instanceof Error; + }); +} From ea4034245ec324b6100ec0135173a94b98d88a80 Mon Sep 17 00:00:00 2001 From: Andy Weiss Date: Wed, 11 Feb 2026 11:46:32 -0500 Subject: [PATCH 2/3] Fix lint, convert to r-value ref instead of string_view, add tests --- lib/zlib.js | 15 ++++- src/node_zlib.cc | 31 ++++------ test/parallel/test-zlib-brotli-dictionary.js | 62 ++++++++++++++++++-- 3 files changed, 84 insertions(+), 24 deletions(-) diff --git a/lib/zlib.js b/lib/zlib.js index 4aea60a4b6c425..8809d7edbbdaa2 100644 --- a/lib/zlib.js +++ b/lib/zlib.js @@ -830,6 +830,19 @@ function Brotli(opts, mode) { }); } + let dictionary = opts?.dictionary; + if (dictionary !== undefined && !isArrayBufferView(dictionary)) { + if (isAnyArrayBuffer(dictionary)) { + dictionary = Buffer.from(dictionary); + } else { + throw new ERR_INVALID_ARG_TYPE( + 'options.dictionary', + ['Buffer', 'TypedArray', 'DataView', 'ArrayBuffer'], + dictionary, + ); + } + } + const handle = mode === BROTLI_DECODE ? new binding.BrotliDecoder(mode) : new binding.BrotliEncoder(mode); @@ -838,7 +851,7 @@ function Brotli(opts, mode) { brotliInitParamsArray, this._writeState, processCallback, - opts?.dictionary && isArrayBufferView(opts.dictionary) ? opts.dictionary : undefined, + dictionary, ); ZlibBase.call(this, opts, mode, handle, brotliDefaultOpts); diff --git a/src/node_zlib.cc b/src/node_zlib.cc index 003618b1438a87..df6363844f4980 100644 --- a/src/node_zlib.cc +++ b/src/node_zlib.cc @@ -257,7 +257,7 @@ class BrotliEncoderContext final : public BrotliContext { public: void Close(); void DoThreadPoolWork(); - CompressionError Init(std::string_view dictionary = {}); + CompressionError Init(std::vector&& dictionary = {}); CompressionError ResetStream(); CompressionError SetParams(int key, uint32_t value); CompressionError GetErrorInfo() const; @@ -279,7 +279,7 @@ class BrotliDecoderContext final : public BrotliContext { public: void Close(); void DoThreadPoolWork(); - CompressionError Init(std::string_view dictionary = {}); + CompressionError Init(std::vector&& dictionary = {}); CompressionError ResetStream(); CompressionError SetParams(int key, uint32_t value); CompressionError GetErrorInfo() const; @@ -849,19 +849,18 @@ class BrotliCompressionStream final : wrap->InitStream(write_result, write_js_callback); AllocScope alloc_scope(wrap); - std::string_view dictionary; - ArrayBufferViewContents contents; + std::vector dictionary; if (args.Length() == 4 && !args[3]->IsUndefined()) { if (!args[3]->IsArrayBufferView()) { THROW_ERR_INVALID_ARG_TYPE( wrap->env(), "dictionary must be an ArrayBufferView if provided"); return; } - contents.ReadValue(args[3]); - dictionary = std::string_view(contents.data(), contents.length()); + ArrayBufferViewContents contents(args[3]); + dictionary.assign(contents.data(), contents.data() + contents.length()); } - CompressionError err = wrap->context()->Init(dictionary); + CompressionError err = wrap->context()->Init(std::move(dictionary)); if (err.IsError()) { wrap->EmitError(err); // TODO(addaleax): Sometimes we generate better error codes in C++ land, @@ -1412,7 +1411,7 @@ void BrotliEncoderContext::Close() { mode_ = NONE; } -CompressionError BrotliEncoderContext::Init(std::string_view dictionary) { +CompressionError BrotliEncoderContext::Init(std::vector&& dictionary) { brotli_alloc_func alloc = CompressionStreamMemoryOwner::AllocForBrotli; brotli_free_func free = CompressionStreamMemoryOwner::FreeForZlib; void* opaque = @@ -1432,11 +1431,8 @@ CompressionError BrotliEncoderContext::Init(std::string_view dictionary) { if (!dictionary.empty()) { // The dictionary data must remain valid for the lifetime of the prepared - // dictionary, so copy it into a member vector. - dictionary_.assign( - reinterpret_cast(dictionary.data()), - reinterpret_cast(dictionary.data()) + - dictionary.size()); + // dictionary, so take ownership via move. + dictionary_ = std::move(dictionary); prepared_dictionary_.reset(BrotliEncoderPrepareDictionary( BROTLI_SHARED_DICTIONARY_RAW, @@ -1513,7 +1509,7 @@ void BrotliDecoderContext::DoThreadPoolWork() { } } -CompressionError BrotliDecoderContext::Init(std::string_view dictionary) { +CompressionError BrotliDecoderContext::Init(std::vector&& dictionary) { brotli_alloc_func alloc = CompressionStreamMemoryOwner::AllocForBrotli; brotli_free_func free = CompressionStreamMemoryOwner::FreeForZlib; void* opaque = @@ -1532,11 +1528,8 @@ CompressionError BrotliDecoderContext::Init(std::string_view dictionary) { if (!dictionary.empty()) { // The dictionary data must remain valid for the lifetime of the decoder, - // so copy it into a member vector. - dictionary_.assign( - reinterpret_cast(dictionary.data()), - reinterpret_cast(dictionary.data()) + - dictionary.size()); + // so take ownership via move. + dictionary_ = std::move(dictionary); if (!BrotliDecoderAttachDictionary(state_.get(), BROTLI_SHARED_DICTIONARY_RAW, diff --git a/test/parallel/test-zlib-brotli-dictionary.js b/test/parallel/test-zlib-brotli-dictionary.js index ecd7d70dd75909..1703a5e047041a 100644 --- a/test/parallel/test-zlib-brotli-dictionary.js +++ b/test/parallel/test-zlib-brotli-dictionary.js @@ -64,9 +64,63 @@ zlib.brotliCompress(input, { dictionary }, common.mustSucceed((compressed) => { assert.throws(() => { zlib.brotliDecompressSync(compressed); }, (err) => { - // The exact error may vary, but decoding should fail without the - // matching dictionary. - return err.code === 'ERR_BROTLI_COMPRESSION_FAILED' || - err instanceof Error; + assert.match(err.code, /ERR_/); + return true; }); } + +// Test that decompression with wrong dictionary fails. +{ + const compressed = zlib.brotliCompressSync(input, { dictionary }); + const wrongDictionary = Buffer.from('this is the wrong dictionary'); + assert.throws(() => { + zlib.brotliDecompressSync(compressed, { dictionary: wrongDictionary }); + }, (err) => { + assert.match(err.code, /ERR_/); + return true; + }); +} + +// Test that dictionary works with ArrayBuffer (converted to Buffer). +{ + const arrayBufferDict = dictionary.buffer.slice( + dictionary.byteOffset, + dictionary.byteOffset + dictionary.byteLength, + ); + const compressed = zlib.brotliCompressSync(input, { dictionary: arrayBufferDict }); + const decompressed = zlib.brotliDecompressSync(compressed, { dictionary: arrayBufferDict }); + assert.strictEqual(decompressed.toString(), input.toString()); +} + +// Test that dictionary works with TypedArray (Uint8Array). +{ + const uint8Dict = new Uint8Array(dictionary); + const compressed = zlib.brotliCompressSync(input, { dictionary: uint8Dict }); + const decompressed = zlib.brotliDecompressSync(compressed, { dictionary: uint8Dict }); + assert.strictEqual(decompressed.toString(), input.toString()); +} + +// Test that invalid dictionary type throws ERR_INVALID_ARG_TYPE. +for (const invalidDict of ['string', 123, true, { object: true }, [1, 2, 3]]) { + assert.throws(() => { + zlib.createBrotliCompress({ dictionary: invalidDict }); + }, { code: 'ERR_INVALID_ARG_TYPE' }); + + assert.throws(() => { + zlib.createBrotliDecompress({ dictionary: invalidDict }); + }, { code: 'ERR_INVALID_ARG_TYPE' }); +} + +// Test with streaming API and wrong dictionary emits error event. +{ + const compressed = zlib.brotliCompressSync(input, { dictionary }); + const wrongDict = Buffer.from('wrong dictionary data'); + const decoder = zlib.createBrotliDecompress({ dictionary: wrongDict }); + + decoder.on('error', common.mustCall((err) => { + assert.match(err.code, /ERR_/); + })); + + decoder.write(compressed); + decoder.end(); +} From f733e95c098a1cf396ad47dfd3950681bd5afb96 Mon Sep 17 00:00:00 2001 From: Andy Weiss Date: Thu, 12 Feb 2026 08:59:07 -0500 Subject: [PATCH 3/3] Actually fix the lint --- src/node_zlib.cc | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/node_zlib.cc b/src/node_zlib.cc index df6363844f4980..9d49f13d07c125 100644 --- a/src/node_zlib.cc +++ b/src/node_zlib.cc @@ -270,7 +270,8 @@ class BrotliEncoderContext final : public BrotliContext { bool last_result_ = false; DeleteFnPtr state_; DeleteFnPtr prepared_dictionary_; + BrotliEncoderDestroyPreparedDictionary> + prepared_dictionary_; // Dictionary data must remain valid while the prepared dictionary is alive. std::vector dictionary_; }; @@ -1434,14 +1435,14 @@ CompressionError BrotliEncoderContext::Init(std::vector&& dictionary) { // dictionary, so take ownership via move. dictionary_ = std::move(dictionary); - prepared_dictionary_.reset(BrotliEncoderPrepareDictionary( - BROTLI_SHARED_DICTIONARY_RAW, - dictionary_.size(), - dictionary_.data(), - BROTLI_MAX_QUALITY, - alloc, - free, - opaque)); + prepared_dictionary_.reset( + BrotliEncoderPrepareDictionary(BROTLI_SHARED_DICTIONARY_RAW, + dictionary_.size(), + dictionary_.data(), + BROTLI_MAX_QUALITY, + alloc, + free, + opaque)); if (!prepared_dictionary_) { return CompressionError("Failed to prepare brotli dictionary", "ERR_ZLIB_DICTIONARY_LOAD_FAILED", @@ -1456,7 +1457,7 @@ CompressionError BrotliEncoderContext::Init(std::vector&& dictionary) { } } - return CompressionError {}; + return CompressionError{}; } CompressionError BrotliEncoderContext::ResetStream() { @@ -1541,7 +1542,7 @@ CompressionError BrotliDecoderContext::Init(std::vector&& dictionary) { } } - return CompressionError {}; + return CompressionError{}; } CompressionError BrotliDecoderContext::ResetStream() {