diff --git a/lib/zlib.js b/lib/zlib.js
index 73eb8ff2120490..8809d7edbbdaa2 100644
--- a/lib/zlib.js
+++ b/lib/zlib.js
@@ -830,11 +830,29 @@ function Brotli(opts, mode) {
     });
   }
 
+  let dictionary = opts?.dictionary;
+  if (dictionary !== undefined && !isArrayBufferView(dictionary)) {
+    if (isAnyArrayBuffer(dictionary)) {
+      dictionary = Buffer.from(dictionary);
+    } else {
+      throw new ERR_INVALID_ARG_TYPE(
+        'options.dictionary',
+        ['Buffer', 'TypedArray', 'DataView', 'ArrayBuffer'],
+        dictionary,
+      );
+    }
+  }
+
   const handle = mode === BROTLI_DECODE ?
     new binding.BrotliDecoder(mode) : new binding.BrotliEncoder(mode);
 
   this._writeState = new Uint32Array(2);
-  handle.init(brotliInitParamsArray, this._writeState, processCallback);
+  handle.init(
+    brotliInitParamsArray,
+    this._writeState,
+    processCallback,
+    dictionary,
+  );
 
   ZlibBase.call(this, opts, mode, handle, brotliDefaultOpts);
 }
diff --git a/src/node_zlib.cc b/src/node_zlib.cc
index d3bd0f6f6540b4..9d49f13d07c125 100644
--- a/src/node_zlib.cc
+++ b/src/node_zlib.cc
@@ -40,6 +40,7 @@
 
 #include "brotli/decode.h"
 #include "brotli/encode.h"
+#include "brotli/shared_dictionary.h"
 #include "zlib.h"
 #include "zstd.h"
 #include "zstd_errors.h"
@@ -256,7 +257,7 @@ class BrotliEncoderContext final : public BrotliContext {
  public:
   void Close();
   void DoThreadPoolWork();
-  CompressionError Init();
+  CompressionError Init(std::vector<uint8_t>&& dictionary = {});
   CompressionError ResetStream();
   CompressionError SetParams(int key, uint32_t value);
   CompressionError GetErrorInfo() const;
@@ -268,13 +269,18 @@ class BrotliEncoderContext final : public BrotliContext {
  private:
   bool last_result_ = false;
   DeleteFnPtr<BrotliEncoderState, BrotliEncoderDestroyInstance> state_;
+  DeleteFnPtr<BrotliEncoderPreparedDictionary,
+              BrotliEncoderDestroyPreparedDictionary>
+      prepared_dictionary_;
+  // Dictionary data must remain valid while the prepared dictionary is alive.
+  std::vector<uint8_t> dictionary_;
 };
 
 class BrotliDecoderContext final : public BrotliContext {
  public:
   void Close();
   void DoThreadPoolWork();
-  CompressionError Init();
+  CompressionError Init(std::vector<uint8_t>&& dictionary = {});
   CompressionError ResetStream();
   CompressionError SetParams(int key, uint32_t value);
   CompressionError GetErrorInfo() const;
@@ -288,6 +294,8 @@ class BrotliDecoderContext final : public BrotliContext {
   BrotliDecoderErrorCode error_ = BROTLI_DECODER_NO_ERROR;
   std::string error_string_;
   DeleteFnPtr<BrotliDecoderState, BrotliDecoderDestroyInstance> state_;
+  // Dictionary data must remain valid for the lifetime of the decoder.
+  std::vector<uint8_t> dictionary_;
 };
 
 class ZstdContext : public MemoryRetainer {
@@ -830,7 +838,8 @@ class BrotliCompressionStream final :
   static void Init(const FunctionCallbackInfo<Value>& args) {
     BrotliCompressionStream* wrap;
     ASSIGN_OR_RETURN_UNWRAP(&wrap, args.This());
-    CHECK(args.Length() == 3 && "init(params, writeResult, writeCallback)");
+    CHECK((args.Length() == 3 || args.Length() == 4) &&
+          "init(params, writeResult, writeCallback[, dictionary])");
 
     CHECK(args[1]->IsUint32Array());
     CHECK_GE(args[1].As<Uint32Array>()->Length(), 2);
@@ -841,7 +850,18 @@ class BrotliCompressionStream final :
     wrap->InitStream(write_result, write_js_callback);
 
     AllocScope alloc_scope(wrap);
-    CompressionError err = wrap->context()->Init();
+    std::vector<uint8_t> dictionary;
+    if (args.Length() == 4 && !args[3]->IsUndefined()) {
+      if (!args[3]->IsArrayBufferView()) {
+        THROW_ERR_INVALID_ARG_TYPE(
+            wrap->env(), "dictionary must be an ArrayBufferView if provided");
+        return;
+      }
+      ArrayBufferViewContents<uint8_t> contents(args[3]);
+      dictionary.assign(contents.data(), contents.data() + contents.length());
+    }
+
+    CompressionError err = wrap->context()->Init(std::move(dictionary));
     if (err.IsError()) {
       wrap->EmitError(err);
       // TODO(addaleax): Sometimes we generate better error codes in C++ land,
@@ -1387,23 +1407,62 @@ void BrotliEncoderContext::DoThreadPoolWork() {
 
 void BrotliEncoderContext::Close() {
   state_.reset();
+  prepared_dictionary_.reset();
+  dictionary_.clear();
   mode_ = NONE;
 }
 
-CompressionError BrotliEncoderContext::Init() {
+// Creates a fresh encoder. A non-empty `dictionary` replaces the stored one;
+// an empty one keeps the dictionary supplied earlier, so re-initializing
+// without arguments does not silently drop it.
+CompressionError BrotliEncoderContext::Init(std::vector<uint8_t>&& dictionary) {
   brotli_alloc_func alloc = CompressionStreamMemoryOwner::AllocForBrotli;
   brotli_free_func free = CompressionStreamMemoryOwner::FreeForZlib;
   void* opaque =
       CompressionStream<BrotliEncoderContext>::AllocatorOpaquePointerForContext(
           this);
+
+  // Destroy the previous encoder before the prepared dictionary it may still
+  // reference, matching the order used by Close().
+  state_.reset();
+  prepared_dictionary_.reset();
+
+  // The raw bytes must outlive the prepared dictionary, so take ownership.
+  if (!dictionary.empty()) {
+    dictionary_ = std::move(dictionary);
+  }
+
   state_.reset(BrotliEncoderCreateInstance(alloc, free, opaque));
   if (!state_) {
     return CompressionError("Could not initialize Brotli instance",
                             "ERR_ZLIB_INITIALIZATION_FAILED",
                             -1);
-  } else {
-    return CompressionError {};
   }
+
+  if (!dictionary_.empty()) {
+    prepared_dictionary_.reset(
+        BrotliEncoderPrepareDictionary(BROTLI_SHARED_DICTIONARY_RAW,
+                                       dictionary_.size(),
+                                       dictionary_.data(),
+                                       BROTLI_MAX_QUALITY,
+                                       alloc,
+                                       free,
+                                       opaque));
+    if (!prepared_dictionary_) {
+      return CompressionError("Failed to prepare brotli dictionary",
+                              "ERR_ZLIB_DICTIONARY_LOAD_FAILED",
+                              -1);
+    }
+
+    if (!BrotliEncoderAttachPreparedDictionary(state_.get(),
+                                               prepared_dictionary_.get())) {
+      return CompressionError("Failed to attach brotli dictionary",
+                              "ERR_ZLIB_DICTIONARY_LOAD_FAILED",
+                              -1);
+    }
+  }
+
+  return CompressionError{};
 }
 
 CompressionError BrotliEncoderContext::ResetStream() {
@@ -1435,6 +1494,7 @@ CompressionError BrotliEncoderContext::GetErrorInfo() const {
 
 void BrotliDecoderContext::Close() {
   state_.reset();
+  dictionary_.clear();
   mode_ = NONE;
 }
 
@@ -1455,20 +1515,42 @@ void BrotliDecoderContext::DoThreadPoolWork() {
   }
 }
 
-CompressionError BrotliDecoderContext::Init() {
+// Creates a fresh decoder. A non-empty `dictionary` replaces the stored one;
+// an empty one keeps the dictionary supplied earlier, so re-initializing
+// without arguments does not silently drop it.
+CompressionError BrotliDecoderContext::Init(std::vector<uint8_t>&& dictionary) {
   brotli_alloc_func alloc = CompressionStreamMemoryOwner::AllocForBrotli;
   brotli_free_func free = CompressionStreamMemoryOwner::FreeForZlib;
   void* opaque =
       CompressionStream<BrotliDecoderContext>::AllocatorOpaquePointerForContext(
           this);
+
+  // Destroy the previous decoder before replacing the bytes it may reference.
+  state_.reset();
+
+  // The raw bytes must stay valid while the decoder is alive; take ownership.
+  if (!dictionary.empty()) {
+    dictionary_ = std::move(dictionary);
+  }
+
   state_.reset(BrotliDecoderCreateInstance(alloc, free, opaque));
   if (!state_) {
     return CompressionError("Could not initialize Brotli instance",
                             "ERR_ZLIB_INITIALIZATION_FAILED",
                             -1);
-  } else {
-    return CompressionError {};
   }
+
+  if (!dictionary_.empty() &&
+      !BrotliDecoderAttachDictionary(state_.get(),
+                                     BROTLI_SHARED_DICTIONARY_RAW,
+                                     dictionary_.size(),
+                                     dictionary_.data())) {
+    return CompressionError("Failed to attach brotli dictionary",
+                            "ERR_ZLIB_DICTIONARY_LOAD_FAILED",
+                            -1);
+  }
+
+  return CompressionError{};
 }
 
 CompressionError BrotliDecoderContext::ResetStream() {
diff --git a/test/parallel/test-zlib-brotli-dictionary.js b/test/parallel/test-zlib-brotli-dictionary.js
new file mode 100644
index 00000000000000..1703a5e047041a
--- /dev/null
+++ b/test/parallel/test-zlib-brotli-dictionary.js
@@ -0,0 +1,141 @@
+'use strict';
+
+const common = require('../common');
+const assert = require('assert');
+const zlib = require('zlib');
+
+const dictionary = Buffer.from(
+  `Lorem ipsum dolor sit amet, consectetur adipiscing elit.
+  Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
+  Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.`
+);
+
+const input = Buffer.from(
+  `Lorem ipsum dolor sit amet, consectetur adipiscing elit.
+  Lorem ipsum dolor sit amet, consectetur adipiscing elit.
+  Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
+  Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
+  Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.`
+);
+
+// Test with convenience methods (async).
+zlib.brotliCompress(input, { dictionary }, common.mustSucceed((compressed) => {
+  assert(compressed.length < input.length,
+         'compressed data should be smaller with dictionary');
+  zlib.brotliDecompress(compressed, { dictionary }, common.mustSucceed((decompressed) => {
+    assert.strictEqual(decompressed.toString(), input.toString());
+  }));
+}));
+
+// Test with streaming API.
+{
+  const encoder = zlib.createBrotliCompress({ dictionary });
+  const decoder = zlib.createBrotliDecompress({ dictionary });
+
+  const chunks = [];
+  decoder.on('data', (chunk) => chunks.push(chunk));
+  decoder.on('end', common.mustCall(() => {
+    const result = Buffer.concat(chunks);
+    assert.strictEqual(result.toString(), input.toString());
+  }));
+
+  encoder.pipe(decoder);
+  encoder.end(input);
+}
+
+// Test that dictionary improves compression ratio.
+{
+  const withDict = zlib.brotliCompressSync(input, { dictionary });
+  const withoutDict = zlib.brotliCompressSync(input);
+
+  // Dictionary-based compression should be at least as good as without.
+  assert(withDict.length <= withoutDict.length,
+         `Dictionary compression (${withDict.length}) should not be ` +
+         `larger than non-dictionary compression (${withoutDict.length})`);
+
+  // Verify decompression with dictionary works.
+  const decompressed = zlib.brotliDecompressSync(withDict, { dictionary });
+  assert.strictEqual(decompressed.toString(), input.toString());
+}
+
+// Test that decompression without matching dictionary fails.
+{
+  const compressed = zlib.brotliCompressSync(input, { dictionary });
+  assert.throws(() => {
+    zlib.brotliDecompressSync(compressed);
+  }, (err) => {
+    assert.match(err.code, /ERR_/);
+    return true;
+  });
+}
+
+// Test that decompression with wrong dictionary fails.
+{
+  const compressed = zlib.brotliCompressSync(input, { dictionary });
+  const wrongDictionary = Buffer.from('this is the wrong dictionary');
+  assert.throws(() => {
+    zlib.brotliDecompressSync(compressed, { dictionary: wrongDictionary });
+  }, (err) => {
+    assert.match(err.code, /ERR_/);
+    return true;
+  });
+}
+
+// Test that dictionary works with ArrayBuffer (converted to Buffer).
+{
+  const arrayBufferDict = dictionary.buffer.slice(
+    dictionary.byteOffset,
+    dictionary.byteOffset + dictionary.byteLength,
+  );
+  const compressed = zlib.brotliCompressSync(input, { dictionary: arrayBufferDict });
+  const decompressed = zlib.brotliDecompressSync(compressed, { dictionary: arrayBufferDict });
+  assert.strictEqual(decompressed.toString(), input.toString());
+}
+
+// Test that dictionary works with TypedArray (Uint8Array).
+{
+  const uint8Dict = new Uint8Array(dictionary);
+  const compressed = zlib.brotliCompressSync(input, { dictionary: uint8Dict });
+  const decompressed = zlib.brotliDecompressSync(compressed, { dictionary: uint8Dict });
+  assert.strictEqual(decompressed.toString(), input.toString());
+}
+
+// Test that invalid dictionary type throws ERR_INVALID_ARG_TYPE.
+for (const invalidDict of ['string', 123, true, { object: true }, [1, 2, 3]]) {
+  assert.throws(() => {
+    zlib.createBrotliCompress({ dictionary: invalidDict });
+  }, { code: 'ERR_INVALID_ARG_TYPE' });
+
+  assert.throws(() => {
+    zlib.createBrotliDecompress({ dictionary: invalidDict });
+  }, { code: 'ERR_INVALID_ARG_TYPE' });
+}
+
+// Test with streaming API and wrong dictionary emits error event.
+{
+  const compressed = zlib.brotliCompressSync(input, { dictionary });
+  const wrongDict = Buffer.from('wrong dictionary data');
+  const decoder = zlib.createBrotliDecompress({ dictionary: wrongDict });
+
+  decoder.on('error', common.mustCall((err) => {
+    assert.match(err.code, /ERR_/);
+  }));
+
+  decoder.write(compressed);
+  decoder.end();
+}
+
+// Test that reset() keeps the dictionary attached to the stream.
+{
+  const compressed = zlib.brotliCompressSync(input, { dictionary });
+  const decoder = zlib.createBrotliDecompress({ dictionary });
+  decoder.reset();
+
+  const chunks = [];
+  decoder.on('data', (chunk) => chunks.push(chunk));
+  decoder.on('end', common.mustCall(() => {
+    assert.strictEqual(Buffer.concat(chunks).toString(), input.toString());
+  }));
+
+  decoder.end(compressed);
+}