From 348a7fe5cfaff99dfd8b89b43b0f3fc891d2104c Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Tue, 23 Jun 2026 13:48:27 +0000 Subject: [PATCH 1/2] perf(upgrade): apply delta patch chains in memory with cached base reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multi-patch delta upgrades previously wrote each intermediate binary to disk, copied it to a temp file, and recomputed its SHA-256 — N-1 redundant disk round-trips and hash passes per chain. Patch application now keeps intermediates in memory and only persists and hashes the final binary. The base ("old") binary is read on demand via positional reads (pread) through an OldReader abstraction instead of being loaded whole into the JS heap, dropping the single-patch peak heap from ~100 MB to ~KB. A bounded 1 MiB read-ahead cache coalesces bsdiff's many small windowed reads into a handful of block reads, avoiding a per-window syscall storm. A redundant Uint8Array copy of the base is also dropped. - bspatch.ts: extract transformPatch core; add OldReader (fd-backed pread with block cache + in-memory) and applyPatchChainInMemory; applyPatch is now a thin wrapper - delta-upgrade.ts: applyPatchChain delegates to applyPatchChainInMemory; remove disk-intermediate alternation and scratch-file cleanup - tests: multi-hop chain, memory/pread equivalence, block-cache boundary coverage, no-intermediate-files assertions, e2e two-hop chain --- src/lib/bspatch.ts | 515 ++++++++++++++++++++++++++------- src/lib/delta-upgrade.ts | 81 +----- test/e2e/delta-upgrade.test.ts | 35 ++- test/lib/bspatch.test.ts | 313 +++++++++++++++++++- test/lib/delta-upgrade.test.ts | 29 +- 5 files changed, 775 insertions(+), 198 deletions(-) diff --git a/src/lib/bspatch.ts b/src/lib/bspatch.ts index 8a8a69882..e0decb724 100644 --- a/src/lib/bspatch.ts +++ b/src/lib/bspatch.ts @@ -5,11 +5,22 @@ * TRDIFF10 format (produced by zig-bsdiff with `--use-zstd`). Designed for * minimal memory usage during CLI self-upgrades: * - * - Old binary: copy to temp file, then read via `readFile()` (~100 MB heap) + * - Old binary: copy to temp file, then read on demand via positional `read()` + * (`pread`), so the base never sits fully in the JS heap — only the windows + * actually referenced are pulled in, served from the OS page cache * - Diff/extra blocks: streamed via zstd `Transform` from `node:zlib` * - Output: written incrementally to disk via `createWriteStream()` * - Integrity: SHA-256 computed inline via `node:crypto` * + * Multi-patch chains keep every intermediate result in memory and only persist + * (and hash) the final binary, avoiding the redundant disk write, temp-copy, and + * SHA-256 pass that a file-by-file chain would incur per hop. The running binary + * is the only input copied to a temp file. See {@link applyPatchChainInMemory}. + * + * The base ("old") bytes are accessed through an {@link OldReader} so the same + * transform serves both an fd-backed on-disk binary (first hop / single patch) + * and an in-memory intermediate buffer (subsequent hops). + * * TRDIFF10 format (from zig-bsdiff): * ``` * [0..8] magic: "TRDIFF10" @@ -22,7 +33,7 @@ import { createHash } from "node:crypto"; import { constants, copyFileSync, createWriteStream } from "node:fs"; -import { readFile, unlink } from "node:fs/promises"; +import { type FileHandle, open, readFile, unlink } from "node:fs/promises"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { Readable } from "node:stream"; @@ -239,25 +250,199 @@ function createZstdStreamReader(compressed: Uint8Array): BufferedStreamReader { ); } -/** Result of loading the old binary for patching */ -type OldFileHandle = { - /** In-memory view of the old binary */ - data: Uint8Array; - /** Cleanup function to call after patching (removes temp copy, if any) */ - cleanup: () => void | Promise; +/** + * Random-access view of the base ("old") binary during patching. + * + * Abstracts over an fd-backed on-disk file (read on demand via `pread`) and an + * in-memory buffer (a multi-patch intermediate), so {@link transformPatch} can + * source old bytes the same way regardless of where they live. + */ +type OldReader = { + /** + * Read exactly `len` bytes starting at `pos`. + * + * Positions outside `[0, size)` are zero-filled, matching the original + * `oldFile[oldpos + i] ?? 0` semantics (bsdiff seeks can reference offsets + * past the end, and a negative/oversized seek must read as zeros rather than + * fail). The returned buffer is always exactly `len` bytes. + */ + read: (pos: number, len: number) => Promise; + /** Release any held resources (fd, temp copy). Safe to call more than once. */ + close: () => Promise; }; /** - * Load the old binary for read access during patching. + * {@link OldReader} backed by an in-memory buffer. + * + * Used for multi-patch intermediates, whose bytes already live in the JS heap + * and must stay there for the next hop's random access. + */ +class MemoryOldReader implements OldReader { + private readonly data: Uint8Array; + + constructor(data: Uint8Array) { + this.data = data; + } + + read(pos: number, len: number): Promise { + const out = new Uint8Array(len); // zero-filled + const start = Math.max(pos, 0); + const end = Math.min(pos + len, this.data.length); + if (end > start) { + out.set(this.data.subarray(start, end), start - pos); + } + return Promise.resolve(out); + } + + close(): Promise { + // Bytes live in the JS heap — nothing to release. + return Promise.resolve(); + } +} + +/** + * Size of {@link FileOldReader}'s read-ahead cache block (1 MiB). + * + * bsdiff references the base mostly forward in many small windows; caching a + * sliding block of this size collapses thousands of per-window positional reads + * into roughly `fileSize / BLOCK_SIZE` reads, at a bounded ~1 MiB memory cost. + */ +const OLD_READER_BLOCK_SIZE = 1024 * 1024; + +/** + * {@link OldReader} backed by an open file descriptor, read on demand via + * positional reads (`pread`) through a single-block read-ahead cache. * - * Strategy: copy to temp file, then read into memory. The copy avoids - * ETXTBSY (Linux) / AMFI SIGKILL (macOS) issues with reading the running - * binary directly. On CoW filesystems (btrfs, xfs, APFS) the copy is a - * metadata-only reflink (near-instant). + * Keeps the base binary out of the JS heap — only the referenced windows are + * pulled in, served from the OS page cache populated by the reflink copy. The + * cache coalesces the many small windowed reads bsdiff performs (mostly forward + * with occasional jumps) into a handful of block reads, avoiding a per-window + * syscall storm while staying bounded at {@link OLD_READER_BLOCK_SIZE}. + */ +class FileOldReader implements OldReader { + private closed = false; + private readonly handle: FileHandle; + private readonly size: number; + private readonly tempPath: string; + + /** Read-ahead block buffer (allocated once, reused across refills). */ + private readonly block: Buffer = Buffer.alloc(OLD_READER_BLOCK_SIZE); + /** File offset the cached block starts at, or -1 when the cache is empty. */ + private blockStart = -1; + /** Number of valid bytes currently held in the block. */ + private blockLen = 0; + + constructor(handle: FileHandle, size: number, tempPath: string) { + this.handle = handle; + this.size = size; + this.tempPath = tempPath; + } + + async read(pos: number, len: number): Promise { + const out = Buffer.alloc(len); // zero-filled; out-of-range stays zero + const start = Math.max(pos, 0); + const end = Math.min(pos + len, this.size); + if (end <= start) { + return out; // window is entirely out of range — all zeros + } + + const need = end - start; + const outOffset = start - pos; + + if (need > OLD_READER_BLOCK_SIZE) { + // Window larger than a cache block — read straight into the output and + // leave the cache untouched (caching it would blow the memory bound). + await this.readExact(out, outOffset, need, start); + return out; + } + + if (!this.blockCovers(start, end)) { + await this.fillBlock(start); + } + out.set( + this.block.subarray(start - this.blockStart, end - this.blockStart), + outOffset + ); + return out; + } + + /** True when the cached block fully covers `[start, end)`. */ + private blockCovers(start: number, end: number): boolean { + return ( + this.blockStart >= 0 && + start >= this.blockStart && + end <= this.blockStart + this.blockLen + ); + } + + /** + * Refill the cache block starting at `start`. The length is clamped to the + * file size; callers only reach here when `[start, end)` fits in one block, + * and `start + len <= size`, so the read never crosses EOF. + */ + private async fillBlock(start: number): Promise { + const len = Math.min(OLD_READER_BLOCK_SIZE, this.size - start); + await this.readExact(this.block, 0, len, start); + this.blockStart = start; + this.blockLen = len; + } + + /** + * Read exactly `length` bytes at file offset `filePos` into `buf` at `offset`, + * looping over short positional reads (possible across some filesystems). + */ + private async readExact( + buf: Buffer, + offset: number, + length: number, + filePos: number + ): Promise { + let read = 0; + while (read < length) { + const { bytesRead } = await this.handle.read( + buf, + offset + read, + length - read, + filePos + read + ); + if (bytesRead === 0) { + break; // Unexpected EOF within bounds — leave remainder as-is + } + read += bytesRead; + } + } + + async close(): Promise { + if (this.closed) { + return; + } + this.closed = true; + try { + await this.handle.close(); + } catch { + // fd may already be closed — safe to ignore + } + await unlink(this.tempPath).catch(() => { + /* Best-effort cleanup — OS will reclaim on reboot */ + }); + } +} + +/** + * Open the old binary for on-demand read access during patching. + * + * Strategy: copy to a temp file, then read windows on demand via `pread`. The + * copy avoids ETXTBSY (Linux) / AMFI SIGKILL (macOS) issues with reading the + * running binary directly; on CoW filesystems (btrfs, xfs, APFS) it is a + * metadata-only reflink (near-instant). Reading on demand keeps the ~100 MB + * base out of the JS heap. + * + * Falls back to a full in-memory read of the original file if the copy or open + * fails (rare) — correctness over the memory optimization. */ let loadCounter = 0; -async function loadOldBinary(oldPath: string): Promise { +async function loadOldBinary(oldPath: string): Promise { loadCounter += 1; const tempCopy = join( tmpdir(), @@ -267,78 +452,43 @@ async function loadOldBinary(oldPath: string): Promise { // COPYFILE_FICLONE: attempt CoW reflink first (near-instant on btrfs/xfs/APFS), // silently falls back to regular copy on filesystems that don't support it. copyFileSync(oldPath, tempCopy, constants.COPYFILE_FICLONE); - - return { - data: new Uint8Array(await readFile(tempCopy)), - cleanup: () => - unlink(tempCopy).catch(() => { - /* Best-effort cleanup — OS will reclaim on reboot */ - }), - }; + const handle = await open(tempCopy, "r"); + const { size } = await handle.stat(); + return new FileOldReader(handle, size, tempCopy); } catch { - // Copy failed — read directly into JS heap + // Copy/open failed — fall back to a direct in-memory read of the original. await unlink(tempCopy).catch(() => { /* May not exist if copyFileSync failed */ }); - return { - data: new Uint8Array(await readFile(oldPath)), - cleanup: () => { - // Data is in JS heap — no temp file to clean up - }, - }; + return new MemoryOldReader(await readFile(oldPath)); } } -/** Resources to clean up after patch application. */ -type PatchResources = { - writer: import("node:fs").WriteStream; - /** Returns the latest write error (live reference, not a snapshot). */ - getWriteError: () => Error | undefined; - diffReader: BufferedStreamReader; - extraReader: BufferedStreamReader; - cleanupOldFile: () => void | Promise; -}; - /** - * Clean up all patch resources: flush the output stream, cancel decompression - * readers, and remove temp files. Each step runs regardless of prior failures. - */ -async function cleanupPatchResources(r: PatchResources): Promise { - try { - await new Promise((resolve, reject) => { - r.writer.end((err?: Error | null) => { - const finalErr = err ?? r.getWriteError(); - if (finalErr) { - reject(finalErr); - } else { - resolve(); - } - }); - }); - } finally { - await Promise.all([r.diffReader.cancel(), r.extraReader.cancel()]); - await r.cleanupOldFile(); - } -} - -/** - * Apply a TRDIFF10 binary patch with streaming I/O for minimal memory usage. + * Core TRDIFF10 transform. * - * Copies the old file to a temp path and reads it into memory. Streams - * diff/extra blocks via `node:zlib` zstd decompressor, writes output via - * `createWriteStream()`, and computes SHA-256 inline. + * Applies a patch to in-memory old bytes, emitting output chunks in order via + * `onChunk`. Handles header parsing, streaming zstd decompression of the diff + * and extra blocks, the wrapping-add reconstruction, and output-size validation. * - * @param oldPath - Path to the existing (old) binary file + * The base bytes are pulled from `oldReader` one diff-window at a time (the + * algorithm only references the old binary during the diff step), so the caller + * decides whether they come from disk or memory. Output routing is likewise the + * caller's choice: `onChunk` writes to disk, hashes, and/or collects into a + * buffer. The diff/extra decompression readers are always cancelled before + * returning, even on error. `onChunk` may throw to abort the transform early + * (used to surface a streaming write failure). + * + * @param oldReader - Random-access view of the base ("old") binary * @param patchData - Complete TRDIFF10 patch file contents - * @param destPath - Path to write the patched (new) binary - * @returns SHA-256 hex digest of the written output - * @throws {Error} On corrupt patch, I/O failure, or size mismatch + * @param onChunk - Receives each output chunk in order; may throw to abort + * @throws {Error} On corrupt patch, or when output size disagrees with the header */ -export async function applyPatch( - oldPath: string, +async function transformPatch( + oldReader: OldReader, patchData: Uint8Array, - destPath: string -): Promise { + onChunk: (chunk: Uint8Array) => void +): Promise { const { controlLen, diffLen, newSize } = parsePatchHeader(patchData); // Slice compressed blocks from the patch buffer @@ -357,21 +507,6 @@ export async function applyPatch( ); const extraReader = createZstdStreamReader(patchData.subarray(extraStart)); - // Load old binary via copy-then-read (or direct read as fallback). - const { data: oldFile, cleanup: cleanupOldFile } = - await loadOldBinary(oldPath); - - // Streaming output: write directly to disk, compute SHA-256 inline - const writer = createWriteStream(destPath); - const hasher = createHash("sha256"); - - // Capture write errors early — without a listener, Node crashes with - // ERR_UNHANDLED_ERROR if a write fails (ENOSPC, EIO, etc.) during the loop. - let writeError: Error | undefined; - writer.on("error", (err) => { - writeError ??= err; - }); - let oldpos = 0; let newpos = 0; @@ -382,9 +517,6 @@ export async function applyPatch( controlPos < controlBlock.byteLength; controlPos += 24 ) { - if (writeError) { - break; - } const readDiffBy = offtin(controlBlock, controlPos); const readExtraBy = offtin(controlBlock, controlPos + 8); const seekBy = offtin(controlBlock, controlPos + 16); @@ -392,16 +524,17 @@ export async function applyPatch( // Step 1: Read diff bytes and add to old file bytes (wrapping u8 add) if (readDiffBy > 0) { const diffChunk = await diffReader.read(readDiffBy); + // Pull exactly the old-file window this step references (zero-filled + // beyond the file's bounds — see OldReader.read). + const oldChunk = await oldReader.read(oldpos, readDiffBy); const outputChunk = new Uint8Array(readDiffBy); for (let i = 0; i < readDiffBy; i++) { // Wrapping unsigned byte addition, matching zig-bsdiff's @addWithOverflow - outputChunk[i] = - ((oldFile[oldpos + i] ?? 0) + (diffChunk[i] ?? 0)) % 256; + outputChunk[i] = ((oldChunk[i] ?? 0) + (diffChunk[i] ?? 0)) % 256; } - writer.write(outputChunk); - hasher.update(outputChunk); + onChunk(outputChunk); oldpos += readDiffBy; newpos += readDiffBy; } @@ -409,8 +542,7 @@ export async function applyPatch( // Step 2: Copy extra bytes directly to output (new data) if (readExtraBy > 0) { const extraChunk = await extraReader.read(readExtraBy); - writer.write(extraChunk); - hasher.update(extraChunk); + onChunk(extraChunk); newpos += readExtraBy; } @@ -418,13 +550,7 @@ export async function applyPatch( oldpos += seekBy; } } finally { - await cleanupPatchResources({ - writer, - getWriteError: () => writeError, - diffReader, - extraReader, - cleanupOldFile, - }); + await Promise.all([diffReader.cancel(), extraReader.cancel()]); } // Validate output size matches header @@ -433,6 +559,187 @@ export async function applyPatch( `Output size mismatch: wrote ${newpos} bytes, expected ${newSize}` ); } +} + +/** + * Apply a patch to the base bytes from `oldReader`, streaming the result to + * `destPath` while computing its SHA-256. + * + * Used for the final hop of a chain (and single-patch upgrades), where the + * output must be persisted and verified. + * + * @param oldReader - Random-access view of the base ("old") binary + * @param patchData - Complete TRDIFF10 patch file contents + * @param destPath - Path to write the patched output + * @returns SHA-256 hex digest of the written output + * @throws {Error} On corrupt patch, I/O failure, or size mismatch + */ +async function applyReaderToFile( + oldReader: OldReader, + patchData: Uint8Array, + destPath: string +): Promise { + const writer = createWriteStream(destPath); + const hasher = createHash("sha256"); + + // Capture write errors early — without a listener, Node crashes with + // ERR_UNHANDLED_ERROR if a write fails (ENOSPC, EIO, etc.) during the loop. + let writeError: Error | undefined; + writer.on("error", (err) => { + writeError ??= err; + }); + + try { + await transformPatch(oldReader, patchData, (chunk) => { + // Abort the transform on the first I/O failure. Throwing here unwinds + // through transformPatch's reader cleanup; the writer is then flushed + // and the error re-surfaced in the finally below. + if (writeError) { + throw writeError; + } + writer.write(chunk); + hasher.update(chunk); + }); + } finally { + await new Promise((resolve, reject) => { + writer.end((err?: Error | null) => { + const finalErr = err ?? writeError; + if (finalErr) { + reject(finalErr); + } else { + resolve(); + } + }); + }); + } return hasher.digest("hex"); } + +/** + * Apply a patch to the base bytes from `oldReader`, returning the result as a + * new in-memory buffer. + * + * Used for the intermediate hops of a multi-patch chain: the output becomes the + * base for the next patch without ever touching disk, and no SHA-256 is computed + * (only the final binary is hashed and verified). + * + * @param oldReader - Random-access view of the base ("old") binary + * @param patchData - Complete TRDIFF10 patch file contents + * @returns The patched output bytes + * @throws {Error} On corrupt patch or size mismatch + */ +async function applyReaderToMemory( + oldReader: OldReader, + patchData: Uint8Array +): Promise { + // Preallocate the exact output size from the header so chunks can be copied + // in place — avoids a final concat pass over ~100 MB of output. + const { newSize } = parsePatchHeader(patchData); + const output = new Uint8Array(newSize); + let offset = 0; + + await transformPatch(oldReader, patchData, (chunk) => { + output.set(chunk, offset); + offset += chunk.byteLength; + }); + + return output; +} + +/** + * Apply a patch to in-memory old bytes, returning the result as a new buffer. + * + * Convenience wrapper over the internal reader-based path for callers that + * already hold the base bytes in memory (e.g. tests). Production chains use + * {@link applyPatchChainInMemory}, which reads the on-disk base on demand. + * + * @param oldFile - Full contents of the base ("old") binary + * @param patchData - Complete TRDIFF10 patch file contents + * @returns The patched output bytes + * @throws {Error} On corrupt patch or size mismatch + */ +export function applyPatchToMemory( + oldFile: Uint8Array, + patchData: Uint8Array +): Promise { + return applyReaderToMemory(new MemoryOldReader(oldFile), patchData); +} + +/** + * Apply a sequence of TRDIFF10 patches, oldest first, writing the final binary + * to `destPath` and returning its SHA-256. + * + * The base binary at `oldPath` is loaded once (copied to a temp file to avoid + * reading the running executable in place — see {@link loadOldBinary}). Every + * intermediate result is kept in memory and fed straight into the next patch, + * so intermediates never hit disk and only the final binary is hashed. This + * eliminates the N−1 redundant disk writes, temp-copies, and SHA-256 passes a + * file-by-file chain would incur — and because reads and writes never target + * the same path, there is no risk of truncating a file that is being read. + * + * For a single-patch chain this is equivalent to applying that patch straight + * to `destPath`. + * + * @param oldPath - Path to the base ("old") binary + * @param patches - Patches to apply in order (oldest first); must be non-empty + * @param destPath - Path to write the final patched binary + * @returns SHA-256 hex digest of the final output + * @throws {Error} When `patches` is empty, or on corrupt patch / I/O / size mismatch + */ +export async function applyPatchChainInMemory( + oldPath: string, + patches: Uint8Array[], + destPath: string +): Promise { + if (patches.length === 0) { + throw new Error("Cannot apply an empty patch chain"); + } + + // First hop reads the on-disk base on demand (fd-backed). Subsequent hops + // read the previous in-memory output. Each reader is closed before the next + // replaces it; the active one is closed in the finally. + let reader = await loadOldBinary(oldPath); + + try { + // Intermediate hops stay entirely in memory — no disk I/O, no hashing. + for (let i = 0; i < patches.length - 1; i++) { + const patch = patches[i]; + if (!patch) { + throw new Error(`Missing patch at index ${i}`); + } + const next = await applyReaderToMemory(reader, patch); + await reader.close(); + reader = new MemoryOldReader(next); + } + + // Final hop streams to disk and computes the verification hash. + const finalPatch = patches.at(-1); + if (!finalPatch) { + throw new Error("Missing final patch"); + } + return await applyReaderToFile(reader, finalPatch, destPath); + } finally { + await reader.close(); + } +} + +/** + * Apply a single TRDIFF10 binary patch and write the result to `destPath`. + * + * Thin wrapper over {@link applyPatchChainInMemory} for the common single-patch + * case; preserved as the documented entry point for one-shot patch application. + * + * @param oldPath - Path to the existing (old) binary file + * @param patchData - Complete TRDIFF10 patch file contents + * @param destPath - Path to write the patched (new) binary + * @returns SHA-256 hex digest of the written output + * @throws {Error} On corrupt patch, I/O failure, or size mismatch + */ +export function applyPatch( + oldPath: string, + patchData: Uint8Array, + destPath: string +): Promise { + return applyPatchChainInMemory(oldPath, [patchData], destPath); +} diff --git a/src/lib/delta-upgrade.ts b/src/lib/delta-upgrade.ts index 991210914..b6e0a2fee 100644 --- a/src/lib/delta-upgrade.ts +++ b/src/lib/delta-upgrade.ts @@ -17,8 +17,6 @@ * - Any error occurs during patch download or application */ -import { unlinkSync } from "node:fs"; - // biome-ignore lint/performance/noNamespaceImport: Sentry SDK recommends namespace import import * as Sentry from "@sentry/node-core/light"; import { compare as semverCompare, valid as semverValid } from "semver"; @@ -29,7 +27,7 @@ import { isDowngrade, isNightlyVersion, } from "./binary.js"; -import { applyPatch } from "./bspatch.js"; +import { applyPatchChainInMemory } from "./bspatch.js"; import { CLI_VERSION } from "./constants.js"; import { customFetch } from "./custom-ca.js"; import { formatBytes } from "./formatters/numbers.js"; @@ -1158,75 +1156,14 @@ async function resolveNightlyChainWithContext( ); } -/** Remove intermediate patching files, ignoring errors. */ -function cleanupIntermediates(destPath: string): void { - for (const suffix of [".patching.a", ".patching.b"]) { - try { - unlinkSync(`${destPath}${suffix}`); - } catch { - // Ignore cleanup errors - } - } -} - -/** - * Apply patches sequentially, alternating between two intermediate files. - * - * Extracted to keep cognitive complexity manageable when the caller wraps - * this in a tracing span. - * - * @returns SHA-256 hex of the final output - */ -async function applyPatchesSequentially( - chain: PatchChain, - oldBinaryPath: string, - destPath: string -): Promise { - let currentOldPath = oldBinaryPath; - let sha256 = ""; - - // Alternate between two intermediate paths to avoid reading and writing - // the same file (mmap'd read + writer truncation = corruption). - const intermediateA = `${destPath}.patching.a`; - const intermediateB = `${destPath}.patching.b`; - - try { - for (let i = 0; i < chain.patches.length; i++) { - const patch = chain.patches[i]; - if (!patch) { - throw new Error(`Missing patch at index ${i}`); - } - const isLast = i === chain.patches.length - 1; - const intermediate = i % 2 === 0 ? intermediateA : intermediateB; - const outputPath = isLast ? destPath : intermediate; - - sha256 = await withTracing( - `apply-patch-${i}`, - "upgrade.delta.apply", - () => applyPatch(currentOldPath, patch.data, outputPath) - ); - - if (!isLast) { - currentOldPath = outputPath; - } - } - } finally { - // Always clean up intermediate files, even on failure - if (chain.patches.length > 1) { - cleanupIntermediates(destPath); - } - } - - return sha256; -} - /** - * Apply a resolved patch chain sequentially and verify the result. + * Apply a resolved patch chain and verify the result. * - * For single-patch chains, applies directly from old binary to dest. - * For multi-patch chains, alternates between two intermediate files - * so that read and write never target the same path — writing to the - * source would truncate it and corrupt the output. + * Delegates to {@link applyPatchChainInMemory}, which loads the base binary + * once, keeps every intermediate hop in memory (no per-hop disk writes, + * temp-copies, or SHA-256 passes), and streams only the final binary to + * `destPath`. Because reads and writes never target the same path, there is no + * read/write truncation hazard. * * Does **not** set executable permissions — the caller * (`downloadBinaryToTemp`) handles that uniformly for both delta @@ -1257,9 +1194,9 @@ export function applyPatchChain( `Applying ${chain.patches.length} patch(es), expected SHA-256: ${chain.expectedSha256.slice(0, 12)}...` ); - const sha256 = await applyPatchesSequentially( - chain, + const sha256 = await applyPatchChainInMemory( oldBinaryPath, + chain.patches.map((p) => p.data), destPath ); diff --git a/test/e2e/delta-upgrade.test.ts b/test/e2e/delta-upgrade.test.ts index 30fa8f217..a0e9af4b8 100644 --- a/test/e2e/delta-upgrade.test.ts +++ b/test/e2e/delta-upgrade.test.ts @@ -17,7 +17,7 @@ import { tmpdir } from "node:os"; import { join } from "node:path"; import { afterAll, beforeAll, describe, expect, test } from "vitest"; import { getPlatformBinaryName } from "../../src/lib/binary.js"; -import { applyPatch } from "../../src/lib/bspatch.js"; +import { applyPatch, applyPatchChainInMemory } from "../../src/lib/bspatch.js"; // Restore real fetch for E2E tests (preload.ts mocks it globally) const realFetch = (globalThis as { __originalFetch?: typeof fetch }) @@ -56,6 +56,8 @@ describe.skipIf(!canRun)("e2e: delta upgrade", () => { const newPath = join(workDir, "new"); const patchPath = join(workDir, "patch.trdiff10"); const outputPath = join(workDir, "output"); + const chainPatchPath = join(workDir, "patch2.trdiff10"); + const chainOutputPath = join(workDir, "chain-output"); try { // Download old and new binaries from GitHub Releases @@ -93,9 +95,38 @@ describe.skipIf(!canRun)("e2e: delta upgrade", () => { const expectedBytes = new Uint8Array(await readFile(newPath)); expect(outputBytes.byteLength).toBe(expectedBytes.byteLength); expect(outputBytes).toEqual(expectedBytes); + + // Verify a real two-hop chain: old --p1--> new --p2--> new. + // p2 is a (near-identity) new→new patch, so the chain must still yield + // the new binary while keeping the intermediate entirely in memory. + execSync( + `${BSDIFF_PATH} ${newPath} ${newPath} ${chainPatchPath} --use-zstd`, + { stdio: "pipe" } + ); + const p1 = new Uint8Array(await readFile(patchPath)); + const p2 = new Uint8Array(await readFile(chainPatchPath)); + const chainSha = await applyPatchChainInMemory( + oldPath, + [p1, p2], + chainOutputPath + ); + expect(chainSha).toBe(expectedHash); + expect(new Uint8Array(await readFile(chainOutputPath))).toEqual( + expectedBytes + ); + // Intermediates never touch disk. + expect(existsSync(`${chainOutputPath}.patching.a`)).toBe(false); + expect(existsSync(`${chainOutputPath}.patching.b`)).toBe(false); } finally { // Cleanup - for (const path of [oldPath, newPath, patchPath, outputPath]) { + for (const path of [ + oldPath, + newPath, + patchPath, + outputPath, + chainPatchPath, + chainOutputPath, + ]) { try { unlinkSync(path); } catch { diff --git a/test/lib/bspatch.test.ts b/test/lib/bspatch.test.ts index 4d6e8fb55..1d10bbb0c 100644 --- a/test/lib/bspatch.test.ts +++ b/test/lib/bspatch.test.ts @@ -11,15 +11,104 @@ */ import { createHash } from "node:crypto"; -import { unlinkSync } from "node:fs"; -import { readFile } from "node:fs/promises"; +import { existsSync, unlinkSync } from "node:fs"; +import { readFile, writeFile } from "node:fs/promises"; import { tmpdir } from "node:os"; import { join } from "node:path"; +import { zstdCompressSync } from "node:zlib"; import { describe, expect, test } from "vitest"; -import { applyPatch, parsePatchHeader } from "../../src/lib/bspatch.js"; +import { + applyPatch, + applyPatchChainInMemory, + applyPatchToMemory, + parsePatchHeader, +} from "../../src/lib/bspatch.js"; const FIXTURES_DIR = join(import.meta.dirname, "../fixtures/patches"); +/** + * Write a non-negative integer as a little-endian i64, matching the + * sign-magnitude encoding that {@link offtin} reads (sign bit clear). + */ +function writeI64LE(buf: Uint8Array, offset: number, value: number): void { + const view = new DataView(buf.buffer, buf.byteOffset + offset, 8); + view.setUint32(0, value % 0x1_00_00_00_00, true); + view.setUint32(4, Math.floor(value / 0x1_00_00_00_00), true); +} + +/** + * Build a minimal "diff-only" TRDIFF10 patch transforming `oldBytes` → `newBytes` + * (which must be equal length). Emits one or more control tuples, each copying + * up to `chunkSize` diff bytes (added to the old bytes), with no extra bytes and + * no seek. Splitting into many small tuples forces the patcher to issue many + * small windowed reads against the base — exercising {@link FileOldReader}'s + * block cache across boundaries. Lets chain/cache tests run in CI without the + * external zig-bsdiff encoder. + * + * @param chunkSize - Max diff bytes per control tuple (defaults to the whole file) + */ +function buildDiffOnlyPatch( + oldBytes: Uint8Array, + newBytes: Uint8Array, + chunkSize = newBytes.length +): Uint8Array { + if (oldBytes.length !== newBytes.length) { + throw new Error("diff-only patch requires equal lengths"); + } + const len = newBytes.length; + + // One control tuple per chunk: readDiffBy=chunk, readExtraBy=0, seekBy=0. + const chunks: number[] = []; + for (let remaining = len; remaining > 0; ) { + const c = Math.min(chunkSize, remaining); + chunks.push(c); + remaining -= c; + } + const control = new Uint8Array(chunks.length * 24); + for (let t = 0; t < chunks.length; t++) { + writeI64LE(control, t * 24, chunks[t] ?? 0); + writeI64LE(control, t * 24 + 8, 0); + writeI64LE(control, t * 24 + 16, 0); + } + + // diff[i] = (new[i] - old[i]) mod 256, so wrapping (old + diff) == new. + const diff = new Uint8Array(len); + for (let i = 0; i < len; i++) { + diff[i] = ((newBytes[i] ?? 0) - (oldBytes[i] ?? 0) + 256) % 256; + } + + const controlZ = new Uint8Array(zstdCompressSync(control)); + const diffZ = new Uint8Array(zstdCompressSync(diff)); + const extraZ = new Uint8Array(zstdCompressSync(new Uint8Array(0))); + + const patch = new Uint8Array( + 32 + controlZ.length + diffZ.length + extraZ.length + ); + patch.set(new TextEncoder().encode("TRDIFF10"), 0); + writeI64LE(patch, 8, controlZ.length); + writeI64LE(patch, 16, diffZ.length); + writeI64LE(patch, 24, len); + patch.set(controlZ, 32); + patch.set(diffZ, 32 + controlZ.length); + patch.set(extraZ, 32 + controlZ.length + diffZ.length); + return patch; +} + +/** + * Generate deterministic pseudo-random bytes via a 32-bit LCG. Uses modular + * arithmetic (no bitwise ops) — intermediate products stay under 2^53. + */ +function makeBytes(seed: number, len: number): Uint8Array { + const modulus = 0x1_00_00_00_00; + const out = new Uint8Array(len); + let x = seed % modulus; + for (let i = 0; i < len; i++) { + x = (x * 1_664_525 + 1_013_904_223) % modulus; + out[i] = x % 256; + } + return out; +} + describe("parsePatchHeader: fixtures", () => { test("parses valid small fixture header", async () => { const patchData = new Uint8Array( @@ -156,3 +245,221 @@ describe("applyPatch", () => { } }); }); + +describe("applyPatchChainInMemory", () => { + function tempFile(name: string): string { + return join( + tmpdir(), + `bspatch-chain-${Date.now()}-${Math.random().toString(36).slice(2)}-${name}` + ); + } + + test("applies a multi-hop chain in memory, writing only the final binary", async () => { + // old --p1--> mid --p2--> new. The first hop reads the on-disk base via the + // fd-backed reader; the second reads the in-memory intermediate. Exercises + // both reader paths plus the chaining loop without touching scratch files. + const oldBytes = makeBytes(1, 4096); + const midBytes = makeBytes(2, 4096); + const newBytes = makeBytes(3, 4096); + const p1 = buildDiffOnlyPatch(oldBytes, midBytes); + const p2 = buildDiffOnlyPatch(midBytes, newBytes); + + const oldPath = tempFile("old.bin"); + const destPath = tempFile("out.bin"); + await writeFile(oldPath, oldBytes); + + try { + const sha256 = await applyPatchChainInMemory(oldPath, [p1, p2], destPath); + + const out = new Uint8Array(await readFile(destPath)); + expect(out).toEqual(newBytes); + expect(sha256).toBe(createHash("sha256").update(newBytes).digest("hex")); + + // Intermediates are held in memory — no scratch files on disk. + expect(existsSync(`${destPath}.patching.a`)).toBe(false); + expect(existsSync(`${destPath}.patching.b`)).toBe(false); + } finally { + for (const p of [oldPath, destPath]) { + if (existsSync(p)) { + unlinkSync(p); + } + } + } + }); + + test("single-element chain matches the on-disk patcher on a real fixture", async () => { + // Exercises the fd-backed reader against a real zig-bsdiff patch whose + // control entries include diff, extra, and seek operations. + const oldPath = join(FIXTURES_DIR, "large-old.bin"); + const patchData = new Uint8Array( + await readFile(join(FIXTURES_DIR, "large.trdiff10")) + ); + const destPath = tempFile("single-chain.bin"); + + try { + const sha256 = await applyPatchChainInMemory( + oldPath, + [patchData], + destPath + ); + const expected = await readFile(join(FIXTURES_DIR, "large-new.bin")); + expect(new Uint8Array(await readFile(destPath))).toEqual( + new Uint8Array(expected) + ); + expect(sha256).toBe(createHash("sha256").update(expected).digest("hex")); + } finally { + if (existsSync(destPath)) { + unlinkSync(destPath); + } + } + }); + + test("rejects an empty patch chain", async () => { + const oldPath = join(FIXTURES_DIR, "small-old.bin"); + const destPath = tempFile("empty-chain.bin"); + + try { + await expect( + applyPatchChainInMemory(oldPath, [], destPath) + ).rejects.toThrow("empty patch chain"); + } finally { + if (existsSync(destPath)) { + unlinkSync(destPath); + } + } + }); +}); + +describe("applyPatchToMemory", () => { + function tempFile(name: string): string { + return join( + tmpdir(), + `bspatch-mem-${Date.now()}-${Math.random().toString(36).slice(2)}-${name}` + ); + } + + test("produces the same bytes and hash as the on-disk patcher", async () => { + // Cross-checks the memory-backed reader against the fd-backed reader on a + // real fixture patch (diff + extra + seek). + const oldPath = join(FIXTURES_DIR, "large-old.bin"); + const oldBytes = new Uint8Array(await readFile(oldPath)); + const patchData = new Uint8Array( + await readFile(join(FIXTURES_DIR, "large.trdiff10")) + ); + const destPath = tempFile("disk.bin"); + + try { + const memOut = await applyPatchToMemory(oldBytes, patchData); + const diskSha = await applyPatch(oldPath, patchData, destPath); + const diskOut = new Uint8Array(await readFile(destPath)); + + expect(memOut).toEqual(diskOut); + expect(createHash("sha256").update(memOut).digest("hex")).toBe(diskSha); + } finally { + if (existsSync(destPath)) { + unlinkSync(destPath); + } + } + }); +}); + +describe("FileOldReader block cache", () => { + const ONE_MIB = 1024 * 1024; + + function tempFile(name: string): string { + return join( + tmpdir(), + `bspatch-cache-${Date.now()}-${Math.random().toString(36).slice(2)}-${name}` + ); + } + + /** SHA-256 hex of `bytes`. */ + function sha(bytes: Uint8Array): string { + return createHash("sha256").update(bytes).digest("hex"); + } + + /** + * Apply `patch` to `oldBytes` via both the fd-backed reader (`applyPatch`, + * which uses the block cache) and the in-memory reader (`applyPatchToMemory`, + * cacheless), returning their result hashes plus `applyPatch`'s self-reported + * hash. Any cache offset/staleness bug shows up as a hash divergence. + * + * Compares via SHA-256 rather than `toEqual` — Vitest's deep-equality is far + * too slow on multi-MiB typed arrays (seconds per call), whereas native + * hashing is O(n) and fast. + */ + async function runBothReaders( + oldBytes: Uint8Array, + patch: Uint8Array + ): Promise<{ fileSha: string; fileBytesSha: string; memSha: string }> { + const oldPath = tempFile("old.bin"); + const destPath = tempFile("out.bin"); + await writeFile(oldPath, oldBytes); + try { + const fileSha = await applyPatch(oldPath, patch, destPath); + const fileBytesSha = sha(new Uint8Array(await readFile(destPath))); + const memSha = sha(await applyPatchToMemory(oldBytes, patch)); + return { fileSha, fileBytesSha, memSha }; + } finally { + for (const p of [oldPath, destPath]) { + if (existsSync(p)) { + unlinkSync(p); + } + } + } + } + + test("serves many small windows spanning multiple blocks", async () => { + // 3 MiB base read in ~100 KiB windows → reads cross the 1 MiB block + // boundaries repeatedly, forcing several cache refills. + const len = 3 * ONE_MIB + 12_345; + const oldBytes = makeBytes(11, len); + const newBytes = makeBytes(22, len); + const patch = buildDiffOnlyPatch(oldBytes, newBytes, 100 * 1024); + + const expected = sha(newBytes); + const { fileSha, fileBytesSha, memSha } = await runBothReaders( + oldBytes, + patch + ); + expect(fileSha).toBe(expected); + expect(fileBytesSha).toBe(expected); + expect(memSha).toBe(expected); + }); + + test("reads a single window larger than the cache block", async () => { + // One 1.5 MiB diff window > 1 MiB block → exercises the direct-read bypass. + const len = ONE_MIB + ONE_MIB / 2; + const oldBytes = makeBytes(33, len); + const newBytes = makeBytes(44, len); + const patch = buildDiffOnlyPatch(oldBytes, newBytes); // single tuple + + const expected = sha(newBytes); + const { fileSha, fileBytesSha, memSha } = await runBothReaders( + oldBytes, + patch + ); + expect(fileSha).toBe(expected); + expect(fileBytesSha).toBe(expected); + expect(memSha).toBe(expected); + }); + + test("handles windows landing exactly on a block boundary", async () => { + // Window size divides the block size evenly, so a read lands precisely at + // the 1 MiB boundary (start === blockStart + blockLen) — the off-by-one + // boundary case for blockCovers. + const len = 2 * ONE_MIB; + const oldBytes = makeBytes(55, len); + const newBytes = makeBytes(66, len); + const patch = buildDiffOnlyPatch(oldBytes, newBytes, ONE_MIB / 4); + + const expected = sha(newBytes); + const { fileSha, fileBytesSha, memSha } = await runBothReaders( + oldBytes, + patch + ); + expect(fileSha).toBe(expected); + expect(fileBytesSha).toBe(expected); + expect(memSha).toBe(expected); + }); +}); diff --git a/test/lib/delta-upgrade.test.ts b/test/lib/delta-upgrade.test.ts index 8924a07e5..bd839dc2e 100644 --- a/test/lib/delta-upgrade.test.ts +++ b/test/lib/delta-upgrade.test.ts @@ -1537,21 +1537,18 @@ describe("applyPatchChain", () => { } }); - test("applies multi-step chain via intermediate file", async () => { - // We only have one-step fixtures, but we can test the intermediate - // file path by using identity-like patches. Use the same patch twice: - // old → new → applying a second patch will likely fail in the real - // patcher (since new != old for the second patch), so we test that - // the cleanup logic works by checking intermediate files don't leak. + test("applies multi-step chains in memory without intermediate files", async () => { + // Intermediate hops are kept in memory, so the legacy `.patching.a`/`.b` + // scratch files must never be written. We only have one-step fixtures, so + // reuse the same patch twice: the first hop produces valid bytes, the + // second applies to mismatched bytes and fails final-hash verification — + // but either way no intermediate file should touch disk. const oldPath = join(fixturesDir, "small-old.bin"); const destPath = tempFile("multi-chain-out.bin"); const intermediateA = `${destPath}.patching.a`; const intermediateB = `${destPath}.patching.b`; const patchData = await readFile(join(fixturesDir, "small.trdiff10")); - // Create a chain where the first step succeeds but the second will fail - // (applying old→new patch to the "new" binary won't produce valid output, - // but we're testing the intermediate file handling) const chain: PatchChain = { patches: [ { @@ -1568,17 +1565,15 @@ describe("applyPatchChain", () => { }; try { - // This may succeed or fail depending on whether the second patch - // application works, but either way intermediate files should be cleaned await applyPatchChain(chain, oldPath, destPath).catch(() => { - // Expected — second patch on mismatched binary + // Expected — second patch applied to mismatched bytes fails verification }); + + // The in-memory chain never creates scratch files on disk. + expect(existsSync(intermediateA)).toBe(false); + expect(existsSync(intermediateB)).toBe(false); } finally { - // Clean up any files that were created - if (existsSync(destPath)) { - unlinkSync(destPath); - } - for (const p of [intermediateA, intermediateB]) { + for (const p of [destPath, intermediateA, intermediateB]) { if (existsSync(p)) { unlinkSync(p); } From d0572ec2503b65f718d772ca488c19a091254a9b Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Tue, 23 Jun 2026 14:10:04 +0000 Subject: [PATCH 2/2] fix(upgrade): close base file handle if stat fails in loadOldBinary If open() succeeded but a later step (stat) threw, the catch block fell back to an in-memory read without closing the already-open FileHandle, leaking a file descriptor. Track the handle outside the try and close it in the catch before falling back. Flagged by Cursor Bugbot and Seer on #1127. --- src/lib/bspatch.ts | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/lib/bspatch.ts b/src/lib/bspatch.ts index e0decb724..7df06205e 100644 --- a/src/lib/bspatch.ts +++ b/src/lib/bspatch.ts @@ -448,15 +448,26 @@ async function loadOldBinary(oldPath: string): Promise { tmpdir(), `sentry-patch-old-${process.pid}-${loadCounter}` ); + // Tracked outside the try so the catch can release a handle that was opened + // before a later step (e.g. stat) failed — otherwise the fd would leak. + let handle: FileHandle | undefined; try { // COPYFILE_FICLONE: attempt CoW reflink first (near-instant on btrfs/xfs/APFS), // silently falls back to regular copy on filesystems that don't support it. copyFileSync(oldPath, tempCopy, constants.COPYFILE_FICLONE); - const handle = await open(tempCopy, "r"); + handle = await open(tempCopy, "r"); const { size } = await handle.stat(); return new FileOldReader(handle, size, tempCopy); } catch { - // Copy/open failed — fall back to a direct in-memory read of the original. + // Roll back any partially-acquired resources, then fall back to a direct + // in-memory read of the original. Close the handle first (if open() + // succeeded but a later step threw) so it isn't leaked, then drop the + // temp copy. + if (handle) { + await handle.close().catch(() => { + /* Already closed or never fully opened */ + }); + } await unlink(tempCopy).catch(() => { /* May not exist if copyFileSync failed */ });