diff --git a/src/lib/bspatch.ts b/src/lib/bspatch.ts index 8a8a69882..7df06205e 100644 --- a/src/lib/bspatch.ts +++ b/src/lib/bspatch.ts @@ -5,11 +5,22 @@ * TRDIFF10 format (produced by zig-bsdiff with `--use-zstd`). Designed for * minimal memory usage during CLI self-upgrades: * - * - Old binary: copy to temp file, then read via `readFile()` (~100 MB heap) + * - Old binary: copy to temp file, then read on demand via positional `read()` + * (`pread`), so the base never sits fully in the JS heap — only the windows + * actually referenced are pulled in, served from the OS page cache * - Diff/extra blocks: streamed via zstd `Transform` from `node:zlib` * - Output: written incrementally to disk via `createWriteStream()` * - Integrity: SHA-256 computed inline via `node:crypto` * + * Multi-patch chains keep every intermediate result in memory and only persist + * (and hash) the final binary, avoiding the redundant disk write, temp-copy, and + * SHA-256 pass that a file-by-file chain would incur per hop. The running binary + * is the only input copied to a temp file. See {@link applyPatchChainInMemory}. + * + * The base ("old") bytes are accessed through an {@link OldReader} so the same + * transform serves both an fd-backed on-disk binary (first hop / single patch) + * and an in-memory intermediate buffer (subsequent hops). + * * TRDIFF10 format (from zig-bsdiff): * ``` * [0..8] magic: "TRDIFF10" @@ -22,7 +33,7 @@ import { createHash } from "node:crypto"; import { constants, copyFileSync, createWriteStream } from "node:fs"; -import { readFile, unlink } from "node:fs/promises"; +import { type FileHandle, open, readFile, unlink } from "node:fs/promises"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { Readable } from "node:stream"; @@ -239,106 +250,256 @@ function createZstdStreamReader(compressed: Uint8Array): BufferedStreamReader { ); } -/** Result of loading the old binary for patching */ -type OldFileHandle = { - /** In-memory view of the old binary */ - data: Uint8Array; - /** Cleanup function to call after patching (removes temp copy, if any) */ - cleanup: () => void | Promise; +/** + * Random-access view of the base ("old") binary during patching. + * + * Abstracts over an fd-backed on-disk file (read on demand via `pread`) and an + * in-memory buffer (a multi-patch intermediate), so {@link transformPatch} can + * source old bytes the same way regardless of where they live. + */ +type OldReader = { + /** + * Read exactly `len` bytes starting at `pos`. + * + * Positions outside `[0, size)` are zero-filled, matching the original + * `oldFile[oldpos + i] ?? 0` semantics (bsdiff seeks can reference offsets + * past the end, and a negative/oversized seek must read as zeros rather than + * fail). The returned buffer is always exactly `len` bytes. + */ + read: (pos: number, len: number) => Promise; + /** Release any held resources (fd, temp copy). Safe to call more than once. */ + close: () => Promise; }; /** - * Load the old binary for read access during patching. + * {@link OldReader} backed by an in-memory buffer. + * + * Used for multi-patch intermediates, whose bytes already live in the JS heap + * and must stay there for the next hop's random access. + */ +class MemoryOldReader implements OldReader { + private readonly data: Uint8Array; + + constructor(data: Uint8Array) { + this.data = data; + } + + read(pos: number, len: number): Promise { + const out = new Uint8Array(len); // zero-filled + const start = Math.max(pos, 0); + const end = Math.min(pos + len, this.data.length); + if (end > start) { + out.set(this.data.subarray(start, end), start - pos); + } + return Promise.resolve(out); + } + + close(): Promise { + // Bytes live in the JS heap — nothing to release. + return Promise.resolve(); + } +} + +/** + * Size of {@link FileOldReader}'s read-ahead cache block (1 MiB). + * + * bsdiff references the base mostly forward in many small windows; caching a + * sliding block of this size collapses thousands of per-window positional reads + * into roughly `fileSize / BLOCK_SIZE` reads, at a bounded ~1 MiB memory cost. + */ +const OLD_READER_BLOCK_SIZE = 1024 * 1024; + +/** + * {@link OldReader} backed by an open file descriptor, read on demand via + * positional reads (`pread`) through a single-block read-ahead cache. + * + * Keeps the base binary out of the JS heap — only the referenced windows are + * pulled in, served from the OS page cache populated by the reflink copy. The + * cache coalesces the many small windowed reads bsdiff performs (mostly forward + * with occasional jumps) into a handful of block reads, avoiding a per-window + * syscall storm while staying bounded at {@link OLD_READER_BLOCK_SIZE}. + */ +class FileOldReader implements OldReader { + private closed = false; + private readonly handle: FileHandle; + private readonly size: number; + private readonly tempPath: string; + + /** Read-ahead block buffer (allocated once, reused across refills). */ + private readonly block: Buffer = Buffer.alloc(OLD_READER_BLOCK_SIZE); + /** File offset the cached block starts at, or -1 when the cache is empty. */ + private blockStart = -1; + /** Number of valid bytes currently held in the block. */ + private blockLen = 0; + + constructor(handle: FileHandle, size: number, tempPath: string) { + this.handle = handle; + this.size = size; + this.tempPath = tempPath; + } + + async read(pos: number, len: number): Promise { + const out = Buffer.alloc(len); // zero-filled; out-of-range stays zero + const start = Math.max(pos, 0); + const end = Math.min(pos + len, this.size); + if (end <= start) { + return out; // window is entirely out of range — all zeros + } + + const need = end - start; + const outOffset = start - pos; + + if (need > OLD_READER_BLOCK_SIZE) { + // Window larger than a cache block — read straight into the output and + // leave the cache untouched (caching it would blow the memory bound). + await this.readExact(out, outOffset, need, start); + return out; + } + + if (!this.blockCovers(start, end)) { + await this.fillBlock(start); + } + out.set( + this.block.subarray(start - this.blockStart, end - this.blockStart), + outOffset + ); + return out; + } + + /** True when the cached block fully covers `[start, end)`. */ + private blockCovers(start: number, end: number): boolean { + return ( + this.blockStart >= 0 && + start >= this.blockStart && + end <= this.blockStart + this.blockLen + ); + } + + /** + * Refill the cache block starting at `start`. The length is clamped to the + * file size; callers only reach here when `[start, end)` fits in one block, + * and `start + len <= size`, so the read never crosses EOF. + */ + private async fillBlock(start: number): Promise { + const len = Math.min(OLD_READER_BLOCK_SIZE, this.size - start); + await this.readExact(this.block, 0, len, start); + this.blockStart = start; + this.blockLen = len; + } + + /** + * Read exactly `length` bytes at file offset `filePos` into `buf` at `offset`, + * looping over short positional reads (possible across some filesystems). + */ + private async readExact( + buf: Buffer, + offset: number, + length: number, + filePos: number + ): Promise { + let read = 0; + while (read < length) { + const { bytesRead } = await this.handle.read( + buf, + offset + read, + length - read, + filePos + read + ); + if (bytesRead === 0) { + break; // Unexpected EOF within bounds — leave remainder as-is + } + read += bytesRead; + } + } + + async close(): Promise { + if (this.closed) { + return; + } + this.closed = true; + try { + await this.handle.close(); + } catch { + // fd may already be closed — safe to ignore + } + await unlink(this.tempPath).catch(() => { + /* Best-effort cleanup — OS will reclaim on reboot */ + }); + } +} + +/** + * Open the old binary for on-demand read access during patching. * - * Strategy: copy to temp file, then read into memory. The copy avoids - * ETXTBSY (Linux) / AMFI SIGKILL (macOS) issues with reading the running - * binary directly. On CoW filesystems (btrfs, xfs, APFS) the copy is a - * metadata-only reflink (near-instant). + * Strategy: copy to a temp file, then read windows on demand via `pread`. The + * copy avoids ETXTBSY (Linux) / AMFI SIGKILL (macOS) issues with reading the + * running binary directly; on CoW filesystems (btrfs, xfs, APFS) it is a + * metadata-only reflink (near-instant). Reading on demand keeps the ~100 MB + * base out of the JS heap. + * + * Falls back to a full in-memory read of the original file if the copy or open + * fails (rare) — correctness over the memory optimization. */ let loadCounter = 0; -async function loadOldBinary(oldPath: string): Promise { +async function loadOldBinary(oldPath: string): Promise { loadCounter += 1; const tempCopy = join( tmpdir(), `sentry-patch-old-${process.pid}-${loadCounter}` ); + // Tracked outside the try so the catch can release a handle that was opened + // before a later step (e.g. stat) failed — otherwise the fd would leak. + let handle: FileHandle | undefined; try { // COPYFILE_FICLONE: attempt CoW reflink first (near-instant on btrfs/xfs/APFS), // silently falls back to regular copy on filesystems that don't support it. copyFileSync(oldPath, tempCopy, constants.COPYFILE_FICLONE); - - return { - data: new Uint8Array(await readFile(tempCopy)), - cleanup: () => - unlink(tempCopy).catch(() => { - /* Best-effort cleanup — OS will reclaim on reboot */ - }), - }; + handle = await open(tempCopy, "r"); + const { size } = await handle.stat(); + return new FileOldReader(handle, size, tempCopy); } catch { - // Copy failed — read directly into JS heap + // Roll back any partially-acquired resources, then fall back to a direct + // in-memory read of the original. Close the handle first (if open() + // succeeded but a later step threw) so it isn't leaked, then drop the + // temp copy. + if (handle) { + await handle.close().catch(() => { + /* Already closed or never fully opened */ + }); + } await unlink(tempCopy).catch(() => { /* May not exist if copyFileSync failed */ }); - return { - data: new Uint8Array(await readFile(oldPath)), - cleanup: () => { - // Data is in JS heap — no temp file to clean up - }, - }; - } -} - -/** Resources to clean up after patch application. */ -type PatchResources = { - writer: import("node:fs").WriteStream; - /** Returns the latest write error (live reference, not a snapshot). */ - getWriteError: () => Error | undefined; - diffReader: BufferedStreamReader; - extraReader: BufferedStreamReader; - cleanupOldFile: () => void | Promise; -}; - -/** - * Clean up all patch resources: flush the output stream, cancel decompression - * readers, and remove temp files. Each step runs regardless of prior failures. - */ -async function cleanupPatchResources(r: PatchResources): Promise { - try { - await new Promise((resolve, reject) => { - r.writer.end((err?: Error | null) => { - const finalErr = err ?? r.getWriteError(); - if (finalErr) { - reject(finalErr); - } else { - resolve(); - } - }); - }); - } finally { - await Promise.all([r.diffReader.cancel(), r.extraReader.cancel()]); - await r.cleanupOldFile(); + return new MemoryOldReader(await readFile(oldPath)); } } /** - * Apply a TRDIFF10 binary patch with streaming I/O for minimal memory usage. + * Core TRDIFF10 transform. * - * Copies the old file to a temp path and reads it into memory. Streams - * diff/extra blocks via `node:zlib` zstd decompressor, writes output via - * `createWriteStream()`, and computes SHA-256 inline. + * Applies a patch to in-memory old bytes, emitting output chunks in order via + * `onChunk`. Handles header parsing, streaming zstd decompression of the diff + * and extra blocks, the wrapping-add reconstruction, and output-size validation. * - * @param oldPath - Path to the existing (old) binary file + * The base bytes are pulled from `oldReader` one diff-window at a time (the + * algorithm only references the old binary during the diff step), so the caller + * decides whether they come from disk or memory. Output routing is likewise the + * caller's choice: `onChunk` writes to disk, hashes, and/or collects into a + * buffer. The diff/extra decompression readers are always cancelled before + * returning, even on error. `onChunk` may throw to abort the transform early + * (used to surface a streaming write failure). + * + * @param oldReader - Random-access view of the base ("old") binary * @param patchData - Complete TRDIFF10 patch file contents - * @param destPath - Path to write the patched (new) binary - * @returns SHA-256 hex digest of the written output - * @throws {Error} On corrupt patch, I/O failure, or size mismatch + * @param onChunk - Receives each output chunk in order; may throw to abort + * @throws {Error} On corrupt patch, or when output size disagrees with the header */ -export async function applyPatch( - oldPath: string, +async function transformPatch( + oldReader: OldReader, patchData: Uint8Array, - destPath: string -): Promise { + onChunk: (chunk: Uint8Array) => void +): Promise { const { controlLen, diffLen, newSize } = parsePatchHeader(patchData); // Slice compressed blocks from the patch buffer @@ -357,21 +518,6 @@ export async function applyPatch( ); const extraReader = createZstdStreamReader(patchData.subarray(extraStart)); - // Load old binary via copy-then-read (or direct read as fallback). - const { data: oldFile, cleanup: cleanupOldFile } = - await loadOldBinary(oldPath); - - // Streaming output: write directly to disk, compute SHA-256 inline - const writer = createWriteStream(destPath); - const hasher = createHash("sha256"); - - // Capture write errors early — without a listener, Node crashes with - // ERR_UNHANDLED_ERROR if a write fails (ENOSPC, EIO, etc.) during the loop. - let writeError: Error | undefined; - writer.on("error", (err) => { - writeError ??= err; - }); - let oldpos = 0; let newpos = 0; @@ -382,9 +528,6 @@ export async function applyPatch( controlPos < controlBlock.byteLength; controlPos += 24 ) { - if (writeError) { - break; - } const readDiffBy = offtin(controlBlock, controlPos); const readExtraBy = offtin(controlBlock, controlPos + 8); const seekBy = offtin(controlBlock, controlPos + 16); @@ -392,16 +535,17 @@ export async function applyPatch( // Step 1: Read diff bytes and add to old file bytes (wrapping u8 add) if (readDiffBy > 0) { const diffChunk = await diffReader.read(readDiffBy); + // Pull exactly the old-file window this step references (zero-filled + // beyond the file's bounds — see OldReader.read). + const oldChunk = await oldReader.read(oldpos, readDiffBy); const outputChunk = new Uint8Array(readDiffBy); for (let i = 0; i < readDiffBy; i++) { // Wrapping unsigned byte addition, matching zig-bsdiff's @addWithOverflow - outputChunk[i] = - ((oldFile[oldpos + i] ?? 0) + (diffChunk[i] ?? 0)) % 256; + outputChunk[i] = ((oldChunk[i] ?? 0) + (diffChunk[i] ?? 0)) % 256; } - writer.write(outputChunk); - hasher.update(outputChunk); + onChunk(outputChunk); oldpos += readDiffBy; newpos += readDiffBy; } @@ -409,8 +553,7 @@ export async function applyPatch( // Step 2: Copy extra bytes directly to output (new data) if (readExtraBy > 0) { const extraChunk = await extraReader.read(readExtraBy); - writer.write(extraChunk); - hasher.update(extraChunk); + onChunk(extraChunk); newpos += readExtraBy; } @@ -418,13 +561,7 @@ export async function applyPatch( oldpos += seekBy; } } finally { - await cleanupPatchResources({ - writer, - getWriteError: () => writeError, - diffReader, - extraReader, - cleanupOldFile, - }); + await Promise.all([diffReader.cancel(), extraReader.cancel()]); } // Validate output size matches header @@ -433,6 +570,187 @@ export async function applyPatch( `Output size mismatch: wrote ${newpos} bytes, expected ${newSize}` ); } +} + +/** + * Apply a patch to the base bytes from `oldReader`, streaming the result to + * `destPath` while computing its SHA-256. + * + * Used for the final hop of a chain (and single-patch upgrades), where the + * output must be persisted and verified. + * + * @param oldReader - Random-access view of the base ("old") binary + * @param patchData - Complete TRDIFF10 patch file contents + * @param destPath - Path to write the patched output + * @returns SHA-256 hex digest of the written output + * @throws {Error} On corrupt patch, I/O failure, or size mismatch + */ +async function applyReaderToFile( + oldReader: OldReader, + patchData: Uint8Array, + destPath: string +): Promise { + const writer = createWriteStream(destPath); + const hasher = createHash("sha256"); + + // Capture write errors early — without a listener, Node crashes with + // ERR_UNHANDLED_ERROR if a write fails (ENOSPC, EIO, etc.) during the loop. + let writeError: Error | undefined; + writer.on("error", (err) => { + writeError ??= err; + }); + + try { + await transformPatch(oldReader, patchData, (chunk) => { + // Abort the transform on the first I/O failure. Throwing here unwinds + // through transformPatch's reader cleanup; the writer is then flushed + // and the error re-surfaced in the finally below. + if (writeError) { + throw writeError; + } + writer.write(chunk); + hasher.update(chunk); + }); + } finally { + await new Promise((resolve, reject) => { + writer.end((err?: Error | null) => { + const finalErr = err ?? writeError; + if (finalErr) { + reject(finalErr); + } else { + resolve(); + } + }); + }); + } return hasher.digest("hex"); } + +/** + * Apply a patch to the base bytes from `oldReader`, returning the result as a + * new in-memory buffer. + * + * Used for the intermediate hops of a multi-patch chain: the output becomes the + * base for the next patch without ever touching disk, and no SHA-256 is computed + * (only the final binary is hashed and verified). + * + * @param oldReader - Random-access view of the base ("old") binary + * @param patchData - Complete TRDIFF10 patch file contents + * @returns The patched output bytes + * @throws {Error} On corrupt patch or size mismatch + */ +async function applyReaderToMemory( + oldReader: OldReader, + patchData: Uint8Array +): Promise { + // Preallocate the exact output size from the header so chunks can be copied + // in place — avoids a final concat pass over ~100 MB of output. + const { newSize } = parsePatchHeader(patchData); + const output = new Uint8Array(newSize); + let offset = 0; + + await transformPatch(oldReader, patchData, (chunk) => { + output.set(chunk, offset); + offset += chunk.byteLength; + }); + + return output; +} + +/** + * Apply a patch to in-memory old bytes, returning the result as a new buffer. + * + * Convenience wrapper over the internal reader-based path for callers that + * already hold the base bytes in memory (e.g. tests). Production chains use + * {@link applyPatchChainInMemory}, which reads the on-disk base on demand. + * + * @param oldFile - Full contents of the base ("old") binary + * @param patchData - Complete TRDIFF10 patch file contents + * @returns The patched output bytes + * @throws {Error} On corrupt patch or size mismatch + */ +export function applyPatchToMemory( + oldFile: Uint8Array, + patchData: Uint8Array +): Promise { + return applyReaderToMemory(new MemoryOldReader(oldFile), patchData); +} + +/** + * Apply a sequence of TRDIFF10 patches, oldest first, writing the final binary + * to `destPath` and returning its SHA-256. + * + * The base binary at `oldPath` is loaded once (copied to a temp file to avoid + * reading the running executable in place — see {@link loadOldBinary}). Every + * intermediate result is kept in memory and fed straight into the next patch, + * so intermediates never hit disk and only the final binary is hashed. This + * eliminates the N−1 redundant disk writes, temp-copies, and SHA-256 passes a + * file-by-file chain would incur — and because reads and writes never target + * the same path, there is no risk of truncating a file that is being read. + * + * For a single-patch chain this is equivalent to applying that patch straight + * to `destPath`. + * + * @param oldPath - Path to the base ("old") binary + * @param patches - Patches to apply in order (oldest first); must be non-empty + * @param destPath - Path to write the final patched binary + * @returns SHA-256 hex digest of the final output + * @throws {Error} When `patches` is empty, or on corrupt patch / I/O / size mismatch + */ +export async function applyPatchChainInMemory( + oldPath: string, + patches: Uint8Array[], + destPath: string +): Promise { + if (patches.length === 0) { + throw new Error("Cannot apply an empty patch chain"); + } + + // First hop reads the on-disk base on demand (fd-backed). Subsequent hops + // read the previous in-memory output. Each reader is closed before the next + // replaces it; the active one is closed in the finally. + let reader = await loadOldBinary(oldPath); + + try { + // Intermediate hops stay entirely in memory — no disk I/O, no hashing. + for (let i = 0; i < patches.length - 1; i++) { + const patch = patches[i]; + if (!patch) { + throw new Error(`Missing patch at index ${i}`); + } + const next = await applyReaderToMemory(reader, patch); + await reader.close(); + reader = new MemoryOldReader(next); + } + + // Final hop streams to disk and computes the verification hash. + const finalPatch = patches.at(-1); + if (!finalPatch) { + throw new Error("Missing final patch"); + } + return await applyReaderToFile(reader, finalPatch, destPath); + } finally { + await reader.close(); + } +} + +/** + * Apply a single TRDIFF10 binary patch and write the result to `destPath`. + * + * Thin wrapper over {@link applyPatchChainInMemory} for the common single-patch + * case; preserved as the documented entry point for one-shot patch application. + * + * @param oldPath - Path to the existing (old) binary file + * @param patchData - Complete TRDIFF10 patch file contents + * @param destPath - Path to write the patched (new) binary + * @returns SHA-256 hex digest of the written output + * @throws {Error} On corrupt patch, I/O failure, or size mismatch + */ +export function applyPatch( + oldPath: string, + patchData: Uint8Array, + destPath: string +): Promise { + return applyPatchChainInMemory(oldPath, [patchData], destPath); +} diff --git a/src/lib/delta-upgrade.ts b/src/lib/delta-upgrade.ts index 991210914..b6e0a2fee 100644 --- a/src/lib/delta-upgrade.ts +++ b/src/lib/delta-upgrade.ts @@ -17,8 +17,6 @@ * - Any error occurs during patch download or application */ -import { unlinkSync } from "node:fs"; - // biome-ignore lint/performance/noNamespaceImport: Sentry SDK recommends namespace import import * as Sentry from "@sentry/node-core/light"; import { compare as semverCompare, valid as semverValid } from "semver"; @@ -29,7 +27,7 @@ import { isDowngrade, isNightlyVersion, } from "./binary.js"; -import { applyPatch } from "./bspatch.js"; +import { applyPatchChainInMemory } from "./bspatch.js"; import { CLI_VERSION } from "./constants.js"; import { customFetch } from "./custom-ca.js"; import { formatBytes } from "./formatters/numbers.js"; @@ -1158,75 +1156,14 @@ async function resolveNightlyChainWithContext( ); } -/** Remove intermediate patching files, ignoring errors. */ -function cleanupIntermediates(destPath: string): void { - for (const suffix of [".patching.a", ".patching.b"]) { - try { - unlinkSync(`${destPath}${suffix}`); - } catch { - // Ignore cleanup errors - } - } -} - -/** - * Apply patches sequentially, alternating between two intermediate files. - * - * Extracted to keep cognitive complexity manageable when the caller wraps - * this in a tracing span. - * - * @returns SHA-256 hex of the final output - */ -async function applyPatchesSequentially( - chain: PatchChain, - oldBinaryPath: string, - destPath: string -): Promise { - let currentOldPath = oldBinaryPath; - let sha256 = ""; - - // Alternate between two intermediate paths to avoid reading and writing - // the same file (mmap'd read + writer truncation = corruption). - const intermediateA = `${destPath}.patching.a`; - const intermediateB = `${destPath}.patching.b`; - - try { - for (let i = 0; i < chain.patches.length; i++) { - const patch = chain.patches[i]; - if (!patch) { - throw new Error(`Missing patch at index ${i}`); - } - const isLast = i === chain.patches.length - 1; - const intermediate = i % 2 === 0 ? intermediateA : intermediateB; - const outputPath = isLast ? destPath : intermediate; - - sha256 = await withTracing( - `apply-patch-${i}`, - "upgrade.delta.apply", - () => applyPatch(currentOldPath, patch.data, outputPath) - ); - - if (!isLast) { - currentOldPath = outputPath; - } - } - } finally { - // Always clean up intermediate files, even on failure - if (chain.patches.length > 1) { - cleanupIntermediates(destPath); - } - } - - return sha256; -} - /** - * Apply a resolved patch chain sequentially and verify the result. + * Apply a resolved patch chain and verify the result. * - * For single-patch chains, applies directly from old binary to dest. - * For multi-patch chains, alternates between two intermediate files - * so that read and write never target the same path — writing to the - * source would truncate it and corrupt the output. + * Delegates to {@link applyPatchChainInMemory}, which loads the base binary + * once, keeps every intermediate hop in memory (no per-hop disk writes, + * temp-copies, or SHA-256 passes), and streams only the final binary to + * `destPath`. Because reads and writes never target the same path, there is no + * read/write truncation hazard. * * Does **not** set executable permissions — the caller * (`downloadBinaryToTemp`) handles that uniformly for both delta @@ -1257,9 +1194,9 @@ export function applyPatchChain( `Applying ${chain.patches.length} patch(es), expected SHA-256: ${chain.expectedSha256.slice(0, 12)}...` ); - const sha256 = await applyPatchesSequentially( - chain, + const sha256 = await applyPatchChainInMemory( oldBinaryPath, + chain.patches.map((p) => p.data), destPath ); diff --git a/test/e2e/delta-upgrade.test.ts b/test/e2e/delta-upgrade.test.ts index 30fa8f217..a0e9af4b8 100644 --- a/test/e2e/delta-upgrade.test.ts +++ b/test/e2e/delta-upgrade.test.ts @@ -17,7 +17,7 @@ import { tmpdir } from "node:os"; import { join } from "node:path"; import { afterAll, beforeAll, describe, expect, test } from "vitest"; import { getPlatformBinaryName } from "../../src/lib/binary.js"; -import { applyPatch } from "../../src/lib/bspatch.js"; +import { applyPatch, applyPatchChainInMemory } from "../../src/lib/bspatch.js"; // Restore real fetch for E2E tests (preload.ts mocks it globally) const realFetch = (globalThis as { __originalFetch?: typeof fetch }) @@ -56,6 +56,8 @@ describe.skipIf(!canRun)("e2e: delta upgrade", () => { const newPath = join(workDir, "new"); const patchPath = join(workDir, "patch.trdiff10"); const outputPath = join(workDir, "output"); + const chainPatchPath = join(workDir, "patch2.trdiff10"); + const chainOutputPath = join(workDir, "chain-output"); try { // Download old and new binaries from GitHub Releases @@ -93,9 +95,38 @@ describe.skipIf(!canRun)("e2e: delta upgrade", () => { const expectedBytes = new Uint8Array(await readFile(newPath)); expect(outputBytes.byteLength).toBe(expectedBytes.byteLength); expect(outputBytes).toEqual(expectedBytes); + + // Verify a real two-hop chain: old --p1--> new --p2--> new. + // p2 is a (near-identity) new→new patch, so the chain must still yield + // the new binary while keeping the intermediate entirely in memory. + execSync( + `${BSDIFF_PATH} ${newPath} ${newPath} ${chainPatchPath} --use-zstd`, + { stdio: "pipe" } + ); + const p1 = new Uint8Array(await readFile(patchPath)); + const p2 = new Uint8Array(await readFile(chainPatchPath)); + const chainSha = await applyPatchChainInMemory( + oldPath, + [p1, p2], + chainOutputPath + ); + expect(chainSha).toBe(expectedHash); + expect(new Uint8Array(await readFile(chainOutputPath))).toEqual( + expectedBytes + ); + // Intermediates never touch disk. + expect(existsSync(`${chainOutputPath}.patching.a`)).toBe(false); + expect(existsSync(`${chainOutputPath}.patching.b`)).toBe(false); } finally { // Cleanup - for (const path of [oldPath, newPath, patchPath, outputPath]) { + for (const path of [ + oldPath, + newPath, + patchPath, + outputPath, + chainPatchPath, + chainOutputPath, + ]) { try { unlinkSync(path); } catch { diff --git a/test/lib/bspatch.test.ts b/test/lib/bspatch.test.ts index 4d6e8fb55..1d10bbb0c 100644 --- a/test/lib/bspatch.test.ts +++ b/test/lib/bspatch.test.ts @@ -11,15 +11,104 @@ */ import { createHash } from "node:crypto"; -import { unlinkSync } from "node:fs"; -import { readFile } from "node:fs/promises"; +import { existsSync, unlinkSync } from "node:fs"; +import { readFile, writeFile } from "node:fs/promises"; import { tmpdir } from "node:os"; import { join } from "node:path"; +import { zstdCompressSync } from "node:zlib"; import { describe, expect, test } from "vitest"; -import { applyPatch, parsePatchHeader } from "../../src/lib/bspatch.js"; +import { + applyPatch, + applyPatchChainInMemory, + applyPatchToMemory, + parsePatchHeader, +} from "../../src/lib/bspatch.js"; const FIXTURES_DIR = join(import.meta.dirname, "../fixtures/patches"); +/** + * Write a non-negative integer as a little-endian i64, matching the + * sign-magnitude encoding that {@link offtin} reads (sign bit clear). + */ +function writeI64LE(buf: Uint8Array, offset: number, value: number): void { + const view = new DataView(buf.buffer, buf.byteOffset + offset, 8); + view.setUint32(0, value % 0x1_00_00_00_00, true); + view.setUint32(4, Math.floor(value / 0x1_00_00_00_00), true); +} + +/** + * Build a minimal "diff-only" TRDIFF10 patch transforming `oldBytes` → `newBytes` + * (which must be equal length). Emits one or more control tuples, each copying + * up to `chunkSize` diff bytes (added to the old bytes), with no extra bytes and + * no seek. Splitting into many small tuples forces the patcher to issue many + * small windowed reads against the base — exercising {@link FileOldReader}'s + * block cache across boundaries. Lets chain/cache tests run in CI without the + * external zig-bsdiff encoder. + * + * @param chunkSize - Max diff bytes per control tuple (defaults to the whole file) + */ +function buildDiffOnlyPatch( + oldBytes: Uint8Array, + newBytes: Uint8Array, + chunkSize = newBytes.length +): Uint8Array { + if (oldBytes.length !== newBytes.length) { + throw new Error("diff-only patch requires equal lengths"); + } + const len = newBytes.length; + + // One control tuple per chunk: readDiffBy=chunk, readExtraBy=0, seekBy=0. + const chunks: number[] = []; + for (let remaining = len; remaining > 0; ) { + const c = Math.min(chunkSize, remaining); + chunks.push(c); + remaining -= c; + } + const control = new Uint8Array(chunks.length * 24); + for (let t = 0; t < chunks.length; t++) { + writeI64LE(control, t * 24, chunks[t] ?? 0); + writeI64LE(control, t * 24 + 8, 0); + writeI64LE(control, t * 24 + 16, 0); + } + + // diff[i] = (new[i] - old[i]) mod 256, so wrapping (old + diff) == new. + const diff = new Uint8Array(len); + for (let i = 0; i < len; i++) { + diff[i] = ((newBytes[i] ?? 0) - (oldBytes[i] ?? 0) + 256) % 256; + } + + const controlZ = new Uint8Array(zstdCompressSync(control)); + const diffZ = new Uint8Array(zstdCompressSync(diff)); + const extraZ = new Uint8Array(zstdCompressSync(new Uint8Array(0))); + + const patch = new Uint8Array( + 32 + controlZ.length + diffZ.length + extraZ.length + ); + patch.set(new TextEncoder().encode("TRDIFF10"), 0); + writeI64LE(patch, 8, controlZ.length); + writeI64LE(patch, 16, diffZ.length); + writeI64LE(patch, 24, len); + patch.set(controlZ, 32); + patch.set(diffZ, 32 + controlZ.length); + patch.set(extraZ, 32 + controlZ.length + diffZ.length); + return patch; +} + +/** + * Generate deterministic pseudo-random bytes via a 32-bit LCG. Uses modular + * arithmetic (no bitwise ops) — intermediate products stay under 2^53. + */ +function makeBytes(seed: number, len: number): Uint8Array { + const modulus = 0x1_00_00_00_00; + const out = new Uint8Array(len); + let x = seed % modulus; + for (let i = 0; i < len; i++) { + x = (x * 1_664_525 + 1_013_904_223) % modulus; + out[i] = x % 256; + } + return out; +} + describe("parsePatchHeader: fixtures", () => { test("parses valid small fixture header", async () => { const patchData = new Uint8Array( @@ -156,3 +245,221 @@ describe("applyPatch", () => { } }); }); + +describe("applyPatchChainInMemory", () => { + function tempFile(name: string): string { + return join( + tmpdir(), + `bspatch-chain-${Date.now()}-${Math.random().toString(36).slice(2)}-${name}` + ); + } + + test("applies a multi-hop chain in memory, writing only the final binary", async () => { + // old --p1--> mid --p2--> new. The first hop reads the on-disk base via the + // fd-backed reader; the second reads the in-memory intermediate. Exercises + // both reader paths plus the chaining loop without touching scratch files. + const oldBytes = makeBytes(1, 4096); + const midBytes = makeBytes(2, 4096); + const newBytes = makeBytes(3, 4096); + const p1 = buildDiffOnlyPatch(oldBytes, midBytes); + const p2 = buildDiffOnlyPatch(midBytes, newBytes); + + const oldPath = tempFile("old.bin"); + const destPath = tempFile("out.bin"); + await writeFile(oldPath, oldBytes); + + try { + const sha256 = await applyPatchChainInMemory(oldPath, [p1, p2], destPath); + + const out = new Uint8Array(await readFile(destPath)); + expect(out).toEqual(newBytes); + expect(sha256).toBe(createHash("sha256").update(newBytes).digest("hex")); + + // Intermediates are held in memory — no scratch files on disk. + expect(existsSync(`${destPath}.patching.a`)).toBe(false); + expect(existsSync(`${destPath}.patching.b`)).toBe(false); + } finally { + for (const p of [oldPath, destPath]) { + if (existsSync(p)) { + unlinkSync(p); + } + } + } + }); + + test("single-element chain matches the on-disk patcher on a real fixture", async () => { + // Exercises the fd-backed reader against a real zig-bsdiff patch whose + // control entries include diff, extra, and seek operations. + const oldPath = join(FIXTURES_DIR, "large-old.bin"); + const patchData = new Uint8Array( + await readFile(join(FIXTURES_DIR, "large.trdiff10")) + ); + const destPath = tempFile("single-chain.bin"); + + try { + const sha256 = await applyPatchChainInMemory( + oldPath, + [patchData], + destPath + ); + const expected = await readFile(join(FIXTURES_DIR, "large-new.bin")); + expect(new Uint8Array(await readFile(destPath))).toEqual( + new Uint8Array(expected) + ); + expect(sha256).toBe(createHash("sha256").update(expected).digest("hex")); + } finally { + if (existsSync(destPath)) { + unlinkSync(destPath); + } + } + }); + + test("rejects an empty patch chain", async () => { + const oldPath = join(FIXTURES_DIR, "small-old.bin"); + const destPath = tempFile("empty-chain.bin"); + + try { + await expect( + applyPatchChainInMemory(oldPath, [], destPath) + ).rejects.toThrow("empty patch chain"); + } finally { + if (existsSync(destPath)) { + unlinkSync(destPath); + } + } + }); +}); + +describe("applyPatchToMemory", () => { + function tempFile(name: string): string { + return join( + tmpdir(), + `bspatch-mem-${Date.now()}-${Math.random().toString(36).slice(2)}-${name}` + ); + } + + test("produces the same bytes and hash as the on-disk patcher", async () => { + // Cross-checks the memory-backed reader against the fd-backed reader on a + // real fixture patch (diff + extra + seek). + const oldPath = join(FIXTURES_DIR, "large-old.bin"); + const oldBytes = new Uint8Array(await readFile(oldPath)); + const patchData = new Uint8Array( + await readFile(join(FIXTURES_DIR, "large.trdiff10")) + ); + const destPath = tempFile("disk.bin"); + + try { + const memOut = await applyPatchToMemory(oldBytes, patchData); + const diskSha = await applyPatch(oldPath, patchData, destPath); + const diskOut = new Uint8Array(await readFile(destPath)); + + expect(memOut).toEqual(diskOut); + expect(createHash("sha256").update(memOut).digest("hex")).toBe(diskSha); + } finally { + if (existsSync(destPath)) { + unlinkSync(destPath); + } + } + }); +}); + +describe("FileOldReader block cache", () => { + const ONE_MIB = 1024 * 1024; + + function tempFile(name: string): string { + return join( + tmpdir(), + `bspatch-cache-${Date.now()}-${Math.random().toString(36).slice(2)}-${name}` + ); + } + + /** SHA-256 hex of `bytes`. */ + function sha(bytes: Uint8Array): string { + return createHash("sha256").update(bytes).digest("hex"); + } + + /** + * Apply `patch` to `oldBytes` via both the fd-backed reader (`applyPatch`, + * which uses the block cache) and the in-memory reader (`applyPatchToMemory`, + * cacheless), returning their result hashes plus `applyPatch`'s self-reported + * hash. Any cache offset/staleness bug shows up as a hash divergence. + * + * Compares via SHA-256 rather than `toEqual` — Vitest's deep-equality is far + * too slow on multi-MiB typed arrays (seconds per call), whereas native + * hashing is O(n) and fast. + */ + async function runBothReaders( + oldBytes: Uint8Array, + patch: Uint8Array + ): Promise<{ fileSha: string; fileBytesSha: string; memSha: string }> { + const oldPath = tempFile("old.bin"); + const destPath = tempFile("out.bin"); + await writeFile(oldPath, oldBytes); + try { + const fileSha = await applyPatch(oldPath, patch, destPath); + const fileBytesSha = sha(new Uint8Array(await readFile(destPath))); + const memSha = sha(await applyPatchToMemory(oldBytes, patch)); + return { fileSha, fileBytesSha, memSha }; + } finally { + for (const p of [oldPath, destPath]) { + if (existsSync(p)) { + unlinkSync(p); + } + } + } + } + + test("serves many small windows spanning multiple blocks", async () => { + // 3 MiB base read in ~100 KiB windows → reads cross the 1 MiB block + // boundaries repeatedly, forcing several cache refills. + const len = 3 * ONE_MIB + 12_345; + const oldBytes = makeBytes(11, len); + const newBytes = makeBytes(22, len); + const patch = buildDiffOnlyPatch(oldBytes, newBytes, 100 * 1024); + + const expected = sha(newBytes); + const { fileSha, fileBytesSha, memSha } = await runBothReaders( + oldBytes, + patch + ); + expect(fileSha).toBe(expected); + expect(fileBytesSha).toBe(expected); + expect(memSha).toBe(expected); + }); + + test("reads a single window larger than the cache block", async () => { + // One 1.5 MiB diff window > 1 MiB block → exercises the direct-read bypass. + const len = ONE_MIB + ONE_MIB / 2; + const oldBytes = makeBytes(33, len); + const newBytes = makeBytes(44, len); + const patch = buildDiffOnlyPatch(oldBytes, newBytes); // single tuple + + const expected = sha(newBytes); + const { fileSha, fileBytesSha, memSha } = await runBothReaders( + oldBytes, + patch + ); + expect(fileSha).toBe(expected); + expect(fileBytesSha).toBe(expected); + expect(memSha).toBe(expected); + }); + + test("handles windows landing exactly on a block boundary", async () => { + // Window size divides the block size evenly, so a read lands precisely at + // the 1 MiB boundary (start === blockStart + blockLen) — the off-by-one + // boundary case for blockCovers. + const len = 2 * ONE_MIB; + const oldBytes = makeBytes(55, len); + const newBytes = makeBytes(66, len); + const patch = buildDiffOnlyPatch(oldBytes, newBytes, ONE_MIB / 4); + + const expected = sha(newBytes); + const { fileSha, fileBytesSha, memSha } = await runBothReaders( + oldBytes, + patch + ); + expect(fileSha).toBe(expected); + expect(fileBytesSha).toBe(expected); + expect(memSha).toBe(expected); + }); +}); diff --git a/test/lib/delta-upgrade.test.ts b/test/lib/delta-upgrade.test.ts index 8924a07e5..bd839dc2e 100644 --- a/test/lib/delta-upgrade.test.ts +++ b/test/lib/delta-upgrade.test.ts @@ -1537,21 +1537,18 @@ describe("applyPatchChain", () => { } }); - test("applies multi-step chain via intermediate file", async () => { - // We only have one-step fixtures, but we can test the intermediate - // file path by using identity-like patches. Use the same patch twice: - // old → new → applying a second patch will likely fail in the real - // patcher (since new != old for the second patch), so we test that - // the cleanup logic works by checking intermediate files don't leak. + test("applies multi-step chains in memory without intermediate files", async () => { + // Intermediate hops are kept in memory, so the legacy `.patching.a`/`.b` + // scratch files must never be written. We only have one-step fixtures, so + // reuse the same patch twice: the first hop produces valid bytes, the + // second applies to mismatched bytes and fails final-hash verification — + // but either way no intermediate file should touch disk. const oldPath = join(fixturesDir, "small-old.bin"); const destPath = tempFile("multi-chain-out.bin"); const intermediateA = `${destPath}.patching.a`; const intermediateB = `${destPath}.patching.b`; const patchData = await readFile(join(fixturesDir, "small.trdiff10")); - // Create a chain where the first step succeeds but the second will fail - // (applying old→new patch to the "new" binary won't produce valid output, - // but we're testing the intermediate file handling) const chain: PatchChain = { patches: [ { @@ -1568,17 +1565,15 @@ describe("applyPatchChain", () => { }; try { - // This may succeed or fail depending on whether the second patch - // application works, but either way intermediate files should be cleaned await applyPatchChain(chain, oldPath, destPath).catch(() => { - // Expected — second patch on mismatched binary + // Expected — second patch applied to mismatched bytes fails verification }); + + // The in-memory chain never creates scratch files on disk. + expect(existsSync(intermediateA)).toBe(false); + expect(existsSync(intermediateB)).toBe(false); } finally { - // Clean up any files that were created - if (existsSync(destPath)) { - unlinkSync(destPath); - } - for (const p of [intermediateA, intermediateB]) { + for (const p of [destPath, intermediateA, intermediateB]) { if (existsSync(p)) { unlinkSync(p); }