From c713a0f3beabccf79184e6c7e6db58da9cc4874d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20L=C3=A1z=C3=A1r?= Date: Thu, 30 Apr 2026 22:56:04 +0200 Subject: [PATCH] feat: fork static export --- docs/src/components/ViewMarkdown.jsx | 22 +- .../(content-negotiation).middleware.mjs | 16 +- docs/src/pages/en/(pages)/features/cli.mdx | 18 + .../pages/en/(pages)/router/static.page.mdx | 32 + docs/src/pages/ja/(pages)/features/cli.mdx | 18 + .../pages/ja/(pages)/router/static.page.mdx | 32 + docs/src/pages/md/GET.[...slug].server.mjs | 118 ++- examples/ppr/react-server.config.json | 4 +- packages/react-server/bin/commands/build.mjs | 4 + packages/react-server/config/schema.d.ts | 51 +- packages/react-server/lib/build/fanout.mjs | 84 ++ .../react-server/lib/build/p-map-stream.mjs | 67 ++ .../react-server/lib/build/path-source.mjs | 150 +++ .../lib/build/static-coordinator.mjs | 238 +++++ .../react-server/lib/build/static-emit.mjs | 517 +++++++++++ .../react-server/lib/build/static-runtime.mjs | 175 ++++ .../react-server/lib/build/static-worker.mjs | 197 ++++ packages/react-server/lib/build/static.mjs | 863 ++++-------------- .../react-server/lib/start/create-server.mjs | 12 +- .../__test__/apps/static-export-many.spec.mjs | 60 ++ test/__test__/static-export-stream.spec.mjs | 580 ++++++++++++ test/fixtures/static-export-many/entry.jsx | 17 + .../react-server.config.mjs | 27 + test/vitestSetup.mjs | 23 +- 24 files changed, 2605 insertions(+), 720 deletions(-) create mode 100644 packages/react-server/lib/build/fanout.mjs create mode 100644 packages/react-server/lib/build/p-map-stream.mjs create mode 100644 packages/react-server/lib/build/path-source.mjs create mode 100644 packages/react-server/lib/build/static-coordinator.mjs create mode 100644 packages/react-server/lib/build/static-emit.mjs create mode 100644 packages/react-server/lib/build/static-runtime.mjs create mode 100644 packages/react-server/lib/build/static-worker.mjs create mode 100644 test/__test__/apps/static-export-many.spec.mjs create mode 100644 test/__test__/static-export-stream.spec.mjs create mode 100644 test/fixtures/static-export-many/entry.jsx create mode 100644 test/fixtures/static-export-many/react-server.config.mjs diff --git a/docs/src/components/ViewMarkdown.jsx b/docs/src/components/ViewMarkdown.jsx index dcfe2676..31ab3c97 100644 --- a/docs/src/components/ViewMarkdown.jsx +++ b/docs/src/components/ViewMarkdown.jsx @@ -1,17 +1,31 @@ import { FileText } from "lucide-react"; +import { defaultLanguage } from "../const.mjs"; import { useLanguage } from "../i18n.mjs"; export default function ViewMarkdown({ pathname }) { const lang = useLanguage(); - const canonical = pathname.replace(new RegExp(`^/${lang}`), ""); - // Don't show on the homepage or language root - if (!canonical || canonical === "/") { + // English is canonical and lives at unprefixed URLs — `/en/...` + // exists only as a non-canonical form that the i18n middleware + // redirects to `/...`, so the `.md` link must strip `/en` to point + // at the real artifact. Non-default languages (`/ja/...`) keep + // their prefix because that's where their own translation is + // emitted (`/ja/api/dev.md`, not `/api/dev.md`). + const stripped = pathname.replace( + new RegExp(`^/${defaultLanguage}(?=/|$)`), + "" + ); + + // Hide on the homepage and bare language roots (`/`, `/ja`, `/ja/`). + // Use the active language for this check so `/ja` collapses to "" + // even though we don't strip it from the link itself. 
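+  // Illustrative mappings (assuming defaultLanguage === "en"):
+  //   "/en/api/dev" → "/api/dev", "/ja/api/dev" → "/ja/api/dev",
+  //   "/en" → "" and "/ja" → "/ja" (both hidden below).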
+ const visibilityPath = stripped.replace(new RegExp(`^/${lang}(?=/|$)`), ""); + if (!visibilityPath || visibilityPath === "/") { return null; } - const mdUrl = `${canonical}.md`; + const mdUrl = `${stripped.replace(/\/$/, "")}.md`; return ( pathname === `/${lang}` || pathname === `/${lang}/` ); - const mdPath = stripped === "" || stripped === "/" ? "" : stripped; + const mdPath = + pathname === "/" || isLanguageOnly ? "" : pathname.replace(/\/$/, ""); // Always advertise the response varies on Accept so caches don't poison // each other across HTML/markdown. diff --git a/docs/src/pages/en/(pages)/features/cli.mdx b/docs/src/pages/en/(pages)/features/cli.mdx index 34f6088b..80dc56d6 100644 --- a/docs/src/pages/en/(pages)/features/cli.mdx +++ b/docs/src/pages/en/(pages)/features/cli.mdx @@ -242,6 +242,24 @@ Static export. Default is `false`. You can export your app as static HTML pages. You can define the routes to export in your `react-server.config.mjs` file. See more details at [Static generation](/router/static). + +### --export-concurrency + + +Number of parallel processes used to render static export paths. Default is between `2` and `4`, scaled to the number of available CPU cores. + +Pass `1` to run the export in a single in-process pipeline — paths are rendered one at a time in the main process, with no fork or IPC overhead. This is the lowest-overhead mode and is a good fit for small exports or for debugging. + +Pass `>1` to fork that many render-worker child processes. Each child runs its own RSC main thread plus an SSR worker thread and renders one path at a time. The coordinator dispatches paths to free children over IPC; output bytes never cross the IPC boundary — every artifact (HTML, `.gz` / `.br` sidecars, `.postponed.json`, `.prerender-cache.json`) is written to disk inside the child. Both modes produce the same artifact set. + +This is the right knob to reach for when exporting tens of thousands of pages or when individual pages do heavy server-side work (Shiki highlighting, large MDX trees, costly server components). On smaller exports the per-fork startup cost dominates and a value of `1` may be faster overall. + +```sh +pnpm react-server build --export --export-concurrency 8 +``` + +You can also set this in your `react-server.config.mjs` as `exportConcurrency`. + ### --compression diff --git a/docs/src/pages/en/(pages)/router/static.page.mdx b/docs/src/pages/en/(pages)/router/static.page.mdx index 90779fa4..886dba1b 100644 --- a/docs/src/pages/en/(pages)/router/static.page.mdx +++ b/docs/src/pages/en/(pages)/router/static.page.mdx @@ -95,6 +95,38 @@ export default { }; ``` + +## Streaming the export path source + + +The regular-function form of `export` receives an array — every path the file-system router resolved, all at once. That's fine for most apps, but for very large exports (tens of thousands of pages, paginated content sourced from a CMS, on-the-fly path generation) materializing the full list before rendering starts is wasteful: peak memory grows with the path count, and the first render can't start until the last path is collected. + +Declare `export` as an `async function*` (an async generator) and you opt into streaming instead. The generator receives the live `AsyncIterable` of paths produced by the file-system router and `exportPaths`, and it can `yield` paths to the renderer one at a time. 
The renderer pulls paths only when a worker is free, so the generator stays one step ahead of the export — peak memory is `O(one path descriptor)` regardless of the total count, and rendering starts on the first yielded path. Pairs naturally with [`--export-concurrency`](/features/cli#build-export-concurrency) for parallel rendering. + +```js filename="react-server.config.mjs" +export default { + async *export(paths) { + // Pass through whatever the file-system router resolved. + for await (const p of paths) { + yield p; + } + + // Then yield additional paths lazily — e.g. fetched page-by-page from + // a CMS, computed from a large database query, or read from a file. + let cursor = null; + do { + const { items, next } = await fetchPage(cursor); + for (const item of items) { + yield { path: `/posts/${item.slug}` }; + } + cursor = next; + } while (cursor); + }, +}; +``` + +> **Note:** detection is by function kind. You must write `async function*` (or `function*`) **directly** as the value of `export` — wrappers that return an ordinary function (memoization, decorators, …) fall back to the legacy array-transform contract. The regular-function form is unchanged and remains the right choice for "give me the array, let me return a transformed array." + ## Static export API routes diff --git a/docs/src/pages/ja/(pages)/features/cli.mdx b/docs/src/pages/ja/(pages)/features/cli.mdx index f245015d..8c8882e9 100644 --- a/docs/src/pages/ja/(pages)/features/cli.mdx +++ b/docs/src/pages/ja/(pages)/features/cli.mdx @@ -242,6 +242,24 @@ export default { アプリケーションを静的HTMLページでエクスポートすることが出来ます。`react-server.config.mjs`でルーティングを定義してエクスポートすることが出来ます。詳細は[静的生成](/router/static)を参照してください。 + +### --export-concurrency + + +静的エクスポートのパスをレンダリングするのに使う並列プロセス数を指定します。デフォルトは利用可能なCPUコア数に応じて`2`から`4`の範囲で決定されます。 + +`1`を指定すると、エクスポートはメインプロセス内で単一のパイプラインとして実行され、パスは1つずつ順番にレンダリングされます。フォークやIPCのオーバーヘッドがないため最も軽量なモードで、小規模なエクスポートやデバッグに適しています。 + +`1`より大きい値を指定すると、その数だけレンダリング用の子プロセスがフォークされます。各子プロセスは独自のRSCメインスレッドとSSRワーカースレッドを持ち、1度に1つのパスをレンダリングします。コーディネーターはIPC経由で空いている子プロセスにパスを割り当てます。出力バイトはIPC境界を越えず、すべての成果物(HTML、`.gz` / `.br`サイドカー、`.postponed.json`、`.prerender-cache.json`)は子プロセス内で直接ディスクに書き出されます。どちらのモードでも生成される成果物のセットは同一です。 + +このオプションは、数万ページに及ぶ大規模なエクスポートを行う場合や、個々のページでサーバー側の処理が重い場合(Shikiによるシンタックスハイライト、大規模なMDXツリー、負荷の高いサーバーコンポーネントなど)に効果を発揮します。小規模なエクスポートではフォーク自体のコストが支配的になるため、`1`を指定したほうが速いこともあります。 + +```sh +pnpm react-server build --export --export-concurrency 8 +``` + +`react-server.config.mjs`の`exportConcurrency`としても設定できます。 + ### --compression diff --git a/docs/src/pages/ja/(pages)/router/static.page.mdx b/docs/src/pages/ja/(pages)/router/static.page.mdx index 3310727a..6f33f166 100644 --- a/docs/src/pages/ja/(pages)/router/static.page.mdx +++ b/docs/src/pages/ja/(pages)/router/static.page.mdx @@ -95,6 +95,38 @@ export default { }; ``` + +## エクスポートパスソースのストリーミング + + +通常の関数形式の`export`は配列を受け取ります。ファイルシステムルーターが解決したすべてのパスを一度に受け取る形です。ほとんどのアプリではこれで十分ですが、非常に大規模なエクスポート(数万ページ、CMSから取得するページネーションされたコンテンツ、動的に生成されるパスなど)では、レンダリング開始前にリスト全体をメモリ上に展開するのは効率的ではありません。ピークメモリがパス数に比例して増大し、最後のパスが収集されるまで最初のレンダリングを開始できないからです。 + +`export`を`async function*`(非同期ジェネレーター)として宣言すると、ストリーミングモードに切り替わります。ジェネレーターはファイルシステムルーターと`exportPaths`が生成するライブな`AsyncIterable`を受け取り、レンダラーにパスを1つずつ`yield`できます。レンダラーはワーカーが空いたときにのみパスをプルするため、ジェネレーターはエクスポートのちょうど1ステップ先を進む形になります。ピークメモリは総パス数に関係なく`O(パス記述子1つ分)`に保たれ、最初に`yield`されたパスからレンダリングが開始されます。並列レンダリングのために[`--export-concurrency`](/features/cli#build-export-concurrency)と組み合わせると効果的です。 + +```js filename="react-server.config.mjs" +export default { + async *export(paths) { 
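+    // (補足) yieldする値は "/about" のような文字列でも、
+    // { path: "/about" } のような記述子でも構わない。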
+ // ファイルシステムルーターが解決したパスをそのまま流す。 + for await (const p of paths) { + yield p; + } + + // その後、追加のパスを遅延的にyieldする。例: CMSからページ単位で取得、 + // 大規模なDBクエリから計算、ファイルから読み込み、など。 + let cursor = null; + do { + const { items, next } = await fetchPage(cursor); + for (const item of items) { + yield { path: `/posts/${item.slug}` }; + } + cursor = next; + } while (cursor); + }, +}; +``` + +> **注意:** 検出は関数の種別で行われます。`export`の値として`async function*`(または`function*`)を**直接**書く必要があります。通常の関数を返すラッパー(メモ化、デコレーターなど)を介すると、レガシーな配列変換契約にフォールバックします。通常の関数形式は変更されておらず、「配列を渡してくれれば、変換した配列を返す」という用途には引き続きこちらが適しています。 + ## APIルートの静的エクスポート diff --git a/docs/src/pages/md/GET.[...slug].server.mjs b/docs/src/pages/md/GET.[...slug].server.mjs index 057218cf..bf198d38 100644 --- a/docs/src/pages/md/GET.[...slug].server.mjs +++ b/docs/src/pages/md/GET.[...slug].server.mjs @@ -3,50 +3,62 @@ import { join } from "node:path"; import { useMatch } from "@lazarv/react-server/router"; -import { m } from "../../i18n.mjs"; +import { defaultLanguage, languages } from "../../const.mjs"; +import { + api_landing_title, + api_translation_banner, +} from "../../paraglide/messages.js"; import { apiReferenceIndex, renderApiReferenceLandingMarkdown, renderApiReferencePageMarkdown, } from "../../lib/api-reference.mjs"; -// Lazy loaders for frontmatter only +// Lazy loaders for frontmatter only. Glob both languages so requests +// for `/md//...` can resolve to the corresponding translation. +// API reference pages have no on-disk source — they render live from +// the `.d.ts` files; non-default languages get the English content +// with a translation banner. const moduleLoaders = import.meta.glob([ "../en/*/**/*.{md,mdx}", "../en/*.\\(index\\).{md,mdx}", + "../ja/*/**/*.{md,mdx}", + "../ja/*.\\(index\\).{md,mdx}", ]); const apiSlugs = new Set(apiReferenceIndex().map((p) => p.slug)); -function getSlug(key) { +function getSlug(relPath) { // For pages in (pages)/ directory: (pages)/guide/quick-start.mdx → guide/quick-start - let match = key.match(/\(pages\)\/(.+?)\.mdx?$/); + let match = relPath.match(/\(pages\)\/(.+?)\.mdx?$/); if (match) { return match[1].replace(/\.page$/, "").replace(/\/index$/, ""); } // For category index pages: guide.(index).mdx → guide - match = key.match(/^(.+?)\.\(index\)\.mdx?$/); + match = relPath.match(/^(.+?)\.\(index\)\.mdx?$/); if (match) { return match[1]; } return null; } -// Map from glob key to raw file path relative to pages/en/ -function globKeyToRelPath(globKey) { - return globKey.replace(/^\.\.\/en\//, ""); -} - -// Build slug → keys mapping -const slugToKey = new Map(); +// Build per-language slug maps. Each entry maps a slug like +// `guide/quick-start` to the glob key + relative path for that +// language, so the route handler can fetch the right translation. +const slugByLang = new Map(); for (const globKey of Object.keys(moduleLoaders)) { - const relPath = globKeyToRelPath(globKey); + const langMatch = globKey.match(/^\.\.\/([^/]+)\//); + const lang = langMatch?.[1]; + if (!lang || !languages.includes(lang)) continue; + const relPath = globKey.replace(/^\.\.\/[^/]+\//, ""); const slug = getSlug(relPath); - if (slug) { - slugToKey.set(slug, { globKey, relPath }); - } + if (!slug) continue; + if (!slugByLang.has(lang)) slugByLang.set(lang, new Map()); + slugByLang.get(lang).set(slug, { globKey, relPath }); } +const enSlugs = slugByLang.get(defaultLanguage) ?? 
new Map(); + function cleanMdx(raw) { // Remove frontmatter let content = raw.replace(/^---[\s\S]*?---\n*/m, ""); @@ -91,30 +103,59 @@ function cleanMdx(raw) { } // Export all available slugs so they can be used for static generation. -// In addition to the MDX-derived slugs, publish every API reference slug — -// those pages have no on-disk source; their `.md` form is rendered live -// by `renderApiReferencePageMarkdown`. -export const slugs = [ - ...slugToKey.keys(), - "api", - ...[...apiSlugs].map((s) => `api/${s}`), -]; +// Default-language slugs are exposed bare (e.g. `guide/quick-start`), +// every other configured language is also exposed under a language +// prefix (e.g. `ja/guide/quick-start`) so the file-router emits a +// per-language `.md` artifact for each page. API reference slugs are +// always exposed under every language prefix — there is no Japanese +// `.d.ts` source, so non-default languages get the English content +// with a translation banner via `api_translation_banner`. +export const slugs = (() => { + const out = []; + for (const lang of languages) { + const prefix = lang === defaultLanguage ? "" : `${lang}/`; + const langSlugs = slugByLang.get(lang) ?? new Map(); + for (const slug of langSlugs.keys()) out.push(`${prefix}${slug}`); + out.push(`${prefix}api`); + for (const apiSlug of apiSlugs) out.push(`${prefix}api/${apiSlug}`); + } + return out; +})(); export default async function MarkdownRoute() { const { slug } = useMatch("/md/[[...slug]]"); - const path = slug?.join("/"); + const segs = slug ?? []; - if (!path) { + if (segs.length === 0) { return new Response("Not Found", { status: 404 }); } - // Dynamic API reference pages — rendered on demand from the `.d.ts` - // files in `packages/react-server`, no on-disk source exists. + // Detect a language prefix on the slug. URLs of the form + // `/md//...` route to that language's translation; bare + // `/md/...` paths use the default language. Anything else + // (`segs[0]` not a known language) is treated as a default-language + // slug whose first segment happens to share a name with no language. + let lang = defaultLanguage; + let pathSegs = segs; + if ( + segs.length > 0 && + languages.includes(segs[0]) && + segs[0] !== defaultLanguage + ) { + lang = segs[0]; + pathSegs = segs.slice(1); + } + const path = pathSegs.join("/"); + + // API reference: content is generated from `.d.ts` files (English + // only). Non-default languages get the English content with a + // translation banner; the banner itself is fetched in the active + // language so it reads naturally to the agent making the request. if (path === "api") { return new Response( renderApiReferenceLandingMarkdown({ - title: m.api_landing_title(), - banner: m.api_translation_banner(), + title: api_landing_title({}, { languageTag: lang }), + banner: api_translation_banner({}, { languageTag: lang }), }), { headers: { "Content-Type": "text/markdown; charset=utf-8" }, @@ -125,7 +166,7 @@ export default async function MarkdownRoute() { const apiSlug = path.slice("api/".length); if (apiSlugs.has(apiSlug)) { const markdown = renderApiReferencePageMarkdown(apiSlug, { - banner: m.api_translation_banner(), + banner: api_translation_banner({}, { languageTag: lang }), }); if (markdown) { return new Response(markdown, { @@ -136,13 +177,22 @@ export default async function MarkdownRoute() { return new Response("Not Found", { status: 404 }); } - const keys = slugToKey.get(path); + // MDX page lookup: prefer the requested language; if a translation + // is missing (e.g. 
`pages/ja/(pages)/advanced/...` doesn't exist + // for some pages that exist under en/), fall back to English so + // the URL still resolves with content rather than 404. + const langMap = slugByLang.get(lang) ?? new Map(); + let keys = langMap.get(path); + let resolvedLang = lang; + if (!keys && lang !== defaultLanguage) { + keys = enSlugs.get(path); + resolvedLang = defaultLanguage; + } if (!keys) { return new Response("Not Found", { status: 404 }); } - // Read raw source file from disk (works in dev and during static export build) - const pagesDir = join(process.cwd(), "src", "pages", "en"); + const pagesDir = join(process.cwd(), "src", "pages", resolvedLang); const raw = await readFile(join(pagesDir, keys.relPath), "utf-8"); const mod = await moduleLoaders[keys.globKey](); const title = mod?.frontmatter?.title; diff --git a/examples/ppr/react-server.config.json b/examples/ppr/react-server.config.json index 0967ef42..8a70347d 100644 --- a/examples/ppr/react-server.config.json +++ b/examples/ppr/react-server.config.json @@ -1 +1,3 @@ -{} +{ + "export": ["/"] +} diff --git a/packages/react-server/bin/commands/build.mjs b/packages/react-server/bin/commands/build.mjs index 0de538c4..12c37819 100644 --- a/packages/react-server/bin/commands/build.mjs +++ b/packages/react-server/bin/commands/build.mjs @@ -15,6 +15,10 @@ export default (cli) => .option("--no-check", "skip dependency checks", { default: false }) .option("--no-validation", "skip config validation", { default: false }) .option("--export", "[boolean] static export") + .option( + "--export-concurrency ", + "[number] number of parallel processes for static export (1 = single-process; >1 = multi-process)" + ) .option("--compression", "[boolean] enable compression", { default: false }) .option("--adapter ", "[boolean|string] adapter", { default: "", diff --git a/packages/react-server/config/schema.d.ts b/packages/react-server/config/schema.d.ts index 98d1d4b1..197afbed 100644 --- a/packages/react-server/config/schema.d.ts +++ b/packages/react-server/config/schema.d.ts @@ -1153,14 +1153,61 @@ export interface ReactServerConfig { compression?: boolean; /** - * Static export configuration. Provide paths to export, a function returning paths, or boolean. - * @example `export: ["/", "/about"]` or `export: [{ path: "/" }]` or `export: true` + * Static export configuration. Provide paths to export, a function + * returning paths, or a boolean. Functions written as + * `async function*` (or `function*`) are treated as streaming + * transforms — they receive an `AsyncIterable` + * and yield paths lazily, without ever materializing the full list. + * Regular functions keep the legacy array-in / array-out contract + * (the path list is materialized for them). + * @example `export: ["/", "/about"]` + * @example `export: [{ path: "/" }]` + * @example `export: true` + * @example + * ```js + * // Streaming transform — handles arbitrarily large path sources. + * export: async function* (paths) { + * for await (const p of paths) { + * if (!p.path.startsWith("/draft/")) yield p; + * } + * } + * ``` */ export?: | boolean | ((paths: string[]) => string[] | ExportPathDescriptor[]) + | (( + paths: AsyncIterable + ) => + | Iterable + | AsyncIterable) | (string | ExportPathDescriptor)[]; + /** + * Number of parallel processes for static export. + * + * - `1` runs the export in the current process (single-threaded + * RSC + one SSR worker). + * - `> 1` forks N child processes, each with its own RSC main thread + * and SSR worker, distributing paths via IPC. 
This is what gives + * true CPU parallelism for RSC-bound workloads (heavy server + * components, syntax highlighters like Shiki, etc.) — main-thread + * RSC is single-thread per process, so N processes = N parallel + * renders. + * + * HTML/RSC bytes never cross the IPC boundary; each child writes + * directly to disk. Memory at the coordinator stays O(workerCount); + * memory per child stays O(one-chunk × few-sinks) thanks to streaming + * fanout. The combined ceiling scales to effectively unbounded path + * counts. + * + * Default: `Math.max(2, Math.min(availableParallelism() - 1, 4))`. + * Set to `1` to opt out of multi-process and avoid fork startup cost + * on tiny exports. + * @example `exportConcurrency: 8` + */ + exportConcurrency?: number; + /** * Enable prerendering. * @example `prerender: true` or `prerender: { timeout: 30000 }` diff --git a/packages/react-server/lib/build/fanout.mjs b/packages/react-server/lib/build/fanout.mjs new file mode 100644 index 00000000..3e498f94 --- /dev/null +++ b/packages/react-server/lib/build/fanout.mjs @@ -0,0 +1,84 @@ +import { once } from "node:events"; + +/** + * Pump a Web ReadableStream into multiple Node.js Writable streams with + * synchronized backpressure. + * + * Memory profile per call: one chunk in flight × number of sinks. We do + * NOT use ReadableStream.tee() because tee buffers the gap between the + * fastest and slowest consumer — and brotli is 5–10× slower than a raw + * file write, which means tee accumulates the entire HTML body in memory + * for any non-trivial page. fanout reads one chunk, pushes it to every + * sink, waits for *all* sinks to accept it (honoring backpressure on each + * Writable), then reads the next chunk. Predictable, bounded memory. + * + * Backpressure: writeWithBackpressure only resolves when the sink has + * either accepted the chunk synchronously or emitted "drain" after a + * full buffer. The slowest sink dictates the read cadence — exactly what + * we want. + */ +export async function fanout(webStream, sinks) { + if (!webStream) { + // Nothing to pump (e.g. response had no body). Still close sinks + // cleanly so the pipeline ends. + await Promise.all(sinks.map(endSink)); + return; + } + + const reader = webStream.getReader(); + let pumpError = null; + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + // Normalize Uint8Array → Buffer once. Node Writables accept both, + // but Buffer is what gzip/brotli streams expect natively, and + // sharing one Buffer across sinks avoids per-sink allocation. + const chunk = + Buffer.isBuffer(value) || typeof value === "string" + ? value + : Buffer.from(value.buffer, value.byteOffset, value.byteLength); + await Promise.all(sinks.map((s) => writeWithBackpressure(s, chunk))); + } + } catch (e) { + pumpError = e; + } finally { + try { + reader.releaseLock(); + } catch { + // releaseLock can throw if the stream is in a weird state; the + // pump error (if any) is what matters. + } + } + + // End every sink, even on error. Letting them dangle would leak file + // descriptors. If the pump failed we still want to close the sinks + // (with destroy semantics) before propagating the error. 
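+  // destroy(err) (unlike end()) tears the sink down immediately and makes
+  // anything awaiting it settle with the error; a pipeline(gzip, file)
+  // tail held by the caller, for example, rejects instead of waiting on
+  // a flush that will never come.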
+ if (pumpError) { + for (const s of sinks) { + try { + s.destroy(pumpError); + } catch { + /* noop */ + } + } + throw pumpError; + } + + await Promise.all(sinks.map(endSink)); +} + +function writeWithBackpressure(stream, chunk) { + if (stream.write(chunk)) return Promise.resolve(); + return once(stream, "drain"); +} + +function endSink(stream) { + return new Promise((resolve, reject) => { + if (stream.writableEnded || stream.destroyed) { + resolve(); + return; + } + stream.end((err) => (err ? reject(err) : resolve())); + }); +} diff --git a/packages/react-server/lib/build/p-map-stream.mjs b/packages/react-server/lib/build/p-map-stream.mjs new file mode 100644 index 00000000..74b2f221 --- /dev/null +++ b/packages/react-server/lib/build/p-map-stream.mjs @@ -0,0 +1,67 @@ +/** + * Bounded-concurrency consumer of an AsyncIterable. + * + * Spawns N parallel "workers" that share a single async iterator. Each + * worker pulls the next item, runs the mapper, repeats until the iterator + * is exhausted. There is no internal queue — the iterator itself is the + * queue. Memory is exactly O(concurrency) retained mapper scopes, + * regardless of how many items the iterable yields. This is the property + * Promise.all(map(...)) cannot give: that scheduler instantiates every + * mapper closure up front, holding their full scopes alive in parallel. + * + * Backpressure is automatic: a slow source generator (e.g. paginated DB + * fetch) is only pulled when a worker is free, so the source never gets + * ahead of the consumer. + * + * Errors thrown by the mapper are fatal: the first error aborts further + * pulls and rejects the returned promise. If the caller wants per-item + * error tolerance, they should catch inside the mapper. + */ +export async function pMapStream(asyncIterable, mapper, concurrency) { + if (concurrency < 1) { + throw new Error(`pMapStream concurrency must be >= 1, got ${concurrency}`); + } + const it = asyncIterable[Symbol.asyncIterator] + ? asyncIterable[Symbol.asyncIterator]() + : asyncIterable[Symbol.iterator](); + + let aborted = null; + + // Serialize iterator.next() across workers. Async iterators are not + // required to be reentrant — calling next() again before the previous + // call resolves is undefined behavior on some sources. A simple chained + // promise gate enforces sequential pulls without blocking workers from + // running their mappers concurrently. + let nextChain = Promise.resolve(); + const safeNext = () => { + const p = nextChain.then(() => it.next()); + nextChain = p.then( + () => undefined, + () => undefined + ); + return p; + }; + + const worker = async () => { + while (!aborted) { + let step; + try { + step = await safeNext(); + } catch (e) { + aborted = e; + return; + } + if (step.done) return; + try { + await mapper(step.value); + } catch (e) { + aborted = e; + return; + } + } + }; + + const workers = Array.from({ length: concurrency }, worker); + await Promise.all(workers); + if (aborted) throw aborted; +} diff --git a/packages/react-server/lib/build/path-source.mjs b/packages/react-server/lib/build/path-source.mjs new file mode 100644 index 00000000..742a75ba --- /dev/null +++ b/packages/react-server/lib/build/path-source.mjs @@ -0,0 +1,150 @@ +/** + * Streaming path-source primitives for the static exporter. + * + * The exporter consumes paths via a single AsyncIterable. 
This + * file normalizes everything users may pass — strings, descriptors, + * functions, sync/async iterables, generators — into that uniform stream + * without ever materializing the path list. With a generator path source, + * memory cost is O(one path descriptor) regardless of total path count. + * + * Back-compat: + * - `options.exportPaths` accepts the historical array shape, plus new + * async-iterable / generator shapes. + * - `configRoot.export` as a *regular* function keeps array-in/array-out + * semantics (we materialize for it). As an `async function*` (or sync + * generator function) it receives the live AsyncIterable and is + * expected to yield ExportPath items lazily — true streaming transform. + * The constructor-name check (`AsyncGeneratorFunction` / + * `GeneratorFunction`) is the explicit opt-in: if you write + * `async function*`, you get streaming. + */ + +/** + * Normalize anything path-source-shaped into AsyncIterable. + * + * Accepts: + * - `null` / `undefined` (yields nothing) + * - `string` (yields `{ path: string }`) + * - descriptor object (yields it as-is if it has `path` or `filename`) + * - function returning any of the above (called, result re-normalized) + * - sync iterable (Array, Set, generator) of any of the above + * - async iterable (async generator, ReadableStream-like) of any of the + * above + * + * Recursive: arrays-of-functions-returning-arrays etc. are flattened lazily. + */ +export async function* toPathStream(source) { + if (source == null) return; + + if (typeof source === "string") { + yield { path: source }; + return; + } + + if (typeof source === "function") { + yield* toPathStream(await source()); + return; + } + + // Async iterables take precedence over sync iterables — some objects + // implement both (e.g. ReadableStream in newer Node). + if (typeof source[Symbol.asyncIterator] === "function") { + for await (const item of source) { + yield* toPathStream(item); + } + return; + } + + if (typeof source[Symbol.iterator] === "function") { + for (const item of source) { + yield* toPathStream(item); + } + return; + } + + if (typeof source === "object" && (source.path || source.filename)) { + yield source; + return; + } + + throw new Error( + `Invalid export path entry: ${JSON.stringify(source)} — expected string, descriptor object with "path" or "filename", function, or (async) iterable thereof.` + ); +} + +/** + * Detect whether a function is a generator or async generator. Detection is + * by `constructor.name`, which is well-defined for the language's built-in + * generator function constructors. Wrappers (e.g. memoization layers) that + * return ordinary functions will fall through to the array-transform path — + * users who need streaming should write `async function*` directly. + */ +function isGeneratorFunction(fn) { + if (typeof fn !== "function") return false; + const name = fn.constructor?.name; + return name === "AsyncGeneratorFunction" || name === "GeneratorFunction"; +} + +/** + * Compose `options.exportPaths` and `configRoot.export` into a single + * AsyncIterable. This is what the exporter consumes. + * + * Rules: + * - `options.exportPaths` is always normalized via `toPathStream` — + * anything goes, including async generators. + * - `configRoot.export` of array form is a static prelude that yields + * before user paths. 
+ * - `configRoot.export` of regular-function form preserves the + * historical array-in/array-out contract: we materialize the user + * stream into an array, hand it to the function, then re-stream the + * return value. This is back-compat — the user opted into array + * semantics by writing a non-generator function. + * - `configRoot.export` of generator-function form (`async function*`) + * receives the live AsyncIterable and is itself a + * streaming transform. No materialization. + */ +export async function* buildPathStream(options, configRoot) { + const userStream = toPathStream(options.exportPaths); + + if (typeof configRoot.export === "function") { + if (isGeneratorFunction(configRoot.export)) { + // Streaming transform. The user yields paths derived from `userStream` + // (or independent sources) without materializing. + const transformed = configRoot.export(userStream); + yield* toPathStream(transformed); + return; + } + + // Legacy array-transform contract: materialize, hand over, re-stream. + // Users with millions of paths should switch to a generator form to + // skip this materialization. + const collected = []; + for await (const p of userStream) collected.push(p); + const result = await configRoot.export(collected); + yield* toPathStream(result); + return; + } + + if (Array.isArray(configRoot.export)) { + yield* toPathStream(configRoot.export); + } + yield* userStream; +} + +/** + * Validate-as-you-go. Wraps an AsyncIterable with per-item + * normalization (string → descriptor) and fail-fast validation. Any item + * lacking both `path` and `filename` throws immediately, naming the + * offending item — no count-and-report-at-end pass needed. + */ +export async function* validatedPathStream(stream) { + for await (const item of stream) { + const descriptor = typeof item === "string" ? { path: item } : item; + if (!descriptor || (!descriptor.path && !descriptor.filename)) { + throw new Error( + `Export path entry is missing "path" (or "filename"): ${JSON.stringify(item)}` + ); + } + yield descriptor; + } +} diff --git a/packages/react-server/lib/build/static-coordinator.mjs b/packages/react-server/lib/build/static-coordinator.mjs new file mode 100644 index 00000000..46130a35 --- /dev/null +++ b/packages/react-server/lib/build/static-coordinator.mjs @@ -0,0 +1,238 @@ +import { fork } from "node:child_process"; +import { fileURLToPath } from "node:url"; + +import { pMapStream } from "./p-map-stream.mjs"; + +/** + * Multi-process static-export coordinator. + * + * Forks N render-worker children (`static-worker.mjs`). Each child + * independently runs the same render pipeline as the single-process + * exporter (`setupStaticRender` + `emitAllArtifacts`). The coordinator + * iterates the path stream and dispatches one path at a time over IPC + * to a free child; the child writes every artifact for that path to + * disk and replies with the log entries. + * + * Memory at the coordinator stays O(1) in path bytes — bytes never + * cross IPC. Memory at each child is whatever a single render uses, + * which is the well-tested production envelope. Postpone / + * prerender-cache sidecars are emitted because the child renders + * directly via `setupStaticRender`, the same code path single-process + * mode uses; both modes produce the same artifact set. 
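+ *
+ * Per-path dispatch, sketched (see static-worker.mjs for the full
+ * IPC protocol):
+ *
+ *   parent → child   { type: "render", path }
+ *   child  → parent  { type: "render-complete", entries }
+ *   child  → parent  { type: "render-error", message, stack } (on failure)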
+ */ + +export async function runMultiProcess({ + root, + options, + configRoot, + pathStream, + workerCount, + onLog, + onError, +}) { + if (workerCount < 1) { + throw new Error(`workerCount must be >= 1, got ${workerCount}`); + } + + const workerScript = fileURLToPath( + new URL("./static-worker.mjs", import.meta.url) + ); + const childOptions = stripNonSerializable(options); + + // Fork all children up front and wait until each is ready. Doing + // this in parallel hides the per-child startup cost (loader register, + // config load, SSR worker spawn) behind the slowest child's wall-clock. + const children = await Promise.all( + Array.from({ length: workerCount }, () => + spawnChild(workerScript, root, childOptions, configRoot) + ) + ); + + // Free-children pool: simple lock-free pull pattern. Workers enter + // and leave the pool atomically (a path holds a child until its + // artifacts are written, then releases). + const free = [...children]; + const waiters = []; + const acquire = () => { + if (free.length > 0) return Promise.resolve(free.pop()); + return new Promise((resolve) => waiters.push(resolve)); + }; + const release = (child) => { + const w = waiters.shift(); + if (w) w(child); + else free.push(child); + }; + + let pathCount = 0; + let coordinatorError = null; + + try { + await pMapStream( + pathStream, + async (p) => { + pathCount++; + const child = await acquire(); + try { + const entries = await renderOnChild(child, p); + for (const entry of entries) onLog?.(entry); + } catch (e) { + // Per-path error: report and continue. A single bad page + // must not kill a 24k-page run; the orchestrator counts these + // and exits non-zero at the end. + onError?.({ + message: e?.message ?? String(e), + stack: e?.stack, + path: p, + }); + } finally { + release(child); + } + }, + workerCount + ); + } catch (e) { + coordinatorError = e; + } finally { + // Tell every child to terminate its SSR worker and exit. Wait for + // all to finish before returning so a re-run sees a clean slate. + await Promise.all(children.map((c) => shutdownChild(c))); + } + + if (coordinatorError) throw coordinatorError; + return { pathCount }; +} + +async function spawnChild(workerScript, root, options, config) { + return new Promise((resolveChild, rejectChild) => { + // Silence all child stdio — children run a full render pipeline + // (loader register, SSR worker, render-stream chatter) which would + // interleave with the parent's spinner and per-artifact log lines. + // Anything actionable still reaches the parent: fatal errors come + // through the IPC `fatal` envelope, render failures come through + // `render-error`, and unexpected child exits are observed below. + const proc = fork(workerScript, { + stdio: ["inherit", "inherit", "inherit", "ipc"], + env: { + ...process.env, + REACT_SERVER_STATIC_WORKER: "1", + REACT_SERVER_PRERENDER: config.prerender, + }, + }); + + // Per-child render slot. Only one render is in flight at a time + // because the coordinator's free-children pool serializes dispatch. + const child = { proc, pending: null }; + + let ready = false; + + proc.on("message", (msg) => { + if (!msg || typeof msg !== "object") return; + if (msg.type === "ready") { + ready = true; + resolveChild(child); + } else if (msg.type === "render-complete") { + const slot = child.pending; + child.pending = null; + slot?.resolve(msg.entries ?? []); + } else if (msg.type === "render-error") { + const err = new Error(msg.message ?? 
"static-worker render error"); + err.stack = msg.stack; + const slot = child.pending; + child.pending = null; + slot?.reject(err); + } else if (msg.type === "fatal") { + const err = new Error(msg.message ?? "static-worker fatal"); + err.stack = msg.stack; + const slot = child.pending; + child.pending = null; + if (!ready) rejectChild(err); + else slot?.reject(err); + } + }); + + proc.once("error", (e) => { + const slot = child.pending; + child.pending = null; + if (!ready) rejectChild(e); + else slot?.reject(e); + }); + + proc.once("exit", (code) => { + // An exit during init rejects the spawn; an exit during render + // rejects the in-flight render; a clean exit during shutdown is + // the expected case and we just ignore it. + if (!ready) { + rejectChild( + new Error( + `static-export worker (pid ${proc.pid}) exited with code ${code} during init` + ) + ); + return; + } + const slot = child.pending; + child.pending = null; + slot?.reject( + new Error( + `static-export worker (pid ${proc.pid}) exited with code ${code} mid-render` + ) + ); + }); + + proc.send({ type: "init", root, options }); + }); +} + +function renderOnChild(child, path) { + return new Promise((resolve, reject) => { + if (child.pending) { + reject(new Error("internal: render dispatched to busy child")); + return; + } + child.pending = { resolve, reject }; + try { + child.proc.send({ type: "render", path }); + } catch (e) { + child.pending = null; + reject(e); + } + }); +} + +async function shutdownChild(child) { + return new Promise((resolve) => { + const t = setTimeout(() => { + try { + child.proc.kill("SIGKILL"); + } catch { + /* already gone */ + } + resolve(); + }, 5000); + + child.proc.once("exit", () => { + clearTimeout(t); + resolve(); + }); + + try { + if (child.proc.connected) child.proc.send({ type: "shutdown" }); + else child.proc.kill(); + } catch { + child.proc.kill(); + } + }); +} + +function stripNonSerializable(options) { + const out = {}; + for (const [k, v] of Object.entries(options ?? {})) { + if (typeof v === "function") continue; + try { + JSON.stringify(v); + out[k] = v; + } catch { + /* circular / non-serializable — skip */ + } + } + return out; +} diff --git a/packages/react-server/lib/build/static-emit.mjs b/packages/react-server/lib/build/static-emit.mjs new file mode 100644 index 00000000..30c43d00 --- /dev/null +++ b/packages/react-server/lib/build/static-emit.mjs @@ -0,0 +1,517 @@ +import { createWriteStream } from "node:fs"; +import { once } from "node:events"; +import { stat } from "node:fs/promises"; +import { basename, dirname, join } from "node:path"; +import { pipeline } from "node:stream/promises"; +import { createBrotliCompress, createGzip } from "node:zlib"; + +import { filesize } from "filesize"; +import colors from "picocolors"; + +import { toBuffer } from "../../cache/rsc.mjs"; +import * as sys from "../sys.mjs"; +import { fanout } from "./fanout.mjs"; + +const cwd = sys.cwd(); + +/** + * Map a path entry + artifact kind to the on-disk location and the + * canonical relative basename used for log output. + */ +function resolveTarget(p, kind, outDir) { + const normalizedPath = p.path + ? p.path.replace(/^\/+/g, "").replace(/\/+$/g, "") + : ""; + let normalizedBasename; + if (kind === "html") { + normalizedBasename = (p.filename ?? `${normalizedPath}/index.html`).replace( + /^\/+/g, + "" + ); + } else if (kind === "rsc") { + const tail = p.outlet ? 
`@${p.outlet}.rsc.x-component` : "rsc.x-component";
+    normalizedBasename = `${normalizedPath}/${tail}`.replace(/^\/+/g, "");
+  } else if (kind === "remote") {
+    normalizedBasename = `${normalizedPath}/remote.x-component`.replace(
+      /^\/+/g,
+      ""
+    );
+  } else {
+    throw new Error(`Unknown artifact kind: ${kind}`);
+  }
+  const filename = join(cwd, outDir, "dist", normalizedBasename);
+  return { filename, normalizedBasename };
+}
+
+/**
+ * Pump a Web ReadableStream body into the primary file plus optional
+ * gzip / brotli sidecars. Memory bound: one chunk × (1 + 2*compression)
+ * sinks via `fanout`, regardless of body size. The `pipeline(transform,
+ * file)` calls are kept as `tails` so the caller's await resolves only
+ * after every downstream file has fully closed — without the await,
+ * `statSafe` could race the file flush and report stale (or zero) sizes.
+ */
+async function streamToCompressedArtifacts({ body, filename, compression }) {
+  const sinks = [];
+  const tails = [];
+
+  const fileOut = createWriteStream(filename);
+  sinks.push(fileOut);
+
+  if (compression) {
+    const gzip = createGzip();
+    const gzipFile = createWriteStream(`${filename}.gz`);
+    tails.push(pipeline(gzip, gzipFile));
+    sinks.push(gzip);
+
+    const brotli = createBrotliCompress();
+    const brotliFile = createWriteStream(`${filename}.br`);
+    tails.push(pipeline(brotli, brotliFile));
+    sinks.push(brotli);
+  }
+
+  await fanout(body, sinks);
+  if (tails.length) await Promise.all(tails);
+}
+
+/**
+ * `stat()` that returns 0 when the file doesn't exist, instead of
+ * throwing. Used to populate size columns in the verbose log without
+ * having to know up front whether a sidecar was actually emitted.
+ */
+async function statSafe(filename) {
+  try {
+    const s = await stat(filename);
+    return s.size;
+  } catch {
+    return 0;
+  }
+}
+
+/**
+ * Single-process artifact emitters and verbose-log formatter.
+ *
+ * Each emit function streams its rendered artifact to disk and returns
+ * a plain JSON-serializable log entry. `formatLogEntry` is the
+ * verbose/CI consumer that prints aligned size columns. The
+ * multi-process coordinator's children render through these same
+ * emitters (`emitAllArtifacts`) and report their log entries back over
+ * IPC; the coordinator prints them with `formatLogEntry`, so the two
+ * modes produce the same artifacts and identical-looking output.
+ *
+ * Memory model: per-path peak is one Web-stream chunk × (file + gzip +
+ * brotli) sinks via fanout. We never `await response.text()`.
+ */
+
+// Filename column padding for verbose/CI mode. Fixed across the run so
+// the size columns sit at the same horizontal position on every line —
+// alignment is what makes the verbose output legible. The streaming
+// exporter can't pre-compute the perfect width (that would force the
+// path list to materialize), so we use a generous constant: filenames
+// shorter than this pad to the column; filenames longer than this
+// don't pad and run into the size column on their line only, which is
+// less ugly than the alternative of recomputing alignment per-line.
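+// A verbose line renders roughly as (illustrative path and sizes):
+//   <outDir>/dist/guide/quick-start/index.html           12.3 kB │ gzip: 4.1 kB │ brotli: 3.6 kB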
+const FILENAME_PAD = 60; + +function size(bytes) { + const s = filesize(bytes); + return " ".repeat(Math.max(0, 10 - s.length)) + s; +} + +function truncateFilename(dirPart, filePart, maxLength) { + const totalLength = dirPart.length + filePart.length; + if (totalLength <= maxLength || maxLength < 10) { + return { dir: dirPart, file: filePart }; + } + + const excess = totalLength - maxLength + 3; + + if (filePart.length > excess + 6) { + const remaining = filePart.length - excess - 3; + const keepStart = Math.ceil(remaining / 2); + const keepEnd = remaining - keepStart; + const truncatedFile = + filePart.slice(0, keepStart) + "..." + filePart.slice(-keepEnd); + return { dir: dirPart, file: truncatedFile }; + } + + const availableTotal = maxLength - 3; + const keepFileEnd = Math.min( + filePart.length, + Math.ceil(availableTotal * 0.6) + ); + const keepDirStart = availableTotal - keepFileEnd; + + const truncatedDir = + keepDirStart > 0 ? dirPart.slice(0, keepDirStart) + "..." : "..."; + const truncatedFile = filePart.slice(-keepFileEnd); + + return { dir: truncatedDir, file: truncatedFile }; +} + +/** + * Format a log entry produced by emitHtml/emitRsc/emitRemote and write + * it to stdout. Verbose layout mirrors the historical column format. + */ +export function formatLogEntry(entry) { + const { + outDir, + normalizedBasename, + htmlSize: htmlBytes, + gzipSize: gzipBytes, + brotliSize: brotliBytes, + postponedSize: postponedBytes, + prerenderCacheSize: prerenderCacheBytes, + } = entry; + + // CI / non-TTY: there's no real terminal width, so don't truncate. + // Using `Infinity` lets the column-fitting logic below keep every + // size column unconditionally, which is what users want when piping + // to a log file or reading in a CI buffer. + const termWidth = + process.stdout.columns || (process.stdout.isTTY ? 80 : Infinity); + const prefix = `${outDir}/dist/`; + const dirPart = dirname(normalizedBasename).replace(".", ""); + const filePart = + (dirname(normalizedBasename) === "." ? "" : "/") + + basename(normalizedBasename); + const filenamePart = dirPart + filePart; + + const htmlSize = size(htmlBytes ?? 0); + const gzipSize = gzipBytes ? ` │ gzip: ${size(gzipBytes)}` : ""; + const brotliSize = brotliBytes ? ` │ brotli: ${size(brotliBytes)}` : ""; + const postponedSize = postponedBytes + ? ` │ partial pre-render: ${size(postponedBytes)}` + : ""; + const prerenderCacheSize = prerenderCacheBytes + ? 
` │ pre-render cache: ${size(prerenderCacheBytes)}` + : ""; + + const allSizeColumns = + gzipSize + brotliSize + postponedSize + prerenderCacheSize; + + const idealPadding = Math.max(0, FILENAME_PAD - normalizedBasename.length); + const fullLineLength = + prefix.length + + filenamePart.length + + idealPadding + + htmlSize.length + + allSizeColumns.length; + + let sizeSuffix = ""; + if (fullLineLength <= termWidth) { + sizeSuffix = allSizeColumns; + } else { + const sizeColumns = [ + gzipSize, + brotliSize, + postponedSize, + prerenderCacheSize, + ]; + let currentLength = + prefix.length + filenamePart.length + idealPadding + htmlSize.length; + for (const col of sizeColumns) { + if (col && currentLength + col.length <= termWidth) { + sizeSuffix += col; + currentLength += col.length; + } else if (col) { + break; + } + } + } + + const totalSizeLength = htmlSize.length + sizeSuffix.length; + const availableForFilename = termWidth - prefix.length - totalSizeLength - 1; + + let displayDir = dirPart; + let displayFile = filePart; + let displayPadding; + + if ( + filenamePart.length + idealPadding > availableForFilename && + availableForFilename > 10 + ) { + if (filenamePart.length <= availableForFilename) { + displayPadding = " ".repeat( + Math.max(0, availableForFilename - filenamePart.length) + ); + } else { + const { dir, file } = truncateFilename( + dirPart, + filePart, + availableForFilename + ); + displayDir = dir; + displayFile = file; + displayPadding = " ".repeat( + Math.max( + 0, + availableForFilename - displayDir.length - displayFile.length + ) + ); + } + } else { + displayPadding = " ".repeat(idealPadding); + } + + console.log( + `${colors.dim(prefix)}${colors.green(displayDir)}${colors.cyan(displayFile)}${displayPadding}${colors.gray(colors.bold(htmlSize))}${colors.dim(sizeSuffix)}` + ); +} + +function makeUrl(p, config, suffix = "") { + const proto = config.server?.https ? "https" : "http"; + const host = config.host ?? "localhost"; + const port = config.port ?? 3000; + return new URL(`${proto}://${host}:${port}${p.path}${suffix}`); +} + +function makeRequest(url, p, accept, extraHeaders) { + return { + url: url.toString(), + method: p.method ?? "GET", + headers: new Headers({ + accept, + origin: p.origin ?? sys.getEnv("ORIGIN") ?? url.origin, + host: p.host ?? sys.getEnv("HOST") ?? url.hostname, + ...(p.headers ?? {}), + ...extraHeaders, + }), + }; +} + +async function writePrerenderCache(filename, cacheSet) { + const entries = Array.from(cacheSet).filter( + (entry) => entry.provider?.options?.prerender + ); + if (entries.length === 0) return false; + + const out = createWriteStream(filename); + const writeBackpressured = (chunk) => { + if (out.write(chunk)) return Promise.resolve(); + return once(out, "drain"); + }; + + await writeBackpressured("["); + for (let i = 0; i < entries.length; i++) { + const entry = entries[i]; + const [kBuffer, vBuffer] = await Promise.all([ + toBuffer(entry.keys), + toBuffer(entry.result), + ]); + const cacheEntry = [ + kBuffer.toString("base64"), + vBuffer.toString("base64"), + Date.now(), + entry.ttl, + { + ...entry?.provider, + serializer: entry.provider?.serializer ? "rsc" : undefined, + }, + ]; + if (i > 0) await writeBackpressured(","); + await writeBackpressured(JSON.stringify(cacheEntry)); + } + await writeBackpressured("]"); + await new Promise((res, rej) => out.end((e) => (e ? rej(e) : res()))); + return true; +} + +/** + * Render and write the HTML artifact (plus optional .gz / .br / + * .postponed.json / .prerender-cache.json sidecars). 
Returns a log + * entry for the orchestrator to print. + * + * `ctx` shape: + * - render(req) → ssrHandler-bound render function + * - config → server config (for host/port) + * - configRoot → user config (for prerender flag) + * - outDir → from CLI/options + * - compression → boolean + * - ensureDir(d) → mkdir-with-cache helper + */ +async function emitHtml(p, ctx) { + const url = makeUrl(p, ctx.config); + const { filename, normalizedBasename } = resolveTarget(p, "html", ctx.outDir); + await ctx.ensureDir(dirname(filename)); + + // Resolve the effective prerender state for this path: a per-path + // `p.prerender` overrides config; absent both we render dynamic. + // When prerender is disabled at either level we skip *all* postpone / + // prerender-cache machinery — no callback wired into render(), no + // Set allocated, no sidecar emitted. This matches what users + // configuring `prerender: false` actually mean: "don't do partial + // pre-rendering for this build". + const prerenderEffective = + p.prerender ?? + ctx.configRoot.prerender ?? + sys.getEnv("REACT_SERVER_PRERENDER") !== "false"; + const prerenderDisabled = prerenderEffective === false; + + let postponed; + const prerenderCache = prerenderDisabled ? null : new Set(); + const response = await ctx.render({ + url, + method: p.method ?? "GET", + request: makeRequest(url, p, "text/html"), + prerender: prerenderEffective, + prerenderCache, + onPostponed: prerenderDisabled + ? null + : (postponedState) => { + postponed = postponedState; + }, + }); + + if (p.filename) { + // Filename-based output (e.g. `api/dev.html`, `api/dev.md`). + // Historically these skipped compression entirely; now they emit + // `.gz` / `.br` sidecars when compression is enabled, matching + // the path-based branch. The postponed/prerender-cache sidecars + // remain off — those depend on routing through the path-based + // path layout. + await streamToCompressedArtifacts({ + body: response.body, + filename, + compression: ctx.compression, + }); + const [htmlSize, gzipSize, brotliSize] = await Promise.all([ + statSafe(filename), + ctx.compression ? statSafe(`${filename}.gz`) : Promise.resolve(0), + ctx.compression ? statSafe(`${filename}.br`) : Promise.resolve(0), + ]); + return { + kind: "html", + outDir: ctx.outDir, + normalizedBasename, + htmlSize, + gzipSize, + brotliSize, + }; + } + + await streamToCompressedArtifacts({ + body: response.body, + filename, + compression: ctx.compression, + }); + + let postponedSize = 0; + if (postponed) { + const postponedFilename = `${filename}.postponed.json`; + await new Promise((resolve, reject) => { + const out = createWriteStream(postponedFilename); + out.end(JSON.stringify(postponed), "utf8", (e) => + e ? reject(e) : resolve() + ); + }); + postponedSize = await statSafe(postponedFilename); + } + + let prerenderCacheSize = 0; + if (prerenderCache && prerenderCache.size > 0) { + const cacheFilename = `${filename}.prerender-cache.json`; + const wrote = await writePrerenderCache(cacheFilename, prerenderCache); + if (wrote) prerenderCacheSize = await statSafe(cacheFilename); + } + + const [htmlSize, gzipSize, brotliSize] = await Promise.all([ + statSafe(filename), + ctx.compression ? statSafe(`${filename}.gz`) : Promise.resolve(0), + ctx.compression ? statSafe(`${filename}.br`) : Promise.resolve(0), + ]); + + return { + kind: "html", + outDir: ctx.outDir, + normalizedBasename, + htmlSize, + gzipSize, + brotliSize, + postponedSize, + prerenderCacheSize, + }; +} + +async function emitRsc(p, ctx) { + const tail = p.outlet ? 
`@${p.outlet}.rsc.x-component` : "rsc.x-component"; + const url = makeUrl(p, ctx.config, `/${tail}`); + const { filename, normalizedBasename } = resolveTarget(p, "rsc", ctx.outDir); + await ctx.ensureDir(dirname(filename)); + + const response = await ctx.render({ + url, + request: makeRequest(url, p, "text/x-component"), + }); + + await streamToCompressedArtifacts({ + body: response.body, + filename, + compression: ctx.compression, + }); + + const [htmlSize, gzipSize, brotliSize] = await Promise.all([ + statSafe(filename), + ctx.compression ? statSafe(`${filename}.gz`) : Promise.resolve(0), + ctx.compression ? statSafe(`${filename}.br`) : Promise.resolve(0), + ]); + + return { + kind: "rsc", + outDir: ctx.outDir, + normalizedBasename, + htmlSize, + gzipSize, + brotliSize, + }; +} + +async function emitRemote(p, ctx) { + const url = makeUrl(p, ctx.config, "/remote.x-component"); + const { filename, normalizedBasename } = resolveTarget( + p, + "remote", + ctx.outDir + ); + await ctx.ensureDir(dirname(filename)); + + const response = await ctx.render({ + url, + request: makeRequest(url, p, "text/x-component", { + "React-Server-Outlet": "REACT_SERVER_BUILD_OUTLET", + }), + }); + + await streamToCompressedArtifacts({ + body: response.body, + filename, + compression: ctx.compression, + }); + + const [htmlSize, gzipSize, brotliSize] = await Promise.all([ + statSafe(filename), + ctx.compression ? statSafe(`${filename}.gz`) : Promise.resolve(0), + ctx.compression ? statSafe(`${filename}.br`) : Promise.resolve(0), + ]); + + return { + kind: "remote", + outDir: ctx.outDir, + normalizedBasename, + htmlSize, + gzipSize, + brotliSize, + }; +} + +/** + * Render and write all artifacts for a single path. Returns the array + * of log entries produced (always 1 for `filename`-form paths, up to 3 + * otherwise). + */ +export async function emitAllArtifacts(p, ctx) { + const entries = []; + entries.push(await emitHtml(p, ctx)); + if (!p.filename && p.rsc !== false) entries.push(await emitRsc(p, ctx)); + if (!p.filename && p.remote) entries.push(await emitRemote(p, ctx)); + return entries; +} diff --git a/packages/react-server/lib/build/static-runtime.mjs b/packages/react-server/lib/build/static-runtime.mjs new file mode 100644 index 00000000..46fd8cba --- /dev/null +++ b/packages/react-server/lib/build/static-runtime.mjs @@ -0,0 +1,175 @@ +import colors from "picocolors"; + +import memoryDriver, { StorageCache } from "../../cache/index.mjs"; +import { forRoot } from "../../config/index.mjs"; +import { getContext } from "../../server/context.mjs"; +import { + getRuntime, + init$ as runtime_init$, + runtime$, +} from "../../server/runtime.mjs"; +import { + CONFIG_CONTEXT, + LOGGER_CONTEXT, + MEMORY_CACHE_CONTEXT, + WORKER_THREAD, +} from "../../server/symbols.mjs"; +import { createRenderer, hasRenderer } from "../start/render-dom.mjs"; +import ssrHandler from "../start/ssr-handler.mjs"; + +/** + * Set up everything the static exporter needs to call `render(...)` on a + * single path: SSR worker thread, runtime$ wiring, ssrHandler. + * + * Returns a `render` function and a `terminate` function. Caller is + * responsible for invoking `terminate` exactly once on shutdown — both + * the in-process exporter (concurrency=1) and each child process in + * the multi-process exporter use this helper, and both must clean up + * the SSR worker. + * + * The work is wrapped in `runtime_init$` so the AsyncLocalStorage scope + * persists for every call to `render`. 
The returned `terminate` + * resolves when the SSR worker has fully exited. + */ +export async function setupStaticRender( + root, + options, + { config, onError } = {} +) { + let render; + let ssrWorker; + // The parent build action wraps staticSiteGenerator in a ContextStorage + // scope that exposes CONFIG_CONTEXT. The multi-process child runs + // outside that scope and must pass `config` explicitly. + config ??= getContext(CONFIG_CONTEXT); + + // The runtime$ store, captured here so terminate can be called from + // outside the runtime_init$ scope. runtime_init$ persists the last + // store on globalThis as a fallback, so once setup() resolves we can + // call render() from anywhere — including child-process IPC handlers. + let configRoot; + let compression; + + await runtime_init$(async () => { + // Pass `config` explicitly so this works inside the multi-process + // child too — the child has loaded the config itself but is not + // running inside the parent's ContextStorage scope, so a bare + // `forRoot()` would throw "Config not loaded". + configRoot = forRoot(config); + compression = !( + options.compression === false || configRoot.compression === false + ); + + // Strip exporter-orchestration knobs from the options handed to the + // SSR worker. The worker doesn't care how the host process schedules + // paths — only how to render them. + const { + exportPaths: _ep, + exportConcurrency: _ec, + ...workerOptions + } = options; + + if (hasRenderer(options)) { + ssrWorker = await createRenderer({ root, options }); + } else { + const { Worker } = await import("node:worker_threads"); + // The renderer worker script lives next to the production server + // entrypoint (lib/start/render-stream.mjs) — `lib/build/` doesn't + // ship one of its own, so resolve relative to the start/ directory. + ssrWorker = new Worker( + new URL("../start/render-stream.mjs", import.meta.url), + { workerData: { root, options: workerOptions } } + ); + } + + runtime$(WORKER_THREAD, ssrWorker); + runtime$(CONFIG_CONTEXT, config); + + // Logger proxy: prefix output with newlines + apply colors. The + // historical implementation also captured stray Errors here to set + // exit status; that side-channel has been replaced by per-path + // try/catch in the orchestrator (single-process pMapStream / child + // IPC error envelope). + const initialRuntime = { + [MEMORY_CACHE_CONTEXT]: new StorageCache(memoryDriver), + [LOGGER_CONTEXT]: new Proxy(console, { + get(target, prop) { + if (typeof target[prop] === "function") { + return (...args) => { + if (prop === "log" || prop === "info") { + console.log( + "\n", + ...args.map((arg) => + typeof arg === "string" ? colors.dim(arg) : arg + ) + ); + } else if (prop === "warn") { + console.warn( + "\n", + ...args.map((arg) => + typeof arg === "string" ? colors.yellow(arg) : arg + ) + ); + } else if (prop === "error") { + // Postponed is a Partial Pre-Rendering control signal, not + // an error — the renderer throws it intentionally to mark + // a dynamic boundary, then catches it to emit the prelude + // + .postponed.json sidecar. The render-rsc onError hook + // forwards every thrown value (including this signal) to + // the logger; suppress it here so a successful PPR export + // doesn't print a scary stack trace next to its own + // "exported in …" success line. 
+ const isPostponedSignal = args.some( + (arg) => + arg instanceof Error && + arg.digest === "REACT_SERVER_POSTPONED" + ); + if (isPostponedSignal) return; + console.error( + "\n", + ...args.map((arg) => + typeof arg === "string" + ? colors.red(arg) + : arg instanceof Error + ? colors.red(arg.stack) + : arg + ) + ); + if (onError && args[0] instanceof Error) onError(args[0]); + } else { + target[prop](...args); + } + }; + } + return target[prop]; + }, + }), + }; + + runtime$( + typeof config.runtime === "function" + ? (config.runtime(initialRuntime) ?? initialRuntime) + : { + ...initialRuntime, + ...config.runtime, + } + ); + + render = await ssrHandler(null, options); + }); + + return { + render: (req) => render(req), + configRoot, + compression, + async terminate() { + // Worker.terminate returns a promise that resolves when the worker + // exits. Swallow errors — a worker that already crashed is fine. + try { + await getRuntime(WORKER_THREAD)?.terminate(); + } catch { + /* noop */ + } + }, + }; +} diff --git a/packages/react-server/lib/build/static-worker.mjs b/packages/react-server/lib/build/static-worker.mjs new file mode 100644 index 00000000..bc0cb95d --- /dev/null +++ b/packages/react-server/lib/build/static-worker.mjs @@ -0,0 +1,197 @@ +import { mkdir } from "node:fs/promises"; + +import { ContextStorage } from "../../server/context.mjs"; +import { BUILD_OPTIONS, CONFIG_CONTEXT } from "../../server/symbols.mjs"; +import { emitAllArtifacts } from "./static-emit.mjs"; +import { setupStaticRender } from "./static-runtime.mjs"; + +/** + * Multi-process static-export worker entry. + * + * Each child sets up the same render pipeline the single-process + * exporter uses (`setupStaticRender` → SSR worker thread → `render` + * function) and exposes it over IPC. The coordinator dispatches paths + * one at a time; the child renders and writes every artifact for a + * given path (HTML + `.gz` / `.br` sidecars + `.postponed.json` + + * `.prerender-cache.json` when applicable) to disk, then replies with + * the log entries the coordinator should print. + * + * Why direct render rather than HTTP-fan-out: the production HTTP + * server doesn't surface `onPostponed` or the prerender-cache Set to + * its callers, which means the HTTP-based coordinator we used to have + * silently dropped both sidecars. Going through `setupStaticRender` + * instead gives the child the same render contract as the + * single-process path, so postpone / prerender-cache work in + * multi-process mode too. + * + * Bytes never cross IPC: the child writes artifacts straight to disk + * via `emitAllArtifacts` (which uses `fanout` for the body stream), + * and only sends back a small JSON log entry per artifact. + * + * IPC protocol: + * parent → child: + * { type: "init", root, options } + * { type: "render", path } + * { type: "shutdown" } + * + * child → parent: + * { type: "ready" } + * { type: "render-complete", entries } + * { type: "render-error", message, stack } + * { type: "fatal", message, stack } + * + * Each child handles one render at a time — the coordinator's + * free-children pool serializes dispatches, so we don't need request + * IDs. + */ + +if (!process.send) { + // Defensive: this script must only run as a forked child. Children's + // stdio is silenced by the coordinator, so we have nothing useful to + // print anyway — just exit non-zero so the misuse is observable. 
+ process.exit(1); +} + +let initialized = false; +let setup = null; +let ctx = null; +let savedConfig = null; +let savedOptions = null; + +function fatal(err) { + // process.send is async on the wire even though it returns synchronously. + // If we follow it with process.exit(1) immediately, the exit aborts the + // IPC pipe before the envelope is flushed and the parent only sees the + // exit event with no underlying error — a "silent" worker death. Wait + // for the send callback (Node guarantees it fires after the message is + // queued in the kernel pipe) before exiting; if the parent already + // disconnected, the send throws and we fall through to exit. The + // unref'd timer is a backstop so we never hang here forever. + const payload = { + type: "fatal", + message: err?.message ?? String(err), + stack: err?.stack, + }; + try { + let exited = false; + const exit = () => { + if (exited) return; + exited = true; + process.exit(1); + }; + process.send(payload, exit); + setTimeout(exit, 500).unref(); + } catch { + process.exit(1); + } +} +process.on("uncaughtException", fatal); +process.on("unhandledRejection", fatal); + +process.on("message", (msg) => { + if (!msg || typeof msg !== "object") return; + if (msg.type === "init") void handleInit(msg); + else if (msg.type === "render") void handleRender(msg); + else if (msg.type === "shutdown") void handleShutdown(); +}); + +async function handleInit({ root, options }) { + if (initialized) { + fatal(new Error("init received twice")); + return; + } + initialized = true; + + try { + // Same loader + config-load pipeline as `react-server start` / + // `reactServer()`, but without binding an HTTP server. We need + // `init$` to register the loader (so config / runtime imports + // resolve under the build aliases) and `loadConfig` to produce + // the merged config object that `setupStaticRender` consumes. + const { default: init$ } = await import("../loader/init.mjs"); + await init$({ root, ...options }); + const { loadConfig } = await import("../../config/prebuilt.mjs"); + const config = await loadConfig({}, options); + + setup = await setupStaticRender(root, options, { config }); + + // Each child has its own dirCache. mkdir-recursive is idempotent, + // so concurrent children racing on the same directory is fine — + // the coordinator just pays a few extra syscalls vs. centralizing + // the cache, which is negligible relative to the render itself. + const dirCache = new Set(); + const ensureDir = async (d) => { + if (dirCache.has(d)) return; + await mkdir(d, { recursive: true }); + dirCache.add(d); + }; + + ctx = { + render: setup.render, + config, + configRoot: setup.configRoot, + compression: setup.compression, + outDir: options.outDir, + ensureDir, + }; + + // Stash for the per-render ContextStorage.run wrapper. The parent + // build action wraps `staticSiteGenerator` in a `ContextStorage.run` + // scope that exposes CONFIG_CONTEXT and BUILD_OPTIONS; mirror that + // here so anything in the render pipeline that reads them via + // `getContext` sees the same values it would in single-process mode. 
+ savedConfig = config; + savedOptions = options; + + process.send({ type: "ready" }); + } catch (e) { + fatal(e); + } +} + +async function handleRender({ path }) { + if (!setup || !ctx) { + fatal(new Error("render received before init")); + return; + } + try { + const entries = await ContextStorage.run( + { [CONFIG_CONTEXT]: savedConfig, [BUILD_OPTIONS]: savedOptions }, + () => emitAllArtifacts(path, ctx) + ); + try { + process.send({ type: "render-complete", entries }); + } catch (e) { + fatal(e); + } + } catch (e) { + // Per-path failure: report and stay alive so the coordinator can + // dispatch the next path to this child. Only `fatal` ends the run. + try { + process.send({ + type: "render-error", + message: e?.message ?? String(e), + stack: e?.stack, + }); + } catch (sendErr) { + fatal(sendErr); + } + } +} + +async function handleShutdown() { + // Tear down the SSR worker thread, then disconnect IPC and exit. + // The coordinator only sends shutdown once it's drained the path + // stream, so there's no in-flight render to wait on here. + try { + await setup?.terminate(); + } catch { + /* worker may already be gone */ + } + try { + process.disconnect(); + } catch { + /* noop */ + } + process.exit(0); +} diff --git a/packages/react-server/lib/build/static.mjs b/packages/react-server/lib/build/static.mjs index 69df6d9d..ee4509d7 100644 --- a/packages/react-server/lib/build/static.mjs +++ b/packages/react-server/lib/build/static.mjs @@ -1,706 +1,233 @@ -import { createWriteStream } from "node:fs"; -import { mkdir, stat, writeFile } from "node:fs/promises"; -import { basename, dirname, join } from "node:path"; -import { Readable } from "node:stream"; -import { pipeline } from "node:stream/promises"; -import { createBrotliCompress, createGzip } from "node:zlib"; +import { mkdir } from "node:fs/promises"; +import { availableParallelism } from "node:os"; -import { filesize } from "filesize"; import colors from "picocolors"; -import memoryDriver, { StorageCache } from "../../cache/index.mjs"; -import { forRoot } from "../../config/index.mjs"; import { getContext } from "../../server/context.mjs"; -import { - getRuntime, - runtime$, - init$ as runtime_init$, -} from "../../server/runtime.mjs"; -import { - CONFIG_CONTEXT, - LOGGER_CONTEXT, - MEMORY_CACHE_CONTEXT, - WORKER_THREAD, -} from "../../server/symbols.mjs"; -import ssrHandler from "../start/ssr-handler.mjs"; -import * as sys from "../sys.mjs"; +import { CONFIG_CONTEXT } from "../../server/symbols.mjs"; +import { forRoot } from "../../config/index.mjs"; import banner from "./banner.mjs"; -import { toBuffer } from "../../cache/rsc.mjs"; -import { hasRenderer, createRenderer } from "../start/render-dom.mjs"; import { createSpinner, isInteractive } from "./output-filter.mjs"; - -const cwd = sys.cwd(); - -// Module-level spinner for interactive mode +import { emitAllArtifacts, formatLogEntry } from "./static-emit.mjs"; +import { runMultiProcess } from "./static-coordinator.mjs"; +import { setupStaticRender } from "./static-runtime.mjs"; +import { pMapStream } from "./p-map-stream.mjs"; +import { buildPathStream, validatedPathStream } from "./path-source.mjs"; + +/** + * Static-site generator entry point. + * + * Two modes: + * + * 1. Single-process (concurrency === 1): in-line streaming export. + * Bounded memory via pMapStream + streaming fanout. No fork/IPC + * overhead. Default for tiny exports and the historical baseline. + * + * 2. 
Multi-process (concurrency > 1): fork N child processes, each + * running its own RSC main thread + SSR worker. Coordinator owns + * the path stream and feeds children one path at a time. Gives + * true CPU parallelism for RSC-bound workloads (Shiki, heavy + * server components). Children write artifacts directly to disk + * and report log entries over IPC — output bytes never cross IPC, + * and the artifact set (HTML, gz/br sidecars, postpone, + * prerender-cache) matches single-process exactly. + * + * The path source is the same in both modes — a single + * `AsyncIterable` built from `options.exportPaths` and + * `configRoot.export` via `buildPathStream`. Generators are consumed + * lazily so the path list is never materialized. + */ + +// Spinner is module-level only because the streaming pipeline updates +// it from many concurrent points. The throttled writer (~20 Hz) +// coalesces tty writes; without it, 24k pages is 24k tty writes. let ssgSpinner = null; let ssgFileCount = 0; - -function size(bytes) { - const s = filesize(bytes); - return " ".repeat(Math.max(0, 10 - s.length)) + s; +let spinnerReportLast = 0; +function spinnerReport(message) { + if (!ssgSpinner) return; + const now = Date.now(); + if (now - spinnerReportLast < 50) return; + spinnerReportLast = now; + ssgSpinner.update(message); } -function truncateFilename(dirPart, filePart, maxLength) { - const totalLength = dirPart.length + filePart.length; - if (totalLength <= maxLength || maxLength < 10) { - return { dir: dirPart, file: filePart }; - } - - const excess = totalLength - maxLength + 3; // +3 for "..." - - // Strategy: single truncation point, prefer truncating in the middle of file part - if (filePart.length > excess + 6) { - // Can truncate just within the file part - const remaining = filePart.length - excess - 3; - const keepStart = Math.ceil(remaining / 2); - const keepEnd = remaining - keepStart; - const truncatedFile = - filePart.slice(0, keepStart) + "..." + filePart.slice(-keepEnd); - return { dir: dirPart, file: truncatedFile }; +function reportLogEntry(entry) { + ssgFileCount++; + if (ssgSpinner) { + spinnerReport(`exporting ${entry.normalizedBasename}`); + return; } + formatLogEntry(entry); +} - // Need to truncate across both parts - single "..." at the boundary - // Keep start of dir and end of file (to preserve extension) - const availableTotal = maxLength - 3; // -3 for single "..." - - // Prefer keeping more of the file (extension is important) - const keepFileEnd = Math.min( - filePart.length, - Math.ceil(availableTotal * 0.6) - ); - const keepDirStart = availableTotal - keepFileEnd; - - const truncatedDir = - keepDirStart > 0 ? dirPart.slice(0, keepDirStart) + "..." : "..."; - const truncatedFile = filePart.slice(-keepFileEnd); - - return { dir: truncatedDir, file: truncatedFile }; +// Default export concurrency. Stays modest by default: forking N +// processes has real startup cost, and going past CPU count yields +// nothing for RSC-bound workloads. Users with I/O-bound RSC can raise +// it; users with tiny exports can drop to 1 to avoid fork overhead. +function defaultConcurrency() { + const cpus = availableParallelism(); + return Math.max(2, Math.min(cpus - 1, 4)); } -function log( - outDir, - normalizedBasename, - htmlStat, - gzipStat, - brotliStat, - postponedStat, - prerenderCacheStat, - maxFilenameLength -) { - ssgFileCount++; +export default async function staticSiteGenerator(root, options) { + // Empty line before banner — preserves the original layout. 
+ console.log(); + banner("static", options.dev); - // In interactive mode, update spinner instead of logging - if (ssgSpinner) { - ssgSpinner.update(`exporting ${normalizedBasename}`); + const config = getContext(CONFIG_CONTEXT); + const configRoot = forRoot(); + + if (!(options.export || configRoot?.export)) { return; } - // Verbose/CI mode: log file details - const termWidth = process.stdout.columns || 80; - const prefix = `${outDir}/dist/`; - const dirPart = dirname(normalizedBasename).replace(".", ""); - const filePart = - (dirname(normalizedBasename) === "." ? "" : "/") + - basename(normalizedBasename); - const filenamePart = dirPart + filePart; - - // Build size columns (we may omit some if line is too long) - const htmlSize = size(htmlStat.size); - const gzipSize = gzipStat.size ? ` │ gzip: ${size(gzipStat.size)}` : ""; - const brotliSize = brotliStat.size - ? ` │ brotli: ${size(brotliStat.size)}` - : ""; - const postponedSize = postponedStat.size - ? ` │ partial pre-render: ${size(postponedStat.size)}` - : ""; - const prerenderCacheSize = prerenderCacheStat.size - ? ` │ pre-render cache: ${size(prerenderCacheStat.size)}` - : ""; - - // Calculate full line with all size columns - const allSizeColumns = - gzipSize + brotliSize + postponedSize + prerenderCacheSize; - const idealPadding = Math.max( - 0, - maxFilenameLength - normalizedBasename.length + // CLI passes strings; config passes numbers. Coerce defensively. + const rawConcurrency = + options.exportConcurrency ?? configRoot.exportConcurrency; + const concurrency = Math.max( + 1, + rawConcurrency != null ? Number(rawConcurrency) : defaultConcurrency() ); - const fullLineLength = - prefix.length + - filenamePart.length + - idealPadding + - htmlSize.length + - allSizeColumns.length; - // Determine which size columns to include based on terminal width - let sizeSuffix = ""; - if (fullLineLength <= termWidth) { - // Everything fits - sizeSuffix = allSizeColumns; - } else { - // Try adding columns one by one until we run out of space - const sizeColumns = [ - gzipSize, - brotliSize, - postponedSize, - prerenderCacheSize, - ]; - let currentLength = - prefix.length + filenamePart.length + idealPadding + htmlSize.length; - for (const col of sizeColumns) { - if (col && currentLength + col.length <= termWidth) { - sizeSuffix += col; - currentLength += col.length; - } else if (col) { - break; - } + // Build the streaming path source. Lazy: the source generator is + // pulled exactly when a worker / mapper is free. With an async + // generator path source, the full path list is never materialized. + const pathStream = validatedPathStream(buildPathStream(options, configRoot)); + + // Counted view: we still want the "no paths to export" warning, but + // can't precompute it without forcing materialization. Wrap to count + // as we yield — memory cost is O(1). 
+ let pathCount = 0; + async function* counted(stream) { + for await (const p of stream) { + pathCount++; + yield p; } } - // Now calculate how much space we have for filename + padding - const totalSizeLength = htmlSize.length + sizeSuffix.length; - const availableForFilename = termWidth - prefix.length - totalSizeLength - 1; // -1 for safety + if (isInteractive()) { + ssgSpinner = createSpinner("exporting..."); + ssgFileCount = 0; + spinnerReportLast = 0; + } - // Truncate filename if needed - let displayDir = dirPart; - let displayFile = filePart; - let displayPadding; + // Per-path error reporting matches the original: each render failure + // is printed in red to stderr so the user can see what broke, then + // counted so the orchestrator can throw a single summary line at the + // end and exit the build non-zero. Errors come from two sources — the + // single-process `pMapStream` mapper's try/catch and the multi-process + // coordinator's IPC `render-error` envelope — both routed through + // `onPathError` so output is identical between modes. + let errorCount = 0; + const onPathError = (e) => { + errorCount++; + const message = e?.stack ?? e?.callstack ?? e?.message ?? String(e); + console.error("\n" + colors.red(message)); + }; - if ( - filenamePart.length + idealPadding > availableForFilename && - availableForFilename > 10 - ) { - // First, reduce padding to minimum (0) - if (filenamePart.length <= availableForFilename) { - // Filename fits without padding, use reduced padding - displayPadding = " ".repeat( - Math.max(0, availableForFilename - filenamePart.length) - ); + try { + if (concurrency === 1) { + await runSingleProcess({ + root, + options, + config, + configRoot, + pathStream: counted(pathStream), + onError: onPathError, + }); } else { - // Need to truncate filename - const { dir, file } = truncateFilename( - dirPart, - filePart, - availableForFilename + // Multi-process: each child runs the same render pipeline as + // single-process (`setupStaticRender` + `emitAllArtifacts`); the + // coordinator dispatches one path per free child over IPC. + // Output bytes never cross the IPC boundary — the child writes + // every artifact (HTML, `.gz` / `.br`, postpone, prerender-cache) + // to disk itself and reports back only the small log entries. + await runMultiProcess({ + root, + options, + config, + configRoot, + pathStream: counted(pathStream), + workerCount: concurrency, + onLog: reportLogEntry, + onError: onPathError, + }); + } + + if (pathCount === 0) { + console.log(colors.yellow("warning: no paths to export, skipping...")); + } + } finally { + if (ssgSpinner) { + ssgSpinner.stop( + `${colors.green("✔")} ${colors.dim(`${ssgFileCount} files exported`)}` ); - displayDir = dir; - displayFile = file; - displayPadding = " ".repeat( - Math.max( - 0, - availableForFilename - displayDir.length - displayFile.length - ) + ssgSpinner = null; + } + + if (errorCount > 0) { + throw colors.bold( + `\nStatic export completed with errors. See logs above.` ); } - } else { - displayPadding = " ".repeat(idealPadding); } - - console.log( - `${colors.dim(prefix)}${colors.green(displayDir)}${colors.cyan(displayFile)}${displayPadding}${colors.gray(colors.bold(htmlSize))}${colors.dim(sizeSuffix)}` - ); } -export default async function staticSiteGenerator(root, options) { - // empty line - console.log(); - banner("static", options.dev); - const config = getContext(CONFIG_CONTEXT); - - const errorHandler = (e) => { - console.error("\n", colors.red(e?.callstack ?? e?.message ?? 
e)); +/** + * In-process static export. Runs RSC + SSR worker + writes inside the + * current process. `pMapStream` bounds main-thread mapper concurrency + * to 1 (one path at a time) — the rendering itself is single-threaded + * anyway, and L2 concurrency adds no throughput for one process. + * + * Used when `exportConcurrency === 1`. Preserves historical behavior + * for users who explicitly opt out of multi-process. + */ +async function runSingleProcess({ + root, + options, + config, + configRoot, + pathStream, + onError, +}) { + const setup = await setupStaticRender(root, options, { config }); + + const dirCache = new Set(); + const ensureDir = async (d) => { + if (dirCache.has(d)) return; + await mkdir(d, { recursive: true }); + dirCache.add(d); }; - await runtime_init$(async () => { - const { exportPaths: _, ...baseOptions } = options; + const ctx = { + render: setup.render, + config, + configRoot: setup.configRoot ?? configRoot, + compression: setup.compression, + outDir: options.outDir, + ensureDir, + }; - let worker; - if (hasRenderer(options)) { - worker = await createRenderer({ root, options }); - } else { - const { Worker } = await import("node:worker_threads"); - worker = new Worker( - new URL("../start/render-stream.mjs", import.meta.url), - { - workerData: { root, options: baseOptions }, + try { + // Concurrency = 1 here: even though we're in-process, parallelizing + // multiple in-flight renders on the main thread doesn't give CPU + // parallelism for RSC. It only buys async-I/O interleaving — useful + // for I/O-bound workloads but not for the typical content-export + // case. Keep it simple; users wanting parallelism go multi-process. + await pMapStream( + pathStream, + async (p) => { + try { + const entries = await emitAllArtifacts(p, ctx); + for (const entry of entries) reportLogEntry(entry); + } catch (e) { + onError(e); } - ); - } - - runtime$(WORKER_THREAD, worker); - runtime$(CONFIG_CONTEXT, config); - - let error = null; - const initialRuntime = { - [MEMORY_CACHE_CONTEXT]: new StorageCache(memoryDriver), - [LOGGER_CONTEXT]: new Proxy(console, { - get(target, prop) { - if (typeof target[prop] === "function") { - return (...args) => { - if (prop === "log" || prop === "info") { - console.log( - "\n", - ...args.map((arg) => - typeof arg === "string" ? colors.dim(arg) : arg - ) - ); - } else if (prop === "warn") { - console.warn( - "\n", - ...args.map((arg) => - typeof arg === "string" ? colors.yellow(arg) : arg - ) - ); - } else if (prop === "error") { - console.error( - "\n", - ...args.map((arg) => - typeof arg === "string" - ? colors.red(arg) - : arg instanceof Error - ? colors.red(arg.stack) - : arg - ) - ); - if (args[0] instanceof Error && !error) { - error = args[0]; - } - } else { - target[prop](...args); - } - }; - } - return target[prop]; - }, - }), - }; - runtime$( - typeof config.runtime === "function" - ? (config.runtime(initialRuntime) ?? initialRuntime) - : { - ...initialRuntime, - ...config.runtime, - } - ); - - const configRoot = forRoot(); - const compression = !( - options.compression === false || configRoot.compression === false + }, + 1 ); - - if (options.export || configRoot?.export) { - let paths = ( - options.exportPaths - ? await Promise.all( - options.exportPaths.map(async (path) => { - if (typeof path === "string") { - return { path }; - } - if (typeof path === "function") { - return path(); - } - return path; - }) - ) - : [] - ).flat(); - paths = - typeof configRoot.export === "function" - ? 
await configRoot.export(paths) - : [...(configRoot.export ?? []), ...paths]; - const validPaths = paths - .map((path) => (typeof path === "string" ? { path } : path)) - .filter(({ path, filename }) => filename || path); - if (validPaths.length < paths.length) { - throw new Error( - `${colors.bold("path")} property is not defined for ${colors.bold( - paths.length - validPaths.length - )} path${paths.length - validPaths.length > 1 ? "s" : ""}` - ); - } - paths = validPaths; - - if (paths.length === 0) { - console.log(colors.yellow("warning: no paths to export, skipping...")); - getRuntime(WORKER_THREAD)?.terminate(); - return; - } - - const filenames = paths.flatMap(({ path, filename, outlet }) => { - if (filename) { - return [filename]; - } - const normalizedPath = path.replace(/^\/+/g, "").replace(/\/+$/g, ""); - const basename = `${normalizedPath}/index.html`.replace(/^\/+/g, ""); - return [ - basename, - basename.replace( - /index\.html$/, - outlet ? `@${outlet}.rsc.x-component` : "rsc.x-component" - ), - ]; - }); - const maxFilenameLength = Math.max( - ...filenames.map((filename) => filename.length) - ); - - // Start spinner in interactive mode - if (isInteractive()) { - ssgSpinner = createSpinner("exporting..."); - ssgFileCount = 0; - } - - try { - const render = await ssrHandler(null, options); - await Promise.all( - paths.map( - async ({ - path, - filename: out, - method, - headers, - prerender, - origin, - host, - }) => { - try { - const url = new URL( - `http${config.server?.https ? "s" : ""}://${config.host ?? "localhost"}:${config.port ?? 3000}${path}` - ); - if (!out) { - await mkdir(join(cwd, options.outDir, "dist", path), { - recursive: true, - }); - } - const normalizedPath = path - .replace(/^\/+/g, "") - .replace(/\/+$/g, ""); - const normalizedBasename = ( - out ?? `${normalizedPath}/index.html` - ).replace(/^\/+/g, ""); - const filename = join( - cwd, - options.outDir, - "dist", - normalizedBasename - ); - - let postponed; - const prerenderCache = new Set(); - const stream = await render({ - url, - method: method ?? "GET", - request: { - url: url.toString(), - method: method ?? "GET", - headers: new Headers({ - accept: "text/html", - origin: origin ?? sys.getEnv("ORIGIN") ?? url.origin, - host: host ?? sys.getEnv("HOST") ?? url.hostname, - ...headers, - }), - }, - prerender: prerender ?? configRoot.prerender, - prerenderCache, - onPostponed: - configRoot.prerender === false - ? 
null - : (_postponed) => (postponed = _postponed), - }); - - if (out) { - const content = await stream.text(); - await mkdir(dirname(filename), { recursive: true }); - await writeFile(filename, content, "utf8"); - const outStat = await stat(filename); - - log( - options.outDir, - normalizedBasename, - outStat, - { size: 0 }, - { size: 0 }, - { size: 0 }, - { size: 0 }, - maxFilenameLength - ); - } else { - const html = await stream.text(); - - const files = []; - if (compression) { - const gzip = createGzip(); - const brotli = createBrotliCompress(); - const gzipWriteStream = createWriteStream(`${filename}.gz`); - const brotliWriteStream = createWriteStream( - `${filename}.br` - ); - files.push( - pipeline(Readable.from(html), gzip, gzipWriteStream), - pipeline(Readable.from(html), brotli, brotliWriteStream), - writeFile(filename, html, "utf8") - ); - } else { - files.push(writeFile(filename, html, "utf8")); - } - - const postponedFilename = `${filename}.postponed.json`; - if (postponed) { - files.push( - writeFile( - postponedFilename, - JSON.stringify(postponed), - "utf8" - ) - ); - } - const cacheFilename = `${filename}.prerender-cache.json`; - if (prerenderCache.size > 0) { - files.push( - writeFile( - cacheFilename, - `[${( - await Promise.all( - Array.from(prerenderCache) - .filter( - (entry) => entry.provider?.options?.prerender - ) - .map(async (entry) => { - const [kBuffer, vBuffer] = await Promise.all([ - toBuffer(entry.keys), - toBuffer(entry.result), - ]); - const cacheEntry = [ - kBuffer.toString("base64"), - vBuffer.toString("base64"), - Date.now(), - entry.ttl, - { - ...entry?.provider, - serializer: entry.provider?.serializer - ? "rsc" - : undefined, - }, - ]; - return JSON.stringify(cacheEntry); - }) - ) - ).join(",")}]`, - "utf8" - ) - ); - } - await Promise.all(files); - - const [ - htmlStat, - gzipStat, - brotliStat, - postponedStat, - prerenderCacheStat, - ] = await Promise.all([ - stat(filename), - compression - ? stat(`${filename}.gz`) - : Promise.resolve({ size: 0 }), - compression - ? stat(`${filename}.br`) - : Promise.resolve({ size: 0 }), - postponed - ? stat(postponedFilename) - : Promise.resolve({ size: 0 }), - prerenderCache.size > 0 - ? stat(cacheFilename) - : Promise.resolve({ size: 0 }), - ]); - - log( - options.outDir, - normalizedBasename, - htmlStat, - gzipStat, - brotliStat, - postponedStat, - prerenderCacheStat, - maxFilenameLength - ); - } - } catch (e) { - errorHandler(e); - } - } - ) - ); - - await Promise.all( - paths - .filter(({ filename, rsc }) => !filename && rsc !== false) - .map(async ({ path, outlet, origin, host }) => { - try { - const url = new URL( - `http${config.server?.https ? "s" : ""}://${config.host ?? "localhost"}:${config.port ?? 3000}${path}/${outlet ? `@${outlet}.rsc.x-component` : "rsc.x-component"}` - ); - const stream = await render({ - url, - request: { - url: url.toString(), - headers: new Headers({ - accept: "text/x-component", - origin: origin ?? sys.getEnv("ORIGIN") ?? url.origin, - host: host ?? sys.getEnv("HOST") ?? url.hostname, - }), - }, - }); - const html = await stream.text(); - await mkdir(join(cwd, options.outDir, "dist", path), { - recursive: true, - }); - const normalizedPath = path - .replace(/^\/+/g, "") - .replace(/\/+$/g, ""); - const normalizedBasename = - `${normalizedPath}/${outlet ? 
`@${outlet}.rsc.x-component` : "rsc.x-component"}`.replace( - /^\/+/g, - "" - ); - const filename = join( - cwd, - options.outDir, - "dist", - normalizedBasename - ); - - if (compression) { - const gzip = createGzip(); - const brotli = createBrotliCompress(); - const gzipWriteStream = createWriteStream(`${filename}.gz`); - const brotliWriteStream = createWriteStream(`${filename}.br`); - await Promise.all([ - pipeline(Readable.from(html), gzip, gzipWriteStream), - pipeline(Readable.from(html), brotli, brotliWriteStream), - writeFile(filename, html, "utf8"), - ]); - } else { - await writeFile(filename, html, "utf8"); - } - - const [htmlStat, gzipStat, brotliStat] = await Promise.all([ - stat(filename), - compression - ? stat(`${filename}.gz`) - : Promise.resolve({ size: 0 }), - compression - ? stat(`${filename}.br`) - : Promise.resolve({ size: 0 }), - ]); - - log( - options.outDir, - normalizedBasename, - htmlStat, - gzipStat, - brotliStat, - { size: 0 }, - { size: 0 }, - maxFilenameLength - ); - } catch (e) { - errorHandler(e); - } - }) - ); - - await Promise.all( - paths - .filter(({ filename, remote }) => !filename && remote) - .map(async ({ path, origin, host }) => { - try { - const url = new URL( - `http${config.server?.https ? "s" : ""}://${config.host ?? "localhost"}:${config.port ?? 3000}${path}/remote.x-component` - ); - const stream = await render({ - url, - request: { - url: url.toString(), - headers: new Headers({ - accept: "text/x-component", - origin: origin ?? sys.getEnv("ORIGIN") ?? url.origin, - host: host ?? sys.getEnv("HOST") ?? url.hostname, - "React-Server-Outlet": "REACT_SERVER_BUILD_OUTLET", - }), - }, - }); - const html = await stream.text(); - await mkdir(join(cwd, options.outDir, "dist", path), { - recursive: true, - }); - const normalizedPath = path - .replace(/^\/+/g, "") - .replace(/\/+$/g, ""); - const normalizedBasename = - `${normalizedPath}/remote.x-component`.replace(/^\/+/g, ""); - const filename = join( - cwd, - options.outDir, - "dist", - normalizedBasename - ); - - if (compression) { - const gzip = createGzip(); - const brotli = createBrotliCompress(); - const gzipWriteStream = createWriteStream(`${filename}.gz`); - const brotliWriteStream = createWriteStream(`${filename}.br`); - await Promise.all([ - pipeline(Readable.from(html), gzip, gzipWriteStream), - pipeline(Readable.from(html), brotli, brotliWriteStream), - writeFile(filename, html, "utf8"), - ]); - } else { - await writeFile(filename, html, "utf8"); - } - - const [htmlStat, gzipStat, brotliStat] = await Promise.all([ - stat(filename), - compression - ? stat(`${filename}.gz`) - : Promise.resolve({ size: 0 }), - compression - ? stat(`${filename}.br`) - : Promise.resolve({ size: 0 }), - ]); - - log( - options.outDir, - normalizedBasename, - htmlStat, - gzipStat, - brotliStat, - { size: 0 }, - { size: 0 }, - maxFilenameLength - ); - } catch (e) { - errorHandler(e); - } - }) - ); - } finally { - // Stop spinner in interactive mode - if (ssgSpinner) { - ssgSpinner.stop( - `${colors.green("✔")} ${colors.dim(`${ssgFileCount} files exported`)}` - ); - ssgSpinner = null; - } - - getRuntime(WORKER_THREAD)?.terminate(); - - if (error) { - throw colors.bold( - "\nStatic export completed with errors. See logs above." 
- ); - } - } - } - }); + } finally { + await setup.terminate(); + } } diff --git a/packages/react-server/lib/start/create-server.mjs b/packages/react-server/lib/start/create-server.mjs index 8335e2f9..10b2ca28 100644 --- a/packages/react-server/lib/start/create-server.mjs +++ b/packages/react-server/lib/start/create-server.mjs @@ -199,6 +199,15 @@ export default async function createServer(root, options) { }); } }, + // Establish the per-request PrerenderStorage scope before any static + // handler runs. The static handler for HTML serves `.postponed.json` + // sidecars by calling `prerender$(POSTPONE_STATE, …)`, which mutates + // the active PrerenderStorage store; without a scope here that mutation + // is a write to `undefined` and the request 500s. Has to be ahead of + // the static handlers, not just ahead of `ssrHandler`. + async function prerenderInit() { + PrerenderStorage.enterWith({}); + }, // Static files are served before admission control — they are cheap I/O // and should not be gated by the concurrency limiter or count toward inflight. staticHandler(join(cwd, options.outDir, "dist"), { @@ -289,9 +298,6 @@ export default async function createServer(root, options) { }, ] : []), - async function prerenderInit() { - PrerenderStorage.enterWith({}); - }, cookie(config.cookies), ...(config.handlers?.pre ?? []), ssrHandler(root, options), diff --git a/test/__test__/apps/static-export-many.spec.mjs b/test/__test__/apps/static-export-many.spec.mjs new file mode 100644 index 00000000..d8d4d6a7 --- /dev/null +++ b/test/__test__/apps/static-export-many.spec.mjs @@ -0,0 +1,60 @@ +import { appDir, hostname, page, server } from "playground/utils"; +import { describe, expect, test } from "vitest"; + +// This spec only runs under `pnpm test-build-start` — the static +// exporter is a build-time pipeline; in dev mode there is nothing to +// exercise. The fixture directory under `test/fixtures/static-export-many` +// owns its own `react-server.config.mjs` whose `export` is an +// async-generator function — the path source the streaming exporter +// is built around. We don't go through `initialConfig` here on +// purpose: a generator function can't survive JSON, and the array +// form blows past env-var size limits at very high N. The on-disk +// config lets us declare the path source once, lazily, and crank the +// count via env without touching the harness. +const isProduction = process.env.NODE_ENV === "production"; + +// "Many" is a knob — the goal is to push enough paths through the +// streaming pipeline that an O(N²) regression (accidental +// materialization, broken backpressure, growing internal queue) shows +// up either as a wall-clock blow-up that trips the test timeout or as +// a process-level OOM. Default 1000 finishes well under a minute on a +// developer laptop; CI is given a generous timeout below. Override with +// the env var if you're stress-testing the pipeline manually — the +// fixture's `react-server.config.mjs` reads the same env so the +// generator yields the same count the spec expects. +const PATH_COUNT = Number(process.env.STATIC_EXPORT_MANY_COUNT ?? 
1000); + +describe.skipIf(!isProduction)("static export at scale", () => { + test("exports many paths declared via async-generator configRoot.export and serves them as static files", async () => { + await server("./entry.jsx", { + cwd: appDir("test/fixtures/static-export-many"), + // The harness defaults `options.export` to false; the build + // action's gate (`options.export !== false`) would otherwise + // skip static export entirely, even with the on-disk config + // present. Passing `{ export: true }` flips the harness flag. + // The actual path source is the async generator declared in + // the fixture's react-server.config.mjs — that function can't + // cross the JSON boundary into the build-worker fork. + initialConfig: { export: true }, + // Build + render of N paths is the slowest thing this suite + // does. Give CI a generous ceiling; locally it finishes in a + // fraction of this. + timeout: 240_000, + }); + + // Spot-check across the range: first, second, middle, last. If + // any of these miss the dist tree, the streaming exporter dropped + // paths somewhere between buildPathStream and the on-disk write. + // Fetching via HTTP rather than reading the dist directly is + // deliberate — the production server's static handler is the + // user-observable consumer of the export output, so testing the + // round-trip catches both producer and serve-side regressions. + const samples = [0, 1, Math.floor(PATH_COUNT / 2), PATH_COUNT - 1]; + for (const i of samples) { + const path = `/p/${i}`; + await page.goto(hostname + path); + const text = await page.textContent("#page"); + expect(text).toBe(`Static ${path}`); + } + }, 240_000); +}); diff --git a/test/__test__/static-export-stream.spec.mjs b/test/__test__/static-export-stream.spec.mjs new file mode 100644 index 00000000..8bc478fc --- /dev/null +++ b/test/__test__/static-export-stream.spec.mjs @@ -0,0 +1,580 @@ +import { mkdtemp, readFile, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { createWriteStream } from "node:fs"; +import { join } from "node:path"; + +import { describe, expect, test } from "vitest"; + +import { fanout } from "@lazarv/react-server/lib/build/fanout.mjs"; +import { pMapStream } from "@lazarv/react-server/lib/build/p-map-stream.mjs"; +import { + buildPathStream, + toPathStream, + validatedPathStream, +} from "@lazarv/react-server/lib/build/path-source.mjs"; + +// These tests cover the load-bearing properties of the new streaming +// static-export pipeline. Each property is tested as a standalone unit +// so a regression points cleanly at the primitive that broke: +// +// pMapStream - bounded concurrency, no eager closure materialization +// toPathStream - normalization of every supported input shape +// buildPathStream - generator-function streaming + array back-compat +// validatedPathStream - fail-fast on bad entries +// fanout - one-chunk-in-flight fan-out, backpressure honored +// +// End-to-end (running an actual export against a large fixture and +// checking heap_used) is the right complement, but lives in a separate +// integration spec — the build phase is too heavyweight for this file. 
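+// For orientation, the contract these tests pin down, as a usage
+// sketch (hypothetical caller code — `renderOne` stands in for an
+// assumed per-item worker, not part of this suite):
+//
+//   await pMapStream(
+//     pathStream,                // sync or async iterable of items
+//     async (p) => renderOne(p), // mapper, awaited once per item
+//     4                          // at most 4 mappers in flight
+//   );
+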
+ +describe("pMapStream", () => { + test("processes every item exactly once", async () => { + const seen = []; + await pMapStream( + (function* () { + for (let i = 0; i < 100; i++) yield i; + })(), + async (i) => { + seen.push(i); + }, + 4 + ); + expect(seen.toSorted((a, b) => a - b)).toEqual( + Array.from({ length: 100 }, (_, i) => i) + ); + }); + + test("respects concurrency bound (never exceeds N in flight)", async () => { + let inflight = 0; + let peak = 0; + const concurrency = 4; + await pMapStream( + (function* () { + for (let i = 0; i < 200; i++) yield i; + })(), + async () => { + inflight++; + if (inflight > peak) peak = inflight; + await new Promise((r) => setTimeout(r, 1)); + inflight--; + }, + concurrency + ); + expect(peak).toBeLessThanOrEqual(concurrency); + // Should saturate the pool — not just run sequentially. + expect(peak).toBeGreaterThanOrEqual(concurrency - 1); + }); + + test("consumes async iterables lazily (source pull only when worker free)", async () => { + let pulled = 0; + const concurrency = 3; + const total = 50; + + async function* source() { + for (let i = 0; i < total; i++) { + pulled++; + yield i; + } + } + + let completed = 0; + await pMapStream( + source(), + async () => { + // The invariant we care about: the source is not pulled ahead + // of the consumer by more than `concurrency` items. + expect(pulled - completed).toBeLessThanOrEqual(concurrency); + await new Promise((r) => setTimeout(r, 0)); + completed++; + }, + concurrency + ); + expect(completed).toBe(total); + }); + + test("propagates the first mapper error and stops further pulls", async () => { + const seen = []; + await expect( + pMapStream( + (function* () { + for (let i = 0; i < 100; i++) yield i; + })(), + async (i) => { + seen.push(i); + if (i === 5) throw new Error("boom"); + }, + 2 + ) + ).rejects.toThrow("boom"); + // After the error, in-flight workers may finish their current item + // but no new items should be pulled. Sanity check: we shouldn't + // have processed all 100. 
+ expect(seen.length).toBeLessThan(100); + }); + + test("rejects concurrency < 1", async () => { + await expect(pMapStream([1, 2], async () => {}, 0)).rejects.toThrow( + /concurrency must be >= 1/ + ); + }); +}); + +describe("toPathStream", () => { + async function collect(asyncIter) { + const out = []; + for await (const x of asyncIter) out.push(x); + return out; + } + + test("normalizes string to descriptor", async () => { + expect(await collect(toPathStream("/foo"))).toEqual([{ path: "/foo" }]); + }); + + test("yields nothing for null / undefined", async () => { + expect(await collect(toPathStream(null))).toEqual([]); + expect(await collect(toPathStream(undefined))).toEqual([]); + }); + + test("normalizes descriptor as-is", async () => { + const d = { path: "/x", outlet: "y" }; + expect(await collect(toPathStream(d))).toEqual([d]); + }); + + test("flattens arrays of mixed shapes", async () => { + const out = await collect(toPathStream(["/a", { path: "/b" }, () => "/c"])); + expect(out).toEqual([{ path: "/a" }, { path: "/b" }, { path: "/c" }]); + }); + + test("consumes sync generator", async () => { + const out = await collect( + toPathStream( + (function* () { + yield "/a"; + yield { path: "/b" }; + })() + ) + ); + expect(out).toEqual([{ path: "/a" }, { path: "/b" }]); + }); + + test("consumes async generator", async () => { + async function* gen() { + yield "/a"; + yield { path: "/b" }; + } + expect(await collect(toPathStream(gen()))).toEqual([ + { path: "/a" }, + { path: "/b" }, + ]); + }); + + test("calls function and re-normalizes its result", async () => { + expect(await collect(toPathStream(() => ["/a", "/b"]))).toEqual([ + { path: "/a" }, + { path: "/b" }, + ]); + expect( + await collect( + toPathStream(async () => + (async function* () { + yield "/c"; + })() + ) + ) + ).toEqual([{ path: "/c" }]); + }); + + test("throws on invalid entry", async () => { + await expect(collect(toPathStream(42))).rejects.toThrow( + /Invalid export path entry/ + ); + await expect(collect(toPathStream({}))).rejects.toThrow( + /Invalid export path entry/ + ); + }); +}); + +describe("buildPathStream", () => { + async function collect(asyncIter) { + const out = []; + for await (const x of asyncIter) out.push(x); + return out; + } + + test("array-form configRoot.export prepends to options.exportPaths", async () => { + const out = await collect( + buildPathStream( + { exportPaths: ["/from-options"] }, + { export: ["/from-config"] } + ) + ); + expect(out).toEqual([{ path: "/from-config" }, { path: "/from-options" }]); + }); + + test("regular-function configRoot.export gets array, returns array", async () => { + let received; + const out = await collect( + buildPathStream( + { exportPaths: ["/a", "/b"] }, + { + // Plain (non-generator) function — legacy contract. + export: (paths) => { + received = paths; + return [...paths, { path: "/c" }]; + }, + } + ) + ); + expect(received).toEqual([{ path: "/a" }, { path: "/b" }]); + expect(out).toEqual([{ path: "/a" }, { path: "/b" }, { path: "/c" }]); + }); + + test("async-generator configRoot.export streams (no materialization)", async () => { + // Source yields paths one at a time; the generator transform must + // produce its first output before the source is exhausted. We assert + // this by interleaving the source with a flag the transform reads. 
+ const sourceSeen = []; + let firstYielded = false; + + async function* source() { + for (let i = 0; i < 5; i++) { + sourceSeen.push(i); + // After we yield the first item, the transform should be able to + // produce its first output before we loop again. If + // buildPathStream materialized the source for the function, + // `firstYielded` would still be false here at i=1. + if (i === 1) { + // Yield to event loop so the consumer can advance. + await new Promise((r) => setImmediate(r)); + expect(firstYielded).toBe(true); + } + yield { path: `/p${i}` }; + } + } + + async function* transform(paths) { + for await (const p of paths) { + firstYielded = true; + yield { ...p, transformed: true }; + } + } + + const out = await collect( + buildPathStream({ exportPaths: source() }, { export: transform }) + ); + expect(out.length).toBe(5); + expect(out.every((p) => p.transformed)).toBe(true); + expect(sourceSeen).toEqual([0, 1, 2, 3, 4]); + }); + + test("sync-generator configRoot.export also streams", async () => { + // function* (not async function*) is also detected as streaming. + function* transform(_paths) { + // Sync generators can't `for await`, but they can pass through + // the iterable. Verify the constructor.name detection is by class. + // Here we just yield a sentinel without consuming paths to prove + // the transform was called with the live AsyncIterable. + yield { path: "/sentinel" }; + } + const out = await collect( + buildPathStream({ exportPaths: ["/skipped"] }, { export: transform }) + ); + expect(out).toEqual([{ path: "/sentinel" }]); + }); + + test("generator export passes through router paths and lazily yields more", async () => { + // The documented "level up" pattern from docs/router/static#streaming-export: + // for await (const p of paths) yield p; // passthrough + // for (const item of ) yield ...; // append more, lazily + // + // We assert two things: + // (1) every router-side path appears in the output before the appended ones + // (2) the generator was driven lazily (the appended ones aren't pre-collected + // before the passthrough completes) + let appendedYieldedAt = -1; + let passthroughDoneAt = -1; + let consumed = 0; + + async function* transform(paths) { + for await (const p of paths) { + yield { ...p, source: "router" }; + } + passthroughDoneAt = consumed; + for (let i = 0; i < 3; i++) { + if (appendedYieldedAt === -1) appendedYieldedAt = consumed; + yield { path: `/cms/${i}`, source: "cms" }; + } + } + + const stream = buildPathStream( + { exportPaths: ["/r1", "/r2"] }, + { export: transform } + ); + const out = []; + for await (const p of stream) { + consumed++; + out.push(p); + } + + expect(out).toEqual([ + { path: "/r1", source: "router" }, + { path: "/r2", source: "router" }, + { path: "/cms/0", source: "cms" }, + { path: "/cms/1", source: "cms" }, + { path: "/cms/2", source: "cms" }, + ]); + // Lazy proof: the first appended item was yielded *after* the consumer + // had already pulled both router paths — not pre-collected. + expect(passthroughDoneAt).toBe(2); + expect(appendedYieldedAt).toBe(2); + }); + + test("generator export composes with validatedPathStream end-to-end", async () => { + // Mirrors the production pipeline shape: buildPathStream → validatedPathStream + // → consumer. Generator output must flow through validation unchanged when + // every yielded item has `path` or `filename`. 
+ async function* transform(paths) { + for await (const p of paths) yield p; + yield { path: "/added" }; + yield { filename: "404.html" }; + } + const out = []; + for await (const p of validatedPathStream( + buildPathStream({ exportPaths: ["/r"] }, { export: transform }) + )) { + out.push(p); + } + expect(out).toEqual([ + { path: "/r" }, + { path: "/added" }, + { filename: "404.html" }, + ]); + }); + + test("generator export errors propagate as rejections (fail-fast)", async () => { + // A user generator that throws mid-stream must surface the error to the + // consumer rather than being swallowed by the iterable plumbing. + async function* transform(paths) { + let i = 0; + for await (const p of paths) { + if (i++ === 1) throw new Error("boom from user export()"); + yield p; + } + } + const stream = buildPathStream( + { exportPaths: ["/a", "/b", "/c"] }, + { export: transform } + ); + await expect( + (async () => { + // eslint-disable-next-line no-unused-vars + for await (const _item of stream) { + /* drain */ + } + })() + ).rejects.toThrow(/boom from user export/); + }); +}); + +describe("streaming pipeline outstanding-window invariant", () => { + // The load-bearing property of the streaming exporter is that the + // source is never pulled more than `concurrency` items ahead of the + // consumer — if that ever breaks, an arbitrarily large source can no + // longer be exported in bounded memory. We assert this *structural* + // invariant rather than measure RSS (V8 GC is non-deterministic in CI). + // + // The end-to-end behaviour at very high N (100k+ paths through a real + // build) lives in `__test__/apps/static-export-many.spec.mjs`, which + // only runs under `pnpm test-build-start`. Here we run the same + // structural check at a much smaller N: the invariant is N-independent, + // and keeping N small keeps `test-dev-base` fast — these specs run on + // every dev iteration. + + const N = 2000; + + test("generator-yielded paths flow through buildPathStream with bounded outstanding window", async () => { + const concurrency = 8; + let pulled = 0; + let completed = 0; + let maxOutstanding = 0; + + async function* routerSource() { + for (let i = 0; i < N; i++) { + pulled++; + // Track the largest gap we ever observe between source pulls and + // consumer completions. If the streaming contract holds, this + // stays <= concurrency for the entire run, regardless of `N`. + const outstanding = pulled - completed; + if (outstanding > maxOutstanding) maxOutstanding = outstanding; + yield { path: `/p/${i}` }; + } + } + + // User-defined async generator export: passes through router paths + // and tags each — exactly the documented `config.export` shape. + async function* userExport(paths) { + for await (const p of paths) yield { ...p, tag: "x" }; + } + + await pMapStream( + validatedPathStream( + buildPathStream({ exportPaths: routerSource() }, { export: userExport }) + ), + async () => { + // Trivial mapper — the test is about the path source, not work + // done per item. A microtask-yielding await is enough to make + // the outstanding-window invariant non-trivial to satisfy. + await Promise.resolve(); + completed++; + }, + concurrency + ); + + expect(completed).toBe(N); + expect(pulled).toBe(N); + // The structural invariant: outstanding never exceeds the worker pool. + // This is what makes the exporter usable for "infinite" path sources. 
+ expect(maxOutstanding).toBeLessThanOrEqual(concurrency); + }); + + test("single-worker consumer holds the outstanding window at <= 1", async () => { + // concurrency = 1 should reduce to "pull one, process one" behaviour — + // the outstanding window collapses to <= 1 at all times. Captures the + // single-process exporter path (`exportConcurrency: 1`). + let pulled = 0; + let completed = 0; + let maxOutstanding = 0; + + async function* source() { + for (let i = 0; i < N; i++) { + pulled++; + const outstanding = pulled - completed; + if (outstanding > maxOutstanding) maxOutstanding = outstanding; + yield { path: `/p/${i}` }; + } + } + + await pMapStream( + validatedPathStream(buildPathStream({ exportPaths: source() }, {})), + async () => { + completed++; + }, + 1 + ); + + expect(completed).toBe(N); + expect(maxOutstanding).toBeLessThanOrEqual(1); + }); +}); + +describe("validatedPathStream", () => { + async function collect(asyncIter) { + const out = []; + for await (const x of asyncIter) out.push(x); + return out; + } + + test("yields valid descriptors as-is", async () => { + const out = await collect( + validatedPathStream( + (async function* () { + yield "/a"; + yield { path: "/b" }; + yield { filename: "404.html" }; + })() + ) + ); + expect(out).toEqual([ + { path: "/a" }, + { path: "/b" }, + { filename: "404.html" }, + ]); + }); + + test("throws fail-fast on first invalid entry", async () => { + await expect( + collect( + validatedPathStream( + (async function* () { + yield "/a"; + yield { outlet: "no-path-or-filename" }; + })() + ) + ) + ).rejects.toThrow(/missing "path"/); + }); +}); + +describe("fanout", () => { + async function tmpFile(name) { + const dir = await mkdtemp(join(tmpdir(), "rs-fanout-")); + return { + path: join(dir, name), + cleanup: () => rm(dir, { recursive: true, force: true }), + }; + } + + function chunkStream(chunks) { + return new ReadableStream({ + start(controller) { + for (const c of chunks) { + controller.enqueue( + typeof c === "string" ? new TextEncoder().encode(c) : c + ); + } + controller.close(); + }, + }); + } + + test("delivers every chunk to every sink in order", async () => { + const f1 = await tmpFile("a.txt"); + const f2 = await tmpFile("b.txt"); + try { + await fanout(chunkStream(["hello ", "world", "!"]), [ + createWriteStream(f1.path), + createWriteStream(f2.path), + ]); + expect(await readFile(f1.path, "utf8")).toBe("hello world!"); + expect(await readFile(f2.path, "utf8")).toBe("hello world!"); + } finally { + await f1.cleanup(); + await f2.cleanup(); + } + }); + + test("handles empty stream by closing sinks cleanly", async () => { + const f = await tmpFile("empty.txt"); + try { + await fanout(chunkStream([]), [createWriteStream(f.path)]); + expect(await readFile(f.path, "utf8")).toBe(""); + } finally { + await f.cleanup(); + } + }); + + test("waits for slow sink (backpressure honored)", async () => { + const f = await tmpFile("slow.txt"); + try { + // 1 MB of data, written through a normal file stream. With proper + // backpressure, fanout finishes only after the file is fully + // flushed — the assertion below hits real bytes on disk. 
+      const big = "x".repeat(1024 * 1024);
+      await fanout(chunkStream([big]), [createWriteStream(f.path)]);
+      const contents = await readFile(f.path, "utf8");
+      expect(contents.length).toBe(big.length);
+    } finally {
+      await f.cleanup();
+    }
+  });
+
+  test("handles null body (no source) by closing sinks", async () => {
+    const f = await tmpFile("null.txt");
+    try {
+      await fanout(null, [createWriteStream(f.path)]);
+      expect(await readFile(f.path, "utf8")).toBe("");
+    } finally {
+      await f.cleanup();
+    }
+  });
+});
diff --git a/test/fixtures/static-export-many/entry.jsx b/test/fixtures/static-export-many/entry.jsx
new file mode 100644
index 00000000..477a9de8
--- /dev/null
+++ b/test/fixtures/static-export-many/entry.jsx
@@ -0,0 +1,17 @@
+import { usePathname } from "@lazarv/react-server";
+
+// Tiny dynamic page used by the static-export-at-scale spec. The static
+// exporter renders this component once per path yielded by the
+// async-generator `export` in this directory's react-server.config.mjs.
+// The body output captures whichever path was rendered so the spec can
+// spot-check that paths from across the range made it to disk.
+export default function StaticExportManyPage() {
+  const pathname = usePathname();
+  return (
+    <html>
+      <body>
+        <div>Static {pathname}</div>
+      </body>
+    </html>
+  );
+}
diff --git a/test/fixtures/static-export-many/react-server.config.mjs b/test/fixtures/static-export-many/react-server.config.mjs
new file mode 100644
index 00000000..b8e13244
--- /dev/null
+++ b/test/fixtures/static-export-many/react-server.config.mjs
@@ -0,0 +1,27 @@
+// Static export config for the at-scale spec.
+//
+// The whole point of this fixture is to drive the streaming export
+// pipeline with the *generator* form of `config.export` — same shape
+// as the documented "level up" pattern in docs/router/static. We
+// declare an async generator here rather than an array so that:
+//
+// - the path source is never materialized (this is what makes
+//   "very-high amount of static exports" actually work);
+// - we exercise the generator-detection branch in
+//   `lib/build/path-source.mjs` end-to-end through a real build;
+// - the path count can be lifted arbitrarily without hitting any
+//   OS / IPC / env-var size limits the array form would run into.
+//
+// The count is read from STATIC_EXPORT_MANY_COUNT so the spec — and
+// any contributor stress-testing the exporter manually — can crank
+// the number without editing this file.
+const COUNT = Number(process.env.STATIC_EXPORT_MANY_COUNT ?? 1000);
+
+export default {
+  async *export() {
+    for (let i = 0; i < COUNT; i++) {
+      yield { path: `/p/${i}`, rsc: false };
+    }
+  },
+  prerender: false,
+};
diff --git a/test/vitestSetup.mjs b/test/vitestSetup.mjs
index 1075e545..a4c4e474 100644
--- a/test/vitestSetup.mjs
+++ b/test/vitestSetup.mjs
@@ -138,7 +138,16 @@ test.beforeAll(async (_context, suite) => {
         outDir: `.react-server-build-${id}-${hash}`,
         server: true,
         client: true,
-        export: false,
+        // The build action gates static export with `options.export
+        // !== false` (see lib/build/action.mjs); leaving this `false`
+        // means an on-disk `configRoot.export` never runs. Specs that
+        // need static export pass `initialConfig: { export: true }`
+        // (a serializable flag) to flip this; the actual path source
+        // — which can be a function or async generator — is owned by
+        // the fixture's react-server.config.mjs, since functions
+        // can't cross the build-worker fork boundary.
+        export: Boolean(initialConfig?.export),
+        compression: false,
         adapter: ["false"],
         minify: false,
         edge: process.env.EDGE || process.env.EDGE_ENTRY ? true : undefined,
@@ -259,7 +268,13 @@ test.beforeAll(async (_context, suite) => {
         outDir: `.react-server-build-${id}-${hash}`,
         server: true,
         client: true,
-        export: false,
+        // Mirrors the build-only-phase block above: derive the
+        // build-time export flag from `initialConfig.export` so a
+        // spec can opt in without on-disk config gymnastics. The
+        // actual path source still comes from the fixture's
+        // react-server.config.mjs.
+        export: Boolean(initialConfig?.export),
+        compression: false,
         adapter: ["false"],
         minify: false,
         edge: