diff --git a/AGENTS.md b/AGENTS.md index 6ff30deb95..490cb691d4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -21,6 +21,7 @@ TanStack Router is a type-safe router with built-in caching and URL state manage - Framework-agnostic core logic separated from React/Solid bindings - Type-safe routing with search params and path params - Use workspace protocol for internal dependencies (`workspace:*`) +- Always use curly braces for `if`, `else`, loops, and similar control statements. Never write one-line bodies like `if (foo) x = 1`. ## Dev environment tips diff --git a/benchmarks/bundle-size/README.md b/benchmarks/bundle-size/README.md index f222b8088b..4d4d3dc2d3 100644 --- a/benchmarks/bundle-size/README.md +++ b/benchmarks/bundle-size/README.md @@ -32,11 +32,41 @@ Each package has `minimal` and `full` scenarios: pnpm nx run @benchmarks/bundle-size:build ``` +Run one or more scenarios during local optimization: + +```bash +pnpm nx run @benchmarks/bundle-size:build -- --scenario react-router.minimal +pnpm nx run @benchmarks/bundle-size:build -- --scenario react-router.minimal,react-router.full +``` + +Filtered runs build only the package projects needed by selected scenarios. Full runs build all package projects needed by all scenarios. If the required packages are already built and unchanged, skip that step: + +```bash +pnpm nx run @benchmarks/bundle-size:build -- --scenario react-router.minimal --skip-package-builds +``` + This writes: - `benchmarks/bundle-size/results/current.json` - `benchmarks/bundle-size/results/benchmark-action.json` +`current.json` includes run status, selected package build projects, per-scenario totals, per-file sizes, and the emitted JS files used for measurement. Dist paths use `scenarioDir`/`outDir`, e.g. `react-router.minimal` maps to `benchmarks/bundle-size/dist/react-router-minimal/`. 
+ +## Local Query Tools + +```bash +pnpm benchmark:bundle-size:query --id react-router.minimal +pnpm benchmark:bundle-size:diff --baseline /tmp/base-current.json --id react-router.minimal +pnpm benchmark:bundle-size:history --id react-router.minimal --top-deltas 20 +``` + +For source attribution, run an analysis build. This uses hidden source maps and writes source estimates into `current.json`; those estimates are for investigation only, not tracking. + +```bash +pnpm nx run @benchmarks/bundle-size:build -- --scenario react-router.minimal --analysis +pnpm benchmark:bundle-size:analyze --id react-router.minimal --top-sources 30 +``` + ## CI Reporting - PR workflow generates a sticky comment with: @@ -56,6 +86,10 @@ The measurement script supports optional interfaces for historical backfilling: - `--sha` - `--measured-at` - `--append-history` +- `--scenario` +- `--analysis` +- `--sourcemap` +- `--skip-package-builds` These are intended for one-off scripts that replay historical commits and append results to the same history dataset shape used for chart generation. If `--append-history` points at a `data.js` file, output is written as `window.BENCHMARK_DATA = ...` for direct GitHub Pages compatibility. 
diff --git a/benchmarks/bundle-size/package.json b/benchmarks/bundle-size/package.json index 9a4b70b858..8364d033f0 100644 --- a/benchmarks/bundle-size/package.json +++ b/benchmarks/bundle-size/package.json @@ -5,6 +5,14 @@ "scripts": { "build": "node ../../scripts/benchmarks/bundle-size/measure.mjs" }, + "nx": { + "targets": { + "build": { + "cache": false, + "dependsOn": [] + } + } + }, "dependencies": { "@tanstack/react-router": "workspace:^", "@tanstack/solid-router": "workspace:^", diff --git a/package.json b/package.json index 41b3a29874..9287a0a34d 100644 --- a/package.json +++ b/package.json @@ -20,6 +20,10 @@ "test:types": "nx affected --target=test:types --exclude=examples/**", "test:e2e": "nx run-many --target=test:e2e", "benchmark:bundle-size": "pnpm nx run @benchmarks/bundle-size:build", + "benchmark:bundle-size:query": "node scripts/benchmarks/bundle-size/query.mjs", + "benchmark:bundle-size:diff": "node scripts/benchmarks/bundle-size/diff.mjs", + "benchmark:bundle-size:history": "node scripts/benchmarks/bundle-size/history.mjs", + "benchmark:bundle-size:analyze": "node scripts/benchmarks/bundle-size/analyze.mjs", "benchmark:client-nav": "pnpm nx run @benchmarks/client-nav:test:perf", "benchmark:ssr": "pnpm nx run @benchmarks/ssr:test:perf", "build": "nx affected --target=build --exclude=e2e/** --exclude=examples/**", @@ -35,6 +39,7 @@ "labeler-generate": "node scripts/generate-labeler-config.ts", "cleanup-empty-packages": "node scripts/cleanup-empty-packages.mjs", "test:docs": "node scripts/verify-links.ts", + "ts:symbol-references": "node scripts/ts-symbol-references.mjs", "vite-ecosystem-ci:build": "nx run-many --targets=build --projects=@tanstack/router-plugin,@tanstack/start-plugin-core,@tanstack/react-start,@tanstack/react-start-client,@tanstack/react-start-server --skipRemoteCache", "vite-ecosystem-ci:before-test": "pnpm exec playwright install chromium", "vite-ecosystem-ci:test": "nx run-many --targets=test:unit 
--projects=@tanstack/router-plugin,@tanstack/start-plugin-core,@tanstack/react-start-client --skipRemoteCache && nx run-many --target=test:e2e --projects=tanstack-router-e2e-react-basic-file-based,tanstack-router-e2e-react-basic-file-based-code-splitting,tanstack-react-start-e2e-basic,tanstack-vue-start-e2e-basic,tanstack-solid-start-e2e-basic --skipRemoteCache" diff --git a/scripts/benchmarks/bundle-size/analyze.mjs b/scripts/benchmarks/bundle-size/analyze.mjs new file mode 100644 index 0000000000..0e8f99f92c --- /dev/null +++ b/scripts/benchmarks/bundle-size/analyze.mjs @@ -0,0 +1,69 @@ +#!/usr/bin/env node + +// Prints the sources with the largest estimated byte share of one metric, using +// the attribution that `measure.mjs --analysis` writes into current.json. + +import fs from 'node:fs' +import path from 'node:path' +import { parseArgs } from 'node:util' + +const { values } = parseArgs({ + allowPositionals: false, + options: { + current: { + type: 'string', + default: 'benchmarks/bundle-size/results/current.json', + }, + id: { type: 'string' }, + 'top-sources': { type: 'string', default: '30' }, + json: { type: 'boolean' }, + }, +}) + +if (!values.id) { + throw new Error('Missing required argument: --id') +} + +const topSources = Number.parseInt(values['top-sources'], 10) +if (!Number.isInteger(topSources) || topSources < 0) { + throw new Error(`Invalid --top-sources value: ${values['top-sources']}`) +} + +const current = JSON.parse( + fs.readFileSync(path.resolve(values.current), 'utf8'), +) +const metric = (current.metrics || []).find((item) => item.id === values.id) + +if (!metric) { + throw new Error(`Unknown bundle-size metric: ${values.id}`) +} + +if (!metric.sources) { + throw new Error( + `No source attribution found for ${values.id}. 
Re-run measure with --analysis.`, + ) +} + +// Sum the estimates per source so one that occurs in several chunks is listed once. +const sourceBytes = new Map() +for (const chunk of metric.sources) { + for (const source of chunk.sources || []) { + sourceBytes.set( + source.source, + (sourceBytes.get(source.source) || 0) + source.estimatedBytes, + ) + } +} + +const rows = [...sourceBytes] + .map(([source, estimatedBytes]) => ({ source, estimatedBytes })) + .sort((a, b) => b.estimatedBytes - a.estimatedBytes) + .slice(0, topSources) + +if (values.json) { + process.stdout.write(JSON.stringify(rows, null, 2) + '\n') +} else { + for (const row of rows) { + process.stdout.write(`${row.estimatedBytes} ${row.source}\n`) + } +} diff --git a/scripts/benchmarks/bundle-size/diff.mjs b/scripts/benchmarks/bundle-size/diff.mjs new file mode 100644 index 0000000000..bd698bb9eb --- /dev/null +++ b/scripts/benchmarks/bundle-size/diff.mjs @@ -0,0 +1,78 @@ +#!/usr/bin/env node + +import fs from 'node:fs' +import path from 'node:path' +import { parseArgs } from 'node:util' + +const { values } = parseArgs({ + allowPositionals: false, + options: { + baseline: { type: 'string' }, + current: { + type: 'string', + default: 'benchmarks/bundle-size/results/current.json', + }, + id: { type: 'string' }, + json: { type: 'boolean' }, + }, +}) + +if (!values.baseline) { + throw new Error('Missing required argument: --baseline') +} + +function readCurrent(filePath) { + return JSON.parse(fs.readFileSync(path.resolve(filePath), 'utf8')) +} + +function byId(current) { + return new Map((current.metrics || []).map((metric) => [metric.id, metric])) +} + +const baselineById = byId(readCurrent(values.baseline)) +const currentById = byId(readCurrent(values.current)) +const ids = values.id + ? 
[values.id] + : [...new Set([...baselineById.keys(), ...currentById.keys()])].sort() // every id found in either file + +const rows = ids.map((id) => { + const baseline = baselineById.get(id) + const current = currentById.get(id) + return { + id, + baseline: baseline?.gzipBytes, // absolute sizes are reported for gzip only + current: current?.gzipBytes, + delta: // each delta stays undefined unless both sides hold a finite number + Number.isFinite(baseline?.gzipBytes) && + Number.isFinite(current?.gzipBytes) + ? current.gzipBytes - baseline.gzipBytes + : undefined, + initialDelta: + Number.isFinite(baseline?.initialGzipBytes) && + Number.isFinite(current?.initialGzipBytes) + ? current.initialGzipBytes - baseline.initialGzipBytes + : undefined, + rawDelta: + Number.isFinite(baseline?.rawBytes) && Number.isFinite(current?.rawBytes) + ? current.rawBytes - baseline.rawBytes + : undefined, + brotliDelta: + Number.isFinite(baseline?.brotliBytes) && + Number.isFinite(current?.brotliBytes) + ? current.brotliBytes - baseline.brotliBytes + : undefined, + } +}) + +if (values.json) { + process.stdout.write(JSON.stringify(rows, null, 2) + '\n') +} else { + for (const row of rows) { + const delta = Number.isFinite(row.delta) // only the gzip delta is printed with an explicit '+' + ? `${row.delta >= 0 ? '+' : ''}${row.delta}` + : 'n/a' + process.stdout.write( + `${row.id} ${row.baseline ?? 'n/a'} -> ${row.current ?? 'n/a'} (${delta}) initial=${row.initialDelta ?? 'n/a'} raw=${row.rawDelta ?? 'n/a'} brotli=${row.brotliDelta ?? 
'n/a'}\n`, + ) + } +} diff --git a/scripts/benchmarks/bundle-size/history.mjs b/scripts/benchmarks/bundle-size/history.mjs new file mode 100644 index 0000000000..d219122b5e --- /dev/null +++ b/scripts/benchmarks/bundle-size/history.mjs @@ -0,0 +1,98 @@ +#!/usr/bin/env node + +// Lists the largest value changes between consecutive history entries of each benchmark. + +import fs from 'node:fs' +import { execFileSync } from 'node:child_process' +import vm from 'node:vm' +import { parseArgs } from 'node:util' + +const HISTORY_PATH = 'benchmarks/bundle-size/data.js' + +const { values } = parseArgs({ + allowPositionals: false, + options: { + history: { type: 'string' }, + id: { type: 'string' }, + 'top-deltas': { type: 'string', default: '20' }, + json: { type: 'boolean' }, + }, +}) + +const topDeltas = Number.parseInt(values['top-deltas'], 10) +if (!Number.isInteger(topDeltas) || topDeltas < 0) { + throw new Error(`Invalid --top-deltas value: ${values['top-deltas']}`) +} + +// Accepts plain JSON or a `window.BENCHMARK_DATA = ...` script. +function parseHistory(raw) { + const trimmed = raw.trim() + if (trimmed.startsWith('window.BENCHMARK_DATA')) { + // NOTE(review): this executes the file as JavaScript, and node:vm is not a + // security boundary; only pass history data from a trusted source. + const sandbox = { window: {} } + vm.runInNewContext(trimmed, sandbox, { timeout: 1000 }) + return sandbox.window.BENCHMARK_DATA + } + return JSON.parse(trimmed) +} + +// Returns data.js from the first gh-pages ref that `git show` can read. +function readHistoryFromGit() { + for (const ref of ['origin/gh-pages', 'gh-pages']) { + try { + return execFileSync('git', ['show', `${ref}:${HISTORY_PATH}`], { + encoding: 'utf8', + }) + } catch { + // Best effort: this ref could not be read, so try the next one. + } + } + + throw new Error( + `Could not read ${HISTORY_PATH} from origin/gh-pages or gh-pages. Run: git fetch origin gh-pages`, + ) +} + +const raw = values.history + ? 
fs.readFileSync(values.history, 'utf8') + : readHistoryFromGit() +const history = parseHistory(raw) +const entries = history.entries?.['Bundle Size (gzip)'] || [] +const previous = new Map() +const deltas = [] + +for (const entry of entries) { + for (const bench of entry.benches || []) { + if (values.id && bench.name !== values.id) { + continue + } + + const prior = previous.get(bench.name) + if (prior !== undefined && prior !== bench.value) { + deltas.push({ + id: bench.name, + delta: bench.value - prior, + value: bench.value, + sha: entry.commit?.id, + message: String(entry.commit?.message || '').split('\n')[0], + timestamp: entry.commit?.timestamp, + }) + } + + previous.set(bench.name, bench.value) + } +} + +deltas.sort((a, b) => Math.abs(b.delta) - Math.abs(a.delta)) +const rows = deltas.slice(0, topDeltas) + +if (values.json) { + process.stdout.write(JSON.stringify(rows, null, 2) + '\n') +} else { + for (const row of rows) { + process.stdout.write( + `${row.id} ${row.delta >= 0 ? 
'+' : ''}${row.delta} => ${row.value} ${row.sha?.slice(0, 12)} ${row.message}\n`, + ) + } +} diff --git a/scripts/benchmarks/bundle-size/measure.mjs b/scripts/benchmarks/bundle-size/measure.mjs index 8435a9b7db..621fa4b6d3 100644 --- a/scripts/benchmarks/bundle-size/measure.mjs +++ b/scripts/benchmarks/bundle-size/measure.mjs @@ -6,8 +6,9 @@ import { createRequire } from 'node:module' import path from 'node:path' import { fileURLToPath, pathToFileURL } from 'node:url' import { parseArgs as parseNodeArgs } from 'node:util' +import vm from 'node:vm' import { brotliCompressSync, gzipSync } from 'node:zlib' -import { execSync } from 'node:child_process' +import { execFileSync, execSync } from 'node:child_process' import { build } from 'vite' @@ -129,6 +130,10 @@ function parseArgs(argv) { 'append-history': { type: 'string' }, 'results-dir': { type: 'string' }, 'dist-dir': { type: 'string' }, + scenario: { type: 'string' }, + analysis: { type: 'boolean' }, + sourcemap: { type: 'boolean' }, + 'skip-package-builds': { type: 'boolean' }, }, }) @@ -138,9 +143,52 @@ function parseArgs(argv) { appendHistory: values['append-history'], resultsDir: values['results-dir'], distDir: values['dist-dir'], + scenario: values.scenario, + analysis: values.analysis === true, + sourcemap: values.sourcemap === true, + skipPackageBuilds: values['skip-package-builds'] === true, } } +function filterScenarios(filter) { + if (!filter) { + return SCENARIOS + } + + const requested = filter + .split(',') + .map((value) => value.trim()) + .filter(Boolean) + const scenarios = SCENARIOS.filter((scenario) => { + return requested.some( + (value) => + value === scenario.id || + value === scenario.dir || + value === scenario.outDir || + value === `${scenario.framework}-${scenario.case}`, + ) + }) + + const missing = requested.filter( + (value) => + !SCENARIOS.some( + (scenario) => + value === scenario.id || + value === scenario.dir || + value === scenario.outDir || + value === 
`${scenario.framework}-${scenario.case}`, + ), + ) + + if (missing.length > 0) { + throw new Error( + `Unknown bundle-size scenario: ${missing.join(', ')}\nKnown scenarios: ${SCENARIOS.map((scenario) => scenario.id).join(', ')}`, + ) + } + + return scenarios +} + function toIsoDate(value) { const date = new Date(value) @@ -159,10 +207,9 @@ function parseMaybeDataJs(raw) { const trimmed = raw.trim() if (trimmed.startsWith('window.BENCHMARK_DATA')) { - const withoutPrefix = trimmed - .replace(/^window\.BENCHMARK_DATA\s*=\s*/, '') - .replace(/;\s*$/, '') - return JSON.parse(withoutPrefix) + const sandbox = { window: {} } + vm.runInNewContext(trimmed, sandbox, { timeout: 1000 }) + return sandbox.window.BENCHMARK_DATA } return JSON.parse(trimmed) @@ -231,24 +278,35 @@ function collectAllViteJsFiles(manifest) { return [...files].sort() } -function bytesForFiles(baseDir, fileList) { +function sizesForFiles(baseDir, fileList) { let rawBytes = 0 let gzipBytes = 0 let brotliBytes = 0 + const files = [] for (const relativeFile of fileList) { const fullPath = path.join(baseDir, relativeFile) const content = fs.readFileSync(fullPath) - - rawBytes += content.byteLength - gzipBytes += gzipSync(content).byteLength - brotliBytes += brotliCompressSync(content).byteLength + const rawByteLength = content.byteLength + const gzipByteLength = gzipSync(content).byteLength + const brotliByteLength = brotliCompressSync(content).byteLength + + rawBytes += rawByteLength + gzipBytes += gzipByteLength + brotliBytes += brotliByteLength + files.push({ + file: relativeFile, + rawBytes: rawByteLength, + gzipBytes: gzipByteLength, + brotliBytes: brotliByteLength, + }) } return { rawBytes, gzipBytes, brotliBytes, + files, } } @@ -385,7 +443,75 @@ async function importFromRoot(root, specifier) { return import(pathToFileURL(requireFromRoot.resolve(specifier)).href) } -async function buildViteScenario({ root, outDir }) { +function getGitStatus() { + try { + return { + branch: execSync('git branch 
--show-current', { + encoding: 'utf8', + }).trim(), + dirty: + execSync('git status --porcelain', { encoding: 'utf8' }).trim().length > + 0, + } + } catch { + return { + branch: '', + dirty: undefined, + } + } +} + +function getPackageBuildProjects(scenarios) { + const projects = new Set() + + for (const scenario of scenarios) { + projects.add(scenario.packageName) + + if (scenario.packageName.endsWith('-router')) { + projects.add('@tanstack/router-plugin') + } + } + + return [...projects].sort() +} + +function buildRequiredPackages({ repoRoot, scenarios, skipPackageBuilds }) { + const projects = getPackageBuildProjects(scenarios) + + if (skipPackageBuilds || projects.length === 0) { + return projects + } + + process.stdout.write( + `Building package projects for bundle-size scenarios: ${projects.join(', ')}\n`, + ) + + const args = [ + 'nx', + 'run-many', + '--target=build', + `--projects=${projects.join(',')}`, + '--outputStyle=stream', + '--skipRemoteCache', + ] + + if ( + process.env.NX_SKIP_NX_CACHE === 'true' || + process.env.NX_DISABLE_NX_CACHE === 'true' + ) { + args.push('--skipNxCache') + } + + execFileSync(process.platform === 'win32' ? 'pnpm.cmd' : 'pnpm', args, { + cwd: repoRoot, + stdio: 'inherit', + env: process.env, + }) + + return projects +} + +async function buildViteScenario({ root, outDir, sourcemap }) { const configFile = path.join(root, 'vite.config.ts') await build({ @@ -400,14 +526,14 @@ async function buildViteScenario({ root, outDir }) { emptyOutDir: true, target: 'es2022', minify: 'esbuild', - sourcemap: false, + sourcemap: sourcemap ? 
'hidden' : false, reportCompressedSize: false, manifest: true, }, }) } -async function buildRsbuildScenario({ root, outDir }) { +async function buildRsbuildScenario({ root, outDir, sourcemap }) { const configFile = path.join(root, 'rsbuild.config.ts') const { createRsbuild, loadConfig } = await importFromRoot( root, @@ -426,7 +552,17 @@ async function buildRsbuildScenario({ root, outDir }) { const rsbuild = await createRsbuild({ cwd: root, callerName: 'bundle-size-benchmark', - config: content, + config: sourcemap + ? { + ...content, + output: { + ...content.output, + sourceMap: { + js: 'hidden-source-map', // hidden, like Vite's 'hidden': no sourceMappingURL comment in the measured JS + }, + }, + } + : content, }) const result = await rsbuild.build() await result.close() @@ -439,17 +575,17 @@ async function buildRsbuildScenario({ root, outDir }) { } } -async function buildScenario({ root, outDir, scenario }) { +async function buildScenario({ root, outDir, scenario, sourcemap }) { const previousCwd = process.cwd() process.chdir(root) try { if (scenario.toolchain === 'rsbuild') { - await buildRsbuildScenario({ root, outDir }) + await buildRsbuildScenario({ root, outDir, sourcemap }) return } - await buildViteScenario({ root, outDir }) + await buildViteScenario({ root, outDir, sourcemap }) } finally { process.chdir(previousCwd) } @@ -522,6 +658,115 @@ function collectRsbuildAllJsFiles(manifest) { return [...files].sort() } +function decodeVlq(segment) { + const values = [] + let value = 0 + let shift = 0 + + for (const char of segment) { + let digit = + 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'.indexOf( + char, + ) + + if (digit < 0) { + continue + } + + const continuation = digit & 32 // bit 5 set: more digits of this value follow + digit &= 31 + value += digit << shift + + if (continuation) { + shift += 5 + continue + } + + const negative = value & 1 // lowest bit of the assembled value is the sign + value >>= 1 + values.push(negative ? 
-value : value) + value = 0 + shift = 0 + } + + return values +} + +function estimateSourceBytesFromMap(mapPath, generatedPath) { // per-source size estimate for one emitted file; [] when it has no map + if (!fs.existsSync(mapPath)) { + return [] + } + + const map = readJson(mapPath) + const sources = map.sources || [] + const sourceBytes = new Map() + let sourceIndex = 0 // accumulated across lines: the loop below never resets it + const generatedLines = fs.existsSync(generatedPath) + ? fs.readFileSync(generatedPath, 'utf8').split('\n') + : [] + let lineIndex = 0 + + for (const line of String(map.mappings || '').split(';')) { + let generatedColumn = 0 // restarts at 0 for every generated line + let previousColumn = 0 + let previousSource = -1 + const segments = line.split(',').filter(Boolean) + const decodedSegments = [] + + for (const segment of segments) { + const values = decodeVlq(segment) + generatedColumn += values[0] || 0 + + if (values.length >= 4) { // segments with fewer than 4 fields are not attributed to any source + sourceIndex += values[1] || 0 + decodedSegments.push({ column: generatedColumn, sourceIndex }) + } + } + + for (const segment of decodedSegments) { + if (previousSource >= 0) { + const bytes = Math.max(0, segment.column - previousColumn) // NOTE: a width in string columns, not encoded bytes + sourceBytes.set( + sources[previousSource], + (sourceBytes.get(sources[previousSource]) || 0) + bytes, + ) + } + + previousColumn = segment.column + previousSource = segment.sourceIndex + } + + if (previousSource >= 0) { // credit the rest of the line (at least 1) to the last mapped source + const bytes = Math.max( + 1, + (generatedLines[lineIndex]?.length || previousColumn) - previousColumn, + ) + sourceBytes.set( + sources[previousSource], + (sourceBytes.get(sources[previousSource]) || 0) + bytes, + ) + } + + lineIndex++ + } + + return [...sourceBytes] + .map(([source, estimatedBytes]) => ({ source, estimatedBytes })) + .sort((a, b) => b.estimatedBytes - a.estimatedBytes) // largest estimate first +} + +function sourceAttributionForFiles(baseDir, fileList) { // pairs each file with the estimate from its sibling `<file>.map` + return fileList.map((file) => { + return { + file, + sources: estimateSourceBytesFromMap( + path.join(baseDir, `${file}.map`), + path.join(baseDir, file), + ), + } + }) +} + async function resolveBundleFiles({ outDir, scenario }) { if (scenario.toolchain === 'rsbuild') { const 
manifestInfo = await resolveRsbuildManifest(outDir, scenario.id) @@ -632,42 +877,73 @@ async function main() { ? toIsoDate(args.measuredAt) : new Date().toISOString() const sha = getCurrentSha(args.sha) + const startedAt = Date.now() + const scenarios = filterScenarios(args.scenario) + const packageBuildProjects = buildRequiredPackages({ + repoRoot, + scenarios, + skipPackageBuilds: args.skipPackageBuilds, + }) await fsp.mkdir(resultsDir, { recursive: true }) await fsp.mkdir(distDir, { recursive: true }) const metrics = [] - for (const scenario of SCENARIOS) { + for (const scenario of scenarios) { const root = path.join(scenariosRoot, scenario.dir) const outDir = path.join(distDir, scenario.outDir || scenario.dir) - await buildScenario({ root, outDir, scenario }) + await buildScenario({ + root, + outDir, + scenario, + sourcemap: args.sourcemap || args.analysis, + }) const bundleInfo = await resolveBundleFiles({ outDir, scenario }) - const sizes = bytesForFiles(bundleInfo.manifestOutDir, bundleInfo.jsFiles) - const initialSizes = bytesForFiles( + const sizes = sizesForFiles(bundleInfo.manifestOutDir, bundleInfo.jsFiles) + const initialSizes = sizesForFiles( bundleInfo.manifestOutDir, bundleInfo.initialJsFiles, ) - - metrics.push({ + const initialFileSet = new Set(bundleInfo.initialJsFiles) + const files = sizes.files.map((file) => ({ + ...file, + initial: initialFileSet.has(file.file), + })) + const metric = { id: scenario.id, scenarioDir: scenario.dir, + outDir: scenario.outDir || scenario.dir, toolchain: scenario.toolchain || 'vite', framework: scenario.framework, packageName: scenario.packageName, case: scenario.case, entryKey: bundleInfo.entryKey, manifestPath: path.relative(outDir, bundleInfo.manifestPath), + manifestOutDir: path.relative(repoRoot, bundleInfo.manifestOutDir), initialJsFiles: bundleInfo.initialJsFiles, jsFiles: bundleInfo.jsFiles, + files, initialRawBytes: initialSizes.rawBytes, initialGzipBytes: initialSizes.gzipBytes, initialBrotliBytes: 
initialSizes.brotliBytes, - ...sizes, - }) + rawBytes: sizes.rawBytes, + gzipBytes: sizes.gzipBytes, + brotliBytes: sizes.brotliBytes, + } + + if (args.analysis) { + metric.sources = sourceAttributionForFiles( + bundleInfo.manifestOutDir, + bundleInfo.jsFiles, + ) + } + + metrics.push(metric) } + const completedAt = Date.now() const current = { schemaVersion: 1, @@ -675,6 +951,19 @@ async function main() { measuredAt: measuredAtIso, generatedAt: new Date().toISOString(), sha, + status: { + state: 'success', + command: `node ${path.relative(repoRoot, fileURLToPath(import.meta.url))}${args.scenario ? ` --scenario ${args.scenario}` : ''}${args.analysis ? ' --analysis' : ''}${args.sourcemap ? ' --sourcemap' : ''}${args.skipPackageBuilds ? ' --skip-package-builds' : ''}`, + scenarioFilter: args.scenario || null, + measuredScenarios: scenarios.map((scenario) => scenario.id), + packageBuildProjects, + skipPackageBuilds: args.skipPackageBuilds, + durationMs: completedAt - startedAt, + git: { + sha, + ...getGitStatus(), + }, + }, metrics, } diff --git a/scripts/benchmarks/bundle-size/pr-report.mjs b/scripts/benchmarks/bundle-size/pr-report.mjs index 446817df83..5118d877ec 100644 --- a/scripts/benchmarks/bundle-size/pr-report.mjs +++ b/scripts/benchmarks/bundle-size/pr-report.mjs @@ -4,6 +4,7 @@ import fs from 'node:fs' import { promises as fsp } from 'node:fs' import path from 'node:path' import { parseArgs as parseNodeArgs } from 'node:util' +import vm from 'node:vm' const DEFAULT_MARKER = '' const INT_FORMAT = new Intl.NumberFormat('en-US', { @@ -68,11 +69,9 @@ function parseMaybeDataJs(raw) { const trimmed = raw.trim() if (trimmed.startsWith('window.BENCHMARK_DATA')) { - return JSON.parse( - trimmed - .replace(/^window\.BENCHMARK_DATA\s*=\s*/, '') - .replace(/;\s*$/, ''), - ) + const sandbox = { window: {} } + vm.runInNewContext(trimmed, sandbox, { timeout: 1000 }) + return sandbox.window.BENCHMARK_DATA } return JSON.parse(trimmed) diff --git 
a/scripts/benchmarks/bundle-size/query.mjs b/scripts/benchmarks/bundle-size/query.mjs new file mode 100644 index 0000000000..b27668366f --- /dev/null +++ b/scripts/benchmarks/bundle-size/query.mjs @@ -0,0 +1,45 @@ +#!/usr/bin/env node + +import fs from 'node:fs' +import path from 'node:path' +import { parseArgs } from 'node:util' + +const { values } = parseArgs({ + allowPositionals: false, + options: { + current: { + type: 'string', + default: 'benchmarks/bundle-size/results/current.json', + }, + id: { type: 'string' }, + json: { type: 'boolean' }, + }, +}) + +const currentPath = path.resolve(values.current) +const current = JSON.parse(fs.readFileSync(currentPath, 'utf8')) +const metrics = values.id + ? (current.metrics || []).filter((metric) => metric.id === values.id) + : current.metrics || [] + +if (values.id && metrics.length === 0) { + throw new Error(`Unknown bundle-size metric: ${values.id}`) +} + +if (values.json) { + process.stdout.write(JSON.stringify(metrics, null, 2) + '\n') +} else { + for (const metric of metrics) { + process.stdout.write( + [ + metric.id, + `gzip=${metric.gzipBytes}`, + `initial=${metric.initialGzipBytes}`, + `raw=${metric.rawBytes}`, + `brotli=${metric.brotliBytes}`, + `dist=${metric.outDir || metric.scenarioDir}`, + `files=${(metric.jsFiles || []).join(',')}`, + ].join(' ') + '\n', + ) + } +} diff --git a/scripts/ts-symbol-references.mjs b/scripts/ts-symbol-references.mjs new file mode 100644 index 0000000000..61638e2cb9 --- /dev/null +++ b/scripts/ts-symbol-references.mjs @@ -0,0 +1,310 @@ +#!/usr/bin/env node + +// Prints every reference to one TypeScript symbol, resolved through the language service. + +import { existsSync, readFileSync } from 'node:fs' +import { dirname, relative, resolve } from 'node:path' +import ts from 'typescript' + +const usage = `Usage: + pnpm ts:symbol-references -- --project <tsconfig> --file <file> --symbol <name> [--line <line> --column <column>] + +Example: + pnpm ts:symbol-references -- --project packages/router-core/tsconfig.json --file packages/router-core/src/utils.ts --symbol last +` + +const args = 
parseArgs(process.argv.slice(2)) + +if (args.help) { + console.log(usage) + process.exit(0) +} + +if (!args.project || !args.file || !args.symbol) { + fail(usage) +} + +const cwd = process.cwd() +const projectPath = resolve(cwd, args.project) +const targetFile = resolve(cwd, args.file) + +if (!existsSync(projectPath)) { + fail(`Project not found: ${args.project}`) +} + +if (!existsSync(targetFile)) { + fail(`File not found: ${args.file}`) +} + +const configFile = ts.readConfigFile(projectPath, ts.sys.readFile) +if (configFile.error) { + fail(formatDiagnostics([configFile.error])) +} + +const parsedConfig = ts.parseJsonConfigFileContent( + configFile.config, + ts.sys, + dirname(projectPath), + undefined, + projectPath, +) + +if (parsedConfig.errors.length) { + fail(formatDiagnostics(parsedConfig.errors)) +} + +const fileNames = Array.from(new Set([...parsedConfig.fileNames, targetFile])) +const fileTextCache = new Map() +const service = ts.createLanguageService({ + getCompilationSettings: () => parsedConfig.options, + getCurrentDirectory: () => dirname(projectPath), + getDefaultLibFileName: (options) => ts.getDefaultLibFilePath(options), + getDirectories: ts.sys.getDirectories, + getNewLine: () => ts.sys.newLine, + getScriptFileNames: () => fileNames, + getScriptSnapshot(fileName) { + const text = getFileText(fileName) + return text === undefined ? 
undefined : ts.ScriptSnapshot.fromString(text) + }, + getScriptVersion: () => '0', + readDirectory: ts.sys.readDirectory, + readFile: ts.sys.readFile, + fileExists: ts.sys.fileExists, + directoryExists: ts.sys.directoryExists, + useCaseSensitiveFileNames: () => ts.sys.useCaseSensitiveFileNames, + realpath: ts.sys.realpath, +}) + +const program = service.getProgram() +const sourceFile = program + ?.getSourceFiles() + .find((source) => resolve(source.fileName) === targetFile) + +if (!sourceFile) { + fail(`File is not in the TypeScript program: ${args.file}`) +} + +const symbolNode = findSymbolNode( + sourceFile, + args.symbol, + args.line, + args.column, +) +const refs = service.findReferences( + sourceFile.fileName, + symbolNode.getStart(sourceFile), +) + +if (!refs?.length) { + fail(`No references found for ${args.symbol}`) +} + +const rows = dedupeReferences(refs) +console.log( + `Found ${rows.length} references for ${args.symbol} at ${formatLocation(sourceFile, symbolNode.getStart(sourceFile))}`, +) + +for (const row of rows) { + const source = program?.getSourceFile(row.fileName) + if (!source) { + continue + } + + const location = formatLocation(source, row.textSpan.start) + const line = getLineText(row.fileName, source, row.textSpan.start) + const marker = row.isDefinition ? 
' [definition]' : '' + console.log(`${location}${marker} ${line}`) +} + +// Parses `--key value` pairs; a bare `--` is skipped and `--help`/`-h` sets `help`. +function parseArgs(rawArgs) { + const parsed = {} + for (let index = 0; index < rawArgs.length; index++) { + const arg = rawArgs[index] + + if (arg === '--') { + continue + } + + if (arg === '--help' || arg === '-h') { + parsed.help = true + continue + } + + if (!arg.startsWith('--')) { + fail(`Unexpected argument: ${arg}\n\n${usage}`) + } + + const key = arg.slice(2) + const value = rawArgs[index + 1] + if (!value || value.startsWith('--')) { + fail(`Missing value for --${key}\n\n${usage}`) + } + + parsed[key] = value + index++ + } + + return parsed +} + +// Picks the identifier to query: the one at line/column, else the only declaration named `symbol`. +function findSymbolNode(sourceFile, symbol, line, column) { + if (line !== undefined || column !== undefined) { + if (line === undefined || column === undefined) { + fail('Pass both --line and --column, or neither.') + } + + const lineNumber = Number(line) + const columnNumber = Number(column) + if (!Number.isInteger(lineNumber) || !Number.isInteger(columnNumber)) { + fail('--line and --column must be integers.') + } + + const position = sourceFile.getPositionOfLineAndCharacter( + lineNumber - 1, + columnNumber - 1, + ) + const node = findIdentifierAtPosition(sourceFile, position) + if (!node) { + fail(`No identifier at ${line}:${column}`) + } + + if (node.text !== symbol) { + fail(`Identifier at ${line}:${column} is ${node.text}, not ${symbol}`) + } + + return node + } + + const candidates = [] + visitIdentifiers(sourceFile, (node) => { + if (node.text === symbol) { + candidates.push(node) + } + }) + + if (!candidates.length) { + fail(`Symbol not found in ${relative(cwd, sourceFile.fileName)}: ${symbol}`) + } + + const declarations = candidates.filter(isDeclarationName) + if (declarations.length === 1) { + return declarations[0] + } + + const options = (declarations.length ? declarations : candidates) + .map((node) => ` ${formatLocation(sourceFile, node.getStart(sourceFile))}`) + .join('\n') + + fail(`Symbol is ambiguous. 
Pass --line and --column for one of:\n${options}`) +} + +// Calls onIdentifier for `node` and every identifier nested below it. +function visitIdentifiers(node, onIdentifier) { + if (ts.isIdentifier(node)) { + onIdentifier(node) + } + ts.forEachChild(node, (child) => visitIdentifiers(child, onIdentifier)) +} + +// Returns the last visited identifier whose start..end range (end included) contains position. +function findIdentifierAtPosition(sourceFile, position) { + let match + visitIdentifiers(sourceFile, (node) => { + const start = node.getStart(sourceFile) + const end = node.getEnd() + if (start <= position && position <= end) { + match = node + } + }) + return match +} + +// True when node is the `name` of one of the declaration kinds listed below. +function isDeclarationName(node) { + const parent = node.parent + if (!parent || parent.name !== node) { + return false + } + + return ( + ts.isClassDeclaration(parent) || + ts.isEnumDeclaration(parent) || + ts.isFunctionDeclaration(parent) || + ts.isGetAccessorDeclaration(parent) || + ts.isInterfaceDeclaration(parent) || + ts.isMethodDeclaration(parent) || + ts.isModuleDeclaration(parent) || + ts.isParameter(parent) || + ts.isPropertyDeclaration(parent) || + ts.isSetAccessorDeclaration(parent) || + ts.isTypeAliasDeclaration(parent) || + ts.isVariableDeclaration(parent) + ) +} + +// Flattens the reference groups, drops repeated spans, and sorts by relative path then offset. +function dedupeReferences(refs) { + const seen = new Set() + const rows = [] + + for (const group of refs) { + for (const reference of group.references) { + const key = `${resolve(reference.fileName)}:${reference.textSpan.start}:${reference.textSpan.length}` + if (seen.has(key)) { + continue + } + + seen.add(key) + rows.push(reference) + } + } + + rows.sort((left, right) => { + const fileCompare = relative(cwd, left.fileName).localeCompare( + relative(cwd, right.fileName), + ) + return fileCompare || left.textSpan.start - right.textSpan.start + }) + + return rows +} + +// Reads a file once and caches its text; returns undefined when the file does not exist. +function getFileText(fileName) { + if (fileTextCache.has(fileName)) { + return fileTextCache.get(fileName) + } + if (!existsSync(fileName)) { + return undefined + } + + const text = readFileSync(fileName, 'utf8') + fileTextCache.set(fileName, text) + return text +} + +// Returns the trimmed source line that contains position, or '' when it cannot be read. +function getLineText(fileName, sourceFile, position) { + const { line } = 
sourceFile.getLineAndCharacterOfPosition(position) + return getFileText(fileName)?.split(/\r?\n/)[line]?.trim() ?? '' +} + +function formatLocation(sourceFile, position) { + const { line, character } = sourceFile.getLineAndCharacterOfPosition(position) + return `${relative(cwd, sourceFile.fileName)}:${line + 1}:${character + 1}` +} + +function formatDiagnostics(diagnostics) { + return ts.formatDiagnosticsWithColorAndContext(diagnostics, { + getCanonicalFileName: (fileName) => fileName, + getCurrentDirectory: () => cwd, + getNewLine: () => ts.sys.newLine, + }) +} + +function fail(message) { + console.error(message) + process.exit(1) +} diff --git a/skills/bundle-size-optimization/SKILL.md b/skills/bundle-size-optimization/SKILL.md new file mode 100644 index 0000000000..d24538be69 --- /dev/null +++ b/skills/bundle-size-optimization/SKILL.md @@ -0,0 +1,130 @@ +--- +name: bundle-size-optimization +description: Use when working in this repository on JS bundle size, gzip regressions, benchmark scenarios, source attribution, treeshaking, dead code elimination, or Rolldown annotations. +--- + +# Bundle Size Optimization + +## Overview + +Optimize measured client bundles, not source text. The source of truth is `@benchmarks/bundle-size:build`, `benchmarks/bundle-size/results/current.json`, and emitted JS in `benchmarks/bundle-size/dist/`. 
+ +## Commands + +| Need | Command | +| ------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Full benchmark | `CI=1 NX_DAEMON=false pnpm nx run @benchmarks/bundle-size:build --outputStyle=stream --skipRemoteCache --skipNxCache >/tmp/bundle-size-build.log 2>&1 && pnpm benchmark:bundle-size:query` | +| One scenario | `CI=1 NX_DAEMON=false pnpm nx run @benchmarks/bundle-size:build --outputStyle=stream --skipRemoteCache --skipNxCache -- --scenario react-router.minimal >/tmp/bundle-size-build.log 2>&1 && pnpm benchmark:bundle-size:query --id react-router.minimal` | +| Read result | `pnpm benchmark:bundle-size:query --id react-router.minimal` | +| Compare results | `pnpm benchmark:bundle-size:diff --baseline /tmp/base-current.json --id react-router.minimal` | +| History deltas | `git fetch --quiet origin gh-pages && pnpm benchmark:bundle-size:history --id react-router.minimal --top-deltas 20` | +| Source attribution | `CI=1 NX_DAEMON=false pnpm nx run @benchmarks/bundle-size:build --outputStyle=stream --skipRemoteCache --skipNxCache -- --scenario react-router.minimal --analysis >/tmp/bundle-size-build.log 2>&1 && pnpm benchmark:bundle-size:analyze --id react-router.minimal` | +| Symbol refs | `pnpm ts:symbol-references -- --project packages/router-core/tsconfig.json --file packages/router-core/src/utils.ts --symbol last` | + +## Rules + +- Run one Nx command at a time. +- Redirect noisy Nx build output to a log file, then print only `query`, `diff`, or `analyze` output. If the build fails, search/read the log for the error instead of printing the full log. +- Track `gzipBytes` first; also inspect `initialGzipBytes`, `rawBytes`, `brotliBytes`, `jsFiles`, and per-file `files`. 
+- Dist paths use `scenarioDir`/`outDir`, not metric ids: `react-router.minimal` maps to `dist/react-router-minimal/`. +- For tiny changes, measure after each candidate; gzip can move opposite raw bytes. +- To compare a base commit, run the same scenario in a separate worktree under `/var/folders/6f/2t42ntqs4yv4h6qwzbh5pmcm0000gn/T/opencode` and diff the two `current.json` files. +- Use history for prior patterns and baselines, not source attribution. It is commit-level data. +- Runtime performance and security may never be sacrificed for bundle size. +- Do not stop after the first verified win. Keep iterating through reasonable local, emitted-JS, and algorithmic candidates until measured regressions, readability, or risk rule out the remaining paths. +- When inlining helpers or simplifying non-obvious logic, preserve readability with a short comment explaining the meaning/invariant, not the mechanics. +- Before inlining or deleting a helper/function, use the TypeScript language-service script to check references: `pnpm ts:symbol-references -- --project <package-dir>/tsconfig.json --file <path/to/file.ts> --symbol <name>`. If the helper is used elsewhere, inlining one use is usually not worth it for bundle size unless measurement proves otherwise. If no references remain, delete the helper and verify with the script. +- Run unit/types tests for the package being modified plus relevant e2e tests under `e2e/`. +- Continue optimizing until further reductions would make code unreadable/unmaintainable, or no more reductions remain. A user-provided byte target is not required. +- Be willing to make large, risky architectural or algorithmic changes, but only within the runtime, security, readability, maintainability, and test constraints above. +- If you are unsure whether to land a passing change because runtime semantics might change, add unit/e2e tests and/or ask clarifying questions. If codebase exploration can answer the question, explore instead. For each question, provide your recommended answer. 
+- If runtime performance implications are unclear, add a focused Vitest benchmark (`*.bench.ts`) comparing candidate implementations across realistic and edge-case inputs, like `packages/router-core/tests/closing-tag-detection.bench.ts`; verify implementations produce identical results before `bench()` cases. +- If you learn a reusable bundle-size pattern, hit a tooling gap, or lack analysis capability, ask the user before updating this skill or the benchmark scripts. + +## Benchmark Rules + +- During iteration, pick one bundle-size scenario that is most likely to contain the changed code. Use `react-router.minimal` for router-core and react-router changes by default; use `solid-router.minimal` for solid-router changes, `vue-router.minimal` for vue-router changes, `react-start.minimal` or `react-start.rsbuild.minimal` for React Start changes, and `solid-start.minimal` for Solid Start changes. +- Override the default targeted scenario when code is only pulled into a fuller scenario. For example, if a hook/function is referenced only by `solid-router.full`, iterate on `solid-router.full` instead of `solid-router.minimal`. +- If a change can affect several package families, pick the smallest scenario that imports the shared code for quick iteration, then spot-check the next most likely affected family before finalizing. +- Before finalizing, run the full bundle-size benchmark without `--scenario` and compare all scenarios. Look for outliers/anomalies even when the targeted scenario improved. +- Benchmark the changed mechanism directly, not just the public API around it. +- Keep broad realistic scenarios as smoke/regression coverage; use focused cases for proof. +- Compare baseline and current with the same benchmark file. Use a separate worktree when only implementation should differ. +- Run noisy benchmark families separately with `-t <pattern>`; all-in-one suites can perturb tiny operations. 
+- For branchy fast paths, include best-case, worst-case, and expected mixed distributions. +- Batch ultra-fast operations inside one benchmark iteration when single calls are dominated by timer/outlier noise. +- Read `hz`, `mean`, `p99`/`p999`, `rme`, and samples together. Do not trust one noisy `hz` value. +- Treat high `rme` or large p999 outliers as directional only; rerun narrower cases before deciding. +- Name cases after the behavior under test so future readers know what result matters. +- Verify correctness before timed cases so benchmarks do not measure invalid or dead paths. + +## Attribution Round + +Before calling an optimization final, prove which exact production hunks should remain: + +1. Snapshot the unoptimized baseline and the full candidate metrics. +2. Split the production diff into logical hunks or dependent hunk groups. Include syntax-only and readability-only edits if they can affect emitted code. +3. Benchmark each independent hunk alone against the same baseline. Benchmark relevant combinations when hunks only matter together or interact. +4. For each hunk/group, record bundle metrics and focused performance results when runtime cost could change. +5. Keep only changes that improve bundle size or performance, or are required for correctness/tests/style and do not regress measured results. Revert neutral or harmful optimization-only changes. +6. Rebuild and remeasure the final composed version. It must not be larger or slower than the pre-attribution candidate unless the retained change is explicitly required for correctness or style. + +## Optimization Loop + +1. Measure baseline scenario. +2. Inspect diff, emitted JS, per-file sizes, and analysis sources if needed. +3. Analyze the algorithm before syntax. Identify redundant loops, duplicate branches, repeated scans/slices/lowercasing, allocation-heavy paths, search order, and data-shape choices. +4. 
Make the smallest behavior-preserving algorithmic edit that removes work or code shape first; use syntax-only edits only after algorithmic candidates are exhausted. +5. Re-measure and keep only proven wins. +6. Run package unit/types, relevant e2e, and `git diff --check`. +7. Run the attribution round, then the post-optimization coverage/perf workflow before finalizing. + +## Algorithmic Pass + +For hot files, split the code into phases and optimize each phase by work removed, not characters removed: + +- Parsing/scanning: prefer one pass over helper scans plus substring allocation; keep offsets into source strings when possible. +- Tree/building: fuse identical node-creation branches when the data shape is shared; cache repeated route/options fields in locals. +- Matching/search: preserve priority order, but merge candidate loops only when stack push order stays identical; avoid allocation in suffix/prefix checks unless correctness needs it. +- Extraction/validation: compute params lazily and carry state only where needed; do not reuse partial params across skipped/pathless branches unless covered by tests. +- Sorting/scoring: replace helper calls and comparator ladders only when measured and still readable. +- Sorting/tree post-processing: if a full tree walk only sorts sparse child arrays, record arrays when they become sortable (length reaches 2) during construction, then sort the recorded arrays once. + +After each candidate, run focused perf benchmarks before bundle measurement. Reject wins that hide runtime regressions or make invariants hard to audit. + +## Post-Optimization Coverage/Perf Workflow + +When done optimizing: + +1. Spawn 5 subagents to review the optimization diff against existing tests. Ask each to identify missing unit test cases that could fail with the current changes or newly uncovered edge cases, and missing performance benchmarks that could hide regressions. +2. 
If a possible regression is unclear, ask the user or explore the codebase until the expected behavior is clear. +3. Use their input to add focused unit tests and benchmarks. +4. Commit only the tests/benchmarks/supporting test-script changes. +5. Stash the implementation changes. +6. Run tests, performance benchmarks, and the relevant bundle-size measurement, then write BEFORE results to `RESULT-optimization-{topic}.md`. +7. Pop the implementation changes. +8. Run the same tests, performance benchmarks, and bundle-size measurement, then append AFTER results to the same file. +9. When reviewing benchmark output, consider statistical quality: standard deviation, margin of error, variance/noise, sample count, and percentiles. Re-run or narrow conclusions when results are noisy. +10. Compare BEFORE and AFTER. If anything regressed, iterate until green or revert the regression. + +Useful patterns: remove prod-only strings, remove unused exports, flatten wrappers, inline one-use helpers, avoid duplicate literals, improve treeshaking boundaries, simplify branches after preserving behavior. + +## DCE And Annotations + +Rolldown removes code only when unused and side-effect-free. Property reads may trigger getters; storage/global access can observe or throw. 
+ +| Annotation | Valid | Unsafe | +| ----------------------------------------- | --------------------------------------------------------------------------- | ---------------------------------------------------------------------------------- | +| `/* @__PURE__ */ call()` | immediately before a call/new expression whose unused result can be dropped | declarations, property reads, setup, storage, DOM/history/listener code | +| `/* @__NO_SIDE_EFFECTS__ */ function f()` | every call of the function is side-effect-free | functions touching globals, storage, DOM, history, subscriptions, warnings, caches | +| `sideEffects`/module flags | module has no import-time effects when unused | CSS, polyfills, storage hydration, DOM/history setup | + +## Red Flags + +- Using package `test:build` as a size proxy. +- Trusting source bytes or raw bytes instead of measured `gzipBytes`. +- Inspecting `dist/<metric-id>/` instead of `dist/<scenarioDir>/`. +- Adding DCE annotations to effectful code because the byte target is small. +- Skipping behavior or benchmark tests because the change is “only bundle size.” +- Skipping hunk-level attribution and keeping changes only because the full candidate improved. +- Trading runtime performance, security, readability, or maintainability for bytes.