diff --git a/.oxfmtrc.json b/.oxfmtrc.json
index 32bec925..ce2add4d 100644
--- a/.oxfmtrc.json
+++ b/.oxfmtrc.json
@@ -14,6 +14,7 @@
"**/*.mdx",
"**/*.md",
"*-lock.*",
- "*.lock"
+ "*.lock",
+ ".*-cache"
]
}
diff --git a/.oxlintrc.json b/.oxlintrc.json
index e6345136..05d0bec4 100644
--- a/.oxlintrc.json
+++ b/.oxlintrc.json
@@ -65,7 +65,8 @@
"*.mdx",
"*.md",
"*.json",
- "*-lock.*"
+ "*-lock.*",
+ ".*-cache"
],
"overrides": [
{
diff --git a/docs/src/pages/en/(pages)/deploy/docker.mdx b/docs/src/pages/en/(pages)/deploy/docker.mdx
index 62be79ac..1b288272 100644
--- a/docs/src/pages/en/(pages)/deploy/docker.mdx
+++ b/docs/src/pages/en/(pages)/deploy/docker.mdx
@@ -118,6 +118,44 @@ docker run -p 8080:8080 -e PORT=8080 my-app:latest
If you build with `--sourcemap`, the Dockerfile will also set `NODE_OPTIONS="--enable-source-maps"`.
+
+## Kubernetes
+
+
+When deploying to Kubernetes, configure liveness and readiness probes using the built-in health check endpoints:
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: my-app
+spec:
+ template:
+ spec:
+ terminationGracePeriodSeconds: 30
+ containers:
+ - name: app
+ image: my-app:latest
+ ports:
+ - containerPort: 3000
+ livenessProbe:
+ httpGet:
+ path: /__react_server_health__
+ port: 3000
+ initialDelaySeconds: 5
+ periodSeconds: 10
+ readinessProbe:
+ httpGet:
+ path: /__react_server_ready__
+ port: 3000
+ initialDelaySeconds: 3
+ periodSeconds: 5
+```
+
+The server automatically handles graceful shutdown on `SIGTERM` — it stops accepting new connections and drains in-flight requests before exiting. See the [HTTP layer](/features/http-layer) page for tuning keep-alive timeouts, request timeouts, and shutdown behavior.
+
+> **Tip:** When running behind an AWS ALB or NLB, the default `keepAliveTimeout` of 65 seconds is configured to exceed the load balancer's 60-second idle timeout, preventing 502 errors under load. You can adjust this in your `react-server.config.mjs` via `server.keepAliveTimeout`.
+
## How it works
diff --git a/docs/src/pages/en/(pages)/features/cluster.mdx b/docs/src/pages/en/(pages)/features/cluster.mdx
index 670ec979..2583b246 100644
--- a/docs/src/pages/en/(pages)/features/cluster.mdx
+++ b/docs/src/pages/en/(pages)/features/cluster.mdx
@@ -37,4 +37,6 @@ You can also enable cluster mode by setting the `cluster` option in your `react-
}
```
+In cluster mode, if a worker process dies unexpectedly, it is automatically restarted. During graceful shutdown (`SIGTERM`/`SIGINT`), the primary process waits for all workers to drain their connections before exiting. See the [HTTP layer](/features/http-layer) page for tuning `shutdownTimeout` and other production server options.
+
> **Note:** It's best to not use more cluster workers than the number of CPU cores available on your machine.
diff --git a/docs/src/pages/en/(pages)/features/http-layer.mdx b/docs/src/pages/en/(pages)/features/http-layer.mdx
new file mode 100644
index 00000000..ceabcd76
--- /dev/null
+++ b/docs/src/pages/en/(pages)/features/http-layer.mdx
@@ -0,0 +1,161 @@
+---
+title: HTTP layer
+category: Features
+order: 9
+---
+
+import Link from "../../../../components/Link.jsx";
+
+# HTTP layer
+
+The production HTTP server in `@lazarv/react-server` is built on Node.js `node:http` (or `node:http2` for HTTPS without proxy) and includes built-in support for keep-alive management, request timeouts, admission control, health check endpoints, and graceful shutdown. These features are critical when running behind a load balancer (e.g. AWS ALB/NLB, k8s Ingress) to prevent 502 errors, connection exhaustion, and dropped requests during deployments.
+
+
+## Configuration
+
+
+All HTTP layer options live under the `server` section of your config file. Every value has a safe default that works well with common load balancer configurations.
+
+```mjs filename="react-server.config.mjs"
+export default {
+ server: {
+ keepAliveTimeout: 65000,
+ headersTimeout: 66000,
+ requestTimeout: 30000,
+ maxConcurrentRequests: 100,
+ shutdownTimeout: 25000,
+ },
+};
+```
+
+| Option | Default | Description |
+|---|---|---|
+| `keepAliveTimeout` | `65000` | How long (ms) the server keeps idle connections open. Must exceed your load balancer's idle timeout to prevent 502 errors. AWS ALB defaults to 60s, so 65s is a safe starting point. |
+| `headersTimeout` | `66000` | Maximum time (ms) to wait for the client to send the full request headers. Must exceed `keepAliveTimeout`. |
+| `requestTimeout` | `30000` | Maximum time (ms) for the client to send the complete request (headers + body). Set to `0` to disable. |
+| `maxConcurrentRequests` | `0` | Maximum number of concurrent requests before the server responds with `503 Service Busy`. Set to `0` to disable admission control. |
+| `shutdownTimeout` | `25000` | After receiving `SIGTERM`/`SIGINT`, the server stops accepting new connections and waits up to this duration (ms) for in-flight requests to complete before force-exiting. Should be less than your k8s `terminationGracePeriodSeconds` (default 30s). |
+
+
+## Keep-alive and timeouts
+
+
+Node.js defaults `keepAliveTimeout` to 5 seconds, which is far too low for environments with a load balancer. If the server closes an idle connection before the load balancer does, the load balancer may send a request on a connection the server has already torn down, resulting in a **502 Bad Gateway**.
+
+The default values in `@lazarv/react-server` are chosen to avoid this:
+
+- `keepAliveTimeout` (65s) exceeds the AWS ALB default idle timeout (60s)
+- `headersTimeout` (66s) exceeds `keepAliveTimeout` as required by Node.js
+- `requestTimeout` (30s) prevents slow or stalled clients from holding sockets indefinitely
+
+
+## Admission control
+
+
+When `maxConcurrentRequests` is set to a value greater than `0`, the server tracks in-flight requests and responds with `503 Service Busy` (with a `Retry-After: 1` header) when the limit is reached. This prevents thundering-herd scenarios where all requests compete for CPU/memory simultaneously, causing all of them to be slow rather than serving some fast and rejecting others.
+
+The counter is decremented after the response is fully sent, ensuring accurate tracking even for streaming responses. On error paths, the counter is also properly decremented.
+
+
+## Adaptive backpressure
+
+
+`@lazarv/react-server` ships with an adaptive backpressure system that is **enabled by default** in production. It uses **Event Loop Utilization (ELU)** — `performance.eventLoopUtilization()` — as a direct measure of Node.js event loop saturation. Unlike CPU% or latency-based algorithms, ELU is unaffected by workload heterogeneity (switching between fast and slow routes) and only rises when the event loop itself is genuinely saturated.
+
+The control loop uses **AIMD (Additive Increase, Multiplicative Decrease)**:
+- **ELU < 0.95**: increase the limit by `√limit` per window (fast recovery)
+- **ELU ≥ 0.95**: decrease the limit by 10% per window (gentle backoff)
+
+The limiter starts wide open (`initialLimit = maxLimit`) and has **zero overhead** on the fast path — it is invisible under normal load and only tightens when the event loop is genuinely saturated.
+
+To customize or disable it, use `server.backpressure`:
+
+```mjs filename="react-server.config.mjs"
+export default {
+ server: {
+ backpressure: {
+ enabled: true, // set to false to disable
+ initialLimit: 1000, // starting limit (defaults to maxLimit)
+ minLimit: 1, // floor
+ maxLimit: 1000, // ceiling
+ eluMax: 0.95, // skip queuing above 95% ELU
+ sampleWindow: 1000, // recalculate every 1s
+ smoothingFactor: 0.2, // EWMA latency smoothing
+ queueSize: 100, // max requests waiting for a slot
+ queueTimeout: 5000, // max wait time (ms) before 503
+ },
+ },
+};
+```
+
+| Option | Default | Description |
+|---|---|---|
+| `enabled` | `true` | Enable adaptive backpressure. Set to `false` to disable and fall back to static `maxConcurrentRequests`. |
+| `initialLimit` | `maxLimit` | Starting concurrency limit. Defaults to `maxLimit` (start wide open, tighten under overload). |
+| `minLimit` | `1` | Floor — the adaptive limit never drops below this. |
+| `maxLimit` | `1000` | Ceiling — capped by `maxConcurrentRequests` when both are set. |
+| `eluMax` | `0.95` | ELU level (0–1) where the limit decreases and excess requests skip the queue. |
+| `sampleWindow` | `1000` | Interval (ms) for recalculation and ELU sampling. |
+| `smoothingFactor` | `0.2` | EWMA factor (0–1) for latency smoothing. Higher = more reactive. |
+| `queueSize` | `100` | Maximum requests waiting in the backpressure queue. When full, additional requests are immediately rejected with 503. |
+| `queueTimeout` | `5000` | Maximum time (ms) a request waits in the queue before being rejected with 503. Should be shorter than your load balancer's request timeout. |
+
+When both `backpressure.enabled` and `maxConcurrentRequests` are configured, the static limit acts as the hard ceiling for the adaptive limit. This gives you a safety net: the algorithm can explore up to `maxConcurrentRequests` but never exceed it.
+
+### How the queue works
+
+Instead of immediately rejecting requests when the concurrency limit is reached, the limiter places them in a bounded FIFO queue. When an in-flight request completes, the freed slot is handed directly to the next queued waiter rather than returning to the general pool — ensuring fair ordering.
+
+Requests are removed from the queue when:
+- A slot becomes available → the request proceeds normally
+- `queueTimeout` expires → the request is rejected with 503
+- The client disconnects → the request is silently discarded (no wasted work)
+- ELU exceeds `eluMax` → requests bypass the queue entirely and are immediately rejected
+
+This absorbs short traffic bursts transparently while still shedding load during sustained overload.
+
+> **Tip:** Start with the defaults and monitor. The limiter exposes stats (current limit, inflight count, queue depth, ELU, smoothed latency) that you can pipe into your observability stack to tune the parameters for your workload.
+
+
+## Health check endpoints
+
+
+The production server exposes two built-in endpoints for Kubernetes liveness and readiness probes. These endpoints are registered at the very top of the middleware chain, bypassing all other middleware for minimal latency.
+
+| Endpoint | Purpose | Response |
+|---|---|---|
+| `/__react_server_health__` | Liveness probe | `200 ok` — the process is alive |
+| `/__react_server_ready__` | Readiness probe | `200 ok` when the worker thread is running, `503 not ready` when the worker has exited |
+
+Example Kubernetes pod spec:
+
+```yaml
+livenessProbe:
+ httpGet:
+ path: /__react_server_health__
+ port: 3000
+ initialDelaySeconds: 5
+ periodSeconds: 10
+readinessProbe:
+ httpGet:
+ path: /__react_server_ready__
+ port: 3000
+ initialDelaySeconds: 3
+ periodSeconds: 5
+```
+
+> **Tip:** Point your liveness probe at `/__react_server_health__` rather than `/`. The health endpoint returns instantly without touching the SSR pipeline, so it won't false-fail under heavy rendering load.
+
+
+## Graceful shutdown
+
+
+When the server receives `SIGTERM` or `SIGINT`:
+
+1. It stops accepting new connections
+2. In-flight requests are allowed to complete
+3. After `shutdownTimeout` milliseconds, the process force-exits
+
+In [cluster mode](/features/cluster), the primary process waits for all workers to drain before exiting. If a worker dies unexpectedly during normal operation, it is automatically restarted — rather than taking down the entire service.
+
+This ensures zero-downtime rolling deployments on Kubernetes and other container orchestrators. The default `shutdownTimeout` of 25 seconds leaves a 5-second buffer within the default k8s `terminationGracePeriodSeconds` of 30 seconds.
diff --git a/docs/src/pages/en/(pages)/features/http.mdx b/docs/src/pages/en/(pages)/features/http.mdx
index 7d47df69..1c6ef389 100644
--- a/docs/src/pages/en/(pages)/features/http.mdx
+++ b/docs/src/pages/en/(pages)/features/http.mdx
@@ -472,7 +472,23 @@ export default function MyComponent() {
}
```
-The `after()` hook can be called multiple times to register multiple callbacks. All registered callbacks run concurrently via `Promise.allSettled` after the response stream completes, so one failing callback does not prevent the others from running.
+The `after()` hook can be called multiple times to register multiple callbacks. All registered callbacks run concurrently via `Promise.allSettled` after the response stream completes, so one failing callback does not prevent the others from running. If the request failed with an error, the error is passed to each callback as the first argument:
+
+```jsx
+import { after, logger } from "@lazarv/react-server";
+
+export default function MyComponent() {
+ after((error) => {
+ if (error) {
+ logger.error("Request failed:", error.message);
+ } else {
+ logger.info("Request completed successfully");
+ }
+ });
+
+  return <div>Hello World</div>;
+}
+```
```jsx
import { after } from "@lazarv/react-server";
diff --git a/docs/src/pages/ja/(pages)/deploy/docker.mdx b/docs/src/pages/ja/(pages)/deploy/docker.mdx
index 88eb18aa..3e0e847f 100644
--- a/docs/src/pages/ja/(pages)/deploy/docker.mdx
+++ b/docs/src/pages/ja/(pages)/deploy/docker.mdx
@@ -118,6 +118,44 @@ docker run -p 8080:8080 -e PORT=8080 my-app:latest
`--sourcemap` でビルドした場合、Dockerfile に `NODE_OPTIONS="--enable-source-maps"` も設定されます。
+
+## Kubernetes
+
+
+Kubernetesにデプロイする場合、組み込みのヘルスチェックエンドポイントを使用してlivenessプローブとreadinessプローブを設定します:
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: my-app
+spec:
+ template:
+ spec:
+ terminationGracePeriodSeconds: 30
+ containers:
+ - name: app
+ image: my-app:latest
+ ports:
+ - containerPort: 3000
+ livenessProbe:
+ httpGet:
+ path: /__react_server_health__
+ port: 3000
+ initialDelaySeconds: 5
+ periodSeconds: 10
+ readinessProbe:
+ httpGet:
+ path: /__react_server_ready__
+ port: 3000
+ initialDelaySeconds: 3
+ periodSeconds: 5
+```
+
+サーバーは`SIGTERM`でグレースフルシャットダウンを自動的に処理します。新しいコネクションの受け入れを停止し、処理中のリクエストをドレインしてから終了します。Keep-Aliveタイムアウト、リクエストタイムアウト、シャットダウン動作の調整については、[HTTPレイヤー](/ja/features/http-layer)ページを参照してください。
+
+> **ヒント:** AWS ALBまたはNLBの背後で実行する場合、デフォルトの`keepAliveTimeout`は65秒に設定されており、ロードバランサーの60秒アイドルタイムアウトを超えるため、高負荷時の502エラーを防ぎます。`react-server.config.mjs`の`server.keepAliveTimeout`で調整できます。
+
## 仕組み
diff --git a/docs/src/pages/ja/(pages)/features/cluster.mdx b/docs/src/pages/ja/(pages)/features/cluster.mdx
index fe73756f..1ab50989 100644
--- a/docs/src/pages/ja/(pages)/features/cluster.mdx
+++ b/docs/src/pages/ja/(pages)/features/cluster.mdx
@@ -37,4 +37,6 @@ REACT_SERVER_CLUSTER="on" pnpm react-server start
}
```
+クラスタモードでは、ワーカープロセスが予期せず終了した場合、自動的に再起動されます。グレースフルシャットダウン(`SIGTERM`/`SIGINT`)時には、プライマリプロセスはすべてのワーカーがコネクションをドレインするまで待機してから終了します。`shutdownTimeout`やその他のプロダクションサーバーオプションの調整については、[HTTPレイヤー](/ja/features/http-layer)ページを参照してください。
+
> **Note:** マシンで使用可能なCPUコア数よりも多くのクラスタワーカーを使用しない方がよいでしょう。
diff --git a/docs/src/pages/ja/(pages)/features/http-layer.mdx b/docs/src/pages/ja/(pages)/features/http-layer.mdx
new file mode 100644
index 00000000..263c6d39
--- /dev/null
+++ b/docs/src/pages/ja/(pages)/features/http-layer.mdx
@@ -0,0 +1,161 @@
+---
+title: HTTPレイヤー
+category: Features
+order: 9
+---
+
+import Link from "../../../../components/Link.jsx";
+
+# HTTPレイヤー
+
+`@lazarv/react-server` のプロダクションHTTPサーバーは、Node.jsの `node:http`(またはプロキシなしHTTPSの場合は `node:http2`)上に構築されており、Keep-Alive管理、リクエストタイムアウト、アドミッション制御、ヘルスチェックエンドポイント、グレースフルシャットダウンの組み込みサポートを含んでいます。これらの機能は、ロードバランサー(AWS ALB/NLB、k8s Ingressなど)の背後で実行する場合に、502エラー、コネクション枯渇、デプロイ中のリクエスト消失を防ぐために重要です。
+
+
+## 設定
+
+
+HTTPレイヤーのすべてのオプションは、設定ファイルの `server` セクションに配置します。すべての値には、一般的なロードバランサー設定で適切に動作する安全なデフォルト値があります。
+
+```mjs filename="react-server.config.mjs"
+export default {
+ server: {
+ keepAliveTimeout: 65000,
+ headersTimeout: 66000,
+ requestTimeout: 30000,
+ maxConcurrentRequests: 100,
+ shutdownTimeout: 25000,
+ },
+};
+```
+
+| オプション | デフォルト | 説明 |
+|---|---|---|
+| `keepAliveTimeout` | `65000` | アイドルコネクションを開いたままにする時間(ミリ秒)。502エラーを防ぐため、ロードバランサーのアイドルタイムアウトを超える値に設定してください。AWS ALBのデフォルトは60秒なので、65秒が安全な開始点です。 |
+| `headersTimeout` | `66000` | クライアントが完全なリクエストヘッダーを送信するまでの最大待機時間(ミリ秒)。`keepAliveTimeout`を超える値に設定してください。 |
+| `requestTimeout` | `30000` | クライアントが完全なリクエスト(ヘッダー+ボディ)を送信するまでの最大時間(ミリ秒)。`0`に設定すると無効になります。 |
+| `maxConcurrentRequests` | `0` | サーバーが`503 Service Busy`を返すまでの最大同時リクエスト数。`0`に設定するとアドミッション制御が無効になります。 |
+| `shutdownTimeout` | `25000` | `SIGTERM`/`SIGINT`を受信後、サーバーは新しいコネクションの受け入れを停止し、処理中のリクエストが完了するまでこの時間(ミリ秒)待機してから強制終了します。k8sの`terminationGracePeriodSeconds`(デフォルト30秒)より短く設定してください。 |
+
+
+## Keep-Aliveとタイムアウト
+
+
+Node.jsのデフォルトの `keepAliveTimeout` は5秒であり、ロードバランサーがある環境では短すぎます。ロードバランサーよりも先にサーバーがアイドルコネクションを閉じると、ロードバランサーはサーバーが既に切断したコネクションでリクエストを送信する可能性があり、**502 Bad Gateway** が発生します。
+
+`@lazarv/react-server` のデフォルト値は、これを回避するように選択されています:
+
+- `keepAliveTimeout`(65秒)はAWS ALBのデフォルトアイドルタイムアウト(60秒)を超えます
+- `headersTimeout`(66秒)はNode.jsの要件通り `keepAliveTimeout` を超えます
+- `requestTimeout`(30秒)は低速またはストールしたクライアントがソケットを無期限に保持するのを防ぎます
+
+
+## アドミッション制御
+
+
+`maxConcurrentRequests` が `0` より大きい値に設定されている場合、サーバーは処理中のリクエストを追跡し、制限に達すると `503 Service Busy`(`Retry-After: 1` ヘッダー付き)で応答します。これにより、すべてのリクエストがCPU/メモリを同時に奪い合って全体が遅くなるサンダリングハード(thundering herd)シナリオを防ぎ、一部のリクエストを高速に処理して残りを拒否できるようにします。
+
+カウンターはレスポンスが完全に送信された後にデクリメントされるため、ストリーミングレスポンスでも正確な追跡が保証されます。エラーパスでもカウンターは適切にデクリメントされます。
+
+
+## アダプティブバックプレッシャー
+
+
+`@lazarv/react-server` はプロダクション環境で**デフォルトで有効**なアダプティブバックプレッシャーシステムを搭載しています。**イベントループ使用率(ELU)** — `performance.eventLoopUtilization()` — を使用してNode.jsのイベントループ飽和度を直接測定します。CPU%やレイテンシーベースのアルゴリズムとは異なり、ELUはワークロードの不均一性(高速ルートと低速ルートの切り替え)の影響を受けず、イベントループ自体が真に飽和したときのみ上昇します。
+
+制御ループは**AIMD(加法増加・乗法減少)**を使用します:
+- **ELU < 0.95**: ウィンドウごとに `√limit` ずつ制限を増加(高速回復)
+- **ELU ≥ 0.95**: ウィンドウごとに10%ずつ制限を減少(緩やかなバックオフ)
+
+リミッターは全開(`initialLimit = maxLimit`)で開始し、ファストパスで**オーバーヘッドゼロ** — 通常の負荷では不可視で、イベントループが真に飽和したときのみ制限を強化します。
+
+カスタマイズまたは無効にするには `server.backpressure` を使用します:
+
+```mjs filename="react-server.config.mjs"
+export default {
+ server: {
+ backpressure: {
+ enabled: true, // falseで無効化
+ initialLimit: 1000, // 開始制限(デフォルトはmaxLimit)
+ minLimit: 1, // 下限
+ maxLimit: 1000, // 上限
+ eluMax: 0.95, // ELU 95%超でキューをスキップ
+ sampleWindow: 1000, // 1秒ごとに再計算
+ smoothingFactor: 0.2, // EWMAレイテンシー平滑化
+ queueSize: 100, // スロット待ちの最大リクエスト数
+ queueTimeout: 5000, // 503までの最大待機時間(ミリ秒)
+ },
+ },
+};
+```
+
+| オプション | デフォルト | 説明 |
+|---|---|---|
+| `enabled` | `true` | アダプティブバックプレッシャーを有効化。`false`に設定すると無効になり、静的な`maxConcurrentRequests`にフォールバックします。 |
+| `initialLimit` | `maxLimit` | 開始時の同時実行制限。デフォルトは`maxLimit`(最初は全開、過負荷時に制限)。 |
+| `minLimit` | `1` | 下限 — アダプティブ制限はこの値を下回りません。 |
+| `maxLimit` | `1000` | 上限 — 両方が設定されている場合、`maxConcurrentRequests`で制限されます。 |
+| `eluMax` | `0.95` | 制限が縮小し、超過リクエストがキューをスキップするELUレベル(0–1)。 |
+| `sampleWindow` | `1000` | 再計算とELUサンプリングの間隔(ミリ秒)。 |
+| `smoothingFactor` | `0.2` | レイテンシー平滑化のEWMA係数(0–1)。高い値 = より反応的。 |
+| `queueSize` | `100` | バックプレッシャーキューで待機できる最大リクエスト数。満杯の場合、追加のリクエストは即座に503で拒否されます。 |
+| `queueTimeout` | `5000` | リクエストがキューで待機する最大時間(ミリ秒)。503で拒否されるまでの時間です。ロードバランサーのリクエストタイムアウトより短く設定してください。 |
+
+`backpressure.enabled` と `maxConcurrentRequests` の両方が設定されている場合、静的制限がアダプティブ制限のハードシーリングとして機能します。これにより安全ネットが提供されます:アルゴリズムは `maxConcurrentRequests` まで探索できますが、それを超えることはありません。
+
+### キューの仕組み
+
+同時実行制限に達したとき、リクエストを即座に拒否するのではなく、リミッターは制限付きのFIFOキューに配置します。処理中のリクエストが完了すると、解放されたスロットは汎用プールに戻るのではなく、次のキュー待ちのリクエストに直接渡されます — 公平な順序を保証します。
+
+リクエストは以下の場合にキューから削除されます:
+- スロットが利用可能になった場合 → リクエストは通常通り処理されます
+- `queueTimeout` が期限切れになった場合 → リクエストは503で拒否されます
+- クライアントが切断した場合 → リクエストはサイレントに破棄されます(無駄な作業なし)
+- ELUが `eluMax` を超えた場合 → リクエストはキューを完全にバイパスし、即座に拒否されます
+
+これにより、短いトラフィックバーストは透過的に吸収されながら、持続的な過負荷時には負荷が適切にシェッドされます。
+
+> **ヒント:** デフォルト値で開始し、監視してください。リミッターは統計情報(現在の制限、処理中の数、キュー深度、ELU、平滑化されたレイテンシー)を公開しており、これをオブザーバビリティスタックに送信してワークロードに合わせてパラメーターを調整できます。
+
+
+## ヘルスチェックエンドポイント
+
+
+プロダクションサーバーは、Kubernetesのlivenessプローブおよびreadinessプローブ用に2つの組み込みエンドポイントを公開しています。これらのエンドポイントはミドルウェアチェーンの最上位に登録されており、最小限のレイテンシーのために他のすべてのミドルウェアをバイパスします。
+
+| エンドポイント | 目的 | レスポンス |
+|---|---|---|
+| `/__react_server_health__` | Livenessプローブ | `200 ok` — プロセスが生存中 |
+| `/__react_server_ready__` | Readinessプローブ | ワーカースレッドが実行中の場合は`200 ok`、ワーカーが終了している場合は`503 not ready` |
+
+Kubernetes Podスペックの例:
+
+```yaml
+livenessProbe:
+ httpGet:
+ path: /__react_server_health__
+ port: 3000
+ initialDelaySeconds: 5
+ periodSeconds: 10
+readinessProbe:
+ httpGet:
+ path: /__react_server_ready__
+ port: 3000
+ initialDelaySeconds: 3
+ periodSeconds: 5
+```
+
+> **ヒント:** livenessプローブは `/` ではなく `/__react_server_health__` に向けてください。ヘルスエンドポイントはSSRパイプラインに触れることなく即座にレスポンスを返すため、レンダリング高負荷時に誤って失敗することがありません。
+
+
+## グレースフルシャットダウン
+
+
+サーバーが `SIGTERM` または `SIGINT` を受信した場合:
+
+1. 新しいコネクションの受け入れを停止します
+2. 処理中のリクエストは完了が許可されます
+3. `shutdownTimeout` ミリ秒後にプロセスが強制終了します
+
+[クラスタモード](/ja/features/cluster)では、プライマリプロセスはすべてのワーカーがドレインされるまで待機してから終了します。通常の運用中にワーカーが予期せず終了した場合、サービス全体を停止するのではなく、自動的に再起動されます。
+
+これにより、Kubernetesやその他のコンテナオーケストレーターでのゼロダウンタイムローリングデプロイメントが保証されます。デフォルトの `shutdownTimeout` の25秒は、k8sのデフォルトの `terminationGracePeriodSeconds`(30秒)内に5秒のバッファーを残します。
diff --git a/docs/src/pages/ja/(pages)/features/http.mdx b/docs/src/pages/ja/(pages)/features/http.mdx
index e984c733..efe1f2e3 100644
--- a/docs/src/pages/ja/(pages)/features/http.mdx
+++ b/docs/src/pages/ja/(pages)/features/http.mdx
@@ -408,4 +408,118 @@ export default function MyComponent() {
return Render lock
;
}
-```
\ No newline at end of file
+```
+
+
+## ロガー
+
+
+`logger` を使用すると、ランタイムの組み込みロガーを使ってメッセージをログに記録できます。`logger` オブジェクトは `info`、`warn`、`error`、`debug` メソッドを提供し、ランタイムのロギングシステムと統合されて、一貫したフォーマットの出力を提供します。
+
+```jsx
+import { logger } from "@lazarv/react-server";
+
+export default function MyComponent() {
+ logger.info("Rendering MyComponent");
+
+  return <div>Hello World</div>;
+}
+```
+
+`logger` は開発モードではランタイムのVite統合ロガーを自動的に使用してきれいにフォーマットされた出力を提供し、プロダクションでは `console` にフォールバックします。コンテキストを認識するため、`after()` コールバック内で呼び出された場合、ログ出力に `(after)` ラベルが付加され、レスポンス後のログとレンダリングログを区別できます。
+
+```jsx
+import { after, logger } from "@lazarv/react-server";
+
+export default function MyComponent() {
+ logger.info("Rendering component");
+
+ after(() => {
+ logger.info("Response sent"); // 開発モードでは (after) ラベル付きでログ出力
+ });
+
+  return <div>Hello World</div>;
+}
+```
+
+利用可能なメソッド:
+
+| メソッド | 説明 |
+|---|---|
+| `logger.info(msg, ...args)` | 情報メッセージをログに記録 |
+| `logger.warn(msg, ...args)` | 警告メッセージをログに記録 |
+| `logger.error(msg, ...args)` | エラーメッセージまたは `Error` オブジェクトをログに記録 |
+| `logger.debug(msg, ...args)` | デバッグメッセージをログに記録 |
+
+> **Note:** `logger` はサーバー上のどこでも使用できます — コンポーネント、サーバー関数、ミドルウェア、ルートハンドラ、ワーカー、`after()` コールバック内で利用可能です。リクエストコンテキストは必須ではありませんが、利用可能な場合はコンテキスト固有のロガーインスタンスを使用します。
+
+
+## After
+
+
+`after()` を使用すると、**レスポンスがクライアントに送信された後**に実行されるコールバック関数を登録できます。これは、クリーンアップタスク、ロギング、アナリティクス、またはレスポンスを遅延させるべきではない副作用を実行するのに便利です。
+
+```jsx
+import { after, logger } from "@lazarv/react-server";
+
+export default function MyComponent() {
+ after(() => {
+ logger.info("Response sent to client.");
+ });
+
+  return <div>Hello World</div>;
+}
+```
+
+`after()` フックは複数回呼び出して複数のコールバックを登録できます。登録されたすべてのコールバックは、レスポンスストリームが完了した後に `Promise.allSettled` を介して並行して実行されるため、1つのコールバックが失敗しても他のコールバックの実行は妨げられません。リクエストがエラーで失敗した場合、エラーは最初の引数として各コールバックに渡されます:
+
+```jsx
+import { after, logger } from "@lazarv/react-server";
+
+export default function MyComponent() {
+ after((error) => {
+ if (error) {
+ logger.error("Request failed:", error.message);
+ } else {
+ logger.info("Request completed successfully");
+ }
+ });
+
+  return <div>Hello World</div>;
+}
+```
+
+```jsx
+import { after } from "@lazarv/react-server";
+
+export default function MyComponent() {
+ after(async () => {
+ await saveAnalytics({ page: "/home", timestamp: Date.now() });
+ });
+
+ after(async () => {
+ await cleanupTempFiles();
+ });
+
+  return <div>Home</div>;
+}
+```
+
+サーバー関数、ミドルウェア、ルートハンドラ、またはリクエストコンテキスト内で実行されるサーバーサイドコードでも `after()` を使用できます:
+
+```jsx
+import { after } from "@lazarv/react-server";
+
+export async function submitForm(formData) {
+ "use server";
+
+ const data = Object.fromEntries(formData.entries());
+ await saveToDatabase(data);
+
+ after(async () => {
+ await sendNotificationEmail(data.email);
+ });
+}
+```
+
+> **Note:** `after()` フックはリクエスト中にのみ呼び出すことができます。リクエストコンテキスト外(モジュールスコープやスタンドアロンスクリプトなど)で呼び出すとエラーがスローされます。
\ No newline at end of file
diff --git a/examples/benchmark/bench.mjs b/examples/benchmark/bench.mjs
index d9921007..c981a957 100644
--- a/examples/benchmark/bench.mjs
+++ b/examples/benchmark/bench.mjs
@@ -3,12 +3,13 @@
*
* Usage:
* 1. pnpm --filter @lazarv/react-server-example-benchmark build
- * 2. node bench.mjs [--save ] [--compare ] [--cluster ]
+ * 2. node bench.mjs [--save ] [--compare ] [--cluster ] [--only ]
*
* Options:
* --save Save results to results-.json
* --compare Compare against a previous results JSON file
* --cluster Run in cluster mode with n workers (uses react-server start)
+ * --only Run only specific benchmarks (comma-separated, e.g. --only 404-miss,cached)
*
* Runs autocannon against each benchmark route and prints a summary table.
*/
@@ -38,6 +39,14 @@ const filters = filterArg
.filter(Boolean)
: null;
+// --only name1,name2 or --only name1 --only name2
+const onlyFilter = new Set(
+ args.reduce((acc, a, i, arr) => {
+ if (a === "--only" && arr[i + 1]) acc.push(...arr[i + 1].split(","));
+ return acc;
+ }, [])
+);
+
function parseCluster() {
const idx = args.findIndex((a) => a.startsWith("--cluster"));
if (idx === -1) return 0;
@@ -179,7 +188,12 @@ const BENCHMARKS = [
path: null, // resolved dynamically
desc: "Static file (JS bundle)",
},
- { name: "404-miss", path: "/nonexistent", desc: "404 miss → SSR" },
+ {
+ name: "404-miss",
+ path: "/nonexistent",
+ desc: "404 miss → SSR",
+ expect: 404,
+ },
{
name: "hybrid-min",
path: "/hybrid",
@@ -297,27 +311,47 @@ for (const b of BENCHMARKS) {
console.log(`⏭ Skipping ${b.name} (no path resolved)`);
continue;
}
+ if (onlyFilter.size > 0 && !onlyFilter.has(b.name)) continue;
process.stdout.write(`▶ ${b.name.padEnd(14)} ${b.desc}...`);
const url = `http://localhost:${PORT}${b.path}`;
const data = await runAutocannon(url);
+ const total2xx = data["2xx"] ?? 0;
+ const totalNon2xx = (data.non2xx ?? 0) + (data.errors ?? 0);
+ const totalRequests = total2xx + totalNon2xx;
+ const durationSec = data.duration ?? DURATION;
+
+ // For routes with expected non-2xx responses (e.g. 404), count all
+ // completed requests as "ok". Otherwise only count 2xx.
+ const totalOk = b.expect ? totalRequests - (data.errors ?? 0) : total2xx;
+ const okReqSec = durationSec > 0 ? totalOk / durationSec : 0;
+
+ // Unexpected non-2xx: for a 404 route, the 404s are expected — only
+ // connection errors and 503s are unexpected failures
+ const unexpectedErrors = b.expect ? (data.errors ?? 0) : totalNon2xx;
+
const result = {
name: b.name,
desc: b.desc,
path: b.path,
- reqSec: data.requests.average,
+ reqSec: okReqSec,
+ totalReqSec: data.requests.average,
latencyAvg: data.latency.average,
latencyP50: data.latency.p50,
latencyP99: data.latency.p99,
throughputMB: (data.throughput.average / 1024 / 1024).toFixed(1),
- total2xx: data["2xx"],
- errors: data.errors,
+ total2xx,
+ totalNon2xx,
+ totalRequests,
+ unexpectedErrors,
+ errors: data.errors ?? 0,
};
results.push(result);
+ const status = unexpectedErrors > 0 ? ` | ${unexpectedErrors} non-2xx` : "";
console.log(
- ` ${result.reqSec.toFixed(0)} req/s | avg ${result.latencyAvg}ms | p99 ${result.latencyP99}ms`
+ ` ${result.reqSec.toFixed(0)} req/s | avg ${result.latencyAvg}ms | p99 ${result.latencyP99}ms${status}`
);
}
@@ -344,8 +378,12 @@ function fmtDelta(current, baseline, lowerIsBetter = false) {
return ` ${arrow}${sign}${pct.toFixed(0)}%`;
}
+function fmtErrors(r) {
+ return r.unexpectedErrors > 0 ? String(r.unexpectedErrors) : "";
+}
+
if (compareData) {
- console.log("\n" + "═".repeat(130));
+ console.log("\n" + "═".repeat(140));
console.log(
" " +
"Benchmark".padEnd(16) +
@@ -354,10 +392,11 @@ if (compareData) {
"P50 (ms)".padStart(14) +
"P99 (ms)".padStart(14) +
"Throughput".padStart(12) +
+ "Errors".padStart(10) +
" " +
"Description"
);
- console.log("─".repeat(130));
+ console.log("─".repeat(140));
for (const r of results) {
const base = compareData.get(r.name);
console.log(
@@ -370,13 +409,14 @@ if (compareData) {
String(r.latencyP50).padStart(14) +
String(r.latencyP99).padStart(14) +
`${r.throughputMB} MB/s`.padStart(12) +
+ fmtErrors(r).padStart(10) +
" " +
r.desc
);
}
- console.log("═".repeat(130));
+ console.log("═".repeat(140));
} else {
- console.log("\n" + "═".repeat(110));
+ console.log("\n" + "═".repeat(120));
console.log(
" " +
"Benchmark".padEnd(16) +
@@ -385,10 +425,11 @@ if (compareData) {
"P50 (ms)".padStart(10) +
"P99 (ms)".padStart(10) +
"Throughput".padStart(12) +
+ "Errors".padStart(10) +
" " +
"Description"
);
- console.log("─".repeat(110));
+ console.log("─".repeat(120));
for (const r of results) {
console.log(
" " +
@@ -398,11 +439,12 @@ if (compareData) {
String(r.latencyP50).padStart(10) +
String(r.latencyP99).padStart(10) +
`${r.throughputMB} MB/s`.padStart(12) +
+ fmtErrors(r).padStart(10) +
" " +
r.desc
);
}
- console.log("═".repeat(110));
+ console.log("═".repeat(120));
}
// ── Save results ─────────────────────────────────────────────────────────────
diff --git a/examples/benchmark/pages/(rsc)/cpu.jsx b/examples/benchmark/pages/(rsc)/cpu.jsx
new file mode 100644
index 00000000..ed1ee392
--- /dev/null
+++ b/examples/benchmark/pages/(rsc)/cpu.jsx
@@ -0,0 +1,12 @@
+// Test fixture: a CPU-bound route that saturates the event loop.
+// Used to verify adaptive limiter shrinks the limit when ELU is high.
+function burn(ms) {
+ const end = Date.now() + ms;
+ // eslint-disable-next-line no-empty
+ while (Date.now() < end) {}
+}
+
+export default function Cpu() {
+ burn(20); // ~20ms of synchronous CPU per request
+  return <div>cpu ok</div>;
+}
diff --git a/examples/benchmark/pages/(rsc)/slow.jsx b/examples/benchmark/pages/(rsc)/slow.jsx
new file mode 100644
index 00000000..3e0eef44
--- /dev/null
+++ b/examples/benchmark/pages/(rsc)/slow.jsx
@@ -0,0 +1,6 @@
+// Test fixture: a route that takes ~2s to respond.
+// Used to verify graceful shutdown drains in-flight requests.
+export default async function Slow() {
+ await new Promise((r) => setTimeout(r, 2000));
+  return <div>slow ok</div>;
+}
diff --git a/examples/benchmark/pages/(rsc)/throw.jsx b/examples/benchmark/pages/(rsc)/throw.jsx
new file mode 100644
index 00000000..1ca874d7
--- /dev/null
+++ b/examples/benchmark/pages/(rsc)/throw.jsx
@@ -0,0 +1,14 @@
+// Test fixture: a route that throws synchronously during render.
+// Used to verify afterHooks still fire on the error path with context.
+import { after } from "@lazarv/react-server/server";
+
+export default function Throw() {
+ // Register an afterHook that logs to stderr. If the error path doesn't run
+ // hooks correctly (or doesn't restore ContextStorage), we won't see the log.
+ after((err) => {
+ process.stderr.write(
+ `[afterHook] fired with err=${err ? err.message : "(none)"}\n`
+ );
+ });
+ throw new Error("intentional throw for afterHook test");
+}
diff --git a/examples/benchmark/react-server.runtime.config.mjs b/examples/benchmark/react-server.runtime.config.mjs
new file mode 100644
index 00000000..c59a1749
--- /dev/null
+++ b/examples/benchmark/react-server.runtime.config.mjs
@@ -0,0 +1,3 @@
+export default {
+ root: "pages",
+};
diff --git a/packages/react-server/adapters/docker/server/index.mjs b/packages/react-server/adapters/docker/server/index.mjs
index addb9ff3..e59b078b 100644
--- a/packages/react-server/adapters/docker/server/index.mjs
+++ b/packages/react-server/adapters/docker/server/index.mjs
@@ -93,14 +93,48 @@ const server = createServer((req, res) => {
middlewares(req, res);
});
+// Apply keep-alive and timeout settings to prevent 502s behind load balancers
+server.keepAliveTimeout = 65_000;
+server.headersTimeout = 66_000;
+server.requestTimeout = 30_000;
+
+// During shutdown, set Connection: close so clients stop reusing keep-alive
+let isShuttingDown = false;
+server.on("request", (_req, res) => {
+ if (isShuttingDown && !res.headersSent) {
+ res.setHeader("Connection", "close");
+ }
+});
+
server.listen(port, host, () => {
console.log(`Server listening on http://${host}:${port}`);
});
-// Graceful shutdown
-function shutdown() {
+// Graceful shutdown — drain connections before exiting
+function shutdown(signal) {
+ if (isShuttingDown) return;
+ isShuttingDown = true;
+ console.log(`${signal} received, draining connections...`);
+
+ // Connections finishing a response after this get a 1ms keep-alive timer
+ server.keepAliveTimeout = 1;
+ // Destroy connections that are already idle right now
+ if (typeof server.closeIdleConnections === "function") {
+ server.closeIdleConnections();
+ }
+ // After a grace period, force-close ALL remaining connections.
+ // This handles sockets that Node.js hasn't marked as idle yet
+ // (e.g. response flushing, keep-alive state transitions).
+ const forceClose = setTimeout(() => {
+ if (typeof server.closeAllConnections === "function") {
+ server.closeAllConnections();
+ }
+ }, 1500);
+ forceClose.unref?.();
+
server.close(() => process.exit(0));
- setTimeout(() => process.exit(1), 5000);
+ const forceTimeout = setTimeout(() => process.exit(1), 25_000);
+ forceTimeout.unref?.();
}
-process.on("SIGTERM", shutdown);
-process.on("SIGINT", shutdown);
+process.on("SIGTERM", () => shutdown("SIGTERM"));
+process.on("SIGINT", () => shutdown("SIGINT"));
diff --git a/packages/react-server/config/schema.d.ts b/packages/react-server/config/schema.d.ts
index daf18658..98d1d4b1 100644
--- a/packages/react-server/config/schema.d.ts
+++ b/packages/react-server/config/schema.d.ts
@@ -199,6 +199,165 @@ export interface ServerConfig {
*/
trustProxy?: boolean;
+ /**
+ * Keep-alive timeout in milliseconds. How long the server keeps idle connections
+ * open before closing them. Must exceed your load balancer's idle timeout to
+ * prevent 502 errors (e.g. AWS ALB defaults to 60s, so use ≥65000).
+ * @default 65000
+ * @example `keepAliveTimeout: 65000`
+ */
+ keepAliveTimeout?: number;
+
+ /**
+ * Headers timeout in milliseconds. Maximum time to wait for the client to send
+ * the full request headers. Must exceed `keepAliveTimeout`.
+ * @default 66000
+ * @example `headersTimeout: 66000`
+ */
+ headersTimeout?: number;
+
+ /**
+ * Request timeout in milliseconds. Maximum time allowed for the client to send
+ * the complete request (headers + body). Set to `0` to disable.
+ * @default 30000
+ * @example `requestTimeout: 30000`
+ */
+ requestTimeout?: number;
+
+ /**
+ * Maximum number of concurrent requests before the server responds with 503.
+ * Set to `0` to disable admission control.
+ * @default 0
+ * @example `maxConcurrentRequests: 100`
+ */
+ maxConcurrentRequests?: number;
+
+ /**
+ * Graceful shutdown timeout in milliseconds. After receiving SIGTERM/SIGINT,
+ * the server stops accepting new connections and waits up to this duration
+ * for in-flight requests to complete before force-exiting.
+ * @default 25000
+ * @example `shutdownTimeout: 25000`
+ */
+ shutdownTimeout?: number;
+
+ /**
+ * How often (ms) Node's HTTP server scans for connections that have exceeded
+ * their `headersTimeout` or `requestTimeout`. Node's default is 30000ms,
+ * which means slow-loris connections can hold a socket for up to 30s past
+ * their configured deadline. We override the default to 5000 so timeouts
+ * fire much closer to their configured value. Lower = faster detection,
+ * higher = less overhead.
+ * @default 5000
+ * @example `connectionsCheckingInterval: 5000`
+ */
+ connectionsCheckingInterval?: number;
+
+ /**
+ * Crash-loop trip wire (cluster mode). The master exits if it observes more
+ * than this many worker exits within `clusterRespawnWindow` milliseconds —
+ * preventing a fork-bomb when the worker is failing deterministically.
+ * @default numCPUs * 5
+ * @example `clusterRespawnLimit: 20`
+ */
+ clusterRespawnLimit?: number;
+
+ /**
+ * Sliding window (ms) used by `clusterRespawnLimit` to detect crash loops.
+ * @default 60000
+ * @example `clusterRespawnWindow: 60000`
+ */
+ clusterRespawnWindow?: number;
+
+ /**
+ * Adaptive backpressure configuration using Event Loop Utilization (ELU).
+ * The server dynamically adjusts its concurrency limit based on event loop
+ * saturation using AIMD (Additive Increase, Multiplicative Decrease).
+ *
+ * **Node.js-only.** Relies on `performance.eventLoopUtilization()` and a
+ * long-lived event loop, so it does not load on edge runtimes (Cloudflare
+ * Workers, Vercel Edge, Deno Deploy) or in serverless invocations
+ * (Lambda, Vercel Functions).
+ *
+ * When enabled, an admission-control middleware is inserted into the request
+ * chain (~10μs/request overhead). When disabled, that middleware is omitted
+ * entirely — zero per-request cost. The limiter starts wide open and only
+ * tightens when the event loop is genuinely saturated (ELU ≥ 0.95).
+ *
+ * **Resolution priority** (highest first):
+ * 1. `REACT_SERVER_BACKPRESSURE` env var (`1`/`true` enables, `0`/`false` disables)
+ * 2. `enabled` flag in this config (explicit boolean)
+ * 3. Cluster mode default — `on` when running under cluster, `off` otherwise
+ *
+ * When both `backpressure` and `maxConcurrentRequests` are configured,
+ * `maxConcurrentRequests` acts as the hard ceiling for the adaptive limit.
+ *
+ * @example `backpressure: { enabled: true }` to force-enable
+ */
+ backpressure?: {
+ /**
+ * Enable adaptive backpressure explicitly. When unset, falls back to the
+ * cluster-mode default (`on` in cluster, `off` in single-process). Set
+ * to `false` to force-disable. The `REACT_SERVER_BACKPRESSURE` env var
+ * overrides this flag if both are set.
+ * @default cluster ? true : false
+ */
+ enabled?: boolean;
+
+ /**
+ * Starting concurrency limit. Defaults to `maxLimit` (start wide open,
+ * tighten under overload).
+ * @default maxLimit
+ */
+ initialLimit?: number;
+
+ /**
+ * Minimum concurrency limit (floor). The adaptive limit never drops below this.
+ * @default 1
+ */
+ minLimit?: number;
+
+ /**
+ * Maximum concurrency limit (ceiling). Capped by `maxConcurrentRequests` when set.
+ * @default 1000
+ */
+ maxLimit?: number;
+
+ /**
+ * Event Loop Utilization threshold (0–1). When ELU exceeds this, the limit
+ * decreases and excess requests skip the queue.
+ * @default 0.95
+ */
+ eluMax?: number;
+
+ /**
+ * Interval (ms) for recalculating the concurrency limit and sampling ELU.
+ * @default 1000
+ */
+ sampleWindow?: number;
+
+ /**
+ * EWMA smoothing factor for the `smoothedLatency` field in the limiter's
+ * stats output. Observability-only — does not affect admission decisions.
+ * @default 0.2
+ */
+ smoothingFactor?: number;
+
+ /**
+ * Maximum number of requests waiting in the backpressure queue. When the
+ * queue is full, additional requests are immediately rejected with 503.
+ * @default 100
+ */
+ queueSize?: number;
+
+ /**
+ * Maximum time (ms) a request waits in the queue before being rejected
+ * with 503. Should be shorter than your load balancer's request timeout.
+ * @default 5000
+ */
+ queueTimeout?: number;
+ };
+
/**
* Custom response headers for the dev server.
* @example `headers: { "X-Custom": "value" }`
diff --git a/packages/react-server/config/schema.json b/packages/react-server/config/schema.json
index 46fe4616..30032bf4 100644
--- a/packages/react-server/config/schema.json
+++ b/packages/react-server/config/schema.json
@@ -442,6 +442,99 @@
"type": "boolean",
"description": "Trust the X-Forwarded-* headers from reverse proxies."
},
+ "keepAliveTimeout": {
+ "type": "integer",
+ "minimum": 0,
+ "description": "Keep-alive timeout in milliseconds. Must exceed your load balancer's idle timeout to prevent 502 errors. Default: 65000."
+ },
+ "headersTimeout": {
+ "type": "integer",
+ "minimum": 0,
+ "description": "Headers timeout in milliseconds. Maximum time to wait for the client to send full request headers. Must exceed keepAliveTimeout. Default: 66000."
+ },
+ "requestTimeout": {
+ "type": "integer",
+ "minimum": 0,
+ "description": "Request timeout in milliseconds. Maximum time allowed for the client to send the complete request. Set to 0 to disable. Default: 30000."
+ },
+ "maxConcurrentRequests": {
+ "type": "integer",
+ "minimum": 0,
+ "description": "Maximum concurrent requests before the server responds with 503. Set to 0 to disable. Default: 0 (disabled)."
+ },
+ "shutdownTimeout": {
+ "type": "integer",
+ "minimum": 0,
+ "description": "Graceful shutdown timeout in milliseconds. Time to wait for in-flight requests to drain after SIGTERM/SIGINT. Default: 25000."
+ },
+ "connectionsCheckingInterval": {
+ "type": "integer",
+ "minimum": 100,
+ "description": "How often (ms) the HTTP server scans for connections exceeding their headers/request timeouts. Lower = faster slow-loris detection. Node's default is 30000ms; we override to 5000. Default: 5000."
+ },
+ "clusterRespawnLimit": {
+ "type": "integer",
+ "minimum": 1,
+ "description": "Crash-loop trip wire: max worker exits within `clusterRespawnWindow` before the master gives up and exits. Default: numCPUs * 5."
+ },
+ "clusterRespawnWindow": {
+ "type": "integer",
+ "minimum": 1000,
+ "description": "Sliding window (ms) used by `clusterRespawnLimit` to detect crash loops. Default: 60000."
+ },
+ "backpressure": {
+ "type": "object",
+ "properties": {
+ "enabled": {
+ "type": "boolean",
+ "description": "Enable adaptive backpressure. Defaults: enabled when running in cluster mode, disabled in single-process. Override via env var REACT_SERVER_BACKPRESSURE=1|0 (env wins over config), or set this flag explicitly."
+ },
+ "initialLimit": {
+ "type": "integer",
+ "minimum": 1,
+ "description": "Starting concurrency limit. Defaults to maxLimit (start wide open, tighten under overload)."
+ },
+ "minLimit": {
+ "type": "integer",
+ "minimum": 1,
+ "description": "Minimum concurrency limit (floor). The adaptive limit never drops below this. Default: 1."
+ },
+ "maxLimit": {
+ "type": "integer",
+ "minimum": 1,
+ "description": "Maximum concurrency limit (ceiling). Capped by maxConcurrentRequests when set. Default: 1000."
+ },
+ "eluMax": {
+ "type": "number",
+ "minimum": 0,
+ "maximum": 1,
+ "description": "Event Loop Utilization threshold (0–1). Above this, the limit decreases and excess requests skip the queue. Default: 0.95."
+ },
+ "sampleWindow": {
+ "type": "integer",
+ "minimum": 100,
+ "description": "Interval (ms) for recalculation and ELU sampling. Default: 1000."
+ },
+ "smoothingFactor": {
+ "type": "number",
+ "minimum": 0,
+ "maximum": 1,
+ "description": "EWMA smoothing factor for the `smoothedLatency` field in the limiter's stats output. Observability-only — does not affect admission decisions. Default: 0.2."
+ },
+ "queueSize": {
+ "type": "integer",
+ "minimum": 0,
+ "description": "Maximum requests waiting in the backpressure queue. Beyond this, requests are immediately rejected with 503. Default: 100."
+ },
+ "queueTimeout": {
+ "type": "integer",
+ "minimum": 0,
+ "description": "Maximum time (ms) a request waits in the queue before being rejected with 503. Default: 5000."
+ }
+ },
+ "additionalProperties": false,
+      "description": "Adaptive backpressure configuration using Event Loop Utilization (ELU). Node.js-only — does not load on edge runtimes or in serverless invocations. Defaults: enabled in cluster mode, disabled in single-process (override via the REACT_SERVER_BACKPRESSURE env var or the `enabled` flag). When disabled, the admission-control middleware is omitted entirely — zero per-request cost. Dynamically adjusts the concurrency limit based on event loop saturation."
+ },
"headers": {
"type": "object",
"description": "Custom response headers for the dev server."
diff --git a/packages/react-server/config/schema.mjs b/packages/react-server/config/schema.mjs
index 0cec3c2a..2d3715a9 100644
--- a/packages/react-server/config/schema.mjs
+++ b/packages/react-server/config/schema.mjs
@@ -90,6 +90,42 @@ export const DESCRIPTIONS = {
"server.middlewareMode":
"Create Vite dev server to be used as a middleware in an existing server.",
"server.trustProxy": "Trust the X-Forwarded-* headers from reverse proxies.",
+ "server.keepAliveTimeout":
+ "Keep-alive timeout in milliseconds. Must exceed your load balancer's idle timeout to prevent 502 errors. Default: 65000.",
+ "server.headersTimeout":
+ "Headers timeout in milliseconds. Maximum time to wait for the client to send full request headers. Must exceed keepAliveTimeout. Default: 66000.",
+ "server.requestTimeout":
+ "Request timeout in milliseconds. Maximum time allowed for the client to send the complete request. Set to 0 to disable. Default: 30000.",
+ "server.maxConcurrentRequests":
+ "Maximum concurrent requests before the server responds with 503. Set to 0 to disable. Default: 0 (disabled).",
+ "server.shutdownTimeout":
+ "Graceful shutdown timeout in milliseconds. Time to wait for in-flight requests to drain after SIGTERM/SIGINT. Default: 25000.",
+ "server.connectionsCheckingInterval":
+ "How often (ms) the HTTP server scans for connections that have exceeded their headers/request timeouts. Lower = faster slow-loris detection, higher = less overhead. Node's default is 30000ms which can leave timeouts mostly unenforced; we override to 5000. Default: 5000.",
+ "server.clusterRespawnLimit":
+ "Crash-loop trip wire: max worker exits within `clusterRespawnWindow` before the master gives up and exits. Default: numCPUs * 5.",
+ "server.clusterRespawnWindow":
+ "Sliding window (ms) used by `clusterRespawnLimit` to detect crash loops. Default: 60000.",
+ "server.backpressure":
+ "Adaptive backpressure configuration using Event Loop Utilization (ELU). Node.js-only — does not load on edge runtimes or in serverless invocations. When active, dynamically adjusts the concurrency limit based on event loop saturation. Adds an admission-control middleware to the chain (~10μs/request).",
+ "server.backpressure.enabled":
+ "Enable adaptive backpressure. Defaults: enabled when running in cluster mode, disabled in single-process. Override via env var REACT_SERVER_BACKPRESSURE=1|0 (env wins over config), or set this flag explicitly.",
+ "server.backpressure.initialLimit":
+ "Starting concurrency limit. Defaults to maxLimit (start wide open, tighten under overload).",
+ "server.backpressure.minLimit":
+ "Minimum concurrency limit (floor). The adaptive limit never drops below this. Default: 1.",
+ "server.backpressure.maxLimit":
+ "Maximum concurrency limit (ceiling). Capped by maxConcurrentRequests when set. Default: 1000.",
+ "server.backpressure.eluMax":
+ "Event Loop Utilization threshold (0–1). Above this, the limit decreases and excess requests skip the queue. Default: 0.95.",
+ "server.backpressure.sampleWindow":
+ "Interval (ms) for recalculation and ELU sampling. Default: 1000.",
+ "server.backpressure.smoothingFactor":
+ "EWMA smoothing factor for the `smoothedLatency` field in the limiter's stats output. Observability-only — does not affect admission decisions. Default: 0.2.",
+ "server.backpressure.queueSize":
+ "Maximum requests waiting in the backpressure queue. Beyond this, requests are immediately rejected with 503. Default: 100.",
+ "server.backpressure.queueTimeout":
+ "Maximum time (ms) a request waits in the queue before being rejected with 503. Default: 5000.",
"server.headers": "Custom response headers for the dev server.",
"server.warmup": "Warm up files to pre-transform on server start.",
"server.preTransformRequests":
@@ -540,6 +576,83 @@ export function generateJsonSchema() {
origin: prop({ type: "string" }, "server.origin"),
proxy: prop({ type: "object" }, "server.proxy"),
trustProxy: prop({ type: "boolean" }, "server.trustProxy"),
+ keepAliveTimeout: prop(
+ { type: "integer", minimum: 0 },
+ "server.keepAliveTimeout"
+ ),
+ headersTimeout: prop(
+ { type: "integer", minimum: 0 },
+ "server.headersTimeout"
+ ),
+ requestTimeout: prop(
+ { type: "integer", minimum: 0 },
+ "server.requestTimeout"
+ ),
+ maxConcurrentRequests: prop(
+ { type: "integer", minimum: 0 },
+ "server.maxConcurrentRequests"
+ ),
+ shutdownTimeout: prop(
+ { type: "integer", minimum: 0 },
+ "server.shutdownTimeout"
+ ),
+ connectionsCheckingInterval: prop(
+ { type: "integer", minimum: 100 },
+ "server.connectionsCheckingInterval"
+ ),
+ clusterRespawnLimit: prop(
+ { type: "integer", minimum: 1 },
+ "server.clusterRespawnLimit"
+ ),
+ clusterRespawnWindow: prop(
+ { type: "integer", minimum: 1000 },
+ "server.clusterRespawnWindow"
+ ),
+ backpressure: prop(
+ {
+ type: "object",
+ properties: {
+ enabled: prop(
+ { type: "boolean" },
+ "server.backpressure.enabled"
+ ),
+ initialLimit: prop(
+ { type: "integer", minimum: 1 },
+ "server.backpressure.initialLimit"
+ ),
+ minLimit: prop(
+ { type: "integer", minimum: 1 },
+ "server.backpressure.minLimit"
+ ),
+ maxLimit: prop(
+ { type: "integer", minimum: 1 },
+ "server.backpressure.maxLimit"
+ ),
+ eluMax: prop(
+ { type: "number", minimum: 0, maximum: 1 },
+ "server.backpressure.eluMax"
+ ),
+ sampleWindow: prop(
+ { type: "integer", minimum: 100 },
+ "server.backpressure.sampleWindow"
+ ),
+ smoothingFactor: prop(
+ { type: "number", minimum: 0, maximum: 1 },
+ "server.backpressure.smoothingFactor"
+ ),
+ queueSize: prop(
+ { type: "integer", minimum: 0 },
+ "server.backpressure.queueSize"
+ ),
+ queueTimeout: prop(
+ { type: "integer", minimum: 0 },
+ "server.backpressure.queueTimeout"
+ ),
+ },
+ additionalProperties: false,
+ },
+ "server.backpressure"
+ ),
headers: prop({ type: "object" }, "server.headers"),
warmup: prop({ type: "object" }, "server.warmup"),
preTransformRequests: prop(
diff --git a/packages/react-server/config/validate.mjs b/packages/react-server/config/validate.mjs
index 67930408..d1533b07 100644
--- a/packages/react-server/config/validate.mjs
+++ b/packages/react-server/config/validate.mjs
@@ -266,6 +266,27 @@ const REACT_SERVER_SCHEMA = {
'react-server always runs Vite in middleware mode internally. This option cannot be changed. Use the "vite" config key for raw Vite overrides if needed.'
),
trustProxy: optional(is.boolean),
+ keepAliveTimeout: optional(is.number),
+ headersTimeout: optional(is.number),
+ requestTimeout: optional(is.number),
+ maxConcurrentRequests: optional(is.number),
+ shutdownTimeout: optional(is.number),
+ connectionsCheckingInterval: optional(is.number),
+ clusterRespawnLimit: optional(is.number),
+ clusterRespawnWindow: optional(is.number),
+ backpressure: optional(
+ objectShape({
+ enabled: optional(is.boolean),
+ initialLimit: optional(is.number),
+ minLimit: optional(is.number),
+ maxLimit: optional(is.number),
+ eluMax: optional(is.number),
+ sampleWindow: optional(is.number),
+ smoothingFactor: optional(is.number),
+ queueSize: optional(is.number),
+ queueTimeout: optional(is.number),
+ })
+ ),
headers: optional(is.object),
warmup: optional(is.object),
preTransformRequests: optional(is.boolean),
@@ -610,6 +631,24 @@ const EXAMPLES = {
"server.proxy": `server: { proxy: { "/api": "http://localhost:4000" } }`,
"server.middlewareMode": `server: { middlewareMode: true }`,
"server.trustProxy": `server: { trustProxy: true }`,
+ "server.keepAliveTimeout": `server: { keepAliveTimeout: 65000 }`,
+ "server.headersTimeout": `server: { headersTimeout: 66000 }`,
+ "server.requestTimeout": `server: { requestTimeout: 30000 }`,
+ "server.maxConcurrentRequests": `server: { maxConcurrentRequests: 100 }`,
+ "server.shutdownTimeout": `server: { shutdownTimeout: 25000 }`,
+ "server.connectionsCheckingInterval": `server: { connectionsCheckingInterval: 5000 }`,
+ "server.clusterRespawnLimit": `server: { clusterRespawnLimit: 20 }`,
+ "server.clusterRespawnWindow": `server: { clusterRespawnWindow: 60000 }`,
+ "server.backpressure": `server: { backpressure: { enabled: false } }`,
+ "server.backpressure.enabled": `server: { backpressure: { enabled: false } }`,
+ "server.backpressure.initialLimit": `server: { backpressure: { initialLimit: 1000 } }`,
+ "server.backpressure.minLimit": `server: { backpressure: { minLimit: 1 } }`,
+ "server.backpressure.maxLimit": `server: { backpressure: { maxLimit: 1000 } }`,
+ "server.backpressure.eluMax": `server: { backpressure: { eluMax: 0.95 } }`,
+ "server.backpressure.sampleWindow": `server: { backpressure: { sampleWindow: 1000 } }`,
+ "server.backpressure.smoothingFactor": `server: { backpressure: { smoothingFactor: 0.2 } }`,
+ "server.backpressure.queueSize": `server: { backpressure: { queueSize: 100 } }`,
+ "server.backpressure.queueTimeout": `server: { backpressure: { queueTimeout: 5000 } }`,
"server.headers": `server: { headers: { "X-Custom": "value" } }`,
"server.warmup": `server: { warmup: { clientFiles: ["./src/main.ts"] } }`,
"server.preTransformRequests": `server: { preTransformRequests: true }`,
diff --git a/packages/react-server/devtools/devtools.css b/packages/react-server/devtools/devtools.css
index f87b4528..889df3d6 100644
--- a/packages/react-server/devtools/devtools.css
+++ b/packages/react-server/devtools/devtools.css
@@ -1,5 +1,6 @@
/* ── Reset ── */
-html, body {
+html,
+body {
margin: 0;
padding: 0;
height: 100%;
@@ -15,19 +16,19 @@ html, body {
:root {
--dt-bg: #ffffff;
--dt-bg-gradient: linear-gradient(to bottom, #ffffff, #e5e7eb);
- --dt-fg: #111827; /* gray-900 */
- --dt-muted: #4b5563; /* gray-600 */
- --dt-dimmed: #9ca3af; /* gray-400 */
- --dt-faint: #d1d5db; /* gray-300 */
- --dt-border: #e5e7eb; /* gray-200 */
- --dt-surface: #f9fafb; /* gray-50 */
- --dt-row-border: #f3f4f6; /* gray-100 */
- --dt-accent: #6366f1; /* indigo-500 */
+ --dt-fg: #111827; /* gray-900 */
+ --dt-muted: #4b5563; /* gray-600 */
+ --dt-dimmed: #9ca3af; /* gray-400 */
+ --dt-faint: #d1d5db; /* gray-300 */
+ --dt-border: #e5e7eb; /* gray-200 */
+ --dt-surface: #f9fafb; /* gray-50 */
+ --dt-row-border: #f3f4f6; /* gray-100 */
+ --dt-accent: #6366f1; /* indigo-500 */
--dt-accent-subtle: color-mix(in srgb, #6366f1 12%, transparent);
- --dt-warn: #d97706; /* amber-600 */
- --dt-link: #4338ca; /* indigo-700 */
+ --dt-warn: #d97706; /* amber-600 */
+ --dt-link: #4338ca; /* indigo-700 */
--dt-link-underline: color-mix(in srgb, #4338ca 25%, transparent);
- --dt-success: #16a34a; /* green-600 */
+ --dt-success: #16a34a; /* green-600 */
--dt-toolbar-bg: #f9fafb;
--dt-toolbar-border: #e5e7eb;
--dt-toolbar-fg: #4b5563;
@@ -35,21 +36,21 @@ html, body {
}
.dark {
- --dt-bg: #18181b; /* zinc-900 */
+ --dt-bg: #18181b; /* zinc-900 */
--dt-bg-gradient: linear-gradient(to bottom, #27272a, #18181b);
- --dt-fg: #d1d5db; /* gray-300 */
- --dt-muted: #9ca3af; /* gray-400 */
- --dt-dimmed: #71717a; /* zinc-500 */
- --dt-faint: #52525b; /* zinc-600 */
- --dt-border: #3f3f46; /* zinc-700 */
- --dt-surface: #27272a; /* zinc-800 */
- --dt-row-border: #27272a; /* zinc-800 */
- --dt-accent: #ca8a04; /* yellow-600 */
+ --dt-fg: #d1d5db; /* gray-300 */
+ --dt-muted: #9ca3af; /* gray-400 */
+ --dt-dimmed: #71717a; /* zinc-500 */
+ --dt-faint: #52525b; /* zinc-600 */
+ --dt-border: #3f3f46; /* zinc-700 */
+ --dt-surface: #27272a; /* zinc-800 */
+ --dt-row-border: #27272a; /* zinc-800 */
+ --dt-accent: #ca8a04; /* yellow-600 */
--dt-accent-subtle: color-mix(in srgb, #ca8a04 12%, transparent);
- --dt-warn: #ca8a04; /* yellow-600 */
- --dt-link: #a5b4fc; /* indigo-300 */
+ --dt-warn: #ca8a04; /* yellow-600 */
+ --dt-link: #a5b4fc; /* indigo-300 */
--dt-link-underline: color-mix(in srgb, #a5b4fc 25%, transparent);
- --dt-success: #86efac; /* green-300 */
+ --dt-success: #86efac; /* green-300 */
--dt-toolbar-bg: #27272a;
--dt-toolbar-border: #3f3f46;
--dt-toolbar-fg: #9ca3af;
@@ -61,7 +62,8 @@ html, body {
display: flex;
flex-direction: column;
height: 100vh;
- font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
+ font-family:
+ -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
font-size: 13px;
color: var(--dt-fg);
background: var(--dt-bg);
@@ -87,7 +89,9 @@ html, body {
border-bottom: 2px solid transparent;
cursor: pointer;
white-space: nowrap;
- transition: color 0.15s, border-color 0.15s;
+ transition:
+ color 0.15s,
+ border-color 0.15s;
}
.dt-tab:hover {
@@ -225,18 +229,42 @@ html, body {
flex-shrink: 0;
}
-.dt-tag-indigo { background: #6366f1; }
-.dt-tag-violet { background: #8b5cf6; }
-.dt-tag-green { background: #22c55e; }
-.dt-tag-amber { background: #f59e0b; }
-.dt-tag-red { background: #ef4444; }
-.dt-tag-cyan { background: #06b6d4; }
-.dt-tag-teal { background: #14b8a6; }
-.dt-tag-pink { background: #ec4899; }
-.dt-tag-orange { background: #f97316; }
-.dt-tag-sky { background: #0ea5e9; }
-.dt-tag-gray { background: #6b7280; }
-.dt-tag-purple { background: #7c3aed; }
+.dt-tag-indigo {
+ background: #6366f1;
+}
+.dt-tag-violet {
+ background: #8b5cf6;
+}
+.dt-tag-green {
+ background: #22c55e;
+}
+.dt-tag-amber {
+ background: #f59e0b;
+}
+.dt-tag-red {
+ background: #ef4444;
+}
+.dt-tag-cyan {
+ background: #06b6d4;
+}
+.dt-tag-teal {
+ background: #14b8a6;
+}
+.dt-tag-pink {
+ background: #ec4899;
+}
+.dt-tag-orange {
+ background: #f97316;
+}
+.dt-tag-sky {
+ background: #0ea5e9;
+}
+.dt-tag-gray {
+ background: #6b7280;
+}
+.dt-tag-purple {
+ background: #7c3aed;
+}
/* ── Mono text ── */
.dt-mono {
@@ -627,7 +655,9 @@ html, body {
color: var(--dt-dimmed);
cursor: pointer;
text-decoration: none;
- transition: color 0.15s, background 0.15s;
+ transition:
+ color 0.15s,
+ background 0.15s;
}
.dt-remote-action:hover {
@@ -637,8 +667,12 @@ html, body {
/* ── Card flash animation (used when navigating to an outlet) ── */
@keyframes dt-flash {
- 0% { box-shadow: 0 0 0 2px var(--dt-accent); }
- 100% { box-shadow: 0 0 0 2px transparent; }
+ 0% {
+ box-shadow: 0 0 0 2px var(--dt-accent);
+ }
+ 100% {
+ box-shadow: 0 0 0 2px transparent;
+ }
}
.dt-card-flash {
@@ -690,7 +724,10 @@ html, body {
font-size: 16px;
cursor: pointer;
opacity: 0;
- transition: opacity 0.15s, color 0.15s, background 0.15s;
+ transition:
+ opacity 0.15s,
+ color 0.15s,
+ background 0.15s;
}
.dt-outlet-card:hover .dt-outlet-refresh {
@@ -881,9 +918,15 @@ html, body {
font-weight: 600;
}
-.dt-cache-stat-hit { color: #22c55e; }
-.dt-cache-stat-miss { color: #f59e0b; }
-.dt-cache-stat-revalidate { color: #6366f1; }
+.dt-cache-stat-hit {
+ color: #22c55e;
+}
+.dt-cache-stat-miss {
+ color: #f59e0b;
+}
+.dt-cache-stat-revalidate {
+ color: #6366f1;
+}
.dt-cache-filters {
display: flex;
@@ -921,7 +964,8 @@ html, body {
gap: 8px;
padding: 5px 8px;
font-size: 12px;
- font-family: ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, monospace;
+ font-family:
+ ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, monospace;
border-bottom: 1px solid var(--dt-row-border);
}
@@ -991,7 +1035,10 @@ a.dt-cache-loc:hover {
align-items: center;
justify-content: center;
opacity: 0;
- transition: opacity 0.15s, background 0.15s, color 0.15s;
+ transition:
+ opacity 0.15s,
+ background 0.15s,
+ color 0.15s;
}
.dt-cache-event:hover .dt-cache-invalidate {
@@ -1180,8 +1227,12 @@ a.dt-cache-loc:hover {
color: var(--dt-muted);
}
-.dt-worker-stat-server { color: #8b5cf6; }
-.dt-worker-stat-client { color: #0ea5e9; }
+.dt-worker-stat-server {
+ color: #8b5cf6;
+}
+.dt-worker-stat-client {
+ color: #0ea5e9;
+}
.dt-worker-filters {
display: flex;
diff --git a/packages/react-server/lib/handlers/static.mjs b/packages/react-server/lib/handlers/static.mjs
index 0cb88c51..f40f0220 100644
--- a/packages/react-server/lib/handlers/static.mjs
+++ b/packages/react-server/lib/handlers/static.mjs
@@ -1,4 +1,4 @@
-import { statSync } from "node:fs";
+import { stat } from "node:fs/promises";
import { open } from "node:fs/promises";
import { join } from "node:path";
import { pathToFileURL } from "node:url";
@@ -16,9 +16,24 @@ import * as sys from "../sys.mjs";
const cwd = sys.cwd();
+// Bound the misses cache to prevent unbounded growth from 404 probes
+const MAX_MISSES = 10_000;
+
+// Cap how many cold-path `stat()` calls can be in flight at once. libuv has
+// only 4 thread-pool workers by default; an unbounded burst of unique paths
+// (e.g. a 404 flood) would queue stats indefinitely and starve every other
+// FS-bound operation in the process — including the renderer's own reads.
+// When this is hit we fall through (returning false) so the request flows
+// down to admission control / SSR rather than blocking on the FS.
+const MAX_PENDING_STATS = 100;
+
export default async function staticHandler(dir, options = {}) {
const files = new Map();
const misses = new Set();
+ // In-flight stat() resolutions, keyed by path. Without this, a concurrent
+ // burst for the same uncached path would fan out into N stat syscalls
+ // (thundering herd). Each entry resolves once and is removed after.
+ const pending = new Map();
const exists = (path) => {
if (files.has(path)) {
@@ -27,30 +42,60 @@ export default async function staticHandler(dir, options = {}) {
if (misses.has(path)) {
return false;
}
- try {
- const file = statSync(join(cwd, options.cwd ?? ".", path));
- if (file.isFile()) {
- const uncompressedPath = path.replace(/\.(br|gz)$/, "");
- files.set(path, {
- ...file,
- stats: file,
- path: join(options.cwd ?? cwd, path),
- etag: `W/"${file.size}-${file.mtime.getTime()}"`,
- mime: /(@[^.]+\.)?(rsc|remote)\.x-component$/.test(uncompressedPath)
- ? "text/x-component"
- : mime.getType(uncompressedPath) || "application/octet-stream",
- });
- return true;
- }
- } catch {
- // ignore
+ const inflight = pending.get(path);
+ if (inflight) return inflight;
+
+ // Defensive cap: if we already have too many cold-path stats running,
+ // pretend this one missed without statting. The request will fall
+ // through to the next handler (and ultimately to admission control,
+ // which is the right place to push back from).
+ if (pending.size >= MAX_PENDING_STATS) {
+ return false;
}
- misses.add(path);
- return false;
+
+ const work = (async () => {
+ try {
+ const file = await stat(join(cwd, options.cwd ?? ".", path));
+ if (file.isFile()) {
+ const uncompressedPath = path.replace(/\.(br|gz)$/, "");
+ files.set(path, {
+ ...file,
+ stats: file,
+ path: join(options.cwd ?? cwd, path),
+ etag: `W/"${file.size}-${file.mtime.getTime()}"`,
+ mime: /(@[^.]+\.)?(rsc|remote)\.x-component$/.test(uncompressedPath)
+ ? "text/x-component"
+ : mime.getType(uncompressedPath) || "application/octet-stream",
+ });
+ return true;
+ }
+ } catch {
+ // ignore
+ }
+ if (misses.size >= MAX_MISSES) {
+ misses.clear();
+ }
+ misses.add(path);
+ return false;
+ })().finally(() => {
+ pending.delete(path);
+ });
+
+ pending.set(path, work);
+ return work;
};
const fileCache = new Map();
+ // `exists()` returns `boolean` synchronously when the path is in the
+ // `files`/`misses` cache, or a `Promise` on the cold path.
+ // We unwrap inline at every call site rather than `await exists(...)`
+ // unconditionally — `await` on a plain boolean still costs a microtask,
+ // and the 404-flood path hits the misses cache 100% of the time after
+ // the first request. Eliding ~8 microtasks per 404 closes the small
+ // regression we measured against the sync-stat baseline.
+ const settled = (r) => (typeof r === "boolean" ? r : null);
+
return async function serveStatic(context) {
if (context.request.method !== "GET") {
return;
@@ -61,11 +106,13 @@ export default async function staticHandler(dir, options = {}) {
// Resolve the file: try the path directly, then as /index.html
let basename;
- if (exists(pathname)) {
+ let r = exists(pathname);
+ if (settled(r) ?? (await r)) {
basename = pathname;
} else {
const indexPath = `${pathname}/index.html`.replace(/^\/+/g, "/");
- if (exists(indexPath)) {
+ r = exists(indexPath);
+ if (settled(r) ?? (await r)) {
basename = indexPath;
} else {
// Neither the path nor its index.html exist in this handler's directory.
@@ -75,15 +122,18 @@ export default async function staticHandler(dir, options = {}) {
}
let prelude = null;
- if (exists(`${basename}.postponed.json`)) {
+ r = exists(`${basename}.postponed.json`);
+ if (settled(r) ?? (await r)) {
prelude = basename;
pathname = basename;
+ const cacheR = exists(`${basename}.prerender-cache.json`);
+ const cacheExists = settled(cacheR) ?? (await cacheR);
const [{ default: postponed }, { default: cacheData }] =
await Promise.all([
import(pathToFileURL(join(dir, `${basename}.postponed.json`)), {
with: { type: "json" },
}),
- exists(`${basename}.prerender-cache.json`)
+ cacheExists
? import(
pathToFileURL(join(dir, `${basename}.prerender-cache.json`)),
{
@@ -99,18 +149,29 @@ export default async function staticHandler(dir, options = {}) {
const isBrotli = acceptEncoding?.includes("br");
const isGzip = acceptEncoding?.includes("gzip");
- if (isBrotli && exists(`${basename}.br`)) {
- pathname = `${basename}.br`;
- contentEncoding = "br";
- } else if (isGzip && exists(`${basename}.gz`)) {
- pathname = `${basename}.gz`;
- contentEncoding = "gzip";
+ if (isBrotli) {
+ r = exists(`${basename}.br`);
+ if (settled(r) ?? (await r)) {
+ pathname = `${basename}.br`;
+ contentEncoding = "br";
+ } else {
+ pathname = basename;
+ }
+ } else if (isGzip) {
+ r = exists(`${basename}.gz`);
+ if (settled(r) ?? (await r)) {
+ pathname = `${basename}.gz`;
+ contentEncoding = "gzip";
+ } else {
+ pathname = basename;
+ }
} else {
pathname = basename;
}
}
- if (pathname !== "/" && exists(pathname)) {
+ r = pathname !== "/" ? exists(pathname) : false;
+ if (pathname !== "/" && (settled(r) ?? (await r))) {
try {
const file = files.get(pathname);
if (context.request.headers.get("if-none-match") === file.etag) {
diff --git a/packages/react-server/lib/http/middleware.mjs b/packages/react-server/lib/http/middleware.mjs
index 3c8ed2f3..3ab87f64 100644
--- a/packages/react-server/lib/http/middleware.mjs
+++ b/packages/react-server/lib/http/middleware.mjs
@@ -76,9 +76,24 @@ export function createMiddleware(handler, options = {}) {
? xfFor.split(/[,]/)[0].trim()
: req.socket?.remoteAddress;
const fullUrl = `${protocol}://${host}${req.url}`;
+ // Sanitize headers for the WHATWG Request constructor.
+ // Under Node's HTTP/2 compat layer, `req.headers` contains:
+ // - `Symbol(sensitiveHeaders)` — Node's internal sensitive-header
+ // tracking; webidl's `record` chokes on
+ // symbol keys with a TypeError before the constructor can fall
+ // back to anything sensible.
+ // - HTTP/2 pseudo-headers (`:method`, `:path`, `:authority`,
+ // `:scheme`) which WHATWG Headers reject as forbidden header names.
+ // Both have to be stripped explicitly. We build a plain string-keyed
+ // record so `req.headers` itself is left untouched (other code paths,
+ // logging, observability all still see the raw shape).
+ const fetchHeaders = {};
+ for (const k of Object.keys(headersObj)) {
+ if (k[0] !== ":") fetchHeaders[k] = headersObj[k];
+ }
const requestInit = {
method: req.method,
- headers: headersObj,
+ headers: fetchHeaders,
};
if (!(req.method === "GET" || req.method === "HEAD")) {
if (isDeno) {
@@ -223,7 +238,7 @@ export function createMiddleware(handler, options = {}) {
try {
const { afterHooks } = ctx;
- if (afterHooks) {
+ if (afterHooks?.size > 0) {
const logger = getRuntime(LOGGER_CONTEXT);
await ContextStorage.run(
{
@@ -253,6 +268,27 @@ export function createMiddleware(handler, options = {}) {
// no-op if OTel not available
}
}
+      // Run afterHooks on error path too (e.g. admission control decrement),
+      // with the same ContextStorage wrapping as the success path so hooks
+      // that read runtime context (logger, etc.) keep working. NOTE(review):
+      // if the success path already ran these hooks before a later throw, they run twice — confirm hooks are idempotent or cleared after use.
+ if (ctx?.afterHooks?.size > 0) {
+ try {
+ const logger = getRuntime(LOGGER_CONTEXT);
+ await ContextStorage.run(
+ {
+ [AFTER_CONTEXT]: true,
+ [LOGGER_CONTEXT]: logger,
+ },
+ () =>
+ Promise.allSettled(
+ Array.from(ctx.afterHooks).map((hook) => hook(e))
+ )
+ );
+ } catch {
+ // no-op
+ }
+ }
if (e.name !== "AbortError" && e.message !== "aborted") {
if (next) next(e);
else internalError(res, e);
diff --git a/packages/react-server/lib/start/action.mjs b/packages/react-server/lib/start/action.mjs
index e7251006..4176ca4d 100644
--- a/packages/react-server/lib/start/action.mjs
+++ b/packages/react-server/lib/start/action.mjs
@@ -21,22 +21,105 @@ import { getServerConfig } from "../utils/server-config.mjs";
import createLogger from "./create-logger.mjs";
import createServer from "./create-server.mjs";
-function primary(numCPUs) {
+function primary(numCPUs, configRoot) {
+ let isShuttingDown = false;
+
// fork workers
for (let i = 0; i < numCPUs; i++) {
cluster.fork();
}
- cluster.on("exit", () => {
- process.exit(1);
- });
+ // ── Crash-loop protection ──
+ // If workers keep dying within a short window we treat that as a
+ // deterministic startup failure rather than a transient crash and
+ // exit the master so the orchestrator can surface the failure
+ // (instead of fork-bombing the host).
+ //
+ // The defaults (numCPUs * 5 exits per 60s) are tuned so a fleet rolling
+ // through a deterministic boot bug exits quickly, but transient worker
+ // panics under traffic don't trip the trap.
+ const crashLoopWindowMs = configRoot?.server?.clusterRespawnWindow ?? 60_000;
+ const crashLoopThreshold =
+ configRoot?.server?.clusterRespawnLimit ?? numCPUs * 5;
+ const recentExits = [];
- process.on("SIGINT", () => {
- process.exit(0);
- });
- process.on("SIGTERM", () => {
- process.exit(0);
+ cluster.on("exit", (worker, code, signal) => {
+ if (isShuttingDown) return;
+ const logger = getRuntime(LOGGER_CONTEXT);
+ (logger ?? console).warn(
+ `worker #${worker.process.pid} died (${signal || code}), restarting...`
+ );
+
+ const now = Date.now();
+ recentExits.push(now);
+ while (recentExits.length > 0 && now - recentExits[0] > crashLoopWindowMs) {
+ recentExits.shift();
+ }
+ if (recentExits.length >= crashLoopThreshold) {
+ (logger ?? console).error(
+ `worker crash loop detected (${recentExits.length} exits in ${crashLoopWindowMs}ms), exiting master`
+ );
+ // Kill any in-flight workers BEFORE exiting so their IPC channels close
+ // cleanly. Without this, freshly-forked workers race to send their
+ // "online" message and emit EPIPE / unhandled 'error' events, dumping
+ // alarming stack traces into operator logs at the end of an already
+ // bad situation.
+ isShuttingDown = true;
+ for (const id in cluster.workers) {
+ try {
+ cluster.workers[id].process.kill("SIGKILL");
+ } catch {
+ // already gone
+ }
+ }
+ // Brief delay so the workers actually die before we exit.
+      setTimeout(() => process.exit(1), 50); // deliberately NOT unref'd: if the loop drains first, we'd exit 0 and hide the crash loop
+ return;
+ }
+ cluster.fork();
});
+
+ // Master's hard deadline must outlast workers — they have `shutdownTimeout`
+ // to drain, plus a small grace for IPC/exit ceremony.
+ const workerShutdownTimeout = configRoot?.server?.shutdownTimeout ?? 25_000;
+ const masterShutdownTimeout = workerShutdownTimeout + 5_000;
+
+ function shutdown(signal) {
+ if (isShuttingDown) return;
+ isShuttingDown = true;
+ const logger = getRuntime(LOGGER_CONTEXT);
+ (logger ?? console).info?.(`${signal} received, shutting down workers...`);
+
+ // Forward the signal to every worker explicitly. We can't rely on the
+ // OS process group: under Docker/k8s the master is PID 1 and signals
+ // are not propagated. Without this, workers never see SIGTERM and
+ // never trigger their own gracefulShutdown — the master would just
+ // time out and force-exit, dropping in-flight requests instead of
+ // draining them.
+ for (const id in cluster.workers) {
+ try {
+ cluster.workers[id].process.kill(signal);
+ } catch {
+ // worker already dead
+ }
+ }
+
+ // If workers don't exit in time, force-exit master.
+ const timeout = setTimeout(() => {
+ process.exit(1);
+ }, masterShutdownTimeout);
+ timeout.unref?.();
+
+ let remaining = Object.keys(cluster.workers).length;
+ if (remaining === 0) process.exit(0);
+ cluster.on("exit", () => {
+ remaining--;
+ if (remaining <= 0) process.exit(0);
+ });
+ }
+
+ process.on("SIGINT", () => shutdown("SIGINT"));
+ process.on("SIGTERM", () => shutdown("SIGTERM"));
}
async function worker(root, options, config) {
@@ -68,6 +151,37 @@ async function worker(root, options, config) {
)
);
}
+
+ // ── Graceful shutdown for worker processes ──
+ const shutdownTimeout = configRoot?.server?.shutdownTimeout ?? 25_000;
+ let isShuttingDown = false;
+
+ function gracefulShutdown(signal) {
+ if (isShuttingDown) return;
+ isShuttingDown = true;
+ logger.info?.(`${signal} received, draining connections...`);
+
+ // Reject queued backpressure waiters, set Connection: close on
+ // future responses, drop keepAliveTimeout to 1ms, and destroy
+ // currently idle sockets.
+ server.shutdown?.();
+
+ // Stop accepting new connections, wait for in-flight to finish.
+ listener.close(() => {
+ logger.info?.("all connections drained, exiting");
+ process.exit(0);
+ });
+
+ // Force-exit after timeout (stay within k8s terminationGracePeriodSeconds)
+ const forceTimeout = setTimeout(() => {
+ logger.warn?.("forced shutdown after timeout");
+ process.exit(1);
+ }, shutdownTimeout);
+ forceTimeout.unref?.();
+ }
+
+ process.on("SIGINT", () => gracefulShutdown("SIGINT"));
+ process.on("SIGTERM", () => gracefulShutdown("SIGTERM"));
});
}
@@ -96,21 +210,17 @@ export default async function start(root, options) {
(process.env.REACT_SERVER_CLUSTER || configRoot?.cluster) &&
cluster.isPrimary
) {
- primary(numCPUs);
+ primary(numCPUs, configRoot);
} else {
- process.on("SIGINT", () => {
- process.exit(0);
- });
- process.on("SIGTERM", () => {
- process.exit(0);
- });
process.on("unhandledRejection", (reason) => {
const logger = getRuntime(LOGGER_CONTEXT);
(logger ?? console).error(reason);
process.exit(1);
});
- worker(root, options, config);
+ // Graceful shutdown signals are handled inside worker() after
+ // the server starts listening, so they can properly drain connections.
+ await worker(root, options, config);
}
} catch (error) {
console.error(error);
diff --git a/packages/react-server/lib/start/adaptive-limiter.mjs b/packages/react-server/lib/start/adaptive-limiter.mjs
new file mode 100644
index 00000000..e3430d32
--- /dev/null
+++ b/packages/react-server/lib/start/adaptive-limiter.mjs
@@ -0,0 +1,331 @@
+/**
+ * Adaptive concurrency limiter using Event Loop Utilization (ELU).
+ *
+ * The limiter dynamically adjusts the maximum number of concurrent requests
+ * based on Node.js Event Loop Utilization — a direct measure of how saturated
+ * the server's single-threaded event loop is. This is the most reliable signal
+ * for a Node.js server because:
+ *
+ * - Unlike latency-based algorithms (Vegas, Gradient), ELU is unaffected by
+ * workload heterogeneity. Switching from a fast route to a slow route
+ * increases latency naturally but does NOT mean the server is overloaded.
+ * ELU only rises when the event loop itself is saturated.
+ *
+ * - Unlike CPU%, ELU directly measures event loop busy/idle time, which is
+ * the actual bottleneck for a single-threaded server.
+ *
+ * The control loop uses AIMD (Additive Increase, Multiplicative Decrease):
+ * - **ELU < eluMax**: increase limit by sqrt(limit) per window (fast recovery)
+ * - **ELU ≥ eluMax**: decrease limit by 10% per window (gentle backoff)
+ *
+ * The limiter starts wide open (initialLimit = maxLimit) and should be
+ * invisible under normal load. It only tightens when the event loop is
+ * genuinely saturated.
+ *
+ * When a request cannot be immediately admitted, it is placed in a bounded FIFO
+ * queue with a per-request timeout. Slots are released to queued waiters before
+ * becoming available for new `acquire()` calls, ensuring fair ordering.
+ *
+ * @module
+ */
+
+import { performance } from "node:perf_hooks";
+
+/**
+ * @typedef {Object} AdaptiveLimiterConfig
+ * @property {number} [initialLimit=1000] Starting concurrency limit (defaults to maxLimit — start wide open)
+ * @property {number} [minLimit=1] Floor for the adaptive limit
+ * @property {number} [maxLimit=1000] Ceiling for the adaptive limit
+ * @property {number} [eluMax=0.95] ELU level that triggers limit decrease and queue skip (0–1)
+ * @property {number} [sampleWindow=1000] Interval (ms) for recalculation and ELU sampling
+ * @property {number} [smoothingFactor=0.2] EWMA factor for `smoothedLatency` in stats (observability only — not used in the control loop)
+ * @property {number} [queueSize=100] Max requests waiting in the backpressure queue
+ * @property {number} [queueTimeout=5000] Max time (ms) a request waits in the queue before 503
+ * @property {{ info?: Function, warn?: Function }} [logger] Optional logger; transitions (limit shrink/recover, queue saturation, 503 firing) are reported here.
+ */
+
+/**
+ * Create an adaptive concurrency limiter.
+ *
+ * @param {AdaptiveLimiterConfig} [config]
+ */
+export function createAdaptiveLimiter(config = {}) {
+ const {
+ minLimit = 1,
+ maxLimit = 1000,
+ initialLimit = maxLimit,
+ eluMax = 0.95,
+ sampleWindow = 1000,
+ smoothingFactor = 0.2,
+ queueSize = 100,
+ queueTimeout = 5000,
+ logger = null,
+ } = config;
+
+ // Counters for the optional periodic log line — reset every recalc tick.
+ let rejected503 = 0;
+ let queuedTotal = 0;
+
+ // ── Limiter state ──
+ let limit = Math.max(minLimit, Math.min(maxLimit, initialLimit));
+ let inflight = 0;
+ let sampleCount = 0;
+
+ // ── Latency tracking (for observability, not used in control loop) ──
+ let smoothedLatency = 0;
+
+ // ── ELU state ──
+ let prevELU = performance.eventLoopUtilization();
+ let currentELU = 0;
+
+ // ── Wait queue (bounded FIFO) ──
+ // Each entry: { resolve, timer, abortHandler, signal }
+ // resolve(true) = slot acquired, proceed
+ // resolve(false) = timed out or destroyed, reject with 503
+  /** @type {{ resolve: (v: boolean) => void, timer: ReturnType<typeof setTimeout>, abortHandler: (() => void) | null, signal: AbortSignal | null }[]} */
+ const waitQueue = [];
+
+ /**
+ * Try to hand a slot to the next queued waiter.
+ *
+ * Critically, this respects the current adaptive limit: if inflight >= limit
+ * after the release, we do NOT wake a waiter. This lets the server drain back
+ * to the computed limit under overload. Without this check, drainOne() would
+ * defeat the adaptive algorithm by keeping inflight permanently above the
+ * limit — every finished request would immediately be replaced.
+ *
+ * Skipped entries (aborted clients) are cleaned up without consuming a slot.
+ */
+ function drainOne() {
+ while (waitQueue.length > 0) {
+ // Respect the adaptive limit — let inflight drain before admitting more
+ if (inflight >= limit) {
+ return false;
+ }
+ const waiter = waitQueue.shift();
+ clearTimeout(waiter.timer);
+ if (waiter.signal) {
+ waiter.signal.removeEventListener("abort", waiter.abortHandler);
+ }
+ // Client already disconnected — skip without consuming a slot
+ if (waiter.signal?.aborted) {
+ continue;
+ }
+ inflight++;
+ waiter.resolve(true);
+ return true;
+ }
+ return false;
+ }
+
+ // ── Periodic recalculation (AIMD based on ELU) ──
+ const recalcInterval = setInterval(() => {
+ // Sample ELU over the last window. The `prev` argument must be a
+ // cumulative snapshot, NOT a delta: Node computes `current - prev`
+ // and a diff object's idle/active fields aren't cumulative values.
+ // So we call `eventLoopUtilization()` again with no args to capture
+ // a fresh cumulative baseline for the next window. The few ns gap
+ // between the two calls is unobservable.
+ const nowELU = performance.eventLoopUtilization(prevELU);
+ currentELU = nowELU.utilization;
+ prevELU = performance.eventLoopUtilization();
+
+ const prevLimit = limit;
+
+ if (currentELU >= eluMax) {
+ // ── Decrease: multiplicative (gentle 10% backoff) ──
+ // Only shrink when we're actually at capacity. If inflight is well
+ // below the limit, the high ELU is transient (GC, etc.), not sustained.
+ if (inflight >= limit * 0.5) {
+ limit = Math.max(minLimit, Math.floor(limit * 0.9));
+ }
+ } else {
+ // ── Increase: additive (sqrt scaling for proportional exploration) ──
+ // No dead zone — always recover toward maxLimit unless overloaded.
+ // The limiter starts wide open and should stay wide open under normal load.
+ limit = Math.min(
+ maxLimit,
+ limit + Math.max(1, Math.ceil(Math.sqrt(limit)))
+ );
+ }
+
+ // Wake queued waiters if limit grew
+ if (limit > prevLimit) {
+ while (inflight < limit && waitQueue.length > 0) {
+ if (!drainOne()) break;
+ }
+ }
+
+ // ── Operator-visible transitions ──
+ // We log only when something changes — silent under steady-state.
+ if (logger) {
+ if (limit < prevLimit) {
+ logger.warn?.(
+ `[adaptive-limiter] limit ${prevLimit} → ${limit} (ELU=${currentELU.toFixed(2)}, inflight=${inflight}, queued=${waitQueue.length})`
+ );
+ } else if (limit > prevLimit && prevLimit < maxLimit) {
+ logger.info?.(
+ `[adaptive-limiter] limit ${prevLimit} → ${limit} (recovering)`
+ );
+ }
+ if (rejected503 > 0 || queuedTotal > 0) {
+ logger.warn?.(
+ `[adaptive-limiter] window: ${rejected503} rejected, ${queuedTotal} queued, queue depth ${waitQueue.length}/${queueSize}`
+ );
+ }
+ }
+ rejected503 = 0;
+ queuedTotal = 0;
+
+ // Reset sample count for next window
+ sampleCount = 0;
+ }, sampleWindow);
+
+ // Don't keep the process alive just for this timer
+ recalcInterval.unref();
+
+ return {
+ /**
+ * Try to acquire a slot, optionally waiting in a bounded queue.
+ *
+ * @param {AbortSignal} [signal] - Client connection abort signal. When the
+ * client disconnects while queued, the waiter is removed automatically.
+     * @returns {boolean | Promise<boolean>} `true` if the request may proceed,
+ * `false` if rejected. Returns a plain boolean for the fast path (no
+ * Promise overhead), a Promise only when the request is queued.
+ *
+ * Resolution paths:
+ * - Slot available (inflight < limit) → returns `true` (sync, no Promise)
+ * - At limit + ELU > eluMax → returns `false` (sync, no Promise)
+ * - At limit + queue full → returns `false` (sync, no Promise)
+ * - Queued → returns Promise that resolves `true`/`false` on slot/timeout/abort
+ */
+ acquire(signal) {
+ // Fast path: a slot is available AND no one is already waiting.
+ // The `waitQueue.length === 0` guard preserves FIFO fairness — if
+ // there are queued waiters, a new arrival must not jump ahead even
+ // if `inflight < limit` (this can happen briefly between a recalc
+ // tick growing the limit and `drainOne` running through the queue).
+ // Returns a plain boolean (not a Promise) to avoid microtask overhead
+ // on the hot path. At 50k req/s, every microtask yield matters.
+ if (inflight < limit && waitQueue.length === 0) {
+ inflight++;
+ return true;
+ }
+
+ // Hard ELU ceiling: don't queue when the event loop is saturated.
+ // Note: `currentELU` only refreshes once per `sampleWindow`, so this
+ // signal lags by up to that many ms. The primary gate is still the
+ // limit itself; this check only prevents piling work onto the queue
+ // when we already know the loop is saturated.
+ if (currentELU > eluMax) {
+ rejected503++;
+ return false;
+ }
+
+ // Queue full: reject immediately
+ if (waitQueue.length >= queueSize) {
+ rejected503++;
+ return false;
+ }
+
+ // ── Enqueue with timeout ──
+ return new Promise((resolve) => {
+ const timer = setTimeout(() => {
+ // Remove from queue on timeout
+ const idx = waitQueue.indexOf(entry);
+ if (idx !== -1) waitQueue.splice(idx, 1);
+ if (signal) {
+ signal.removeEventListener("abort", abortHandler);
+ }
+ resolve(false);
+ }, queueTimeout);
+ timer.unref();
+
+ // Client disconnect handler
+ let abortHandler = null;
+ if (signal) {
+ abortHandler = () => {
+ clearTimeout(timer);
+ const idx = waitQueue.indexOf(entry);
+ if (idx !== -1) waitQueue.splice(idx, 1);
+ resolve(false);
+ };
+ signal.addEventListener("abort", abortHandler, { once: true });
+ }
+
+ const entry = { resolve, timer, abortHandler, signal: signal ?? null };
+ waitQueue.push(entry);
+ queuedTotal++;
+ });
+ },
+
+ /**
+ * Release a slot after the response is fully sent.
+ * If waiters are queued, the slot is handed to the next waiter (FIFO)
+ * rather than returned to the pool.
+ *
+ * @param {number} latencyMs - Request duration in milliseconds
+ */
+ release(latencyMs) {
+ inflight = Math.max(0, inflight - 1);
+
+ if (latencyMs > 0) {
+ // EWMA latency tracking (for observability only)
+ smoothedLatency =
+ smoothedLatency === 0
+ ? latencyMs
+ : smoothedLatency * (1 - smoothingFactor) +
+ latencyMs * smoothingFactor;
+ sampleCount++;
+ }
+
+ // Wake next queued waiter (if any)
+ drainOne();
+ },
+
+ /**
+ * Release without latency tracking. Used by the admission-control
+ * middleware on the steady-state happy path (no queueing happened),
+ * where the caller would otherwise pay `performance.now()` × 2 to
+ * compute latency that's never observed in steady-state. Latency
+ * stats remain populated by the contended `release(latencyMs)` path,
+ * which is where latency-based diagnostics actually matter.
+ */
+ releaseFast() {
+ inflight = Math.max(0, inflight - 1);
+ drainOne();
+ },
+
+ /**
+ * Observability snapshot. Safe to serialize to JSON for metrics/logging.
+ */
+ get stats() {
+ return {
+ limit,
+ inflight,
+ queued: waitQueue.length,
+ smoothedLatency,
+ elu: currentELU,
+ sampleCount,
+ };
+ },
+
+ /**
+ * Clean up the periodic interval and reject all queued waiters.
+ * Call this on server shutdown.
+ */
+ destroy() {
+ clearInterval(recalcInterval);
+ // Drain all waiters with rejection
+ while (waitQueue.length > 0) {
+ const waiter = waitQueue.shift();
+ clearTimeout(waiter.timer);
+ if (waiter.signal) {
+ waiter.signal.removeEventListener("abort", waiter.abortHandler);
+ }
+ waiter.resolve(false);
+ }
+ },
+ };
+}
diff --git a/packages/react-server/lib/start/create-server.mjs b/packages/react-server/lib/start/create-server.mjs
index 4d6d4257..8335e2f9 100644
--- a/packages/react-server/lib/start/create-server.mjs
+++ b/packages/react-server/lib/start/create-server.mjs
@@ -1,4 +1,5 @@
import { existsSync } from "node:fs";
+import { performance } from "node:perf_hooks";
import { join } from "node:path";
import {
@@ -19,6 +20,7 @@ import {
EXEC_OPTIONS,
HTTP_CONTEXT,
LIVE_IO,
+ LOGGER_CONTEXT,
MEMORY_CACHE_CONTEXT,
WORKER_THREAD,
} from "../../server/symbols.mjs";
@@ -33,6 +35,7 @@ import staticHandler from "../handlers/static.mjs";
import trailingSlashHandler from "../handlers/trailing-slash.mjs";
import * as sys from "../sys.mjs";
import { getServerCors } from "../utils/server-config.mjs";
+import { createAdaptiveLimiter } from "./adaptive-limiter.mjs";
import { createRenderer, hasRenderer } from "./render-dom.mjs";
import ssrHandler from "./ssr-handler.mjs";
@@ -56,6 +59,22 @@ export default async function createServer(root, options) {
}
runtime$(WORKER_THREAD, worker);
+ // ── Worker liveness tracking ──
+ // Node `Worker.exitCode` is unreliable for our use: it's `undefined` while
+ // alive AND remains `undefined` after `terminate()` resolves (verified
+ // empirically on Node 24.15). The 'exit' event is the only reliable signal.
+ // Attaching a listener is also safe for the in-process renderer (where
+ // `worker` is a custom EventEmitter port that never emits 'exit'); the
+ // listener registers but never fires, so `workerAlive` stays true — which
+ // is correct, because if the in-process renderer dies the whole server
+ // process dies and this readiness handler is unreachable anyway.
+ let workerAlive = true;
+ if (typeof worker?.on === "function") {
+ worker.on("exit", () => {
+ workerAlive = false;
+ });
+ }
+
const config = getRuntime(CONFIG_CONTEXT)?.[CONFIG_ROOT] ?? {};
// ── Telemetry: initialize OpenTelemetry SDK ──
@@ -71,6 +90,70 @@ export default async function createServer(root, options) {
},
});
+ // ── Server timeouts (configurable, with safe defaults for load-balanced environments) ──
+ const keepAliveTimeout = config.server?.keepAliveTimeout ?? 65_000;
+ const headersTimeout = config.server?.headersTimeout ?? 66_000;
+ const requestTimeout = config.server?.requestTimeout ?? 30_000;
+ const maxConcurrentRequests = config.server?.maxConcurrentRequests ?? 0;
+
+ // ── Adaptive backpressure (ELU-based AIMD) ──
+ // This feature is Node.js-only — it relies on `performance.eventLoopUtilization()`
+ // and a long-lived event loop. It does not load on edge runtimes
+ // (Cloudflare Workers, Vercel Edge, Deno Deploy) or in serverless invocations
+ // (Lambda, Vercel Functions); those code paths go through `build/edge.mjs`
+ // and never reach this file.
+ //
+ // Including the admission-control middleware in the chain costs ~10μs/request
+ // (an extra async function frame in the compose chain plus the acquire/release
+ // calls), which we measured as ~4–7% on hot routes in cluster mode. So we
+ // only enable it where overload protection is meaningful AND where the cost
+ // is justified.
+ //
+ // Resolution (highest priority first):
+ // 1. `REACT_SERVER_BACKPRESSURE` env var — `1`/`true` enables, `0`/`false`
+ // disables. Set per-deployment in Docker/k8s without touching config.
+ // 2. `server.backpressure.enabled` in config — explicit boolean wins over
+ // the cluster default.
+ // 3. Cluster mode default — when running under cluster (env var set or
+ // `cluster` config > 1), backpressure is on by default. Cluster mode
+ // is unambiguously a production deployment signal.
+ // 4. Otherwise (single-process `start`, dev): off.
+ const backpressureConfig = config.server?.backpressure;
+ const isClusterMode =
+ !!sys.getEnv("REACT_SERVER_CLUSTER") || Number(config.cluster) > 1;
+ const envBackpressure = sys.getEnv("REACT_SERVER_BACKPRESSURE");
+ let backpressureEnabled;
+ if (envBackpressure !== undefined && envBackpressure !== "") {
+ backpressureEnabled =
+ envBackpressure === "1" || envBackpressure.toLowerCase() === "true";
+ } else if (typeof backpressureConfig?.enabled === "boolean") {
+ backpressureEnabled = backpressureConfig.enabled;
+ } else {
+ backpressureEnabled = isClusterMode;
+ }
+
+ let adaptiveLimiter = null;
+ if (backpressureEnabled) {
+ adaptiveLimiter = createAdaptiveLimiter({
+ initialLimit: backpressureConfig?.initialLimit,
+ minLimit: backpressureConfig?.minLimit,
+ // When both adaptive and static limits are configured, static is the hard ceiling
+ maxLimit:
+ maxConcurrentRequests > 0
+ ? Math.min(
+ backpressureConfig?.maxLimit ?? 1000,
+ maxConcurrentRequests
+ )
+ : backpressureConfig?.maxLimit,
+ eluMax: backpressureConfig?.eluMax,
+ sampleWindow: backpressureConfig?.sampleWindow,
+ smoothingFactor: backpressureConfig?.smoothingFactor,
+ queueSize: backpressureConfig?.queueSize,
+ queueTimeout: backpressureConfig?.queueTimeout,
+ logger: getRuntime(LOGGER_CONTEXT),
+ });
+ }
+
const initialRuntime = {
[MEMORY_CACHE_CONTEXT]: new StorageCache(memoryDriver),
};
@@ -85,10 +168,39 @@ export default async function createServer(root, options) {
const publicDir =
typeof config.public === "string" ? config.public : "public";
+ // ── Admission control state ──
+ let inflightRequests = 0;
+
const initialHandlers = await Promise.all([
- async function prerenderInit() {
- PrerenderStorage.enterWith({});
+ // ── Health check endpoints (bypass all middleware for minimal latency) ──
+ async function healthCheck(context) {
+ if (context.url.pathname === "/__react_server_health__") {
+ return new Response("ok", {
+ status: 200,
+ headers: { "content-type": "text/plain" },
+ });
+ }
+ if (context.url.pathname === "/__react_server_ready__") {
+ // The render Worker (not the cluster worker) is what drives RSC/SSR.
+ // If it has exited, the render pipeline is dead even though the HTTP
+ // listener is still alive — return 503 so the orchestrator stops
+ // routing traffic. We track liveness via an 'exit' event listener
+ // (see `workerAlive` in the closure above) because Worker.exitCode
+ // is unreliable for this purpose.
+ if (!workerAlive) {
+ return new Response("not ready", {
+ status: 503,
+ headers: { "content-type": "text/plain" },
+ });
+ }
+ return new Response("ok", {
+ status: 200,
+ headers: { "content-type": "text/plain" },
+ });
+ }
},
+ // Static files are served before admission control — they are cheap I/O
+ // and should not be gated by the concurrency limiter or count toward inflight.
staticHandler(join(cwd, options.outDir, "dist"), {
cwd: join(options.outDir, "dist"),
}),
@@ -104,6 +216,82 @@ export default async function createServer(root, options) {
]
: []),
trailingSlashHandler(),
+ // ── Admission control (reject requests when at capacity) ──
+ // Only inserted into the chain when explicitly enabled. Even a no-op
+ // middleware costs ~10μs/request (async function frame + compose hop),
+ // which we measured at ~4–7% on hot routes in cluster mode. Build the
+ // handler conditionally and let the spread skip it when off.
+ ...(adaptiveLimiter
+ ? [
+ // Placed after static handlers so only SSR/dynamic requests are gated.
+ async function admissionControl(context) {
+ // acquire() returns `true` (sync fast path), `false` (sync reject),
+ // or a Promise (queued). Branch on the type to avoid an `await`
+ // microtask on the steady-state happy path.
+ const result = adaptiveLimiter.acquire(context.signal);
+ if (result === true) {
+ // Steady-state happy path: zero latency tracking, just decrement.
+ // `performance.now()` × 2 + EWMA math is per-request overhead we
+ // skip here; latency observability remains on the contended path.
+ try {
+ return await context.next();
+ } finally {
+ adaptiveLimiter.releaseFast();
+ }
+ }
+ if (result === false) {
+ return new Response("Service Busy", {
+ status: 503,
+ headers: {
+ "content-type": "text/plain",
+ "retry-after": "1",
+ },
+ });
+ }
+ // Queued: await admission, then track latency for diagnostics.
+ const acquired = await result;
+ if (!acquired) {
+ return new Response("Service Busy", {
+ status: 503,
+ headers: {
+ "content-type": "text/plain",
+ "retry-after": "1",
+ },
+ });
+ }
+ const startTime = performance.now();
+ try {
+ return await context.next();
+ } finally {
+ adaptiveLimiter.release(performance.now() - startTime);
+ }
+ },
+ ]
+ : maxConcurrentRequests > 0
+ ? [
+ // Static admission control fallback
+ async function staticAdmissionControl(context) {
+ if (inflightRequests >= maxConcurrentRequests) {
+ return new Response("Service Busy", {
+ status: 503,
+ headers: {
+ "content-type": "text/plain",
+ "retry-after": "1",
+ },
+ });
+ }
+ inflightRequests++;
+ try {
+ return await context.next();
+ } finally {
+ inflightRequests--;
+ }
+ },
+ ]
+ : []),
+ async function prerenderInit() {
+ PrerenderStorage.enterWith({});
+ },
cookie(config.cookies),
...(config.handlers?.pre ?? []),
ssrHandler(root, options),
@@ -141,6 +329,15 @@ export default async function createServer(root, options) {
trustProxy: config.server?.trustProxy ?? options.trustProxy,
});
+ // Node's default `connectionsCheckingInterval` is 30s, meaning slow-headers
+ // / slow-body timeouts (`headersTimeout`, `requestTimeout`) only fire at
+ // that interval — so a partial request can hold a connection for up to 30s
+ // beyond its configured deadline. We tighten this to 5s so timeouts fire
+ // much closer to their configured value (verified empirically: with this
+ // override, a `headersTimeout: 2000` request closes at 2.0s instead of 30s).
+ const connectionsCheckingInterval =
+ config.server?.connectionsCheckingInterval ?? 5_000;
+
let server;
let httpServer = options.httpServer;
if (options.middlewareMode) {
@@ -149,12 +346,18 @@ export default async function createServer(root, options) {
const httpsOptions = config.server?.https ?? options.https;
if (!httpsOptions) {
const { createServer } = await import("node:http");
- server = httpServer = createServer(middlewares);
+ server = httpServer = createServer(
+ { connectionsCheckingInterval },
+ middlewares
+ );
} else {
// fallback to http1 when proxy is needed.
if (config.server?.proxy) {
const { createServer } = await import("node:https");
- server = httpServer = createServer(httpsOptions, middlewares);
+ server = httpServer = createServer(
+ { ...httpsOptions, connectionsCheckingInterval },
+ middlewares
+ );
} else {
const { createSecureServer } = await import("node:http2");
server = httpServer = createSecureServer(
@@ -171,6 +374,41 @@ export default async function createServer(root, options) {
}
}
+ // ── Apply server timeouts ──
+ // The HTTP/1.1-specific knobs (`keepAliveTimeout`, `headersTimeout`,
+ // `requestTimeout`) only protect the HTTP/1.1 path. HTTP/2 sessions go
+ // through a different state machine and ignore them — so we ALSO call
+ // `setTimeout` on the server, which sets the underlying socket idle
+ // timeout. Without this, an HTTP/2 client that completes the TLS handshake
+ // but never sends a HEADERS frame can hold the connection indefinitely.
+ if (httpServer) {
+ httpServer.keepAliveTimeout = keepAliveTimeout;
+ httpServer.headersTimeout = headersTimeout;
+ if (requestTimeout > 0) {
+ httpServer.requestTimeout = requestTimeout;
+ }
+ if (typeof httpServer.setTimeout === "function" && requestTimeout > 0) {
+ httpServer.setTimeout(requestTimeout);
+ }
+ }
+
+ // ── Graceful shutdown: Connection: close header ──
+ // During shutdown, every response gets `Connection: close` so the client
+ // stops reusing keep-alive connections. The client closes the TCP
+ // connection itself after receiving the response — cleanly, no dropped
+ // requests. Idle sockets (no in-flight request) are destroyed directly
+ // via closeIdleConnections() since there's no response to carry the header.
+ //
+ // The 'request' listener is attached lazily inside server.shutdown() rather
+ // than at startup: at 50k req/s, avoiding one extra listener invocation per
+ // request — just to check a one-way boolean — is worth the lazy attachment.
+ let isServerShuttingDown = false;
+ const onShutdownRequest = (_req, res) => {
+ if (isServerShuttingDown && !res.headersSent) {
+ res.setHeader("Connection", "close");
+ }
+ };
+
if (
httpServer &&
existsSync(join(cwd, options.outDir, "server/live-io.manifest.json"))
@@ -209,8 +447,15 @@ export default async function createServer(root, options) {
});
});
+ // Safety net: if anything tears down the http server without going through
+ // `server.shutdown()` (tests, embedders, future code), make sure socket.io
+ // also closes — otherwise it leaks upgrade connections.
httpServer.on("close", () => {
- io.close();
+ try {
+ io.close();
+ } catch {
+ // already closed
+ }
});
}
@@ -224,5 +469,52 @@ export default async function createServer(root, options) {
// ── Telemetry: end startup span ──
startupSpan.end();
+ // ── Internal shutdown hook ──
+ // `server.shutdown` is consumed by `start/action.mjs` (cluster worker
+ // graceful-shutdown handler) BEFORE `listener.close()`. It is NOT a
+ // public API — the shape and lifecycle may change. External code that
+ // wants graceful shutdown should send SIGTERM and let the worker do it.
+ //
+ // The hook:
+ // 1. Rejects all queued backpressure waiters
+ // 2. Closes socket.io (which holds upgrade connections)
+ // 3. Flags the server so all future responses include `Connection: close`
+ // 4. Sets keepAliveTimeout to 1ms for connections that complete during shutdown
+ // 5. Destroys currently idle sockets
+ // 6. After a grace period, force-closes ALL remaining connections
+ server.shutdown = () => {
+ isServerShuttingDown = true;
+ // Attach the Connection: close stamper now — kept off the hot path until
+ // shutdown actually starts.
+ if (httpServer) {
+ httpServer.on("request", onShutdownRequest);
+ }
+ if (adaptiveLimiter) {
+ adaptiveLimiter.destroy();
+ }
+ // Close socket.io BEFORE closing the HTTP server — io holds upgrade
+ // connections that prevent httpServer.close() from completing.
+ const liveIO = getRuntime(LIVE_IO);
+ if (liveIO?.io) {
+ liveIO.io.close();
+ }
+ if (httpServer) {
+ httpServer.keepAliveTimeout = 1;
+ if (typeof httpServer.closeIdleConnections === "function") {
+ httpServer.closeIdleConnections();
+ }
+ // Give in-flight requests a moment to complete, then force-close
+ // all remaining connections. This handles sockets that Node.js
+ // hasn't marked as idle yet (e.g. response flushing, keep-alive
+ // state transitions).
+ const forceClose = setTimeout(() => {
+ if (typeof httpServer.closeAllConnections === "function") {
+ httpServer.closeAllConnections();
+ }
+ }, 1500);
+ forceClose.unref();
+ }
+ };
+
return server;
}