diff --git a/.github/scripts/windows/build.bat b/.github/scripts/windows/build.bat index 1b38baf..23fb930 100644 --- a/.github/scripts/windows/build.bat +++ b/.github/scripts/windows/build.bat @@ -41,9 +41,7 @@ if not exist "%SDK_RUNNER%" ( exit /b 3 ) -for /f "delims=" %%T in ('call .github\scripts\windows\find-vs-toolset.bat %PHP_BUILD_CRT%') do set "VS_TOOLSET=%%T" -echo Got VS Toolset %VS_TOOLSET% -cmd /c %SDK_RUNNER% -s %VS_TOOLSET% -t .github\scripts\windows\build_task.bat +cmd /c %SDK_RUNNER% -t .github\scripts\windows\build_task.bat if %errorlevel% neq 0 exit /b 3 exit /b 0 diff --git a/.github/scripts/windows/find-target-branch.bat b/.github/scripts/windows/find-target-branch.bat index bac6c8b..89652bc 100644 --- a/.github/scripts/windows/find-target-branch.bat +++ b/.github/scripts/windows/find-target-branch.bat @@ -1,18 +1,8 @@ @echo off -rem Pin the PHP SDK dependency series to 8.5 (vs17). -rem -rem We build php-src's dev tip (true-async == 8.6-dev) on the windows-2022 -rem runner, i.e. the vs17 / VS 2022 toolchain. php.net's deps server only -rem publishes the bleeding edge (branches "8.6" and "master") for the vs18 / -rem VS 2026 toolchain -- there is no vs17 build of those. The newest series -rem that ships vs17 dependencies is 8.5, and those libs (openssl, libxml2, -rem ...) build the 8.6 tip fine: the series tracks the toolchain, not the PHP -rem minor. Asking for "8.6"/"master" under --crt vs17 fails with -rem "CRT 'vs17' doesn't match any available for branch ...". -rem -rem Bump this to 8.6 once php.net publishes packages-8.6-vs17-*, or switch the -rem runner+PHP_BUILD_CRT to vs18 to follow the dev-tip deps directly. -rem (Old logic derived "8." and remapped a hardcoded 8.5 -> master; -rem it broke when php-src went 8.6 and the dev deps moved to vs18.) -set BRANCH=8.5 +for /f "usebackq tokens=3" %%i in (`findstr PHP_MAJOR_VERSION main\php_version.h`) do set BRANCH=%%i +for /f "usebackq tokens=3" %%i in (`findstr PHP_MINOR_VERSION main\php_version.h`) do set BRANCH=%BRANCH%.%%i + +if /i "%BRANCH%" equ "8.5" ( + set BRANCH=master +) diff --git a/.github/scripts/windows/test.bat b/.github/scripts/windows/test.bat index 96f676c..57e254b 100644 --- a/.github/scripts/windows/test.bat +++ b/.github/scripts/windows/test.bat @@ -9,8 +9,7 @@ if not exist "%SDK_RUNNER%" ( exit /b 3 ) -for /f "delims=" %%T in ('call .github\scripts\windows\find-vs-toolset.bat %PHP_BUILD_CRT%') do set "VS_TOOLSET=%%T" -cmd /c %SDK_RUNNER% -s %VS_TOOLSET% -t .github\scripts\windows\test_task.bat +cmd /c %SDK_RUNNER% -t .github\scripts\windows\test_task.bat if %errorlevel% neq 0 exit /b 3 exit /b 0 diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index c491289..2596a9c 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -26,14 +26,14 @@ env: jobs: windows: name: WINDOWS_X64_ZTS_RELEASE - runs-on: windows-2022 + runs-on: windows-2025-vs2026 timeout-minutes: 60 env: PHP_BUILD_CACHE_BASE_DIR: C:\build-cache PHP_BUILD_OBJ_DIR: C:\obj PHP_BUILD_CACHE_SDK_DIR: C:\build-cache\sdk - PHP_BUILD_SDK_BRANCH: php-sdk-2.3.0 - PHP_BUILD_CRT: vs17 + PHP_BUILD_SDK_BRANCH: php-sdk-2.7.1 + PHP_BUILD_CRT: vs18 PLATFORM: x64 THREAD_SAFE: "1" INTRINSICS: AVX2 diff --git a/CHANGELOG.md b/CHANGELOG.md index e4a112e..89b1534 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- **hq-interop (HTTP/0.9-over-QUIC) for the interop matrix (#80).** A second QUIC + ALPN, `hq-interop`, served straight off the transport (no nghttp3): a raw bidi + stream `GET ` returns the file bytes + FIN from `setHttp3HqDocroot()`. + Lets the quic-interop-runner reach the server for the whole transport matrix + (transfer/multiplexing/migration/loss), which it negotiates over hq, not h3. + `h3` stays preferred when a peer offers both; the h3 path is unchanged. + +- **HTTP/3 transport reactor pool (experimental, #80).** Behind + `TRUE_ASYNC_SERVER_REACTOR_POOL=1` + `setWorkers(2+)`: dedicated C reactors own the + QUIC sockets (no PHP on the transport thread), hand parsed requests to PHP workers + by pointer, and serve responses back over a non-blocking reverse channel; static + files are served on the reactor. Adds CID steering (owner-reactor id encoded in the + connection id, forwarding migrated clients across the split — #72) and a + migration-storm guard that sheds clients rebinding past a rate cap. Dispatch is + reactor-paired: a connection sticks to one of its reactor's workers and spills to a + less-loaded worker when its home backs up or dies. Off by default. - Lock-free inter-thread message queue primitive (#81): bounded MPSC/SPSC C-ABI wrappers over moodycamel (`thread_queue`) plus a reactor-integrated MPSC mailbox (`thread_mailbox`) that wakes the consumer's loop via a trigger event with diff --git a/CMakeLists.txt b/CMakeLists.txt index 7da4197..39c0fc9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,6 +72,17 @@ set(CORE_SUBSYSTEM_SOURCES # Inter-thread message queue (issue #81): C++ moodycamel wrapper + C reactor glue. src/core/thread_queue.cc src/core/thread_mailbox.c + # Reactor thread pool (issue #80): pure-C transport reactors on the ThreadPool. + src/core/reactor_pool.c + src/core/reactor_pool_test_hooks.c + # Flat response marshalling type (issue #80, D3): worker -> reactor response. + src/core/response_wire.c + # Worker-side request dispatch (issue #80, B1b/D7): request ptr -> handler -> response. + src/core/worker_dispatch.c + # Worker inbox (issue #80, B2): per-worker request mailbox + dispatch drain. + src/core/worker_inbox.c + # Worker registry (issue #80, B3): atomic table of per-worker inboxes. + src/core/worker_registry.c ) # TLS sources — only compiled when OpenSSL is present. Both files @@ -231,6 +242,7 @@ if(ENABLE_HTTP3) src/http3/http3_callbacks.c src/http3/http3_io.c src/http3/http3_packet.c + src/http3/http3_steer.c src/http3/http3_stream.c src/http3/http3_stream_pool.c src/http3/http3_static_response.c diff --git a/config.m4 b/config.m4 index 7411832..9d0a68b 100644 --- a/config.m4 +++ b/config.m4 @@ -20,6 +20,13 @@ PHP_ARG_ENABLE([http3], [yes], [no]) +PHP_ARG_ENABLE([http-server-test-hooks], + [whether to compile internal test hooks], + [AS_HELP_STRING([--enable-http-server-test-hooks], + [Compile internal C test hooks (e.g. reactor-pool self-test). For test/CI builds only; never enable for release.])], + [no], + [no]) + PHP_ARG_WITH([openssl], [for OpenSSL TLS support], [AS_HELP_STRING([--with-openssl@<:@=DIR@:>@], @@ -492,6 +499,12 @@ if test "$PHP_HTTP_SERVER" != "no"; then src/core/async_plain_event.c src/core/thread_queue.cc src/core/thread_mailbox.c + src/core/reactor_pool.c + src/core/reactor_pool_test_hooks.c + src/core/response_wire.c + src/core/worker_dispatch.c + src/core/worker_inbox.c + src/core/worker_registry.c src/http1/http_parser.c src/http1/http1_stream.c src/http1/http1_sendfile.c @@ -585,6 +598,7 @@ if test "$PHP_HTTP_SERVER" != "no"; then src/http3/http3_io.c src/http3/http3_callbacks.c src/http3/http3_dispatch.c + src/http3/http3_steer.c src/http3/http3_static_response.c src/http3/http3_stream.c src/http3/http3_stream_pool.c @@ -613,9 +627,17 @@ if test "$PHP_HTTP_SERVER" != "no"; then done CFLAGS="$SAVE_CFLAGS" + dnl Test-only C hooks (reactor-pool self-test, ...). Gated behind a define so + dnl the hook is absent from a release build. Test/CI builds opt in. + HTTP_SERVER_TEST_HOOKS_FLAG="" + if test "$PHP_HTTP_SERVER_TEST_HOOKS" = "yes"; then + AC_MSG_NOTICE([http_server: internal test hooks ENABLED — do not ship this build]) + HTTP_SERVER_TEST_HOOKS_FLAG="-DHTTP_SERVER_TEST_HOOKS=1" + fi + dnl Create extension. The trailing "cxx" arg makes the shared module link dnl through $(CXX) so the C++ TU's runtime (libstdc++) is pulled in. - PHP_NEW_EXTENSION(true_async_server, $http_server_sources, $ext_shared,, -Wall -Wextra -Wno-unused-parameter $HTTP_SERVER_HARDENING, cxx) + PHP_NEW_EXTENSION(true_async_server, $http_server_sources, $ext_shared,, -Wall -Wextra -Wno-unused-parameter $HTTP_SERVER_HARDENING $HTTP_SERVER_TEST_HOOKS_FLAG, cxx) PHP_SUBST(TRUE_ASYNC_SERVER_SHARED_LIBADD) dnl Add include paths diff --git a/config.w32 b/config.w32 index 633102b..91e2219 100644 --- a/config.w32 +++ b/config.w32 @@ -43,6 +43,12 @@ if (PHP_TRUE_ASYNC_SERVER == "yes") { "async_plain_event.c " + "thread_queue.cc " + "thread_mailbox.c " + + "reactor_pool.c " + + "reactor_pool_test_hooks.c " + + "response_wire.c " + + "worker_dispatch.c " + + "worker_inbox.c " + + "worker_registry.c " + "http_connection.c " + "http_connection_tls.c " + "http_protocol_handlers.c " + diff --git a/docs/CODING_STANDARDS.md b/docs/CODING_STANDARDS.md index a38ab88..485684d 100644 --- a/docs/CODING_STANDARDS.md +++ b/docs/CODING_STANDARDS.md @@ -113,6 +113,57 @@ eventfd indirection actually serves. Do not use it as a defer mechanism for in-thread work; microtasks are cheaper (no syscall) and clearer about intent. +### 1.5 The transport reactor stays strictly non-blocking (HTTP/3) + +In HTTP/3 the entire QUIC transport — ACK generation, loss detection, +RTT/PTO, pacing, idle timers — runs in userspace **on the reactor thread** +(ngtcp2 has no internal threads). The reactor *is* the ACK clock. A +synchronous CPU burst or any blocking call on that thread delays ACKs for +**every** live connection by its full duration, which inflates the peers' +RTT/PTO, stalls cwnd, and reintroduces head-of-line blocking at the +transport layer. (On TCP the kernel ACKs independently, so this rule is +QUIC-specific — but the H3 path shares code with H1/H2, so apply it +wherever code can run on the H3 reactor.) + +**Rule.** Code reachable on the H3 reactor thread — the `recvmmsg` +poll-cb (`src/http3/http3_listener.c` `http3_listener_poll_cb`), every +ngtcp2/nghttp3 callback (they run inside `ngtcp2_conn_read_pkt`), the +`drain_out` send loop (`src/http3/http3_io.c`), timer fires, and the +coroutine **dispose**/commit tail (`src/http3/http3_dispatch.c`) — must +not perform an unbounded synchronous span: + +- **No blocking syscalls.** No sync DB, sync file read, blocking + `connect`/`getaddrinfo`, or `sleep`. I/O goes through the async API so + it yields. +- **No unbounded CPU without a yield.** Large gzip/brotli/zstd, large + serialize, a big `smart_str` build, a wide hash/sort over + attacker-sized input — none of these belong inline on a callback or in + the dispose commit. Cap it, or move it onto the PHP worker (a handler + coroutine that `await`s; the reactor/worker split keeps response + rendering — including compression — off the transport reactor, see + `docs/PLAN_REACTOR_POOL.md`). +- **Every loop over peer-controlled counts has a cap.** Follow the + existing precedents: `H3_DRAIN_ITER_CAP` (drain), `HTTP3_MAX_BODY_BYTES` + (body assembly), the `recvmmsg` batch cap (poll-cb). + +The PHP **handler** runs in its own coroutine and *may* `await` — that is +the sanctioned place for real work. But a CPU-bound handler that never +awaits monopolises the reactor exactly like inline reactor code; "it's in +a coroutine" is not a yield. When a handler must do heavy CPU, it has to +reach an await (chunk + yield), not run it in one synchronous span. + +The buffered-response compression in `http3_stream_submit_response` +(`src/http3/http3_callbacks.c`) runs synchronously in dispose context — a +current example of inline CPU on the reactor. The reactor/worker split +moves response rendering onto the PHP worker; until then, keep buffered +bodies modest and prefer the streaming path for large ones. + +**Watchdog.** The reactor self-times each tick and each timer fire and +exports `reactor_*` counters via `HttpServer::getStats()` (budget +`PHP_HTTP3_REACTOR_BUDGET_MS`, default 10 ms < `max_ack_delay` 25 ms); a +budget overrun logs `WARN h3.reactor.slow_tick`. If a change makes +`reactor_slow_ticks` / `reactor_max_tick_ns` climb, it violated this rule. + --- ## 2. Branch prediction: `EXPECTED` / `UNEXPECTED` diff --git a/docs/PLAN_REACTOR_POOL.md b/docs/PLAN_REACTOR_POOL.md new file mode 100644 index 0000000..37f45db --- /dev/null +++ b/docs/PLAN_REACTOR_POOL.md @@ -0,0 +1,455 @@ +# Reactor Thread Pool — Design Plan (#80 / #72 / #81) + +Status: **design accepted, not yet implemented.** Design plan for the pure-C +reactor-thread pool that decouples transport I/O from PHP business logic. + +> **Revision 2026-06-14 — request boundary reversed.** The copy-marshal request +> wire (D2) is superseded by **actor-model handoff: one `http_request_t`, one +> parser path, the request crosses the thread boundary by pointer.** See **D7** +> (request ownership) and **D8** (reverse path: bidirectional cancel, +> validate-and-drop, generationed handle). D2 is kept below struck-through for +> history. `response_wire` (D3) is unaffected and stays. + +## Problem & framing + +- **#80** (the driver): in QUIC/HTTP3 the *entire* transport — ACK generation, loss + detection, RTT/PTO, pacing, idle timers — runs in userspace on our reactor loop + (ngtcp2 has no internal threads). A synchronous CPU/blocking burst in a per-request + PHP handler on that thread delays ACKs → inflates peer RTT/PTO, stalls cwnd, drops + connections, and reintroduces head-of-line blocking *at the transport layer*. Budget: + reactor iterations must stay single-digit-ms (< `max_ack_delay` 25 ms). + - **This is QUIC-specific.** On TCP the kernel ACKs independently → no transport-stall. + So the reactor pool's primary client is **H3**; H1/H2 reuse is future and optional. +- **#72** (rides along): with `setWorkers > 1` the `SO_REUSEPORT` rehash routes a + migrated QUIC 4-tuple to a worker that doesn't own the connection → stateless reset. + Needs a worker-id encoded in the CID. See `docs/H3_ROADMAP.md` §"Cross-worker steering". +- **#81** (the foundation, already merged on this branch): lock-free bounded MPSC/SPSC + queues + reactor-integrated mailbox (`include/core/thread_queue.h`, + `include/core/thread_mailbox.h`). Currently **unwired** — only a unit test. This is the + non-blocking handoff primitive the pool is built on. + +## Core architecture — two tiers, opposite scheduling policies + +A pool of **pure-C reactor threads** (no PHP *executes*) owns all transport; a pool of +**PHP worker threads** runs handlers. Bridged by the #81 queue. The two tiers get +deliberately opposite policies: + +| | Transport (sockets, ngtcp2, TLS, ACK/timers) | Business logic (PHP handler) | +|---|---|---| +| Policy | share-nothing, **pinned**, connection never migrates | load-balanced dispatch | +| Why | per-conn crypto/congestion/PN state is a hot working set; migrating it = cache-miss avalanche (Seastar) | a finished request is position-independent | + +H3 request data-flow under the split: + +``` +datagram ─(SO_REUSEPORT │ eBPF-CID-steer)─▶ Reactor R (pinned, 0 PHP) + R: ngtcp2_read_pkt → decrypt → reassemble HEADERS+DATA + on end_stream: build FLAT request_wire {method,:path,header spans,body buf, + reactor_id=R, stream, conn} + pick worker W (sticky default + threshold + P2C — see §Dispatch) + post(request_wire) → W.mailbox (#81) + ─▶ R immediately resumes ACK/timers/other conns — transport NEVER blocked +Worker W (php-async thread, has interpreter): + build $request zval from request_wire ON ITS OWN thread → run handler coroutine + (await DB/io on W's own reactor) + render response into a PERSISTENT buffer {ptr,len,free_fn} → post back to R.mailbox +Reactor R: + QPACK/HPACK-encode response headers + flow control + TLS encrypt + sendmsg + slow client → out_buffer + re-arm write + watermarks (W already free, never blocks) + streaming → W posts body chunks incrementally; bounded mailbox = backpressure +``` + +## Decisions (accepted) + +### D1 — Thread substrate: ThreadPool + `submit_internal` + +A "pure C, no TSRM" thread + the TrueAsync reactor is **impossible as built**: the reactor +lives in thread-local `ASYNC_G(uvloop)` (`ext/async/libuv_reactor.c:315`, `uv_loop_init` +in `libuv_reactor_startup` `:391`), touches `EG(exception)` throughout, and every pool +thread boots via `ts_resource(0)` (`ext/async/thread.c:1905`) + `php_request_startup()` +(`:1980`). No TSRM → no `ASYNC_G` → no `libuv_reactor` → no #81 mailbox (it registers a +`zend_async_trigger_event_t` on `ASYNC_G(uvloop)`). **#81 already assumes the consumer is +a Zend thread.** + +A reactor thread is a ThreadPool worker that runs a **C reactor loop via +`submit_internal`** (`ext/async/thread_pool.c:813`, the `TASK_KIND_INTERNAL` path — +this is literally how the server already boots workers: `pool->submit_internal(pool, +pool_worker_handler, …)`) and **never enqueues a local handler coroutine** — it posts to +workers. PHP does not execute on it. `libuv_reactor_execute()` (`:476`) is already +separable from the coroutine scheduler. Zero php-src changes. **#81 mailbox works in +BOTH directions** (R→W and W→R) because both pools are Zend threads with `ASYNC_G` — no +raw-uv twin needed. Cost: a "sleeping" TSRM/interpreter per reactor (memory/init, not +runtime) — accepted. + +### D2 — Request boundary: wire-parse on reactor, zval on worker, body streamed + +> **⚠ SUPERSEDED by D7 (2026-06-14).** The flat `request_wire` copy-marshal is +> replaced by actor handoff-by-pointer (one struct, one parser). `request_wire` + +> `http_request_from_wire` are deleted for the request path. The body-streaming +> insight below survives, but evolves into D7's worker-applied command stream +> (append-chunk / body-complete / release) instead of a chunk-feed the reactor +> writes. Text kept for history. + +The current `http_request_t` (`include/http1/http_parser.h:83`) is **not** thread-clean — +it's built from `zend_string*` / `HashTable*` (per-thread ZMM). The reactor (no usable ZMM) +cannot fill it. So: + +- Reactor does **only wire-parse** of headers (HPACK/QPACK/llhttp — stateful per-conn, + must be on the reactor anyway) and produces a **new flat `request_wire`**: raw + `{ptr,len}` spans over its recv buffer (method, :path, header name/value spans) + body + buffer + `{reactor_id, stream, conn}`. Zero PHP allocation on the reactor. +- Worker materializes `$request` zval (`zend_string_init`/`zend_hash`) **on its own thread**. +- **Body is streamed incrementally**, not buffered before handoff — maps onto the existing + `readBody()` streaming path (#26): `body_event` + chunk feed. + +### D3 — Response boundary: persistent buffer, reactor owns encode/flow/TLS + +- Worker renders the response body into a **persistent / malloc-domain buffer**, passed as + a generic descriptor `{char* data, size_t len, void(*free_fn)(void*)}`. **Required, not + just convenient:** on worker graceful-shutdown the ZMM arena is destroyed, so an + `emalloc` buffer the reactor is still draining would dangle. `free_fn` may be `pefree` + (persistent `zend_string`) or a shared-slab-pool return (reuse, no malloc/free per + response). Keeps the reactor **Zend-free**. +- **Ownership transfers at `post`** — that is the responsibility boundary. After post, the + buffer is the reactor's; worker death no longer matters for it. +- Reactor does QPACK/HPACK response-header encode (stateful per-conn), stream+conn flow + control, TLS encrypt, sendmsg. Slow client absorbed by reactor `out_buffer` + writable + re-arm + high/low watermarks (Swoole model) — the worker never blocks on the client. +- **Static responses bypass PHP entirely** (already served in reactor/scheduler context via + `http_static_try_serve`). + +### D4 — Robustness model: graceful-only, thread-based + +Workers die only **cooperatively** (exception / OOM → `ZEND_ASYNC_SHUTDOWN`, already caught +by the OOM-firewalls), never "suddenly". A true crash takes the whole process anyway +(thread model). So no shm/processes needed. Cleanup = persistent buffer (D3) + worker +**shutdown-hook** (free not-yet-posted buffers via a per-request registry; signal the +reactor to `RST_STREAM`/500 orphaned streams) + a reactor **liveness-timeout** on streams +awaiting a response that never comes. + +> **Refined by D8 (2026-06-14).** Mid-flight cancellation is now **bidirectional** +> (client-gone → reactor tells worker; handler-dead → worker tells reactor), and +> the reactor-side liveness-timeout is largely subsumed by **validate-and-drop**: +> the reactor frees stream state on the normal QUIC lifecycle and silently drops +> late worker messages for streams that are already gone, instead of holding them +> alive on a timeout. + +### D5 — Dispatch: sticky default + threshold-on-dispatch + P2C re-selection + +Per-worker **MPSC mailbox** (one consumer each — #81 as-built), **not** one shared MPMC +queue (global head/tail contention + no affinity + breaks #81's single-consumer +edge-triggered wakeup). The reactor picks *which* mailbox: + +- Per worker: `_Atomic uint32_t outstanding` (cache-line padded), `+1` by reactor at + dispatch, `−1` by worker at completion = "queued + in-flight" in one number. +- **Hot path:** push to `reactor->default_worker` (sticky → max locality, warm + zval/parser caches, zero sampling). +- **Trigger (free):** `atomic_fetch_add` *returns* the prior count → one compare, no extra + read. If `prev >= HIGH`, the default is loaded → **re-select via P2C** (sample 2, take + the lighter), set it as the new sticky default. +- **Self-damping / auto-interpolating:** the P2C re-pick lands on a lighter worker, so the + next dispatch is sticky again. Under global saturation (all ≥ HIGH) it naturally + degrades to per-request P2C — which is optimal there. Cost: **≤ P2C always, strictly + cheaper when there's slack.** No timer. +- Memory ordering: `relaxed` everywhere (heuristic; the mailbox provides the real + happens-before). Signal is `outstanding` (reactor-observable at decision time), **not** + CoDel-sojourn (computed later on the worker — it stays for admission/shedding). + +Why this and not the alternatives (researched against HAProxy/Nginx/Envoy/Finagle/Linkerd ++ Mitzenmacher/JIQ/Tokio/Go): plain P2C-over-outstanding is the industry default and +near-optimal at saturation; JSQ full-scan = O(N) cache-bounce for a vanishing balance gain; +JIQ collapses to *random* (worse than P2C) under high load unless patched to P2C-fallback; +peak-EWMA ≡ P2C for homogeneous local workers; one shared MPMC queue = global contention + +no affinity. Work-stealing ("воровство") is **obviated** by smart-push: we have cheap +centralized load info, and #81's MPSC-per-worker design would have to become MPMC to allow +stealing. + +### D6 — H3 CID steering (closes #72) + +Today the SCID is 8 random bytes (`src/http3/http3_connection.c:301`, +`http3_fill_random(c->scid, HTTP3_SCID_LEN)`); `routing_dcid` already exists (`:483`). + +- **Encode reactor-id into the SCID** (reserve 1–2 high bytes, obfuscated QUIC-LB-style: + AES-ECB single block / 3-pass Feistel; rest random). We mint the SCID the client echoes + as DCID → constant for the connection's life. +- **Two-level routing (h2o model):** short-header (established) → read reactor-id from + DCID, mine→process else→forward to owner; long-header Initial (client-chosen DCID, no id) + → 4-tuple hash / kernel SO_REUSEPORT. +- **Forward channel: userspace fan-out via #81 first** (in-process, same address space — + cheaper than h2o's AF_UNIX socketpair; works everywhere incl. WSL/Windows). eBPF + `SO_ATTACH_REUSEPORT_EBPF` is a later opt-in optimization (no userspace hop, immune to + reuseport reshuffle — but needs CAP_BPF, no WSL/Windows). **TTL** on forwards bounds + ping-pong. Fixes migration/NAT-rebind for free (CID constant). + +**Scope.** Steering is only exercised on **migration / NAT-rebind** — in steady state the +4-tuple is stable, so `SO_REUSEPORT` keeps delivering a connection's packets to its owner +and nothing is forwarded. It is not a hot path. + +**Why forward at all (not migrate the connection).** The reactor owns the connection +memory — `ngtcp2_conn`, crypto keys, stream-reassembly buffers — and a raw QUIC packet is +encrypted, so only the owner can read it. Moving live conn+crypto state across threads on +every rebind is the racy path we reject; forwarding ~1 datagram is cheaper. **Rule: the +packet goes to the memory, not the memory to the packet.** + +**Forward carries the DCID, not a conn pointer.** The owner re-looks-up by DCID and a +lookup miss is just a drop (ngtcp2 already drops unknown CIDs). No dangling pointer crosses +threads → steering is **independent of D4/D8's generationed handle** (that one is for the +request/stream pointer on the reactor↔worker axis, a different axis). + +**Decided defaults (2026-06-19):** +- **Copy the datagram on forward.** Rare path, datagram ≤ MTU → the `memcpy` is in the + noise, and it sidesteps changing the hot recv path. Zero-copy (recv into owned slab + buffers + return via `reactor_pool_post_exec` reclaim, the existing D7.5 pattern) is a + later optimization, only if forwarding ever turns hot. +- **Obfuscate the reactor-id** (QUIC-LB AES-ECB single block / Feistel) before production. + A plaintext id may ship first under the dev gate; it must not reach prod (leaks topology + → targeted single-thread DoS). + +**Build order (D6):** +1. Encode reactor-id into the minted SCID + a decode helper from DCID + (`http3_connection.c:301`; `routing_dcid` read side at `:483`). +2. Reactor registry (`id → datagram-inbox`) + per-reactor datagram-inbox — mirror of + `worker_registry` / `worker_inbox`; wake via the existing trigger-event. +3. Classify on recv (long/short header bit → decode id → local vs `post` to owner) and a + single `feed_datagram(buf, len, remote_addr)` called from both the socket loop and the + inbox drain. Copy into the inbox message; TTL byte to bound bounce. +4. Test single-host migration with h3client's `MIGRATE_AFTER` (from #59). +5. (later, opt-in) eBPF reuseport steering to drop the userspace hop on Linux prod. + +**Status — SHIPPED 2026-06-19 (steps 1–4).** Implemented as built: `http3_steer.c` +(AES-128 keystream over the CID nonce masks a 1-byte reactor id — obfuscated, not +plaintext), encode at SCID mint + in `get_new_connection_id_cb`, decode only on a +conn_map **miss** (off the hot path; the miss path already pays a stateless-reset HMAC). +The forward is a per-endpoint `http3_steer_group_t` (atomic `id → listener` table) + +`reactor_pool_post_exec` — **no new mailbox** (the reverse-path primitive already exists). +Active only with >1 reactor; gated behind `TRUE_ASYNC_SERVER_REACTOR_POOL`. Coverage: +`HTTP3Steer` unit test (deterministic encode/decode round-trip, masking, valgrind-clean) + +phpt `040` (a NAT-rebound connection is served across reactors with `setWorkers(2)` — +the regression 032 documents as broken). Suite green (H3 + reactor_pool 48/48). + +**KNOWN LIMITATION — investigated deeply (server + client qlog), root is a circular +path-validation deadlock; fix is a deliberate QUIC effort.** Under **back-to-back** +migrations a connection intermittently stalls: ~5 % at 15 rebinds on one connection, +scaling to 0 % at ≤2–3 rebinds and **0/40 at a single realistic NAT-rebind**. It is a +**circular QUIC path-validation convergence deadlock** exposed only when migrated packets +traverse the cross-reactor forward under a pathological rebind rate. The loop: + +1. Per RFC 9000 §9.3 the server (ngtcp2) keeps sending non-probing frames (the HTTP + response + ACKs) on the **old, already-validated** path until the new path validates. +2. The client has moved to the new path, so it never receives those ACKs → its RTT stays 0 + and it **congestion-window-blocks** (server+client qlog: `bytes_in_flight 3600 > cwnd + 3538`, `smoothed_rtt 0`). +3. cwnd-blocked, the client cannot send the request retransmit / the final PATH_RESPONSE → + the new path never finishes validating → back to step 1. + +**Ruled out by data (each was a hypothesis that the evidence killed):** +- *RTT inflation* — refuted: `smoothed_rtt ≈ 0.2 ms` at the stall (normal localhost). +- *Forward-hop latency* — refuted: measured forward latency is **microseconds** (max + ~0.17 ms even in stalls), nowhere near the tens-of-ms path-validation deadline. +- *Validation failure* — refuted: ngtcp2 `path_validation` callback reports **FAILURE=0**; + validations succeed or are superseded (ABORTED), never fail. Full-success runs occur with + **0** completed validations (anti-amplification covers the small response). +- *Our forward dropping packets* — refuted: `steered_in == steered_out`, 0 drops; the + forward does not lose datagrams. +- *Addressing / steering* — correct: every migrated datagram decodes to the right owner + (conn-map HIT). *Memory* — valgrind clean, no UAF/leak. +- *Test-client stale `c.local`* — that is deliberate NAT-rebind simulation; "fixing" it + breaks h3client immediately (it has no client-initiated migration). Not the cause. + +It is **forward-specific**: single-reactor mode (no hop — everything serialized in one +reactor tick) is **0 stalls** at the identical 15× migration load. The exact single mis- +pathed packet was **not** isolatable: the failure is circular, qlog carries no per-packet +addresses, every instrumentation perturbs the Heisenbug, and no independent H3 client +exists (host curl lacks HTTP/3; a QUIC client cannot be written "simply"). Trigger is +pathological (7+ rebinds/conn in milliseconds); real clients rebind occasionally and are +unaffected. + +**Fix (separate task): eBPF `SO_ATTACH_REUSEPORT_EBPF`** — the kernel reads the DCID and +delivers the migrated datagram **straight to the owner's socket**, so there is no forward +hop and the connection behaves exactly like single-reactor (which never stalls). This is +the nginx approach (eBPF worker-socket map keyed on DCID). Linux + CAP_BPF only, hence it +stays the opt-in optimization over the portable userspace forward. + +### D7 — Request ownership: actor handoff by pointer (supersedes D2) [2026-06-14] + +**Decision reversed.** D2 marshalled the request through a flat `request_wire` +(copy on the reactor → re-materialize on the worker). Rejected as built. Replaced +by ownership handoff: **one struct, one parser path, the request crosses the +thread boundary by pointer.** + +Rationale: H1/H2/H3 already build a single `http_request_t` *directly* via their +parser callbacks (`http3_callbacks.c:271,130`; `http2_session.c:291,129`; +`http_parser.c:205`). `request_wire_create` is called **only from +`reactor_pool_test.c`** — never in production. Pushing the wire into production +would force every protocol callback to grow a *second* emit target (build wire +alongside `http_request_t`), duplicating per-protocol method/header/body logic. +The goal is the opposite: parser code **almost identical** for single-thread and +split modes — `http_request_t` must not depend on the delivery mechanism. + +1. **One struct, one parser.** `http_request_t` stays the sole request + representation. The reactor fills it through the existing parser callbacks. + `request_wire` + `http_request_from_wire` are **deleted** for the request path. + +2. **Allocation domain from execution context, not a new field.** The only + per-site delta is the persistent flag on `zend_string_init(v, len, persistent)` + / hashtable init: the reactor has no usable ZMM → `persistent=1`; a + single-thread worker → `0` (ZMM, fast). That bit comes from the parser's + already-threaded context object (session / stream / parser), **not** a new + `bool` on `http_request_t`. (Self-rejected: a struct flag is redundant.) + +3. **Accessors self-describe on read.** `zend_string` already carries + `IS_STR_PERSISTENT`. Each accessor inspects the string it returns: persistent + → deep-copy into ZMM; ZMM → `addref` (`RETURN_STR_COPY`, unchanged). `getHeaders` + (`http_request.c:199`) must **not** `zend_array_dup` a persistent HT — dup + `addref`s persistent strings → VM `efree` → heap corruption; persistent mode + needs a deep ZMM rebuild of the table. + +4. **Handoff by pointer = the responsibility boundary.** The reactor builds the + `http_request_t`, posts the **pointer** to the worker mailbox, and + relinquishes the right to touch that memory. The worker is then **sole owner + and sole writer**; the address is the identity/handle. No serialize, no + re-materialize, zero copies. + + **Zero-alloc: the request stays in the reactor's stream slab [revised 2026-06-14].** + For H3 the request is *embedded* in the pooled `http3_stream_t` + (`s->_request_storage`, offset-0; `http3_stream.c:40`) — the per-listener slab + already gives us a request slot with no `malloc`. The handoff therefore costs + **zero allocations**: the reactor fills the embedded request (persistent + strings) and hands its pointer over; nothing is cloned and nothing is + separately `pemalloc`'d. (An earlier sketch proposed a standalone `pemalloc` + request per handoff — *rejected*: the slab already does the job, see point 5 + for how the slot returns home without a cross-thread pool free.) + +5. **Single-writer ⇒ non-atomic refcount; the slab slot is reclaimed by the + reactor on command [revised 2026-06-14].** The existing `unsigned refcount` + (`http_parser.h`) stays non-atomic and **only the worker mutates it** (the + reactor never touches it during the borrow). The catch: the request lives in a + reactor-owned slab slot, and that slot's `release` callback + (`http3_stream_release_via_request` → `http3_stream_pool_free`) returns the slot + to the **reactor's** per-listener pool — a pool with no locks, sized for one + thread. So the worker must **not** invoke `release` itself: that would be a + cross-thread slab free (two threads mutating one lock-free pool = corruption). + + Instead the free is *deferred to the owner thread*: when the worker's + `--refcount` hits 0 it does **not** call `release`; it posts a **`consumed`** + message (D8 reverse channel) and the **reactor** invokes `release` on its own + thread, returning the slot to the pool. Worker mutates the refcount; reactor + reclaims the slab. No clone, no standalone alloc, no cross-thread pool access. + (For the self-test synthetic requests — `selftest_build_request` — the request + is a standalone `pecalloc` with `release == NULL`, so the worker `pefree`s it + directly; only the real H3 slab-backed request needs the `consumed` round-trip, + which lands with the reverse channel in B4.) + +6. **Post-handoff data = commands, applied by the worker.** For streaming bodies + the reactor never writes into the worker's struct. It sends commands over the + mailbox — `append-chunk`, `body-complete`, `release` — and the **worker** + applies them in its loop (append to `body_queue_*`, notify `body_data_event` on + its **own** thread — no cross-thread wake). Commands are FIFO per stream (one + reactor producer); `release` is the last command. Free happens only after + draining to `release` at refcount 0 → no UAF, no pointer-reuse ABA. + +### D8 — Reverse path: bidirectional cancel, validate-and-drop, generationed handle [2026-06-14] + +7. **One reverse channel per reactor, tagged messages, non-blocking post.** + Worker→reactor carries `response` (render result), `consumed` (request done → + the reactor invokes the slab `release` and reclaims the request slot, per D7.5; + also flow-control replenish) and `cancel-stream`. One drain point per reactor; a + tagged union gives `response`-then-`cancel` ordering for free. A dedicated control channel is + **deferred** until profiling shows control-message starvation; if ever split, + the line is data-vs-control (cancel + consumed together), not cancel alone. The + two-bounded-queues deadlock is prevented **not** by channel count but by the + rule that *neither side ever blocks on a full queue* — the reactor backpressures + the **client** (stop reading the stream / shrink the FC window), never blocks on + a worker mailbox. + +8. **Reverse addressing: array of reactor channels indexed by `reactor_id`.** The + routing triple `{reactor_id, stream_id, conn}` already exists + (`request_wire.c:29-31`) and is echoed on the response. It must be carried + **into `http_request_t`** (today `http_request_from_wire` drops it). + `reactor_id` → which reverse channel; `stream_id` (+ conn handle) → which stream. + +9. **Bidirectional cancel; nobody waits.** + - **Client gone** (RST/close) → reactor sends the worker a `cancel` command → + worker stops, cancels the handler coroutine (`ZEND_ASYNC_CANCEL` via the + request's `coroutine` field), releases the request. + - **Handler died** on the worker before body-complete (returned / threw / + cancelled) → worker sends the reactor a `cancel-stream` message → reactor + stops streaming and RST/closes the stream. + - Neither side blocks. `cancel` means "begin teardown", not "stop now": + already-posted commands drain/discard until `release`, which stays the + terminator. + +10. **Validate-and-drop on the reverse path; hold-alive rejected.** The reactor + frees stream state on the **normal QUIC lifecycle** (client RST / completion), + independent of worker timing. A late worker message for a gone stream → lookup + fails → silently dropped. Hold-alive (reactor keeps the stream alive until the + worker acks) is rejected: a client that opens + RSTs many streams would pin + reactor memory **proportional to handler latency** — a DoS that violates #80's + transport budget. + +11. **Reverse identity = generationed handle, not a raw `conn` pointer.** A freed + conn cannot be safely dereferenced. The reverse path replaces the raw + `void *conn` (`request_wire.c:31`) with `(conn_id, conn_gen)` + `stream_id`; a + reused slot is caught by a generation mismatch on lookup. The **forward** + request pointer stays raw — safe because the worker is its sole owner/writer + for the whole borrow (single-ownership + FIFO + FIN); the slab slot is only + reclaimed after the worker's `consumed` (D7.5), so the pointer can never be + reused under the worker. Asymmetry: forward memory's lifetime is driven by the + message *consumer* (worker, via `consumed`) → raw pointer safe; reverse memory + is owned by the reactor but its lifetime is driven by the **client** + (unsolicited RST), not by the message sender → raw pointer unsafe. + +## Build order + +Reactor-tick + ACK/PTO-late watchdog instrumentation is already in place (`5884e2a`) — +the empirical check that the transport reactor stays inside the ACK budget. + +The split itself, per the Decisions above (each maps to a D-item): + +- [x] Reactor pool via ThreadPool + `submit_internal` C loop (D1/B). +- [x] ~~`request_wire` flat type + worker-side zval materialization (D2).~~ **superseded by D7.** +- [x] Persistent `http_request_t` build flag from execution context + self-describing + accessors + persistent-aware `getHeaders` rebuild; **deleted `request_wire` + + `http_request_from_wire`** for the request path (D7). +- [x] Command stream over the mailbox: handoff-pointer / append-chunk / body-complete / + release; worker-applied refcount decref → reactor reclaims the slab slot on + `consumed` (D7.5–6). +- [x] Reverse channel (response / consumed) per reactor + non-blocking post (D8). +- [ ] Bidirectional cancel + validate-and-drop with generationed conn handle (D8/D4) — + deferred; the raw stream pointer is currently proven safe via the worker-borrow + ref + reactor-thread serialisation, so the generationed handle is not yet needed. +- [x] Persistent response buffer + ownership transfer + reactor encode/TLS (D3). +- [x] **Dispatch policy (D5) — reactor-paired connection→worker affinity.** Each reactor + owns a strided subset of workers ({i : i % n_reactors == reactor_id}); a connection + homes to the least-loaded owned worker (idle ties rotate so connections spread) and + reuses it for all its streams (`http3_connection.worker_slot` + `worker_registry_at`). + A home backed up past `H3_WORKER_SPILL_DEPTH` spills the request to a less-loaded + worker (owned first, then any reactor's); a home whose worker died is re-homed. + `worker_registry_least_busy` is the primitive — unit-tested by reactor_pool/010, + e2e by h3/037-041. +- [x] CID-in-SCID steering + userspace fan-out (D6); closes #72. +- [ ] Worker shutdown-hook (D4). + +H1/H2 into the pool is future and optional: the kernel ACKs TCP independently, so there +is no transport-stall to fix there. Leave H1/H2 on the current share-nothing same-thread +model unless measurements say otherwise. + +## Open items / deferred decisions + +- `HIGH` threshold value for D5 (tune; ~2–4× steady-state per-worker depth). +- Reactor count vs worker count topology (R:W ratio; pinning policy). +- eBPF CID steering (opt-in optimization, later). +- Idle-fast-path (JIQ-Pod) — only if the load profile turns out bursty rather than steadily + hot; degrades cleanly to D5's P2C, so additive later. + +## Prior-art references + +- **h2o** — share-nothing thread-per-core; CID-encoded thread/node id + AF_UNIX socketpair + forward + TTL; mutex-MPSC mailbox with eventfd edge-triggered wakeup + whole-batch drain. +- **Swoole** — reactor threads (0 PHP) + worker pool; `dispatch_mode` menu; idle/busy = + self-updated status byte; `session→{fd,reactor_id}` for the send-back path; reactor owns + out_buffer + watermarks. +- **Seastar / Tokio / Go** — share-nothing + work-stealing on the pull side; informs why we + do smart-push instead. +- Load-balancing theory — Mitzenmacher P2C (`log log n`), JIQ (Lu et al.), Envoy/Nginx/ + HAProxy/Finagle/Linkerd P2C-in-production. diff --git a/docs/coverage-baseline.json b/docs/coverage-baseline.json index 42eaa8d..06815b7 100644 --- a/docs/coverage-baseline.json +++ b/docs/coverage-baseline.json @@ -1,12 +1,12 @@ { "totals": { "lines": { - "total": 14845, - "hit": 11846 + "total": 14777, + "hit": 12031 }, "functions": { - "total": 1006, - "hit": 893 + "total": 1001, + "hit": 907 } }, "files": { @@ -132,8 +132,8 @@ }, "src/core/http_connection.c": { "lines": { - "total": 1017, - "hit": 716 + "total": 1006, + "hit": 705 }, "functions": { "total": 59, @@ -200,16 +200,6 @@ "hit": 5 } }, - "src/core/thread_mailbox.c": { - "lines": { - "total": 51, - "hit": 0 - }, - "functions": { - "total": 5, - "hit": 0 - } - }, "src/core/tls_layer.c": { "lines": { "total": 381, @@ -293,7 +283,7 @@ "src/http2/http2_session.c": { "lines": { "total": 730, - "hit": 633 + "hit": 638 }, "functions": { "total": 53, @@ -303,11 +293,11 @@ "src/http2/http2_static_response.c": { "lines": { "total": 405, - "hit": 85 + "hit": 291 }, "functions": { "total": 19, - "hit": 2 + "hit": 16 } }, "src/http2/http2_strategy.c": { @@ -333,7 +323,7 @@ "src/http3/http3_callbacks.c": { "lines": { "total": 511, - "hit": 414 + "hit": 411 }, "functions": { "total": 31, @@ -372,8 +362,8 @@ }, "src/http3/http3_listener.c": { "lines": { - "total": 498, - "hit": 377 + "total": 495, + "hit": 368 }, "functions": { "total": 29, @@ -632,8 +622,8 @@ }, "src/send_file.c": { "lines": { - "total": 336, - "hit": 293 + "total": 333, + "hit": 290 }, "functions": { "total": 14, diff --git a/fuzz/fuzz_stubs.c b/fuzz/fuzz_stubs.c index 49d458f..485abb0 100644 --- a/fuzz/fuzz_stubs.c +++ b/fuzz/fuzz_stubs.c @@ -17,10 +17,26 @@ */ #include "php.h" +#include "http1/http_parser.h" /* http_request_t layout + HTTP_HEADERS_INITIAL_SIZE */ /* Extension class entries (normally populated at MINIT). */ zend_class_entry *http_exception_ce __attribute__((weak)) = NULL; +/* http_request_init_headers lives in http_request.c (the PHP-object TU, not + * linked into the fuzz harness). http_parser.c / http2_session.c call it to + * lazily allocate req->headers before storing parsed headers, so a no-op + * would leave the HT NULL and crash the path under test. Fuzz requests are + * always ZMM (non-persistent), so the real non-persistent init is correct. */ +__attribute__((weak)) void http_request_init_headers(http_request_t *req) +{ + if (req->headers != NULL) { + return; + } + + ALLOC_HASHTABLE(req->headers); + zend_hash_init(req->headers, HTTP_HEADERS_INITIAL_SIZE, NULL, ZVAL_PTR_DTOR, 0); +} + /* Server-level telemetry hooks invoked by h2 session/strategy TUs. * All NULL-safe in production; here they're no-ops since fuzz has * no server object to count against. */ @@ -93,9 +109,8 @@ __attribute__((weak)) struct zend_async_event_s *async_plain_event_new(void) } /* http_body_stream_pop calls these on h2 streaming bodies to grant the - * peer credit (commit c812184: per-stream INITIAL_WINDOW=64K + flow- - * control backpressure). Fuzz harnesses don't drive a real h2 session, - * so no-op is safe — the parser path under test is identical. */ + * peer credit. Fuzz harnesses don't drive a real h2 session, so no-op + * is safe — the parser path under test is identical. */ struct nghttp2_session; struct http2_session_t; __attribute__((weak)) int nghttp2_session_consume(struct nghttp2_session *session, diff --git a/include/core/reactor_pool.h b/include/core/reactor_pool.h new file mode 100644 index 0000000..f07cc38 --- /dev/null +++ b/include/core/reactor_pool.h @@ -0,0 +1,101 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ +*/ + +#ifndef REACTOR_POOL_H +#define REACTOR_POOL_H + +#include +#include +#include + +/* + * Reactor thread pool (issue #80, design D1 — substrate). + * + * A pool of pure-C transport reactor threads: each is a TrueAsync ThreadPool + * worker that runs its libuv reactor loop and NEVER executes a PHP handler. PHP + * business logic lives on a separate worker tier; the two are bridged by the + * #81 lock-free mailbox (include/core/thread_mailbox.h). + * + * A reactor does not "tick": it runs the native event loop and is woken by real + * events. At this substrate stage its only event source is its own inbound + * mailbox; once the transport lands (D2) the same loop is woken by the listener + * socket and QUIC timers too. Liveness is therefore not a heartbeat — it is + * whether the inbound channel is being drained (reactor_pool_processed). A + * consumer that stops draining is exactly a stalled reactor; a bounded mailbox + * turns that into backpressure at the producer (reactor_pool_post returning + * false), which is the real health signal. See docs/PLAN_REACTOR_POOL.md. + * + * Threading contract: + * - reactor_pool_create()/destroy() run on the owning (parent) thread. + * - Each reactor loop runs on its own pool thread; PHP never runs there. + * - Shutdown is cooperative and travels through the channel: destroy() posts + * a stop sentinel into each reactor's mailbox; the reactor observes it on + * its normal drain and leaves its loop. The parent never touches a + * reactor's libuv handles cross-thread. + * - Producers (reactor_pool_post) must quiesce before destroy(). + */ + +typedef struct reactor_pool_s reactor_pool_t; + +/* Stand up `reactors` transport reactor threads and block until each has + * entered its loop (or failed to). Returns NULL on bad arguments, if the + * ThreadPool API is unavailable, or if no reactor came up (a PHP exception may + * be set). Call on the parent thread. */ +reactor_pool_t *reactor_pool_create(int reactors); + +/* Number of reactor threads that came up. */ +int reactor_pool_count(const reactor_pool_t *rp); + +/* Post an opaque item into reactor `idx`'s inbound mailbox. Returns false if + * idx is out of range, the reactor is not running, or the bounded mailbox is + * full (backpressure — the caller decides to drop/retry). Any thread; must not + * race destroy(). */ +bool reactor_pool_post(reactor_pool_t *rp, int idx, void *item); + +/* A function run on a reactor's own thread by reactor_pool_exec. */ +typedef void (*reactor_exec_fn)(void *arg); + +/* Run fn(arg) on reactor `idx`'s own loop thread and block the caller until it + * returns. The reactor executes it inline on its drain pass — this is how + * transport that must be bound to the reactor's libuv loop (a uv handle, the + * H3 listener's UDP socket) gets created on the right thread. Returns false for + * a bad index, a NULL fn, or a non-running reactor. Any thread; must not race + * destroy(), and the caller must serialise its own exec calls to one reactor + * (it blocks to completion, so this is natural). */ +bool reactor_pool_exec(reactor_pool_t *rp, int idx, reactor_exec_fn fn, void *arg); + +/* Like reactor_pool_exec but fire-and-forget: post fn(arg) into reactor `idx`'s + * inbound and return immediately, without waiting for it to run. The reactor + * runs it on its drain pass and frees the internal envelope; there is no + * completion handshake. This is the worker->reactor reverse path's delivery + * primitive — the worker posts an apply callback + its message and never + * blocks. Ownership of whatever `arg` points at is the callback's concern (it + * runs once on the reactor). Returns false for a bad index, a NULL fn, a + * non-running reactor, or a full mailbox (backpressure — the caller keeps `arg` + * and decides to drop/retry). Any thread; must not race destroy(). */ +bool reactor_pool_post_exec(reactor_pool_t *rp, int idx, reactor_exec_fn fn, void *arg); + +/* Count of items reactor `idx` has drained from its inbound. Rises as the + * reactor services work — "alive" == "draining". Returns 0 for a bad index. */ +uint64_t reactor_pool_processed(const reactor_pool_t *rp, int idx); + +/* Optional epilogue run on the reactor thread at the END of every mailbox drain + * batch, after all commands in the batch have run. Lets a consumer coalesce + * per-command side effects into one action per drain — the H3 steering path uses + * it to flush forwarded datagrams once per batch instead of once per datagram, + * matching the recvmmsg tick's single deferred flush. Process-wide, set once on + * the parent before reactors start; fn runs on each reactor thread, so it must + * key its state per-thread. NULL clears it. */ +void reactor_pool_set_drain_epilogue(void (*fn)(void)); + +/* Signal every reactor to stop (via the channel), wait for the loops to leave, + * and release the pool. Parent-thread only; call once, after producers quiesce. + * Passing NULL is a no-op. */ +void reactor_pool_destroy(reactor_pool_t *rp); + +#endif /* REACTOR_POOL_H */ diff --git a/include/core/reactor_pool_test.h b/include/core/reactor_pool_test.h new file mode 100644 index 0000000..fdde8f2 --- /dev/null +++ b/include/core/reactor_pool_test.h @@ -0,0 +1,22 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ +*/ + +#ifndef REACTOR_POOL_TEST_H +#define REACTOR_POOL_TEST_H + +/* + * Test-only entry point for the reactor pool (#80). Registers the + * `_http_server_reactor_pool_selftest()` PHP function used by the phpt + * substrate test. Compiled in only when the extension is built with + * -DHTTP_SERVER_TEST_HOOKS (--enable-http-server-test-hooks); without that + * flag this is a no-op and the hook is absent from the build, so it never + * ships in a release. Called unconditionally from MINIT. + */ +void reactor_pool_test_register(const int module_type); + +#endif /* REACTOR_POOL_TEST_H */ diff --git a/include/core/response_wire.h b/include/core/response_wire.h new file mode 100644 index 0000000..c593b83 --- /dev/null +++ b/include/core/response_wire.h @@ -0,0 +1,74 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ +*/ + +#ifndef RESPONSE_WIRE_H +#define RESPONSE_WIRE_H + +#include +#include +#include + +/* + * Flat, thread-clean response representation for the reactor/worker split + * (issue #80, design D3) — the return-path mirror of request_wire (D2). A PHP + * worker renders its HttpResponse into a response_wire ON ITS thread (reading + * status / headers / body out of the per-thread ZMM HttpResponse object), then + * hands it back to the transport reactor through the #81 mailbox. The reactor + * (no usable ZMM, owns the nghttp3/QUIC connection) QPACK-encodes the headers + * and sends the body — it never touches a zval. + * + * Why a flat type and not the HttpResponse object: the response is built from + * zend_string* / HashTable* (per-thread ZMM) and the nghttp3 encode must happen + * on the reactor thread that owns the connection. A response_wire is pure + * malloc-domain bytes (one growable arena, offset-based spans → realloc-safe), + * so it crosses the thread boundary cleanly. Same layout discipline as + * request_wire. + * + * Routing: reactor_id / stream_id / conn echo the originating request_wire so + * the reactor can resolve which QUIC stream to emit on. Lifetime: created and + * filled on the worker thread, ownership transfers to the reactor at post; the + * reactor reads it (encode + send) and frees it. Single owner at any time. + */ + +typedef struct response_wire_s response_wire_t; + +/* Create an empty response wire. routing identifies the origin stream the + * reactor must send on (echoed from the request_wire). status starts unset (0). + * Returns NULL on allocation failure. */ +response_wire_t *response_wire_create(uint32_t reactor_id, int64_t stream_id, void *conn); + +/* Builders — copy bytes into the arena. set_status replaces; add_header + * appends; set_body replaces. All accept non-NUL-terminated spans. The header + * builders return false on allocation failure (the wire stays usable/freeable). + * `complete` is false when more body will be streamed to the reactor separately + * after this hand-off. */ +void response_wire_set_status(response_wire_t *rw, int status); +bool response_wire_add_header(response_wire_t *rw, + const char *name_ptr, size_t name_len, + const char *value_ptr, size_t value_len); +bool response_wire_set_body(response_wire_t *rw, const char *ptr, size_t len, bool complete); + +/* Accessors. Returned pointers are valid until response_wire_free; *len + * receives the span length. body returns NULL with *len = 0 when unset. */ +int response_wire_status(const response_wire_t *rw); +const char *response_wire_body(const response_wire_t *rw, size_t *len); +bool response_wire_body_complete(const response_wire_t *rw); + +size_t response_wire_header_count(const response_wire_t *rw); +/* Resolve header `index` (0-based). Returns false for an out-of-range index. */ +bool response_wire_header_at(const response_wire_t *rw, size_t index, + const char **name_ptr, size_t *name_len, + const char **value_ptr, size_t *value_len); + +uint32_t response_wire_reactor_id(const response_wire_t *rw); +int64_t response_wire_stream_id(const response_wire_t *rw); +void *response_wire_conn(const response_wire_t *rw); + +void response_wire_free(response_wire_t *rw); + +#endif /* RESPONSE_WIRE_H */ diff --git a/include/core/thread_mailbox.h b/include/core/thread_mailbox.h index d0b1c10..b139854 100644 --- a/include/core/thread_mailbox.h +++ b/include/core/thread_mailbox.h @@ -23,10 +23,11 @@ * blocks (full => clean backpressure), and the consumer never touches the queue * off its reactor thread. * - * Lost-wakeup safety: the producer signals only on the empty->non-empty edge, - * the enqueue (release) happens-before that signal, and the consumer drains to - * empty before returning. uv_async coalescing plus drain-to-empty means no item - * is ever stranded. + * Lost-wakeup safety: the producer signals on every post. The enqueue (release) + * happens-before the signal, and uv_async_send coalesces (it writes the eventfd + * only on the 0->1 pending transition), so unconditional signalling is cheap and + * leaves no item stranded. An earlier empty->non-empty edge optimisation raced + * drain-to-empty (the length counter lags the dequeue) and was removed. * * Threading contract: * - thread_mailbox_create()/free() run on the consumer's reactor thread (they @@ -54,6 +55,14 @@ void thread_mailbox_free(thread_mailbox_t *mb); * full (the caller decides whether to drop, retry, or close). */ bool thread_mailbox_post(thread_mailbox_t *mb, void *item); +/* Opt-in: make the wakeup handle keep the consumer's reactor loop alive (uv_ref + * via the trigger's start()). Mailboxes default to NOT keeping the loop alive — + * they are a wake source for a loop already kept running by other handles (a + * listener, coroutines). A dedicated reactor thread whose only handle is its + * inbound mailbox enables this so its loop blocks on the kernel instead of + * spinning. Consumer-thread only. */ +void thread_mailbox_keepalive(thread_mailbox_t *mb, bool enable); + /* Approximate number of queued items. */ size_t thread_mailbox_count(const thread_mailbox_t *mb); diff --git a/include/core/worker_dispatch.h b/include/core/worker_dispatch.h new file mode 100644 index 0000000..de1a5ab --- /dev/null +++ b/include/core/worker_dispatch.h @@ -0,0 +1,69 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ +*/ + +#ifndef WORKER_DISPATCH_H +#define WORKER_DISPATCH_H + +#include "php.h" +#include "Zend/zend_async_API.h" +#include "core/response_wire.h" +#include "http1/http_parser.h" /* http_request_t */ + +/* + * Worker-side request dispatch for the reactor/worker split (issue #80, B1b/D7). + * + * The transport reactor builds the request directly into a persistent + * http_request_t (D7) and hands the POINTER to a PHP worker (actor handoff — + * no copy-marshal). THIS is the worker side: it wraps the request in an + * HttpRequest zval on its own thread, spawns the user handler coroutine (so + * business logic runs off the transport thread), and when the handler finishes + * renders the HttpResponse into a flat response_wire (D3) handed back to a + * sink — which posts it to the reactor for nghttp3 encode + send. + * + * Everything here runs on the worker thread: the request/response zvals and the + * handler coroutine never touch the reactor. The request crosses the thread + * boundary by pointer (the worker becomes its sole owner); the response crosses + * back as a flat response_wire. + */ + +typedef struct http_server_object http_server_object; + +/* Sink for the rendered response, invoked on the worker thread from the handler + * coroutine's dispose. Ownership of `rw` transfers to the sink: it must + * response_wire_free() it once it has handed the bytes off (e.g. posted them + * back to the reactor). */ +typedef void (*worker_response_sink_fn)(response_wire_t *rw, void *sink_arg); + +/* Take ownership of `req` (a persistent reactor-built or ZMM request, refcount + * 1), wrap it in an HttpRequest on THIS (worker) thread, spawn the user handler + * coroutine in `scope`, and when it finishes render the HttpResponse into a + * response_wire (echoing the request's reactor_id / stream_id / conn) handed to + * `sink`. + * + * Ownership: `req` is consumed unconditionally — on success the HttpRequest + * object owns it (freed via http_request_destroy when the coroutine disposes); + * on every failure path this function destroys it before returning. The caller + * must not touch or free `req` after the call. + * + * `own_scope` mirrors the H3 dispatch flag: true gives each request its own + * request_context() subtree (a child of `scope`); false runs directly in + * `scope`. When no handler is registered a 404 is synthesised so the sink still + * fires. Buffered responses only for now (setBody / end) — a streaming send() + * body is not marshalled yet. + * + * Returns true once the handler coroutine is enqueued; false on hard failure + * (bad args / allocation / no current coroutine to spawn under), in which case + * the sink is not called. Requires an active TrueAsync scheduler on the calling + * thread. */ +bool worker_dispatch_request(http_server_object *server, + zend_async_scope_t *scope, + http_request_t *req, + bool own_scope, + worker_response_sink_fn sink, void *sink_arg); + +#endif /* WORKER_DISPATCH_H */ diff --git a/include/core/worker_inbox.h b/include/core/worker_inbox.h new file mode 100644 index 0000000..9d49b3d --- /dev/null +++ b/include/core/worker_inbox.h @@ -0,0 +1,60 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ +*/ + +#ifndef WORKER_INBOX_H +#define WORKER_INBOX_H + +#include "php.h" +#include "Zend/zend_async_API.h" +#include "core/worker_dispatch.h" /* http_request_t, worker_response_sink_fn */ + +/* + * Worker inbox for the reactor/worker split (issue #80, B2/D7). + * + * The consumer side of the request handoff: a per-worker #81 mailbox + * (thread_mailbox) whose drain runs on the worker's reactor thread and feeds + * each posted http_request_t to worker_dispatch_request (B1b/D7) — wrap + + * spawn handler coroutine + render the response to the sink. The reactor (any + * thread) builds the request and posts its POINTER; the worker drains and + * dispatches on its own thread, so business logic never runs on the transport + * reactor. + * + * One inbox per worker. The registry that lets a reactor pick which worker's + * inbox to post to is the producer side, wired in B3. + * + * Threading contract: + * - worker_inbox_create()/free() run on the worker (consumer) thread; they + * create/dispose the underlying libuv-backed mailbox on that loop. + * - worker_inbox_post() runs on any thread (typically a reactor). + * - free() must run after producers have quiesced. + */ + +typedef struct worker_inbox_s worker_inbox_t; + +/* Create a worker inbox on THIS (worker) thread. Dispatched requests run their + * handler in `scope` (own_scope mirrors worker_dispatch_request) against + * `server`'s handler table; each rendered response goes to `sink`. Returns NULL + * if no reactor is running on the calling thread or on allocation failure. */ +worker_inbox_t *worker_inbox_create(http_server_object *server, + zend_async_scope_t *scope, + bool own_scope, + worker_response_sink_fn sink, void *sink_arg); + +/* Post a request to the inbox (any thread). Ownership of `req` transfers to the + * inbox — the worker dispatch path becomes its sole owner and frees it via the + * request lifecycle. Returns false if the bounded mailbox is full (backpressure; + * the caller keeps ownership) or on bad arguments. */ +bool worker_inbox_post(worker_inbox_t *inbox, http_request_t *req); + +/* Approximate queued request count (producer backpressure visibility). */ +size_t worker_inbox_depth(const worker_inbox_t *inbox); + +/* Tear down the inbox. Consumer-thread only; producers must have quiesced. */ +void worker_inbox_free(worker_inbox_t *inbox); + +#endif /* WORKER_INBOX_H */ diff --git a/include/core/worker_registry.h b/include/core/worker_registry.h new file mode 100644 index 0000000..bffb401 --- /dev/null +++ b/include/core/worker_registry.h @@ -0,0 +1,76 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ +*/ + +#ifndef WORKER_REGISTRY_H +#define WORKER_REGISTRY_H + +#include +#include "core/worker_inbox.h" + +/* + * Worker registry for the reactor/worker split (issue #80, B3 — producer side). + * + * A fixed-size table of per-worker inboxes (worker_inbox.h). The parent creates + * it sized for the worker pool; each worker publishes its own inbox into its + * slot at startup; a transport reactor reads the table to choose which worker + * to hand a parsed request to. Publication is a single atomic store, lookup a + * single atomic load — no lock on the dispatch path. + * + * Dispatch (D5): reactor-paired sticky-default with load spill. Each reactor owns a + * strided subset of slots; worker_registry_least_busy picks the least-loaded owned + * worker (ties rotate so idle connections spread). The H3 path homes a connection to + * one worker and reuses it, spilling a request to a less-loaded worker — owned, else + * any (reactor_id < 0) — when the home backs up or dies. worker_registry_pick (flat + * round-robin) stays for the unit-test path. + * + * Threading: create()/free() on the parent; publish() once per worker on its + * own thread (release); pick()/at()/count() from any thread (acquire). free() + * after producers and workers have quiesced — it frees the table, not the + * inboxes (each worker frees its own). + */ + +typedef struct worker_registry_s worker_registry_t; + +/* Create a registry with `capacity` slots (>= 1). Parent thread. NULL on bad + * argument or allocation failure. */ +worker_registry_t *worker_registry_create(int capacity); + +/* Publish `inbox` at slot `idx` (release store). The worker calls this on its + * own thread once its inbox is up. Returns false for an out-of-range slot. */ +bool worker_registry_publish(worker_registry_t *reg, int idx, worker_inbox_t *inbox); + +/* Atomically claim the next free slot and publish `inbox` into it — lets each + * worker register without being told its index. Returns the slot index, or -1 + * if the table is full. Any thread. */ +int worker_registry_add(worker_registry_t *reg, worker_inbox_t *inbox); + +/* Number of slots, and number currently published. */ +int worker_registry_capacity(const worker_registry_t *reg); +int worker_registry_count(const worker_registry_t *reg); + +/* Inbox published at slot `idx` (sticky lookup), or NULL if unpublished / + * out of range. */ +worker_inbox_t *worker_registry_at(const worker_registry_t *reg, int idx); + +/* Next published inbox, round-robin across slots (atomic counter). NULL when no + * slot is published yet. Any thread. */ +worker_inbox_t *worker_registry_pick(worker_registry_t *reg); + +/* Least-loaded published inbox for a reactor, by worker_inbox_depth. Ownership + * is strided: reactor `reactor_id` owns slots {i : i % n_reactors == reactor_id}. + * `reactor_id` < 0 (or `n_reactors` <= 1) scans ALL slots — the global spill / fallback. + * Ties rotate via the round-robin counter so connections homing while idle spread + * across the owned set. Writes the chosen slot to *out_slot (-1 if none). Any thread. */ +worker_inbox_t *worker_registry_least_busy(worker_registry_t *reg, + int reactor_id, int n_reactors, + int *out_slot); + +/* Free the table (not the inboxes). Parent thread, after workers quiesce. */ +void worker_registry_free(worker_registry_t *reg); + +#endif /* WORKER_REGISTRY_H */ diff --git a/include/http1/http_parser.h b/include/http1/http_parser.h index 6ed51dc..73b92c5 100644 --- a/include/http1/http_parser.h +++ b/include/http1/http_parser.h @@ -201,6 +201,13 @@ struct http_request_t { int32_t body_h2_consume_pending; void *body_h3_stream; + /* Reverse-path routing. Set by the reactor that built the request; + * echoed onto the response so the reactor resolves which QUIC stream to + * emit on. All zero on the single-thread path. */ + void *reactor_conn; + int64_t reactor_stream_id; + uint32_t reactor_id; + /* 1-byte fields clustered */ uint8_t http_major; uint8_t http_minor; @@ -215,6 +222,13 @@ struct http_request_t { * llhttp_pause; readBody clears it via llhttp_resume below the * low-water mark. Unused by the MVP — see TODO in on_body. */ bool body_paused; + + /* Allocation domain of reactor-produced fields + * (method/uri/headers + the headers HashTable + the struct block). + * false = ZMM (worker-built, default), true = persistent malloc + * (reactor-built) → those frees go through pefree, flag-aware. Body + * and worker-derived fields (path/query/post/files) stay ZMM. */ + bool persistent; }; /* Single chunk node in the streaming body queue (linked list). @@ -354,6 +368,13 @@ void http_parser_destroy(http1_parser_t *parser); int http_parse_error_to_status(http_parse_error_t err); const char *http_parse_error_reason(http_parse_error_t err); +/* Allocate req->headers in the request's allocation domain: ZMM + * (ZVAL_PTR_DTOR) for a worker-built request, persistent malloc + + * flag-aware value dtor for a reactor-built one (req->persistent). Idempotent + * — no-op if the table already exists. Single source of truth so H1/H2/H3 do + * not each duplicate the domain branch. */ +void http_request_init_headers(http_request_t *req); + /* Bump request refcount. Used by H2/H3 stream layers right at dispatch * time so the stream can keep writing body bytes / fire body_event * post-dispatch while the PHP HttpRequest object independently owns @@ -367,4 +388,13 @@ void http_request_addref(http_request_t *req); * the canonical single-owner case where release == destroy. NULL-safe. */ void http_request_destroy(http_request_t *req); +/* Free every owned field of `req` (method / uri / headers / body / multipart / + * post / files / query / trace), in each field's own allocation domain, and + * NULL the pointers — WITHOUT touching the refcount, the release callback, or + * the struct itself. Used by reactor-mode H3 teardown to reclaim the fields of + * a slab-embedded request that was never handed to a worker (early RST / + * backpressure), where the worker's http_request_destroy never ran. NULL-safe; + * idempotent (NULLs as it goes). */ +void http_request_free_fields(http_request_t *req); + #endif /* HTTP_PARSER_H */ diff --git a/include/http3/http3_stream.h b/include/http3/http3_stream.h index 3436ca4..266cdc9 100644 --- a/include/http3/http3_stream.h +++ b/include/http3/http3_stream.h @@ -24,6 +24,12 @@ typedef struct _http3_stream_s http3_stream_t; typedef struct _http3_connection_s http3_connection_t; /* defined in http3_connection.h */ +/* hq-interop (HTTP/0.9-over-QUIC) request line is "GET \r\n"; cap the + * accumulator generously and reject (close stream) past it. */ +#ifndef HTTP3_HQ_LINE_MAX +#define HTTP3_HQ_LINE_MAX 8192 +#endif + /* One per inbound HTTP/3 request stream (bidi, client-initiated). * Mirrors http2_stream_t in spirit but carries less state — nghttp3 * already keeps the framing/header decoder state. @@ -163,6 +169,24 @@ struct _http3_stream_s { * down — without the walk, each such stream leaks its request + * headers + zend_strings). */ http3_stream_t *list_next; + + /* hq-interop only (HTTP/0.9-over-QUIC). Request-line accumulator, + * lazily allocated on the first stream byte; freed in release. h3 + * streams leave these NULL/zero. hq_served latches once the response + * has been produced; hq_fin_sent latches once its FIN has been emitted. */ + char *hq_line; + uint16_t hq_line_len; + bool hq_served; + bool hq_fin_sent; + + /* hq response payload. hq_body points into the mmap'd file (hq_map) or a + * static literal (error); NULL + zero len = empty body served FIN-only. + * The egress loop streams [hq_body_off, hq_body_len) raw + FIN. */ + const char *hq_body; + size_t hq_body_len; + size_t hq_body_off; + void *hq_map; /* munmap(hq_map, hq_map_len) on release */ + size_t hq_map_len; }; /* Allocate a stream + its http_request_t from the listener's slab pool. diff --git a/include/php_http_server.h b/include/php_http_server.h index d8813e0..b9f2270 100644 --- a/include/php_http_server.h +++ b/include/php_http_server.h @@ -136,6 +136,10 @@ struct _http_server_config_t { zend_string *tls_cert_path; zend_string *tls_key_path; + /* hq-interop (HTTP/0.9-over-QUIC) document root. + * NULL = hq serves no files. No effect on h3. */ + zend_string *http3_hq_docroot; + /* Buffer sizes */ size_t write_buffer_size; /* Write buffer size (default: 65536) */ @@ -512,6 +516,11 @@ void http_server_on_parse_error(http_server_object *server, int status_code); HashTable *http_server_get_protocol_handlers(http_server_object *server); zend_async_scope_t *http_server_get_scope (http_server_object *server); +/* Resolve the core server object from its PHP wrapper (the create_object stash). + * The wrapper layout is private to http_server_class.c, so this is the public + * way to reach the core from another TU. */ +http_server_object *http_server_object_from_zend(zend_object *obj); + /* Live HttpServerConfig the server was constructed with. The returned * pointer is non-owning and stays valid for the server's lifetime — * the config object's zval is held inside http_server_object. */ diff --git a/include/send_file.h b/include/send_file.h index 054b77f..87c49e2 100644 --- a/include/send_file.h +++ b/include/send_file.h @@ -105,6 +105,11 @@ typedef struct * NULL = cache disabled (sendFile case). */ http_server_object *server; + /* Open-file cache to insert into on the miss path, used in preference + * to `server` when set. Lets a caller with no PHP server object (the + * transport reactor) supply its own per-thread cache. */ + http_static_cache_t *cache; + send_file_on_error_t on_error; } send_file_config_t; diff --git a/include/static/static_handler.h b/include/static/static_handler.h index e916b2d..bdc393e 100644 --- a/include/static/static_handler.h +++ b/include/static/static_handler.h @@ -208,6 +208,23 @@ http_static_result_t http_static_try_serve(http_server_object *server, const http_static_dispatch_cbs_t *cbs, void *user); +/* Server-free core of the dispatch hook. Identical logic to + * http_static_try_serve but keyed on a borrowed mount array + an + * explicit open-file cache (NULL = uncached) instead of the PHP server + * object — the transport reactor has no server object on its thread but + * can hold its own refs to the persistent, atomically-refcounted mounts. + * http_static_try_serve is a thin wrapper that resolves these from the + * server and forwards. */ +struct http_static_cache_s; +http_static_result_t http_static_try_serve_mounts( + const http_static_handler_t *const *mounts, size_t mount_count, + struct http_static_cache_s *cache, + struct http_request_t *request, + zend_object *response_obj, + http_server_counters_t *counters, + const http_static_dispatch_cbs_t *cbs, + void *user); + /* Out-of-line "is any mount registered" helper. The struct layout * lives in http_server_class.c so the count is not directly visible to * the dispatcher TU; this getter is the single authority and is cheap @@ -221,6 +238,13 @@ size_t http_static_handler_count(const http_server_object *server); const http_static_handler_t *http_static_handler_get(const http_server_object *server, size_t index); +/* Borrow the server's contiguous mount-pointer array (length == + * http_static_handler_count). NULL when there are no mounts. Stable for + * the server's lifetime. Lets a caller pass the whole array to + * http_static_try_serve_mounts without the server struct layout. */ +const http_static_handler_t *const * +http_static_handler_mounts(const http_server_object *server); + /* Open file cache accessor — lazily creates the cache on first call, * returns it on subsequent calls. NULL if the server is NULL or * allocation failed. Lifetime: until http_server_free destroys the diff --git a/src/core/http_connection.c b/src/core/http_connection.c index 63d7b50..9349cd6 100644 --- a/src/core/http_connection.c +++ b/src/core/http_connection.c @@ -57,19 +57,6 @@ extern void http_response_set_default_json_flags(zend_object *, uint32_t); # define MSG_NOSIGNAL 0 #endif -/* Half-close the send direction: SD_SEND on Winsock, SHUT_WR on POSIX. */ -#ifdef _WIN32 -# define HTTP_SHUT_WR SD_SEND -#else -# define HTTP_SHUT_WR SHUT_WR -#endif - -/* Lingering-close budget (ms). After an error response is sent mid-upload - * we keep draining the peer's body for at most this long (refreshed on - * activity) before a forced close, so a peer that never sends FIN can't - * pin the connection open. */ -#define HTTP_LINGER_CLOSE_MS 5000 - #define DEFAULT_READ_BUFFER_SIZE 8192 extern zval* http_request_create_from_parsed(http_request_t *req); @@ -908,16 +895,6 @@ static bool http_connection_handle_read_completion(http_connection_t *conn, { *should_destroy_out = false; - /* Lingering close: a 4xx was already sent + FIN'd; we are only draining - * the peer's leftover upload now. Discard every chunk (don't feed the - * parser), refresh the linger deadline, and stay armed. Peer FIN (EOF) - * is handled in the read callback (→ destroy). */ - if (UNEXPECTED(conn->lingering)) { - conn->read_buffer_len = 0; - conn->deadline_ms = ZEND_ASYNC_NOW() + HTTP_LINGER_CLOSE_MS; - return true; - } - if (UNEXPECTED(!conn->protocol_detected) && !detect_and_assign_protocol(conn)) { return true; /* Need more data for detection — caller re-arms */ } @@ -991,18 +968,8 @@ static bool http_connection_handle_read_completion(http_connection_t *conn, return false; } - if (conn->parser != NULL && http_connection_emit_parse_error(conn, conn->parser) - && conn->io != NULL) { - /* 4xx sent + FIN'd (shutdown in emit). Enter lingering close: - * keep reading and discarding the peer's remaining upload so the - * final close is a clean FIN, not an RST that would wipe the - * response on Windows. Peer FIN (EOF) → destroy in the read cb; - * the deadline_tick force-closes a peer that never sends FIN. */ - conn->lingering = 1; - conn->read_buffer_len = 0; - conn->deadline_ms = ZEND_ASYNC_NOW() + HTTP_LINGER_CLOSE_MS; - *should_destroy_out = false; - return false; + if (conn->parser != NULL) { + (void)http_connection_emit_parse_error(conn, conn->parser); } *should_destroy_out = true; return false; @@ -1118,7 +1085,7 @@ static void http_connection_read_callback_fn( * tear the conn down once it (and any pipelined chain) has finished * responding. Without this, an EOF from a peer that sent its last * request and shut down the write half kills mid-flight responses. */ - if (!conn->lingering && (conn->request_in_flight || conn->read_buffer_len > 0)) { + if (conn->request_in_flight || conn->read_buffer_len > 0) { conn->keep_alive = false; return; } @@ -1135,7 +1102,7 @@ static void http_connection_read_callback_fn( * could on_message_complete and dispatch a *second* handler on the same * conn while the first one's response slot is still live. Just buffer the * tail; handler dispose will pull it out via handle_read_completion. */ - if (!terminal && conn->request_in_flight && !conn->lingering) { + if (!terminal && conn->request_in_flight) { return; } @@ -1946,16 +1913,6 @@ bool http_connection_emit_parse_error(http_connection_t *conn, http1_parser_t *p } const ssize_t sent = send(fd, response, (size_t)n, MSG_NOSIGNAL); - - /* Half-close our send side: flushes the response + FIN while keeping - * the recv side open so handle_read_completion can drain the peer's - * in-flight upload (lingering close) before the final closesocket. - * Without the drain, closing with unread recv data forces an RST that - * discards the just-sent response on Windows. Best-effort. */ - if (sent == (ssize_t)n) { - (void)shutdown(fd, HTTP_SHUT_WR); - } - return sent == (ssize_t)n; } /* }}} */ diff --git a/src/core/http_connection.h b/src/core/http_connection.h index f44dca1..7d278e5 100644 --- a/src/core/http_connection.h +++ b/src/core/http_connection.h @@ -263,13 +263,6 @@ struct _http_connection_t { * iterating. The destroy defers on this flag instead — see the * gate in http_connection_destroy and the drain in http1_feed. */ unsigned in_parser_feed : 1; - /* Lingering close (graceful). Set after an error response (e.g. 413) - * is sent while the peer is still uploading: we keep reading and - * DISCARDING the peer's remaining body so the eventual close emits a - * clean FIN instead of an RST. Closing a socket with unread recv data - * forces an abortive RST on Windows, which wipes the just-sent - * response. Bounded by deadline_ms / the periodic deadline_tick. */ - unsigned lingering : 1; /* Intrusive doubly-linked node. Dual role: * - while the slot is ALIVE, (next_conn, prev_conn) link into diff --git a/src/core/reactor_pool.c b/src/core/reactor_pool.c new file mode 100644 index 0000000..25a9943 --- /dev/null +++ b/src/core/reactor_pool.c @@ -0,0 +1,399 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ + + Reactor thread pool (#80, design D1 — substrate). See include/core/reactor_pool.h. +*/ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include "php.h" +#include "Zend/zend_async_API.h" +#include "Zend/zend_atomic.h" +#include "core/reactor_pool.h" +#include "core/thread_mailbox.h" + +#ifdef PHP_WIN32 +# include +#else +# include +#endif + +/* Inbound mailbox sizing. Bounded so a stalled reactor backpressures producers + * rather than growing unbounded. */ +#define REACTOR_MAILBOX_CAPACITY 1024 +#define REACTOR_MAILBOX_BATCH 64 + +/* Distinguished pointer posted to a reactor's mailbox to make it leave its + * loop. Its address can never collide with a real heap item. */ +static const char reactor_stop_token; +#define REACTOR_STOP_SENTINEL ((void *)&reactor_stop_token) + +/* ctx lifecycle, published from the reactor thread to the parent. */ +#define REACTOR_PHASE_SPAWN 0 /* submitted, not yet in its loop */ +#define REACTOR_PHASE_RUN 1 /* mailbox created and published; looping */ +#define REACTOR_PHASE_DONE 2 /* loop left, mailbox freed */ + +/* Inbound message envelope. Everything posted to a reactor other than the stop + * sentinel is a reactor_cmd_t*; the drain dispatches on `kind`. + * NOOP — the substrate's opaque-token wrapper (reactor_pool_post). Heap; the + * drain frees it after counting. Carries no behaviour, just liveness. + * EXEC — a function the reactor runs on its own thread, then acks via `done`. + * Stack-owned by reactor_pool_exec (which blocks on `done`), so the + * drain must never free it. + * POST — like EXEC but fire-and-forget (reactor_pool_post_exec): the reactor + * runs fn(arg) and frees the heap envelope, no `done` ack. This is the + * worker->reactor reverse path's delivery. */ +typedef enum { + REACTOR_CMD_NOOP, + REACTOR_CMD_EXEC, + REACTOR_CMD_POST, +} reactor_cmd_kind_t; + +typedef struct { + reactor_cmd_kind_t kind; + void *payload; /* NOOP: opaque token, currently only counted */ + reactor_exec_fn fn; /* EXEC */ + void *arg; /* EXEC */ + zend_atomic_int done; /* EXEC: reactor stores 1 once fn has returned */ +} reactor_cmd_t; + +/* Per-reactor state, shared parent <-> one reactor thread — the legitimate + * cross-thread handshake (Zend atomics), not single-threaded-core state. + * `mailbox` is written by the reactor before it stores phase=RUN, and read by + * the parent only after it observes phase>=RUN — the atomic phase store/load + * orders the plain write. `stopping` is touched only on the reactor thread + * (loop + drain callback). */ +typedef struct { + reactor_pool_t *pool; + zend_atomic_int phase; + thread_mailbox_t *mailbox; + zend_atomic_int64 processed; + bool stopping; +} reactor_ctx_t; + +struct reactor_pool_s { + zend_async_thread_pool_t *tp; + reactor_ctx_t *ctx; /* [count] */ + int count; +}; + +/* See reactor_pool_set_drain_epilogue. Process-wide, set once on the parent + * before reactors run, read on each reactor thread at drain-batch end. */ +static void (*g_drain_epilogue)(void) = NULL; + +void reactor_pool_set_drain_epilogue(void (*fn)(void)) +{ + g_drain_epilogue = fn; +} + +static void reactor_pool_msleep(void) +{ +#ifdef PHP_WIN32 + Sleep(1); +#else + const struct timespec ts = { 0, 1000000 }; /* 1 ms */ + nanosleep(&ts, NULL); +#endif +} + +/* Runs on the reactor thread when its inbound mailbox has items. The stop + * sentinel asks the loop to leave; everything else is real work, counted here. */ +static void reactor_drain(void **items, const size_t count, void *arg) +{ + reactor_ctx_t *const rc = (reactor_ctx_t *)arg; + int64_t drained = 0; + + for (size_t i = 0; i < count; i++) { + if (UNEXPECTED(items[i] == REACTOR_STOP_SENTINEL)) { + rc->stopping = true; + continue; + } + + reactor_cmd_t *const cmd = (reactor_cmd_t *)items[i]; + + switch (cmd->kind) { + case REACTOR_CMD_EXEC: + cmd->fn(cmd->arg); + /* Release: publish fn's effects before the parent sees done. */ + zend_atomic_int_store_ex(&cmd->done, 1); + break; + + case REACTOR_CMD_POST: + cmd->fn(cmd->arg); + free(cmd); + break; + + case REACTOR_CMD_NOOP: + default: + free(cmd); + break; + } + + drained++; + } + + /* Batch epilogue: coalesce any per-command deferred work (H3 steer flush) + * into one pass now that every command in this drain has run. */ + if (g_drain_epilogue != NULL) { + g_drain_epilogue(); + } + + if (drained != 0) { + zend_atomic_int64_store_ex(&rc->processed, + zend_atomic_int64_load_ex(&rc->processed) + drained); + } +} + +/* The reactor loop. Owns a pure-C libuv loop, kept alive by its inbound mailbox + * (the trigger is ref'd via keepalive); blocks in the kernel until woken, then + * drains. Leaves when a stop sentinel arrives. No PHP executes here. */ +static void reactor_loop_handler(zend_async_event_t *event, void *vctx) +{ + (void)event; + reactor_ctx_t *const rc = (reactor_ctx_t *)vctx; + + thread_mailbox_t *const mb = thread_mailbox_create(REACTOR_MAILBOX_CAPACITY, + REACTOR_MAILBOX_BATCH, + reactor_drain, rc); + + if (mb == NULL) { + zend_atomic_int_store_ex(&rc->phase, REACTOR_PHASE_DONE); + return; + } + + /* Open inbound keeps the loop alive (no listener yet) — so uv_run blocks + * instead of spinning. */ + thread_mailbox_keepalive(mb, true); + + rc->mailbox = mb; /* publish (plain) */ + zend_atomic_int_store_ex(&rc->phase, REACTOR_PHASE_RUN); /* release */ + + while (!rc->stopping) { + ZEND_ASYNC_REACTOR_EXECUTE(/*no_wait=*/false); + } + + thread_mailbox_keepalive(mb, false); + thread_mailbox_free(mb); /* consumer-thread */ + rc->mailbox = NULL; + + zend_atomic_int_store_ex(&rc->phase, REACTOR_PHASE_DONE); +} + +reactor_pool_t *reactor_pool_create(const int reactors) +{ + if (reactors <= 0) { + return NULL; + } + + if (zend_async_new_thread_pool_fn == NULL) { + zend_throw_error(NULL, "ThreadPool API is not registered — load true_async first"); + return NULL; + } + + zend_async_thread_pool_t *const tp = + ZEND_ASYNC_NEW_THREAD_POOL((int32_t)reactors, (int32_t)reactors); + + if (tp == NULL || tp->submit_internal == NULL) { + if (tp != NULL) { + ZEND_THREAD_POOL_DELREF(tp); + } + + zend_throw_error(NULL, "ThreadPool->submit_internal not available — true_async too old"); + return NULL; + } + + reactor_pool_t *const rp = pecalloc(1, sizeof(*rp), 0); + rp->tp = tp; + rp->ctx = pecalloc((size_t)reactors, sizeof(reactor_ctx_t), 0); + rp->count = 0; + + for (int i = 0; i < reactors; i++) { + rp->ctx[i].pool = rp; + rp->ctx[i].mailbox = NULL; + rp->ctx[i].stopping = false; + ZEND_ATOMIC_INT_INIT(&rp->ctx[i].phase, REACTOR_PHASE_SPAWN); + ZEND_ATOMIC_INT64_INIT(&rp->ctx[i].processed, 0); + + zend_async_event_t *const evt = + tp->submit_internal(tp, reactor_loop_handler, &rp->ctx[i]); + + if (evt == NULL) { + break; + } + + rp->count++; + + /* We track completion via the per-reactor phase, not this future — + * release our reference so the unawaited future does not leak. */ + ZEND_ASYNC_EVENT_RELEASE(evt); + } + + if (rp->count == 0) { + ZEND_THREAD_POOL_DELREF(tp); + pefree(rp->ctx, 0); + pefree(rp, 0); + return NULL; + } + + /* Block until every submitted reactor has reached its loop (or failed) so + * the pool is ready to accept posts the moment we return. */ + for (int i = 0; i < rp->count; i++) { + while (zend_atomic_int_load_ex(&rp->ctx[i].phase) == REACTOR_PHASE_SPAWN) { + reactor_pool_msleep(); + } + } + + return rp; +} + +int reactor_pool_count(const reactor_pool_t *rp) +{ + return rp != NULL ? rp->count : 0; +} + +bool reactor_pool_post(reactor_pool_t *rp, const int idx, void *item) +{ + if (UNEXPECTED(rp == NULL || idx < 0 || idx >= rp->count)) { + return false; + } + + reactor_ctx_t *const rc = &rp->ctx[idx]; + + if (UNEXPECTED(zend_atomic_int_load_ex(&rc->phase) != REACTOR_PHASE_RUN)) { + return false; + } + + reactor_cmd_t *const cmd = malloc(sizeof(*cmd)); + + if (UNEXPECTED(cmd == NULL)) { + return false; + } + + cmd->kind = REACTOR_CMD_NOOP; + cmd->payload = item; + + if (!thread_mailbox_post(rc->mailbox, cmd)) { + free(cmd); + return false; + } + + return true; +} + +bool reactor_pool_exec(reactor_pool_t *rp, const int idx, const reactor_exec_fn fn, void *arg) +{ + if (UNEXPECTED(rp == NULL || idx < 0 || idx >= rp->count || fn == NULL)) { + return false; + } + + reactor_ctx_t *const rc = &rp->ctx[idx]; + + if (UNEXPECTED(zend_atomic_int_load_ex(&rc->phase) != REACTOR_PHASE_RUN)) { + return false; + } + + /* Stack-owned: the reactor never frees an EXEC envelope. Safe because we + * block on `done` below, so the frame outlives the reactor's use of it. */ + reactor_cmd_t cmd; + cmd.kind = REACTOR_CMD_EXEC; + cmd.fn = fn; + cmd.arg = arg; + ZEND_ATOMIC_INT_INIT(&cmd.done, 0); + + /* Bounded mailbox: retry on full; bail if the reactor leaves RUN. */ + while (!thread_mailbox_post(rc->mailbox, &cmd)) { + if (zend_atomic_int_load_ex(&rc->phase) != REACTOR_PHASE_RUN) { + return false; + } + + reactor_pool_msleep(); + } + + /* Acquire: pair with the reactor's release store once fn has run. */ + while (zend_atomic_int_load_ex(&cmd.done) == 0) { + reactor_pool_msleep(); + } + + return true; +} + +bool reactor_pool_post_exec(reactor_pool_t *rp, const int idx, + const reactor_exec_fn fn, void *arg) +{ + if (UNEXPECTED(rp == NULL || idx < 0 || idx >= rp->count || fn == NULL)) { + return false; + } + + reactor_ctx_t *const rc = &rp->ctx[idx]; + + if (UNEXPECTED(zend_atomic_int_load_ex(&rc->phase) != REACTOR_PHASE_RUN)) { + return false; + } + + /* Heap-owned: the reactor frees it after running fn (no `done` ack). */ + reactor_cmd_t *const cmd = malloc(sizeof(*cmd)); + + if (UNEXPECTED(cmd == NULL)) { + return false; + } + + cmd->kind = REACTOR_CMD_POST; + cmd->fn = fn; + cmd->arg = arg; + + if (!thread_mailbox_post(rc->mailbox, cmd)) { + free(cmd); + return false; + } + + return true; +} + +uint64_t reactor_pool_processed(const reactor_pool_t *rp, const int idx) +{ + if (UNEXPECTED(rp == NULL || idx < 0 || idx >= rp->count)) { + return 0; + } + + return (uint64_t)zend_atomic_int64_load_ex(&rp->ctx[idx].processed); +} + +void reactor_pool_destroy(reactor_pool_t *rp) +{ + if (rp == NULL) { + return; + } + + /* Ask each running reactor to leave by posting the stop sentinel into its + * mailbox — same path as real work, so no cross-thread handle touch. */ + for (int i = 0; i < rp->count; i++) { + reactor_ctx_t *const rc = &rp->ctx[i]; + + if (zend_atomic_int_load_ex(&rc->phase) != REACTOR_PHASE_RUN) { + continue; + } + + while (!thread_mailbox_post(rc->mailbox, REACTOR_STOP_SENTINEL)) { + reactor_pool_msleep(); + } + } + + /* Wait for every loop to leave — only then is the ctx no longer touched. */ + for (int i = 0; i < rp->count; i++) { + while (zend_atomic_int_load_ex(&rp->ctx[i].phase) != REACTOR_PHASE_DONE) { + reactor_pool_msleep(); + } + } + + rp->tp->close(rp->tp); + ZEND_THREAD_POOL_DELREF(rp->tp); + + pefree(rp->ctx, 0); + pefree(rp, 0); +} diff --git a/src/core/reactor_pool_test_hooks.c b/src/core/reactor_pool_test_hooks.c new file mode 100644 index 0000000..e745d61 --- /dev/null +++ b/src/core/reactor_pool_test_hooks.c @@ -0,0 +1,1088 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ + + Reactor pool test hook (#80). Entirely gated behind HTTP_SERVER_TEST_HOOKS + (--enable-http-server-test-hooks); never present in a release build. + See include/core/reactor_pool_test.h. +*/ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include "core/reactor_pool_test.h" + +#ifdef HTTP_SERVER_TEST_HOOKS + +#include "php.h" +#include "Zend/zend_API.h" +#include "Zend/zend_atomic.h" +#include "zend_exceptions.h" +#include "core/reactor_pool.h" +#include "core/response_wire.h" +#include "core/worker_dispatch.h" +#include "core/worker_inbox.h" +#include "core/worker_registry.h" +#include "core/async_plain_event.h" +#include "php_http_server.h" +#include "http1/http_parser.h" + +#include +#include + +/* Defined in src/http_request.c; wraps an http_request_t in an HttpRequest zval. */ +extern zval *http_request_create_from_parsed(http_request_t *req); + +#ifdef PHP_WIN32 +# include +#else +# include +# include +#endif + +/* Reactor-side H3 listener spike (#80, B3p3-a). POSIX-only: the raw-fd recv + * path and the C datagram send below use BSD sockets directly. */ +#if defined(HAVE_HTTP_SERVER_HTTP3) && !defined(PHP_WIN32) +# include "http3/http3_listener.h" +# include +# include +# include +# include +#endif + +/* Upper bound on how long the self-test waits for reactors to drain. */ +#define REACTOR_SELFTEST_WAIT_MS 5000 + +static void selftest_msleep(void) +{ +#ifdef PHP_WIN32 + Sleep(1); +#else + const struct timespec ts = { 0, 1000000 }; /* 1 ms */ + nanosleep(&ts, NULL); +#endif +} + +/* Opaque OS thread identity, for asserting a callback ran off the parent. */ +static uintptr_t selftest_thread_id(void) +{ +#ifdef PHP_WIN32 + return (uintptr_t)GetCurrentThreadId(); +#else + return (uintptr_t)pthread_self(); +#endif +} + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_reactor_pool_selftest, 0, 2, + MAY_BE_ARRAY | MAY_BE_FALSE) + ZEND_ARG_TYPE_INFO(0, reactors, IS_LONG, 0) + ZEND_ARG_TYPE_INFO(0, items_per_reactor, IS_LONG, 0) +ZEND_END_ARG_INFO() + +/* Spin up `reactors` transport reactors, post `items_per_reactor` opaque tokens + * into each reactor's #81 inbound, wait for them to drain, tear down, and return + * the per-reactor drained counts (or false on spawn failure). Exercises spawn, + * channel drain, per-reactor isolation, and clean shutdown. */ +PHP_FUNCTION(_http_server_reactor_pool_selftest) +{ + zend_long reactors = 0; + zend_long items = 0; + + ZEND_PARSE_PARAMETERS_START(2, 2) + Z_PARAM_LONG(reactors) + Z_PARAM_LONG(items) + ZEND_PARSE_PARAMETERS_END(); + + if (reactors <= 0 || items < 0) { + RETURN_FALSE; + } + + reactor_pool_t *const rp = reactor_pool_create((int)reactors); + + if (rp == NULL) { + RETURN_FALSE; + } + + const int count = reactor_pool_count(rp); + + for (int r = 0; r < count; r++) { + for (zend_long k = 1; k <= items; k++) { + void *const token = (void *)(uintptr_t)k; /* opaque, never deref'd */ + + while (!reactor_pool_post(rp, r, token)) { + selftest_msleep(); /* mailbox full: let the reactor drain */ + } + } + } + + /* Reactors drain on their own threads; bounded wait for completion. */ + for (int waited = 0; waited < REACTOR_SELFTEST_WAIT_MS; waited++) { + bool all_done = true; + + for (int r = 0; r < count; r++) { + if (reactor_pool_processed(rp, r) < (uint64_t)items) { + all_done = false; + break; + } + } + + if (all_done) { + break; + } + + selftest_msleep(); + } + + array_init(return_value); + + for (int r = 0; r < count; r++) { + add_next_index_long(return_value, (zend_long)reactor_pool_processed(rp, r)); + } + + reactor_pool_destroy(rp); +} + +/* Build a synthetic http_request_t the way the #80 reactor will: persistent + * (malloc) domain method/uri/headers + routing triple, ZMM body (worker-domain + * — deliberately mixed-domain). + * Returns a refcount=1 request the caller owns (hand to dispatch/inbox, or + * release via http_request_destroy). NULL only on persistent-alloc failure. + * `headers`/`body` may be NULL. */ +static http_request_t *selftest_build_request(uint32_t reactor_id, int64_t stream_id, + const char *method, size_t method_len, + const char *path, size_t path_len, + HashTable *headers, + const char *body, size_t body_len) +{ + http_request_t *const req = pecalloc(1, sizeof(*req), 1); + + if (req == NULL) { + return NULL; + } + + req->refcount = 1; + req->persistent = true; + req->reactor_id = reactor_id; + req->reactor_stream_id = stream_id; + req->reactor_conn = NULL; + + req->method = zend_string_init(method, method_len, 1); + req->uri = zend_string_init(path, path_len, 1); + + if (headers != NULL) { + http_request_init_headers(req); + + zend_string *name; + zval *value; + ZEND_HASH_FOREACH_STR_KEY_VAL(headers, name, value) { + if (name == NULL || Z_TYPE_P(value) != IS_STRING) { + continue; + } + + zend_string *const key = zend_string_init(ZSTR_VAL(name), ZSTR_LEN(name), 1); + zval val; + ZVAL_STR(&val, zend_string_init(Z_STRVAL_P(value), Z_STRLEN_P(value), 1)); + zend_hash_update(req->headers, key, &val); + zend_string_release(key); + } ZEND_HASH_FOREACH_END(); + } + + if (body != NULL && body_len > 0) { + req->body = zend_string_init(body, body_len, 0); /* worker-domain */ + req->content_length = body_len; + } + + return req; +} + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_persistent_request_selftest, 0, 4, + MAY_BE_OBJECT | MAY_BE_FALSE) + ZEND_ARG_TYPE_INFO(0, method, IS_STRING, 0) + ZEND_ARG_TYPE_INFO(0, path, IS_STRING, 0) + ZEND_ARG_TYPE_INFO(0, headers, IS_ARRAY, 0) + ZEND_ARG_TYPE_INFO(0, body, IS_STRING, 0) +ZEND_END_ARG_INFO() + +/* Build an http_request_t in the PERSISTENT (malloc) domain the way the #80 + * reactor will (selftest_build_request: persistent method/uri/headers, ZMM + * body) and wrap it in an HttpRequest. The phpt then exercises the flag-aware + * accessors (deep-copy persistent strings, rebuild the persistent headers table + * into ZMM) and frees the object, which runs http_request_destroy on the + * persistent domain. ASan proves the whole reactor-side request lifecycle + * (create + read + free) is heap-clean. */ +PHP_FUNCTION(_http_server_persistent_request_selftest) +{ + zend_string *method; + zend_string *path; + HashTable *headers; + zend_string *body; + + ZEND_PARSE_PARAMETERS_START(4, 4) + Z_PARAM_STR(method) + Z_PARAM_STR(path) + Z_PARAM_ARRAY_HT(headers) + Z_PARAM_STR(body) + ZEND_PARSE_PARAMETERS_END(); + + http_request_t *const req = selftest_build_request( + 0, 0, ZSTR_VAL(method), ZSTR_LEN(method), ZSTR_VAL(path), ZSTR_LEN(path), + headers, ZSTR_VAL(body), ZSTR_LEN(body)); + + if (req == NULL) { + RETURN_FALSE; + } + + zval *const obj = http_request_create_from_parsed(req); + ZVAL_COPY_VALUE(return_value, obj); + efree(obj); +} + +/* Filled by exec_probe_fn ON the reactor thread; read by the parent only after + * reactor_pool_exec returns (its acquire-load of `done` orders these writes), so + * plain fields suffice — no atomics needed in the probe itself. */ +typedef struct { + uintptr_t tid; /* thread the callback ran on */ + int ran; /* 1 once the callback executed */ +} exec_probe_t; + +static void exec_probe_fn(void *arg) +{ + exec_probe_t *const p = (exec_probe_t *)arg; + p->tid = selftest_thread_id(); + p->ran = 1; +} + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_reactor_pool_exec_selftest, 0, 1, + MAY_BE_ARRAY | MAY_BE_FALSE) + ZEND_ARG_TYPE_INFO(0, reactors, IS_LONG, 0) +ZEND_END_ARG_INFO() + +/* Spin up `reactors` reactors and run a probe callback on each via + * reactor_pool_exec, proving the function executes ON the reactor's own thread + * (off the parent, one distinct thread per reactor). Returns a summary array + * { reactors, ran, off_parent, distinct_threads } or false on spawn failure. */ +PHP_FUNCTION(_http_server_reactor_pool_exec_selftest) +{ + zend_long reactors = 0; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_LONG(reactors) + ZEND_PARSE_PARAMETERS_END(); + + if (reactors <= 0) { + RETURN_FALSE; + } + + reactor_pool_t *const rp = reactor_pool_create((int)reactors); + + if (rp == NULL) { + RETURN_FALSE; + } + + const int count = reactor_pool_count(rp); + const uintptr_t parent_tid = selftest_thread_id(); + + exec_probe_t *const probes = ecalloc((size_t)count, sizeof(*probes)); + + for (int r = 0; r < count; r++) { + reactor_pool_exec(rp, r, exec_probe_fn, &probes[r]); + } + + int ran = 0; + int off_parent = 0; + bool distinct = true; + + for (int r = 0; r < count; r++) { + if (probes[r].ran == 1) { + ran++; + + if (probes[r].tid != parent_tid) { + off_parent++; + } + } + + for (int s = r + 1; s < count; s++) { + if (probes[r].tid == probes[s].tid) { + distinct = false; + } + } + } + + array_init(return_value); + add_assoc_long(return_value, "reactors", count); + add_assoc_long(return_value, "ran", ran); + add_assoc_long(return_value, "off_parent", off_parent); + add_assoc_bool(return_value, "distinct_threads", distinct); + + efree(probes); + reactor_pool_destroy(rp); +} + +/* Each reactor's own probe — written only by that one reactor thread (single + * writer, so load+store on `ran` is safe), polled by the parent via an atomic + * load. Mirrors how reactor_pool counts `processed`. */ +typedef struct { + zend_atomic_int ran; /* callbacks that executed on this reactor */ + uintptr_t tid; /* thread they ran on */ +} post_exec_probe_t; + +static void post_exec_probe_fn(void *arg) +{ + post_exec_probe_t *const p = (post_exec_probe_t *)arg; + p->tid = selftest_thread_id(); + zend_atomic_int_store_ex(&p->ran, zend_atomic_int_load_ex(&p->ran) + 1); +} + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_reactor_post_exec_selftest, 0, 2, + MAY_BE_ARRAY | MAY_BE_FALSE) + ZEND_ARG_TYPE_INFO(0, reactors, IS_LONG, 0) + ZEND_ARG_TYPE_INFO(0, count, IS_LONG, 0) +ZEND_END_ARG_INFO() + +/* Drive the fire-and-forget reverse-path primitive: post `count` non-blocking + * callbacks into each reactor and confirm they all ran on the reactor's own + * thread without the caller ever blocking. Unlike reactor_pool_exec (one blocking + * round-trip), reactor_pool_post_exec returns immediately, so the parent posts + * everything first and only then polls for completion. Returns a summary + * { reactors, expected, ran, off_parent } or false on spawn failure. */ +PHP_FUNCTION(_http_server_reactor_post_exec_selftest) +{ + zend_long reactors = 0, count = 0; + + ZEND_PARSE_PARAMETERS_START(2, 2) + Z_PARAM_LONG(reactors) + Z_PARAM_LONG(count) + ZEND_PARSE_PARAMETERS_END(); + + if (reactors <= 0 || count <= 0) { + RETURN_FALSE; + } + + reactor_pool_t *const rp = reactor_pool_create((int)reactors); + + if (rp == NULL) { + RETURN_FALSE; + } + + const int cnt = reactor_pool_count(rp); + const uintptr_t parent_tid = selftest_thread_id(); + + post_exec_probe_t *const probes = ecalloc((size_t)cnt, sizeof(*probes)); + + for (int r = 0; r < cnt; r++) { + ZEND_ATOMIC_INT_INIT(&probes[r].ran, 0); + } + + /* Fire everything without blocking; backpressure → retry. */ + for (int r = 0; r < cnt; r++) { + for (zend_long k = 0; k < count; k++) { + while (!reactor_pool_post_exec(rp, r, post_exec_probe_fn, &probes[r])) { + selftest_msleep(); + } + } + } + + /* Now poll until every reactor has run all its callbacks (bounded). */ + for (int waited = 0; waited < REACTOR_SELFTEST_WAIT_MS; waited++) { + bool all_done = true; + + for (int r = 0; r < cnt; r++) { + if (zend_atomic_int_load_ex(&probes[r].ran) < (int)count) { + all_done = false; + break; + } + } + + if (all_done) { + break; + } + + selftest_msleep(); + } + + int ran = 0, off_parent = 0; + + for (int r = 0; r < cnt; r++) { + ran += zend_atomic_int_load_ex(&probes[r].ran); + + if (probes[r].tid != 0 && probes[r].tid != parent_tid) { + off_parent++; + } + } + + array_init(return_value); + add_assoc_long(return_value, "reactors", cnt); + add_assoc_long(return_value, "expected", (zend_long)(cnt * (int)count)); + add_assoc_long(return_value, "ran", ran); + add_assoc_long(return_value, "off_parent", off_parent); + + efree(probes); + reactor_pool_destroy(rp); +} + +/* Sink + suspend state for the worker-dispatch self-test. */ +typedef struct { + response_wire_t *captured; /* response handed back by dispatch (owned) */ + zend_async_event_t *done; /* fired by the sink to wake the test */ +} dispatch_probe_t; + +static void dispatch_probe_sink(response_wire_t *rw, void *arg) +{ + dispatch_probe_t *const p = (dispatch_probe_t *)arg; + p->captured = rw; /* take ownership */ + async_plain_event_fire(p->done); +} + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_dispatch_from_wire_selftest, 0, 5, + MAY_BE_ARRAY | MAY_BE_FALSE) + ZEND_ARG_TYPE_INFO(0, server, IS_OBJECT, 0) + ZEND_ARG_TYPE_INFO(0, method, IS_STRING, 0) + ZEND_ARG_TYPE_INFO(0, path, IS_STRING, 0) + ZEND_ARG_TYPE_INFO(0, headers, IS_ARRAY, 0) + ZEND_ARG_TYPE_INFO(0, body, IS_STRING, 0) +ZEND_END_ARG_INFO() + +/* Drive the worker-side request path end to end on this thread: build a + * persistent http_request_t from the args, hand its pointer to + * worker_dispatch_request (which spawns the user handler coroutine in the + * current scope), suspend until the handler's dispose renders the response_wire + * and fires the sink, then return the rendered { status, headers, body } (or + * false on failure / timeout). Must be called from inside a coroutine + * (Async\spawn) on a server with a registered handler. */ +PHP_FUNCTION(_http_server_dispatch_from_wire_selftest) +{ + zval *server_zv; + zend_string *method, *path, *body; + HashTable *headers; + + ZEND_PARSE_PARAMETERS_START(5, 5) + Z_PARAM_OBJECT_OF_CLASS(server_zv, http_server_ce) + Z_PARAM_STR(method) + Z_PARAM_STR(path) + Z_PARAM_ARRAY_HT(headers) + Z_PARAM_STR(body) + ZEND_PARSE_PARAMETERS_END(); + + http_server_object *const server = http_server_object_from_zend(Z_OBJ_P(server_zv)); + + if (server == NULL) { + RETURN_FALSE; + } + + http_request_t *const req = selftest_build_request( + 0, 1, ZSTR_VAL(method), ZSTR_LEN(method), ZSTR_VAL(path), ZSTR_LEN(path), + headers, ZSTR_VAL(body), ZSTR_LEN(body)); + + if (req == NULL) { + RETURN_FALSE; + } + + dispatch_probe_t probe = { .captured = NULL, .done = async_plain_event_new() }; + + if (probe.done == NULL) { + http_request_destroy(req); + RETURN_FALSE; + } + + /* worker_dispatch_request consumes req unconditionally (owns it on success, + * destroys it on failure) — nothing to free here. */ + const bool ok = worker_dispatch_request(server, ZEND_ASYNC_CURRENT_SCOPE, + req, /*own_scope=*/true, + dispatch_probe_sink, &probe); + + if (!ok) { + probe.done->dispose(probe.done); + RETURN_FALSE; + } + + /* Suspend until the handler coroutine's dispose fires the sink; a timeout + * timer keeps a misbehaving handler from hanging the test. Both events are + * trans_event=true, so the waker owns and disposes them on resume. */ + zend_coroutine_t *const co = ZEND_ASYNC_CURRENT_COROUTINE; + + if (ZEND_ASYNC_WAKER_NEW(co) == NULL) { + probe.done->dispose(probe.done); + RETURN_FALSE; + } + + zend_async_resume_when(co, + &ZEND_ASYNC_NEW_TIMER_EVENT((zend_ulong)5000, false)->base, true, + zend_async_waker_callback_timeout, NULL); + zend_async_resume_when(co, probe.done, true, + zend_async_waker_callback_resolve, NULL); + + ZEND_ASYNC_SUSPEND(); + zend_async_waker_clean(co); + + if (EG(exception)) { + zend_clear_exception(); + } + + if (probe.captured == NULL) { + RETURN_FALSE; /* timed out / no response rendered */ + } + + array_init(return_value); + add_assoc_long(return_value, "status", response_wire_status(probe.captured)); + + zval hdrs; + array_init(&hdrs); + const size_t hcount = response_wire_header_count(probe.captured); + + for (size_t i = 0; i < hcount; i++) { + const char *np, *vp; + size_t nl, vl; + + if (response_wire_header_at(probe.captured, i, &np, &nl, &vp, &vl)) { + add_assoc_stringl_ex(&hdrs, np, nl, (char *)vp, vl); + } + } + + add_assoc_zval(return_value, "headers", &hdrs); + + size_t blen; + const char *b = response_wire_body(probe.captured, &blen); + add_assoc_stringl(return_value, "body", b != NULL ? (char *)b : "", blen); + + response_wire_free(probe.captured); +} + +/* Accumulating sink for the worker-inbox self-test: validate each rendered + * response (200 + "ok-" body), count it, and fire `done` once all expected + * responses have arrived. */ +typedef struct { + int expected; + int received; + int ok; + zend_async_event_t *done; +} inbox_probe_t; + +static void inbox_probe_sink(response_wire_t *rw, void *arg) +{ + inbox_probe_t *const p = (inbox_probe_t *)arg; + + if (response_wire_status(rw) == 200) { + size_t blen; + const char *b = response_wire_body(rw, &blen); + + if (b != NULL && blen >= 3 && strncmp(b, "ok-", 3) == 0) { + p->ok++; + } + } + + p->received++; + response_wire_free(rw); + + if (p->received >= p->expected && p->done != NULL) { + async_plain_event_fire(p->done); + } +} + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_worker_inbox_selftest, 0, 2, + MAY_BE_ARRAY | MAY_BE_FALSE) + ZEND_ARG_TYPE_INFO(0, server, IS_OBJECT, 0) + ZEND_ARG_TYPE_INFO(0, count, IS_LONG, 0) +ZEND_END_ARG_INFO() + +/* Drive the worker-inbox path: stand up a worker_inbox on this thread, post + * `count` synthetic http_request_t pointers into it (as a reactor would), wait + * for the drain to dispatch them all and the handlers to render their responses, + * then return { expected, received, ok }. Proves the #81 mailbox -> dispatch -> + * response path carries N independent requests. Call inside a coroutine on a + * server with a registered handler. */ +PHP_FUNCTION(_http_server_worker_inbox_selftest) +{ + zval *server_zv; + zend_long count; + + ZEND_PARSE_PARAMETERS_START(2, 2) + Z_PARAM_OBJECT_OF_CLASS(server_zv, http_server_ce) + Z_PARAM_LONG(count) + ZEND_PARSE_PARAMETERS_END(); + + if (count <= 0) { + RETURN_FALSE; + } + + http_server_object *const server = http_server_object_from_zend(Z_OBJ_P(server_zv)); + + if (server == NULL) { + RETURN_FALSE; + } + + inbox_probe_t probe = { + .expected = (int)count, + .received = 0, + .ok = 0, + .done = async_plain_event_new(), + }; + + if (probe.done == NULL) { + RETURN_FALSE; + } + + worker_inbox_t *const inbox = worker_inbox_create(server, ZEND_ASYNC_CURRENT_SCOPE, + /*own_scope=*/true, + inbox_probe_sink, &probe); + + if (inbox == NULL) { + probe.done->dispose(probe.done); + RETURN_FALSE; + } + + for (zend_long i = 0; i < count; i++) { + char path[32]; + const int plen = snprintf(path, sizeof(path), "/item-%lld", (long long)i); + http_request_t *const req = selftest_build_request( + 0, i, "GET", 3, path, plen > 0 ? (size_t)plen : 0, NULL, NULL, 0); + + if (req == NULL) { + probe.expected--; + continue; + } + + if (!worker_inbox_post(inbox, req)) { + http_request_destroy(req); /* full: backpressure, we keep ownership */ + probe.expected--; + } + } + + /* Suspend until every dispatched handler has rendered its response; a + * timeout timer keeps the loop alive and bounds a hang. */ + zend_coroutine_t *const co = ZEND_ASYNC_CURRENT_COROUTINE; + + if (ZEND_ASYNC_WAKER_NEW(co) == NULL) { + worker_inbox_free(inbox); + probe.done->dispose(probe.done); + RETURN_FALSE; + } + + zend_async_resume_when(co, + &ZEND_ASYNC_NEW_TIMER_EVENT((zend_ulong)5000, false)->base, true, + zend_async_waker_callback_timeout, NULL); + zend_async_resume_when(co, probe.done, true, + zend_async_waker_callback_resolve, NULL); + + ZEND_ASYNC_SUSPEND(); + zend_async_waker_clean(co); + + if (EG(exception)) { + zend_clear_exception(); + } + + /* Every handler has completed (or we timed out) — no in-flight dispatch + * references the inbox, so it is safe to tear down. */ + worker_inbox_free(inbox); + + array_init(return_value); + add_assoc_long(return_value, "expected", probe.expected); + add_assoc_long(return_value, "received", probe.received); + add_assoc_long(return_value, "ok", probe.ok); +} + +/* Registry self-test: one shared tally across all inboxes, plus a per-inbox + * counter so the test can see the round-robin spread. */ +typedef struct { + int expected; + int received; + int ok; + zend_async_event_t *done; +} reg_shared_t; + +typedef struct { + reg_shared_t *shared; + int per_inbox; /* responses this inbox handled */ +} reg_slot_probe_t; + +static void reg_probe_sink(response_wire_t *rw, void *arg) +{ + reg_slot_probe_t *const p = (reg_slot_probe_t *)arg; + + if (response_wire_status(rw) == 200) { + size_t blen; + const char *b = response_wire_body(rw, &blen); + + if (b != NULL && blen >= 3 && strncmp(b, "ok-", 3) == 0) { + p->shared->ok++; + } + } + + p->per_inbox++; + p->shared->received++; + response_wire_free(rw); + + if (p->shared->received >= p->shared->expected && p->shared->done != NULL) { + async_plain_event_fire(p->shared->done); + } +} + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_worker_registry_selftest, 0, 3, + MAY_BE_ARRAY | MAY_BE_FALSE) + ZEND_ARG_TYPE_INFO(0, server, IS_OBJECT, 0) + ZEND_ARG_TYPE_INFO(0, workers, IS_LONG, 0) + ZEND_ARG_TYPE_INFO(0, count, IS_LONG, 0) +ZEND_END_ARG_INFO() + +/* Stand up `workers` inboxes published into a worker_registry, post `count` + * synthetic request pointers through worker_registry_pick (round-robin), wait for + * all to dispatch + render, then return { expected, received, ok, distribution } + * where distribution[i] is how many requests slot i handled — proving the + * registry spreads load across worker inboxes. */ +PHP_FUNCTION(_http_server_worker_registry_selftest) +{ + zval *server_zv; + zend_long workers, count; + + ZEND_PARSE_PARAMETERS_START(3, 3) + Z_PARAM_OBJECT_OF_CLASS(server_zv, http_server_ce) + Z_PARAM_LONG(workers) + Z_PARAM_LONG(count) + ZEND_PARSE_PARAMETERS_END(); + + if (workers <= 0 || count <= 0) { + RETURN_FALSE; + } + + http_server_object *const server = http_server_object_from_zend(Z_OBJ_P(server_zv)); + + if (server == NULL) { + RETURN_FALSE; + } + + reg_shared_t shared = { + .expected = (int)count, + .received = 0, + .ok = 0, + .done = async_plain_event_new(), + }; + + if (shared.done == NULL) { + RETURN_FALSE; + } + + worker_registry_t *const reg = worker_registry_create((int)workers); + worker_inbox_t **const inboxes = ecalloc((size_t)workers, sizeof(*inboxes)); + reg_slot_probe_t *const probes = ecalloc((size_t)workers, sizeof(*probes)); + + for (zend_long w = 0; w < workers; w++) { + probes[w].shared = &shared; + probes[w].per_inbox = 0; + inboxes[w] = worker_inbox_create(server, ZEND_ASYNC_CURRENT_SCOPE, + /*own_scope=*/true, reg_probe_sink, &probes[w]); + worker_registry_publish(reg, (int)w, inboxes[w]); + } + + for (zend_long i = 0; i < count; i++) { + worker_inbox_t *const target = worker_registry_pick(reg); + char path[32]; + const int plen = snprintf(path, sizeof(path), "/item-%lld", (long long)i); + http_request_t *const req = selftest_build_request( + 0, i, "GET", 3, path, plen > 0 ? (size_t)plen : 0, NULL, NULL, 0); + + if (target == NULL || req == NULL) { + if (req != NULL) { + http_request_destroy(req); + } + + shared.expected--; + continue; + } + + if (!worker_inbox_post(target, req)) { + http_request_destroy(req); + shared.expected--; + } + } + + zend_coroutine_t *const co = ZEND_ASYNC_CURRENT_COROUTINE; + + if (ZEND_ASYNC_WAKER_NEW(co) != NULL) { + zend_async_resume_when(co, + &ZEND_ASYNC_NEW_TIMER_EVENT((zend_ulong)5000, false)->base, true, + zend_async_waker_callback_timeout, NULL); + zend_async_resume_when(co, shared.done, true, + zend_async_waker_callback_resolve, NULL); + + ZEND_ASYNC_SUSPEND(); + zend_async_waker_clean(co); + + if (EG(exception)) { + zend_clear_exception(); + } + } else { + shared.done->dispose(shared.done); + } + + array_init(return_value); + add_assoc_long(return_value, "expected", shared.expected); + add_assoc_long(return_value, "received", shared.received); + add_assoc_long(return_value, "ok", shared.ok); + + zval dist; + array_init(&dist); + + for (zend_long w = 0; w < workers; w++) { + add_next_index_long(&dist, probes[w].per_inbox); + worker_inbox_free(inboxes[w]); + } + + add_assoc_zval(return_value, "distribution", &dist); + + worker_registry_free(reg); + efree(inboxes); + efree(probes); +} + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_worker_registry_route_selftest, 0, 6, + MAY_BE_ARRAY | MAY_BE_FALSE) + ZEND_ARG_TYPE_INFO(0, server, IS_OBJECT, 0) + ZEND_ARG_TYPE_INFO(0, workers, IS_LONG, 0) + ZEND_ARG_TYPE_INFO(0, published, IS_LONG, 0) + ZEND_ARG_TYPE_INFO(0, n_reactors, IS_LONG, 0) + ZEND_ARG_TYPE_INFO(0, reactor_id, IS_LONG, 0) + ZEND_ARG_TYPE_INFO(0, iterations, IS_LONG, 0) +ZEND_END_ARG_INFO() + +/* Drive worker_registry_least_busy (D5) deterministically: publish `published` of + * `workers` inboxes (all idle, depth 0), call route `iterations` times for + * (reactor_id, n_reactors), and return { none, distribution[workers] } — the slot + * each call picked. Proves strided ownership, idle tie-rotation spread, skipping of + * unpublished slots, and owned-empty -> NULL (the global-fallback trigger). No + * dispatch/drain: inboxes stay empty, so there is nothing to free per call. */ +PHP_FUNCTION(_http_server_worker_registry_route_selftest) +{ + zval *server_zv; + zend_long workers, published, n_reactors, reactor_id, iterations; + + ZEND_PARSE_PARAMETERS_START(6, 6) + Z_PARAM_OBJECT_OF_CLASS(server_zv, http_server_ce) + Z_PARAM_LONG(workers) + Z_PARAM_LONG(published) + Z_PARAM_LONG(n_reactors) + Z_PARAM_LONG(reactor_id) + Z_PARAM_LONG(iterations) + ZEND_PARSE_PARAMETERS_END(); + + if (workers <= 0 || published < 0 || published > workers || iterations <= 0) { + RETURN_FALSE; + } + + http_server_object *const server = http_server_object_from_zend(Z_OBJ_P(server_zv)); + + if (server == NULL) { + RETURN_FALSE; + } + + worker_registry_t *const reg = worker_registry_create((int)workers); + worker_inbox_t **const inboxes = ecalloc((size_t)workers, sizeof(*inboxes)); + + for (zend_long w = 0; w < published; w++) { + inboxes[w] = worker_inbox_create(server, ZEND_ASYNC_CURRENT_SCOPE, + /*own_scope=*/false, NULL, NULL); + worker_registry_publish(reg, (int)w, inboxes[w]); + } + + zend_long none = 0; + zend_long *const dist = ecalloc((size_t)workers, sizeof(*dist)); + + for (zend_long i = 0; i < iterations; i++) { + int slot = -1; + worker_inbox_t *const got = + worker_registry_least_busy(reg, (int)reactor_id, (int)n_reactors, &slot); + + if (got == NULL || slot < 0) { + none++; + } else { + dist[slot]++; + } + } + + array_init(return_value); + add_assoc_long(return_value, "none", none); + + zval distz; + array_init(&distz); + + for (zend_long w = 0; w < workers; w++) { + add_next_index_long(&distz, dist[w]); + + if (inboxes[w] != NULL) { + worker_inbox_free(inboxes[w]); + } + } + + add_assoc_zval(return_value, "distribution", &distz); + + worker_registry_free(reg); + efree(inboxes); + efree(dist); +} + +/* === Reactor-side H3 listener spike (#80, B3p3-a) =================== + * + * Proves the single biggest unknown of the reactor split: that the H3 UDP + * listener — its uv-bound socket, poll handle and recv path — can live on a + * transport reactor thread (not a PHP worker) and that the reactor's own loop + * actually services inbound datagrams. The listener is spawned with + * server_obj == NULL and ssl_ctx == NULL: no PHP dispatch (http3_stream_dispatch + * guards on server == NULL) and no crypto — a recv-only spike. */ +#if defined(HAVE_HTTP_SERVER_HTTP3) && !defined(PHP_WIN32) + +/* Spawn the listener ON the reactor thread (its uv handles must be created on + * the loop that owns them). Out: the listener + its kernel-assigned port. */ +typedef struct { + const char *host; + int port; + http3_listener_t *listener; /* out: NULL on spawn failure */ + int local_port; /* out: actual bound port */ +} h3l_spawn_ctx_t; + +static void h3l_spawn_fn(void *arg) +{ + h3l_spawn_ctx_t *const c = (h3l_spawn_ctx_t *)arg; + + c->listener = http3_listener_spawn(c->host, c->port, NULL, NULL, NULL); + + if (c->listener != NULL) { + c->local_port = http3_listener_local_port(c->listener); + } else if (EG(exception)) { + /* spawn throws on the reactor thread's EG — clear it here so it does + * not dangle on the reactor; the caller sees listener == NULL. */ + zend_clear_exception(); + } +} + +/* Read the recv counter ON the reactor thread (the listener struct is reactor + * owned). Each call also forces a reactor tick via reactor_pool_exec. */ +typedef struct { + http3_listener_t *listener; + uint64_t received; /* out */ +} h3l_stats_ctx_t; + +static void h3l_stats_fn(void *arg) +{ + h3l_stats_ctx_t *const c = (h3l_stats_ctx_t *)arg; + http3_listener_stats_t st; + + http3_listener_get_stats(c->listener, &st); + c->received = st.datagrams_received; +} + +static void h3l_destroy_fn(void *arg) +{ + http3_listener_destroy((http3_listener_t *)arg); +} + +#endif /* HAVE_HTTP_SERVER_HTTP3 && !PHP_WIN32 */ + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_reactor_h3_listener_selftest, 0, 0, + MAY_BE_BOOL) +ZEND_END_ARG_INFO() + +/* Stand up one reactor, spawn an H3 listener on it, fire a few datagrams at the + * reactor-owned socket from this thread, and confirm the reactor's loop counted + * them (datagrams_received >= 1). Tears the listener down on its own thread. + * Returns true iff recv was serviced on the reactor; false when built without + * HTTP/3, on Windows, or on spawn failure. */ +PHP_FUNCTION(_http_server_reactor_h3_listener_selftest) +{ + ZEND_PARSE_PARAMETERS_NONE(); + +#if defined(HAVE_HTTP_SERVER_HTTP3) && !defined(PHP_WIN32) + reactor_pool_t *const rp = reactor_pool_create(1); + + if (rp == NULL) { + RETURN_FALSE; + } + + h3l_spawn_ctx_t sc = { .host = "127.0.0.1", .port = 0, + .listener = NULL, .local_port = 0 }; + reactor_pool_exec(rp, 0, h3l_spawn_fn, &sc); + + if (sc.listener == NULL || sc.local_port <= 0) { + reactor_pool_destroy(rp); + RETURN_FALSE; + } + + /* Fire a handful of short-header garbage datagrams at the listener. They + * bump datagrams_received before dispatch; dispatch then drops them + * (unknown short header → stateless reset, no crypto, no server deref). */ + const int s = socket(AF_INET, SOCK_DGRAM, 0); + bool sent = false; + + if (s >= 0) { + struct sockaddr_in to; + memset(&to, 0, sizeof(to)); + to.sin_family = AF_INET; + to.sin_port = htons((uint16_t)sc.local_port); + to.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + + static const char probe[] = "quicspike"; + + for (int i = 0; i < 8; i++) { + (void)sendto(s, probe, sizeof(probe) - 1, 0, + (struct sockaddr *)&to, (socklen_t)sizeof(to)); + } + + sent = true; + close(s); + } + + /* Poll the recv counter until the reactor has serviced our datagrams or we + * time out. Each iteration round-trips through the reactor (a forced tick). */ + uint64_t received = 0; + + if (sent) { + for (int waited = 0; waited < REACTOR_SELFTEST_WAIT_MS; waited++) { + h3l_stats_ctx_t stc = { .listener = sc.listener, .received = 0 }; + reactor_pool_exec(rp, 0, h3l_stats_fn, &stc); + received = stc.received; + + if (received >= 1) { + break; + } + + selftest_msleep(); + } + } + + /* Tear the listener down on its own thread (libuv handles + reactor ZMM). */ + reactor_pool_exec(rp, 0, h3l_destroy_fn, sc.listener); + reactor_pool_destroy(rp); + + RETURN_BOOL(received >= 1); +#else + RETURN_FALSE; +#endif +} + +static const zend_function_entry reactor_pool_test_functions[] = { + ZEND_FE(_http_server_reactor_pool_selftest, arginfo_reactor_pool_selftest) + ZEND_FE(_http_server_persistent_request_selftest, arginfo_persistent_request_selftest) + ZEND_FE(_http_server_reactor_pool_exec_selftest, arginfo_reactor_pool_exec_selftest) + ZEND_FE(_http_server_reactor_post_exec_selftest, arginfo_reactor_post_exec_selftest) + ZEND_FE(_http_server_dispatch_from_wire_selftest, arginfo_dispatch_from_wire_selftest) + ZEND_FE(_http_server_worker_inbox_selftest, arginfo_worker_inbox_selftest) + ZEND_FE(_http_server_worker_registry_selftest, arginfo_worker_registry_selftest) + ZEND_FE(_http_server_worker_registry_route_selftest, arginfo_worker_registry_route_selftest) + ZEND_FE(_http_server_reactor_h3_listener_selftest, arginfo_reactor_h3_listener_selftest) + PHP_FE_END +}; + +void reactor_pool_test_register(const int module_type) +{ + zend_register_functions(NULL, reactor_pool_test_functions, NULL, module_type); +} + +#else /* !HTTP_SERVER_TEST_HOOKS */ + +void reactor_pool_test_register(const int module_type) +{ + (void)module_type; +} + +#endif /* HTTP_SERVER_TEST_HOOKS */ diff --git a/src/core/response_wire.c b/src/core/response_wire.c new file mode 100644 index 0000000..763f84a --- /dev/null +++ b/src/core/response_wire.c @@ -0,0 +1,234 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ + + Flat response representation for the reactor/worker split (#80, D3). + See include/core/response_wire.h. Pure malloc-domain — no PHP, no ZMM. + The return-path mirror of request_wire. +*/ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include "core/response_wire.h" + +#include +#include + +typedef struct { + size_t name_off; + size_t name_len; + size_t value_off; + size_t value_len; +} wire_header_t; + +struct response_wire_s { + uint32_t reactor_id; + int64_t stream_id; + void *conn; + + int status; + + /* Growable byte arena: every span's bytes are copied in here. */ + char *arena; + size_t arena_len; + size_t arena_cap; + + size_t body_off, body_len; + bool body_set; + bool body_complete; + + wire_header_t *headers; + size_t header_count; + size_t header_cap; +}; + +/* Copy `len` bytes into the arena, growing it as needed. Returns the byte + * offset of the copy, or SIZE_MAX on overflow / allocation failure. A zero + * length appends nothing and returns the current end offset. */ +static size_t arena_append(response_wire_t *rw, const char *ptr, const size_t len) +{ + if (len == 0) { + return rw->arena_len; + } + + if (rw->arena_len + len < rw->arena_len) { + return SIZE_MAX; /* size_t overflow */ + } + + if (rw->arena_len + len > rw->arena_cap) { + size_t new_cap = rw->arena_cap != 0 ? rw->arena_cap : 256; + + while (new_cap < rw->arena_len + len) { + if (new_cap > SIZE_MAX / 2) { + new_cap = rw->arena_len + len; + break; + } + + new_cap *= 2; + } + + char *const grown = (char *) realloc(rw->arena, new_cap); + + if (grown == NULL) { + return SIZE_MAX; + } + + rw->arena = grown; + rw->arena_cap = new_cap; + } + + const size_t off = rw->arena_len; + memcpy(rw->arena + off, ptr, len); + rw->arena_len += len; + + return off; +} + +response_wire_t *response_wire_create(const uint32_t reactor_id, const int64_t stream_id, void *conn) +{ + response_wire_t *const rw = (response_wire_t *) calloc(1, sizeof(*rw)); + + if (rw == NULL) { + return NULL; + } + + rw->reactor_id = reactor_id; + rw->stream_id = stream_id; + rw->conn = conn; + + return rw; +} + +void response_wire_set_status(response_wire_t *rw, const int status) +{ + rw->status = status; +} + +bool response_wire_add_header(response_wire_t *rw, + const char *name_ptr, const size_t name_len, + const char *value_ptr, const size_t value_len) +{ + if (rw->header_count == rw->header_cap) { + const size_t new_cap = rw->header_cap != 0 ? rw->header_cap * 2 : 8; + wire_header_t *const grown = + (wire_header_t *) realloc(rw->headers, new_cap * sizeof(*grown)); + + if (grown == NULL) { + return false; + } + + rw->headers = grown; + rw->header_cap = new_cap; + } + + const size_t name_off = arena_append(rw, name_ptr, name_len); + + if (name_off == SIZE_MAX) { + return false; + } + + const size_t value_off = arena_append(rw, value_ptr, value_len); + + if (value_off == SIZE_MAX) { + return false; + } + + wire_header_t *const h = &rw->headers[rw->header_count]; + h->name_off = name_off; + h->name_len = name_len; + h->value_off = value_off; + h->value_len = value_len; + rw->header_count++; + + return true; +} + +bool response_wire_set_body(response_wire_t *rw, const char *ptr, const size_t len, const bool complete) +{ + const size_t off = arena_append(rw, ptr, len); + + if (off == SIZE_MAX) { + return false; + } + + rw->body_off = off; + rw->body_len = len; + rw->body_set = true; + rw->body_complete = complete; + + return true; +} + +int response_wire_status(const response_wire_t *rw) +{ + return rw->status; +} + +const char *response_wire_body(const response_wire_t *rw, size_t *len) +{ + if (!rw->body_set || rw->body_len == 0) { + *len = 0; + return rw->body_set ? rw->arena + rw->body_off : NULL; + } + + *len = rw->body_len; + return rw->arena + rw->body_off; +} + +bool response_wire_body_complete(const response_wire_t *rw) +{ + return rw->body_complete; +} + +size_t response_wire_header_count(const response_wire_t *rw) +{ + return rw->header_count; +} + +bool response_wire_header_at(const response_wire_t *rw, const size_t index, + const char **name_ptr, size_t *name_len, + const char **value_ptr, size_t *value_len) +{ + if (index >= rw->header_count) { + return false; + } + + const wire_header_t *const h = &rw->headers[index]; + *name_ptr = rw->arena + h->name_off; + *name_len = h->name_len; + *value_ptr = rw->arena + h->value_off; + *value_len = h->value_len; + + return true; +} + +uint32_t response_wire_reactor_id(const response_wire_t *rw) +{ + return rw->reactor_id; +} + +int64_t response_wire_stream_id(const response_wire_t *rw) +{ + return rw->stream_id; +} + +void *response_wire_conn(const response_wire_t *rw) +{ + return rw->conn; +} + +void response_wire_free(response_wire_t *rw) +{ + if (rw == NULL) { + return; + } + + free(rw->arena); + free(rw->headers); + free(rw); +} diff --git a/src/core/thread_mailbox.c b/src/core/thread_mailbox.c index 9503e62..82a3c92 100644 --- a/src/core/thread_mailbox.c +++ b/src/core/thread_mailbox.c @@ -116,15 +116,19 @@ void thread_mailbox_free(thread_mailbox_t *mb) bool thread_mailbox_post(thread_mailbox_t *mb, void *item) { - bool was_empty = false; - - if (!thread_mpsc_enqueue(mb->queue, item, &was_empty)) { + if (UNEXPECTED(!thread_mpsc_enqueue(mb->queue, item, NULL))) { return false; } - if (was_empty) { - mb->trigger->trigger(mb->trigger); - } + /* Wake the consumer unconditionally. The was_empty edge optimisation had a + * lost-wakeup race against drain-to-empty: thread_mpsc_drain pulls items + * from the queue before it decrements the length counter, so a producer + * that posts in that window reads prev > 0, computes was_empty == false, + * and skips the signal — stranding its item and deadlocking teardown + * (confirmed under ASan). uv_async_send coalesces (it writes the eventfd + * only on the 0->1 pending transition), so an unconditional signal is + * cheap and correct. */ + mb->trigger->trigger(mb->trigger); return true; } @@ -133,3 +137,12 @@ size_t thread_mailbox_count(const thread_mailbox_t *mb) { return thread_mpsc_count(mb->queue); } + +void thread_mailbox_keepalive(thread_mailbox_t *mb, const bool enable) +{ + if (enable) { + mb->trigger->base.start(&mb->trigger->base); + } else { + mb->trigger->base.stop(&mb->trigger->base); + } +} diff --git a/src/core/tls_layer.c b/src/core/tls_layer.c index b0c6e86..b68db81 100644 --- a/src/core/tls_layer.c +++ b/src/core/tls_layer.c @@ -74,10 +74,12 @@ static const uint8_t tls_alpn_tcp_list[] = { }; #ifdef HAVE_HTTP_SERVER_HTTP3 -/* QUIC ALPN identifier per RFC 9114. HTTP/3 only — no downgrade to h2 - * over QUIC (there is no such protocol) or http/1.1 over QUIC. */ +/* QUIC ALPN set. h3 (RFC 9114) is the real protocol; hq-interop is the raw + * HTTP/0.9-over-QUIC shim the interop test matrix speaks. h3 stays first so + * server-preference keeps it winning when a peer offers both. */ static const uint8_t tls_alpn_quic_list[] = { - 2, 'h', '3' + 2, 'h', '3', + 10, 'h', 'q', '-', 'i', 'n', 't', 'e', 'r', 'o', 'p' }; #endif diff --git a/src/core/worker_dispatch.c b/src/core/worker_dispatch.c new file mode 100644 index 0000000..58c7467 --- /dev/null +++ b/src/core/worker_dispatch.c @@ -0,0 +1,330 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ + + Worker-side request dispatch for the reactor/worker split (#80, B1b). + See include/core/worker_dispatch.h. +*/ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include "core/worker_dispatch.h" +#include "php_http_server.h" /* http_server_object accessors, response API */ +#include "core/http_connection.h" /* http_request_handler_coroutine_new */ +#include "core/http_protocol_handlers.h" /* http_protocol_get_handler */ +#include "http1/http_parser.h" /* http_request_t, http_request_destroy */ +#include "Zend/zend_hrtime.h" /* zend_hrtime — request-service sampling */ + +#include + +/* Defined in src/http_request.c (no public header). Wraps an http_request_t in + * an HttpRequest zval, taking ownership of the request's single reference. */ +extern zval *http_request_create_from_parsed(http_request_t *req); + +/* Per-request worker-side dispatch state. Lives from worker_dispatch_request + * until the handler coroutine's dispose; ecalloc/efree on the worker thread. */ +typedef struct { + http_server_object *server; + http_server_counters_t *counters; /* worker's real counters */ + zval request_zv; + zval response_zv; + + /* Request-service sampling: enqueue_ns stamped at dispatch, start_ns at + * handler entry; on_request_sample feeds sojourn/service to CoDel + + * telemetry. Gated on sample_stamps_enabled (skips hrtime when no consumer). */ + uint64_t enqueue_ns; + uint64_t start_ns; + bool stamps; + + /* Routing echoed from the request onto the response_wire so the reactor + * can resolve which QUIC stream to emit on. */ + uint32_t reactor_id; + int64_t stream_id; + void *conn; + + worker_response_sink_fn sink; + void *sink_arg; + + bool skip_handler; /* synthetic 404 already populated */ + bool is_head; /* suppress the body on render */ +} worker_dispatch_ctx_t; + +/* Handler coroutine body: run the registered user handler with (request, response). */ +static void worker_dispatch_entry(void) +{ + const zend_coroutine_t *const co = ZEND_ASYNC_CURRENT_COROUTINE; + worker_dispatch_ctx_t *const ctx = (worker_dispatch_ctx_t *)co->extended_data; + ZEND_ASSERT(ctx != NULL); + + /* Synthetic 404 (no handler) still counts as a served request. */ + if (ctx->skip_handler) { + http_server_count_request(ctx->counters); + return; + } + + HashTable *const handlers = http_server_get_protocol_handlers(ctx->server); + zend_fcall_t *fcall = http_protocol_get_handler(handlers, HTTP_PROTOCOL_HTTP1); + + if (fcall == NULL) { + fcall = http_protocol_get_handler(handlers, HTTP_PROTOCOL_HTTP2); + } + + if (fcall == NULL) { + return; + } + + if (ctx->stamps) { + ctx->start_ns = zend_hrtime(); + } + + zval params[2], retval; + ZVAL_COPY_VALUE(¶ms[0], &ctx->request_zv); + ZVAL_COPY_VALUE(¶ms[1], &ctx->response_zv); + ZVAL_UNDEF(&retval); + + zend_fcall_info fci = { + .size = sizeof(zend_fcall_info), + .function_name = fcall->fci.function_name, + .retval = &retval, + .params = params, + .object = NULL, + .param_count = 2, + .named_params = NULL, + }; + + volatile bool bailout = false; + zend_try { + zend_call_function(&fci, &fcall->fci_cache); + } zend_catch { + bailout = true; + } zend_end_try(); + + if (UNEXPECTED(bailout)) { + return; + } + + /* Stamp end before the retval dtor so destructor time is not charged as + * service time. */ + http_server_count_request(ctx->counters); + + if (ctx->stamps) { + const uint64_t end_ns = zend_hrtime(); + http_server_on_request_sample(ctx->server, + ctx->start_ns - ctx->enqueue_ns, + end_ns - ctx->start_ns, + end_ns); + } + + zval_ptr_dtor(&retval); +} + +/* Flatten the committed HttpResponse into a response_wire. Buffered only. + * Returns NULL on allocation failure. */ +static response_wire_t *worker_render_response(const worker_dispatch_ctx_t *ctx) +{ + zend_object *const resp = Z_OBJ(ctx->response_zv); + + response_wire_t *const rw = + response_wire_create(ctx->reactor_id, ctx->stream_id, ctx->conn); + + if (rw == NULL) { + return NULL; + } + + int status = http_response_get_status(resp); + + if (UNEXPECTED(status <= 0)) { + status = 200; + } + + response_wire_set_status(rw, status); + + HashTable *const headers = http_response_get_headers(resp); + + if (headers != NULL) { + zend_string *name; + zval *values; + ZEND_HASH_FOREACH_STR_KEY_VAL(headers, name, values) { + if (UNEXPECTED(name == NULL)) { + continue; + } + + if (!http_response_header_allowed_h2h3(ZSTR_VAL(name), ZSTR_LEN(name))) { + continue; + } + + if (EXPECTED(Z_TYPE_P(values) == IS_STRING)) { + response_wire_add_header(rw, ZSTR_VAL(name), ZSTR_LEN(name), + Z_STRVAL_P(values), Z_STRLEN_P(values)); + } else if (Z_TYPE_P(values) == IS_ARRAY) { + zval *v; + ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(values), v) { + if (Z_TYPE_P(v) != IS_STRING) { + continue; + } + + response_wire_add_header(rw, ZSTR_VAL(name), ZSTR_LEN(name), + Z_STRVAL_P(v), Z_STRLEN_P(v)); + } ZEND_HASH_FOREACH_END(); + } + } ZEND_HASH_FOREACH_END(); + } + + /* http_response_get_body_str returns a borrowed reference; the bytes are + * copied into the arena, so nothing to release. HEAD carries the headers + * but no body (RFC 9110 §9.3.2). */ + if (!ctx->is_head) { + zend_string *const body = http_response_get_body_str(resp); + + if (body != NULL && ZSTR_LEN(body) > 0) { + response_wire_set_body(rw, ZSTR_VAL(body), ZSTR_LEN(body), true); + } else { + response_wire_set_body(rw, NULL, 0, true); + } + } else { + response_wire_set_body(rw, NULL, 0, true); + } + + return rw; +} + +/* Coroutine dispose: commit the response (or derive a 500 from an unhandled + * exception), render it into a response_wire, hand it to the sink, and drop the + * per-request state. */ +static void worker_dispatch_dispose(zend_coroutine_t *coroutine) +{ + worker_dispatch_ctx_t *const ctx = (worker_dispatch_ctx_t *)coroutine->extended_data; + ZEND_ASSERT(ctx != NULL); + + coroutine->extended_data = NULL; + + /* Un-bracket the in-flight request (--active), paired with the + * on_request_dispatch in worker_dispatch_request. */ + http_server_on_request_dispose(ctx->counters); + + if (!Z_ISUNDEF(ctx->response_zv)) { + zend_object *const resp = Z_OBJ(ctx->response_zv); + + if (coroutine->exception != NULL && !http_response_is_committed(resp)) { + http_response_reset_to_error(resp, 500, "Internal Server Error"); + } + + if (!http_response_is_committed(resp)) { + http_response_set_committed(resp); + } + + response_wire_t *const rw = worker_render_response(ctx); + + if (rw != NULL) { + if (ctx->sink != NULL) { + ctx->sink(rw, ctx->sink_arg); /* sink owns rw now */ + } else { + response_wire_free(rw); + } + } + } + + if (!Z_ISUNDEF(ctx->request_zv)) { + zval_ptr_dtor(&ctx->request_zv); + ZVAL_UNDEF(&ctx->request_zv); + } + + if (!Z_ISUNDEF(ctx->response_zv)) { + zval_ptr_dtor(&ctx->response_zv); + ZVAL_UNDEF(&ctx->response_zv); + } + + efree(ctx); +} + +bool worker_dispatch_request(http_server_object *server, + zend_async_scope_t *scope, + http_request_t *req, + const bool own_scope, + worker_response_sink_fn sink, void *sink_arg) +{ + if (UNEXPECTED(server == NULL || scope == NULL || req == NULL)) { + if (req != NULL) { + http_request_destroy(req); /* we own it; nothing else can free it */ + } + + return false; + } + + /* Routing must be read before create_from_parsed: on the coroutine-spawn + * failure path below the object owns (and may free) req. */ + const uint32_t reactor_id = req->reactor_id; + const int64_t stream_id = req->reactor_stream_id; + void *const conn = req->reactor_conn; + const bool is_head = http_request_method_is_head(req); + + zval *const req_obj = http_request_create_from_parsed(req); + + if (UNEXPECTED(req_obj == NULL)) { + http_request_destroy(req); /* nothing took the ref yet */ + return false; + } + + worker_dispatch_ctx_t *const ctx = ecalloc(1, sizeof(*ctx)); + ctx->server = server; + ctx->counters = http_server_counters(server); + ctx->stamps = http_server_sample_stamps_enabled(http_server_view(server)); + ctx->reactor_id = reactor_id; + ctx->stream_id = stream_id; + ctx->conn = conn; + ctx->sink = sink; + ctx->sink_arg = sink_arg; + ctx->is_head = is_head; + + ZVAL_COPY_VALUE(&ctx->request_zv, req_obj); + efree(req_obj); /* the heap zval wrapper, not the object */ + + object_init_ex(&ctx->response_zv, http_response_ce); + http_response_set_protocol_version(Z_OBJ(ctx->response_zv), "3.0"); + + /* No handler registered: synthesise a 404 so the sink still fires with a + * response instead of leaving the stream hanging. */ + HashTable *const handlers = http_server_get_protocol_handlers(server); + zend_fcall_t *fcall = http_protocol_get_handler(handlers, HTTP_PROTOCOL_HTTP1); + + if (fcall == NULL) { + fcall = http_protocol_get_handler(handlers, HTTP_PROTOCOL_HTTP2); + } + + if (fcall == NULL) { + http_response_static_set_status(Z_OBJ(ctx->response_zv), 404); + http_response_static_set_header(Z_OBJ(ctx->response_zv), + "content-type", 12, "text/plain; charset=utf-8", 25); + zend_string *const msg = zend_string_init("Not Found", 9, 0); + http_response_static_set_body_str(Z_OBJ(ctx->response_zv), msg); + zend_string_release(msg); + ctx->skip_handler = true; + } + + zend_coroutine_t *const co = http_request_handler_coroutine_new( + scope, worker_dispatch_entry, ctx, worker_dispatch_dispose, own_scope); + + if (UNEXPECTED(co == NULL)) { + zval_ptr_dtor(&ctx->request_zv); + zval_ptr_dtor(&ctx->response_zv); + efree(ctx); + return false; + } + + /* Bracket the in-flight request on the worker's counters (++active), + * paired with on_request_dispose at coroutine dispose. */ + http_server_on_request_dispatch(ctx->counters); + + if (ctx->stamps) { + ctx->enqueue_ns = zend_hrtime(); + } + + ZEND_ASYNC_ENQUEUE_COROUTINE(co); + + return true; +} diff --git a/src/core/worker_inbox.c b/src/core/worker_inbox.c new file mode 100644 index 0000000..3d8b444 --- /dev/null +++ b/src/core/worker_inbox.c @@ -0,0 +1,100 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ + + Worker inbox (#80, B2) — per-worker request mailbox + dispatch drain. + See include/core/worker_inbox.h. +*/ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include "core/worker_inbox.h" +#include "core/thread_mailbox.h" + +/* Bounded so a backed-up worker backpressures the reactor (post returns false) + * rather than growing the queue without limit. */ +#define WORKER_INBOX_CAPACITY 1024 +#define WORKER_INBOX_BATCH 64 + +struct worker_inbox_s { + thread_mailbox_t *mb; + http_server_object *server; + zend_async_scope_t *scope; + worker_response_sink_fn sink; + void *sink_arg; + bool own_scope; +}; + +/* Runs on the worker's reactor thread when requests are queued. Each item is an + * http_request_t whose ownership passed to us at post: hand it to dispatch (the + * handler coroutine renders the response to the sink). worker_dispatch_request + * consumes the request unconditionally — owns it on success, destroys it on + * failure — so the drain never frees it here. */ +static void worker_inbox_drain(void **items, const size_t count, void *arg) +{ + worker_inbox_t *const inbox = (worker_inbox_t *)arg; + + for (size_t i = 0; i < count; i++) { + http_request_t *const req = (http_request_t *)items[i]; + ZEND_ASSERT(req != NULL); + + worker_dispatch_request(inbox->server, inbox->scope, req, + inbox->own_scope, inbox->sink, inbox->sink_arg); + } +} + +worker_inbox_t *worker_inbox_create(http_server_object *server, + zend_async_scope_t *scope, + const bool own_scope, + worker_response_sink_fn sink, void *sink_arg) +{ + if (UNEXPECTED(server == NULL || scope == NULL)) { + return NULL; + } + + worker_inbox_t *const inbox = pecalloc(1, sizeof(*inbox), 0); + inbox->server = server; + inbox->scope = scope; + inbox->own_scope = own_scope; + inbox->sink = sink; + inbox->sink_arg = sink_arg; + + inbox->mb = thread_mailbox_create(WORKER_INBOX_CAPACITY, WORKER_INBOX_BATCH, + worker_inbox_drain, inbox); + + if (inbox->mb == NULL) { + pefree(inbox, 0); + return NULL; + } + + return inbox; +} + +bool worker_inbox_post(worker_inbox_t *inbox, http_request_t *req) +{ + if (UNEXPECTED(inbox == NULL || req == NULL)) { + return false; + } + + return thread_mailbox_post(inbox->mb, req); +} + +size_t worker_inbox_depth(const worker_inbox_t *inbox) +{ + return inbox != NULL ? thread_mailbox_count(inbox->mb) : 0; +} + +void worker_inbox_free(worker_inbox_t *inbox) +{ + if (inbox == NULL) { + return; + } + + thread_mailbox_free(inbox->mb); + pefree(inbox, 0); +} diff --git a/src/core/worker_registry.c b/src/core/worker_registry.c new file mode 100644 index 0000000..5412d86 --- /dev/null +++ b/src/core/worker_registry.c @@ -0,0 +1,195 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ + + Worker registry (#80, B3) — atomic table of per-worker inboxes. + See include/core/worker_registry.h. +*/ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include "php.h" +#include "Zend/zend_atomic.h" +#include "core/worker_registry.h" + +struct worker_registry_s { + zend_atomic_ptr *slots; /* [capacity], each a worker_inbox_t* or NULL */ + zend_atomic_int rr; /* round-robin cursor */ + zend_atomic_int next; /* next slot to claim */ + int capacity; +}; + +worker_registry_t *worker_registry_create(const int capacity) +{ + if (capacity <= 0) { + return NULL; + } + + worker_registry_t *const reg = pecalloc(1, sizeof(*reg), 0); + reg->slots = pecalloc((size_t)capacity, sizeof(zend_atomic_ptr), 0); + reg->capacity = capacity; + ZEND_ATOMIC_INT_INIT(®->rr, 0); + ZEND_ATOMIC_INT_INIT(®->next, 0); + + for (int i = 0; i < capacity; i++) { + ZEND_ATOMIC_PTR_INIT(®->slots[i], NULL); + } + + return reg; +} + +bool worker_registry_publish(worker_registry_t *reg, const int idx, worker_inbox_t *inbox) +{ + if (UNEXPECTED(reg == NULL || idx < 0 || idx >= reg->capacity)) { + return false; + } + + zend_atomic_ptr_store_ex(®->slots[idx], inbox); + + return true; +} + +int worker_registry_add(worker_registry_t *reg, worker_inbox_t *inbox) +{ + if (UNEXPECTED(reg == NULL)) { + return -1; + } + + const int idx = zend_atomic_int_fetch_add(®->next, 1); + + if (idx >= reg->capacity) { + return -1; + } + + zend_atomic_ptr_store_ex(®->slots[idx], inbox); + + return idx; +} + +int worker_registry_capacity(const worker_registry_t *reg) +{ + return reg != NULL ? reg->capacity : 0; +} + +int worker_registry_count(const worker_registry_t *reg) +{ + if (reg == NULL) { + return 0; + } + + int n = 0; + + for (int i = 0; i < reg->capacity; i++) { + if (zend_atomic_ptr_load_ex(®->slots[i]) != NULL) { + n++; + } + } + + return n; +} + +worker_inbox_t *worker_registry_at(const worker_registry_t *reg, const int idx) +{ + if (UNEXPECTED(reg == NULL || idx < 0 || idx >= reg->capacity)) { + return NULL; + } + + return (worker_inbox_t *)zend_atomic_ptr_load_ex(®->slots[idx]); +} + +worker_inbox_t *worker_registry_pick(worker_registry_t *reg) +{ + if (UNEXPECTED(reg == NULL)) { + return NULL; + } + + /* Unsigned so the cursor wraps cleanly past INT_MAX. */ + const unsigned start = (unsigned)zend_atomic_int_fetch_add(®->rr, 1); + + for (int k = 0; k < reg->capacity; k++) { + const int i = (int)((start + (unsigned)k) % (unsigned)reg->capacity); + worker_inbox_t *const inbox = + (worker_inbox_t *)zend_atomic_ptr_load_ex(®->slots[i]); + + if (EXPECTED(inbox != NULL)) { + return inbox; + } + } + + return NULL; +} + +worker_inbox_t *worker_registry_least_busy(worker_registry_t *reg, + const int reactor_id, const int n_reactors, + int *out_slot) +{ + if (out_slot != NULL) { + *out_slot = -1; + } + + if (UNEXPECTED(reg == NULL)) { + return NULL; + } + + const bool owned = reactor_id >= 0 && n_reactors > 1; + const int step = owned ? n_reactors : 1; + const int base = owned ? (reactor_id % n_reactors) : 0; + + if (base >= reg->capacity) { + return NULL; + } + + /* Number of owned positions in [base, capacity) stepping by `step`. */ + const int npos = (reg->capacity - base + step - 1) / step; + + /* Rotate the scan origin so an all-idle owned set (every depth 0) spreads + * homes round-robin instead of always landing on the lowest slot. */ + const unsigned start = (unsigned)zend_atomic_int_fetch_add(®->rr, 1); + + worker_inbox_t *best = NULL; + size_t best_depth = 0; + int best_slot = -1; + + for (int p = 0; p < npos; p++) { + const int slot = base + (int)((start + (unsigned)p) % (unsigned)npos) * step; + worker_inbox_t *const inbox = + (worker_inbox_t *)zend_atomic_ptr_load_ex(®->slots[slot]); + + if (UNEXPECTED(inbox == NULL)) { + continue; + } + + const size_t depth = worker_inbox_depth(inbox); + + if (best == NULL || depth < best_depth) { + best = inbox; + best_depth = depth; + best_slot = slot; + } + + if (best_depth == 0) { + break; + } + } + + if (best != NULL && out_slot != NULL) { + *out_slot = best_slot; + } + + return best; +} + +void worker_registry_free(worker_registry_t *reg) +{ + if (reg == NULL) { + return; + } + + pefree(reg->slots, 0); + pefree(reg, 0); +} diff --git a/src/http1/http_parser.c b/src/http1/http_parser.c index 70746cc..a6bbc52 100644 --- a/src/http1/http_parser.c +++ b/src/http1/http_parser.c @@ -267,8 +267,7 @@ static int on_header_field(llhttp_t* llhttp_parser, const char* at, size_t lengt /* Lazy initialization of headers HashTable */ if (!parser->request->headers) { - ALLOC_HASHTABLE(parser->request->headers); - zend_hash_init(parser->request->headers, HTTP_HEADERS_INITIAL_SIZE, NULL, ZVAL_PTR_DTOR, 0); + http_request_init_headers(parser->request); } /* If we were parsing value, this is a new header - save previous one first */ @@ -544,8 +543,7 @@ static int on_headers_complete(llhttp_t* llhttp_parser) /* Ensure headers HashTable exists (even if no headers were parsed) */ if (!req->headers) { - ALLOC_HASHTABLE(req->headers); - zend_hash_init(req->headers, HTTP_HEADERS_INITIAL_SIZE, NULL, ZVAL_PTR_DTOR, 0); + http_request_init_headers(req); } /* Save last header if any */ @@ -608,7 +606,7 @@ static int on_headers_complete(llhttp_t* llhttp_parser) req->method = http_known_method_lookup(method_name, method_len); if (req->method == NULL) { - req->method = zend_string_init(method_name, method_len, 0); + req->method = zend_string_init(method_name, method_len, req->persistent); } /* RFC 9110 §9.3.6: CONNECT targets a proxy tunnel — an origin server @@ -1055,30 +1053,30 @@ void http_request_addref(http_request_t *req) } } -void http_request_destroy(http_request_t *req) +void http_request_free_fields(http_request_t *req) { - if (!req) { - return; - } - /* Refcount-based release. Each holder calls destroy when done; the - * last call (refcount → 0) actually frees. Allocators init refcount - * to 1 so a single-owner caller's destroy still frees immediately — - * preserves the pre-refcount behavior at every existing call site. */ - if (--req->refcount > 0) { + if (req == NULL) { return; } if (req->method) { zend_string_release(req->method); + req->method = NULL; } if (req->uri) { zend_string_release(req->uri); + req->uri = NULL; } if (req->headers) { + /* zend_hash_destroy frees arData + releases keys/values in the + * table's own domain (flag-aware); only the HashTable struct block + * needs the matching free. pefree(_, false) == efree == the old + * FREE_HASHTABLE, so ZMM requests are byte-for-byte unchanged. */ zend_hash_destroy(req->headers); - FREE_HASHTABLE(req->headers); + pefree(req->headers, req->persistent); + req->headers = NULL; } body_release(req->body); @@ -1097,35 +1095,58 @@ void http_request_destroy(http_request_t *req) if (req->multipart_proc) { mp_processor_cleanup_temp_files(req->multipart_proc); mp_processor_destroy(req->multipart_proc); + req->multipart_proc = NULL; } if (req->post_data) { zend_hash_destroy(req->post_data); FREE_HASHTABLE(req->post_data); + req->post_data = NULL; } if (req->files) { zend_hash_destroy(req->files); FREE_HASHTABLE(req->files); + req->files = NULL; } if (req->path) { zend_string_release(req->path); + req->path = NULL; } if (req->query_params) { zend_hash_destroy(req->query_params); FREE_HASHTABLE(req->query_params); + req->query_params = NULL; } if (req->traceparent_raw) { zend_string_release(req->traceparent_raw); + req->traceparent_raw = NULL; } if (req->tracestate_raw) { zend_string_release(req->tracestate_raw); + req->tracestate_raw = NULL; + } +} + +void http_request_destroy(http_request_t *req) +{ + if (!req) { + return; + } + /* Refcount-based release. Each holder calls destroy when done; the + * last call (refcount → 0) actually frees. Allocators init refcount + * to 1 so a single-owner caller's destroy still frees immediately — + * preserves the pre-refcount behavior at every existing call site. */ + if (--req->refcount > 0) { + return; } + http_request_free_fields(req); + /* Custom release path — embedder pools the slot itself and finishes * any embedder-specific teardown. NULL = legacy ecalloc owner, free * via efree. Capture the callback before clearing — defensive in @@ -1138,7 +1159,9 @@ void http_request_destroy(http_request_t *req) return; } - efree(req); + /* Standalone owner (H1 ecalloc / reactor pecalloc). pefree(_, false) + * == efree, so ZMM requests free exactly as before. */ + pefree(req, req->persistent); } /* Helper: Reset smart_str with buffer reuse if possible */ diff --git a/src/http2/http2_session.c b/src/http2/http2_session.c index bfda80f..7756b78 100644 --- a/src/http2/http2_session.c +++ b/src/http2/http2_session.c @@ -98,9 +98,7 @@ nghttp2_session *http2_session_get_ng(http2_session_t *session) static void ensure_headers_table(http_request_t *req) { if (req->headers == NULL) { - ALLOC_HASHTABLE(req->headers); - zend_hash_init(req->headers, HTTP_HEADERS_INITIAL_SIZE, - NULL, ZVAL_PTR_DTOR, 0); + http_request_init_headers(req); } } @@ -119,10 +117,10 @@ static void store_header_value(http_request_t *req, const bool name_owned = (name_str == NULL); if (name_owned) { - name_str = zend_string_init(name, namelen, 0); + name_str = zend_string_init(name, namelen, req->persistent); } - zend_string *val_str = zend_string_init(value, valuelen, 0); + zend_string *val_str = zend_string_init(value, valuelen, req->persistent); zval tmp; ZVAL_STR(&tmp, val_str); @@ -303,12 +301,12 @@ static int cb_on_header(nghttp2_session *ng, req->method = http_known_method_lookup(value_c, valuelen); if (req->method == NULL) { - req->method = zend_string_init(value_c, valuelen, 0); + req->method = zend_string_init(value_c, valuelen, req->persistent); } } } else if (namelen == 5 && memcmp(name, ":path", 5) == 0) { if (req->uri == NULL) { - req->uri = zend_string_init(value_c, valuelen, 0); + req->uri = zend_string_init(value_c, valuelen, req->persistent); } } else if (namelen == 10 && memcmp(name, ":authority", 10) == 0) { /* Map :authority → Host header per RFC 9113 §8.3.1, so diff --git a/src/http3/http3_callbacks.c b/src/http3/http3_callbacks.c index 76e91b1..9a0451e 100644 --- a/src/http3/http3_callbacks.c +++ b/src/http3/http3_callbacks.c @@ -37,20 +37,37 @@ #include "core/http_protocol_handlers.h" /* http_protocol_get_handler */ #include "http3_listener.h" /* http3_listener_server_obj etc. */ #include "http3_packet.h" /* http3_packet_compute_sr_token */ +#include "http3_steer.h" /* CID steering encode */ +#include "core/response_wire.h" /* response_wire_* (reverse path) */ #include "http3/http3_stream.h" /* http3_stream_t */ #include /* ngtcp2_crypto_* callback ptrs */ #include +/* hq-interop file serving (open/fstat/mmap/realpath) is POSIX-only. The whole + * feature is gated behind a docroot setter that the Linux interop runner sets; + * on Windows http3_hq_map_file compiles to a stub that returns false. */ +#ifndef PHP_WIN32 +# include /* hq file serving: open */ +# include /* close */ +# include /* fstat */ +# include /* mmap — zero-copy hq file body */ +# include /* PATH_MAX */ +# include +# if defined(__linux__) && defined(__has_include) +# if __has_include() +# include /* struct open_how, RESOLVE_BENEATH */ +# include /* SYS_openat2 */ +# define HTTP3_HAVE_OPENAT2 1 +# endif +# endif +#endif + /* Listener accessors not exposed via http3_listener.h. */ extern http3_packet_stats_t *http3_listener_packet_stats(http3_listener_t *l); extern const uint8_t *http3_listener_sr_key(const http3_listener_t *l); -/* http3_stream_submit_response forward — used by streaming append_chunk - * (here) and by http3_dispatch.c on the buffered REST commit path. - * Declared cross-TU in http3_internal.h. */ - /* ------------------------------------------------------------------------ * ngtcp2 base callbacks: rand + new connection id (with deterministic * stateless-reset token) @@ -79,14 +96,30 @@ static int get_new_connection_id_cb(ngtcp2_conn *conn, ngtcp2_cid *cid, void *user_data) { (void)conn; - http3_connection_t *c = (http3_connection_t *)user_data; + http3_connection_t *const c = (http3_connection_t *)user_data; /* CID is random per RFC 9000 §5.1 — collision-resistant via DRBG. * Stateless-reset token is derived deterministically via * HMAC-SHA256(listener_sr_key, cid)[0:16]. The deterministic * token is what makes peer-side stateless-reset verification work * (peer caches the token from NEW_CONNECTION_ID; when a forged-or- - * legitimate reset arrives we recompute the same value here). */ - if (!http3_fill_random(cid->data, cidlen)) { + * legitimate reset arrives we recompute the same value here). + * + * With CID steering active every CID we hand out must encode + * this reactor's id too — a client may rotate to one of these as its DCID on + * migration, and it must still route back here. */ + const int reactor_id = c != NULL ? http3_listener_reactor_id(c->listener) : -1; + + if (http3_steer_active() && reactor_id >= 0 && cidlen >= HTTP3_STEER_CID_LEN) { + if (!http3_steer_encode(cid->data, reactor_id)) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + if (cidlen > HTTP3_STEER_CID_LEN + && !http3_fill_random(cid->data + HTTP3_STEER_CID_LEN, + cidlen - HTTP3_STEER_CID_LEN)) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + } else if (!http3_fill_random(cid->data, cidlen)) { return NGTCP2_ERR_CALLBACK_FAILURE; } @@ -101,6 +134,25 @@ static int get_new_connection_id_cb(ngtcp2_conn *conn, ngtcp2_cid *cid, _Static_assert(NGTCP2_STATELESS_RESET_TOKENLEN == 16, "stateless reset token width must match HMAC truncation"); http3_packet_compute_sr_token(sr_key, cid->data, cidlen, token); + + /* Register the CID we just handed out so a client that rotates its DCID + * to it (RFC 9000 §5.1) still routes back to this conn. c is non-NULL + * here — the sr_key == NULL guard above already returned otherwise. */ + http3_connection_register_issued_cid(c, cid->data, cidlen); + return 0; +} + +/* The peer retired a CID we offered (RETIRE_CONNECTION_ID). Drop it from the + * conn_map so no stale key survives this connection's teardown. */ +static int remove_connection_id_cb(ngtcp2_conn *conn, const ngtcp2_cid *cid, + void *user_data) +{ + (void)conn; + http3_connection_t *const c = (http3_connection_t *)user_data; + + if (c != NULL) { + http3_connection_unregister_issued_cid(c, cid->data, cid->datalen); + } return 0; } @@ -111,8 +163,7 @@ static int get_new_connection_id_cb(ngtcp2_conn *conn, ngtcp2_cid *cid, static void h3_ensure_headers_table(http_request_t *req) { if (req->headers == NULL) { - ALLOC_HASHTABLE(req->headers); - zend_hash_init(req->headers, 16, NULL, ZVAL_PTR_DTOR, 0); + http_request_init_headers(req); } } @@ -122,8 +173,8 @@ static void h3_store_header_value(http_request_t *req, { h3_ensure_headers_table(req); - zend_string *name_str = zend_string_init(name, namelen, 0); - zend_string *val_str = zend_string_init(value, valuelen, 0); + zend_string *name_str = zend_string_init(name, namelen, req->persistent); + zend_string *val_str = zend_string_init(value, valuelen, req->persistent); zval tmp; ZVAL_STR(&tmp, val_str); @@ -146,7 +197,9 @@ static void http3_finalize_request_body(http3_stream_t *s); static void h3_reject_request_stream(http3_connection_t *c, http3_stream_t *s, int64_t stream_id) { - if (s == NULL || s->rejected) return; + ZEND_ASSERT(s != NULL); + + if (s->rejected) return; s->rejected = true; if (c == NULL) return; @@ -192,17 +245,17 @@ static int h3_begin_headers_cb(nghttp3_conn *conn, int64_t stream_id, void *conn_user_data, void *stream_user_data) { (void)stream_user_data; - http3_connection_t *c = (http3_connection_t *)conn_user_data; - http3_packet_stats_t *stats = c != NULL + http3_connection_t *const c = (http3_connection_t *)conn_user_data; + http3_packet_stats_t *const stats = c != NULL ? http3_listener_packet_stats(c->listener) : NULL; - http3_stream_t *s = http3_stream_new(c, stream_id); + http3_stream_t *const s = http3_stream_new(c, stream_id); - if (s == NULL) { + if (UNEXPECTED(s == NULL)) { return NGHTTP3_ERR_CALLBACK_FAILURE; } - if (nghttp3_conn_set_stream_user_data(conn, stream_id, s) != 0) { + if (UNEXPECTED(nghttp3_conn_set_stream_user_data(conn, stream_id, s) != 0)) { http3_stream_release(s); return NGHTTP3_ERR_CALLBACK_FAILURE; } @@ -233,10 +286,10 @@ static int h3_recv_header_cb(nghttp3_conn *conn, int64_t stream_id, void *conn_user_data, void *stream_user_data) { (void)conn; (void)stream_id; (void)flags; - http3_connection_t *c = (http3_connection_t *)conn_user_data; - http3_stream_t *s = (http3_stream_t *)stream_user_data; + http3_connection_t *const c = (http3_connection_t *)conn_user_data; + http3_stream_t *const s = (http3_stream_t *)stream_user_data; - if (s == NULL || s->rejected) { + if (UNEXPECTED(s == NULL || s->rejected)) { return 0; } @@ -246,9 +299,9 @@ static int h3_recv_header_cb(nghttp3_conn *conn, int64_t stream_id, /* Same RFC 7541 §4.1 32-byte overhead accounting H2 uses. */ const size_t entry_cost = name_v.len + value_v.len + 32; - if (SIZE_MAX - s->headers_total_bytes < entry_cost - || s->headers_total_bytes + entry_cost > HTTP3_MAX_HEADERS_BYTES) { - http3_packet_stats_t *stats = c != NULL + if (UNEXPECTED(SIZE_MAX - s->headers_total_bytes < entry_cost + || s->headers_total_bytes + entry_cost > HTTP3_MAX_HEADERS_BYTES)) { + http3_packet_stats_t *const stats = c != NULL ? http3_listener_packet_stats(c->listener) : NULL; if (stats != NULL) stats->h3_request_oversized++; @@ -259,16 +312,16 @@ static int h3_recv_header_cb(nghttp3_conn *conn, int64_t stream_id, s->headers_total_bytes += entry_cost; - http_request_t *req = s->request; - const char *n = (const char *)name_v.base; - const char *v = (const char *)value_v.base; + http_request_t *const req = s->request; + const char *const name_ptr = (const char *)name_v.base; + const char *const value_ptr = (const char *)value_v.base; /* Pseudo-headers — token enum lets us skip the strcmp ladder for * the four RFC 9114 pseudo names. nghttp3 already validates * uniqueness + ordering so we map unconditionally. */ if (token == NGHTTP3_QPACK_TOKEN__METHOD) { if (req->method == NULL) { - req->method = zend_string_init(v, value_v.len, 0); + req->method = zend_string_init(value_ptr, value_v.len, req->persistent); } return 0; @@ -276,30 +329,30 @@ static int h3_recv_header_cb(nghttp3_conn *conn, int64_t stream_id, if (token == NGHTTP3_QPACK_TOKEN__PATH) { if (req->uri == NULL) { - req->uri = zend_string_init(v, value_v.len, 0); + req->uri = zend_string_init(value_ptr, value_v.len, req->persistent); } return 0; } if (token == NGHTTP3_QPACK_TOKEN__AUTHORITY) { - h3_store_header_value(req, "host", 4, v, value_v.len); + h3_store_header_value(req, "host", 4, value_ptr, value_v.len); return 0; } if (token == NGHTTP3_QPACK_TOKEN__SCHEME) { - h3_store_header_value(req, "scheme", 6, v, value_v.len); + h3_store_header_value(req, "scheme", 6, value_ptr, value_v.len); return 0; } - h3_store_header_value(req, n, name_v.len, v, value_v.len); + h3_store_header_value(req, name_ptr, name_v.len, value_ptr, value_v.len); /* Pre-size the body builder when Content-Length lands so we don't * geometric-grow under multi-MiB POSTs. Same trick H2 uses. */ - if (name_v.len == 14 && strncasecmp(n, "content-length", 14) == 0 + if (name_v.len == 14 && strncasecmp(name_ptr, "content-length", 14) == 0 && value_v.len < 32) { char buf[32]; - memcpy(buf, v, value_v.len); + memcpy(buf, value_ptr, value_v.len); buf[value_v.len] = '\0'; char *end = NULL; unsigned long long cl = strtoull(buf, &end, 10); @@ -317,12 +370,20 @@ static int h3_end_headers_cb(nghttp3_conn *conn, int64_t stream_id, void *stream_user_data) { (void)conn; (void)stream_id; (void)fin; - http3_connection_t *c = (http3_connection_t *)conn_user_data; + http3_connection_t *const c = (http3_connection_t *)conn_user_data; http3_stream_t *s = (http3_stream_t *)stream_user_data; if (s == NULL || s->dispatched || s->rejected) { return 0; } + + /* Reactor mode: defer dispatch to h3_end_stream_cb. The reactor must + * not write into the request after hand-off, so the body is assembled + * (persistent) before the worker gets the pointer — buffered, not streamed. */ + if (c != NULL && http3_listener_reactor_ctx(c->listener) != NULL) { + return 0; + } + /* Dispatch the handler the moment headers are complete, * regardless of fin (mirror H2 cb_on_frame_recv on HEADERS+END_HEADERS). * Body chunks that arrive after this point feed s->body_buf via @@ -337,19 +398,19 @@ static int h3_recv_data_cb(nghttp3_conn *conn, int64_t stream_id, void *conn_user_data, void *stream_user_data) { (void)conn; (void)stream_id; - http3_connection_t *c = (http3_connection_t *)conn_user_data; - http3_stream_t *s = (http3_stream_t *)stream_user_data; + http3_connection_t *const c = (http3_connection_t *)conn_user_data; + http3_stream_t *const s = (http3_stream_t *)stream_user_data; - if (s == NULL || s->rejected) { + if (UNEXPECTED(s == NULL || s->rejected)) { return 0; } const size_t current = s->body_buf.s != NULL ? ZSTR_LEN(s->body_buf.s) : 0; - if (SIZE_MAX - current < datalen - || current + datalen > HTTP3_MAX_BODY_BYTES) { - http3_packet_stats_t *stats = c != NULL + if (UNEXPECTED(SIZE_MAX - current < datalen + || current + datalen > HTTP3_MAX_BODY_BYTES)) { + http3_packet_stats_t *const stats = c != NULL ? http3_listener_packet_stats(c->listener) : NULL; if (stats != NULL) stats->h3_request_oversized++; @@ -388,7 +449,7 @@ static nghttp3_ssize h3_read_data_cb(nghttp3_conn *conn, int64_t stream_id, void *stream_user_data) { (void)conn; (void)stream_id; (void)conn_user_data; - http3_stream_t *s = (http3_stream_t *)stream_user_data; + http3_stream_t *const s = (http3_stream_t *)stream_user_data; if (s == NULL || veccnt == 0) { *pflags |= NGHTTP3_DATA_FLAG_EOF; @@ -520,7 +581,7 @@ bool http3_stream_submit_response(http3_connection_t *c, return false; } - zend_object *resp_obj = Z_OBJ(s->response_zv); + zend_object *const resp_obj = Z_OBJ(s->response_zv); #ifdef HAVE_HTTP_COMPRESSION /* H3 reads body via http_response_get_body_str() directly rather @@ -624,7 +685,99 @@ bool http3_stream_submit_response(http3_connection_t *c, if (buf.heap != NULL) efree(buf.heap); - http3_packet_stats_t *stats = http3_listener_packet_stats(c->listener); + http3_packet_stats_t *const stats = http3_listener_packet_stats(c->listener); + + if (rv == 0) { + if (stats != NULL) stats->h3_response_submitted++; + return true; + } + + if (stats != NULL) stats->h3_response_submit_error++; + + if (s->response_body != NULL) { + zend_string_release(s->response_body); + s->response_body = NULL; + } + + return false; +} + +/* Reverse path: submit a buffered response from a flat response_wire + * (rendered by a worker, handed back over the reverse channel) instead of from + * the per-stream HttpResponse zval. Runs ON THE REACTOR thread. The wire's + * headers were already filtered to the H2/H3-allowed set on the worker, so no + * re-filter here. The body is copied into a stream-owned zend_string because the + * data_reader walks it asynchronously, outliving the wire (freed by the caller + * right after this returns). nghttp3 copies the nv bytes at submit time, so the + * wire's header spans only need to live across this call. */ +bool http3_stream_submit_response_wire(http3_connection_t *c, http3_stream_t *s, + const response_wire_t *rw) +{ + if (c == NULL || s == NULL || rw == NULL || c->nghttp3_conn == NULL) { + return false; + } + + char status_buf[8]; + int status = response_wire_status(rw); + + if (status <= 0) status = 200; + int status_len = snprintf(status_buf, sizeof(status_buf), "%d", status); + + if (status_len < 0 || status_len >= (int)sizeof(status_buf)) { + status_len = 3; + memcpy(status_buf, "500", 3); + } + + const size_t hcount = response_wire_header_count(rw); + const size_t nvcap = hcount + 1; + + nghttp3_nv scratch[32]; + nghttp3_nv *const nv = + (nvcap <= sizeof(scratch) / sizeof(scratch[0])) + ? scratch + : (nghttp3_nv *)emalloc(nvcap * sizeof(nghttp3_nv)); + size_t nvi = 0; + + nv[nvi].name = (uint8_t *)":status"; + nv[nvi].namelen = 7; + nv[nvi].value = (uint8_t *)status_buf; + nv[nvi].valuelen = (size_t)status_len; + nv[nvi].flags = NGHTTP3_NV_FLAG_NONE; + nvi++; + + for (size_t i = 0; i < hcount; i++) { + const char *nm, *val; + size_t nl, vl; + + if (!response_wire_header_at(rw, i, &nm, &nl, &val, &vl)) { + continue; + } + + nv[nvi].name = (uint8_t *)nm; + nv[nvi].namelen = nl; + nv[nvi].value = (uint8_t *)val; + nv[nvi].valuelen = vl; + nv[nvi].flags = NGHTTP3_NV_FLAG_NONE; + nvi++; + } + + size_t blen = 0; + const char *body = response_wire_body(rw, &blen); + + if (body != NULL && blen > 0) { + s->response_body = zend_string_init(body, blen, 0); + s->response_body_offset = 0; + } + + const nghttp3_data_reader dr = { .read_data = h3_read_data_cb }; + const int rv = nghttp3_conn_submit_response( + (nghttp3_conn *)c->nghttp3_conn, s->stream_id, nv, nvi, &dr); + + if (nv != scratch) { + efree(nv); + } + + http3_packet_stats_t *const stats = http3_listener_packet_stats(c->listener); if (rv == 0) { if (stats != NULL) stats->h3_response_submitted++; @@ -654,14 +807,14 @@ bool http3_stream_submit_response(http3_connection_t *c, * ------------------------------------------------------------------- */ int h3_stream_append_chunk(void *ctx, zend_string *chunk) { - http3_stream_t *s = (http3_stream_t *)ctx; + http3_stream_t *const s = (http3_stream_t *)ctx; if (s == NULL || s->conn == NULL || s->peer_closed) { zend_string_release(chunk); return HTTP_STREAM_APPEND_STREAM_DEAD; } - http3_connection_t *c = s->conn; + http3_connection_t *const c = s->conn; if (c->closed || c->nghttp3_conn == NULL) { zend_string_release(chunk); @@ -815,7 +968,7 @@ int h3_stream_append_chunk(void *ctx, zend_string *chunk) void h3_stream_mark_ended(void *ctx) { - http3_stream_t *s = (http3_stream_t *)ctx; + http3_stream_t *const s = (http3_stream_t *)ctx; if (s == NULL || s->conn == NULL || s->streaming_ended) { return; @@ -837,7 +990,7 @@ void h3_stream_mark_ended(void *ctx) static zend_async_event_t *h3_stream_get_wait_event(void *ctx) { - http3_stream_t *s = (http3_stream_t *)ctx; + http3_stream_t *const s = (http3_stream_t *)ctx; if (s == NULL) return NULL; @@ -874,19 +1027,26 @@ const http_response_stream_ops_t h3_stream_ops = { * frees on http3_stream_release. */ static void http3_finalize_request_body(http3_stream_t *s) { - http_request_t *req = s->request; - - if (req == NULL) { - return; - } + http_request_t *const req = s->request; + ZEND_ASSERT(req != NULL); /* Move the assembled body bytes into the request. smart_str leaves * a NUL-terminated zend_string with refcount 1; we transfer that * ownership to req->body and clear our handle. */ if (s->body_buf.s != NULL) { smart_str_0(&s->body_buf); - req->body = s->body_buf.s; - s->body_buf.s = NULL; /* request now owns the storage */ + + if (req->persistent) { + /* Reactor mode: the worker reads req->body on its own thread, + * so copy the ZMM smart_str into a persistent (malloc) zend_string + * and drop the builder. getBody() deep-copies it back into ZMM. */ + req->body = zend_string_init(ZSTR_VAL(s->body_buf.s), + ZSTR_LEN(s->body_buf.s), 1); + smart_str_free(&s->body_buf); + } else { + req->body = s->body_buf.s; + s->body_buf.s = NULL; /* request now owns the storage */ + } } req->complete = true; @@ -909,8 +1069,8 @@ static int h3_end_stream_cb(nghttp3_conn *conn, int64_t stream_id, void *conn_user_data, void *stream_user_data) { (void)conn; (void)stream_id; - http3_connection_t *c = (http3_connection_t *)conn_user_data; - http3_stream_t *s = (http3_stream_t *)stream_user_data; + http3_connection_t *const c = (http3_connection_t *)conn_user_data; + http3_stream_t *const s = (http3_stream_t *)stream_user_data; if (s == NULL || s->rejected) { return 0; @@ -918,7 +1078,7 @@ static int h3_end_stream_cb(nghttp3_conn *conn, int64_t stream_id, http3_finalize_request_body(s); - http3_packet_stats_t *stats = c != NULL + http3_packet_stats_t *const stats = c != NULL ? http3_listener_packet_stats(c->listener) : NULL; if (stats != NULL) stats->h3_request_received++; @@ -965,14 +1125,14 @@ static int h3_stream_close_cb(nghttp3_conn *conn, int64_t stream_id, void *conn_user_data, void *stream_user_data) { (void)conn; (void)app_error_code; - http3_stream_t *s = (http3_stream_t *)stream_user_data; + http3_stream_t *const s = (http3_stream_t *)stream_user_data; /* Defensive: clear ngtcp2's stream_user_data so any straggler ngtcp2 * stream callback (e.g. extend_max_stream_data, ack_stream_data) that * fires after nghttp3 has already closed the stream cannot deref the * about-to-be-freed http3_stream_t. ngtcp2 and nghttp3 maintain their * own per-stream state machines; nothing forces them to fire close in * lockstep. */ - http3_connection_t *c = (http3_connection_t *)conn_user_data; + http3_connection_t *const c = (http3_connection_t *)conn_user_data; if (c != NULL && c->ngtcp2_conn != NULL) { ngtcp2_conn_set_stream_user_data( @@ -992,7 +1152,7 @@ static int h3_stop_sending_cb(nghttp3_conn *conn, int64_t stream_id, /* Mirror the request via QUIC STOP_SENDING so the peer knows we * gave up on its data. ngtcp2 has the function exposed on the * connection-level handle. */ - http3_connection_t *c = (http3_connection_t *)conn_user_data; + http3_connection_t *const c = (http3_connection_t *)conn_user_data; if (c != NULL && c->ngtcp2_conn != NULL) { ngtcp2_conn_shutdown_stream_read( @@ -1007,7 +1167,7 @@ static int h3_reset_stream_cb(nghttp3_conn *conn, int64_t stream_id, void *conn_user_data, void *stream_user_data) { (void)conn; (void)stream_user_data; - http3_connection_t *c = (http3_connection_t *)conn_user_data; + http3_connection_t *const c = (http3_connection_t *)conn_user_data; if (c != NULL && c->ngtcp2_conn != NULL) { ngtcp2_conn_shutdown_stream_write( @@ -1026,10 +1186,10 @@ static int h3_reset_stream_cb(nghttp3_conn *conn, int64_t stream_id, * so freeing on hand-off would corrupt retransmits whenever a packet * got lost. */ static int h3_acked_stream_data_cb(nghttp3_conn *conn, int64_t stream_id, - uint64_t datalen, void *cu, void *su) + uint64_t datalen, void *user_data, void *stream_user_data) { - (void)conn; (void)stream_id; (void)cu; - http3_stream_t *s = (http3_stream_t *)su; + (void)conn; (void)stream_id; (void)user_data; + http3_stream_t *const s = (http3_stream_t *)stream_user_data; if (s == NULL || s->chunk_queue == NULL) { return 0; @@ -1145,19 +1305,24 @@ bool http3_connection_init_h3(http3_connection_t *c) static int handshake_completed_cb(ngtcp2_conn *conn, void *user_data) { (void)conn; - http3_connection_t *c = (http3_connection_t *)user_data; + http3_connection_t *const c = (http3_connection_t *)user_data; if (c == NULL) { return NGTCP2_ERR_CALLBACK_FAILURE; } - http3_packet_stats_t *stats = http3_listener_packet_stats(c->listener); + http3_packet_stats_t *const stats = http3_listener_packet_stats(c->listener); const unsigned char *proto = NULL; unsigned int proto_len = 0; SSL_get0_alpn_selected((SSL *)c->ssl, &proto, &proto_len); - if (proto_len != 2 || proto == NULL || memcmp(proto, "h3", 2) != 0) { + if (proto != NULL && proto_len == 2 && memcmp(proto, "h3", 2) == 0) { + c->proto = HTTP3_PROTO_H3; + } else if (proto != NULL && proto_len == 10 + && memcmp(proto, "hq-interop", 10) == 0) { + c->proto = HTTP3_PROTO_HQ; + } else { if (stats != NULL) stats->quic_alpn_mismatch++; /* Returning CALLBACK_FAILURE asks ngtcp2 to close the connection; * the subsequent drain will emit a CONNECTION_CLOSE frame. */ @@ -1166,15 +1331,263 @@ static int handshake_completed_cb(ngtcp2_conn *conn, void *user_data) if (stats != NULL) stats->quic_handshake_completed++; - /* Wire nghttp3 only after ALPN passes. A failure here is fatal for - * this connection — without nghttp3 we cannot speak HTTP/3 to the - * peer, so close. */ - if (!http3_connection_init_h3(c)) { - if (stats != NULL) stats->h3_init_failed++; - return NGTCP2_ERR_CALLBACK_FAILURE; + /* Only h3 wires nghttp3 (and the control/QPACK streams). hq-interop + * speaks raw HTTP/0.9 on bidi streams, so it keeps nghttp3_conn NULL. */ + if (c->proto == HTTP3_PROTO_H3) { + if (!http3_connection_init_h3(c)) { + if (stats != NULL) stats->h3_init_failed++; + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + if (stats != NULL) stats->h3_init_ok++; + } + + return 0; +} + +/* === hq-interop (HTTP/0.9-over-QUIC) ingress ===================== + * + * The interop test matrix speaks raw HTTP/0.9 on QUIC bidi streams, not + * HTTP/3, so an hq connection has no nghttp3. Ingress accumulates the + * "GET \r\n" request line; egress (http3_io.c drain) writes + * s->response_body raw + FIN. */ +#ifndef PHP_WIN32 +/* Open `path` (a leading-'/' request path) for reading, confined to `docroot`. + * + * On Linux with openat2 this resolves with RESOLVE_BENEATH: the kernel refuses + * any component that escapes the docroot subtree (".." , an absolute path, or a + * symlink swapped in after the check), so there is no realpath()->open() TOCTOU + * window. Where openat2 is unavailable (older kernel / seccomp) it falls back + * to realpath() canonicalisation + an explicit containment check — the prior + * behaviour. Returns an O_RDONLY fd, or -1. */ +static int http3_hq_open_beneath(const char *docroot, + const char *path, const size_t path_len) +{ + /* openat2 / realpath want a docroot-relative path: drop the leading '/'. */ + const char *const rel = path + 1; + const size_t rel_len = path_len - 1; + + if (rel_len == 0) { + return -1; + } + +#ifdef HTTP3_HAVE_OPENAT2 + char relz[PATH_MAX]; + + if (rel_len >= sizeof relz) { + return -1; + } + + memcpy(relz, rel, rel_len); + relz[rel_len] = '\0'; + + const int dirfd = open(docroot, O_RDONLY | O_DIRECTORY | O_CLOEXEC); + + if (dirfd >= 0) { + struct open_how how = { + .flags = (uint64_t)(O_RDONLY | O_CLOEXEC), + .resolve = RESOLVE_BENEATH | RESOLVE_NO_MAGICLINKS, + }; + const long rv = syscall(SYS_openat2, dirfd, relz, &how, sizeof how); + const int saved = errno; + close(dirfd); + + if (rv >= 0) { + return (int)rv; + } + + /* A RESOLVE_BENEATH rejection is a real traversal attempt — do not + * retry it. Only fall through when openat2 itself is unavailable. */ + if (saved != ENOSYS && saved != EPERM) { + return -1; + } + } +#endif /* HTTP3_HAVE_OPENAT2 */ + + char full[PATH_MAX]; + const int n = snprintf(full, sizeof full, "%s/%.*s", + docroot, (int)rel_len, rel); + + if (n <= 0 || (size_t)n >= sizeof full) { + return -1; + } + + char resolved[PATH_MAX]; + char droot[PATH_MAX]; + + if (realpath(full, resolved) == NULL || realpath(docroot, droot) == NULL) { + return -1; + } + + const size_t dl = strlen(droot); + + if (strncmp(resolved, droot, dl) != 0 + || (resolved[dl] != '/' && resolved[dl] != '\0')) { + return -1; /* escaped the docroot */ + } + + return open(resolved, O_RDONLY | O_CLOEXEC); +} +#endif /* !PHP_WIN32 */ + +/* Map a docroot-relative file into s->hq_body for zero-copy raw egress, or + * return false on any failure. Path resolution is confined to the docroot by + * http3_hq_open_beneath (TOCTOU-safe). mmap (not read-into-buffer) keeps + * arbitrarily large files off the heap and out of a blocking bulk read; ngtcp2 + * references the pages until acked, so the mapping lives until + * http3_stream_release munmaps it. A zero-byte regular file is a valid empty + * body (served FIN-only). POSIX-only — a Windows stub returns false. */ +static bool http3_hq_map_file(http3_stream_t *s, const char *docroot, + const char *path, const size_t path_len) +{ +#ifdef PHP_WIN32 + (void)s; (void)docroot; (void)path; (void)path_len; + return false; +#else + if (docroot == NULL || path_len == 0 || path[0] != '/' + || memchr(path, '\0', path_len) != NULL) { + return false; + } + + const int fd = http3_hq_open_beneath(docroot, path, path_len); + + if (fd < 0) { + return false; + } + + struct stat st; + + if (fstat(fd, &st) != 0 || !S_ISREG(st.st_mode)) { + close(fd); + return false; + } + + if (st.st_size == 0) { + close(fd); + s->hq_body = NULL; /* empty body — FIN only */ + s->hq_body_len = 0; + return true; + } + + void *const map = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + + if (map == MAP_FAILED) { + return false; + } + + s->hq_map = map; + s->hq_map_len = (size_t)st.st_size; + s->hq_body = (const char *)map; + s->hq_body_len = (size_t)st.st_size; + return true; +#endif /* PHP_WIN32 */ +} + +static void http3_hq_serve(http3_connection_t *c, http3_stream_t *s) +{ + const char *path = NULL; + size_t path_len = 0; + + if (s->hq_line != NULL && s->hq_line_len >= 4 + && memcmp(s->hq_line, "GET ", 4) == 0) { + path = s->hq_line + 4; + path_len = s->hq_line_len - 4; + + /* Tolerate a lenient client appending " HTTP/x.x" — HTTP/0.9 has no + * version token, but trim it to the bare path if present. */ + const char *const sp = memchr(path, ' ', path_len); + + if (sp != NULL) { + path_len = (size_t)(sp - path); + } + } + + const http_server_object *const server = + (const http_server_object *)http3_listener_server_obj(c->listener); + const http_server_config_t *const cfg = + http_server_get_config((http_server_object *)server); + + bool served = false; + + if (cfg != NULL && cfg->http3_hq_docroot != NULL && path != NULL) { + served = http3_hq_map_file(s, ZSTR_VAL(cfg->http3_hq_docroot), + path, path_len); + } + + if (!served) { + /* No docroot / not found / traversal: a static literal body (not a + * heap string) — the egress loop streams it raw and the FIN closes + * the stream cleanly. */ + static const char not_found[] = "hq: not found\n"; + s->hq_body = not_found; + s->hq_body_len = sizeof(not_found) - 1; + } + + s->hq_body_off = 0; + s->hq_served = true; + + http3_listener_mark_flush(c->listener, c); +} + +/* Feed inbound bytes of an hq bidi stream. Allocates the stream on first + * sight (mirrors h3_begin_headers_cb minus nghttp3). Returns 0 on success, + * -1 on allocation failure (caller closes the connection). */ +static int http3_hq_recv_stream_data(http3_connection_t *c, ngtcp2_conn *qconn, + const int64_t stream_id, http3_stream_t *s, + const uint8_t *data, const size_t datalen) +{ + /* hq answers client-initiated bidi only (low 2 bits == 0). Other stream + * ids are consumed for flow control but otherwise ignored. */ + if ((stream_id & 0x03) != 0) { + return 0; + } + + if (s == NULL) { + s = http3_stream_new(c, stream_id); + + if (s == NULL) { + return -1; + } + + (void)ngtcp2_conn_set_stream_user_data(qconn, stream_id, s); + s->conn = c; + s->list_next = c->streams_head; + c->streams_head = s; + } + + if (s->hq_served) { + return 0; + } + + if (s->hq_line == NULL) { + s->hq_line = emalloc(HTTP3_HQ_LINE_MAX); + } + + for (size_t i = 0; i < datalen; i++) { + const char ch = (char)data[i]; + + if (ch == '\n') { + size_t len = s->hq_line_len; + + if (len > 0 && s->hq_line[len - 1] == '\r') { + len--; + } + + s->hq_line_len = (uint16_t)len; + http3_hq_serve(c, s); + return 0; + } + + if (s->hq_line_len < HTTP3_HQ_LINE_MAX - 1) { + s->hq_line[s->hq_line_len++] = ch; + } else { + /* Over-long request line: answer with what we have and stop. */ + http3_hq_serve(c, s); + return 0; + } } - if (stats != NULL) stats->h3_init_ok++; return 0; } @@ -1183,28 +1596,47 @@ static int recv_stream_data_cb(ngtcp2_conn *conn, uint32_t flags, const uint8_t *data, size_t datalen, void *user_data, void *stream_user_data) { - (void)conn; (void)offset; (void)stream_user_data; - http3_connection_t *c = (http3_connection_t *)user_data; + (void)offset; + http3_connection_t *const c = (http3_connection_t *)user_data; - if (c == NULL || c->nghttp3_conn == NULL) { - return 0; /* Pre-handshake stream data is not expected; drop. */ + if (c == NULL) { + return 0; } - int fin = (flags & NGTCP2_STREAM_DATA_FLAG_FIN) ? 1 : 0; - nghttp3_ssize n = nghttp3_conn_read_stream( - (nghttp3_conn *)c->nghttp3_conn, stream_id, data, datalen, fin); + const int fin = (flags & NGTCP2_STREAM_DATA_FLAG_FIN) ? 1 : 0; + uint64_t consumed = 0; - if (n < 0) { - http3_packet_stats_t *stats = http3_listener_packet_stats(c->listener); + if (c->proto == HTTP3_PROTO_HQ) { + if (http3_hq_recv_stream_data(c, conn, stream_id, + (http3_stream_t *)stream_user_data, + data, datalen) < 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } - if (stats != NULL) stats->h3_stream_read_error++; - return NGTCP2_ERR_CALLBACK_FAILURE; + consumed = datalen; /* hq consumes the whole datagram */ + } else { + if (c->nghttp3_conn == NULL) { + return 0; /* Pre-handshake stream data is not expected; drop. */ + } + + const nghttp3_ssize n = nghttp3_conn_read_stream( + (nghttp3_conn *)c->nghttp3_conn, stream_id, data, datalen, fin); + + if (UNEXPECTED(n < 0)) { + http3_packet_stats_t *const stats = http3_listener_packet_stats(c->listener); + + if (stats != NULL) stats->h3_stream_read_error++; + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + consumed = (uint64_t)n; } - /* Tell ngtcp2 how many bytes nghttp3 actually consumed so it can - * advance the QUIC flow-control window. */ - ngtcp2_conn_extend_max_stream_offset((ngtcp2_conn *)c->ngtcp2_conn, - stream_id, (uint64_t)n); - ngtcp2_conn_extend_max_offset((ngtcp2_conn *)c->ngtcp2_conn, (uint64_t)n); + + /* Advance the QUIC flow-control window by the consumed-byte count. Hoisted + * out of the nghttp3 branch so it runs for hq too — otherwise an hq peer's + * stream or connection window never reopens and the transfer stalls. */ + ngtcp2_conn_extend_max_stream_offset(conn, stream_id, consumed); + ngtcp2_conn_extend_max_offset(conn, consumed); return 0; } @@ -1217,7 +1649,7 @@ static int extend_max_stream_data_cb(ngtcp2_conn *conn, int64_t stream_id, void *user_data, void *stream_user_data) { (void)conn; (void)max_data; - http3_connection_t *c = (http3_connection_t *)user_data; + http3_connection_t *const c = (http3_connection_t *)user_data; http3_stream_t *s = (http3_stream_t *)stream_user_data; /* Pair with drain_out's nghttp3_conn_block_stream call: we blocked @@ -1249,10 +1681,21 @@ static int acked_stream_data_offset_cb(ngtcp2_conn *conn, int64_t stream_id, void *user_data, void *stream_user_data) { (void)conn; (void)offset; - http3_connection_t *c = (http3_connection_t *)user_data; + http3_connection_t *const c = (http3_connection_t *)user_data; http3_stream_t *s = (http3_stream_t *)stream_user_data; - if (c == NULL || c->nghttp3_conn == NULL) { + if (c == NULL) { + return 0; + } + + if (c->proto == HTTP3_PROTO_HQ) { + /* A fresh ACK means more cwnd. Resume the raw drain so an hq body that + * paused on STREAM_DATA_BLOCKED keeps flowing until fully sent. */ + http3_listener_mark_flush(c->listener, c); + return 0; + } + + if (c->nghttp3_conn == NULL) { return 0; } @@ -1290,10 +1733,30 @@ static int stream_close_cb(ngtcp2_conn *conn, uint32_t flags, int64_t stream_id, uint64_t app_error_code, void *user_data, void *stream_user_data) { - (void)stream_user_data; - http3_connection_t *c = (http3_connection_t *)user_data; + http3_connection_t *const c = (http3_connection_t *)user_data; - if (c == NULL || c->nghttp3_conn == NULL) { + if (c == NULL) { + return 0; + } + + /* ngtcp2 never auto-extends MAX_STREAMS on close, so without this each + * connection caps at initial_max_streams_bidi. id&3==0 = client bidi. + * Hoisted above the nghttp3 guard so hq (no nghttp3) re-credits too. */ + if ((stream_id & 0x03) == 0) { + ngtcp2_conn_extend_max_streams_bidi(conn, 1); + } + + if (c->proto == HTTP3_PROTO_HQ) { + /* hq tracks the stream on the ngtcp2 side only; release the slab here + * (the h3 path releases via nghttp3's own stream_close). */ + if (stream_user_data != NULL) { + http3_stream_release((http3_stream_t *)stream_user_data); + } + + return 0; + } + + if (c->nghttp3_conn == NULL) { return 0; } /* If the stream closed due to a non-app reason (transport-level), @@ -1304,16 +1767,10 @@ static int stream_close_cb(ngtcp2_conn *conn, uint32_t flags, int rv = nghttp3_conn_close_stream( (nghttp3_conn *)c->nghttp3_conn, stream_id, app_error_code); - http3_packet_stats_t *stats = http3_listener_packet_stats(c->listener); + http3_packet_stats_t *const stats = http3_listener_packet_stats(c->listener); if (stats != NULL) stats->h3_stream_close++; - /* ngtcp2 never auto-extends MAX_STREAMS on close, so without this each - * connection caps at initial_max_streams_bidi. id&3==0 = client bidi. */ - if ((stream_id & 0x03) == 0) { - ngtcp2_conn_extend_max_streams_bidi(conn, 1); - } - if (rv != 0 && rv != NGHTTP3_ERR_STREAM_NOT_FOUND) { return NGTCP2_ERR_CALLBACK_FAILURE; } @@ -1326,7 +1783,7 @@ static int stream_reset_cb(ngtcp2_conn *conn, int64_t stream_id, void *user_data, void *stream_user_data) { (void)conn; (void)final_size; (void)app_error_code; (void)stream_user_data; - http3_connection_t *c = (http3_connection_t *)user_data; + http3_connection_t *const c = (http3_connection_t *)user_data; if (c == NULL || c->nghttp3_conn == NULL) { return 0; @@ -1345,7 +1802,7 @@ static int stream_stop_sending_cb(ngtcp2_conn *conn, int64_t stream_id, void *user_data, void *stream_user_data) { (void)conn; (void)app_error_code; (void)stream_user_data; - http3_connection_t *c = (http3_connection_t *)user_data; + http3_connection_t *const c = (http3_connection_t *)user_data; if (c == NULL || c->nghttp3_conn == NULL) { return 0; @@ -1364,7 +1821,7 @@ static int extend_max_remote_streams_bidi_cb(ngtcp2_conn *conn, void *user_data) { (void)conn; - http3_connection_t *c = (http3_connection_t *)user_data; + http3_connection_t *const c = (http3_connection_t *)user_data; if (c == NULL || c->nghttp3_conn == NULL) { return 0; @@ -1375,6 +1832,8 @@ static int extend_max_remote_streams_bidi_cb(ngtcp2_conn *conn, return 0; } + + const ngtcp2_callbacks HTTP3_NGTCP2_CALLBACKS = { .recv_client_initial = ngtcp2_crypto_recv_client_initial_cb, .recv_crypto_data = ngtcp2_crypto_recv_crypto_data_cb, @@ -1383,6 +1842,7 @@ const ngtcp2_callbacks HTTP3_NGTCP2_CALLBACKS = { .hp_mask = ngtcp2_crypto_hp_mask_cb, .rand = rand_cb, .get_new_connection_id = get_new_connection_id_cb, + .remove_connection_id = remove_connection_id_cb, .update_key = ngtcp2_crypto_update_key_cb, .delete_crypto_aead_ctx = ngtcp2_crypto_delete_crypto_aead_ctx_cb, .delete_crypto_cipher_ctx = ngtcp2_crypto_delete_crypto_cipher_ctx_cb, diff --git a/src/http3/http3_connection.c b/src/http3/http3_connection.c index 5fb961f..b5ad82f 100644 --- a/src/http3/http3_connection.c +++ b/src/http3/http3_connection.c @@ -28,6 +28,7 @@ #include "Zend/zend_hrtime.h" /* zend_hrtime — drain stamps */ #include "http3_listener.h" /* listener accessors */ #include "http3_packet.h" /* version_negotiation / stateless_reset */ +#include "http3_steer.h" /* CID steering encode */ #include "http3/http3_stream.h" /* http3_stream_t (callbacks.c symmetry) */ #include /* recv_client_initial / hp_mask */ @@ -137,10 +138,9 @@ void http3_debug_logger(void *user_data, const char *fmt, ...) * * ngtcp2_path matching is strict: the local addr passed to read_pkt / * writev_stream must be the same value on every call after server_new. - * The proper plumbing for this is `zend_async_udp_sockname` (parked as - * a Step-6 upstream blocker per project_http3_progress.md); until that - * lands we fabricate the sockaddr from the bind config so at least it - * is stable across calls. peer_family lets us produce v4 / v6 to match + * The proper plumbing for this is `zend_async_udp_sockname`, which is + * not yet available; until it lands we fabricate the sockaddr from the + * bind config so at least it is stable across calls. peer_family lets us produce v4 / v6 to match * the inbound datagram. Returns 0 on success. */ int http3_build_listener_local(const http3_listener_t *l, int peer_family, @@ -149,7 +149,7 @@ int http3_build_listener_local(const http3_listener_t *l, { memset(out, 0, sizeof(*out)); const char *host = http3_listener_host(l); - int port = http3_listener_port(l); + const int port = http3_listener_port(l); if (host == NULL) host = (peer_family == AF_INET6) ? "::" : "0.0.0.0"; @@ -272,6 +272,7 @@ static http3_connection_t *http3_connection_accept( * failure, which the caller undoes (peer_dec) and drops silently. */ http3_connection_t *c = ecalloc(1, sizeof(http3_connection_t)); c->listener = listener; + c->worker_slot = -1; /* unassigned until the first dispatch homes it */ /* Cache hot-path slices. http3_listener_server_obj() returns NULL * for an unparented listener — accessor handles that, returning the * dummy / default fallbacks. */ @@ -297,8 +298,16 @@ static http3_connection_t *http3_connection_accept( /* Generate our own SCID; subsequent client packets address us with * this as their DCID. A DRBG failure here must NOT fall through to * a zero SCID (would collide in conn_map with any other conn whose - * SCID generation also failed) — fail the accept cleanly instead. */ - if (!http3_fill_random(c->scid, HTTP3_SCID_LEN)) { + * SCID generation also failed) — fail the accept cleanly instead. + * + * With CID steering active the SCID encodes this reactor's + * id so a migrated client rehashed onto another reactor routes back here; + * otherwise it is fully random as before. */ + const int reactor_id = http3_listener_reactor_id(listener); + const bool steer = http3_steer_active() && reactor_id >= 0; + + if (steer ? !http3_steer_encode(c->scid, reactor_id) + : !http3_fill_random(c->scid, HTTP3_SCID_LEN)) { OPENSSL_cleanse(c, sizeof(*c)); efree(c); return NULL; @@ -354,13 +363,13 @@ static http3_connection_t *http3_connection_accept( * accept path uses for read-header timeouts in this server. */ settings.handshake_timeout = 10 * NGTCP2_SECONDS; - if (c != NULL && c->log_state != NULL + if (c->log_state != NULL && c->log_state->severity != HTTP_LOG_OFF && (int)HTTP_LOG_DEBUG >= (int)c->log_state->severity) { settings.log_printf = http3_debug_logger; } - /* Transport params resolution (NEXT_STEPS.md §5): + /* Transport params resolution: * 1. HttpServerConfig setters at server start() time. * 2. PHP_HTTP3_BENCH_FC=1 raises FC + streams to bench-grade * values (NEVER enable in production — disables back-pressure). @@ -375,9 +384,9 @@ static http3_connection_t *http3_connection_accept( const http_server_object *srv_obj = (const http_server_object *)http3_listener_server_obj(listener); - uint32_t cfg_window = http_server_get_http3_stream_window_bytes(srv_obj); - uint32_t cfg_streams = http_server_get_http3_max_concurrent_streams(srv_obj); - uint32_t cfg_idle_ms = http_server_get_http3_idle_timeout_ms(srv_obj); + const uint32_t cfg_window = http_server_get_http3_stream_window_bytes(srv_obj); + const uint32_t cfg_streams = http_server_get_http3_max_concurrent_streams(srv_obj); + const uint32_t cfg_idle_ms = http_server_get_http3_idle_timeout_ms(srv_obj); uint64_t window_bytes = cfg_window != 0 ? (uint64_t)cfg_window : (256ull * 1024); uint64_t streams_bidi = cfg_streams != 0 ? (uint64_t)cfg_streams : 100; @@ -545,14 +554,23 @@ static http3_connection_t *http3_connection_accept( * Packet entry — listener calls this for each inbound datagram * ------------------------------------------------------------------------ */ +/* Migration-storm guard thresholds: more than HTTP3_MIGRATE_STORM_MAX path + * migrations within HTTP3_MIGRATE_STORM_WINDOW_NS is well past any legitimate + * client (which migrates rarely and re-validates in ~1 RTT) — it is a wedge or + * a flood, and we shed the connection. -D-overridable for tuning/tests. */ +#ifndef HTTP3_MIGRATE_STORM_MAX +# define HTTP3_MIGRATE_STORM_MAX 8u +#endif +#ifndef HTTP3_MIGRATE_STORM_WINDOW_NS +# define HTTP3_MIGRATE_STORM_WINDOW_NS 1000000000ULL /* 1 s */ +#endif + bool http3_connection_dispatch( http3_listener_t *listener, const uint8_t *data, size_t datalen, uint8_t ecn, const struct sockaddr *peer, socklen_t peer_len) { - if (listener == NULL || data == NULL || datalen == 0) { - return false; - } + ZEND_ASSERT(listener != NULL && data != NULL && datalen > 0); http3_packet_stats_t *stats = http3_listener_packet_stats(listener); @@ -584,6 +602,16 @@ bool http3_connection_dispatch( : NULL; if (conn == NULL) { + /* CID steering: a short-header packet for a conn we do + * not own may belong to another reactor — a client that migrated and + * was SO_REUSEPORT-rehashed onto us. If its DCID decodes to a different + * reactor, forward it there instead of resetting a live connection. */ + if (http3_listener_try_steer(listener, vc.version, + vc.dcid, vc.dcidlen, + data, datalen, ecn, peer, peer_len)) { + return true; + } + /* No matching connection. For a long-header (version != 0) INITIAL * this is expected — we create one. For a short-header (version == 0) * it is a 1-RTT packet for a conn we do not know: answer with a @@ -787,6 +815,23 @@ bool http3_connection_dispatch( memcpy(&conn->peer, cur->remote.addr, (size_t)cur->remote.addrlen); conn->peer_len = (socklen_t)cur->remote.addrlen; stats->quic_path_migrations++; + + /* Migration-storm guard: a client rebinding faster than its path + * validates wedges ngtcp2 (responses chase the stale path). Count + * migrations in a sliding window; past the cap, flag the conn so + * the next flush sheds it (graceful close to the live peer) rather + * than spinning PTO probes for seconds. */ + const uint64_t now_ns = (uint64_t)zend_hrtime(); + + if (now_ns - conn->migrate_window_start_ns + > HTTP3_MIGRATE_STORM_WINDOW_NS) { + conn->migrate_window_start_ns = now_ns; + conn->migrate_count = 0; + } + + if (++conn->migrate_count > HTTP3_MIGRATE_STORM_MAX) { + conn->migration_storm = true; + } } } else if (pkt_rv == NGTCP2_ERR_DRAINING || pkt_rv == NGTCP2_ERR_CLOSING || @@ -813,6 +858,89 @@ bool http3_connection_dispatch( * Connection free — teardown order * ------------------------------------------------------------------------ */ +/* ---- Server-issued alternate CIDs (NEW_CONNECTION_ID, RFC 9000 §5.1) ---- + * Each CID we hand the client is registered in the listener conn_map and + * recorded here so it can be unregistered before the conn is freed, + * independent of ngtcp2's retired-CID grace period (see http3_connection.h). */ + +void http3_connection_register_issued_cid( + http3_connection_t *c, const uint8_t *cid, size_t cidlen) +{ + HashTable *map = http3_listener_conn_map(c->listener); + + if (map == NULL) { + return; + } + /* add-if-absent: a byte-for-byte collision with a live key owned by a + * different conn fails the add — do NOT record it, so reap never evicts a + * key this connection does not own. */ + if (zend_hash_str_add_ptr(map, (const char *)cid, cidlen, c) == NULL) { + return; + } + + if (c->issued_cid_count == c->issued_cid_cap) { + const size_t ncap = c->issued_cid_cap == 0 ? 8 : c->issued_cid_cap * 2; + c->issued_cids = erealloc(c->issued_cids, + ncap * sizeof(http3_issued_cid_t)); + c->issued_cid_cap = ncap; + } + + http3_issued_cid_t *const e = &c->issued_cids[c->issued_cid_count++]; + memcpy(e->data, cid, cidlen); + e->len = cidlen; + + http3_packet_stats_t *const stats = http3_listener_packet_stats(c->listener); + + if (stats != NULL) { + stats->quic_new_cid_issued++; + } +} + +void http3_connection_unregister_issued_cid( + http3_connection_t *c, const uint8_t *cid, size_t cidlen) +{ + for (size_t i = 0; i < c->issued_cid_count; i++) { + if (c->issued_cids[i].len != cidlen + || memcmp(c->issued_cids[i].data, cid, cidlen) != 0) { + continue; + } + + HashTable *map = http3_listener_conn_map(c->listener); + + if (map != NULL) { + zend_hash_str_del(map, (const char *)cid, cidlen); + } + + http3_packet_stats_t *const stats = + http3_listener_packet_stats(c->listener); + + if (stats != NULL) { + stats->quic_cid_retired++; + } + + c->issued_cids[i] = c->issued_cids[--c->issued_cid_count]; + return; + } +} + +void http3_connection_unregister_all_issued_cids(http3_connection_t *c) +{ + if (c->issued_cid_count == 0) { + return; + } + + HashTable *map = http3_listener_conn_map(c->listener); + + if (map != NULL) { + for (size_t i = 0; i < c->issued_cid_count; i++) { + zend_hash_str_del(map, + (const char *)c->issued_cids[i].data, c->issued_cids[i].len); + } + } + + c->issued_cid_count = 0; +} + void http3_connection_free(http3_connection_t *conn) { if (UNEXPECTED(conn == NULL || conn->closed)) { @@ -924,6 +1052,15 @@ void http3_connection_free(http3_connection_t *conn) ngtcp2_conn_del((ngtcp2_conn *)conn->ngtcp2_conn); conn->ngtcp2_conn = NULL; } + + /* Free the issued-CID record. On the reap path its conn_map keys were + * already removed by http3_listener_remove_connection; on the listener- + * teardown path the whole map is bulk-destroyed right after, so only the + * heap array itself needs releasing here. */ + if (conn->issued_cids != NULL) { + efree(conn->issued_cids); + conn->issued_cids = NULL; + } /* Wipe the struct shell before returning to ZendMM. Holds random * SCID/original_dcid, peer sockaddr, and pointer slots whose values * (now stale) leak heap layout to a UAF reader. External symbol — diff --git a/src/http3/http3_connection.h b/src/http3/http3_connection.h index 5932600..fe5f911 100644 --- a/src/http3/http3_connection.h +++ b/src/http3/http3_connection.h @@ -30,6 +30,19 @@ typedef struct _http3_listener_s http3_listener_t; /* defined in http3_list typedef struct _http3_connection_s http3_connection_t; typedef struct _http3_stream_s http3_stream_t; /* defined in http3_stream.h */ +/* One server-issued alternate CID (NEW_CONNECTION_ID, RFC 9000 §5.1). */ +typedef struct { + uint8_t data[20]; /* NGTCP2_MAX_CIDLEN */ + size_t len; +} http3_issued_cid_t; + +/* Negotiated QUIC application protocol. h3 (zero default) drives nghttp3; + * hq-interop is the raw HTTP/0.9-over-QUIC interop shim (no nghttp3). */ +typedef enum { + HTTP3_PROTO_H3 = 0, + HTTP3_PROTO_HQ +} http3_proto_t; + /* Per-connection state. * * One http3_connection_t per QUIC connection, identified by the server- @@ -55,6 +68,20 @@ struct _http3_connection_s { uint8_t routing_dcid[20]; size_t routing_dcidlen; + /* Server-issued alternate CIDs handed to the client via + * NEW_CONNECTION_ID (RFC 9000 §5.1). The client may rotate its DCID to + * any of them, so each is registered in the listener conn_map and MUST be + * unregistered before this conn is freed. Tracked explicitly here — NOT + * derived from ngtcp2_conn_get_scid — because ngtcp2 keeps a retired CID + * in its pool for ~3*PTO after RETIRE_CONNECTION_ID before it fires + * remove_connection_id, a window in which get_scid omits a CID still live + * in conn_map. Membership is mutated only at our own register/unregister + * points, so teardown is timing-independent. Dynamic; bounded in practice + * by ngtcp2's SCID pool. */ + http3_issued_cid_t *issued_cids; + size_t issued_cid_count; + size_t issued_cid_cap; + /* Peer address — the live send path. Re-pointed to a new client path on * migration / NAT rebind (RFC 9000 §9), so drain_out follows it. */ struct sockaddr_storage peer; @@ -71,9 +98,13 @@ struct _http3_connection_s { /* Opaque nghttp3_conn (HTTP/3 framing layer). NULL until * the TLS handshake completes; created in handshake_completed_cb so - * that it is only ever attached to a verified-h3 peer. */ + * that it is only ever attached to a verified-h3 peer. Stays NULL for + * an hq-interop peer (raw HTTP/0.9, no framing layer). */ void *nghttp3_conn; + /* ALPN settled by handshake_completed_cb. Zero (H3) is the default. */ + http3_proto_t proto; + /* Per-connection TLS state. All three owned here; teardown order: * 1. ngtcp2_conn_set_tls_native_handle(conn, NULL) * 2. SSL_set_app_data(ssl, NULL) ← ngtcp2 docs REQUIRE this if @@ -91,6 +122,12 @@ struct _http3_connection_s { zend_async_event_t *timer; zend_async_event_callback_t *timer_cb; + /* Deadline (ngtcp2 expiry, hrtime ns) the timer was last armed for. + * timer_fire_cb subtracts it from the actual fire time to measure how + * late the reactor serviced this connection's ACK/PTO — the per-conn + * view of reactor stall. 0 = no timer armed. */ + uint64_t timer_expiry_ns; + /* Intrusive list link. The listener tracks connections through this * chain instead of the DCID hashtable because the latter may carry * the same connection under multiple keys (server SCID + client @@ -98,7 +135,7 @@ struct _http3_connection_s { * The hashtable stays non-owning; this list is the ownership edge. */ http3_connection_t *next; - /* Phase-1 deferred-output dirty-list link. The read path marks the + /* Deferred-output dirty-list link. The read path marks the * conn via http3_listener_mark_flush instead of draining per datagram; * the listener flushes the whole list once per recvmmsg tick, so a * burst of N datagrams for one conn coalesces into one drain (one GSO @@ -129,6 +166,24 @@ struct _http3_connection_s { * does not emit a second CONNECTION_CLOSE on the same conn. */ bool sent_connection_close; + /* Migration-storm guard. A client that NAT-rebinds faster than + * its path can validate (RFC 9000 §9.3 lets the server decline migration) + * wedges ngtcp2 path validation — responses chase a stale path while the + * live path only gets PTO probes. Count migrations in a sliding window; + * past the cap we set migration_storm and shed the conn (graceful close to + * the live peer) on the next flush instead of spinning probes for seconds. + * Also hardens against a deliberate migration-flood DoS. */ + uint64_t migrate_window_start_ns; + uint32_t migrate_count; + bool migration_storm; + + /* Reactor/worker dispatch home. The worker slot this connection's + * requests stick to — one of this reactor's owned workers (reactor-paired pool). + * -1 until the first dispatch homes it; re-homed if that worker dies. A busy + * home spills individual requests elsewhere without changing this. Reactor-thread + * only (the owning reactor dispatches every stream), so no atomics. */ + int worker_slot; + /* Graceful drain state (mirror of the H1/H2 http_connection_t * fields). Pre-stamped at accept; consumed by the H3 commit path * which calls http_server_drain_evaluate and, on positive @@ -172,6 +227,23 @@ void http3_connection_free(http3_connection_t *conn); * listener itself is going away. Safe to call exactly once per conn. */ void http3_connection_reap(http3_connection_t *conn); +/* Register a server-issued alternate CID (from get_new_connection_id_cb) in + * the listener conn_map AND record it on the connection so it can be + * unregistered at teardown. No-op on the (astronomically rare) byte-for-byte + * collision with an existing key: the CID is then NOT recorded, so reap never + * evicts a key it does not own. */ +void http3_connection_register_issued_cid( + http3_connection_t *c, const uint8_t *cid, size_t cidlen); + +/* Unregister one issued CID (from remove_connection_id_cb on RETIRE). Deletes + * the conn_map key only if this connection actually owns it. */ +void http3_connection_unregister_issued_cid( + http3_connection_t *c, const uint8_t *cid, size_t cidlen); + +/* Delete every still-registered issued CID from the conn_map at teardown. + * Called from http3_listener_remove_connection before the conn is freed. */ +void http3_connection_unregister_all_issued_cids(http3_connection_t *c); + /* Flush one connection's pending ngtcp2 output and settle its lifecycle: * drain_out, then reap-or-arm-timer via check_terminal. Defined in * http3_io.c next to drain_out. The caller MUST NOT touch `conn` after diff --git a/src/http3/http3_dispatch.c b/src/http3/http3_dispatch.c index fbff70d..e148387 100644 --- a/src/http3/http3_dispatch.c +++ b/src/http3/http3_dispatch.c @@ -25,6 +25,7 @@ #include "http_send_file.h" /* http_send_file_dispatch */ #include "http_response_internal.h" /* http_response_has/take_send_file */ #include "static/static_handler.h" /* http_static_try_serve / count */ +#include "core/response_wire.h" /* response_wire_* (reverse path) */ /* Defined in src/http_request.c. Declared here because the public * php_http_server.h header doesn't expose it (it lives in the C boundary @@ -113,9 +114,189 @@ static const http_static_dispatch_cbs_t h3_static_dispatch_cbs = { * If the server has no handler registered (defensive — addHttpHandler * is normally a hard requirement of HttpServer::start) we fall back to * a 500 so the peer never sees an indefinite half-open stream. */ +/* Inbox backlog (undrained requests) at which a connection's home worker is + * considered busy and a request spills to a less-loaded worker. Well below + * WORKER_INBOX_CAPACITY (1024) so spill kicks in before hard backpressure; -D-overridable. */ +#ifndef H3_WORKER_SPILL_DEPTH +#define H3_WORKER_SPILL_DEPTH 64 +#endif + +/* Reactor mode: hand the parsed request to a PHP worker by pointer + * instead of spawning a handler coroutine here on the transport thread. The + * embedded persistent http_request_t crosses to the worker; the worker reads + * it, runs the handler, and posts the response + consumed back over the reverse + * channel. The reactor keeps the stream alive via a worker-borrow ref until the + * consumed arrives. No request-service stats here — those are the + * worker's job (handler runs there). */ +static void http3_stream_dispatch_to_worker(http3_connection_t *c, http3_stream_t *s, + const http3_reactor_ctx_t *rctx) +{ + /* Reactor-paired sticky dispatch. A connection homes to one of this + * reactor's owned workers and reuses it for all its streams (locality); a home + * that backs up past H3_WORKER_SPILL_DEPTH spills this request to a less-loaded + * worker (owned first, else any), and a home whose worker died is re-homed. */ + worker_registry_t *const reg = rctx->registry; + int slot = c->worker_slot; + worker_inbox_t *inbox = slot >= 0 ? worker_registry_at(reg, slot) : NULL; + + if (inbox == NULL) { + inbox = worker_registry_least_busy(reg, rctx->reactor_id, rctx->n_reactors, &slot); + + if (inbox == NULL) { + inbox = worker_registry_least_busy(reg, -1, rctx->n_reactors, &slot); + } + + if (inbox != NULL) { + c->worker_slot = slot; + } + } + + if (inbox != NULL && worker_inbox_depth(inbox) >= H3_WORKER_SPILL_DEPTH) { + int spill_slot; + worker_inbox_t *alt = + worker_registry_least_busy(reg, rctx->reactor_id, rctx->n_reactors, &spill_slot); + + if (alt == NULL || worker_inbox_depth(alt) >= H3_WORKER_SPILL_DEPTH) { + worker_inbox_t *const any = + worker_registry_least_busy(reg, -1, rctx->n_reactors, &spill_slot); + + if (any != NULL) { + alt = any; + } + } + + if (alt != NULL) { + inbox = alt; + } + } + + if (inbox == NULL) { + /* No worker published yet — leave the stream undispatched. It is torn + * down on the normal QUIC lifecycle (client PTO/RST); reactor teardown + * frees the request fields (s->dispatched stays false). */ + return; + } + + s->conn = c; + + /* Routing for the reverse path. reactor_id selects the reverse channel; + * reactor_conn carries the raw stream pointer (kept alive by the + * worker-borrow ref below until consumed, so it is valid when the response + * comes back); stream_id is for validation/logging. The raw pointer becomes + * a generationed handle when validate-and-drop lands. */ + s->request->reactor_id = (uint32_t)rctx->reactor_id; + s->request->reactor_stream_id = s->stream_id; + s->request->reactor_conn = s; + + s->dispatched = true; + s->refcount++; /* worker-borrow ref; dropped by the consumed apply */ + + if (UNEXPECTED(!worker_inbox_post(inbox, s->request))) { + /* Backpressure: the request was not handed off. Undo so the normal + * teardown reclaims it (s->dispatched=false => reactor teardown frees + * the fields). */ + s->refcount--; + s->dispatched = false; + return; + } + + H3T(s->stream_id, "1.dispatch_to_worker"); +} + +/* Reverse path: apply a worker-rendered response_wire on the reactor + * thread (posted via reactor_pool_post_exec). The wire carries the raw stream + * pointer (response_wire_conn) — valid here because the worker-borrow ref keeps + * the stream alive until the consumed that follows the response (FIFO on one + * reactor mailbox). If the stream/connection is already gone (client RST), the + * lookup is the validate-and-drop point: free the wire and return. On success, + * QPACK-encode + submit, then drain it out on this tick. Takes ownership of the + * wire. */ +void http3_reactor_apply_response(void *arg) +{ + response_wire_t *const rw = (response_wire_t *)arg; + + if (rw == NULL) { + return; + } + + http3_stream_t *const s = (http3_stream_t *)response_wire_conn(rw); + http3_connection_t *const c = (s != NULL) ? s->conn : NULL; + + if (c != NULL && !c->closed && c->nghttp3_conn != NULL) { + if (http3_stream_submit_response_wire(c, s, rw)) { + http3_connection_drain_out(c); + http3_connection_arm_timer(c); + } + } + + response_wire_free(rw); +} + +/* Reactor-side static serving: serve files + * entirely on the transport reactor — no PHP, no worker round-trip. Returns + * true when the static FSM claimed the request (HANDLED inline / HARD_ZERO + * sendfile), false on PASSTHROUGH so the caller routes to a worker. The + * response is built and submitted in the reactor's own ZMM; s->request is + * read-only here (persistent), freed by the reactor on stream release. */ +static bool http3_reactor_try_static(http3_connection_t *c, http3_stream_t *s, + const http3_reactor_ctx_t *rctx) +{ + if (rctx->static_mount_count == 0 || rctx->static_mounts == NULL) { + return false; + } + + object_init_ex(&s->response_zv, http_response_ce); + http_response_set_protocol_version(Z_OBJ(s->response_zv), "3.0"); + + const http_static_result_t rc = http_static_try_serve_mounts( + (const http_static_handler_t *const *)rctx->static_mounts, + rctx->static_mount_count, rctx->static_cache, + s->request, Z_OBJ(s->response_zv), c->counters, + &h3_static_dispatch_cbs, s); + + if (rc == HTTP_STATIC_PASSTHROUGH) { + zval_ptr_dtor(&s->response_zv); + ZVAL_UNDEF(&s->response_zv); + return false; + } + + if (rc == HTTP_STATIC_HARD_ZERO) { + /* Sendfile pump owns the stream: on_armed pinned s->refcount + the + * in-flight counter; on_static_done runs h3_dispose_tail. */ + return true; + } + + /* HANDLED: inline body / 4xx. Take a serving ref (mirrors the local + * coroutine ref) so the slab survives the dispose_tail release until + * nghttp3 closes the stream, then submit + drain. */ + s->refcount++; + + if (!c->closed && c->nghttp3_conn != NULL) { + (void)http3_stream_submit_response(c, s, false); + } + + h3_dispose_tail(c, s); + return true; +} + void http3_stream_dispatch(http3_connection_t *c, http3_stream_t *s) { - if (c == NULL || s == NULL || s->dispatched || s->request == NULL) { + if (c == NULL || s == NULL || s->dispatched) { + return; + } + + ZEND_ASSERT(s->request != NULL); + + /* Reactor mode: serve static here on the transport thread; otherwise route + * the request to a PHP worker instead of dispatching locally. */ + const http3_reactor_ctx_t *const rctx = http3_listener_reactor_ctx(c->listener); + + if (rctx != NULL) { + if (http3_reactor_try_static(c, s, rctx)) { + return; + } + + http3_stream_dispatch_to_worker(c, s, rctx); return; } @@ -186,7 +367,7 @@ void http3_stream_dispatch(http3_connection_t *c, http3_stream_t *s) &h3_stream_ops, s); #ifdef HAVE_HTTP_COMPRESSION - /* Attach compression state (issue #8). Server pointer comes from + /* Attach compression state. Server pointer comes from * the listener — same pattern that http3_handler_coroutine uses * for the request-sample bookkeeping. */ { @@ -206,7 +387,7 @@ void http3_stream_dispatch(http3_connection_t *c, http3_stream_t *s) } #endif - /* Static-handler dispatch (issue #60). Same policy as the H1/H2 + /* Static-handler dispatch. Same policy as the H1/H2 * sites: * HARD_ZERO — FSM owns the request; on_armed pinned the stream. * Return without spawning a coroutine; on_static_done @@ -283,8 +464,9 @@ static void h3_handler_coroutine_entry(void) { const zend_coroutine_t *co = ZEND_ASYNC_CURRENT_COROUTINE; http3_stream_t *s = (http3_stream_t *)co->extended_data; + ZEND_ASSERT(s != NULL); - if (s == NULL || s->conn == NULL) return; + if (s->conn == NULL) return; /* Static-handler HANDLED path: response_zv already carries the * synchronous body (inline small file or 4xx). Skip the user handler; @@ -314,7 +496,7 @@ static void h3_handler_coroutine_entry(void) if (fcall == NULL) return; #ifdef HAVE_HTTP_COMPRESSION - /* Inbound Content-Encoding decode (issue #8). Same shape as the + /* Inbound Content-Encoding decode. Same shape as the * H1/H2 handler entries. */ if (s->request != NULL) { extern int http_compression_decode_request_body( @@ -498,8 +680,7 @@ static bool h3_arm_sendfile(http3_connection_t *c, http3_stream_t *s) static void h3_handler_coroutine_dispose(zend_coroutine_t *coroutine) { http3_stream_t *s = (http3_stream_t *)coroutine->extended_data; - - if (s == NULL) return; + ZEND_ASSERT(s != NULL); H3T(s->stream_id, "4.dispose_enter"); diff --git a/src/http3/http3_internal.h b/src/http3/http3_internal.h index 2c11158..8107c85 100644 --- a/src/http3/http3_internal.h +++ b/src/http3/http3_internal.h @@ -64,6 +64,12 @@ * consistent — zend_hrtime fits. */ ngtcp2_tstamp http3_ts_now(void); +/* Reactor-iteration watchdog budget in nanoseconds. A poll-cb + * tick or a timer fire that takes longer than this is "slow" — on the single + * reactor thread that delay is imposed on every connection's ACK/PTO. Read + * once from PHP_HTTP3_REACTOR_BUDGET_MS (default 10 ms); cached thereafter. */ +uint64_t http3_reactor_budget_ns(void); + /* Secure random bytes via OpenSSL. Returns true on success. Callers MUST * propagate false: a silent zero-fill fallback would produce all-zero * SCIDs and all-zero stateless-reset tokens, both of which are @@ -153,6 +159,12 @@ bool http3_stream_submit_response(http3_connection_t *c, http3_stream_t *s, bool streaming); +/* Reverse path: submit a buffered response from a worker-rendered + * response_wire instead of the per-stream HttpResponse zval. Reactor thread. */ +typedef struct response_wire_s response_wire_t; +bool http3_stream_submit_response_wire(http3_connection_t *c, http3_stream_t *s, + const response_wire_t *rw); + /* Streaming-vtable hooks reused by the static-file delivery TU * (http3_static_response.c). Pumping a file through chunk_queue is * exactly the streaming path: append chunks until EOF, then mark_ended. diff --git a/src/http3/http3_io.c b/src/http3/http3_io.c index 63840ff..d72e06e 100644 --- a/src/http3/http3_io.c +++ b/src/http3/http3_io.c @@ -23,6 +23,7 @@ * nghttp3 + openssl/ssl.h + http3_connection.h */ #include "http3_listener.h" #include "http3_packet.h" +#include "http3/http3_stream.h" /* hq egress sources from http3_stream_t */ #include @@ -56,20 +57,42 @@ static void timer_fire_cb(zend_async_event_t *event, void *result, zend_object *exception) { (void)event; (void)result; (void)exception; - http3_timer_cb_t *tcb = (http3_timer_cb_t *)cb; - http3_connection_t *c = tcb->conn; + http3_timer_cb_t *const tcb = (http3_timer_cb_t *)cb; + http3_connection_t *const c = tcb->conn; if (c == NULL || c->closed) { return; } - http3_packet_stats_t *stats = http3_listener_packet_stats(c->listener); + http3_packet_stats_t *const stats = http3_listener_packet_stats(c->listener); if (stats != NULL) { stats->quic_timer_fired++; + + /* Per-connection ACK/PTO service delay: how far past + * its armed deadline this timer actually fired. Anything beyond the + * reactor budget means the reactor was busy when this connection's + * ACK/loss timer was due. */ + if (c->timer_expiry_ns != 0) { + const uint64_t now_ns = (uint64_t)http3_ts_now(); + + if (now_ns > c->timer_expiry_ns) { + const uint64_t late = now_ns - c->timer_expiry_ns; + + if (late > stats->reactor_max_timer_late_ns) { + stats->reactor_max_timer_late_ns = late; + } + + if (late > http3_reactor_budget_ns()) { + stats->reactor_timer_late++; + } + } + } } - int rv = ngtcp2_conn_handle_expiry((ngtcp2_conn *)c->ngtcp2_conn, http3_ts_now()); + c->timer_expiry_ns = 0; + + const int rv = ngtcp2_conn_handle_expiry((ngtcp2_conn *)c->ngtcp2_conn, http3_ts_now()); /* Idle-timeout: peer is gone, RFC 9000 §10.1 says do NOT emit * CONNECTION_CLOSE. Mark sent_connection_close so the teardown also * skips the emit, and reap immediately. */ @@ -130,10 +153,15 @@ void http3_connection_arm_timer(http3_connection_t *c) if (expiry == UINT64_MAX) { /* ngtcp2 has nothing scheduled — drop any stale timer. */ + c->timer_expiry_ns = 0; http3_connection_detach_timer(c); return; } + /* Stamp the deadline we're arming for so timer_fire_cb can measure how + * late the fire actually was (reactor service delay). */ + c->timer_expiry_ns = (uint64_t)expiry; + ngtcp2_tstamp now = http3_ts_now(); uint64_t delay_ns = (expiry > now) ? (expiry - now) : 1; /* fire ASAP */ zend_ulong ms = (zend_ulong)(delay_ns / 1000000ULL); @@ -197,7 +225,7 @@ void http3_connection_drain_out(http3_connection_t *c) return; } - http3_packet_stats_t *stats = http3_listener_packet_stats(c->listener); + http3_packet_stats_t *const stats = http3_listener_packet_stats(c->listener); /* Loop until ngtcp2 has nothing to emit. writev_stream returns the * number of bytes written into our buffer, 0 when congestion- @@ -266,7 +294,7 @@ void http3_connection_drain_out(http3_connection_t *c) } \ } while (0) - /* Phase 2 — opt-in pacing (#59, HttpServerConfig::setHttp3Pacing). OFF + /* Opt-in pacing (HttpServerConfig::setHttp3Pacing). OFF * by default: the block below is inert and the drain runs exactly as * before. ON: cap each burst at the congestion controller's * send_quantum and yield to the timer only for a real inter-burst gap @@ -297,12 +325,39 @@ void http3_connection_drain_out(http3_connection_t *c) * nothing pending; we still call writev_stream below with * stream_id = -1 so ngtcp2 can emit ACK / control frames of its * own. */ - int64_t h3_stream_id = -1; - int h3_fin = 0; - nghttp3_vec h3_vec[16]; - nghttp3_ssize h3_veccnt = 0; + int64_t h3_stream_id = -1; + int h3_fin = 0; + nghttp3_vec h3_vec[16]; + nghttp3_ssize h3_veccnt = 0; + http3_stream_t *hq_cur = NULL; + + if (c->proto == HTTP3_PROTO_HQ) { + /* hq-interop: source raw response bytes from a served stream's + * hq_body (mmap'd file or a literal) — no nghttp3 framing. Pick the + * first stream whose response is ready and whose FIN has not gone + * out yet; the FIN rides the tail of the body (or a bare FIN when + * the body is empty). */ + for (http3_stream_t *s = c->streams_head; s != NULL; s = s->list_next) { + if (s->hq_served && !s->hq_fin_sent) { + hq_cur = s; + break; + } + } - if (c->nghttp3_conn != NULL && !h3_framing_dead) { + if (hq_cur != NULL) { + const size_t remain = hq_cur->hq_body_len - hq_cur->hq_body_off; + + h3_stream_id = hq_cur->stream_id; + if (remain > 0) { + h3_vec[0].base = (uint8_t *)hq_cur->hq_body + hq_cur->hq_body_off; + h3_vec[0].len = remain; + h3_veccnt = 1; + } else { + h3_veccnt = 0; /* empty body / fully drained: bare FIN */ + } + h3_fin = 1; + } + } else if (c->nghttp3_conn != NULL && !h3_framing_dead) { h3_veccnt = nghttp3_conn_writev_stream( (nghttp3_conn *)c->nghttp3_conn, &h3_stream_id, &h3_fin, @@ -328,12 +383,19 @@ void http3_connection_drain_out(http3_connection_t *c) ngtcp2_ssize pdatalen = 0; /* WRITE_MORE only meaningful when we actually have stream data; * passing it on a bare-handshake / ACK-only call asks ngtcp2 to - * spin waiting for stream input that will never come. */ - uint32_t flags = (h3_stream_id >= 0) - ? (NGTCP2_WRITE_STREAM_FLAG_MORE - | (h3_fin ? NGTCP2_WRITE_STREAM_FLAG_FIN : 0)) - : 0; - ngtcp2_ssize n = ngtcp2_conn_writev_stream( + * spin waiting for stream input that will never come. hq omits + * WRITE_MORE: with no nghttp3 coalescing there is no remainder to + * resume, and MORE on a bare FIN would loop forever. */ + uint32_t flags = 0; + + if (h3_stream_id >= 0) { + flags = h3_fin ? NGTCP2_WRITE_STREAM_FLAG_FIN : 0; + + if (c->proto != HTTP3_PROTO_HQ) { + flags |= NGTCP2_WRITE_STREAM_FLAG_MORE; + } + } + const ngtcp2_ssize written = ngtcp2_conn_writev_stream( (ngtcp2_conn *)c->ngtcp2_conn, &ps.path, &pi, batch_buf + batch_off, H3_PKT_SLOT, @@ -343,7 +405,7 @@ void http3_connection_drain_out(http3_connection_t *c) (const ngtcp2_vec *)h3_vec, (size_t)h3_veccnt, http3_ts_now()); - if (n == NGTCP2_ERR_WRITE_MORE) { + if (written == NGTCP2_ERR_WRITE_MORE) { /* WRITE_MORE means ngtcp2 accepted pdatalen bytes from * nghttp3 into the next packet but has room for more. Tell * nghttp3 the bytes are committed and keep going — without @@ -353,24 +415,31 @@ void http3_connection_drain_out(http3_connection_t *c) nghttp3_conn_add_write_offset( (nghttp3_conn *)c->nghttp3_conn, h3_stream_id, (size_t)pdatalen); + } else if (hq_cur != NULL && pdatalen > 0) { + hq_cur->hq_body_off += (size_t)pdatalen; } continue; } - if (n == NGTCP2_ERR_STREAM_DATA_BLOCKED || n == NGTCP2_ERR_STREAM_SHUT_WR) { + if (written == NGTCP2_ERR_STREAM_DATA_BLOCKED || written == NGTCP2_ERR_STREAM_SHUT_WR) { /* Flow-control or half-closed write side. Pause the stream * so nghttp3 stops handing us data on it until ngtcp2 * extends the window via extend_max_stream_data callback. */ if (c->nghttp3_conn != NULL) { nghttp3_conn_block_stream( (nghttp3_conn *)c->nghttp3_conn, h3_stream_id); + continue; } - continue; + /* hq has no per-stream block list; end this drain and let a later + * tick resume once the window opens (proper cwnd-wake arrives with + * the large-file hq path). */ + H3_FLUSH_BATCH(); + break; } - if (n == 0) { + if (written == 0) { /* No outgoing datagram produced. If nghttp3 had data ready * and ngtcp2 still produced nothing, ack the bytes anyway * (avoids a spin loop). Otherwise we're truly idle. */ @@ -379,13 +448,16 @@ void http3_connection_drain_out(http3_connection_t *c) (nghttp3_conn *)c->nghttp3_conn, h3_stream_id, (size_t)pdatalen); continue; + } else if (hq_cur != NULL && pdatalen > 0) { + hq_cur->hq_body_off += (size_t)pdatalen; + continue; } H3_FLUSH_BATCH(); break; } - if (n < 0) { + if (UNEXPECTED(written < 0)) { if (stats != NULL) stats->quic_write_error++; H3_FLUSH_BATCH(); break; @@ -395,6 +467,21 @@ void http3_connection_drain_out(http3_connection_t *c) nghttp3_conn_add_write_offset( (nghttp3_conn *)c->nghttp3_conn, h3_stream_id, (size_t)pdatalen); + } else if (hq_cur != NULL) { + /* pdatalen is -1 when the packet carried no stream data (ACK/control + * only) — must NOT advance then, or (size_t)(-1) wraps the offset + * backward and the next pick re-sends bytes. Mirrors the h3 guard. */ + if (pdatalen > 0) { + hq_cur->hq_body_off += (size_t)pdatalen; + } + + /* FLAG_FIN rode the final data packet; once the body is fully + * drained the FIN is out, so stop re-picking the stream (it stays + * alive until stream_close releases the slab). Covers the empty + * body bare-FIN (off == len == 0). */ + if (hq_cur->hq_body_off >= hq_cur->hq_body_len) { + hq_cur->hq_fin_sent = true; + } } /* Append the freshly-written packet to the batch. GSO requires @@ -402,7 +489,7 @@ void http3_connection_drain_out(http3_connection_t *c) * bytes; we enforce that here by flushing eagerly when the * size pattern breaks. ECN must also match across the batch * (cmsg(IP_TOS) is per-sendmsg) — flush eagerly if it changes. */ - size_t pkt_len = (size_t)n; + size_t pkt_len = (size_t)written; uint8_t pkt_ecn = pi.ecn; /* ngtcp2 reported the destination for this packet in ps.path — copy * it out before the next writev overwrites the storage. Usually == @@ -502,6 +589,21 @@ void http3_connection_drain_out(http3_connection_t *c) * the timer is armed only on the live branch and `c` is dead afterward. */ void http3_connection_flush(http3_connection_t *c) { + /* Migration storm flagged on read: shed the connection now. emit_close + * targets c->peer — the live (migrated) address — so the close reaches the + * client, which reconnects cleanly instead of stalling on a wedged path. */ + if (c->migration_storm) { + http3_packet_stats_t *stats = http3_listener_packet_stats(c->listener); + + if (stats != NULL) { + stats->quic_migration_storm_shed++; + } + + http3_connection_emit_close(c); + http3_connection_reap(c); + return; + } + http3_connection_drain_out(c); if (!http3_connection_check_terminal(c)) { diff --git a/src/http3/http3_listener.c b/src/http3/http3_listener.c index 3143cac..4fd874d 100644 --- a/src/http3/http3_listener.c +++ b/src/http3/http3_listener.c @@ -11,11 +11,16 @@ #endif #include +#include "Zend/zend_hrtime.h" #include "http3_listener.h" #include "http3_packet.h" #include "http3_connection.h" +#include "http3_internal.h" /* http3_reactor_budget_ns */ +#include "http3_steer.h" /* CID steering */ #include "php_http_server.h" #include "log/http_log.h" +#include "Zend/zend_atomic.h" +#include /* offsetof */ #include @@ -105,7 +110,7 @@ struct _http3_listener_s { * twice as we would if we iterated conn_map. */ http3_connection_t *conn_list; - /* Phase-1 deferred-output dirty-list head, linked through + /* Deferred-output dirty-list head, linked through * conn->dirty_next. The read path marks conns here instead of * draining per datagram; http3_listener_flush_dirty drains the list * once per recvmmsg tick. Always empty between ticks — populated and @@ -122,9 +127,30 @@ struct _http3_listener_s { * us. Stored as void* to keep php_http_server.h out of public * headers; H3 dispatch reaches handler fcall + server_scope by * casting and calling http_protocol_get_handler. NULL when the - * listener is driven directly by a unit test. */ + * listener is driven directly by a unit test, or in reactor mode + * (the server object lives on the parent thread). */ void *server_obj; + /* Reactor mode. Non-NULL => this listener runs on a transport + * reactor and routes parsed requests to PHP workers via the registry + * instead of dispatching locally. Non-owning (the parent owns it). NULL is + * the unchanged single-thread path. */ + const http3_reactor_ctx_t *reactor_ctx; + + /* CID steering group. Non-NULL => this listener forwards + * stray datagrams (DCID decodes to another reactor) to their owner. Shared + * across the endpoint's per-reactor listeners; owned by the parent, outlives + * the listener. NULL = no steering (single reactor / single-thread). */ + http3_steer_group_t *steer; + + /* Drain-batch deferred-flush link. A forwarded datagram marks its + * owner listener here instead of flushing per packet; the reactor drain + * epilogue flushes the whole batch once, mirroring the recvmmsg tick's + * single deferred flush. in_steer_flush guards double-linking. Touched only + * on this listener's own reactor thread (feed_fn / epilogue / destroy). */ + http3_listener_t *steer_flush_next; + bool in_steer_flush; + /* 32-byte HMAC-SHA256 key used to derive stateless-reset * tokens (RFC 9000 §10.3). Generated once at spawn from OpenSSL's * DRBG; per-process lifetime, so a server restart invalidates all @@ -169,6 +195,11 @@ struct _http3_listener_s { http3_listener_stats_t stats; + /* Reactor watchdog rate-limit gate: hrtime of the last + * slow-tick WARN we emitted. Kept off the stats snapshot — it is + * internal throttle state, not a counter. */ + uint64_t wd_last_warn_ns; + /* Slab pool for http3_stream_t. Shared across all conns on this * listener. Initialised in http3_listener_spawn, cleaned up in * http3_listener_destroy after all conns are gone. */ @@ -359,6 +390,75 @@ static void drain_err_queue(http3_listener_t *listener) } } +/* Classify a tick latency into one of 12 histogram buckets, edges aligned + * to the QUIC ACK budget rather than a plain log2 so the buckets read + * directly: bucket 8 is the first one past max_ack_delay (25 ms). */ +static unsigned h3_reactor_lat_bucket(uint64_t ns) +{ + const uint64_t us = ns / 1000ULL; + + if (us < 50) return 0; + if (us < 100) return 1; + if (us < 250) return 2; + if (us < 500) return 3; + if (us < 1000) return 4; /* 1 ms */ + if (us < 2500) return 5; + if (us < 5000) return 6; + if (us < 10000) return 7; /* 10 ms — default budget edge */ + if (us < 25000) return 8; /* 25 ms — max_ack_delay */ + if (us < 50000) return 9; + if (us < 100000) return 10; + + return 11; /* >= 100 ms */ +} + +/* Record one reactor tick. dt_ns is the poll-cb wall time; + * datagrams is how many were processed this wakeup (for the WARN line). On + * a budget overrun, emit at most one WARN per second so a sustained stall + * does not flood the log. Cheap enough to run unconditionally — two + * hrtime reads per wakeup, not per datagram. */ +static void h3_reactor_tick_record(http3_listener_t *l, uint64_t dt_ns, + unsigned datagrams) +{ + http3_packet_stats_t *st = &l->stats.packet; + + st->reactor_ticks++; + st->reactor_busy_ns += dt_ns; + st->reactor_lat_bucket[h3_reactor_lat_bucket(dt_ns)]++; + + if (dt_ns > st->reactor_max_tick_ns) { + st->reactor_max_tick_ns = dt_ns; + } + + const uint64_t budget = http3_reactor_budget_ns(); + + if (dt_ns <= budget) { + return; + } + + st->reactor_slow_ticks++; + + /* Budget overrun — the reactor was heads-down for dt_ns, delaying + * ACK/PTO for every live connection by that much. Throttle the WARN to + * one per second. */ + const uint64_t now = (uint64_t)zend_hrtime(); + + if (now - l->wd_last_warn_ns < 1000000000ULL) { + return; + } + + l->wd_last_warn_ns = now; + + if (l->server_obj != NULL) { + http_logf_warn( + http_server_get_log_state((http_server_object *)l->server_obj), + "h3.reactor.slow_tick budget_ms=%llu tick_ms=%.3f datagrams=%u " + "conns=%u", + (unsigned long long)(budget / 1000000ULL), + (double)dt_ns / 1000000.0, datagrams, l->conn_count); + } +} + /* Raw-fd recvmmsg path. Drains up to HTTP3_LISTENER_RECV_BATCH * datagrams per recvmmsg syscall, capped at 16 batches per poll wakeup. * h2o picks the same 10-batch limit (lib/http3/common.c:819) — a larger @@ -386,6 +486,12 @@ static void http3_listener_poll_cb(zend_async_event_t *event, return; } + /* Reactor watchdog: time this whole tick. Capture before + * the errq drain so the measurement covers every bit of reactor work, + * and route all exits through the tick_done tail. */ + const uint64_t wd_t0 = (uint64_t)zend_hrtime(); + unsigned wd_datagrams = 0; + /* Drain the kernel error queue — but only when there is reason to * believe something is pending. Most sockets never see ICMP errors, * and the unconditional recvmsg(MSG_ERRQUEUE) costs ~10% throughput @@ -442,14 +548,14 @@ static void http3_listener_poll_cb(zend_async_event_t *event, listener->stats.datagrams_errored++; } - return; + goto tick_done; } for (int i = 0; i < n; ++i) { const struct sockaddr *src = (const struct sockaddr *)&src_addrs[i]; - socklen_t src_len = (socklen_t)mess[i].msg_hdr.msg_namelen; - size_t dlen = (size_t)mess[i].msg_len; + const socklen_t src_len = (socklen_t)mess[i].msg_hdr.msg_namelen; + const size_t dlen = (size_t)mess[i].msg_len; if (dlen == 0 || src_len == 0) { continue; @@ -514,12 +620,14 @@ static void http3_listener_poll_cb(zend_async_event_t *event, listener->stats.datagrams_received++; listener->stats.bytes_received += (uint64_t)plen; listener->stats.last_datagram_size = plen; + wd_datagrams++; http3_connection_dispatch(listener, bufs[i] + off, plen, ecn, src, src_len); /* Any segment's dispatch may reap-on-close the listener; - * re-check before touching it again. */ + * re-check before touching it again. Teardown tick — bail + * without recording a (meaningless) latency sample. */ if (listener->closed) { return; } @@ -539,6 +647,14 @@ static void http3_listener_poll_cb(zend_async_event_t *event, * bail-out above returns early and skips this — that path is teardown, * where conns are reaped by listener destroy, not flushed. */ http3_listener_flush_dirty(listener); + + /* Watchdog tail. The EAGAIN/error early-exit jumps here PAST the flush, + * preserving the pre-existing behaviour where a tick that ends on an + * empty recvmmsg skips flush_dirty (any conns dirtied this tick drain on + * the next wakeup). Only the latency sample is taken here. */ +tick_done: + h3_reactor_tick_record(listener, (uint64_t)zend_hrtime() - wd_t0, + wd_datagrams); } #endif /* __linux__ */ @@ -619,8 +735,8 @@ ssize_t http3_listener_send_packet(http3_listener_t *l, /* Legacy libuv path — used on Windows/macOS and any other non-Linux * platform. TrueAsync's UDP API has no synchronous best-effort send - * (`ZEND_ASYNC_UDP_TRY_SEND` was sketched in the Step-9 audit but - * never landed); we fire-and-forget through ZEND_ASYNC_UDP_SENDTO + * (`ZEND_ASYNC_UDP_TRY_SEND` was sketched but never landed); we + * fire-and-forget through ZEND_ASYNC_UDP_SENDTO * which queues to libuv's uv_udp_send. UDP is all-or-nothing per * datagram, so reporting `len` bytes accepted is correct as long as * the request object initialised. */ @@ -770,6 +886,195 @@ void *http3_listener_server_obj(const http3_listener_t *l) return l != NULL ? l->server_obj : NULL; } +const http3_reactor_ctx_t *http3_listener_reactor_ctx(const http3_listener_t *l) +{ + return l != NULL ? l->reactor_ctx : NULL; +} + +int http3_listener_reactor_id(const http3_listener_t *l) +{ + return (l != NULL && l->reactor_ctx != NULL) ? l->reactor_ctx->reactor_id : -1; +} + +/* ------------------------------------------------------------------------ + * CID steering + * ------------------------------------------------------------------------ */ + +/* The per-endpoint table of reactor-owned listeners, indexed by reactor id. + * Slots are atomic so a reactor can publish/retire its own slot while siblings + * read it on the forward path without locking. */ +struct http3_steer_group_s { + reactor_pool_t *pool; + int count; + zend_atomic_ptr listeners[1]; /* [count] — over-allocated */ +}; + +/* A forwarded datagram. Carries the OWNER REACTOR ID, not a listener pointer: + * the owner re-resolves its listener from the group slot at apply time, so a + * listener torn down between forward and apply is a clean drop, never a UAF. */ +typedef struct { + http3_steer_group_t *group; + int owner_id; + uint8_t ecn; + socklen_t peer_len; + struct sockaddr_storage peer; + size_t datalen; + uint8_t data[1]; /* [datalen] — over-allocated */ +} http3_steer_msg_t; + +http3_steer_group_t *http3_steer_group_create(reactor_pool_t *pool, const int count) +{ + if (pool == NULL || count <= 0) { + return NULL; + } + + http3_steer_group_t *const g = + pemalloc(offsetof(http3_steer_group_t, listeners) + + (size_t)count * sizeof(zend_atomic_ptr), 1); + g->pool = pool; + g->count = count; + + for (int i = 0; i < count; i++) { + ZEND_ATOMIC_PTR_INIT(&g->listeners[i], NULL); + } + + return g; +} + +void http3_steer_group_publish(http3_steer_group_t *g, const int reactor_id, + http3_listener_t *listener) +{ + if (g == NULL || reactor_id < 0 || reactor_id >= g->count) { + return; + } + + zend_atomic_ptr_store_ex(&g->listeners[reactor_id], listener); +} + +void http3_steer_group_free(http3_steer_group_t *g) +{ + if (g != NULL) { + pefree(g, 1); + } +} + +void http3_listener_set_steer(http3_listener_t *l, http3_steer_group_t *g) +{ + if (l != NULL) { + l->steer = g; + } +} + +/* Per-reactor-thread list of listeners that took a forwarded datagram in the + * current mailbox drain batch. Built by http3_steer_feed_fn, drained once by + * http3_reactor_steer_flush_epilogue at batch end. __thread: each reactor has + * its own, no locking, and a listener only ever appears on its owner reactor. */ +static __thread http3_listener_t *tls_steer_flush_head = NULL; + +/* Reactor drain epilogue (registered via reactor_pool_set_drain_epilogue): + * flush every listener that took a forwarded datagram this batch, exactly once. + * This coalesces a burst of steered datagrams into one flush_dirty per listener + * — the same single deferred flush the recvmmsg tick does — instead of flushing + * per forwarded packet, which split a connection's output across separate sends + * and perturbed ACK / path-validation timing under rapid migration. */ +void http3_reactor_steer_flush_epilogue(void) +{ + http3_listener_t *l = tls_steer_flush_head; + tls_steer_flush_head = NULL; + + while (l != NULL) { + http3_listener_t *const next = l->steer_flush_next; + l->steer_flush_next = NULL; + l->in_steer_flush = false; + + http3_listener_flush_dirty(l); + + l = next; + } +} + +/* Runs ON THE OWNER reactor (via reactor_pool_post_exec): re-resolve the + * owner's listener from the group and feed the forwarded datagram into it as if + * it had arrived on the owner's own socket. Marks the conn dirty (in dispatch) + * and queues the listener for the drain-batch epilogue, so a burst of forwarded + * datagrams flushes once — like a recvmmsg tick — not once per packet. */ +static void http3_steer_feed_fn(void *arg) +{ + http3_steer_msg_t *const m = (http3_steer_msg_t *)arg; + http3_listener_t *const target = + (http3_listener_t *)zend_atomic_ptr_load_ex(&m->group->listeners[m->owner_id]); + + if (target != NULL) { + target->stats.packet.quic_steered_in++; + http3_connection_dispatch(target, m->data, m->datalen, m->ecn, + (struct sockaddr *)&m->peer, m->peer_len); + + if (!target->in_steer_flush) { + target->in_steer_flush = true; + target->steer_flush_next = tls_steer_flush_head; + tls_steer_flush_head = target; + } + } + + pefree(m, 1); +} + +/* KNOWN LIMITATION (see docs/PLAN_REACTOR_POOL.md, D6): forwarding works + * correctly, but pathological back-to-back migrations (7+ NAT rebinds on one + * connection in milliseconds) can deadlock ngtcp2 path validation — investigated + * to a circular validation/cwnd stall, ~5% at 15 rebinds, 0% at a realistic + * single rebind. The deliberate fix is eBPF reuseport steering (no forward hop). */ +bool http3_listener_try_steer(http3_listener_t *l, + const uint32_t version, + const uint8_t *dcid, const size_t dcidlen, + const uint8_t *data, const size_t datalen, + const uint8_t ecn, + const struct sockaddr *peer, const socklen_t peer_len) +{ + http3_steer_group_t *const g = l != NULL ? l->steer : NULL; + + if (g == NULL || version != 0) { + /* Not steering, or a long-header packet (Initial/Handshake): an Initial + * carries a client-chosen DCID with no id, and pre-handshake migration + * is disallowed, so only short-header (1-RTT) packets are steerable. */ + return false; + } + + const int owner = http3_steer_decode(dcid, dcidlen); + + if (owner < 0 || owner >= g->count || owner == http3_listener_reactor_id(l)) { + /* Undecodable, out of range, or already ours — handle locally. */ + return false; + } + + const socklen_t plen = peer_len <= (socklen_t)sizeof(((http3_steer_msg_t *)0)->peer) + ? peer_len + : (socklen_t)sizeof(((http3_steer_msg_t *)0)->peer); + + http3_steer_msg_t *const m = + pemalloc(offsetof(http3_steer_msg_t, data) + datalen, 1); + m->group = g; + m->owner_id = owner; + m->ecn = ecn; + m->peer_len = plen; + memcpy(&m->peer, peer, (size_t)plen); + m->datalen = datalen; + memcpy(m->data, data, datalen); + + if (!reactor_pool_post_exec(g->pool, owner, http3_steer_feed_fn, m)) { + /* Owner mailbox full — drop the datagram (QUIC retransmits). Do not fall + * through to the local miss path: that would stateless-reset a live + * connection we know lives elsewhere. */ + pefree(m, 1); + l->stats.packet.quic_steered_drop++; + return true; + } + + l->stats.packet.quic_steered_out++; + + return true; +} + const uint8_t *http3_listener_sr_key(const http3_listener_t *l) { return l != NULL ? l->sr_key : NULL; @@ -838,7 +1143,9 @@ void http3_listener_flush_dirty(http3_listener_t *l) * it does not help if the slot is reallocated to a live conn first. */ void http3_listener_unmark_flush(http3_listener_t *l, http3_connection_t *conn) { - if (l == NULL || conn == NULL || !conn->in_dirty) { + ZEND_ASSERT(l != NULL && conn != NULL); + + if (!conn->in_dirty) { return; } @@ -865,7 +1172,7 @@ void http3_listener_unmark_flush(http3_listener_t *l, http3_connection_t *conn) static bool peer_key_from_sockaddr(const struct sockaddr *peer, uint8_t out[16], size_t *out_len) { - if (peer == NULL) return false; + ZEND_ASSERT(peer != NULL); if (peer->sa_family == AF_INET) { memcpy(out, &((const struct sockaddr_in *)peer)->sin_addr, 4); @@ -1004,6 +1311,12 @@ void http3_listener_remove_connection(http3_listener_t *l, zend_hash_str_del(l->conn_map, (const char *)conn->routing_dcid, conn->routing_dcidlen); } + + /* Server-issued alternate CIDs (NEW_CONNECTION_ID). ngtcp2 keeps a + * retired CID in its pool for ~3*PTO before firing remove_connection_id, + * so a get_scid sweep here would miss retired-but-present keys — we + * track them ourselves and remove exactly what we registered. */ + http3_connection_unregister_all_issued_cids(conn); } } @@ -1033,7 +1346,8 @@ HashTable *http3_listener_conn_map(http3_listener_t *l) extern int ngtcp2_crypto_ossl_init(void); http3_listener_t *http3_listener_spawn(const char *host, int port, - void *ssl_ctx, void *server_obj) + void *ssl_ctx, void *server_obj, + const http3_reactor_ctx_t *reactor_ctx) { /* Belt-and-braces: even if connection_attach_tls also calls this, * doing it once at listener-spawn time guarantees the provider is @@ -1041,11 +1355,12 @@ http3_listener_t *http3_listener_spawn(const char *host, int port, (void)ngtcp2_crypto_ossl_init(); http3_listener_t *listener = ecalloc(1, sizeof(http3_listener_t)); - listener->fd = -1; - listener->host = estrdup(host); - listener->port = port; - listener->ssl_ctx = ssl_ctx; - listener->server_obj = server_obj; + listener->fd = -1; + listener->host = estrdup(host); + listener->port = port; + listener->ssl_ctx = ssl_ctx; + listener->server_obj = server_obj; + listener->reactor_ctx = reactor_ctx; http3_stream_pool_init(&listener->stream_pool); #ifdef __linux__ @@ -1119,8 +1434,9 @@ http3_listener_t *http3_listener_spawn(const char *host, int port, * 0 = leave the OS default; NULL server_obj in a unit test resolves * to the 0 fallback). */ { - const uint32_t want = - http_server_get_http3_socket_buffer_bytes((const http_server_object *)server_obj); + const uint32_t want = reactor_ctx != NULL + ? reactor_ctx->socket_buffer_bytes + : http_server_get_http3_socket_buffer_bytes((const http_server_object *)server_obj); if (want > 0) { const int sockbuf = (int)want; @@ -1247,8 +1563,10 @@ http3_listener_t *http3_listener_spawn(const char *host, int port, * 2. PHP_HTTP3_PEER_BUDGET env (ops escape hatch, [1, 4096]). * 3. Built-in default 0 = disabled. */ listener->peer_budget = 0; - uint32_t cfg_budget = http_server_get_http3_peer_connection_budget( - (const http_server_object *)server_obj); + uint32_t cfg_budget = reactor_ctx != NULL + ? reactor_ctx->peer_budget + : http_server_get_http3_peer_connection_budget( + (const http_server_object *)server_obj); if (cfg_budget != 0) { listener->peer_budget = cfg_budget; @@ -1284,9 +1602,13 @@ http3_listener_t *http3_listener_spawn(const char *host, int port, * worker_connections — the resource backstop now that peer_budget is * opt-in. */ { - const int mc = http_server_get_max_connections( - (const http_server_object *)server_obj); - listener->max_conns = mc > 0 ? (uint32_t)mc : 0; + if (reactor_ctx != NULL) { + listener->max_conns = reactor_ctx->max_conns; + } else { + const int mc = http_server_get_max_connections( + (const http_server_object *)server_obj); + listener->max_conns = mc > 0 ? (uint32_t)mc : 0; + } } #ifdef __linux__ @@ -1382,6 +1704,30 @@ int http3_listener_port(const http3_listener_t *listener) return listener ? listener->port : 0; } +int http3_listener_local_port(const http3_listener_t *listener) +{ + if (listener == NULL) { + return -1; + } + +#ifdef __linux__ + if (listener->fd >= 0) { + struct sockaddr_storage ss; + socklen_t len = (socklen_t)sizeof(ss); + + if (getsockname(listener->fd, (struct sockaddr *)&ss, &len) == 0) { + if (ss.ss_family == AF_INET6) { + return ntohs(((struct sockaddr_in6 *)&ss)->sin6_port); + } + + return ntohs(((struct sockaddr_in *)&ss)->sin_port); + } + } +#endif + + return listener->port; +} + void http3_listener_destroy(http3_listener_t *listener) { if (listener == NULL || listener->closed) { @@ -1390,6 +1736,35 @@ void http3_listener_destroy(http3_listener_t *listener) listener->closed = true; + /* 0. Retire our steering slot so a sibling reactor stops forwarding stray + * datagrams to us. Runs on our own reactor thread (same thread that + * drains any already-queued forward), so a forward in flight either ran + * before this or finds the slot NULL after — never a freed listener. */ + if (listener->steer != NULL) { + http3_steer_group_publish(listener->steer, + http3_listener_reactor_id(listener), NULL); + listener->steer = NULL; + } + + /* Drop ourselves from this reactor's pending steer-flush list (same thread) + * so the drain epilogue never flushes a listener we are freeing — covers a + * teardown command landing in the same batch as a forward to us. */ + if (listener->in_steer_flush) { + http3_listener_t **pp = &tls_steer_flush_head; + + while (*pp != NULL) { + if (*pp == listener) { + *pp = listener->steer_flush_next; + break; + } + + pp = &(*pp)->steer_flush_next; + } + + listener->in_steer_flush = false; + listener->steer_flush_next = NULL; + } + /* 1. Sever the callback's back-pointer to our listener data BEFORE * touching the io. Any recv_cb invocation that the reactor has * already queued (or runs during uv_close's teardown tick) reads @@ -1449,24 +1824,10 @@ void http3_listener_destroy(http3_listener_t *listener) #endif if (listener->udp_io != NULL) { zend_async_io_t *io = listener->udp_io; - zend_async_udp_req_t *recv_req = listener->recv_req; listener->udp_io = NULL; listener->recv_cb = NULL; listener->recv_req = NULL; ZEND_ASYNC_IO_CLOSE(io); - - /* Dispose the multishot recv req we submitted. ZEND_ASYNC_IO_CLOSE - * only detaches io->active_req (its await-handoff path assumes a - * parked coroutine frees it), and our recv callback merely counts - * datagrams — neither frees the req. Without this the req struct + - * 2 KiB recv buffer (plus any error exception) leak on every listener - * teardown. Dispose AFTER close: close clears the reactor's reference - * so there is no use-after-free, and the typed recv_req pointer frees - * through the correct zend_async_udp_req_t layout. */ - if (recv_req != NULL && recv_req->dispose != NULL) { - recv_req->dispose(recv_req); - } - io->event.dispose(&io->event); } diff --git a/src/http3/http3_listener.h b/src/http3/http3_listener.h index 9c99e2f..eac1bec 100644 --- a/src/http3/http3_listener.h +++ b/src/http3/http3_listener.h @@ -16,6 +16,8 @@ #include "http3_packet.h" #include "http3/http3_stream_pool.h" +#include "core/worker_registry.h" /* worker_registry_t */ +#include "core/reactor_pool.h" /* reactor_pool_t */ /* HTTP/3 UDP listener. * @@ -28,14 +30,96 @@ typedef struct _http3_listener_s http3_listener_t; typedef struct _http3_connection_s http3_connection_t; /* defined in http3_connection.h */ +/* Thread-clean context for a reactor-OWNED H3 listener. + * + * Everything a transport reactor needs to serve QUIC and route parsed requests + * to PHP workers, WITHOUT the PHP server object — that lives on the parent + * thread and its ZMM is UB to touch from a reactor. Request-service stats + * (counters / view / telemetry) are deliberately absent: in the split they are + * the worker's job (the handler runs there). Built on the parent before spawn, + * owned by the parent, outlives the listener. + * + * When a listener carries one of these it is in "reactor mode": the dispatch + * path builds a persistent http_request_t and hands it to a worker instead of + * spawning a handler coroutine locally. A NULL reactor context (the default) is + * the unchanged single-thread / unit-test path. */ +typedef struct { + worker_registry_t *registry; /* pick a worker to hand requests to */ + reactor_pool_t *pool; /* reverse path posts back here */ + int reactor_id; /* this listener's reactor slot */ + int n_reactors; /* reactor count — strided worker ownership */ + uint32_t socket_buffer_bytes; /* config scalars resolved on the parent */ + uint32_t peer_budget; + uint32_t max_conns; + + /* Static mounts served on the reactor itself (no PHP, no worker round-trip). + * Borrowed array of const http_static_handler_t* from the owning server — + * the mounts outlive the reactor pool (released only at http_server_free, + * after reactor teardown), so no extra ref is taken. void* to keep the + * static header out of this widely-included file; cast at the call site. */ + const void *static_mounts; + size_t static_mount_count; + + /* Per-reactor open-file cache for the static path. The cache is persistent + * (malloc) so it is created/freed on the parent, but NOT thread-safe — each + * reactor gets its own instance, touched only by its one thread (no locking). + * NULL when no mount opted into StaticHandler::setOpenFileCache. */ + struct http_static_cache_s *static_cache; +} http3_reactor_ctx_t; + /* ssl_ctx is the OpenSSL SSL_CTX* shared with the TCP+TLS path (from * tls_context_t::ctx). Passed as void* to keep openssl/ssl.h out of this * header — H3 listener uses it to build per-connection SSL objects via * ngtcp2_crypto_ossl_configure_server_session. Must be non-NULL when * --enable-http3 is built — addHttp3Listener flags the listener as TLS, * and start() constructs the context before spawning. */ +/* `reactor_ctx` puts the listener in reactor mode (see http3_reactor_ctx_t): + * non-NULL means server_obj is the parent's (NULL on the reactor) and request + * dispatch routes to a worker. NULL is the unchanged single-thread path where + * server_obj drives local dispatch + config. The listener does NOT own the + * context (the parent does); it must outlive the listener. */ http3_listener_t *http3_listener_spawn(const char *host, int port, - void *ssl_ctx, void *server_obj); + void *ssl_ctx, void *server_obj, + const http3_reactor_ctx_t *reactor_ctx); + +/* The reactor context this listener carries, or NULL in single-thread mode. + * The H3 dispatch path checks this to decide local-dispatch vs route-to-worker. */ +const http3_reactor_ctx_t *http3_listener_reactor_ctx(const http3_listener_t *listener); + +/* This listener's reactor id, or -1 in single-thread mode. */ +int http3_listener_reactor_id(const http3_listener_t *listener); + +/* CID steering group: the set of one endpoint's per-reactor + * listeners, indexed by reactor id and shared by all of them, so any reactor + * that receives a stray datagram can forward it to the owner. Opaque; created + * and freed on the parent. Slots are published atomically as listeners spawn + * and cleared on teardown. */ +typedef struct http3_steer_group_s http3_steer_group_t; +http3_steer_group_t *http3_steer_group_create(reactor_pool_t *pool, int count); +void http3_steer_group_publish(http3_steer_group_t *group, int reactor_id, + http3_listener_t *listener); +void http3_steer_group_free(http3_steer_group_t *group); + +/* Put a listener into steering mode against its endpoint's group. */ +void http3_listener_set_steer(http3_listener_t *listener, http3_steer_group_t *group); + +/* If `listener` steers and `data` is a short-header datagram whose DCID decodes + * to a DIFFERENT reactor (a migrated client rehashed onto us by SO_REUSEPORT), + * copy it onto the owner reactor's mailbox and return true — the caller must + * NOT handle it locally. Returns false to let the caller process it normally + * (it is ours, an Initial, or undecodable). */ +bool http3_listener_try_steer(http3_listener_t *listener, + uint32_t version, + const uint8_t *dcid, size_t dcidlen, + const uint8_t *data, size_t datalen, uint8_t ecn, + const struct sockaddr *peer, socklen_t peer_len); + +/* Reverse-path apply, run ON THE REACTOR thread via + * reactor_pool_post_exec: `arg` is a response_wire_t* (ownership transfers) that + * the worker rendered; encode + submit + drain it on the addressed stream, or + * drop it if the stream is already gone. Declared here (void* arg) so the + * worker-side sink can post it without pulling in the H3 internals. */ +void http3_reactor_apply_response(void *arg); /* Returns the http_server_object* stashed at spawn time (cast to void* * to keep this header free of the public server header). The H3 @@ -62,6 +146,13 @@ void http3_listener_get_stats(const http3_listener_t *listener, const char *http3_listener_host(const http3_listener_t *listener); int http3_listener_port(const http3_listener_t *listener); +/* Actual bound UDP port via getsockname — resolves the kernel-assigned port + * when the listener was spawned with port 0 (http3_listener_port returns the + * requested 0). Falls back to the requested port on non-Linux / lookup + * failure. getsockname is thread-agnostic, but callers should still invoke + * this on the reactor that owns the listener. */ +int http3_listener_local_port(const http3_listener_t *listener); + /* Tear down: stop recv, close the IO handle, free the struct. The * underlying zend_async_io_t* teardown is non-blocking (libuv close * completes on the next reactor tick). Safe to call once per listener. */ @@ -107,7 +198,7 @@ ssize_t http3_listener_send_gso(http3_listener_t *listener, * worker, so no locking. Returns non-NULL once the listener is up. */ http3_stream_pool_t *http3_listener_stream_pool(http3_listener_t *listener); -/* Phase-1 deferred output. mark_flush records that `conn` produced +/* Deferred output. mark_flush records that `conn` produced * ngtcp2 output this tick (idempotent — guarded by conn->in_dirty); * flush_dirty drains every marked conn once and clears the list. The * read path marks instead of draining, so a multi-datagram burst to one @@ -115,4 +206,9 @@ http3_stream_pool_t *http3_listener_stream_pool(http3_listener_t *listener); void http3_listener_mark_flush(http3_listener_t *listener, http3_connection_t *conn); void http3_listener_flush_dirty(http3_listener_t *listener); +/* Reactor drain-batch epilogue: flush every listener that took a forwarded + * (steered) datagram this batch exactly once. Register with + * reactor_pool_set_drain_epilogue before reactors start. */ +void http3_reactor_steer_flush_epilogue(void); + #endif /* HTTP3_LISTENER_H */ diff --git a/src/http3/http3_packet.c b/src/http3/http3_packet.c index cc35d0f..d3da62b 100644 --- a/src/http3/http3_packet.c +++ b/src/http3/http3_packet.c @@ -33,6 +33,28 @@ * client's version-negotiation logic, not to randomise our wire bytes. */ #define HTTP3_GREASE_VERSION 0x1a2a3a4aU +/* Reactor watchdog budget. Default 10 ms keeps a tick well + * under QUIC max_ack_delay (25 ms); PHP_HTTP3_REACTOR_BUDGET_MS overrides at + * process start. Cached on first read — same one-shot getenv pattern as the + * H3_TRACE / peer-budget knobs. */ +uint64_t http3_reactor_budget_ns(void) +{ + static uint64_t cached = 0; + + if (cached == 0) { + const char *env = getenv("PHP_HTTP3_REACTOR_BUDGET_MS"); + unsigned long ms = (env != NULL) ? strtoul(env, NULL, 10) : 0; + + if (ms == 0) { + ms = 10; /* default budget */ + } + + cached = (uint64_t)ms * 1000000ULL; + } + + return cached; +} + bool http3_packet_send_version_negotiation( http3_listener_t *listener, const uint8_t *dcid, size_t dcidlen, @@ -89,7 +111,7 @@ void http3_packet_compute_sr_token(const uint8_t key[32], * unit test can drive it without the listener TU. */ void http3_packet_account_send_error(http3_packet_stats_t *st, int err) { - if (st == NULL) { return; } + ZEND_ASSERT(st != NULL); switch (err) { case EAGAIN: @@ -190,8 +212,8 @@ bool http3_packet_send_retry( memcpy(odcid.data, client_dcid, client_dcid_len); uint8_t token[NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN]; - ngtcp2_tstamp ts = (ngtcp2_tstamp)zend_hrtime(); - ngtcp2_ssize tokenlen = ngtcp2_crypto_generate_retry_token( + const ngtcp2_tstamp ts = (ngtcp2_tstamp)zend_hrtime(); + const ngtcp2_ssize tokenlen = ngtcp2_crypto_generate_retry_token( token, retry_token_key, 32, version, (const ngtcp2_sockaddr *)peer, (ngtcp2_socklen)peer_len, &retry_scid, &odcid, ts); @@ -282,8 +304,8 @@ int http3_packet_verify_retry_token( } ngtcp2_cid odcid; - ngtcp2_tstamp ts = (ngtcp2_tstamp)zend_hrtime(); - int rv = ngtcp2_crypto_verify_retry_token( + const ngtcp2_tstamp ts = (ngtcp2_tstamp)zend_hrtime(); + const int rv = ngtcp2_crypto_verify_retry_token( &odcid, token, tokenlen, retry_token_key, 32, version, (const ngtcp2_sockaddr *)peer, (ngtcp2_socklen)peer_len, diff --git a/src/http3/http3_packet.h b/src/http3/http3_packet.h index 3ca4013..3245b25 100644 --- a/src/http3/http3_packet.h +++ b/src/http3/http3_packet.h @@ -41,6 +41,25 @@ typedef struct _http3_packet_stats_s { * necessarily validated). */ uint64_t quic_path_migrations; + /* Migration-storm guard: connections shed for rebinding faster + * than their path validates (a wedge or a migration flood). */ + uint64_t quic_migration_storm_shed; + + /* CID steering. A short-header datagram whose DCID decodes + * to another reactor (a migrated client SO_REUSEPORT-rehashed onto us) is + * forwarded to its owner; the owner counts it as steered_in. steered_drop + * is a forward refused by a full reactor mailbox (packet dropped, QUIC + * retransmits). */ + uint64_t quic_steered_out; /* forwarded to the owning reactor */ + uint64_t quic_steered_in; /* received via forward from another reactor */ + uint64_t quic_steered_drop; /* forward refused (mailbox full) — dropped */ + + /* Server-issued alternate CIDs (NEW_CONNECTION_ID, RFC 9000 §5.1). Each + * is registered in conn_map so a client that rotates its DCID still routes + * home; retired ones are dropped on RETIRE_CONNECTION_ID. */ + uint64_t quic_new_cid_issued; /* get_new_connection_id_cb registered a CID */ + uint64_t quic_cid_retired; /* remove_connection_id_cb dropped a CID */ + /* Write-loop / timer counters. */ uint64_t quic_packets_sent; /* datagrams emitted by writev_stream */ uint64_t quic_bytes_sent; /* cumulative bytes over those datagrams */ @@ -98,6 +117,27 @@ typedef struct _http3_packet_stats_s { uint64_t h3_framing_error; /* nghttp3_conn_writev_stream returned <0 */ uint64_t quic_drain_iter_cap_hit; /* drain_out hit per-call iteration cap */ + /* Reactor-iteration watchdog. The poll-cb is the H3 + * reactor's unit of work: one recvmmsg drain + per-conn output flush. + * On a single reactor thread a long tick delays ACK/PTO generation for + * EVERY live connection by exactly the tick duration, which inflates the + * peer's RTT/PTO and stalls cwnd. So we time each tick and flag overruns + * of the budget (default 10 ms, below QUIC max_ack_delay 25 ms). This + * histogram is the listener-level proxy for "worst-case ACK delay"; the + * timer-late fields below are its per-connection counterpart. */ + uint64_t reactor_ticks; /* poll-cb wakeups measured */ + uint64_t reactor_busy_ns; /* cumulative poll-cb wall time (ns) */ + uint64_t reactor_max_tick_ns; /* slowest single poll-cb (ns) */ + uint64_t reactor_slow_ticks; /* poll-cb wakeups exceeding the budget */ + uint64_t reactor_lat_bucket[12]; /* tick-latency histogram, ACK-budget edges */ + + /* Per-connection ACK/PTO service delay: how late the retransmission/ + * ACK-delay timer fired versus the deadline ngtcp2 asked for. A late + * fire means the reactor was busy when this connection's ACK/PTO was + * due — the per-conn view of the same stall the histogram aggregates. */ + uint64_t reactor_timer_late; /* timer fires past deadline + budget */ + uint64_t reactor_max_timer_late_ns;/* worst observed fire lateness (ns) */ + /* Send-path error categorisation. Every outbound sendmsg lands in * exactly one of these buckets (success increments quic_packets_sent * already accounted above). Use to detect kernel-side trouble that diff --git a/src/http3/http3_steer.c b/src/http3/http3_steer.c new file mode 100644 index 0000000..1dcc6c0 --- /dev/null +++ b/src/http3/http3_steer.c @@ -0,0 +1,133 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ + + HTTP/3 CID steering core (#80 D6 / #72). See http3_steer.h. +*/ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include "http3_steer.h" + +#include +#include + +/* Secure random bytes (defined in http3_connection.c). Forward-declared here + * rather than pulling in the heavy http3_internal.h (php.h + ngtcp2 + nghttp3) + * so this TU stays runtime-free and unit-testable in isolation. */ +bool http3_fill_random(uint8_t *buf, size_t len); + +/* Per-process steering secret. Seeded once via the OpenSSL DRBG in + * http3_steer_init; read-only afterwards so every reactor thread can derive + * the keystream byte without locking. */ +static uint8_t g_steer_key[16]; +static bool g_steer_ready = false; +static bool g_steer_active = false; + +/* One AES-128-ECB block. Used as a PRF: the input is the CID nonce, the first + * output byte is the keystream that masks the id. Cold path only (CID mint and + * conn_map misses) — the per-call EVP context is dwarfed by the stateless-reset + * HMAC the miss path already pays, and pairs alloc/free so it is leak-clean. */ +static bool http3_steer_block(const uint8_t in[16], uint8_t out[16]) +{ + EVP_CIPHER_CTX *const ctx = EVP_CIPHER_CTX_new(); + + if (ctx == NULL) { + return false; + } + + int outl = 0; + const bool ok = + EVP_EncryptInit_ex(ctx, EVP_aes_128_ecb(), NULL, g_steer_key, NULL) == 1 + && EVP_CIPHER_CTX_set_padding(ctx, 0) == 1 + && EVP_EncryptUpdate(ctx, out, &outl, in, 16) == 1 + && outl == 16; + + EVP_CIPHER_CTX_free(ctx); + + return ok; +} + +/* Keystream byte for a 7-byte nonce: AES(key, nonce || zero-pad)[0]. */ +static bool http3_steer_mask(const uint8_t *nonce, uint8_t *out_mask) +{ + uint8_t block[16] = {0}; + uint8_t cipher[16]; + + memcpy(block, nonce, HTTP3_STEER_CID_LEN - 1); + + if (!http3_steer_block(block, cipher)) { + return false; + } + + *out_mask = cipher[0]; + + return true; +} + +bool http3_steer_init(void) +{ + if (g_steer_ready) { + return true; + } + + if (!http3_fill_random(g_steer_key, sizeof(g_steer_key))) { + return false; + } + + g_steer_ready = true; + + return true; +} + +void http3_steer_set_active(const bool active) +{ + g_steer_active = active && g_steer_ready; +} + +bool http3_steer_active(void) +{ + return g_steer_active; +} + +bool http3_steer_encode(uint8_t *cid, const int reactor_id) +{ + if (cid == NULL || !g_steer_ready) { + return false; + } + + /* Random nonce in [1..], id byte masked at [0]. */ + if (!http3_fill_random(cid + 1, HTTP3_STEER_CID_LEN - 1)) { + return false; + } + + uint8_t mask = 0; + + if (!http3_steer_mask(cid + 1, &mask)) { + return false; + } + + cid[0] = (uint8_t)((uint8_t)reactor_id ^ mask); + + return true; +} + +int http3_steer_decode(const uint8_t *cid, const size_t cidlen) +{ + if (cid == NULL || cidlen < HTTP3_STEER_CID_LEN) { + return -1; + } + + uint8_t mask = 0; + + if (!http3_steer_mask(cid + 1, &mask)) { + return -1; + } + + return (int)(uint8_t)(cid[0] ^ mask); +} diff --git a/src/http3/http3_steer.h b/src/http3/http3_steer.h new file mode 100644 index 0000000..7909504 --- /dev/null +++ b/src/http3/http3_steer.h @@ -0,0 +1,52 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ + + HTTP/3 CID steering (#80 D6 / #72): encode the owning reactor's id into + every server-minted Connection ID so any reactor that receives a stray + datagram (after a client migration / NAT rebind reshuffles SO_REUSEPORT) + can decode the owner from the DCID and forward the packet to it. + + The id occupies one byte of the 8-byte CID, masked with a CTR-style + keystream byte derived from the random nonce in the remaining bytes via + AES-128(server-secret). Recovering or targeting an id therefore needs the + per-process key — a plaintext id would let an attacker pin every + connection onto one reactor (targeted single-thread DoS). +*/ + +#ifndef HTTP3_STEER_H +#define HTTP3_STEER_H + +#include +#include +#include + +/* The CID width steering encodes into — matches HTTP3_SCID_LEN. */ +#define HTTP3_STEER_CID_LEN 8 + +/* Generate the per-process steering secret and expand the AES key. Idempotent; + * call once on the parent before any reactor mints a CID. Returns false if the + * DRBG or cipher setup fails (the caller leaves steering inactive). */ +bool http3_steer_init(void); + +/* Arm/disarm steering process-wide. Active only with >1 reactor (a single + * reactor owns every connection, so there is nothing to steer) and <=256 + * reactors (the id is one byte). When inactive, encode/decode are never + * reached — minting falls back to a fully random CID. */ +void http3_steer_set_active(bool active); +bool http3_steer_active(void); + +/* Encode `reactor_id` into a fresh HTTP3_STEER_CID_LEN-byte CID: random nonce + * in bytes [1..], id byte at [0] masked with AES(key, nonce)[0]. Returns false + * on DRBG / cipher failure (caller must fail the mint, never ship a zero CID). */ +bool http3_steer_encode(uint8_t *cid, int reactor_id); + +/* Recover the reactor id encoded in `cid` (the DCID a peer addressed us with). + * Returns 0..255, or -1 if the CID is too short or the cipher failed. The + * caller validates the id against the live reactor count. */ +int http3_steer_decode(const uint8_t *cid, size_t cidlen); + +#endif /* HTTP3_STEER_H */ diff --git a/src/http3/http3_stream.c b/src/http3/http3_stream.c index cfc54ea..f02d9a1 100644 --- a/src/http3/http3_stream.c +++ b/src/http3/http3_stream.c @@ -11,6 +11,9 @@ #endif #include +#ifndef PHP_WIN32 +# include /* munmap — hq-interop mmap'd file bodies (POSIX) */ +#endif #include "Zend/zend_async_API.h" /* zend_async_trigger_event_t dispose */ #include "http3/http3_stream.h" #include "http3/http3_stream_pool.h" @@ -40,6 +43,12 @@ http3_stream_t *http3_stream_new(http3_connection_t *conn, int64_t stream_id) s->request = &s->_request_storage; s->request->refcount = 1; s->request->release = http3_stream_release_via_request; + /* Reactor mode: the listener routes parsed requests to PHP + * workers, so the parser builds the request in the persistent (malloc) + * domain — it crosses the reactor->worker thread boundary. NULL reactor ctx + * (the default) keeps the ZMM fast path. */ + s->request->persistent = + (http3_listener_reactor_ctx(conn->listener) != NULL); /* PHP zvals start UNDEF; dispatch fills them right before spawning * the handler coroutine. */ ZVAL_UNDEF(&s->request_zv); @@ -54,12 +63,42 @@ http3_stream_t *http3_stream_new(http3_connection_t *conn, int64_t stream_id) * return the slot to the slab pool — separating these phases lets * the slot stay alive across the gap between stream_release (early) * and the eventual destroy from a PHP HttpRequest wrapper (late). */ +/* Reverse-path consumed apply, run ON THE REACTOR thread: the + * worker is done with the request, so drop the reactor's worker-borrow stream + * ref. When it is the last ref, the slab slot returns to the pool here (all + * slab ops stay on the reactor that owns the pool). */ +static void http3_reactor_consumed_apply(void *arg) +{ + http3_stream_release((http3_stream_t *)arg); +} + static void http3_stream_release_via_request(http_request_t *req) { /* Offset-0 invariant: _request_storage is the first field of * http3_stream_t, so the same byte address is both. */ http3_stream_t *s = (http3_stream_t *)req; + /* Reactor mode: this fires on the WORKER thread (the request's + * last ref dropped as the HttpRequest wrapper was freed). The slab is the + * reactor's — freeing it here would be a cross-thread pool free. Instead + * signal the owning reactor to reclaim the slot on its own thread; + * the actual http3_stream_pool_free happens in http3_reactor_consumed_apply + * -> http3_stream_release. */ + const http3_reactor_ctx_t *const rctx = + s->conn != NULL ? http3_listener_reactor_ctx(s->conn->listener) : NULL; + + if (rctx != NULL) { + /* Bounded mailbox; the reactor drains continuously, so a brief spin on + * a transient full queue is safe and never deadlocks (the reactor never + * blocks on the worker). */ + while (!reactor_pool_post_exec(rctx->pool, rctx->reactor_id, + http3_reactor_consumed_apply, s)) { + /* retry */ + } + + return; + } + http3_stream_pool_free(s->pool, s); } @@ -73,6 +112,12 @@ void http3_stream_release(http3_stream_t *s) return; } + /* Capture reactor mode before the unlink below nulls s->conn. In reactor + * mode the request's lifetime is the worker's (it freed the request fields + * on its own thread); the slab slot is the reactor's to reclaim here. */ + const bool reactor_mode = + s->conn != NULL && http3_listener_reactor_ctx(s->conn->listener) != NULL; + /* Stream-side cleanup. After this point no H3 callback or * coroutine should reach into the slot — only an outstanding PHP * HttpRequest wrapper may still hold a request->refcount, in @@ -86,6 +131,18 @@ void http3_stream_release(http3_stream_t *s) s->response_body = NULL; } + if (s->hq_line != NULL) { + efree(s->hq_line); + s->hq_line = NULL; + } + +#ifndef PHP_WIN32 + if (s->hq_map != NULL) { + munmap(s->hq_map, s->hq_map_len); + s->hq_map = NULL; + } +#endif + /* Drain the streaming chunk queue — chunks still owned by us * because nghttp3 may have already taken iov pointers but not yet * acked them. */ @@ -124,6 +181,24 @@ void http3_stream_release(http3_stream_t *s) s->conn = NULL; } + /* Reactor mode: the per-stream zvals are UNDEF (the reactor never wrapped + * the request). The slab slot is ours to reclaim — this is the single slab + * free for the reactor path. The request FIELDS, though, depend on whether + * the request reached a worker: + * - handed off (s->dispatched): the worker freed the fields on its own + * thread via http_request_destroy, then posted the consumed that brought + * us here. Nothing to free. + * - never handed off (early RST / backpressure): no worker ran, so free + * the persistent fields here before returning the slot. */ + if (reactor_mode) { + if (!s->dispatched) { + http_request_free_fields(s->request); + } + + http3_stream_pool_free(s->pool, s); + return; + } + /* Per-stream PHP objects. The HttpRequest wrapper's free_object * calls http_request_destroy on s->request — that decrement may * either reach zero (no other ref) and fire the release callback diff --git a/src/http_request.c b/src/http_request.c index 2814712..3a95072 100644 --- a/src/http_request.c +++ b/src/http_request.c @@ -94,6 +94,61 @@ ZEND_METHOD(TrueAsync_HttpRequest, __construct) ZEND_PARSE_PARAMETERS_NONE(); } +/* Hand a request string to PHP in the worker's ZMM domain. A reactor-built + * (persistent malloc) string must be deep-copied — the engine would otherwise + * efree malloc memory. ZMM / interned strings are returned by addref as before. + * The domain is read off the string itself (self-describing), so mixed-domain + * requests (persistent method/uri/headers + ZMM path/query) are handled per + * field. */ +static void http_request_retval_str(zval *out, zend_string *str) +{ + if (UNEXPECTED(GC_FLAGS(str) & IS_STR_PERSISTENT)) { + ZVAL_STRINGL(out, ZSTR_VAL(str), ZSTR_LEN(str)); + } else { + ZVAL_STR_COPY(out, str); + } +} + +/* Hand a request HashTable to PHP in the worker's ZMM domain. ZMM tables are + * dup'd (cheap, refcounted) as before; a persistent (reactor-built) table is + * rebuilt with ZMM copies — zend_array_dup would addref persistent keys/values + * that the engine then efree's (heap corruption). Request tables hold string + * values only. */ +static void http_request_retval_ht(zval *out, HashTable *ht) +{ + if (EXPECTED(!(GC_FLAGS(ht) & IS_ARRAY_PERSISTENT))) { + ZVAL_ARR(out, zend_array_dup(ht)); + return; + } + + zend_array *dst; + ALLOC_HASHTABLE(dst); + zend_hash_init(dst, zend_hash_num_elements(ht), NULL, ZVAL_PTR_DTOR, 0); + + zend_string *key; + zend_ulong idx; + zval *val; + ZEND_HASH_FOREACH_KEY_VAL(ht, idx, key, val) { + zval copy; + + if (EXPECTED(Z_TYPE_P(val) == IS_STRING)) { + ZVAL_STRINGL(©, Z_STRVAL_P(val), Z_STRLEN_P(val)); + } else { + ZVAL_COPY(©, val); + } + + if (key != NULL) { + zend_string *const key_copy = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 0); + zend_hash_update(dst, key_copy, ©); + zend_string_release(key_copy); + } else { + zend_hash_index_update(dst, idx, ©); + } + } ZEND_HASH_FOREACH_END(); + + ZVAL_ARR(out, dst); +} + ZEND_METHOD(TrueAsync_HttpRequest, getMethod) { http_request_object *intern = Z_HTTP_REQUEST_P(ZEND_THIS); @@ -103,7 +158,7 @@ ZEND_METHOD(TrueAsync_HttpRequest, getMethod) RETURN_EMPTY_STRING(); } - RETURN_STR_COPY(intern->request->method); + http_request_retval_str(return_value, intern->request->method); } ZEND_METHOD(TrueAsync_HttpRequest, getUri) @@ -115,7 +170,7 @@ ZEND_METHOD(TrueAsync_HttpRequest, getUri) RETURN_EMPTY_STRING(); } - RETURN_STR_COPY(intern->request->uri); + http_request_retval_str(return_value, intern->request->uri); } ZEND_METHOD(TrueAsync_HttpRequest, getHttpVersion) @@ -163,7 +218,8 @@ ZEND_METHOD(TrueAsync_HttpRequest, getHeader) zend_string_release(name_lower); if (value && Z_TYPE_P(value) == IS_STRING) { - RETURN_STR_COPY(Z_STR_P(value)); + http_request_retval_str(return_value, Z_STR_P(value)); + return; } RETURN_NULL(); @@ -184,7 +240,8 @@ ZEND_METHOD(TrueAsync_HttpRequest, getHeaderLine) /* In our implementation headers are stored as single strings */ if (value && Z_TYPE_P(value) == IS_STRING) { - RETURN_STR_COPY(Z_STR_P(value)); + http_request_retval_str(return_value, Z_STR_P(value)); + return; } RETURN_EMPTY_STRING(); @@ -195,7 +252,7 @@ ZEND_METHOD(TrueAsync_HttpRequest, getHeaders) http_request_object *intern = Z_HTTP_REQUEST_P(ZEND_THIS); ZEND_PARSE_PARAMETERS_NONE(); - ZVAL_ARR(return_value, zend_array_dup(intern->request->headers)); + http_request_retval_ht(return_value, intern->request->headers); } ZEND_METHOD(TrueAsync_HttpRequest, getBody) @@ -207,7 +264,7 @@ ZEND_METHOD(TrueAsync_HttpRequest, getBody) RETURN_EMPTY_STRING(); } - RETURN_STR_COPY(intern->request->body); + http_request_retval_str(return_value, intern->request->body); } ZEND_METHOD(TrueAsync_HttpRequest, hasBody) @@ -232,7 +289,7 @@ ZEND_METHOD(TrueAsync_HttpRequest, getPost) ZEND_PARSE_PARAMETERS_NONE(); if (intern->request->post_data) { - ZVAL_ARR(return_value, zend_array_dup(intern->request->post_data)); + http_request_retval_ht(return_value, intern->request->post_data); } else { array_init(return_value); } @@ -244,7 +301,7 @@ ZEND_METHOD(TrueAsync_HttpRequest, getFiles) ZEND_PARSE_PARAMETERS_NONE(); if (intern->request->files) { - ZVAL_ARR(return_value, zend_array_dup(intern->request->files)); + http_request_retval_ht(return_value, intern->request->files); } else { array_init(return_value); } @@ -306,7 +363,12 @@ static void http_request_ensure_uri_parsed(http_request_t *req) const char *q = memchr(uri, '?', ulen); if (!q) { - req->path = zend_string_copy(req->uri); + /* path is a worker-derived (ZMM) field. Aliasing a persistent uri + * by addref would make path persistent; deep-copy in that case so + * getPath can addref it to PHP. ZMM uri stays a cheap shared ref. */ + req->path = (GC_FLAGS(req->uri) & IS_STR_PERSISTENT) + ? zend_string_init(ZSTR_VAL(req->uri), ZSTR_LEN(req->uri), 0) + : zend_string_copy(req->uri); req->query_params = zend_new_array(0); return; } @@ -342,7 +404,7 @@ ZEND_METHOD(TrueAsync_HttpRequest, getQuery) ZEND_PARSE_PARAMETERS_NONE(); http_request_ensure_uri_parsed(intern->request); - ZVAL_ARR(return_value, zend_array_dup(intern->request->query_params)); + http_request_retval_ht(return_value, intern->request->query_params); } ZEND_METHOD(TrueAsync_HttpRequest, getQueryParam) @@ -417,7 +479,7 @@ ZEND_METHOD(TrueAsync_HttpRequest, getTraceParent) RETURN_NULL(); } - RETURN_STR_COPY(intern->request->traceparent_raw); + http_request_retval_str(return_value, intern->request->traceparent_raw); } ZEND_METHOD(TrueAsync_HttpRequest, getTraceState) @@ -429,7 +491,7 @@ ZEND_METHOD(TrueAsync_HttpRequest, getTraceState) RETURN_NULL(); } - RETURN_STR_COPY(intern->request->tracestate_raw); + http_request_retval_str(return_value, intern->request->tracestate_raw); } ZEND_METHOD(TrueAsync_HttpRequest, getTraceId) @@ -696,6 +758,33 @@ void http_request_class_register(void) http_request_object_handlers.clone_obj = NULL; /* No cloning */ } +/* Value dtor for a persistent (reactor-built) headers HashTable. Header + * values are always zend_strings; zend_string_release is flag-aware + * (pefree for IS_STR_PERSISTENT), unlike ZVAL_PTR_DTOR which routes + * IS_STRING through zend_string_destroy == efree and would corrupt the + * malloc heap on a persistent string. */ +static void http_request_persistent_header_dtor(zval *zv) +{ + zend_string_release(Z_STR_P(zv)); +} + +void http_request_init_headers(http_request_t *req) +{ + if (req->headers != NULL) { + return; + } + + if (req->persistent) { + req->headers = pemalloc(sizeof(HashTable), 1); + zend_hash_init(req->headers, HTTP_HEADERS_INITIAL_SIZE, NULL, + http_request_persistent_header_dtor, 1); + return; + } + + ALLOC_HASHTABLE(req->headers); + zend_hash_init(req->headers, HTTP_HEADERS_INITIAL_SIZE, NULL, ZVAL_PTR_DTOR, 0); +} + /* Helper: Create HttpRequest object wrapping an already-parsed * http_request_t. The object takes ownership; on free_obj it will * call http_request_destroy(req). */ diff --git a/src/http_server.c b/src/http_server.c index 20c8052..628aeca 100644 --- a/src/http_server.c +++ b/src/http_server.c @@ -21,6 +21,7 @@ # include "compression/http_compression_pool.h" #endif #include "http_known_strings.h" +#include "core/reactor_pool_test.h" #include "log/http_log.h" #include "static/static_handler.h" #include "http_send_file.h" @@ -155,7 +156,7 @@ ZEND_GET_MODULE(http_server) /* {{{ PHP_MINIT_FUNCTION */ PHP_MINIT_FUNCTION(http_server) { - (void)type; (void)module_number; + (void)module_number; /* Initialize module globals */ ZEND_INIT_MODULE_GLOBALS(http_server, php_http_server_init_globals, NULL); @@ -181,6 +182,9 @@ PHP_MINIT_FUNCTION(http_server) http_static_handler_class_register(); http_server_class_register(); + /* Test-only C hooks; a no-op unless built with HTTP_SERVER_TEST_HOOKS. */ + reactor_pool_test_register(type); + return SUCCESS; } /* }}} */ diff --git a/src/http_server_class.c b/src/http_server_class.c index 157c206..8ca3c13 100644 --- a/src/http_server_class.c +++ b/src/http_server_class.c @@ -28,11 +28,16 @@ #include "core/http_protocol_handlers.h" #include "core/http_protocol_strategy.h" #include "core/tls_layer.h" +#include "core/reactor_pool.h" +#include "core/worker_inbox.h" +#include "core/worker_registry.h" +#include "core/response_wire.h" #include "log/http_log.h" #include "static/static_handler.h" #include "static/http_static_cache.h" #ifdef HAVE_HTTP_SERVER_HTTP3 # include "http3/http3_listener.h" +# include "http3/http3_steer.h" #endif /* Backpressure tunables. Hard-cap hysteresis ratio: pause_low = ratio * @@ -284,6 +289,16 @@ struct http_server_object { http_pool_tcp_fd_t pool_tcp_fds[MAX_LISTENERS]; size_t pool_tcp_fd_count; + /* Pure-C transport reactor pool. Parent-only, brought up when an H3 + * listener is configured and the opt-in env gate is set; NULL otherwise. + * Owns no PHP state. */ + reactor_pool_t *reactor_pool; + + /* This worker clone's request inbox. Non-NULL only on a worker clone + * running under the reactor-pool gate; the reactor posts parsed requests + * here and the drain dispatches them on this thread. */ + worker_inbox_t *worker_inbox; + #ifdef HAVE_HTTP_SERVER_HTTP3 /* HTTP/3 UDP listeners — parallel to TCP listeners[] because they have * different transport semantics (no accept(), no per-connection fd) and @@ -291,6 +306,25 @@ struct http_server_object { http3_listener_t *http3_listeners[MAX_LISTENERS]; size_t http3_listener_count; + /* Reactor-owned H3 listeners. Parent-only, under the gate: one per + * (reactor x configured udp_h3 listener), spawned ON the reactor thread so + * its uv socket lives on the right loop. The parent owns these + their + * thread-clean contexts + a shared SSL_CTX; all torn down with the reactor + * pool. Each entry remembers which reactor it runs on so teardown can run + * on that thread. */ + struct { http3_listener_t *listener; int reactor_id; } + reactor_h3_listeners[MAX_LISTENERS]; + size_t reactor_h3_listener_count; + http3_reactor_ctx_t *reactor_h3_ctx; /* [reactor count] */ + tls_context_t *reactor_tls_ctx; /* parent-built shared SSL_CTX */ + + /* CID steering groups, one per H3 endpoint. Each groups that endpoint's + * per-reactor listeners by reactor id so any reactor can forward a stray + * (migrated) datagram to the owner. Built after the listeners spawn, freed + * after they tear down. */ + http3_steer_group_t *reactor_h3_steer[MAX_LISTENERS]; + size_t reactor_h3_steer_count; + /* Pre-rendered "h3=\":\"; ma=86400" string, refreshed * at start() when an H3 listener is configured. NULL when H3 is * disabled or env-var opt-out is set. Lifetime: refcounted zend_string, @@ -490,6 +524,10 @@ static inline http_server_object *http_server_from_obj(zend_object *obj) { } #define Z_HTTP_SERVER_P(zv) http_server_from_obj(Z_OBJ_P(zv)) +http_server_object *http_server_object_from_zend(zend_object *obj) { + return http_server_from_obj(obj); +} + /* Single-thread per worker — no atomics needed. */ static void http_server_state_finalize(http_server_object *server); @@ -1006,6 +1044,16 @@ http_static_handler_get(const http_server_object *server, size_t index) return server->static_handler_mounts[index]; } +const http_static_handler_t *const * +http_static_handler_mounts(const http_server_object *server) +{ + if (server == NULL || server->static_handler_count == 0) { + return NULL; + } + + return (const http_static_handler_t *const *)server->static_handler_mounts; +} + /* Open-file cache accessor. The cache instance is per-server (per-worker * after worker-pool transfer — no cross-worker sharing, no locking). * @@ -2014,6 +2062,440 @@ static int http_server_pool_tcp_fd_lookup(const http_server_object *server, #endif /* !PHP_WIN32 */ +/* Process-wide registry of worker inboxes. The pool parent creates it; worker + * clones publish their inbox into it; reactor threads read it to pick a worker. + * One per process, shared across all threads. */ +static worker_registry_t *g_worker_registry = NULL; + +/* Process-wide reactor pool handle. The pool itself is owned by the parent's + * http_server_object; this global lets a worker thread reach it to post + * responses back over the reverse channel (reactor_pool_post_exec), addressed by + * the reactor_id carried on each request/response. One pool per process. */ +static reactor_pool_t *g_reactor_pool = NULL; + +/* Reactor pool opt-in gate. While the H3-listener-on-reactor wiring is + * incomplete the pool is brought up only when TRUE_ASYNC_SERVER_REACTOR_POOL=1 + * so the default server behaves exactly as before. */ +static bool http_server_reactor_pool_enabled(void) +{ + const char *env = getenv("TRUE_ASYNC_SERVER_REACTOR_POOL"); + return env != NULL && env[0] == '1'; +} + +/* Online CPU count, floor 1 — caps the reactor pool at the core count. */ +static int http_server_online_cpus(void) +{ +#ifdef PHP_WIN32 + SYSTEM_INFO si; + GetSystemInfo(&si); + return si.dwNumberOfProcessors > 0 ? (int)si.dwNumberOfProcessors : 1; +#else + const long n = sysconf(_SC_NPROCESSORS_ONLN); + return n > 0 ? (int)n : 1; +#endif +} + +/* True when the server config declares at least one HTTP/3 (QUIC) listener — + * the reactor pool's only client today (the kernel ACKs TCP independently). */ +static bool http_server_config_has_h3(http_server_object *server) +{ + http_server_config_t *const cfg = http_server_get_config(server); + + if (cfg == NULL) { + return false; + } + + for (size_t i = 0; i < cfg->listener_count; i++) { + if (cfg->listeners[i].type == LISTENER_TYPE_UDP_H3) { + return true; + } + } + + return false; +} + +#ifdef HAVE_HTTP_SERVER_HTTP3 +/* Exec payload: spawn one reactor-mode H3 listener ON the reactor thread — its + * uv socket must be created on the loop that owns it (run via reactor_pool_exec). + * server_obj is NULL (the reactor must not touch the parent's PHP object); the + * thread-clean ctx drives config + worker routing. */ +typedef struct { + const char *host; + int port; + void *ssl_ctx; + const http3_reactor_ctx_t *ctx; + http3_steer_group_t *steer; /* endpoint's steering group, or NULL */ + http3_listener_t *out; /* result, NULL on failure */ +} reactor_h3_spawn_arg_t; + +static void reactor_h3_spawn_fn(void *arg) +{ + reactor_h3_spawn_arg_t *const spawn = (reactor_h3_spawn_arg_t *)arg; + spawn->out = http3_listener_spawn(spawn->host, spawn->port, spawn->ssl_ctx, NULL, spawn->ctx); + + if (spawn->out == NULL && EG(exception)) { + zend_clear_exception(); /* don't dangle on the reactor's EG */ + } + + /* Arm steering on the reactor's own thread before it processes traffic — + * the listener already polls here, so this store happens-before any read in + * try_steer on the same thread. */ + if (spawn->out != NULL && spawn->steer != NULL) { + http3_listener_set_steer(spawn->out, spawn->steer); + } +} + +static void reactor_h3_destroy_fn(void *arg) +{ + http3_listener_destroy((http3_listener_t *)arg); +} + +/* Spawn one reactor-mode H3 listener per (reactor x configured udp_h3 listener), + * each on its reactor's own thread. Builds the parent-shared SSL_CTX + the + * per-reactor thread-clean contexts (config scalars resolved here on the parent + * where the server object is valid). Non-fatal: on failure the gated server just + * doesn't serve H3. Returns the number spawned. */ +static size_t http_server_reactor_h3_spawn(http_server_object *server, const int reactors) +{ + http_server_config_t *const cfg = http_server_get_config(server); + + if (cfg == NULL) { + return 0; + } + + /* QUIC mandates TLS — build the SSL_CTX on the parent (shared across reactor + * threads; OpenSSL SSL_CTX is safe for concurrent per-connection SSL use). */ + char tls_err[TLS_ERR_BUF_SIZE]; + tls_err[0] = '\0'; + server->reactor_tls_ctx = tls_context_new( + cfg->tls_cert_path ? ZSTR_VAL(cfg->tls_cert_path) : NULL, + cfg->tls_key_path ? ZSTR_VAL(cfg->tls_key_path) : NULL, + tls_err, sizeof(tls_err)); + + if (server->reactor_tls_ctx == NULL) { + fprintf(stderr, "[true-async-server] reactor H3 TLS context failed: %s\n", + tls_err[0] != '\0' ? tls_err : "(no detail)"); + fflush(stderr); + return 0; + } + + server->reactor_h3_ctx = + pecalloc((size_t)reactors, sizeof(http3_reactor_ctx_t), 1); + + const uint32_t socket_buffer_bytes = http_server_get_http3_socket_buffer_bytes(server); + const uint32_t peer_budget = http_server_get_http3_peer_connection_budget(server); + const int max_conns = http_server_get_max_connections(server); + const http_static_handler_t *const *const mounts = http_static_handler_mounts(server); + const size_t mount_count = http_static_handler_count(server); + + /* Merge the open-file cache settings across mounts (max of max_entries, + * min of non-zero ttl), same policy as http_static_cache_acquire. Each + * reactor gets its own cache instance below. */ + int32_t cache_max = 0; + int32_t cache_ttl = 0; + for (size_t mi = 0; mi < mount_count; mi++) { + const http_static_handler_t *const m = mounts[mi]; + + if (m == NULL || m->cache_max_entries <= 0 || m->cache_ttl_seconds <= 0) { + continue; + } + + if (m->cache_max_entries > cache_max) { + cache_max = m->cache_max_entries; + } + + if (cache_ttl == 0 || m->cache_ttl_seconds < cache_ttl) { + cache_ttl = m->cache_ttl_seconds; + } + } + + for (int r = 0; r < reactors; r++) { + server->reactor_h3_ctx[r].registry = g_worker_registry; + server->reactor_h3_ctx[r].pool = server->reactor_pool; + server->reactor_h3_ctx[r].reactor_id = r; + server->reactor_h3_ctx[r].n_reactors = reactors; + server->reactor_h3_ctx[r].socket_buffer_bytes = socket_buffer_bytes; + server->reactor_h3_ctx[r].peer_budget = peer_budget; + server->reactor_h3_ctx[r].max_conns = max_conns > 0 ? (uint32_t)max_conns : 0; + server->reactor_h3_ctx[r].static_mounts = (const void *)mounts; + server->reactor_h3_ctx[r].static_mount_count = mount_count; + server->reactor_h3_ctx[r].static_cache = + (cache_max > 0 && cache_ttl > 0) + ? http_static_cache_create((size_t)cache_max, (time_t)cache_ttl) + : NULL; + } + + void *const ssl_ctx = server->reactor_tls_ctx->ctx; + size_t spawned = 0; + + /* Steering engages only with >1 reactor (a single reactor owns every + * connection — nothing to forward). Set process-wide before any listener + * starts minting CIDs. */ + const bool steer_active = http3_steer_active(); + + for (size_t i = 0; i < cfg->listener_count; i++) { + if (cfg->listeners[i].type != LISTENER_TYPE_UDP_H3 + || cfg->listeners[i].host == NULL) { + continue; + } + + /* One steering group per endpoint, grouping its per-reactor listeners + * by reactor id. Created up front so each listener can be armed with it + * on its own reactor thread at spawn. */ + http3_steer_group_t *group = + steer_active ? http3_steer_group_create(server->reactor_pool, reactors) + : NULL; + size_t group_listeners = 0; + + for (int r = 0; r < reactors; r++) { + if (server->reactor_h3_listener_count >= MAX_LISTENERS) { + break; + } + + reactor_h3_spawn_arg_t arg = { + .host = ZSTR_VAL(cfg->listeners[i].host), + .port = cfg->listeners[i].port, + .ssl_ctx = ssl_ctx, + .ctx = &server->reactor_h3_ctx[r], + .steer = group, + .out = NULL, + }; + + if (!reactor_pool_exec(server->reactor_pool, r, reactor_h3_spawn_fn, &arg) + || arg.out == NULL) { + fprintf(stderr, + "[true-async-server] reactor H3 listener spawn failed " + "(reactor %d, %s:%d)\n", r, arg.host, arg.port); + fflush(stderr); + continue; + } + + /* Publish the listener into its endpoint's steering table so sibling + * reactors can forward to it (atomic — read lock-free on the + * forward path). */ + http3_steer_group_publish(group, r, arg.out); + group_listeners++; + + const size_t n = server->reactor_h3_listener_count++; + server->reactor_h3_listeners[n].listener = arg.out; + server->reactor_h3_listeners[n].reactor_id = r; + spawned++; + } + + if (group != NULL && group_listeners > 0 + && server->reactor_h3_steer_count < MAX_LISTENERS) { + server->reactor_h3_steer[server->reactor_h3_steer_count++] = group; + } else { + http3_steer_group_free(group); /* no listeners, or no slot — drop it */ + } + } + + return spawned; +} + +/* Tear down every reactor-owned H3 listener on its own reactor thread (libuv + * handles + ZMM allocated there), then free the contexts + shared SSL_CTX. Runs + * before reactor_pool_destroy stops the reactors. */ +static void http_server_reactor_h3_teardown(http_server_object *server) +{ + for (size_t i = 0; i < server->reactor_h3_listener_count; i++) { + if (server->reactor_pool != NULL + && server->reactor_h3_listeners[i].listener != NULL) { + reactor_pool_exec(server->reactor_pool, + server->reactor_h3_listeners[i].reactor_id, + reactor_h3_destroy_fn, + server->reactor_h3_listeners[i].listener); + } + + server->reactor_h3_listeners[i].listener = NULL; + } + + server->reactor_h3_listener_count = 0; + + if (server->reactor_h3_ctx != NULL) { + /* Destroy the per-reactor open-file caches. Listeners are already torn + * down above (synchronously, on their reactors), so no reactor is still + * serving; the caches are persistent (malloc), freed here on the parent. */ + const int rc = server->reactor_pool != NULL + ? reactor_pool_count(server->reactor_pool) : 0; + + for (int r = 0; r < rc; r++) { + if (server->reactor_h3_ctx[r].static_cache != NULL) { + http_static_cache_destroy(server->reactor_h3_ctx[r].static_cache); + server->reactor_h3_ctx[r].static_cache = NULL; + } + } + + pefree(server->reactor_h3_ctx, 1); + server->reactor_h3_ctx = NULL; + } + + if (server->reactor_tls_ctx != NULL) { + tls_context_free(server->reactor_tls_ctx); + server->reactor_tls_ctx = NULL; + } +} +#endif /* HAVE_HTTP_SERVER_HTTP3 */ + +/* Bring up the transport reactor pool on the parent before workers run. + * reactors = min(workers, cores) per the accepted R:W topology. No-op (and + * leaves reactor_pool NULL) when the gate is off or no H3 listener is + * configured. Non-fatal: a failed bring-up logs and the server runs without + * it. */ +static void http_server_reactor_pool_up(http_server_object *server, const int workers) +{ + server->reactor_pool = NULL; + + if (!http_server_reactor_pool_enabled() || !http_server_config_has_h3(server)) { + return; + } + + const int cores = http_server_online_cpus(); + const int reactors = workers < cores ? workers : cores; + + server->reactor_pool = reactor_pool_create(reactors); + + if (server->reactor_pool == NULL) { + /* reactor_pool_create may set a PHP error on hard failures; clear it + * so the (non-fatal) gate does not poison the parent coroutine. */ + if (EG(exception)) { + zend_clear_exception(); + } + + fprintf(stderr, + "[true-async-server] reactor pool bring-up failed (reactors=%d) — " + "continuing without it\n", reactors); + fflush(stderr); + return; + } + + /* One inbox slot per worker — workers publish into it as they come up. */ + g_worker_registry = worker_registry_create(workers); + g_reactor_pool = server->reactor_pool; + + size_t h3_spawned = 0; +#ifdef HAVE_HTTP_SERVER_HTTP3 + /* Arm CID steering before any reactor mints a CID: encode the owner + * reactor's id into every server CID so a migrated client rehashed by + * SO_REUSEPORT onto another reactor routes back to its owner. Active only + * with >1 reactor (the id is one byte, so cap at 256). */ + const int real_reactors = reactor_pool_count(server->reactor_pool); + + if (http3_steer_init()) { + http3_steer_set_active(real_reactors > 1 && real_reactors <= 256); + /* Flush forwarded datagrams once per reactor drain batch (not per + * packet), so a burst of steered datagrams under rapid migration sends + * like a recvmmsg tick instead of fragmenting a connection's output. */ + reactor_pool_set_drain_epilogue(http3_reactor_steer_flush_epilogue); + } + + /* Spawn the H3 listeners ON the reactor threads now. From here the workers + * stop spawning their own H3 listener (gated, in start()); the reactor owns + * the transport and routes parsed requests to workers via the registry. */ + h3_spawned = http_server_reactor_h3_spawn(server, reactors); +#endif + + fprintf(stderr, + "[true-async-server] reactor pool up: %d reactor(s), worker registry: %d slot(s), " + "%zu H3 listener(s) on reactors\n", + reactor_pool_count(server->reactor_pool), + worker_registry_capacity(g_worker_registry), + h3_spawned); + fflush(stderr); +} + +/* Tear the reactor pool down on the parent after workers have quiesced. + * Idempotent; safe when the pool was never brought up. */ +static void http_server_reactor_pool_down(http_server_object *server) +{ +#ifdef HAVE_HTTP_SERVER_HTTP3 + /* Reactor-owned H3 listeners first, on their own threads, while the reactors + * still run — then stop the pool. */ + http_server_reactor_h3_teardown(server); +#endif + + g_reactor_pool = NULL; + + if (g_worker_registry != NULL) { + worker_registry_free(g_worker_registry); + g_worker_registry = NULL; + } + + if (server->reactor_pool != NULL) { + reactor_pool_destroy(server->reactor_pool); + server->reactor_pool = NULL; + } + +#ifdef HAVE_HTTP_SERVER_HTTP3 + /* Steering groups last: only safe once every reactor has stopped, since a + * forward still queued on a reactor's mailbox reads the group's slots. */ + for (size_t i = 0; i < server->reactor_h3_steer_count; i++) { + http3_steer_group_free(server->reactor_h3_steer[i]); + server->reactor_h3_steer[i] = NULL; + } + + server->reactor_h3_steer_count = 0; +#endif +} + +/* Worker response sink: post the rendered response back to the originating + * reactor for nghttp3 encode + send. Runs on the worker thread (from + * the handler coroutine's dispose). reactor_id (echoed on the wire) selects the + * reverse channel; ownership of `rw` transfers to the reactor apply on success. + * On failure (no pool / full mailbox) drop it — the client times out and the + * slab is still reclaimed by the consumed that follows. */ +static void http_server_worker_response_sink(response_wire_t *rw, void *arg) +{ + (void)arg; + +#ifdef HAVE_HTTP_SERVER_HTTP3 + if (g_reactor_pool != NULL + && reactor_pool_post_exec(g_reactor_pool, + (int)response_wire_reactor_id(rw), + http3_reactor_apply_response, rw)) { + return; /* the reactor owns rw now */ + } +#endif + + response_wire_free(rw); +} + +/* Stand up this worker clone's request inbox and publish it to the shared + * registry so a reactor can route requests to it. Gated + H3-only + clone-only; + * a no-op otherwise. Runs on the worker thread after its server scope exists. */ +static void http_server_worker_inbox_up(http_server_object *server) +{ + if (server->worker_inbox != NULL + || g_worker_registry == NULL + || !server->is_worker_clone + || !http_server_reactor_pool_enabled() + || !http_server_config_has_h3(server) + || server->server_scope == NULL) { + return; + } + + server->worker_inbox = worker_inbox_create(server, server->server_scope, + /*own_scope=*/true, + http_server_worker_response_sink, NULL); + + if (server->worker_inbox == NULL) { + return; + } + + const int slot = worker_registry_add(g_worker_registry, server->worker_inbox); + + if (slot < 0) { + worker_inbox_free(server->worker_inbox); + server->worker_inbox = NULL; + return; + } + + fprintf(stderr, + "[true-async-server] worker inbox published: slot %d of %d\n", + slot, worker_registry_capacity(g_worker_registry)); + fflush(stderr); +} + static int http_server_start_pool(http_server_object *server, zval *this_zv, const int workers) @@ -2101,6 +2583,11 @@ static int http_server_start_pool(http_server_object *server, server->pool_worker_ctx = ctxs; server->pool_worker_ctx_count = workers; + /* Stand up the transport reactor pool + worker registry BEFORE submitting + * workers, so a worker that comes up fast finds the registry ready to + * publish into. */ + http_server_reactor_pool_up(server, workers); + pool_await_state_t *st = ecalloc(1, sizeof(*st)); st->pending = workers; st->all_done = create_server_wait_event(); @@ -2156,6 +2643,11 @@ static int http_server_start_pool(http_server_object *server, server->stopping = false; server->in_pool_mode = false; + /* Workers have quiesced (the suspend returned). Tear down the transport + * reactor pool: this releases the H3 listeners the gated reactors own, + * frees the worker registry, and stops the reactor loops. */ + http_server_reactor_pool_down(server); + if (st->all_done != NULL) { st->all_done->dispose(st->all_done); } @@ -2453,6 +2945,10 @@ ZEND_METHOD(TrueAsync_HttpServer, start) * explicitly release it. */ server->scope_object = server->server_scope->scope_object; + /* Worker-pool clone under the reactor-pool gate: publish a request inbox so + * a reactor can route parsed requests to this worker. No-op otherwise. */ + http_server_worker_inbox_up(server); + /* Build TLS context up-front if any listener declared tls=true. * Doing this *before* binding sockets keeps the failure path cheap: * a bad cert means no listen_event allocation at all, and the @@ -2637,6 +3133,16 @@ ZEND_METHOD(TrueAsync_HttpServer, start) } #ifdef HAVE_HTTP_SERVER_HTTP3 else if (strcmp(Z_STRVAL_P(type_zv), "udp_h3") == 0) { + /* Under the reactor-pool gate the transport reactor owns the H3 + * listener (spawned by the parent in http_server_reactor_pool_up); + * a worker clone must NOT spawn its own, or two listeners would + * REUSEPORT-share the socket and the reactor split would not hold. + * The worker still publishes its request inbox for the reactor to + * route to. */ + if (http_server_reactor_pool_enabled() && server->is_worker_clone) { + continue; + } + if (server->http3_listener_count >= MAX_LISTENERS) { continue; } @@ -2656,7 +3162,7 @@ ZEND_METHOD(TrueAsync_HttpServer, start) # endif http3_listener_t *h3 = http3_listener_spawn( Z_STRVAL_P(host_zv), (int)Z_LVAL_P(port_zv), ssl_ctx, - /* server_obj: */ server); + /* server_obj: */ server, /* reactor_ctx: */ NULL); if (!h3) { /* Unwind both TCP and H3 listeners — start() is all-or-nothing. */ for (size_t i = 0; i < server->listener_count; i++) { @@ -3214,13 +3720,133 @@ ZEND_METHOD(TrueAsync_HttpServer, getConfig) } /* }}} */ +#ifdef HAVE_HTTP_SERVER_HTTP3 +/* Append one listener's stats snapshot to the result array. Factored out so + * both the single-thread listeners (server->http3_listeners) and the reactor- + * owned listeners (server->reactor_h3_listeners) report identically. + * The reactor-owned read is cross-thread (the reactor writes these counters on + * its own thread); they are advisory uint64s, so a torn read is benign. */ +static void http3_emit_listener_stats(zval *return_value, http3_listener_t *listener) +{ + http3_listener_stats_t s; + http3_listener_get_stats(listener, &s); + + zval entry; + array_init(&entry); + add_assoc_string(&entry, "host", (char *)http3_listener_host(listener)); + add_assoc_long (&entry, "port", http3_listener_port(listener)); + add_assoc_long (&entry, "datagrams_received", (zend_long)s.datagrams_received); + add_assoc_long (&entry, "bytes_received", (zend_long)s.bytes_received); + add_assoc_long (&entry, "datagrams_errored", (zend_long)s.datagrams_errored); + add_assoc_long (&entry, "last_datagram_size", (zend_long)s.last_datagram_size); + add_assoc_string(&entry, "last_peer", s.last_peer); + + /* QUIC packet classification counters. */ + add_assoc_long(&entry, "quic_initial", (zend_long)s.packet.quic_initial); + add_assoc_long(&entry, "quic_short_header", (zend_long)s.packet.quic_short_header); + add_assoc_long(&entry, "quic_version_negotiated", (zend_long)s.packet.quic_version_negotiated); + add_assoc_long(&entry, "quic_parse_errors", (zend_long)s.packet.quic_parse_errors); + /* ngtcp2_conn lifecycle counters. */ + add_assoc_long(&entry, "quic_conn_accepted", (zend_long)s.packet.quic_conn_accepted); + add_assoc_long(&entry, "quic_conn_rejected", (zend_long)s.packet.quic_conn_rejected); + /* Read-path counters. */ + add_assoc_long(&entry, "quic_read_ok", (zend_long)s.packet.quic_read_ok); + add_assoc_long(&entry, "quic_read_error", (zend_long)s.packet.quic_read_error); + add_assoc_long(&entry, "quic_read_fatal", (zend_long)s.packet.quic_read_fatal); + add_assoc_long(&entry, "quic_path_migrations", (zend_long)s.packet.quic_path_migrations); + add_assoc_long(&entry, "quic_migration_storm_shed", (zend_long)s.packet.quic_migration_storm_shed); + /* CID steering. */ + add_assoc_long(&entry, "quic_steered_out", (zend_long)s.packet.quic_steered_out); + add_assoc_long(&entry, "quic_steered_in", (zend_long)s.packet.quic_steered_in); + add_assoc_long(&entry, "quic_steered_drop", (zend_long)s.packet.quic_steered_drop); + /* Issued / retired alternate CIDs (NEW_CONNECTION_ID, RFC 9000 §5.1). */ + add_assoc_long(&entry, "quic_new_cid_issued", (zend_long)s.packet.quic_new_cid_issued); + add_assoc_long(&entry, "quic_cid_retired", (zend_long)s.packet.quic_cid_retired); + /* Write-loop + timer counters. */ + add_assoc_long(&entry, "quic_packets_sent", (zend_long)s.packet.quic_packets_sent); + add_assoc_long(&entry, "quic_bytes_sent", (zend_long)s.packet.quic_bytes_sent); + add_assoc_long(&entry, "quic_timer_fired", (zend_long)s.packet.quic_timer_fired); + add_assoc_long(&entry, "quic_write_error", (zend_long)s.packet.quic_write_error); + /* Handshake / ALPN counters. */ + add_assoc_long(&entry, "quic_handshake_completed", (zend_long)s.packet.quic_handshake_completed); + add_assoc_long(&entry, "quic_alpn_mismatch", (zend_long)s.packet.quic_alpn_mismatch); + /* nghttp3 lifecycle counters. */ + add_assoc_long(&entry, "h3_init_ok", (zend_long)s.packet.h3_init_ok); + add_assoc_long(&entry, "h3_init_failed", (zend_long)s.packet.h3_init_failed); + add_assoc_long(&entry, "h3_stream_close", (zend_long)s.packet.h3_stream_close); + add_assoc_long(&entry, "h3_stream_read_error", (zend_long)s.packet.h3_stream_read_error); + /* Request-assembly counters. */ + add_assoc_long(&entry, "h3_request_received", (zend_long)s.packet.h3_request_received); + add_assoc_long(&entry, "h3_request_oversized", (zend_long)s.packet.h3_request_oversized); + add_assoc_long(&entry, "h3_streams_opened", (zend_long)s.packet.h3_streams_opened); + /* Response counters. */ + add_assoc_long(&entry, "h3_response_submitted", (zend_long)s.packet.h3_response_submitted); + add_assoc_long(&entry, "h3_response_submit_error",(zend_long)s.packet.h3_response_submit_error); + /* Connection lifecycle counters. */ + add_assoc_long(&entry, "quic_connection_close_sent", (zend_long)s.packet.quic_connection_close_sent); + add_assoc_long(&entry, "quic_conn_in_closing", (zend_long)s.packet.quic_conn_in_closing); + add_assoc_long(&entry, "quic_conn_in_draining", (zend_long)s.packet.quic_conn_in_draining); + add_assoc_long(&entry, "quic_conn_idle_closed", (zend_long)s.packet.quic_conn_idle_closed); + add_assoc_long(&entry, "quic_conn_handshake_timeout",(zend_long)s.packet.quic_conn_handshake_timeout); + add_assoc_long(&entry, "quic_conn_reaped", (zend_long)s.packet.quic_conn_reaped); + add_assoc_long(&entry, "quic_stateless_reset_sent", (zend_long)s.packet.quic_stateless_reset_sent); + add_assoc_long(&entry, "quic_retry_sent", (zend_long)s.packet.quic_retry_sent); + add_assoc_long(&entry, "quic_retry_token_ok", (zend_long)s.packet.quic_retry_token_ok); + add_assoc_long(&entry, "quic_retry_token_invalid", (zend_long)s.packet.quic_retry_token_invalid); + add_assoc_long(&entry, "quic_conn_per_peer_rejected",(zend_long)s.packet.quic_conn_per_peer_rejected); + add_assoc_long(&entry, "quic_conn_global_rejected", (zend_long)s.packet.quic_conn_global_rejected); + add_assoc_long(&entry, "quic_conn_refused_sent", (zend_long)s.packet.quic_conn_refused_sent); + /* Audit hardening counters. */ + add_assoc_long(&entry, "h3_framing_error", (zend_long)s.packet.h3_framing_error); + add_assoc_long(&entry, "quic_drain_iter_cap_hit", (zend_long)s.packet.quic_drain_iter_cap_hit); + + /* Reactor-iteration watchdog. Tick = one poll-cb wakeup; on the single + * reactor thread its latency is the ACK/PTO delay imposed on every live + * connection. */ + add_assoc_long(&entry, "reactor_ticks", (zend_long)s.packet.reactor_ticks); + add_assoc_long(&entry, "reactor_busy_ns", (zend_long)s.packet.reactor_busy_ns); + add_assoc_long(&entry, "reactor_max_tick_ns", (zend_long)s.packet.reactor_max_tick_ns); + add_assoc_long(&entry, "reactor_slow_ticks", (zend_long)s.packet.reactor_slow_ticks); + add_assoc_long(&entry, "reactor_timer_late", (zend_long)s.packet.reactor_timer_late); + add_assoc_long(&entry, "reactor_max_timer_late_ns",(zend_long)s.packet.reactor_max_timer_late_ns); + { + zval hist; + array_init(&hist); + + const size_t nbuckets = sizeof(s.packet.reactor_lat_bucket) + / sizeof(s.packet.reactor_lat_bucket[0]); + + for (size_t i = 0; i < nbuckets; ++i) { + add_next_index_long(&hist, (zend_long)s.packet.reactor_lat_bucket[i]); + } + + add_assoc_zval(&entry, "reactor_lat_bucket", &hist); + } + + /* Send-path error categorisation. */ + add_assoc_long(&entry, "quic_send_eagain", (zend_long)s.packet.quic_send_eagain); + add_assoc_long(&entry, "quic_send_gso_refused", (zend_long)s.packet.quic_send_gso_refused); + add_assoc_long(&entry, "quic_send_emsgsize", (zend_long)s.packet.quic_send_emsgsize); + add_assoc_long(&entry, "quic_send_unreach", (zend_long)s.packet.quic_send_unreach); + add_assoc_long(&entry, "quic_send_other_error", (zend_long)s.packet.quic_send_other_error); + add_assoc_long(&entry, "quic_gso_disabled", (zend_long)s.packet.quic_gso_disabled); + + /* Async errors observed via MSG_ERRQUEUE. */ + add_assoc_long(&entry, "quic_errqueue_emsgsize", (zend_long)s.packet.quic_errqueue_emsgsize); + add_assoc_long(&entry, "quic_errqueue_unreach", (zend_long)s.packet.quic_errqueue_unreach); + add_assoc_long(&entry, "quic_errqueue_other", (zend_long)s.packet.quic_errqueue_other); + + add_next_index_zval(return_value, &entry); +} +#endif + /* {{{ proto HttpServer::getHttp3Stats(): array * - * Per-listener observability for the HTTP/3 bootstrap path. Returns an - * array indexed by listener position; each entry has host, port, - * datagrams_received, bytes_received, datagrams_errored, last_datagram_size, - * last_peer. Counters let tests confirm the UDP pipe is live end-to-end. - */ + * Per-listener observability for the HTTP/3 path. In single-thread / worker + * mode the listeners live on this server; in the reactor-pool split the + * transport reactors own them (server->reactor_h3_listeners) — report both so + * a pooled server is observable too. Counters let tests confirm the UDP pipe + * is live end-to-end. */ ZEND_METHOD(TrueAsync_HttpServer, getHttp3Stats) { ZEND_PARSE_PARAMETERS_NONE(); @@ -3231,88 +3857,16 @@ ZEND_METHOD(TrueAsync_HttpServer, getHttp3Stats) http_server_object *server = Z_HTTP_SERVER_P(ZEND_THIS); for (size_t i = 0; i < server->http3_listener_count; i++) { - http3_listener_t *l = server->http3_listeners[i]; - - if (l == NULL) continue; - - http3_listener_stats_t s; - http3_listener_get_stats(l, &s); + if (server->http3_listeners[i] != NULL) { + http3_emit_listener_stats(return_value, server->http3_listeners[i]); + } + } - zval entry; - array_init(&entry); - add_assoc_string(&entry, "host", (char *)http3_listener_host(l)); - add_assoc_long (&entry, "port", http3_listener_port(l)); - add_assoc_long (&entry, "datagrams_received", (zend_long)s.datagrams_received); - add_assoc_long (&entry, "bytes_received", (zend_long)s.bytes_received); - add_assoc_long (&entry, "datagrams_errored", (zend_long)s.datagrams_errored); - add_assoc_long (&entry, "last_datagram_size", (zend_long)s.last_datagram_size); - add_assoc_string(&entry, "last_peer", s.last_peer); - - /* QUIC packet classification counters. */ - add_assoc_long(&entry, "quic_initial", (zend_long)s.packet.quic_initial); - add_assoc_long(&entry, "quic_short_header", (zend_long)s.packet.quic_short_header); - add_assoc_long(&entry, "quic_version_negotiated", (zend_long)s.packet.quic_version_negotiated); - add_assoc_long(&entry, "quic_parse_errors", (zend_long)s.packet.quic_parse_errors); - /* ngtcp2_conn lifecycle counters. */ - add_assoc_long(&entry, "quic_conn_accepted", (zend_long)s.packet.quic_conn_accepted); - add_assoc_long(&entry, "quic_conn_rejected", (zend_long)s.packet.quic_conn_rejected); - /* Read-path counters. */ - add_assoc_long(&entry, "quic_read_ok", (zend_long)s.packet.quic_read_ok); - add_assoc_long(&entry, "quic_read_error", (zend_long)s.packet.quic_read_error); - add_assoc_long(&entry, "quic_read_fatal", (zend_long)s.packet.quic_read_fatal); - add_assoc_long(&entry, "quic_path_migrations", (zend_long)s.packet.quic_path_migrations); - /* Write-loop + timer counters. */ - add_assoc_long(&entry, "quic_packets_sent", (zend_long)s.packet.quic_packets_sent); - add_assoc_long(&entry, "quic_bytes_sent", (zend_long)s.packet.quic_bytes_sent); - add_assoc_long(&entry, "quic_timer_fired", (zend_long)s.packet.quic_timer_fired); - add_assoc_long(&entry, "quic_write_error", (zend_long)s.packet.quic_write_error); - /* Handshake / ALPN counters. */ - add_assoc_long(&entry, "quic_handshake_completed", (zend_long)s.packet.quic_handshake_completed); - add_assoc_long(&entry, "quic_alpn_mismatch", (zend_long)s.packet.quic_alpn_mismatch); - /* nghttp3 lifecycle counters. */ - add_assoc_long(&entry, "h3_init_ok", (zend_long)s.packet.h3_init_ok); - add_assoc_long(&entry, "h3_init_failed", (zend_long)s.packet.h3_init_failed); - add_assoc_long(&entry, "h3_stream_close", (zend_long)s.packet.h3_stream_close); - add_assoc_long(&entry, "h3_stream_read_error", (zend_long)s.packet.h3_stream_read_error); - /* Request-assembly counters. */ - add_assoc_long(&entry, "h3_request_received", (zend_long)s.packet.h3_request_received); - add_assoc_long(&entry, "h3_request_oversized", (zend_long)s.packet.h3_request_oversized); - add_assoc_long(&entry, "h3_streams_opened", (zend_long)s.packet.h3_streams_opened); - /* Response counters. */ - add_assoc_long(&entry, "h3_response_submitted", (zend_long)s.packet.h3_response_submitted); - add_assoc_long(&entry, "h3_response_submit_error",(zend_long)s.packet.h3_response_submit_error); - /* Connection lifecycle counters. */ - add_assoc_long(&entry, "quic_connection_close_sent", (zend_long)s.packet.quic_connection_close_sent); - add_assoc_long(&entry, "quic_conn_in_closing", (zend_long)s.packet.quic_conn_in_closing); - add_assoc_long(&entry, "quic_conn_in_draining", (zend_long)s.packet.quic_conn_in_draining); - add_assoc_long(&entry, "quic_conn_idle_closed", (zend_long)s.packet.quic_conn_idle_closed); - add_assoc_long(&entry, "quic_conn_handshake_timeout",(zend_long)s.packet.quic_conn_handshake_timeout); - add_assoc_long(&entry, "quic_conn_reaped", (zend_long)s.packet.quic_conn_reaped); - add_assoc_long(&entry, "quic_stateless_reset_sent", (zend_long)s.packet.quic_stateless_reset_sent); - add_assoc_long(&entry, "quic_retry_sent", (zend_long)s.packet.quic_retry_sent); - add_assoc_long(&entry, "quic_retry_token_ok", (zend_long)s.packet.quic_retry_token_ok); - add_assoc_long(&entry, "quic_retry_token_invalid", (zend_long)s.packet.quic_retry_token_invalid); - add_assoc_long(&entry, "quic_conn_per_peer_rejected",(zend_long)s.packet.quic_conn_per_peer_rejected); - add_assoc_long(&entry, "quic_conn_global_rejected", (zend_long)s.packet.quic_conn_global_rejected); - add_assoc_long(&entry, "quic_conn_refused_sent", (zend_long)s.packet.quic_conn_refused_sent); - /* Audit hardening counters. */ - add_assoc_long(&entry, "h3_framing_error", (zend_long)s.packet.h3_framing_error); - add_assoc_long(&entry, "quic_drain_iter_cap_hit", (zend_long)s.packet.quic_drain_iter_cap_hit); - - /* Send-path error categorisation. */ - add_assoc_long(&entry, "quic_send_eagain", (zend_long)s.packet.quic_send_eagain); - add_assoc_long(&entry, "quic_send_gso_refused", (zend_long)s.packet.quic_send_gso_refused); - add_assoc_long(&entry, "quic_send_emsgsize", (zend_long)s.packet.quic_send_emsgsize); - add_assoc_long(&entry, "quic_send_unreach", (zend_long)s.packet.quic_send_unreach); - add_assoc_long(&entry, "quic_send_other_error", (zend_long)s.packet.quic_send_other_error); - add_assoc_long(&entry, "quic_gso_disabled", (zend_long)s.packet.quic_gso_disabled); - - /* Async errors observed via MSG_ERRQUEUE. */ - add_assoc_long(&entry, "quic_errqueue_emsgsize", (zend_long)s.packet.quic_errqueue_emsgsize); - add_assoc_long(&entry, "quic_errqueue_unreach", (zend_long)s.packet.quic_errqueue_unreach); - add_assoc_long(&entry, "quic_errqueue_other", (zend_long)s.packet.quic_errqueue_other); - - add_next_index_zval(return_value, &entry); + /* Reactor-pool split: the transport reactors own the H3 listeners. */ + for (size_t i = 0; i < server->reactor_h3_listener_count; i++) { + if (server->reactor_h3_listeners[i].listener != NULL) { + http3_emit_listener_stats(return_value, server->reactor_h3_listeners[i].listener); + } } #endif } @@ -3527,6 +4081,18 @@ static void http_server_free(zend_object *obj) zval_ptr_dtor(&server->config); + /* Reactor pool. Normally torn down in the start_pool cleanup path; this is + * the defensive catch-all if the server is freed without a clean pool + * exit. */ + http_server_reactor_pool_down(server); + + /* This worker clone's request inbox. Producers (reactors) have quiesced by + * the time a worker is freed. */ + if (server->worker_inbox != NULL) { + worker_inbox_free(server->worker_inbox); + server->worker_inbox = NULL; + } + /* Pool-mode worker ctx array (issue #11). NULL outside pool mode; * non-NULL only for parent servers that ran with workers > 1. * Each entry's persistent zval shell is released here. */ diff --git a/src/http_server_config.c b/src/http_server_config.c index 3e667e3..056a71c 100644 --- a/src/http_server_config.c +++ b/src/http_server_config.c @@ -55,6 +55,7 @@ struct _http_server_shared_config_t { zend_string *tls_cert_path; /* persistent zend_string or NULL */ zend_string *tls_key_path; + zend_string *http3_hq_docroot; /* persistent; hq-interop docroot */ size_t write_buffer_size; int backlog; @@ -2287,6 +2288,45 @@ ZEND_METHOD(TrueAsync_HttpServerConfig, getPrivateKey) } /* }}} */ +/* {{{ proto HttpServerConfig::setHttp3HqDocroot(string $path): static */ +ZEND_METHOD(TrueAsync_HttpServerConfig, setHttp3HqDocroot) +{ + zend_string *path; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STR(path) + ZEND_PARSE_PARAMETERS_END(); + + http_server_config_t *config = Z_HTTP_SERVER_CONFIG_P(ZEND_THIS); + + if (config_check_locked(config)) { + return; + } + + if (config->http3_hq_docroot) { + zend_string_release(config->http3_hq_docroot); + } + + config->http3_hq_docroot = zend_string_copy(path); + + RETURN_OBJ_COPY(Z_OBJ_P(ZEND_THIS)); +} +/* }}} */ + +/* {{{ proto HttpServerConfig::getHttp3HqDocroot(): ?string */ +ZEND_METHOD(TrueAsync_HttpServerConfig, getHttp3HqDocroot) +{ + ZEND_PARSE_PARAMETERS_NONE(); + http_server_config_t *config = Z_HTTP_SERVER_CONFIG_P(ZEND_THIS); + + if (config->http3_hq_docroot) { + RETURN_STR_COPY(config->http3_hq_docroot); + } + + RETURN_NULL(); +} +/* }}} */ + /* {{{ proto HttpServerConfig::setAutoAwaitBody(bool $enable): static */ ZEND_METHOD(TrueAsync_HttpServerConfig, setAutoAwaitBody) { @@ -2544,6 +2584,7 @@ static zend_object *http_server_config_create(zend_class_entry *ce) config->tls_enabled = false; config->tls_cert_path = NULL; config->tls_key_path = NULL; + config->http3_hq_docroot = NULL; config->auto_await_body = false; config->is_locked = false; config->log_severity = 0; /* HTTP_LOG_OFF */ @@ -2607,6 +2648,10 @@ static void http_server_config_free(zend_object *obj) zend_string_release(config->tls_key_path); } + if (config->http3_hq_docroot) { + zend_string_release(config->http3_hq_docroot); + } + if (Z_TYPE(config->log_stream) != IS_UNDEF) { zval_ptr_dtor(&config->log_stream); ZVAL_UNDEF(&config->log_stream); @@ -2721,6 +2766,12 @@ static http_server_shared_config_t *http_server_shared_config_freeze( GC_MAKE_PERSISTENT_LOCAL(shared->tls_key_path); } + if (src->http3_hq_docroot) { + shared->http3_hq_docroot = zend_string_init( + ZSTR_VAL(src->http3_hq_docroot), ZSTR_LEN(src->http3_hq_docroot), 1); + GC_MAKE_PERSISTENT_LOCAL(shared->http3_hq_docroot); + } + if (src->listener_count > 0) { shared->listeners = pecalloc(src->listener_count, sizeof(http_listener_shared_t), 1); shared->listener_count = src->listener_count; @@ -2783,6 +2834,10 @@ static void http_server_shared_config_release(http_server_shared_config_t *share zend_string_release_ex(shared->tls_key_path, 1); } + if (shared->http3_hq_docroot) { + zend_string_release_ex(shared->http3_hq_docroot, 1); + } + if (shared->compression_mime_types) { for (size_t i = 0; i < shared->compression_mime_count; i++) { if (shared->compression_mime_types[i]) { @@ -2869,6 +2924,11 @@ static void http_server_config_populate_from_shared( ZSTR_VAL(src->tls_key_path), ZSTR_LEN(src->tls_key_path), 0); } + if (src->http3_hq_docroot) { + dst->http3_hq_docroot = zend_string_init( + ZSTR_VAL(src->http3_hq_docroot), ZSTR_LEN(src->http3_hq_docroot), 0); + } + if (src->listener_count > 0) { dst->listeners = ecalloc(src->listener_count, sizeof(http_listener_config_t)); dst->listener_capacity = src->listener_count; diff --git a/src/send_file.c b/src/send_file.c index b1956a3..0348412 100644 --- a/src/send_file.c +++ b/src/send_file.c @@ -315,8 +315,9 @@ static void engine_handle_stat(engine_state_t *state) /* === Cache insert (only on miss path) =========================== */ - if (view == NULL && cfg->server != NULL) { - http_static_cache_t *cache = http_static_cache_acquire(cfg->server); + if (view == NULL && (cfg->cache != NULL || cfg->server != NULL)) { + http_static_cache_t *const cache = + cfg->cache != NULL ? cfg->cache : http_static_cache_acquire(cfg->server); if (cache != NULL) { http_static_cache_insert(cache, state->fs_path, state->fs_path_len, &state->st, @@ -479,27 +480,18 @@ static void engine_handle_stat(engine_state_t *state) } /* === Small-file fast path (slurp + inline body) ================== */ - /* uv_fs_sendfile slurps to user space then writes; on Linux it falls - * through copy_file_range (EINVAL on socket) into a pread+write loop in a - * worker thread (no zero-copy, a futex round-trip per request). On - * Windows it CANNOT target a socket at all — a Winsock SOCKET is not a - * CRT fd, so the sendfile path below delivers an EMPTY body there. For - * files within the slurp threshold — including byte-range requests, whose - * slice we cut from the in-memory buffer — read the bytes and let the - * protocol op writev(headers+body) through the normal per-socket queue - * instead. (A range/file larger than the threshold still uses sendfile - * and stays broken on Windows — separate follow-up.) */ - if ((size_t)state->st.st_size <= SEND_FILE_SLURP_THRESHOLD && file_io != NULL) { + /* uv_fs_sendfile on Linux falls through copy_file_range (EINVAL on + * socket) into a userspace pread+write loop inside a worker thread + * — no kernel zero-copy + a futex round-trip per request. For small + * files the round-trip dominates. Slurp inline and let the protocol + * op writev(headers+body) through the same per-socket queue that + * headers normally use; ordering is then libuv's problem. */ + if (!state->is_range && (size_t)state->st.st_size <= SEND_FILE_SLURP_THRESHOLD && + file_io != NULL) { zend_string *body = fs_slurp_fd((int)file_io->descriptor.fd, (size_t)state->st.st_size); if (body != NULL) { - if (state->is_range) { - /* Slice [range_first, range_first + body_len) from the buffer. */ - http_response_static_set_body_cstr(response_obj, - ZSTR_VAL(body) + state->range_first, (size_t)body_len); - } else { - http_response_static_set_body_str(response_obj, body); - } + http_response_static_set_body_str(response_obj, body); zend_string_release(body); if (cfg->counters != NULL) { http_server_count_request(cfg->counters); } diff --git a/src/static/http_static.c b/src/static/http_static.c index fe6d032..e90c21e 100644 --- a/src/static/http_static.c +++ b/src/static/http_static.c @@ -181,9 +181,22 @@ http_static_result_t http_static_try_serve(http_server_object *server, const http_static_dispatch_cbs_t *cbs, void *user) { - const size_t mount_count = http_static_handler_count(server); + return http_static_try_serve_mounts( + http_static_handler_mounts(server), http_static_handler_count(server), + http_static_cache_acquire(server), request, response_obj, counters, cbs, + user); +} - if (UNEXPECTED(mount_count == 0)) { +http_static_result_t http_static_try_serve_mounts( + const http_static_handler_t *const *mounts, size_t mount_count, + struct http_static_cache_s *cache, + http_request_t *request, + zend_object *response_obj, + http_server_counters_t *counters, + const http_static_dispatch_cbs_t *cbs, + void *user) +{ + if (UNEXPECTED(mount_count == 0 || mounts == NULL)) { return HTTP_STATIC_PASSTHROUGH; } @@ -211,7 +224,7 @@ http_static_result_t http_static_try_serve(http_server_object *server, } for (size_t mi = 0; mi < mount_count; mi++) { - const http_static_handler_t *mount = http_static_handler_get(server, mi); + const http_static_handler_t *mount = mounts[mi]; if (UNEXPECTED(mount == NULL)) { continue; @@ -337,7 +350,6 @@ http_static_result_t http_static_try_serve(http_server_object *server, size_t override_ct_len = 0; const uint32_t precomp_mask = mount_precomp_mask(mount->flags); - http_static_cache_t *cache = http_static_cache_acquire(server); if (precomp_mask != 0) { const char *pre_ct = NULL; @@ -467,7 +479,8 @@ http_static_result_t http_static_try_serve(http_server_object *server, cfg.mime_overrides = mount->mime_overrides; cfg.cache_view = have_view ? &cv : NULL; cfg.counters = counters; - cfg.server = server; + cfg.server = NULL; + cfg.cache = cache; cfg.content_encoding = picked_encoding; cfg.content_encoding_len = picked_encoding_len; diff --git a/stubs/HttpServerConfig.php b/stubs/HttpServerConfig.php index 35c31b0..fd6c254 100644 --- a/stubs/HttpServerConfig.php +++ b/stubs/HttpServerConfig.php @@ -783,6 +783,22 @@ public function setPrivateKey(string $path): static {} */ public function getPrivateKey(): ?string {} + /** + * Set the hq-interop (HTTP/0.9-over-QUIC) document root. + * + * Files under this directory are served verbatim to hq-interop clients + * (the QUIC interop test matrix speaks hq, not h3). No effect on h3. + * + * @param string $path Document root directory + * @return static + */ + public function setHttp3HqDocroot(string $path): static {} + + /** + * Get the hq-interop document root. + */ + public function getHttp3HqDocroot(): ?string {} + // === Body handling === /** diff --git a/stubs/HttpServerConfig.php_arginfo.h b/stubs/HttpServerConfig.php_arginfo.h index b1c8029..ba38fa0 100644 --- a/stubs/HttpServerConfig.php_arginfo.h +++ b/stubs/HttpServerConfig.php_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit HttpServerConfig.php.stub.php instead. - * Stub hash: 5e06258fb3426d3a0088e298517aef1a146f1a81 */ + * Stub hash: 8a6df78fa87bb63b7abc69d5acd4c8ef760086d0 */ ZEND_BEGIN_ARG_INFO_EX(arginfo_class_TrueAsync_HttpServerConfig___construct, 0, 0, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, host, IS_STRING, 1, "null") @@ -145,10 +145,6 @@ ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_TrueAsync_HttpServerConfig_isHttp3AltSvcEnabled, 0, 0, _IS_BOOL, 0) ZEND_END_ARG_INFO() -#define arginfo_class_TrueAsync_HttpServerConfig_setCompressionEnabled arginfo_class_TrueAsync_HttpServerConfig_setHttp3AltSvcEnabled - -#define arginfo_class_TrueAsync_HttpServerConfig_isCompressionEnabled arginfo_class_TrueAsync_HttpServerConfig_isHttp3AltSvcEnabled - #define arginfo_class_TrueAsync_HttpServerConfig_setHttp3Pacing arginfo_class_TrueAsync_HttpServerConfig_setHttp3AltSvcEnabled #define arginfo_class_TrueAsync_HttpServerConfig_isHttp3Pacing arginfo_class_TrueAsync_HttpServerConfig_isHttp3AltSvcEnabled @@ -157,6 +153,10 @@ ZEND_END_ARG_INFO() #define arginfo_class_TrueAsync_HttpServerConfig_isRequestScope arginfo_class_TrueAsync_HttpServerConfig_isHttp3AltSvcEnabled +#define arginfo_class_TrueAsync_HttpServerConfig_setCompressionEnabled arginfo_class_TrueAsync_HttpServerConfig_setHttp3AltSvcEnabled + +#define arginfo_class_TrueAsync_HttpServerConfig_isCompressionEnabled arginfo_class_TrueAsync_HttpServerConfig_isHttp3AltSvcEnabled + ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_TrueAsync_HttpServerConfig_setCompressionLevel, 0, 1, IS_STATIC, 0) ZEND_ARG_TYPE_INFO(0, level, IS_LONG, 0) ZEND_END_ARG_INFO() @@ -224,6 +224,10 @@ ZEND_END_ARG_INFO() #define arginfo_class_TrueAsync_HttpServerConfig_getPrivateKey arginfo_class_TrueAsync_HttpServerConfig_getCertificate +#define arginfo_class_TrueAsync_HttpServerConfig_setHttp3HqDocroot arginfo_class_TrueAsync_HttpServerConfig_addUnixListener + +#define arginfo_class_TrueAsync_HttpServerConfig_getHttp3HqDocroot arginfo_class_TrueAsync_HttpServerConfig_getCertificate + #define arginfo_class_TrueAsync_HttpServerConfig_setAutoAwaitBody arginfo_class_TrueAsync_HttpServerConfig_setHttp3AltSvcEnabled #define arginfo_class_TrueAsync_HttpServerConfig_isAutoAwaitBodyEnabled arginfo_class_TrueAsync_HttpServerConfig_isHttp3AltSvcEnabled @@ -344,6 +348,8 @@ ZEND_METHOD(TrueAsync_HttpServerConfig, setCertificate); ZEND_METHOD(TrueAsync_HttpServerConfig, getCertificate); ZEND_METHOD(TrueAsync_HttpServerConfig, setPrivateKey); ZEND_METHOD(TrueAsync_HttpServerConfig, getPrivateKey); +ZEND_METHOD(TrueAsync_HttpServerConfig, setHttp3HqDocroot); +ZEND_METHOD(TrueAsync_HttpServerConfig, getHttp3HqDocroot); ZEND_METHOD(TrueAsync_HttpServerConfig, setAutoAwaitBody); ZEND_METHOD(TrueAsync_HttpServerConfig, isAutoAwaitBodyEnabled); ZEND_METHOD(TrueAsync_HttpServerConfig, setLogSeverity); @@ -447,6 +453,8 @@ static const zend_function_entry class_TrueAsync_HttpServerConfig_methods[] = { ZEND_ME(TrueAsync_HttpServerConfig, getCertificate, arginfo_class_TrueAsync_HttpServerConfig_getCertificate, ZEND_ACC_PUBLIC) ZEND_ME(TrueAsync_HttpServerConfig, setPrivateKey, arginfo_class_TrueAsync_HttpServerConfig_setPrivateKey, ZEND_ACC_PUBLIC) ZEND_ME(TrueAsync_HttpServerConfig, getPrivateKey, arginfo_class_TrueAsync_HttpServerConfig_getPrivateKey, ZEND_ACC_PUBLIC) + ZEND_ME(TrueAsync_HttpServerConfig, setHttp3HqDocroot, arginfo_class_TrueAsync_HttpServerConfig_setHttp3HqDocroot, ZEND_ACC_PUBLIC) + ZEND_ME(TrueAsync_HttpServerConfig, getHttp3HqDocroot, arginfo_class_TrueAsync_HttpServerConfig_getHttp3HqDocroot, ZEND_ACC_PUBLIC) ZEND_ME(TrueAsync_HttpServerConfig, setAutoAwaitBody, arginfo_class_TrueAsync_HttpServerConfig_setAutoAwaitBody, ZEND_ACC_PUBLIC) ZEND_ME(TrueAsync_HttpServerConfig, isAutoAwaitBodyEnabled, arginfo_class_TrueAsync_HttpServerConfig_isAutoAwaitBodyEnabled, ZEND_ACC_PUBLIC) ZEND_ME(TrueAsync_HttpServerConfig, setLogSeverity, arginfo_class_TrueAsync_HttpServerConfig_setLogSeverity, ZEND_ACC_PUBLIC) diff --git a/tests/h3client/h3client.c b/tests/h3client/h3client.c index 4f722a3..965f911 100644 --- a/tests/h3client/h3client.c +++ b/tests/h3client/h3client.c @@ -431,13 +431,73 @@ static int rebind_socket(h3c_t *c) { const int fl = fcntl(nfd, F_GETFL, 0); if (fl >= 0) (void)fcntl(nfd, F_SETFL, fl | O_NONBLOCK); + /* Keep every retired socket OPEN until exit (leaking a bounded handful of + * fds across the test's migrations). Closing one makes the kernel answer the + * server's in-flight packets on that old path with ICMP port-unreachable — + * which a real NAT never does, and which can disturb the server's path + * validation. Track the most recent here; main() closes it at exit. */ c->retired_fd = c->fd; c->fd = nfd; + + /* Deliberately DO NOT refresh c.local: this simulates a NAT rebind, where + * the source port changes on the wire *below* the client's QUIC stack. The + * client keeps describing its original local path to ngtcp2 (no client- + * initiated migration); only the server observes the new peer address. */ + return 0; +} + +/* Force a real client-initiated migration to a *new* local address. Unlike + * rebind_socket() (a NAT rebind that keeps the same DCID), ngtcp2 rotates its + * DCID to one of the server-issued NEW_CONNECTION_ID values (RFC 9000 §5.1) as + * part of migrating. This exercises the server's conn_map lookup on an *issued* + * CID — the path that hangs when the server never registers the CIDs it hands + * out. Returns 0 on success, 1 if no unused server CID is available yet, -1 on + * error. */ +static int rotate_dcid(h3c_t *c) { + int nfd = socket(AF_INET, SOCK_DGRAM, 0); + if (nfd < 0) { perror("rotate socket"); return -1; } + + struct sockaddr_in zero = { .sin_family = AF_INET }; + if (bind(nfd, (struct sockaddr *)&zero, sizeof(zero)) < 0) { + perror("rotate bind"); close(nfd); return -1; + } + + const int rcvbuf = 8 * 1024 * 1024; + (void)setsockopt(nfd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)); + const int fl = fcntl(nfd, F_GETFL, 0); + if (fl >= 0) (void)fcntl(nfd, F_SETFL, fl | O_NONBLOCK); + + /* Retire the old socket (kept open until exit, like rebind_socket, so the + * kernel does not ICMP the server's in-flight old-path packets). */ + c->retired_fd = c->fd; + c->fd = nfd; + + /* REFRESH c->local — this is what distinguishes a migration from a NAT + * rebind: ngtcp2 must see a new local path to migrate (and reject a path + * whose local equals the current one). */ + c->local_len = sizeof(c->local); + if (getsockname(c->fd, (struct sockaddr *)&c->local, &c->local_len) < 0) { + perror("rotate getsockname"); return -1; + } + + ngtcp2_path_storage ps = {0}; + ngtcp2_path_storage_init(&ps, + (struct sockaddr *)&c->local, c->local_len, + (struct sockaddr *)&c->remote, c->remote_len, NULL); + int rv = ngtcp2_conn_initiate_immediate_migration(c->qc, &ps.path, now_ns()); + if (rv == NGTCP2_ERR_CONN_ID_BLOCKED) { + return 1; + } + if (rv != 0) { + fprintf(stderr, "h3client: initiate_immediate_migration rv=%d\n", rv); + return -1; + } return 0; } /* ----- main ----- */ + int main(int argc, char **argv) { if (argc < 4) { fprintf(stderr, "usage: %s [ []]\n", argv[0]); @@ -615,10 +675,13 @@ int main(int argc, char **argv) { } } - /* H3CLIENT_MIGRATE_AFTER=N — after the N-th completed request, rebind - * the UDP socket to a new source port (NAT-rebind) before issuing the - * next one. Drives the server's connection-migration path (RFC 9000 - * §9). 0/unset = off; pair with H3CLIENT_REQUEST_COUNT > N. */ + /* H3CLIENT_MIGRATE_AFTER=N — once N requests have completed, rebind the UDP + * socket to a new source port (NAT-rebind) before EACH subsequent request, + * so REQUEST_COUNT-N migrations happen back to back. Drives the server's + * connection-migration path (RFC 9000 §9); with a multi-reactor server it + * also forces SO_REUSEPORT to rehash the connection onto another reactor, + * exercising CID steering (#80 D6 / #72). 0/unset = off; pair with + * H3CLIENT_REQUEST_COUNT > N. */ unsigned long migrate_after = 0; { const char *env = getenv("H3CLIENT_MIGRATE_AFTER"); @@ -631,6 +694,25 @@ int main(int argc, char **argv) { } } + /* H3CLIENT_ROTATE_DCID_AFTER=N — once N requests have completed, perform a + * real client-initiated migration (new local addr) so ngtcp2 rotates its + * DCID to a server-issued NEW_CONNECTION_ID. Drives the server's conn_map + * lookup on an *issued* CID (RFC 9000 §5.1). Repeats before each subsequent + * request, so REQUEST_COUNT-N rotations happen in sequence. Distinct from + * MIGRATE_AFTER (NAT rebind, same DCID). 0/unset = off; pair with + * H3CLIENT_REQUEST_COUNT > N. */ + unsigned long rotate_dcid_after = 0; + { + const char *env = getenv("H3CLIENT_ROTATE_DCID_AFTER"); + if (env != NULL && *env != '\0') { + char *end = NULL; + unsigned long n = strtoul(env, &end, 10); + if (end != env && *end == '\0' && n <= 10000000ul) { + rotate_dcid_after = n; + } + } + } + unsigned long completed = 0; bool sent = false; uint64_t deadline_ns = now_ns() + deadline_ms * 1000000ull; @@ -670,15 +752,30 @@ int main(int argc, char **argv) { completed++; if (completed >= request_count) break; - /* NAT-rebind point — continue the same connection from a new - * source port so the server exercises its migration path. */ - if (migrate_after != 0 && completed == migrate_after) { + /* NAT-rebind point — continue the same connection from a new source + * port so the server exercises its migration path. Rebinds before + * every request once `migrate_after` is reached, so a multi-reactor + * server is repeatedly rehashed across reactors. */ + if (migrate_after != 0 && completed >= migrate_after) { if (rebind_socket(&c) < 0) { fprintf(stderr, "h3client: rebind failed\n"); return 1; } if (!quiet) { fprintf(stderr, "MIGRATED\n"); } } + /* DCID-rotation point — migrate to a new local path so ngtcp2 + * switches its DCID to a server-issued CID. Repeats before each + * subsequent request once the threshold is reached. */ + if (rotate_dcid_after != 0 && completed >= rotate_dcid_after) { + int rr = rotate_dcid(&c); + if (rr < 0) { + fprintf(stderr, "h3client: rotate_dcid failed\n"); return 1; + } + if (!quiet) { + fprintf(stderr, rr == 0 ? "ROTATED_DCID\n" : "ROTATE_BLOCKED\n"); + } + } + /* Reset per-request state — keep the connection + h3 conn. */ c.response_status = 0; c.response_header_count = 0; diff --git a/tests/interop/quic/Dockerfile b/tests/interop/quic/Dockerfile new file mode 100644 index 0000000..d5051a4 --- /dev/null +++ b/tests/interop/quic/Dockerfile @@ -0,0 +1,42 @@ +# QUIC interop-runner endpoint for the TrueAsync HTTP/3 server. +# +# Layers the published TrueAsync runtime (PHP 8.6 + OpenSSL 3.5 + nghttp3 + +# ngtcp2 + true_async_server.so, all under /usr/local) onto the +# quic-network-simulator endpoint base, which provides the netns/route setup the +# runner drives. Both are Ubuntu 24.04, so /usr/local copies cleanly. +# +# docker build -t trueasync/quic-interop-endpoint:latest \ +# --build-arg TAS_IMAGE=trueasync/php-true-async:0.7.2-php8.6 \ +# -f tests/interop/quic/Dockerfile tests/interop/quic +# +# Then add the implementations.json entry (see README.md) and run the suite from +# a checkout of https://github.com/quic-interop/quic-interop-runner. + +ARG TAS_IMAGE=trueasync/php-true-async:0.7.2-php8.6 + +FROM ${TAS_IMAGE} AS tas + +FROM martenseemann/quic-network-simulator-endpoint:latest + +ENV DEBIAN_FRONTEND=noninteractive + +# System runtime libs PHP links against (the COPY below only brings /usr/local). +# Same set as the proven release runtime stage (releases/docker/Dockerfile.debian). +RUN apt-get update && apt-get install -y --no-install-recommends \ + libxml2 libargon2-1 libedit2 libreadline8 \ + libsodium23 libsqlite3-0 libonig5 libzip4 \ + libpng16-16 libjpeg8 libwebp7 libfreetype6 \ + libgmp10 libldap2 libsasl2-2 libpq5 \ + libmysqlclient21 libbz2-1.0 libenchant-2-2 \ + libffi8 libgdbm6 liblmdb0 libsnmp40 \ + libtidy5deb1 libxslt1.1 libicu74 ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=tas /usr/local /usr/local +COPY --from=tas /etc/php.d /etc/php.d + +RUN ldconfig && php -v && php -m | grep -i true_async_server + +COPY entry.php /interop/entry.php +COPY run_endpoint.sh /run_endpoint.sh +RUN chmod +x /run_endpoint.sh diff --git a/tests/interop/quic/Dockerfile.source b/tests/interop/quic/Dockerfile.source new file mode 100644 index 0000000..d4c0113 --- /dev/null +++ b/tests/interop/quic/Dockerfile.source @@ -0,0 +1,58 @@ +# QUIC interop endpoint built from the LOCAL working tree (unreleased branch). +# +# Unlike Dockerfile (which layers a published release image), this compiles the +# server extension from the current source against the PHP + OpenSSL/ngtcp2/ +# nghttp3 already inside a TrueAsync base image — guaranteed header/ABI match, +# and far cheaper than a full from-scratch stack build. Use it to interop-test a +# branch before release. +# +# docker build -t trueasync/quic-interop-endpoint:branch \ +# --build-arg TAS_IMAGE=trueasync/php-true-async:0.7.0-rc.6-php8.6 \ +# -f tests/interop/quic/Dockerfile.source . # context = repo root + +ARG TAS_IMAGE=trueasync/php-true-async:0.7.0-rc.6-php8.6 + +FROM ${TAS_IMAGE} AS build +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y --no-install-recommends \ + autoconf automake libtool gcc g++ make pkg-config re2c bison \ + && rm -rf /var/lib/apt/lists/* +COPY . /usr/src/server +WORKDIR /usr/src/server +# Strip host-generated autotools/build artifacts (the COPY brought a Makefile with +# host absolute paths) so phpize + configure regenerate cleanly for the container. +RUN rm -rf Makefile Makefile.global Makefile.objects Makefile.fragments \ + configure config.h config.h.in config.status config.cache config.nice \ + config.log autom4te.cache build modules libtool \ + && find . \( -name '*.lo' -o -name '*.la' -o -name '*.o' -o -name '*.dep' \) -delete 2>/dev/null \ + && find . -type d -name .libs -exec rm -rf {} + 2>/dev/null; true +RUN phpize \ + && PKG_CONFIG_PATH=/usr/local/lib/pkgconfig ./configure \ + --with-php-config=/usr/local/bin/php-config \ + --enable-http-server --enable-http2 --enable-http3 \ + --with-openssl --with-nghttp2=/usr/local \ + --with-nghttp3=/usr/local --with-ngtcp2=/usr/local \ + && make -j"$(nproc)" \ + && make install + +FROM martenseemann/quic-network-simulator-endpoint:latest + +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y --no-install-recommends \ + libxml2 libargon2-1 libedit2 libreadline8 \ + libsodium23 libsqlite3-0 libonig5 libzip4 \ + libpng16-16 libjpeg8 libwebp7 libfreetype6 \ + libgmp10 libldap2 libsasl2-2 libpq5 \ + libmysqlclient21 libbz2-1.0 libenchant-2-2 \ + libffi8 libgdbm6 liblmdb0 libsnmp40 \ + libtidy5deb1 libxslt1.1 libicu74 ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=build /usr/local /usr/local +COPY --from=build /etc/php.d /etc/php.d + +RUN ldconfig && php -v && php -m | grep -i true_async_server + +COPY tests/interop/quic/entry.php /interop/entry.php +COPY tests/interop/quic/run_endpoint.sh /run_endpoint.sh +RUN chmod +x /run_endpoint.sh diff --git a/tests/interop/quic/README.md b/tests/interop/quic/README.md new file mode 100644 index 0000000..b2e4bb4 --- /dev/null +++ b/tests/interop/quic/README.md @@ -0,0 +1,72 @@ +# QUIC interop-runner endpoint (HTTP/3 conformance, #80 / P0) + +Wires the TrueAsync HTTP/3 server into the +[quic-interop-runner](https://github.com/quic-interop/quic-interop-runner) — the +IETF cross-implementation test harness. It pairs our server against third-party +QUIC clients (quic-go, ngtcp2, mvfst, quiche, picoquic, …) over a simulated +network (ns-3), giving conformance coverage of **RFC 9000** (QUIC transport), +**9001** (QUIC-TLS), **9002** (recovery), **9114** (HTTP/3) and **9204** (QPACK) +that the in-tree phpt suite (own client only) can't. + +## Files + +| File | Role | +|------|------| +| `entry.php` | Server: serves `/www` over H3 (UDP) + H2/H1 (TLS, TCP) on one port, with `/certs/{cert.pem,priv.key}`. Validated locally — see below. | +| `run_endpoint.sh` | Endpoint hook: role + test-case gating (`exit 127` for unsupported), launches the server on 443. | +| `Dockerfile` | Published TrueAsync runtime layered onto the simulator endpoint base. | +| `implementations.snippet.json` | Entry to add to the runner's `implementations_quic.json`. | + +## Running (needs a Docker host) + +The runner is Docker- + ns-3-only; it cannot run in a plain WSL distro without +Docker. On a Docker host: + +```bash +# 1. Build the endpoint image (pin TAS_IMAGE to the matching release tag). +docker build -t trueasync/quic-interop-endpoint:latest \ + --build-arg TAS_IMAGE=trueasync/php-true-async:0.7.2-php8.6 \ + -f tests/interop/quic/Dockerfile tests/interop/quic + +# 2. In a quic-interop-runner checkout, merge implementations.snippet.json into +# implementations_quic.json, then run our server against all clients: +python run.py -s trueasync -t handshake,transfer,http3,multiplexing +``` + +## Supported test cases (this iteration) + +Cases a correct H3 file server passes via **downloaded-file integrity** (the +runner compares the bytes the client pulled): + +`handshake`, `transfer`, `http3`, `multiplexing`, `longrtt`, `goodput`, +`crosstraffic`, `transferloss`, `transfercorruption`, `blackhole`, +`handshakeloss`. + +Everything else returns `exit 127` (skipped) for now — see gaps. + +## Known gaps / follow-ups + +1. **Server-side `SSLKEYLOGFILE` + `QLOGDIR` export is not wired** (TLS-layer + follow-up). The runner verifies the cases above by downloaded-file integrity, + which works without it; cases it verifies by inspecting *server* packet traces + (`amplificationlimit`, `ecn`, …) need it and are screened out. +2. **Special-config cases not yet enabled:** `retry` (force address validation), + `resumption` (TLS tickets), `zerortt` (0-RTT early data — code exists, needs + wiring), `chacha20` (cipher pin), `keyupdate`, `v2` (QUIC v2), `multiconnect`. +3. **Not run here:** this WSL distro has no Docker, so the matrix above is the + set expected to pass on a correct stack — confirm actual pass/fail on a Docker + host / CI. + +## Local validation (no Docker) + +`entry.php`'s serving logic is validated against our own `tests/h3client`: +start it on a high port with `examples/certs`, `GET` a file, and the body matches +byte-for-byte (`STATUS=200`, exact SHA-256). Only the Docker/network wrapping is +unexercised in this environment. + +```bash +INTEROP_WWW=/path/to/www INTEROP_CERT=examples/certs/server.crt \ +INTEROP_KEY=examples/certs/server.key INTEROP_PORT=8543 WORKERS=1 \ + php -d extension=./modules/true_async_server.so tests/interop/quic/entry.php & +tests/h3client/h3client 127.0.0.1 8543 /yourfile +``` diff --git a/tests/interop/quic/entry.php b/tests/interop/quic/entry.php new file mode 100644 index 0000000..fcda360 --- /dev/null +++ b/tests/interop/quic/entry.php @@ -0,0 +1,106 @@ +setWorkers($workers); +$config->setMaxBodySize(64 * 1024 * 1024); + +// HTTP/3 (QUIC, UDP) on the interop port — the path the runner's client uses. +// A plain TCP listener is required by start(); the interop client never connects +// to it. TLS — the cert AND the QUIC "h3" ALPN selector — is enabled at CONFIG +// level via enableTls(); the H3 listener inherits both from here. Configuring the +// cert on a per-listener TLS addListener() instead leaves the QUIC ALPN as the TCP +// list (h2/http1.1), so the server answers the client's "h3" with a fatal +// no_application_protocol alert and the handshake stalls. +$config->addListener('0.0.0.0', $port + 1); +$config->addHttp3Listener('0.0.0.0', $port); +$config->enableTls(true)->setCertificate($cert)->setPrivateKey($key); + +// hq-interop (HTTP/0.9-over-QUIC): the runner negotiates this ALPN for the +// whole transport matrix (migration/rebinding/multiplexing/loss). The hq shim +// serves files straight off the transport reactor from this docroot — same +// files the h3 handler below serves, no PHP per request. +$config->setHttp3HqDocroot($www); + +// INTEROP_DEBUG=1 turns on the ngtcp2 DEBUG bridge to stderr (one line per frame) +// for diagnosing handshake/transport failures under the interop simulator. +if (getenv('INTEROP_DEBUG')) { + $config->setLogSeverity(\TrueAsync\LogSeverity::DEBUG); + $config->setLogStream(fopen('php://stderr', 'w')); +} + +$server = new HttpServer($config); + +// The runner fetches files at the URL root (StaticHandler can't mount '/'), so map +// the request path onto the docroot and zero-copy it back with sendFile (handles +// Content-Length and Range, which the transfer/range test cases exercise). Reject +// traversal / NUL; anything that is not a readable file is a clean 404. +$root = realpath($www); + +if ($root === false) { + fwrite(STDERR, "[interop] docroot not found: {$www}\n"); + exit(1); +} + +$server->addHttpHandler(static function (HttpRequest $req, HttpResponse $res) use ($root): void { + $uri = $req->getUri(); + $qpos = strpos($uri, '?'); + $path = rawurldecode($qpos === false ? $uri : substr($uri, 0, $qpos)); + + if ($path === '' || $path[0] !== '/' + || strpos($path, "\0") !== false || strpos($path, '..') !== false) { + $res->setStatusCode(404)->setBody('not found'); + return; + } + + $full = $root . $path; + + if (!is_file($full) || !is_readable($full)) { + $res->setStatusCode(404)->setBody('not found'); + return; + } + + $res->sendFile($full); +}); + +fprintf(STDERR, "[interop] server up :%d www=%s workers=%d testcase=%s\n", + $port, $www, $workers, getenv('TESTCASE') ?: '-'); + +$server->start(); diff --git a/tests/interop/quic/implementations.snippet.json b/tests/interop/quic/implementations.snippet.json new file mode 100644 index 0000000..aa076c5 --- /dev/null +++ b/tests/interop/quic/implementations.snippet.json @@ -0,0 +1,7 @@ +{ + "trueasync": { + "image": "trueasync/quic-interop-endpoint:latest", + "url": "https://github.com/true-async/server", + "role": "server" + } +} diff --git a/tests/interop/quic/run_endpoint.sh b/tests/interop/quic/run_endpoint.sh new file mode 100755 index 0000000..1bb2022 --- /dev/null +++ b/tests/interop/quic/run_endpoint.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# QUIC interop-runner endpoint hook (server role). +# +# The quic-network-simulator base image configures the network and then execs +# this script. Per the interface, an unsupported test case MUST exit 127 so new +# cases can be added without breaking existing implementations. +set -e + +if [ "$ROLE" != "server" ]; then + echo "true-async endpoint implements the server role only" >&2 + exit 127 +fi + +# Cases a correct HTTP/3 file server passes via downloaded-file integrity (the +# runner compares the bytes the client pulled from /www). Cases that need +# server-side packet-trace inspection (SSLKEYLOGFILE / qlog export — not wired +# yet) or special configuration (retry, resumption, zerortt, chacha20, keyupdate, +# ecn, v2, amplificationlimit, multiconnect) are screened out until supported. +case "$TESTCASE" in + handshake|transfer|http3|multiplexing|longrtt|goodput|crosstraffic|transferloss|transfercorruption|blackhole|handshakeloss) + ;; + *) + echo "unsupported test case: $TESTCASE" >&2 + exit 127 + ;; +esac + +exec env \ + INTEROP_WWW=/www \ + INTEROP_CERT=/certs/cert.pem \ + INTEROP_KEY=/certs/priv.key \ + INTEROP_PORT=443 \ + WORKERS="${WORKERS:-1}" \ + php -d extension=true_async_server /interop/entry.php diff --git a/tests/phpt/server/compression/010-h1-buffered-gzip.phpt b/tests/phpt/server/compression/010-h1-buffered-gzip.phpt index 817b24f..36e7322 100644 --- a/tests/phpt/server/compression/010-h1-buffered-gzip.phpt +++ b/tests/phpt/server/compression/010-h1-buffered-gzip.phpt @@ -6,8 +6,7 @@ true_async --SKIPIF-- /dev/null')) === '') die('skip gunzip(1) not in PATH'); +if (trim((string)shell_exec('command -v gunzip')) === '') die('skip gunzip(1) not in PATH'); ?> --FILE-- /dev/null')) === '') die('skip gunzip(1) not in PATH'); +if (trim((string)shell_exec('command -v gunzip')) === '') die('skip gunzip(1) not in PATH'); ?> --FILE-- /dev/null')) === '') die('skip gzip(1) not in PATH'); +if (trim((string)shell_exec('command -v gzip')) === '') die('skip gzip(1) not in PATH'); ?> --FILE-- /dev/null')) === '') die('skip gunzip(1) not in PATH'); +if (trim((string)shell_exec('command -v gunzip')) === '') die('skip gunzip(1) not in PATH'); ?> --FILE-- addHttpHandler(function ($req, $res) { $client = spawn(function () use ($port, $server) { usleep(30000); - /* First connection — accept trips hard-cap → drain epoch=1. */ + /* First connection — accept trips hard-cap → drain epoch=1. + * `Connection: close` so the server frees the single conn slot right + * after the response (server-initiated close), instead of leaving conn1 + * as an idle keep-alive that only frees when the client read times out. + * Without it the slot-free → listener-resume → conn2-accept chain can + * overrun conn2's read window on a slow/loaded debug build, leaving + * cooldown_blocked at 0 (the CI flake). */ $fp1 = stream_socket_client("tcp://127.0.0.1:$port", $e, $es, 3); - fwrite($fp1, "GET / HTTP/1.1\r\nHost: x\r\n\r\n"); + fwrite($fp1, "GET / HTTP/1.1\r\nHost: x\r\nConnection: close\r\n\r\n"); stream_set_timeout($fp1, 3); while (!feof($fp1)) { $c = fread($fp1, 4096); if ($c === '' || $c === false) break; @@ -66,9 +72,17 @@ $client = spawn(function () use ($port, $server) { } fclose($fp2); - usleep(100000); - + /* conn2's blocked event registers when the server accepts it — which only + * happens after conn1 drains and the listener resumes. On a slow / loaded + * debug build that chain can lag well past any fixed sleep (observed in CI: + * cooldown_blocked still 0 at read time). The terminal state (1/1/1) is + * stable once reached, so poll for it instead of reading once. */ $tel = $server->getTelemetry(); + for ($i = 0; $i < 200 && (int)$tel['drain_events_cooldown_blocked_total'] < 1; $i++) { + usleep(50000); + $tel = $server->getTelemetry(); + } + echo "epoch=", (int)$tel['drain_epoch_current'], "\n"; echo "reactive_events=", (int)$tel['drain_events_reactive_total'], "\n"; echo "cooldown_blocked=", (int)$tel['drain_events_cooldown_blocked_total'], "\n"; diff --git a/tests/phpt/server/h3/037-h3-reactor-pool-e2e.phpt b/tests/phpt/server/h3/037-h3-reactor-pool-e2e.phpt new file mode 100644 index 0000000..7e5fa64 --- /dev/null +++ b/tests/phpt/server/h3/037-h3-reactor-pool-e2e.phpt @@ -0,0 +1,105 @@ +--TEST-- +HttpServer: HTTP/3 end-to-end GET through the reactor/worker split (#80, gated pool) +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + true, 'h3client' => true]); +?> +--ENV-- +TRUE_ASYNC_SERVER_REACTOR_POOL=1 +PHP_HTTP3_DISABLE_RETRY=1 +--FILE-- +1, so a + * clean in-process stop() is not available (issue #11) — SIGKILL after + * the client finishes, %A swallows the abrupt exit. Stats are not asserted: + * in the split the listeners live on thread-clean reactor contexts without + * the worker's request counters, so getHttp3Stats() does not reflect them. */ + +use TrueAsync\HttpServer; +use TrueAsync\HttpServerConfig; +use function Async\spawn; + +require __DIR__ . '/_h3_skipif.inc'; + +$tmp = __DIR__ . '/tmp-037'; +@mkdir($tmp, 0700, true); +$cert = $tmp . '/cert.pem'; +$key = $tmp . '/key.pem'; +if (!h3_gen_cert($key, $cert)) { echo "cert gen failed\n"; exit(1); } +register_shutdown_function(function () use ($tmp, $cert, $key) { + @unlink($cert); @unlink($key); @rmdir($tmp); +}); + +$port = 21300 + getmypid() % 40; + +$config = (new HttpServerConfig()) + ->addListener('127.0.0.1', $port + 1) /* TCP listener required by start() */ + ->addHttp3Listener('127.0.0.1', $port) + ->enableTls(true)->setCertificate($cert)->setPrivateKey($key) + ->setWorkers(2); +$server = new HttpServer($config); +$server->addHttpHandler(function ($req, $res) { + /* The body is computed on the worker from the handed-off request, so + * a correct echo proves method+uri crossed reactor->worker; the custom + * 201 + header prove status+headers crossed worker->reactor. */ + $res->setStatusCode(201) + ->setHeader('content-type', 'text/plain; charset=utf-8') + ->setBody('echo:' . $req->getMethod() . ':' . $req->getUri()); +}); + +$client_bin = __DIR__ . '/../../../h3client/h3client'; + +spawn(function () use ($server, $port, $client_bin) { + /* Reactors + workers need a moment to thread up and bind. */ + usleep(600000); + + $cmd = sprintf('H3CLIENT_DEADLINE_MS=4000 %s 127.0.0.1 %d /world GET 2>&1', + escapeshellarg($client_bin), $port); + $out = shell_exec($cmd) ?? ''; + + $status = null; + if (preg_match('/^STATUS=(\d+)$/m', $out, $m)) $status = (int)$m[1]; + $body = preg_replace('/^STATUS=\d+\n?/m', '', $out); + + echo "status=", $status ?? -1, "\n"; + echo "body=", trim($body), "\n"; + + /* The reactor-owned listeners must surface through getHttp3Stats() so a + * pooled server is observable (#80). Aggregate across listener entries — + * SO_REUSEPORT spreads the single connection across one reactor. */ + $req_recv = 0; $resp_sub = 0; + foreach ($server->getHttp3Stats() as $st) { + $req_recv += (int)($st['h3_request_received'] ?? 0); + $resp_sub += (int)($st['h3_response_submitted'] ?? 0); + } + echo "stats_request_received_ge1=", ($req_recv >= 1 ? 1 : 0), "\n"; + echo "stats_response_submitted_ge1=", ($resp_sub >= 1 ? 1 : 0), "\n"; + + /* Issue #11: no clean cross-thread shutdown for the pool yet; SIGKILL + * skips PHP shutdown so the worker threads cannot deadlock on exit. */ + posix_kill(getmypid(), SIGKILL); +}); + +$server->start(); +?> +--EXPECTF-- +%Astatus=201 +body=echo:GET:/world +stats_request_received_ge1=1 +stats_response_submitted_ge1=1 +%A diff --git a/tests/phpt/server/h3/038-h3-reactor-pool-post.phpt b/tests/phpt/server/h3/038-h3-reactor-pool-post.phpt new file mode 100644 index 0000000..e83133f --- /dev/null +++ b/tests/phpt/server/h3/038-h3-reactor-pool-post.phpt @@ -0,0 +1,93 @@ +--TEST-- +HttpServer: HTTP/3 POST through the reactor/worker split — body crosses persistent (#80, D7.6) +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + true, 'h3client' => true]); +?> +--ENV-- +TRUE_ASYNC_SERVER_REACTOR_POOL=1 +PHP_HTTP3_DISABLE_RETRY=1 +--FILE-- +addListener('127.0.0.1', $port + 1) + ->addHttp3Listener('127.0.0.1', $port) + ->enableTls(true)->setCertificate($cert)->setPrivateKey($key) + ->setWorkers(2); +$server = new HttpServer($config); +$server->addHttpHandler(function ($req, $res) { + $b = $req->getBody(); + $res->setStatusCode(200) + ->setHeader('content-type', 'text/plain') + ->setBody(sprintf("len=%d sha1=%s method=%s", + strlen($b), sha1($b), $req->getMethod())); +}); + +$client_bin = __DIR__ . '/../../../h3client/h3client'; + +spawn(function () use ($port, $client_bin, $body_path, $expected) { + usleep(600000); + + $cmd = sprintf('H3CLIENT_DEADLINE_MS=4000 %s 127.0.0.1 %d /upload POST %s 2>&1', + escapeshellarg($client_bin), $port, escapeshellarg($body_path)); + $out = shell_exec($cmd) ?? ''; + + $status = null; + if (preg_match('/^STATUS=(\d+)$/m', $out, $m)) $status = (int)$m[1]; + $body = trim(preg_replace('/^STATUS=\d+\n?/m', '', $out)); + + echo "status=", $status ?? -1, "\n"; + echo "match=", ($body === $expected ? "yes" : "no\n got=$body\n want=$expected"), "\n"; + + /* Issue #11: no clean cross-thread pool shutdown yet. */ + posix_kill(getmypid(), SIGKILL); +}); + +$server->start(); +?> +--EXPECTF-- +%Astatus=200 +match=yes +%A diff --git a/tests/phpt/server/h3/039-h3-reactor-pool-static.phpt b/tests/phpt/server/h3/039-h3-reactor-pool-static.phpt new file mode 100644 index 0000000..90c7392 --- /dev/null +++ b/tests/phpt/server/h3/039-h3-reactor-pool-static.phpt @@ -0,0 +1,102 @@ +--TEST-- +HttpServer: HTTP/3 static file served on the transport reactor + passthrough to worker (#80, #60) +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + true, 'h3client' => true]); +?> +--ENV-- +TRUE_ASYNC_SERVER_REACTOR_POOL=1 +PHP_HTTP3_DISABLE_RETRY=1 +--FILE-- + file body (served on the reactor) + * /dyn -> handler body (passthrough to a worker) + * + * Before reactor-side static, the static request fell through to the worker's + * user handler and never read the file. SIGKILL teardown (issue #11). */ + +use TrueAsync\HttpServer; +use TrueAsync\HttpServerConfig; +use TrueAsync\StaticHandler; +use function Async\spawn; + +require __DIR__ . '/_h3_skipif.inc'; + +$tmp = __DIR__ . '/tmp-039'; +@mkdir($tmp, 0700, true); +$cert = $tmp . '/cert.pem'; +$key = $tmp . '/key.pem'; +$root = $tmp . '/www'; +@mkdir($root, 0700, true); +file_put_contents("$root/hello.txt", "reactor-static-body"); +/* Large file forces the hard-zero sendfile path (vs the inline HANDLED + * small-file path) — both run on the reactor in the split. */ +$big = str_repeat("0123456789abcdef", 16384); /* 256 KiB */ +file_put_contents("$root/big.bin", $big); +$big_sha1 = sha1($big); +if (!h3_gen_cert($key, $cert)) { echo "cert gen failed\n"; exit(1); } + +register_shutdown_function(function () use ($tmp, $cert, $key, $root) { + @unlink($cert); @unlink($key); @unlink("$root/hello.txt"); @unlink("$root/big.bin"); + @rmdir($root); @rmdir($tmp); +}); + +$port = 21420 + getmypid() % 40; + +$config = (new HttpServerConfig()) + ->addListener('127.0.0.1', $port + 1) + ->addHttp3Listener('127.0.0.1', $port) + ->enableTls(true)->setCertificate($cert)->setPrivateKey($key) + ->setWorkers(2); +$server = new HttpServer($config); +/* setOpenFileCache opts the mount into the per-reactor open-file cache + * (#80) — the repeated fetches below populate then hit it on the reactor. */ +$server->addStaticHandler( + (new StaticHandler('/static/', $root))->disableIndex()->setOpenFileCache(64, 60)); +$server->addHttpHandler(function ($req, $res) { + $res->setStatusCode(200)->setBody('dyn:' . $req->getMethod() . ':' . $req->getUri()); +}); + +$client_bin = __DIR__ . '/../../../h3client/h3client'; + +spawn(function () use ($port, $client_bin, $big_sha1) { + usleep(600000); + + $run = function (string $path, int $count = 1) use ($client_bin, $port) { + $cmd = sprintf('H3CLIENT_REQUEST_COUNT=%d H3CLIENT_DEADLINE_MS=4000 %s 127.0.0.1 %d %s GET 2>&1', + $count, escapeshellarg($client_bin), $port, escapeshellarg($path)); + $out = shell_exec($cmd) ?? ''; + return preg_replace('/^(?:STATUS=\d+|HEADERS=\d+|COMPLETED=\d+)\n?/m', '', $out); + }; + + /* Two requests on one connection → same reactor: first is a cache miss + * (insert), second a hit. Both must return the file body. */ + $two = $run('/static/hello.txt', 2); + echo "static_x2=", (substr_count($two, 'reactor-static-body') === 2 ? "yes" : "no"), "\n"; + + $big = $run('/static/big.bin'); + echo "big_match=", (strlen($big) === 262144 && sha1($big) === $big_sha1 ? "yes" : "no len=" . strlen($big)), "\n"; + echo "dynamic=", trim($run('/dyn')), "\n"; + + /* Issue #11: no clean cross-thread pool shutdown yet. */ + posix_kill(getmypid(), SIGKILL); +}); + +$server->start(); +?> +--EXPECTF-- +%Astatic_x2=yes +big_match=yes +dynamic=dyn:GET:/dyn +%A diff --git a/tests/phpt/server/h3/040-h3-reactor-pool-steering.phpt b/tests/phpt/server/h3/040-h3-reactor-pool-steering.phpt new file mode 100644 index 0000000..94876ae --- /dev/null +++ b/tests/phpt/server/h3/040-h3-reactor-pool-steering.phpt @@ -0,0 +1,110 @@ +--TEST-- +HttpServer: HTTP/3 CID steering — migrated client served across the reactor split (#80 D6 / #72) +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- +/dev/null'); +if ($n < 2) die('skip CID steering needs >1 reactor (>=2 cores)'); +h3_skipif(['openssl_cli' => true, 'h3client' => true]); +?> +--ENV-- +TRUE_ASYNC_SERVER_REACTOR_POOL=1 +PHP_HTTP3_DISABLE_RETRY=1 +--FILE-- +1. Steering encodes the owner reactor's id into every server CID; a + * reactor receiving a stray short-header datagram decodes the owner from the DCID + * and forwards it over the reactor mailbox to the owner, which feeds it to ngtcp2 + * and replies directly. + * + * The client rebinds twice on one connection. With two reactors a rebind lands on + * the non-owner ~half the time, so steering is almost always exercised — and + * WITHOUT it, the first cross-reactor rebind would strand the connection and drop + * the later responses. Asserting all three responses land (with workers=2, one + * connection, the migration counter advanced) is therefore a steering regression + * gate: it fails the moment a rehashed datagram is not routed home. The encode/ + * decode addressing itself is proven deterministically in the HTTP3Steer unit + * test; the steering counters echoed below make the forward observable. + * + * Multi-worker scope: the gated pool only engages at setWorkers>1, so there is + * no clean in-process stop (issue #11) — SIGKILL after the client; %A swallows + * the abrupt exit. */ + +use TrueAsync\HttpServer; +use TrueAsync\HttpServerConfig; +use function Async\spawn; + +require __DIR__ . '/_h3_skipif.inc'; + +$tmp = __DIR__ . '/tmp-040'; +@mkdir($tmp, 0700, true); +$cert = $tmp . '/cert.pem'; +$key = $tmp . '/key.pem'; +if (!h3_gen_cert($key, $cert)) { echo "cert gen failed\n"; exit(1); } +register_shutdown_function(function () use ($tmp, $cert, $key) { + @unlink($cert); @unlink($key); @rmdir($tmp); +}); + +$port = 21340 + getmypid() % 40; + +$config = (new HttpServerConfig()) + ->addListener('127.0.0.1', $port + 1) /* TCP listener required by start() */ + ->addHttp3Listener('127.0.0.1', $port) + ->enableTls(true)->setCertificate($cert)->setPrivateKey($key) + ->setWorkers(2); +$server = new HttpServer($config); +$server->addHttpHandler(function ($req, $res) { + $res->setStatusCode(200)->setBody('steer-ok'); +}); + +$client_bin = __DIR__ . '/../../../h3client/h3client'; + +spawn(function () use ($server, $port, $client_bin) { + /* Reactors + workers need a moment to thread up and bind. */ + usleep(700000); + + /* One connection, three requests, rebinding before requests 2 and 3. */ + $cmd = sprintf( + 'H3CLIENT_REQUEST_COUNT=3 H3CLIENT_MIGRATE_AFTER=1 H3CLIENT_DEADLINE_MS=6000 ' + . '%s 127.0.0.1 %d / GET 2>&1', + escapeshellarg($client_bin), $port); + $out = shell_exec($cmd) ?? ''; + + echo "ok_responses=", substr_count($out, 'STATUS=200'), "\n"; + echo "migrated=", (substr_count($out, 'MIGRATED') >= 1 ? 1 : 0), "\n"; + + /* Aggregate across the per-reactor listener entries. */ + $accepted = 0; $steered = 0; $migr = 0; + foreach ($server->getHttp3Stats() as $st) { + $accepted += (int)($st['quic_conn_accepted'] ?? 0); + $steered += (int)($st['quic_steered_out'] ?? 0) + + (int)($st['quic_steered_in'] ?? 0); + $migr += (int)($st['quic_path_migrations'] ?? 0); + } + echo "conn_accepted=", $accepted, "\n"; + echo "migrations_ge1=", ($migr >= 1 ? 1 : 0), "\n"; + /* Observability only (kernel reuseport may keep both rebinds on the owner). */ + fwrite(STDERR, "steered_total=$steered\n"); + + /* Issue #11: no clean cross-thread shutdown for the pool yet. */ + posix_kill(getmypid(), SIGKILL); +}); + +$server->start(); +?> +--EXPECTF-- +%Aok_responses=3 +migrated=1 +conn_accepted=1 +migrations_ge1=1 +%A diff --git a/tests/phpt/server/h3/041-h3-migration-storm-guard.phpt b/tests/phpt/server/h3/041-h3-migration-storm-guard.phpt new file mode 100644 index 0000000..15c0ea3 --- /dev/null +++ b/tests/phpt/server/h3/041-h3-migration-storm-guard.phpt @@ -0,0 +1,111 @@ +--TEST-- +HttpServer: HTTP/3 migration-storm guard — a rebind flood is shed, not hung (#80 D6) +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- +/dev/null'); +if ($n < 2) die('skip migration-storm guard exercised under the reactor pool (>=2 cores)'); +h3_skipif(['openssl_cli' => true, 'h3client' => true]); +?> +--ENV-- +TRUE_ASYNC_SERVER_REACTOR_POOL=1 +PHP_HTTP3_DISABLE_RETRY=1 +--FILE-- +1, so there is + * no clean in-process stop (issue #11) — SIGKILL after the client; %A swallows + * the abrupt exit. */ + +use TrueAsync\HttpServer; +use TrueAsync\HttpServerConfig; +use function Async\spawn; + +require __DIR__ . '/_h3_skipif.inc'; + +$tmp = __DIR__ . '/tmp-041'; +@mkdir($tmp, 0700, true); +$cert = $tmp . '/cert.pem'; +$key = $tmp . '/key.pem'; +if (!h3_gen_cert($key, $cert)) { echo "cert gen failed\n"; exit(1); } +register_shutdown_function(function () use ($tmp, $cert, $key) { + @unlink($cert); @unlink($key); @rmdir($tmp); +}); + +$port = 21380 + getmypid() % 40; + +$config = (new HttpServerConfig()) + ->addListener('127.0.0.1', $port + 1) /* TCP listener required by start() */ + ->addHttp3Listener('127.0.0.1', $port) + ->enableTls(true)->setCertificate($cert)->setPrivateKey($key) + ->setWorkers(2); +$server = new HttpServer($config); +$server->addHttpHandler(function ($req, $res) { + $res->setStatusCode(200)->setBody('steer-ok'); +}); + +$client_bin = __DIR__ . '/../../../h3client/h3client'; + +spawn(function () use ($server, $port, $client_bin) { + /* Reactors + workers need a moment to thread up and bind. */ + usleep(700000); + + /* One connection, 16 requests, rebind before each from #2 — 15 back-to-back + * migrations, far past the storm cap (default 8). DEADLINE_MS bounds a + * regressed hang so the test still terminates. */ + $cmd = sprintf( + 'H3CLIENT_REQUEST_COUNT=16 H3CLIENT_MIGRATE_AFTER=1 H3CLIENT_DEADLINE_MS=6000 ' + . '%s 127.0.0.1 %d / GET 2>&1', + escapeshellarg($client_bin), $port); + + $t0 = hrtime(true); + $out = shell_exec($cmd) ?? ''; + $elapsed = (hrtime(true) - $t0) / 1e9; + + echo "served_before_shed=", (substr_count($out, 'STATUS=200') >= 1 ? 1 : 0), "\n"; + + /* Aggregate the shed counter across the per-reactor listener entries. */ + $shed = 0; + foreach ($server->getHttp3Stats() as $st) { + $shed += (int)($st['quic_migration_storm_shed'] ?? 0); + } + echo "storm_shed_ge1=", ($shed >= 1 ? 1 : 0), "\n"; + + /* The shed is a prompt CONNECTION_CLOSE; a regressed wedge would burn the + * full 6 s deadline. Generous bound to stay robust on a loaded CI box. */ + echo "no_hang=", ($elapsed < 4.0 ? 1 : 0), "\n"; + + /* Issue #11: no clean cross-thread shutdown for the pool yet. */ + posix_kill(getmypid(), SIGKILL); +}); + +$server->start(); +?> +--EXPECTF-- +%Aserved_before_shed=1 +storm_shed_ge1=1 +no_hang=1 +%A diff --git a/tests/phpt/server/h3/042-h3-newcid-dcid-rotation.phpt b/tests/phpt/server/h3/042-h3-newcid-dcid-rotation.phpt new file mode 100644 index 0000000..3fc45c6 --- /dev/null +++ b/tests/phpt/server/h3/042-h3-newcid-dcid-rotation.phpt @@ -0,0 +1,87 @@ +--TEST-- +HttpServer: HTTP/3 client rotates DCID to a server-issued CID — server must route it (RFC 9000 §5.1, #80) +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + true, 'h3client' => true]); +?> +--FILE-- +addListener('127.0.0.1', $port + 1) + ->addHttp3Listener('127.0.0.1', $port) + ->enableTls(true)->setCertificate($cert)->setPrivateKey($key); +$server = new HttpServer($config); +$server->addHttpHandler(function ($req, $res) { + $res->setStatusCode(200)->setBody('rotate-ok'); +}); + +$client_bin = __DIR__ . '/../../../h3client/h3client'; + +$client = spawn(function () use ($server, $port, $client_bin) { + usleep(120000); + $cmd = sprintf('H3CLIENT_REQUEST_COUNT=2 H3CLIENT_ROTATE_DCID_AFTER=1 ' + . 'H3CLIENT_DEADLINE_MS=6000 %s 127.0.0.1 %d / GET 2>&1', + escapeshellarg($client_bin), $port); + $out = shell_exec($cmd) ?? ''; + + echo "rotated_marker=", (str_contains($out, 'ROTATED_DCID') ? 1 : 0), "\n"; + echo "ok_responses=", substr_count($out, 'STATUS=200'), "\n"; + echo "completed_both=", (str_contains($out, 'COMPLETED=2') ? 1 : 0), "\n"; + + $s = $server->getHttp3Stats()[0] ?? []; + echo "conn_accepted=", (int)($s['quic_conn_accepted'] ?? -1), "\n"; + echo "migrations_ge1=", ((int)($s['quic_path_migrations'] ?? 0) >= 1 ? 1 : 0), "\n"; + echo "cid_issued_ge1=", ((int)($s['quic_new_cid_issued'] ?? 0) >= 1 ? 1 : 0), "\n"; + + $server->stop(); +}); + +$server->start(); +await($client); +echo "done\n"; +?> +--EXPECT-- +rotated_marker=1 +ok_responses=2 +completed_both=1 +conn_accepted=1 +migrations_ge1=1 +cid_issued_ge1=1 +done diff --git a/tests/phpt/server/h3/043-h3-newcid-multi-rotation.phpt b/tests/phpt/server/h3/043-h3-newcid-multi-rotation.phpt new file mode 100644 index 0000000..c24417e --- /dev/null +++ b/tests/phpt/server/h3/043-h3-newcid-multi-rotation.phpt @@ -0,0 +1,80 @@ +--TEST-- +HttpServer: HTTP/3 client rotates DCID several times in sequence — every issued CID routes (RFC 9000 §5.1, #80) +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + true, 'h3client' => true]); +?> +--FILE-- +addListener('127.0.0.1', $port + 1) + ->addHttp3Listener('127.0.0.1', $port) + ->enableTls(true)->setCertificate($cert)->setPrivateKey($key); +$server = new HttpServer($config); +$server->addHttpHandler(function ($req, $res) { + $res->setStatusCode(200)->setBody('rotate-ok'); +}); + +$client_bin = __DIR__ . '/../../../h3client/h3client'; + +$client = spawn(function () use ($server, $port, $client_bin) { + usleep(120000); + $cmd = sprintf('H3CLIENT_REQUEST_COUNT=4 H3CLIENT_ROTATE_DCID_AFTER=1 ' + . 'H3CLIENT_DEADLINE_MS=8000 %s 127.0.0.1 %d / GET 2>&1', + escapeshellarg($client_bin), $port); + $out = shell_exec($cmd) ?? ''; + + echo "rotations=", substr_count($out, 'ROTATED_DCID'), "\n"; + echo "ok_responses=", substr_count($out, 'STATUS=200'), "\n"; + echo "completed_all=", (str_contains($out, 'COMPLETED=4') ? 1 : 0), "\n"; + + $s = $server->getHttp3Stats()[0] ?? []; + echo "conn_accepted=", (int)($s['quic_conn_accepted'] ?? -1), "\n"; + echo "cid_issued_ge3=", ((int)($s['quic_new_cid_issued'] ?? 0) >= 3 ? 1 : 0), "\n"; + + $server->stop(); +}); + +$server->start(); +await($client); +echo "done\n"; +?> +--EXPECT-- +rotations=3 +ok_responses=4 +completed_all=1 +conn_accepted=1 +cid_issued_ge3=1 +done diff --git a/tests/phpt/server/h3/044-hq-interop-file.phpt b/tests/phpt/server/h3/044-hq-interop-file.phpt new file mode 100644 index 0000000..ca36d2f --- /dev/null +++ b/tests/phpt/server/h3/044-hq-interop-file.phpt @@ -0,0 +1,112 @@ +--TEST-- +HttpServer: hq-interop (HTTP/0.9-over-QUIC) serves files byte-exact + multiplexes (#80) +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + true, 'aioquic' => true]); +?> +--ENV-- +PHP_HTTP3_DISABLE_RETRY=1 +--FILE-- +addListener('127.0.0.1', $port + 1) + ->addHttp3Listener('127.0.0.1', $port) + ->enableTls(true)->setCertificate($cert)->setPrivateKey($key); +/* hq serves these files straight off the transport reactor. */ +$config->setHttp3HqDocroot($root); + +$server = new HttpServer($config); +/* An h3 handler is still required by start(); hq never reaches it. */ +$server->addHttpHandler(function ($req, $res) { $res->setBody('h3'); }); + +$py = __DIR__ . '/_hq_client.py'; +$python = 'python3'; + +$client = spawn(function () use ($server, $port, $py, $python, $sizes, $sha) { + usleep(200000); + + foreach ($sizes as $n) { + $cmd = sprintf('%s %s 127.0.0.1 %d /f%d.bin 2>/dev/null', + escapeshellarg($python), escapeshellarg($py), $port, $n); + $body = shell_exec($cmd) ?? ''; + $ok = (strlen($body) === $n) && (sha1($body) === $sha[$n]); + printf("size=%d len=%d match=%d\n", $n, strlen($body), $ok ? 1 : 0); + } + + /* Multiplexing: 3 concurrent streams on one connection. */ + $cmd = sprintf('%s %s 127.0.0.1 %d --mux /f1.bin /f16384.bin /f131072.bin 2>/dev/null', + escapeshellarg($python), escapeshellarg($py), $port); + $lines = array_values(array_filter(explode("\n", shell_exec($cmd) ?? ''))); + $want = ['/f1.bin' => [1, $sha[1]], + '/f16384.bin' => [16384, $sha[16384]], + '/f131072.bin' => [131072, $sha[131072]]]; + $hit = 0; + foreach ($lines as $ln) { + [$p, $len, $h] = explode(' ', $ln) + [null, null, null]; + if (isset($want[$p]) && (int)$len === $want[$p][0] && $h === $want[$p][1]) $hit++; + } + printf("mux match=%d/%d\n", $hit, count($want)); + + $server->stop(); +}); + +$server->start(); +await($client); +echo "done\n"; +?> +--EXPECT-- +size=0 len=0 match=1 +size=1 len=1 match=1 +size=16384 len=16384 match=1 +size=131072 len=131072 match=1 +mux match=3/3 +done diff --git a/tests/phpt/server/h3/_h3_skipif.inc b/tests/phpt/server/h3/_h3_skipif.inc index 837e11a..3061e0d 100644 --- a/tests/phpt/server/h3/_h3_skipif.inc +++ b/tests/phpt/server/h3/_h3_skipif.inc @@ -36,8 +36,27 @@ function h3_skipif(array $req = []): void { 'openssl_cli' => false, 'h3client' => false, 'objdump' => false, + 'aioquic' => false, ]; + if ($req['aioquic']) { + /* hq-interop tests drive the server with the aioquic Python client + * (the same QUIC stack the interop runner uses): no nghttp3, raw + * HTTP/0.9. Skip where python3 / the module is unavailable rather + * than fail — mirrors the openssl_cli / h3client gating. */ + if (PHP_OS_FAMILY === 'Windows') { + die('skip aioquic hq harness is POSIX-only here'); + } + if (!h3_have_cli('python3')) { + die('skip python3 not available'); + } + $rc = 0; $out = []; + @exec('python3 -c "import aioquic" 2>&1', $out, $rc); + if ($rc !== 0) { + die('skip python3 aioquic module not installed'); + } + } + if ($req['sockets'] && !extension_loaded('sockets')) { die('skip ext/sockets required'); } diff --git a/tests/phpt/server/h3/_hq_client.py b/tests/phpt/server/h3/_hq_client.py new file mode 100644 index 0000000..6e09783 --- /dev/null +++ b/tests/phpt/server/h3/_hq_client.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 +"""hq-interop (HTTP/0.9-over-QUIC) test client for the H3 phpt suite. + +The server speaks hq only on the QUIC ALPN "hq-interop" (no nghttp3), so the +nghttp3-based h3client harness can't drive it. aioquic is the QUIC stack the +quic-interop-runner itself uses, so it doubles as the reference hq client. + +Modes: + _hq_client.py HOST PORT /path + Single bidi stream. Writes the raw response body to stdout (binary); + the phpt sha-compares it against the served file. + + _hq_client.py HOST PORT --mux /p1 /p2 ... + Opens all paths as concurrent bidi streams on ONE connection + (multiplexing). Writes one "len sha1" line per path, in input order, + to stdout. Proves N streams complete correctly on a single conn. + +Status / errors go to stderr so stdout stays exactly the payload. +""" +import asyncio +import hashlib +import ssl +import sys + +from aioquic.asyncio import connect +from aioquic.asyncio.protocol import QuicConnectionProtocol +from aioquic.quic.configuration import QuicConfiguration +from aioquic.quic.events import ( + ConnectionTerminated, + HandshakeCompleted, + StreamDataReceived, +) + + +class HQ(QuicConnectionProtocol): + def __init__(self, *a, **k): + super().__init__(*a, **k) + self.bufs = {} # stream_id -> bytearray + self.fins = set() # stream_ids that saw end_stream + self.hs = asyncio.Event() + self.closed = asyncio.Event() + + def quic_event_received(self, ev): + if isinstance(ev, HandshakeCompleted): + self.hs.set() + elif isinstance(ev, StreamDataReceived): + self.bufs.setdefault(ev.stream_id, bytearray()).extend(ev.data) + if ev.end_stream: + self.fins.add(ev.stream_id) + elif isinstance(ev, ConnectionTerminated): + self.closed.set() + + +async def run(host, port, paths): + cfg = QuicConfiguration(is_client=True, alpn_protocols=["hq-interop"]) + cfg.verify_mode = ssl.CERT_NONE + async with connect(host, port, configuration=cfg, create_protocol=HQ) as cli: + await asyncio.wait_for(cli.hs.wait(), timeout=5) + order = [] + for p in paths: + sid = cli._quic.get_next_available_stream_id() + cli._quic.send_stream_data(sid, ("GET %s\r\n" % p).encode(), + end_stream=True) + order.append(sid) + cli.transmit() + + # Wait until every stream has seen its FIN (or the conn ends / timeout). + deadline = len(paths) + for _ in range(300): # ~15s at 50ms granularity + if len(cli.fins) >= deadline or cli.closed.is_set(): + break + await asyncio.sleep(0.05) + + return [bytes(cli.bufs.get(sid, b"")) for sid in order] + + +async def main(): + host, port = sys.argv[1], int(sys.argv[2]) + rest = sys.argv[3:] + + if rest and rest[0] == "--mux": + paths = rest[1:] + bodies = await run(host, port, paths) + out = [] + for p, b in zip(paths, bodies): + out.append("%s %d %s" % (p, len(b), hashlib.sha1(b).hexdigest())) + sys.stdout.write("\n".join(out) + "\n") + return + + bodies = await run(host, port, rest) + sys.stdout.buffer.write(bodies[0] if bodies else b"") + + +asyncio.run(main()) diff --git a/tests/phpt/server/reactor_pool/001-reactor-pool-substrate.phpt b/tests/phpt/server/reactor_pool/001-reactor-pool-substrate.phpt new file mode 100644 index 0000000..eed482a --- /dev/null +++ b/tests/phpt/server/reactor_pool/001-reactor-pool-substrate.phpt @@ -0,0 +1,37 @@ +--TEST-- +Reactor pool substrate (#80): spawn N reactors, drain their #81 inbound, clean shutdown +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + +--FILE-- + $n) { + if ($n !== $items) { + $all_drained = false; + echo "reactor {$idx}: drained {$n}, expected {$items}\n"; + } +} +var_dump($all_drained); + +echo "done\n"; +?> +--EXPECT-- +bool(true) +bool(true) +bool(true) +done diff --git a/tests/phpt/server/reactor_pool/003-reactor-pool-exec.phpt b/tests/phpt/server/reactor_pool/003-reactor-pool-exec.phpt new file mode 100644 index 0000000..701b880 --- /dev/null +++ b/tests/phpt/server/reactor_pool/003-reactor-pool-exec.phpt @@ -0,0 +1,38 @@ +--TEST-- +Reactor pool exec (#80): reactor_pool_exec runs C init on each reactor's own thread +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + +--FILE-- + +--EXPECT-- +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +done diff --git a/tests/phpt/server/reactor_pool/004-dispatch-from-wire.phpt b/tests/phpt/server/reactor_pool/004-dispatch-from-wire.phpt new file mode 100644 index 0000000..d624765 --- /dev/null +++ b/tests/phpt/server/reactor_pool/004-dispatch-from-wire.phpt @@ -0,0 +1,81 @@ +--TEST-- +Worker dispatch (#80, B1b/D7): request pointer -> handler coroutine on this thread -> response_wire +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + +--FILE-- + + * handler -> D3 round trip, minus the actual transport. */ + +use TrueAsync\HttpServer; +use TrueAsync\HttpServerConfig; +use function Async\spawn; +use function Async\await; + +$server = new HttpServer(new HttpServerConfig()); +$server->addHttpHandler(function ($req, $res) { + $res->setStatusCode(201) + ->setHeader('content-type', 'text/plain') + ->setHeader('x-echo', $req->getMethod() . ' ' . $req->getUri()) + ->setBody('hello-' . $req->getUri()); +}); + +/* A second server with no handler registered: the dispatch must synthesise a + * 404 so the sink still fires. */ +$bare = new HttpServer(new HttpServerConfig()); + +$out = await(spawn(function () use ($server, $bare) { + return [ + 'get' => _http_server_dispatch_from_wire_selftest( + $server, 'GET', '/widgets/42', ['accept' => 'text/plain'], ''), + 'head' => _http_server_dispatch_from_wire_selftest( + $server, 'HEAD', '/widgets/42', [], ''), + 'p404' => _http_server_dispatch_from_wire_selftest( + $bare, 'GET', '/nope', [], ''), + ]; +})); + +/* GET: handler ran on the spawned coroutine, request fields survived the wire. */ +$g = $out['get']; +var_dump($g['status'] === 201); +var_dump($g['body'] === 'hello-/widgets/42'); +var_dump($g['headers']['content-type'] === 'text/plain'); +var_dump($g['headers']['x-echo'] === 'GET /widgets/42'); + +/* HEAD: headers rendered, body suppressed (RFC 9110 §9.3.2). */ +$h = $out['head']; +var_dump($h['status'] === 201); +var_dump($h['body'] === ''); +var_dump($h['headers']['x-echo'] === 'HEAD /widgets/42'); + +/* No handler -> synthesised 404. */ +$p = $out['p404']; +var_dump($p['status'] === 404); +var_dump($p['body'] === 'Not Found'); +var_dump($p['headers']['content-type'] === 'text/plain; charset=utf-8'); + +echo "done\n"; +?> +--EXPECT-- +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +done diff --git a/tests/phpt/server/reactor_pool/005-worker-inbox.phpt b/tests/phpt/server/reactor_pool/005-worker-inbox.phpt new file mode 100644 index 0000000..ac2f809 --- /dev/null +++ b/tests/phpt/server/reactor_pool/005-worker-inbox.phpt @@ -0,0 +1,44 @@ +--TEST-- +Worker inbox (#80, B2/D7): N request pointers through the #81 mailbox -> dispatch -> N responses +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + +--FILE-- + dispatch -> response path carries N independent requests. */ + +use TrueAsync\HttpServer; +use TrueAsync\HttpServerConfig; +use function Async\spawn; +use function Async\await; + +$server = new HttpServer(new HttpServerConfig()); +$server->addHttpHandler(function ($req, $res) { + $res->setStatusCode(200)->setBody('ok-' . $req->getUri()); +}); + +$count = 50; + +$r = await(spawn(fn () => _http_server_worker_inbox_selftest($server, $count))); + +var_dump($r['expected'] === $count); +var_dump($r['received'] === $count); /* every posted wire was drained + dispatched */ +var_dump($r['ok'] === $count); /* every handler rendered a correct 200 response */ + +echo "done\n"; +?> +--EXPECT-- +bool(true) +bool(true) +bool(true) +done diff --git a/tests/phpt/server/reactor_pool/006-worker-registry.phpt b/tests/phpt/server/reactor_pool/006-worker-registry.phpt new file mode 100644 index 0000000..c2b6aa8 --- /dev/null +++ b/tests/phpt/server/reactor_pool/006-worker-registry.phpt @@ -0,0 +1,51 @@ +--TEST-- +Worker registry (#80, B3): round-robin dispatch across N worker inboxes +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + +--FILE-- +addHttpHandler(function ($req, $res) { + $res->setStatusCode(200)->setBody('ok-' . $req->getUri()); +}); + +$workers = 4; +$count = 40; + +$r = await(spawn(fn () => _http_server_worker_registry_selftest($server, $workers, $count))); + +var_dump($r['expected'] === $count); +var_dump($r['received'] === $count); /* every request dispatched + rendered */ +var_dump($r['ok'] === $count); /* every response correct */ +var_dump(count($r['distribution']) === $workers); +var_dump(array_sum($r['distribution']) === $count); +var_dump(min($r['distribution']) === 10 && max($r['distribution']) === 10); /* exact RR spread */ + +echo "done\n"; +?> +--EXPECT-- +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +done diff --git a/tests/phpt/server/reactor_pool/007-persistent-request-lifecycle.phpt b/tests/phpt/server/reactor_pool/007-persistent-request-lifecycle.phpt new file mode 100644 index 0000000..d53d778 --- /dev/null +++ b/tests/phpt/server/reactor_pool/007-persistent-request-lifecycle.phpt @@ -0,0 +1,60 @@ +--TEST-- +Persistent request lifecycle (#80, D7): reactor-domain http_request_t -> flag-aware accessors -> heap-clean free +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + +--FILE-- + ZMM +// path copy. Under ASan this whole create -> read -> free cycle is heap-clean. +$req = _http_server_persistent_request_selftest( + 'POST', + '/api/users', + ['content-type' => 'application/json', 'accept' => '*/*', 'x-extension-header' => 'v'], + '{"name":"x"}' +); + +var_dump($req instanceof \TrueAsync\HttpRequest); +var_dump($req->getMethod()); +var_dump($req->getUri()); +var_dump($req->getPath()); +var_dump($req->getHeader('content-type')); +var_dump($req->getHeader('x-extension-header')); +var_dump($req->getBody()); + +$headers = $req->getHeaders(); +ksort($headers); +var_dump($headers); + +// Drop the only ref -> free_obj -> http_request_destroy on the persistent domain. +unset($req); +echo "done\n"; +?> +--EXPECT-- +bool(true) +string(4) "POST" +string(10) "/api/users" +string(10) "/api/users" +string(16) "application/json" +string(1) "v" +string(12) "{"name":"x"}" +array(3) { + ["accept"]=> + string(3) "*/*" + ["content-type"]=> + string(16) "application/json" + ["x-extension-header"]=> + string(1) "v" +} +done diff --git a/tests/phpt/server/reactor_pool/008-reactor-h3-listener-recv.phpt b/tests/phpt/server/reactor_pool/008-reactor-h3-listener-recv.phpt new file mode 100644 index 0000000..8e74fa7 --- /dev/null +++ b/tests/phpt/server/reactor_pool/008-reactor-h3-listener-recv.phpt @@ -0,0 +1,38 @@ +--TEST-- +Reactor pool H3 listener (#80, B3p3-a): UDP listener recv is serviced on the reactor thread +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + +--FILE-- + +--EXPECT-- +bool(true) +done diff --git a/tests/phpt/server/reactor_pool/009-reactor-post-exec.phpt b/tests/phpt/server/reactor_pool/009-reactor-post-exec.phpt new file mode 100644 index 0000000..c48b144 --- /dev/null +++ b/tests/phpt/server/reactor_pool/009-reactor-post-exec.phpt @@ -0,0 +1,45 @@ +--TEST-- +Reactor pool post-exec (#80, D8/B4): fire-and-forget callbacks run on the reactor, caller never blocks +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + +--FILE-- +reactor reverse path (D8/B4) reuses the reactor's existing inbound + * channel — there is no separate mailbox. It just needs a *non-blocking* exec: + * reactor_pool_exec waits for a completion ack (wrong for a path where nobody + * blocks), so reactor_pool_post_exec posts fire-and-forget. This drives that + * primitive: post N callbacks into each reactor without ever blocking, then + * confirm every one ran on the reactor's own thread (off the parent). The tagged + * response/consumed/cancel message rides on top of this as payload — that lands + * with the live reverse path. */ + +use function Async\spawn; +use function Async\await; + +$reactors = 3; +$count = 40; + +$r = await(spawn(fn () => _http_server_reactor_post_exec_selftest($reactors, $count))); + +var_dump(is_array($r)); +var_dump($r['reactors'] === $reactors); +var_dump($r['expected'] === $reactors * $count); +var_dump($r['ran'] === $reactors * $count); /* every fire-and-forget callback ran */ +var_dump($r['off_parent'] === $reactors); /* all on reactor threads, not the caller */ + +echo "done\n"; +?> +--EXPECT-- +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +done diff --git a/tests/phpt/server/reactor_pool/010-worker-routing.phpt b/tests/phpt/server/reactor_pool/010-worker-routing.phpt new file mode 100644 index 0000000..29694fc --- /dev/null +++ b/tests/phpt/server/reactor_pool/010-worker-routing.phpt @@ -0,0 +1,79 @@ +--TEST-- +Worker routing (#80, D5): reactor-paired strided ownership + idle spread + global fallback +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + +--FILE-- +addHttpHandler(function ($req, $res) { + $res->setStatusCode(200)->setBody('ok'); +}); + +$route = fn (...$a) => await(spawn(fn () => + _http_server_worker_registry_route_selftest($server, ...$a))); + +/* 1. Reactor 1 of 4, 8 workers all published: owns slots {1,5} and rotates between + * them; never picks a non-owned slot. */ +$r = $route(8, 8, 4, 1, 200); +$d = $r['distribution']; +$owned = $d[1] + $d[5]; +$others = array_sum($d) - $owned; +var_dump($r['none'] === 0); +var_dump($others === 0); +var_dump($d[1] > 0 && $d[5] > 0); + +/* 2. Global (reactor_id -1): exact spread across all 8 idle slots. */ +$r = $route(8, 8, 4, -1, 800); +$d = $r['distribution']; +var_dump($r['none'] === 0); +var_dump(min($d) > 0); +var_dump(array_sum($d) === 800); + +/* 3. Reactor 2 owns {2,6}, but only slots 0,1 are published -> owned set empty -> + * route returns NULL every time (the global-fallback trigger in dispatch). */ +$r = $route(8, 2, 4, 2, 100); +var_dump($r['none'] === 100); +var_dump(array_sum($r['distribution']) === 0); + +/* 4. Global fallback lands only on published slots, skipping the unpublished. */ +$r = $route(8, 2, 4, -1, 200); +$d = $r['distribution']; +var_dump($r['none'] === 0); +var_dump($d[0] > 0 && $d[1] > 0); +var_dump(array_sum(array_slice($d, 2)) === 0); + +echo "done\n"; +?> +--EXPECT-- +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +done diff --git a/tests/phpt/server/static/012-static-h2.phpt b/tests/phpt/server/static/012-static-h2.phpt index eb79d46..58927d7 100644 --- a/tests/phpt/server/static/012-static-h2.phpt +++ b/tests/phpt/server/static/012-static-h2.phpt @@ -58,37 +58,11 @@ $client = spawn(function() use ($port, $server, $body) { * separately so we can parse ":status" and Content-Length * cleanly even on 206 / 304. */ $args = ['--http2-prior-knowledge', '-sS', '-i', '--max-time', '3']; - - /* Route request headers through a curl --config file rather than - * shell-quoted -H. An ETag is `W/"hex"`, and PHP's escapeshellarg() - * on Windows cannot carry a double quote — it replaces every " with - * a space, so `If-None-Match: W/"hex"` would reach curl as - * `W/ hex `. Leading/trailing whitespace makes that a malformed - * HTTP/2 field value (RFC 9113 §8.2.1), which the server correctly - * rejects with a stream PROTOCOL_ERROR. curl config files use curl's - * own (shell-independent) quoting, so the quotes survive everywhere. */ - $rest = []; $cfg_lines = []; $cfg = null; - for ($i = 0, $n = count($extra_args); $i < $n; $i++) { - if ($extra_args[$i] === '-H' && $i + 1 < $n) { - $val = $extra_args[++$i]; - $esc = str_replace(['\\', '"'], ['\\\\', '\\"'], $val); - $cfg_lines[] = 'header = "' . $esc . '"'; - } else { - $rest[] = $extra_args[$i]; - } - } - if ($cfg_lines) { - $cfg = tempnam(sys_get_temp_dir(), 'h2cfg'); - file_put_contents($cfg, implode("\n", $cfg_lines) . "\n"); - $args[] = '--config'; - $args[] = $cfg; - } - foreach ($rest as $a) $args[] = $a; + foreach ($extra_args as $a) $args[] = $a; $args[] = "http://127.0.0.1:$port$path"; $cmd = 'curl ' . implode(' ', array_map('escapeshellarg', $args)); $out = []; $rc = 0; exec($cmd . ' 2>&1', $out, $rc); - if ($cfg !== null) @unlink($cfg); $resp = implode("\n", $out); /* Parse a curl -i response: header block ends at the blank * line; HTTP/2 status line is "HTTP/2 NNN". */ diff --git a/tests/phpt/server/static/016-static-handler-validation-posix.phpt b/tests/phpt/server/static/016-static-handler-validation-posix.phpt deleted file mode 100644 index 56db060..0000000 --- a/tests/phpt/server/static/016-static-handler-validation-posix.phpt +++ /dev/null @@ -1,50 +0,0 @@ ---TEST-- -StaticHandler: POSIX-only root-path validation (filesystem-root semantics) ---EXTENSIONS-- -true_async_server ---SKIPIF-- - ---FILE-- -getMessage(), "\n"; - } -} - -/* "/" resolves to the filesystem root → explicit guard. */ -check('ctor:root-slash', fn() => new StaticHandler('/x/', '/')); - -/* Leading slash is absolute on POSIX → a missing one reaches not-found - * (proving it cleared the absolute-path gate, unlike on Windows). */ -check('ctor:root-missing-abs', fn() => new StaticHandler('/x/', '/nonexistent-' . bin2hex(random_bytes(8)))); - -echo "done\n"; -?> ---EXPECTF-- -ctor:root-slash: TrueAsync\HttpServerInvalidArgumentException: StaticHandler root directory must not be '/' -ctor:root-missing-abs: TrueAsync\HttpServerInvalidArgumentException: StaticHandler root directory not found: %s -done diff --git a/tests/phpt/server/static/016-static-handler-validation.phpt b/tests/phpt/server/static/016-static-handler-validation.phpt index 1fec6f2..87e1d6d 100644 --- a/tests/phpt/server/static/016-static-handler-validation.phpt +++ b/tests/phpt/server/static/016-static-handler-validation.phpt @@ -45,23 +45,15 @@ check('ctor:prefix-double-slash', fn() => new StaticHandler('/a//b/', $root)); check('ctor:prefix-too-short', fn() => new StaticHandler('/', $root)); // len < 2 /* ---- Constructor: root-directory validation -------------------- */ -/* Cross-platform: feed an absolute-but-missing path under the system - * temp dir so it clears the absolute-path gate and hits the not-found - * branch on every OS (a bare "/..." path is NOT absolute on Windows). - * The POSIX-only root cases ("/" itself) live in 016-...-posix. */ -$absent_root = sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'sh-absent-' . bin2hex(random_bytes(8)); check('ctor:root-empty', fn() => new StaticHandler('/x/', '')); check('ctor:root-relative', fn() => new StaticHandler('/x/', 'relative')); -check('ctor:root-missing', fn() => new StaticHandler('/x/', $absent_root)); +check('ctor:root-missing', fn() => new StaticHandler('/x/', '/nonexistent-' . bin2hex(random_bytes(8)))); check('ctor:root-not-a-dir', fn() => new StaticHandler('/x/', __FILE__)); +check('ctor:root-slash', fn() => new StaticHandler('/x/', '/')); /* ---- Happy path ------------------------------------------------ */ $sh = new StaticHandler('/static/', $root); -$gr = $sh->getRootDirectory(); -/* "absolute for this OS": leading slash on *nix, drive-letter / UNC on Windows. */ -$root_abs = $gr !== '' && (str_starts_with($gr, '/') || str_starts_with($gr, '\\') - || (strlen($gr) >= 2 && ctype_alpha($gr[0]) && $gr[1] === ':')); -echo "happy-path: prefix=", $sh->getUrlPrefix(), " root-ok=", $root_abs ? 'yes' : 'no', "\n"; +echo "happy-path: prefix=", $sh->getUrlPrefix(), " root-ok=", str_starts_with($sh->getRootDirectory(), '/') ? 'yes' : 'no', "\n"; echo "isLocked: ", $sh->isLocked() ? 'yes' : 'no', "\n"; /* ---- setIndexFiles: validation arms --------------------------- */ @@ -144,6 +136,7 @@ ctor:root-empty: TrueAsync\HttpServerInvalidArgumentException: StaticHandler roo ctor:root-relative: TrueAsync\HttpServerInvalidArgumentException: StaticHandler root directory must be an absolute path ctor:root-missing: TrueAsync\HttpServerInvalidArgumentException: StaticHandler root directory not found: %s ctor:root-not-a-dir: TrueAsync\HttpServerInvalidArgumentException: StaticHandler root directory is not a directory: %s +ctor:root-slash: TrueAsync\HttpServerInvalidArgumentException: StaticHandler root directory must not be '/' happy-path: prefix=/static/ root-ok=yes isLocked: no idx:non-string: TrueAsync\HttpServerInvalidArgumentException: StaticHandler index files must be strings diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 75339ce..3c07134 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -60,6 +60,30 @@ endif() add_test(NAME ThreadQueue COMMAND test_thread_queue) +# Flat response_wire marshalling type (issue #80, D3). Worker -> reactor +# response; pure malloc-domain C, no PHP/Zend runtime. +add_executable(test_response_wire + core/test_response_wire.c + ${CMAKE_SOURCE_DIR}/../src/core/response_wire.c +) + +target_include_directories(test_response_wire PRIVATE + ${CMAKE_SOURCE_DIR}/../include + ${CMAKE_SOURCE_DIR}/../src + ${CMAKE_SOURCE_DIR}/../src/core +) + +target_link_libraries(test_response_wire + test_common + ${CMOCKA_LIBRARIES} +) + +if(UNIX) + target_link_libraries(test_response_wire m pthread) +endif() + +add_test(NAME ResponseWire COMMAND test_response_wire) + if(WIN32) set_tests_properties(ThreadQueue PROPERTIES ENVIRONMENT_MODIFICATION "PATH=path_list_prepend:${PHP_DLL_DIR};PATH=path_list_prepend:${CMOCKA_DLL_DIR}" @@ -637,5 +661,42 @@ if(NGTCP2_FOUND AND NGTCP2_OSSL_FOUND AND NGHTTP3_FOUND AND OpenSSL_FOUND) endif() endif() +# HTTP/3 CID steering core (#80 D6 / #72) — pure crypto addressing layer +# (AES-masked reactor id in the CID). No ngtcp2/server; only OpenSSL for the +# EVP block + DRBG. http3_steer.c is decoupled from the heavy H3 headers so it +# links standalone with a small http3_fill_random stub in the test. +find_package(OpenSSL 3.0 QUIET) +if(OpenSSL_FOUND) + add_executable(test_http3_steer + http3/test_http3_steer.c + ${CMAKE_SOURCE_DIR}/../src/http3/http3_steer.c + ) + + target_include_directories(test_http3_steer PRIVATE + ${CMAKE_SOURCE_DIR}/../include + ${CMAKE_SOURCE_DIR}/../src + ${CMAKE_SOURCE_DIR}/../src/http3 + ) + + target_link_libraries(test_http3_steer + test_common + OpenSSL::Crypto + ${CMOCKA_LIBRARIES} + ${CMAKE_DL_LIBS} + ) + + if(UNIX) + target_link_libraries(test_http3_steer m pthread rt) + endif() + + add_test(NAME HTTP3Steer COMMAND test_http3_steer) + + if(UNIX AND NOT APPLE) + set_tests_properties(HTTP3Steer PROPERTIES + ENVIRONMENT "LD_LIBRARY_PATH=${PHP_PREFIX}/lib:$ENV{LD_LIBRARY_PATH}" + ) + endif() +endif() + # Install test binaries (optional) # install(TARGETS test_http1_parser test_http1_parser_edge_cases test_multipart_parser DESTINATION bin/tests) diff --git a/tests/unit/core/test_response_wire.c b/tests/unit/core/test_response_wire.c new file mode 100644 index 0000000..d7e4f17 --- /dev/null +++ b/tests/unit/core/test_response_wire.c @@ -0,0 +1,249 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ +*/ + +/* Unit tests for the flat response_wire marshalling type (issue #80, D3), the + * return-path mirror of request_wire. Pure malloc-domain, no PHP/Zend runtime. + * Covers status, builders/accessors, non-NUL spans, empty/zero-length body, + * routing round-trip, and arena growth across realloc (offsets, not pointers). */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "core/response_wire.h" + +#define PTR(i) ((void *) (intptr_t) (i)) + +/* Evaluate an accessor that writes *len BEFORE reading len — the accessor and + * the `len` argument are unsequenced inside a single assert_span(...) call. */ +#define ASSERT_SPAN(accessor_call, len_var, expect) \ + do { const char *sp__ = (accessor_call); assert_span(sp__, (len_var), (expect)); } while (0) + +/* assert a returned span equals an expected C-string (by length + bytes) */ +static void assert_span(const char *ptr, size_t len, const char *expect) +{ + const size_t elen = strlen(expect); + assert_int_equal(len, elen); + if (elen != 0) { + assert_non_null(ptr); + assert_memory_equal(ptr, expect, elen); + } +} + +static void test_empty_wire(void **state) +{ + (void) state; + + response_wire_t *rw = response_wire_create(7, 42, PTR(0xABCD)); + assert_non_null(rw); + + assert_int_equal(response_wire_status(rw), 0); + + size_t len = 123; + assert_null(response_wire_body(rw, &len)); + assert_int_equal(len, 0); + + assert_int_equal(response_wire_header_count(rw), 0); + assert_false(response_wire_body_complete(rw)); + + assert_int_equal(response_wire_reactor_id(rw), 7); + assert_int_equal(response_wire_stream_id(rw), 42); + assert_ptr_equal(response_wire_conn(rw), PTR(0xABCD)); + + response_wire_free(rw); +} + +static void test_status_headers_body(void **state) +{ + (void) state; + + response_wire_t *rw = response_wire_create(0, 0, NULL); + assert_non_null(rw); + + response_wire_set_status(rw, 200); + assert_true(response_wire_add_header(rw, "content-type", 12, "text/plain", 10)); + assert_true(response_wire_set_body(rw, "hello", 5, true)); + + assert_int_equal(response_wire_status(rw), 200); + + size_t len; + ASSERT_SPAN(response_wire_body(rw, &len), len, "hello"); + assert_true(response_wire_body_complete(rw)); + + const char *np, *vp; + size_t nl, vl; + assert_true(response_wire_header_at(rw, 0, &np, &nl, &vp, &vl)); + assert_span(np, nl, "content-type"); + assert_span(vp, vl, "text/plain"); + + response_wire_free(rw); +} + +static void test_status_replaced(void **state) +{ + (void) state; + + response_wire_t *rw = response_wire_create(0, 0, NULL); + response_wire_set_status(rw, 200); + response_wire_set_status(rw, 404); /* latest wins */ + assert_int_equal(response_wire_status(rw), 404); + + response_wire_free(rw); +} + +static void test_non_nul_terminated_span(void **state) +{ + (void) state; + + /* Header value carved out of a larger buffer with no NUL. */ + const char buf[] = "gzip,deflate,br"; + response_wire_t *rw = response_wire_create(0, 0, NULL); + + assert_true(response_wire_add_header(rw, "content-encoding", 16, buf + 5, 7)); /* "deflate" */ + + const char *np, *vp; + size_t nl, vl; + assert_true(response_wire_header_at(rw, 0, &np, &nl, &vp, &vl)); + assert_span(np, nl, "content-encoding"); + assert_span(vp, vl, "deflate"); + + response_wire_free(rw); +} + +static void test_headers(void **state) +{ + (void) state; + + response_wire_t *rw = response_wire_create(0, 0, NULL); + + assert_true(response_wire_add_header(rw, "content-type", 12, "application/json", 16)); + assert_true(response_wire_add_header(rw, "cache-control", 13, "no-store", 8)); + assert_int_equal(response_wire_header_count(rw), 2); + + const char *np, *vp; + size_t nl, vl; + + assert_true(response_wire_header_at(rw, 0, &np, &nl, &vp, &vl)); + assert_span(np, nl, "content-type"); + assert_span(vp, vl, "application/json"); + + assert_true(response_wire_header_at(rw, 1, &np, &nl, &vp, &vl)); + assert_span(np, nl, "cache-control"); + assert_span(vp, vl, "no-store"); + + assert_false(response_wire_header_at(rw, 2, &np, &nl, &vp, &vl)); + + response_wire_free(rw); +} + +static void test_empty_body_complete(void **state) +{ + (void) state; + + /* 204-style: body set, zero length, complete. */ + response_wire_t *rw = response_wire_create(0, 0, NULL); + + response_wire_set_status(rw, 204); + assert_true(response_wire_set_body(rw, NULL, 0, true)); + + size_t len = 99; + response_wire_body(rw, &len); + assert_int_equal(len, 0); + assert_true(response_wire_body_complete(rw)); + + response_wire_free(rw); +} + +static void test_streaming_body_flag(void **state) +{ + (void) state; + + response_wire_t *rw = response_wire_create(0, 0, NULL); + assert_true(response_wire_set_body(rw, "first-chunk", 11, false)); + + size_t len; + ASSERT_SPAN(response_wire_body(rw, &len), len, "first-chunk"); + assert_false(response_wire_body_complete(rw)); /* more streamed later */ + + response_wire_free(rw); +} + +/* Force many arena reallocs, then verify every earlier span still reads + * correctly — offsets must survive realloc (raw pointers would not). */ +static void test_arena_growth_keeps_spans(void **state) +{ + (void) state; + + response_wire_t *rw = response_wire_create(1, 2, NULL); + + response_wire_set_status(rw, 200); + + char name[32], value[512]; + const int count = 200; + + for (int i = 0; i < count; i++) { + snprintf(name, sizeof(name), "x-header-%d", i); + memset(value, 'a' + (i % 26), sizeof(value)); + assert_true(response_wire_add_header(rw, name, strlen(name), value, sizeof(value))); + } + + assert_int_equal(response_wire_header_count(rw), (size_t) count); + + /* Body set last, then re-read — but status set before all the growth must + * still be intact. */ + assert_true(response_wire_set_body(rw, "tail-body", 9, true)); + + assert_int_equal(response_wire_status(rw), 200); + + size_t len; + ASSERT_SPAN(response_wire_body(rw, &len), len, "tail-body"); + + /* Spot-check headers across the range. */ + for (int i = 0; i < count; i += 37) { + snprintf(name, sizeof(name), "x-header-%d", i); + + const char *np, *vp; + size_t nl, vl; + assert_true(response_wire_header_at(rw, (size_t) i, &np, &nl, &vp, &vl)); + assert_int_equal(nl, strlen(name)); + assert_memory_equal(np, name, nl); + assert_int_equal(vl, sizeof(value)); + assert_int_equal(vp[0], 'a' + (i % 26)); + assert_int_equal(vp[vl - 1], 'a' + (i % 26)); + } + + response_wire_free(rw); +} + +static void test_free_null(void **state) +{ + (void) state; + response_wire_free(NULL); /* no-op, must not crash */ +} + +int main(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_empty_wire), + cmocka_unit_test(test_status_headers_body), + cmocka_unit_test(test_status_replaced), + cmocka_unit_test(test_non_nul_terminated_span), + cmocka_unit_test(test_headers), + cmocka_unit_test(test_empty_body_complete), + cmocka_unit_test(test_streaming_body_flag), + cmocka_unit_test(test_arena_growth_keeps_spans), + cmocka_unit_test(test_free_null), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/unit/http3/test_http3_steer.c b/tests/unit/http3/test_http3_steer.c new file mode 100644 index 0000000..af38664 --- /dev/null +++ b/tests/unit/http3/test_http3_steer.c @@ -0,0 +1,146 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ +*/ + +/* Unit tests for HTTP/3 CID steering (#80 D6 / #72). Pure crypto core, no + * server/socket: the addressing layer that lets any reactor recover the owner + * reactor id from a server-minted CID. Covers init/activation gating, round-trip + * for every reactor id, nonce variation, masking (id not in clear), short-CID + * rejection, and key sensitivity. */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "http3/http3_steer.h" + +/* Stub for http3_fill_random (defined in http3_connection.c in the real build). + * Real DRBG so encode() produces genuinely varied nonces — exactly what the + * masking test needs. */ +bool http3_fill_random(uint8_t *buf, size_t len) +{ + return RAND_bytes(buf, (int) len) == 1; +} + +static int steer_setup(void **state) +{ + (void) state; + assert_true(http3_steer_init()); + http3_steer_set_active(true); + return 0; +} + +/* init is idempotent and gates activation: set_active(true) before init must + * not arm steering. */ +static void test_activation_gating(void **state) +{ + (void) state; + + /* Re-init is a no-op success (key already seeded by setup). */ + assert_true(http3_steer_init()); + + http3_steer_set_active(false); + assert_false(http3_steer_active()); + + http3_steer_set_active(true); + assert_true(http3_steer_active()); +} + +/* Every reactor id 0..255 survives encode → decode. */ +static void test_roundtrip_all_ids(void **state) +{ + (void) state; + + for (int id = 0; id < 256; id++) { + uint8_t cid[HTTP3_STEER_CID_LEN]; + assert_true(http3_steer_encode(cid, id)); + assert_int_equal(http3_steer_decode(cid, sizeof(cid)), id); + } +} + +/* Two encodes of the same id differ (random nonce) yet both decode back. */ +static void test_nonce_varies(void **state) +{ + (void) state; + + uint8_t a[HTTP3_STEER_CID_LEN]; + uint8_t b[HTTP3_STEER_CID_LEN]; + + assert_true(http3_steer_encode(a, 3)); + assert_true(http3_steer_encode(b, 3)); + + assert_int_not_equal(memcmp(a, b, sizeof(a)), 0); + assert_int_equal(http3_steer_decode(a, sizeof(a)), 3); + assert_int_equal(http3_steer_decode(b, sizeof(b)), 3); +} + +/* The id is not stored in clear: across many encodes of the same id, the + * id byte takes on many different values (it is masked with AES(nonce)). A + * plaintext byte would be constant. */ +static void test_id_is_masked(void **state) +{ + (void) state; + + bool seen_other = false; + + for (int i = 0; i < 64; i++) { + uint8_t cid[HTTP3_STEER_CID_LEN]; + assert_true(http3_steer_encode(cid, 5)); + + if (cid[0] != 5) { + seen_other = true; + } + } + + assert_true(seen_other); +} + +/* A CID shorter than the steering width cannot carry an id. */ +static void test_short_cid_rejected(void **state) +{ + (void) state; + + uint8_t cid[HTTP3_STEER_CID_LEN]; + assert_true(http3_steer_encode(cid, 1)); + + assert_int_equal(http3_steer_decode(cid, HTTP3_STEER_CID_LEN - 1), -1); + assert_int_equal(http3_steer_decode(cid, 0), -1); + assert_int_equal(http3_steer_decode(NULL, HTTP3_STEER_CID_LEN), -1); +} + +/* A CID longer than the width still decodes (the owner byte + nonce live in the + * leading bytes; trailing bytes are ignored by decode). */ +static void test_longer_cid_decodes(void **state) +{ + (void) state; + + uint8_t cid[HTTP3_STEER_CID_LEN + 4]; + assert_true(http3_steer_encode(cid, 9)); + /* fill the tail so it is clearly outside the encoded prefix */ + memset(cid + HTTP3_STEER_CID_LEN, 0xAB, 4); + + assert_int_equal(http3_steer_decode(cid, sizeof(cid)), 9); +} + +int main(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test_setup(test_activation_gating, steer_setup), + cmocka_unit_test_setup(test_roundtrip_all_ids, steer_setup), + cmocka_unit_test_setup(test_nonce_varies, steer_setup), + cmocka_unit_test_setup(test_id_is_masked, steer_setup), + cmocka_unit_test_setup(test_short_cid_rejected, steer_setup), + cmocka_unit_test_setup(test_longer_cid_decodes, steer_setup), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +}