Buffer per-request StreamableHTTP streams to avoid serial-router head-of-line blocking

maxisbey · maxisbey · commit b0b398c934ea · 2026-06-22T10:48:23.000Z
The serial message_router forwards each response with a blocking send into a per-request zero-buffer stream whose only consumer (sse_writer) is started lazily via a nested start_soon. Under concurrent requests, a single consumer that is not yet receiving parks the router and head-of-line blocks every other in-flight response on the session. Give the three call sites that create _request_streams entries (create_memory_object_stream[EventMessage]) a small bounded buffer (REQUEST_STREAM_BUFFER_SIZE = 16) so the router can deposit a response and move on. The sse_stream dict streams stay at 0: they are downstream of the router, so buffering them would relax per-client backpressure without helping the race. Fixes #1764.
diff --git a/src/mcp/server/streamable_http.py b/src/mcp/server/streamable_http.py
@@ -13,7 +13,7 @@
 from contextlib import asynccontextmanager
 from dataclasses import dataclass
 from http import HTTPStatus
-from typing import Any
+from typing import Any, Final
 
 import anyio
 import pydantic_core
@@ -59,6 +59,11 @@
 # Special key for the standalone GET stream
 GET_STREAM_KEY = "_GET_stream"
 
+# Buffer for the per-request `_request_streams` so the serial `message_router`
+# can deposit a response and move on instead of head-of-line blocking the
+# whole session on a lazily-started `sse_writer`. See #1764.
+REQUEST_STREAM_BUFFER_SIZE: Final = 16
+
 # Session ID validation pattern (visible ASCII characters ranging from 0x21 to 0x7E)
 # Pattern ensures entire string contains only valid characters by using ^ and $ anchors
 SESSION_ID_PATTERN = re.compile(r"^[\x21-\x7E]+$")
@@ -524,7 +529,9 @@ async def _handle_post_request(self, scope: Scope, request: Request, receive: Re
             # Extract the request ID outside the try block for proper scope
             request_id = str(message.id)
             # Register this stream for the request ID
-            self._request_streams[request_id] = anyio.create_memory_object_stream[EventMessage](0)
+            self._request_streams[request_id] = anyio.create_memory_object_stream[EventMessage](
+                REQUEST_STREAM_BUFFER_SIZE
+            )
             request_stream_reader = self._request_streams[request_id][1]
 
             if self.is_json_response_enabled:
@@ -703,7 +710,9 @@ async def standalone_sse_writer():
             try:
                 # Create a standalone message stream for server-initiated messages
 
-                self._request_streams[GET_STREAM_KEY] = anyio.create_memory_object_stream[EventMessage](0)
+                self._request_streams[GET_STREAM_KEY] = anyio.create_memory_object_stream[EventMessage](
+                    REQUEST_STREAM_BUFFER_SIZE
+                )
                 standalone_stream_reader = self._request_streams[GET_STREAM_KEY][1]
 
                 async with sse_stream_writer, standalone_stream_reader:
@@ -893,7 +902,9 @@ async def send_event(event_message: EventMessage) -> None:
                             await self._maybe_send_priming_event(stream_id, sse_stream_writer, replay_protocol_version)
 
                             # Create new request streams for this connection
-                            self._request_streams[stream_id] = anyio.create_memory_object_stream[EventMessage](0)
+                            self._request_streams[stream_id] = anyio.create_memory_object_stream[EventMessage](
+                                REQUEST_STREAM_BUFFER_SIZE
+                            )
                             msg_reader = self._request_streams[stream_id][1]
 
                             # Forward messages to SSE
diff --git a/tests/server/test_streamable_http_router.py b/tests/server/test_streamable_http_router.py
@@ -0,0 +1,51 @@
+"""Regression coverage for the StreamableHTTP per-session response router."""
+
+import anyio
+import pytest
+
+from mcp.server.streamable_http import (
+    REQUEST_STREAM_BUFFER_SIZE,
+    EventMessage,
+    StreamableHTTPServerTransport,
+)
+from mcp.shared.message import SessionMessage
+from mcp.types import JSONRPCResponse
+
+
+@pytest.mark.anyio
+async def test_router_unconsumed_request_stream_does_not_block_siblings() -> None:
+    """A response whose `sse_writer` is not yet receiving must not park the router (#1764).
+
+    Drives the routing layer directly (the production race does not reproduce
+    on loopback), so this pins the router semantics, not the call sites.
+    """
+    transport = StreamableHTTPServerTransport(mcp_session_id="sid", is_json_response_enabled=False)
+    streams = transport._request_streams
+    async with transport.connect() as (_read_stream, write_stream):
+        # Model two concurrent POSTs at the point _handle_post_request has
+        # registered the per-request stream but A's sse_writer has not yet
+        # reached its first receive().
+        streams["A"] = anyio.create_memory_object_stream[EventMessage](REQUEST_STREAM_BUFFER_SIZE)
+        streams["B"] = anyio.create_memory_object_stream[EventMessage](REQUEST_STREAM_BUFFER_SIZE)
+        a_send, a_recv = streams["A"]
+        b_reader = streams["B"][1]
+        b_received = anyio.Event()
+
+        async def consume_b() -> None:
+            async with b_reader:
+                await b_reader.receive()
+                b_received.set()
+
+        async def server_writes() -> None:
+            await write_stream.send(SessionMessage(JSONRPCResponse(jsonrpc="2.0", id="A", result={})))
+            await write_stream.send(SessionMessage(JSONRPCResponse(jsonrpc="2.0", id="B", result={})))
+
+        async with anyio.create_task_group() as tg:
+            tg.start_soon(consume_b)
+            tg.start_soon(server_writes)
+            with anyio.fail_after(5):
+                await b_received.wait()
+            # A's response was buffered for its (late) consumer, not dropped.
+            assert a_send.statistics().current_buffer_used == 1
+            await a_recv.aclose()
+            await a_send.aclose()