eval-protocol · dphuang2 · Aug 11, 2025 · Aug 11, 2025 · Aug 11, 2025 · Aug 11, 2025
diff --git a/eval_protocol/utils/logs_server.py b/eval_protocol/utils/logs_server.py
@@ -87,18 +87,32 @@ async def _send_text_to_all_connections(self, text: str):
             return
 
         tasks = []
+        failed_connections = []
+
         for connection in connections:
             try:
                 tasks.append(connection.send_text(text))
             except Exception as e:
                 logger.error(f"Failed to send text to WebSocket: {e}")
-                with self._lock:
-                    try:
-                        self.active_connections.remove(connection)
-                    except ValueError:
-                        pass
+                failed_connections.append(connection)
+
+        # Await all sends concurrently; failures come back as results instead of being raised
         if tasks:
-            await asyncio.gather(*tasks, return_exceptions=True)
+            results = await asyncio.gather(*tasks, return_exceptions=True)
+
+            # Check for any exceptions that occurred during execution
+            for i, result in enumerate(results):
+                if isinstance(result, Exception):
+                    logger.error(f"Failed to send text to WebSocket: {result}")
+                    failed_connections.append(connections[i])
+
+        # Remove all failed connections
+        with self._lock:
+            for connection in failed_connections:
+                try:
+                    self.active_connections.remove(connection)
+                except ValueError:
+                    pass
 
     def start_broadcast_loop(self):
         """Start the broadcast loop in the current event loop."""
@@ -109,6 +123,7 @@ def stop_broadcast_loop(self):
         """Stop the broadcast loop."""
         if self._broadcast_task and not self._broadcast_task.done():
             self._broadcast_task.cancel()
+            self._broadcast_task = None
 
 
 class EvaluationWatcher:

diff --git a/eval_protocol/utils/vite_server.py b/eval_protocol/utils/vite_server.py
@@ -97,7 +97,17 @@ def _setup_routes(self):
         # Mount static files
         self.app.mount("/assets", StaticFiles(directory=self.build_dir / "assets"), name="assets")
 
-        # Serve other static files from build directory
+        @self.app.get("/")
+        async def root():
+            """Serve the main index.html file with injected configuration."""
+            return self._serve_index_with_config()
+
+        @self.app.get("/health")
+        async def health():
+            """Health check endpoint."""
+            return {"status": "ok", "build_dir": str(self.build_dir)}
+
+        # Catch-all for other static files in the build directory; register it last so it cannot shadow "/" and "/health"
         @self.app.get("/{path:path}")
         async def serve_spa(path: str):
             """
@@ -114,22 +124,12 @@ async def serve_spa(path: str):
 
             # For SPA routing, serve index.html for non-existent routes
             # but exclude API routes and asset requests
-            if not path.startswith(("api/", "assets/")):
+            if not path.startswith(("api/", "assets/", "health")):
                 return self._serve_index_with_config()
 
             # If we get here, the file doesn't exist and it's not a SPA route
             raise HTTPException(status_code=404, detail="File not found")
 
-        @self.app.get("/")
-        async def root():
-            """Serve the main index.html file with injected configuration."""
-            return self._serve_index_with_config()
-
-        @self.app.get("/health")
-        async def health():
-            """Health check endpoint."""
-            return {"status": "ok", "build_dir": str(self.build_dir)}
-
     def run(self):
         """
         Run the Vite server.

diff --git a/tests/pytest/test_pytest_input_messages.py b/tests/pytest/test_pytest_input_messages.py
@@ -1,6 +1,6 @@
 from typing import List
 
-from eval_protocol.models import Message, EvaluationRow
+from eval_protocol.models import EvaluationRow, Message
 from eval_protocol.pytest import default_single_turn_rollout_processor, evaluation_test
 
 
@@ -10,7 +10,7 @@
             Message(role="user", content="What is the capital of France?"),
         ]
     ],
-    model=["fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"],
+    model=["fireworks_ai/accounts/fireworks/models/gpt-oss-120b"],
     rollout_processor=default_single_turn_rollout_processor,
 )
 def test_input_messages_in_decorator(rows: List[EvaluationRow]) -> List[EvaluationRow]:

diff --git a/tests/pytest/test_pytest_json_schema.py b/tests/pytest/test_pytest_json_schema.py
@@ -1,5 +1,6 @@
 import json
 from typing import Any, Dict, List
+
 from eval_protocol.models import EvaluationRow
 from eval_protocol.pytest import default_single_turn_rollout_processor, evaluation_test
 from eval_protocol.rewards.json_schema import json_schema_reward
@@ -23,7 +24,7 @@ def json_schema_to_evaluation_row(rows: List[Dict[str, Any]]) -> List[Evaluation
 
 @evaluation_test(
     input_dataset=["tests/pytest/data/json_schema.jsonl"],
-    model=["fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"],
+    model=["fireworks_ai/accounts/fireworks/models/gpt-oss-120b"],
     mode="pointwise",
     rollout_processor=default_single_turn_rollout_processor,
     dataset_adapter=json_schema_to_evaluation_row,