Skip to content

Commit 553329b

Browse files
author
Dylan Huang
authored
Aggregated metrics part 5 (#54)
* add --port arg to ep logs * Fix WebSocketManager to reset broadcast task after cancellation * simple tests work * TODO: TestLogsServer * TODO: TestLogsServerIntegration * TODO: test HTML injection - also test TestAsyncWebSocketOperations * add logs server tests * add port parameter testes * use gpt-oss-120b to avoid rate limits
1 parent fd1c7c9 commit 553329b

File tree

7 files changed

+951
-21
lines changed

7 files changed

+951
-21
lines changed

eval_protocol/utils/logs_server.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -87,18 +87,32 @@ async def _send_text_to_all_connections(self, text: str):
8787
return
8888

8989
tasks = []
90+
failed_connections = []
91+
9092
for connection in connections:
9193
try:
9294
tasks.append(connection.send_text(text))
9395
except Exception as e:
9496
logger.error(f"Failed to send text to WebSocket: {e}")
95-
with self._lock:
96-
try:
97-
self.active_connections.remove(connection)
98-
except ValueError:
99-
pass
97+
failed_connections.append(connection)
98+
99+
# Execute all sends in parallel
100100
if tasks:
101-
await asyncio.gather(*tasks, return_exceptions=True)
101+
results = await asyncio.gather(*tasks, return_exceptions=True)
102+
103+
# Check for any exceptions that occurred during execution
104+
for i, result in enumerate(results):
105+
if isinstance(result, Exception):
106+
logger.error(f"Failed to send text to WebSocket: {result}")
107+
failed_connections.append(connections[i])
108+
109+
# Remove all failed connections
110+
with self._lock:
111+
for connection in failed_connections:
112+
try:
113+
self.active_connections.remove(connection)
114+
except ValueError:
115+
pass
102116

103117
def start_broadcast_loop(self):
104118
"""Start the broadcast loop in the current event loop."""
@@ -109,6 +123,7 @@ def stop_broadcast_loop(self):
109123
"""Stop the broadcast loop."""
110124
if self._broadcast_task and not self._broadcast_task.done():
111125
self._broadcast_task.cancel()
126+
self._broadcast_task = None
112127

113128

114129
class EvaluationWatcher:

eval_protocol/utils/vite_server.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,17 @@ def _setup_routes(self):
9797
# Mount static files
9898
self.app.mount("/assets", StaticFiles(directory=self.build_dir / "assets"), name="assets")
9999

100-
# Serve other static files from build directory
100+
@self.app.get("/")
101+
async def root():
102+
"""Serve the main index.html file with injected configuration."""
103+
return self._serve_index_with_config()
104+
105+
@self.app.get("/health")
106+
async def health():
107+
"""Health check endpoint."""
108+
return {"status": "ok", "build_dir": str(self.build_dir)}
109+
110+
# Serve other static files from build directory - this must be last
101111
@self.app.get("/{path:path}")
102112
async def serve_spa(path: str):
103113
"""
@@ -114,22 +124,12 @@ async def serve_spa(path: str):
114124

115125
# For SPA routing, serve index.html for non-existent routes
116126
# but exclude API routes and asset requests
117-
if not path.startswith(("api/", "assets/")):
127+
if not path.startswith(("api/", "assets/", "health")):
118128
return self._serve_index_with_config()
119129

120130
# If we get here, the file doesn't exist and it's not a SPA route
121131
raise HTTPException(status_code=404, detail="File not found")
122132

123-
@self.app.get("/")
124-
async def root():
125-
"""Serve the main index.html file with injected configuration."""
126-
return self._serve_index_with_config()
127-
128-
@self.app.get("/health")
129-
async def health():
130-
"""Health check endpoint."""
131-
return {"status": "ok", "build_dir": str(self.build_dir)}
132-
133133
def run(self):
134134
"""
135135
Run the Vite server.

tests/pytest/test_pytest_input_messages.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from typing import List
22

3-
from eval_protocol.models import Message, EvaluationRow
3+
from eval_protocol.models import EvaluationRow, Message
44
from eval_protocol.pytest import default_single_turn_rollout_processor, evaluation_test
55

66

@@ -10,7 +10,7 @@
1010
Message(role="user", content="What is the capital of France?"),
1111
]
1212
],
13-
model=["fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"],
13+
model=["fireworks_ai/accounts/fireworks/models/gpt-oss-120b"],
1414
rollout_processor=default_single_turn_rollout_processor,
1515
)
1616
def test_input_messages_in_decorator(rows: List[EvaluationRow]) -> List[EvaluationRow]:

tests/pytest/test_pytest_json_schema.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
from typing import Any, Dict, List
3+
34
from eval_protocol.models import EvaluationRow
45
from eval_protocol.pytest import default_single_turn_rollout_processor, evaluation_test
56
from eval_protocol.rewards.json_schema import json_schema_reward
@@ -23,7 +24,7 @@ def json_schema_to_evaluation_row(rows: List[Dict[str, Any]]) -> List[Evaluation
2324

2425
@evaluation_test(
2526
input_dataset=["tests/pytest/data/json_schema.jsonl"],
26-
model=["fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"],
27+
model=["fireworks_ai/accounts/fireworks/models/gpt-oss-120b"],
2728
mode="pointwise",
2829
rollout_processor=default_single_turn_rollout_processor,
2930
dataset_adapter=json_schema_to_evaluation_row,

0 commit comments

Comments
 (0)