Skip to content

Commit 757a620

Browse files
committed
f
1 parent 13a4b91 commit 757a620

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

scripts/eval_aime_benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ def get_llm_response(problem: str, model: str, analyze_logits: bool = False, ext
311 311   messages=[
312 312   {"role": "user", "content": SYSTEM_PROMPT + problem}
313 313   ],
314     - max_tokens=8192,
    314 + max_tokens=30000,
315 315   **kwargs
316 316   )
317 317

scripts/eval_imo25_benchmark.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def verify_solution_with_llm(problem: str, solution: str, model: str) -> Dict[st
133 133   {"role": "system", "content": "You are an expert mathematician and IMO judge."},
134 134   {"role": "user", "content": judge_prompt}
135 135   ],
136     - max_tokens=2048,
    136 + max_tokens=30000,
137 137   temperature=0.1 # Low temperature for consistent judging
138 138   )
139 139

@@ -302,7 +302,7 @@ def get_llm_response(problem: str, model: str, extra_body: dict = None, timeout:
302 302   {"role": "system", "content": SYSTEM_PROMPT},
303 303   {"role": "user", "content": problem}
304 304   ],
305     - max_tokens=8192, # Extended token limit for complex proofs
    305 + max_tokens=30000, # Extended token limit for complex proofs
306 306   **kwargs
307 307   )
308 308

0 commit comments

Comments
 (0)