
Commit fece917

Commit message: f
1 parent 6d7f57c commit fece917

File tree: 4 files changed, +51 -8 lines changed

  optillm/mars/agent.py
  optillm/mars/mars.py
  optillm/server.py
  scripts/eval_imo25_benchmark.py


optillm/mars/agent.py

Lines changed: 21 additions & 3 deletions
@@ -77,9 +77,18 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
 
             solution_text = response.choices[0].message.content.strip()
 
+            # ENHANCED LOGGING: Log solution details
+            solution_length = len(solution_text)
+
+            logger.info(f"Agent {self.agent_id} solution details:")
+            logger.info(f"  - Length: {solution_length} characters")
+            logger.info(f"  - Last 100 chars: ...{solution_text[-100:] if solution_length > 100 else solution_text}")
+
             # Extract reasoning tokens from the correct nested structure
             reasoning_tokens = 0
+            total_tokens = 0
             if hasattr(response, 'usage') and response.usage:
+                total_tokens = getattr(response.usage, 'total_tokens', 0)
                 # Check completion_tokens_details first (OpenRouter structure)
                 if hasattr(response.usage, 'completion_tokens_details') and response.usage.completion_tokens_details:
                     reasoning_tokens = getattr(response.usage.completion_tokens_details, 'reasoning_tokens', 0)
@@ -88,10 +97,12 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
                 if reasoning_tokens == 0:
                     reasoning_tokens = getattr(response.usage, 'reasoning_tokens', 0)
 
+            logger.info(f"Agent {self.agent_id} token usage: reasoning={reasoning_tokens}, total={total_tokens}")
+
             # Extract confidence from solution (heuristic based on response characteristics)
             confidence = self._estimate_confidence(solution_text)
 
-            # Create agent solution object
+            # Create agent solution object with enhanced metadata
             agent_solution = AgentSolution(
                 agent_id=self.agent_id,
                 temperature=self.temperature,
@@ -101,20 +112,27 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
                 timestamp=datetime.now()
             )
 
+            # Add metadata to solution object
+            agent_solution.solution_length = solution_length
+            agent_solution.total_tokens = total_tokens
+
             logger.info(f"Agent {self.agent_id} generated solution with {reasoning_tokens} reasoning tokens")
             return agent_solution, reasoning_tokens
 
         except Exception as e:
             logger.error(f"Agent {self.agent_id} error generating solution: {str(e)}")
             # Return empty solution with error indication
-            return AgentSolution(
+            error_solution = AgentSolution(
                 agent_id=self.agent_id,
                 temperature=self.temperature,
                 solution=f"Error generating solution: {str(e)}",
                 confidence=0.0,
                 reasoning_tokens=0,
                 timestamp=datetime.now()
-            ), 0
+            )
+            error_solution.solution_length = len(error_solution.solution)
+            error_solution.total_tokens = 0
+            return error_solution, 0
 
     def verify_solution(self, problem: str, solution: str, verifier_id: int, solution_agent_id: int, request_id: str = None) -> VerificationResult:
         """Verify a solution using mathematical reasoning"""

optillm/mars/mars.py

Lines changed: 27 additions & 2 deletions
@@ -35,6 +35,7 @@ def multi_agent_reasoning_system(
     initial_query: str,
     client,
     model: str,
+    request_config: dict = None,
     request_id: str = None
 ) -> Tuple[str, int]:
     """
@@ -51,21 +52,30 @@ def multi_agent_reasoning_system(
         Tuple of (final_solution, total_reasoning_tokens)
     """
     return asyncio.run(_run_mars_parallel(
-        system_prompt, initial_query, client, model, request_id
+        system_prompt, initial_query, client, model, request_config, request_id
     ))
 
 async def _run_mars_parallel(
     system_prompt: str,
     initial_query: str,
     client,
     model: str,
+    request_config: dict = None,
     request_id: str = None
 ) -> Tuple[str, int]:
     """Async implementation of MARS with parallel execution"""
     logger.info(f"Starting MARS with model: {model}")
 
     # Initialize configuration
     config = DEFAULT_CONFIG.copy()
+
+    # Override max_tokens from request_config if provided
+    if request_config and 'max_tokens' in request_config:
+        config['max_tokens'] = request_config['max_tokens']
+        logger.info(f"Using max_tokens from request: {config['max_tokens']}")
+    else:
+        logger.info(f"Using default max_tokens: {config['max_tokens']}")
+
     total_reasoning_tokens = 0
 
     # Calculate optimal worker count for parallel execution
@@ -191,6 +201,14 @@ async def generate_solution_async(agent: MARSAgent):
             workspace.add_solution(solution)
             total_tokens += tokens
             successful_solutions += 1
+
+            # ENHANCED LOGGING: Log individual agent solution details
+            logger.info(f"Agent {agent_id} exploration complete:")
+            logger.info(f"  - Solution length: {solution.solution_length} chars")
+            logger.info(f"  - Total tokens: {solution.total_tokens}")
+            logger.info(f"  - Reasoning tokens: {solution.reasoning_tokens}")
+            logger.info(f"  - Confidence: {solution.confidence:.2f}")
+            logger.info(f"  - Solution preview: {solution.solution[:200]}...")
         else:
             logger.error(f"Agent {agent_id} generated no solution")
 
@@ -274,15 +292,22 @@ def _synthesize_final_solution(
 
         # Extract reasoning tokens from correct nested structure (matching agent.py fix)
         reasoning_tokens = 0
+        total_tokens = 0
         if hasattr(response, 'usage') and response.usage:
+            total_tokens = getattr(response.usage, 'total_tokens', 0)
             # Check completion_tokens_details first (OpenRouter structure)
             if hasattr(response.usage, 'completion_tokens_details') and response.usage.completion_tokens_details:
                 reasoning_tokens = getattr(response.usage.completion_tokens_details, 'reasoning_tokens', 0)
             # Fallback to direct usage field (standard OpenAI structure)
             if reasoning_tokens == 0:
                 reasoning_tokens = getattr(response.usage, 'reasoning_tokens', 0)
 
-        logger.info(f"Synthesis complete with {reasoning_tokens} reasoning tokens")
+        # ENHANCED LOGGING: Log synthesis details
+        logger.info(f"Synthesis complete:")
+        logger.info(f"  - Synthesis solution length: {len(final_solution)} characters")
+        logger.info(f"  - Reasoning tokens: {reasoning_tokens}")
+        logger.info(f"  - Total tokens: {total_tokens}")
+        logger.info(f"  - Final solution preview: {final_solution[:200]}...")
        return final_solution, reasoning_tokens
 
    except Exception as e:
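Note on the max_tokens handling above: DEFAULT_CONFIG is copied before the per-request override is applied, so the module-level defaults are never mutated. A small sketch of that pattern under assumed default values (the real DEFAULT_CONFIG lives in optillm/mars and its values may differ):

# Assumed defaults for illustration only; the real values live in optillm/mars.
DEFAULT_CONFIG = {'max_tokens': 30000, 'num_agents': 3}

def resolve_config(request_config: dict = None) -> dict:
    """Copy the defaults, then apply a per-request max_tokens override."""
    config = DEFAULT_CONFIG.copy()
    if request_config and 'max_tokens' in request_config:
        config['max_tokens'] = request_config['max_tokens']
    return config

print(resolve_config())                       # uses the default max_tokens
print(resolve_config({'max_tokens': 64000}))  # per-request override wins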

optillm/server.py

Lines changed: 1 addition & 1 deletion
@@ -423,7 +423,7 @@ def execute_single_approach(approach, system_prompt, initial_query, client, mode
     elif approach == 'cepo':
         return cepo(system_prompt, initial_query, client, model, cepo_config, request_id)
     elif approach == 'mars':
-        return multi_agent_reasoning_system(system_prompt, initial_query, client, model, request_id)
+        return multi_agent_reasoning_system(system_prompt, initial_query, client, model, request_config=request_config, request_id=request_id)
     elif approach in plugin_approaches:
         # Check if the plugin accepts request_config
         plugin_func = plugin_approaches[approach]
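Note on the one-line change above: the per-request config is forwarded into MARS by keyword so the request_id argument is not shifted out of its slot. A toy dispatcher showing the same forwarding shape (the function names and defaults here are illustrative, not the server's actual code):

def run_mars(query, request_config=None, request_id=None):
    # Stand-in for multi_agent_reasoning_system: read max_tokens if provided.
    max_tokens = (request_config or {}).get('max_tokens', 30000)
    return f"mars({query!r}, max_tokens={max_tokens})", 0

def execute_single_approach_demo(approach, query, request_config=None, request_id=None):
    if approach == 'mars':
        # Keyword arguments keep request_config and request_id unambiguous.
        return run_mars(query, request_config=request_config, request_id=request_id)
    raise ValueError(f"unknown approach: {approach}")

print(execute_single_approach_demo('mars', 'Prove 1+1=2', request_config={'max_tokens': 64000}))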

scripts/eval_imo25_benchmark.py

Lines changed: 2 additions & 2 deletions
@@ -182,7 +182,7 @@ def imo25_verify_solution(problem: str, solution: str, model: str) -> Dict[str,
             {"role": "system", "content": verification_system_prompt},
             {"role": "user", "content": verification_prompt}
         ],
-        max_tokens=30000,
+        max_tokens=64000,
         temperature=0.1
     )
 
@@ -354,7 +354,7 @@ def get_llm_response(problem: str, model: str, extra_body: dict = None, timeout:
             {"role": "system", "content": SYSTEM_PROMPT},
             {"role": "user", "content": problem}
         ],
-        max_tokens=30000,  # Extended token limit for complex proofs
+        max_tokens=64000,  # Extended token limit for complex IMO proofs (increased from 30000)
         **kwargs
     )

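Note: both benchmark calls above simply raise the completion budget from 30000 to 64000 tokens. A hedged sketch of such a call with the OpenAI Python client (the model name and prompts are placeholders, and the chosen model must actually allow a 64000-token completion):

from openai import OpenAI

client = OpenAI()  # API key and base_url come from the environment

response = client.chat.completions.create(
    model="your-long-context-model",  # placeholder; must support 64k completions
    messages=[
        {"role": "system", "content": "You are a careful IMO grader."},
        {"role": "user", "content": "Verify the following proof step by step..."},
    ],
    max_tokens=64000,   # raised from 30000 so long proofs are not truncated
    temperature=0.1,
)
print(response.choices[0].message.content)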