Skip to content

Commit 5f3bf69

Browse files
committed
d
1 parent 2db5be0 commit 5f3bf69

File tree

2 files changed

+49
-30
lines changed

2 files changed

+49
-30
lines changed

optillm/mars/agent.py

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,11 @@ def _assign_temperature(self) -> float:
3434
def _get_reasoning_effort(self) -> str:
3535
"""Get reasoning effort level based on agent temperature"""
3636
if self.temperature <= 0.4:
37-
return "low" # 8k thinking tokens
37+
return "low" # 12.5% of max_tokens
3838
elif self.temperature <= 0.8:
39-
return "medium" # 16k thinking tokens
39+
return "medium" # 25% of max_tokens
4040
else:
41-
return "high" # 32k thinking tokens
41+
return "high" # 50% of max_tokens
4242

4343
def generate_solution(self, problem: str, request_id: str = None) -> Tuple[AgentSolution, int]:
4444
"""Generate a solution for the given problem using reasoning API"""
@@ -51,19 +51,21 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
5151
problem=problem
5252
)
5353

54-
# Configure reasoning parameters for OpenRouter
54+
# Configure reasoning parameters based on proportional budgets
5555
reasoning_effort = self._get_reasoning_effort()
56-
reasoning_config = {
57-
"effort": reasoning_effort
58-
}
56+
max_tokens = self.config.get('max_tokens', 64000)
5957

60-
# Add specific token budgets for 3-agent configuration
58+
# Calculate reasoning tokens based on effort level and proportions
6159
if reasoning_effort == "low":
62-
reasoning_config["max_tokens"] = 8000 # Agent 0: 8k thinking tokens
60+
reasoning_tokens = int(max_tokens * self.config.get('low_effort_ratio', 0.125))
6361
elif reasoning_effort == "medium":
64-
reasoning_config["max_tokens"] = 16000 # Agent 1: 16k thinking tokens
62+
reasoning_tokens = int(max_tokens * self.config.get('medium_effort_ratio', 0.25))
6563
else: # high
66-
reasoning_config["max_tokens"] = 32000 # Agent 2: 32k thinking tokens
64+
reasoning_tokens = int(max_tokens * self.config.get('high_effort_ratio', 0.5))
65+
66+
reasoning_config = {
67+
"max_tokens": reasoning_tokens
68+
}
6769

6870
try:
6971
# Make API call with reasoning via extra_body for OpenRouter compatibility
@@ -73,7 +75,7 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
7375
{"role": "system", "content": MATHEMATICAL_SYSTEM_PROMPT},
7476
{"role": "user", "content": exploration_prompt}
7577
],
76-
max_tokens=self.config.get('max_response_tokens', 32768),
78+
max_tokens=max_tokens,
7779
temperature=self.temperature,
7880
timeout=300, # 5 minute timeout for complex problems
7981
extra_body={
@@ -131,19 +133,24 @@ def verify_solution(self, problem: str, solution: str, verifier_id: int, solutio
131133
solution=solution
132134
)
133135

136+
# Calculate verification token budgets
137+
max_tokens = self.config.get('max_tokens', 64000)
138+
verification_max_tokens = int(max_tokens * self.config.get('verification_ratio', 0.5))
139+
verification_reasoning_tokens = int(verification_max_tokens * 0.5)
140+
134141
try:
135142
response = self.client.chat.completions.create(
136143
model=self.model,
137144
messages=[
138145
{"role": "system", "content": MATHEMATICAL_SYSTEM_PROMPT},
139146
{"role": "user", "content": verification_prompt}
140147
],
141-
max_tokens=16384,
148+
max_tokens=verification_max_tokens,
142149
temperature=0.1, # Low temperature for consistent verification
143150
timeout=180,
144151
extra_body={
145152
"reasoning": {
146-
"effort": "medium"
153+
"max_tokens": verification_reasoning_tokens
147154
}
148155
}
149156
)
@@ -188,19 +195,23 @@ def improve_solution(self, problem: str, current_solution: str, feedback: str, i
188195
issues="\n".join(f"- {issue}" for issue in issues)
189196
)
190197

198+
# Calculate improvement token budgets (use high effort for iterations)
199+
max_tokens = self.config.get('max_tokens', 64000)
200+
improvement_reasoning_tokens = int(max_tokens * self.config.get('high_effort_ratio', 0.5))
201+
191202
try:
192203
response = self.client.chat.completions.create(
193204
model=self.model,
194205
messages=[
195206
{"role": "system", "content": MATHEMATICAL_SYSTEM_PROMPT},
196207
{"role": "user", "content": improvement_prompt}
197208
],
198-
max_tokens=32768,
209+
max_tokens=max_tokens,
199210
temperature=self.temperature * 0.8, # Slightly lower temperature for improvement
200211
timeout=300,
201212
extra_body={
202213
"reasoning": {
203-
"effort": "high"
214+
"max_tokens": improvement_reasoning_tokens
204215
}
205216
}
206217
)

optillm/mars/mars.py

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,23 @@
1515

1616
logger = logging.getLogger(__name__)
1717

18-
# Default MARS configuration inspired by IMO25 solver
18+
# Default MARS configuration with unified token budget system
1919
DEFAULT_CONFIG = {
2020
'num_agents': 3,
21-
'max_iterations': 10,
22-
'verification_passes_required': 3,
23-
'consensus_threshold': 2,
24-
'min_verified_solutions': 1,
25-
'thinking_budget_initial': 10000,
26-
'thinking_budget_max': 32000,
27-
'max_response_tokens': 64000,
21+
'max_iterations': 5, # Balanced for quality vs efficiency
22+
'verification_passes_required': 3, # Restored for better verification
23+
'consensus_threshold': 2, # Keep at 2 for 3-agent setup
24+
'min_verified_solutions': 1, # Keep minimal requirement
25+
'max_tokens': 64000, # Base token budget
2826
'max_verification_attempts': 10,
2927
'early_termination': True,
30-
'use_reasoning_api': True
28+
'use_reasoning_api': True,
29+
# Token budget proportions
30+
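'high_effort_ratio': 0.5, # 32000 tokens at the default max_tokens of 64000
31+
'medium_effort_ratio': 0.25, # 16000 tokens at the default max_tokens of 64000
32+
'low_effort_ratio': 0.125, # 8000 tokens at the default max_tokens of 64000
33+
'verification_ratio': 0.5, # 32000 tokens for verification at the default max_tokens
34+
'synthesis_ratio': 1.0 # 64000 tokens for synthesis at the default max_tokens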
3135
}
3236

3337
def multi_agent_reasoning_system(
@@ -185,19 +189,23 @@ def _synthesize_final_solution(
185189
)
186190

187191
try:
188-
# Use high reasoning effort for synthesis
192+
# Calculate synthesis token budgets
193+
synthesis_max_tokens = int(config['max_tokens'] * config['synthesis_ratio'])
194+
synthesis_reasoning_tokens = int(synthesis_max_tokens * 0.5)
195+
196+
# Use a proportional reasoning token budget for synthesis
189197
response = client.chat.completions.create(
190198
model=model,
191199
messages=[
192200
{"role": "system", "content": "You are a mathematical synthesis expert."},
193201
{"role": "user", "content": synthesis_prompt}
194202
],
195-
max_tokens=config['max_response_tokens'],
203+
max_tokens=synthesis_max_tokens,
196204
temperature=0.3, # Lower temperature for synthesis
197205
timeout=300,
198206
extra_body={
199207
"reasoning": {
200-
"effort": "high"
208+
"max_tokens": synthesis_reasoning_tokens
201209
}
202210
}
203211
)
@@ -210,11 +218,11 @@ def _synthesize_final_solution(
210218
{"role": "system", "content": "You are a mathematical synthesis expert."},
211219
{"role": "user", "content": synthesis_prompt}
212220
],
213-
"max_tokens": config['max_response_tokens'],
221+
"max_tokens": synthesis_max_tokens,
214222
"temperature": 0.3,
215223
"extra_body": {
216224
"reasoning": {
217-
"effort": "high"
225+
"max_tokens": synthesis_reasoning_tokens
218226
}
219227
}
220228
}

0 commit comments

Comments
 (0)