Commit 129f098

fix
1 parent 7b6f652 commit 129f098

2 files changed: +40 -36 lines

optillm/mars/agent.py

Lines changed: 24 additions & 22 deletions
@@ -34,11 +34,11 @@ def _assign_temperature(self) -> float:
     def _get_reasoning_effort(self) -> str:
         """Get reasoning effort level based on agent temperature"""
         if self.temperature <= 0.4:
-            return "low"  # 12.5% of max_tokens
+            return "low"  # 8k reasoning tokens
         elif self.temperature <= 0.8:
-            return "medium"  # 25% of max_tokens
+            return "medium"  # 16k reasoning tokens
         else:
-            return "high"  # 50% of max_tokens
+            return "high"  # 24k reasoning tokens

     def generate_solution(self, problem: str, request_id: str = None) -> Tuple[AgentSolution, int]:
         """Generate a solution for the given problem using reasoning API"""
@@ -51,20 +51,21 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
             problem=problem
         )

-        # Configure reasoning parameters based on proportional budgets
+        # Configure reasoning parameters based on fixed budgets
         reasoning_effort = self._get_reasoning_effort()
-        max_tokens = self.config.get('max_tokens', 64000)
+        max_tokens = self.config['max_tokens']  # Fixed 32k

-        # Calculate reasoning tokens based on effort level and proportions
+        # Use fixed reasoning tokens based on effort level
         if reasoning_effort == "low":
-            reasoning_tokens = int(max_tokens * self.config.get('low_effort_ratio', 0.125))
+            reasoning_tokens = self.config['low_effort_tokens']  # 8k
         elif reasoning_effort == "medium":
-            reasoning_tokens = int(max_tokens * self.config.get('medium_effort_ratio', 0.25))
+            reasoning_tokens = self.config['medium_effort_tokens']  # 16k
         else:  # high
-            reasoning_tokens = int(max_tokens * self.config.get('high_effort_ratio', 0.5))
+            reasoning_tokens = self.config['high_effort_tokens']  # 24k

         reasoning_config = {
-            "max_tokens": reasoning_tokens
+            "max_tokens": reasoning_tokens,
+            "effort": reasoning_effort
         }

         try:
@@ -75,7 +76,7 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
                     {"role": "system", "content": MATHEMATICAL_SYSTEM_PROMPT},
                     {"role": "user", "content": exploration_prompt}
                 ],
-                max_tokens=reasoning_tokens + 8000,
+                max_tokens=max_tokens,
                 temperature=self.temperature,
                 timeout=300,  # 5 minute timeout for complex problems
                 extra_body={
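After this hunk, max_tokens caps the whole completion at the fixed 32k while the reasoning sub-budget and effort level travel in extra_body. A minimal reproduction of the request shape, assuming an OpenAI-compatible client pointed at an endpoint that understands this "reasoning" extension; the model name and prompt are placeholders, not from the commit:

from openai import OpenAI

client = OpenAI()  # assumes base_url/api_key point at a reasoning-capable endpoint

response = client.chat.completions.create(
    model="example/reasoning-model",  # placeholder
    messages=[{"role": "user", "content": "Prove that sqrt(2) is irrational."}],
    max_tokens=32000,  # fixed total cap: reasoning + visible answer
    temperature=0.6,
    extra_body={"reasoning": {"max_tokens": 16000, "effort": "medium"}},
)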
@@ -133,10 +134,9 @@ def verify_solution(self, problem: str, solution: str, verifier_id: int, solutio
             solution=solution
         )

-        # Calculate verification token budgets
-        max_tokens = self.config.get('max_tokens', 64000)
-        verification_max_tokens = int(max_tokens * self.config.get('verification_ratio', 0.5))
-        verification_reasoning_tokens = int(verification_max_tokens * 0.5)
+        # Use fixed verification token budgets
+        max_tokens = self.config['max_tokens']  # Fixed 32k
+        verification_reasoning_tokens = self.config['verification_tokens']  # Fixed 8k

         try:
             response = self.client.chat.completions.create(
@@ -145,12 +145,13 @@ def verify_solution(self, problem: str, solution: str, verifier_id: int, solutio
                     {"role": "system", "content": MATHEMATICAL_SYSTEM_PROMPT},
                     {"role": "user", "content": verification_prompt}
                 ],
-                max_tokens=verification_reasoning_tokens + 8000,
+                max_tokens=max_tokens,
                 temperature=0.1,  # Low temperature for consistent verification
                 timeout=180,
                 extra_body={
                     "reasoning": {
-                        "max_tokens": verification_reasoning_tokens
+                        "max_tokens": verification_reasoning_tokens,
+                        "effort": "low"
                     }
                 }
             )
@@ -195,9 +196,9 @@ def improve_solution(self, problem: str, current_solution: str, feedback: str, i
             issues="\n".join(f"- {issue}" for issue in issues)
         )

-        # Calculate improvement token budgets (use high effort for iterations)
-        max_tokens = self.config.get('max_tokens', 64000)
-        improvement_reasoning_tokens = int(max_tokens * self.config.get('high_effort_ratio', 0.5))
+        # Use fixed improvement token budgets (use high effort for iterations)
+        max_tokens = self.config['max_tokens']  # Fixed 32k
+        improvement_reasoning_tokens = self.config['high_effort_tokens']  # Fixed 24k

         try:
             response = self.client.chat.completions.create(
@@ -206,12 +207,13 @@ def improve_solution(self, problem: str, current_solution: str, feedback: str, i
                     {"role": "system", "content": MATHEMATICAL_SYSTEM_PROMPT},
                     {"role": "user", "content": improvement_prompt}
                 ],
-                max_tokens=improvement_reasoning_tokens + 8000,
+                max_tokens=max_tokens,
                 temperature=self.temperature * 0.8,  # Slightly lower temperature for improvement
                 timeout=300,
                 extra_body={
                     "reasoning": {
-                        "max_tokens": improvement_reasoning_tokens
+                        "max_tokens": improvement_reasoning_tokens,
+                        "effort": "high"
                     }
                 }
             )
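Across the three call sites in agent.py, the completion cap used to be reasoning_tokens + 8000, which at high effort under the old 64k config requested 32000 + 8000 = 40000 tokens; every call is now pinned to the same 32k cap, so the visible answer gets whatever the reasoning budget leaves. A quick arithmetic check of the new headroom (figures from the diff):

MAX_TOKENS = 32000  # fixed per-call cap
for effort, reasoning in {"low": 8000, "medium": 16000, "high": 24000}.items():
    print(f"{effort}: {reasoning} reasoning, {MAX_TOKENS - reasoning} left for the answer")
# low: 8000 reasoning, 24000 left for the answer
# medium: 16000 reasoning, 16000 left for the answer
# high: 24000 reasoning, 8000 left for the answer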

optillm/mars/mars.py

Lines changed: 16 additions & 14 deletions
@@ -15,23 +15,23 @@

 logger = logging.getLogger(__name__)

-# Default MARS configuration with unified token budget system
+# Default MARS configuration with fixed 32k token budget
 DEFAULT_CONFIG = {
     'num_agents': 3,
     'max_iterations': 5,  # Balanced for quality vs efficiency
     'verification_passes_required': 2,  # Balanced for 5-iteration efficiency
     'consensus_threshold': 2,  # Keep at 2 for 3-agent setup
     'min_verified_solutions': 1,  # Keep minimal requirement
-    'max_tokens': 64000,  # Base token budget
+    'max_tokens': 32000,  # Fixed 32k token budget for all calls
     'max_verification_attempts': 3,
     'early_termination': True,
     'use_reasoning_api': True,
-    # Token budget proportions
-    'high_effort_ratio': 0.5,  # 32000 tokens
-    'medium_effort_ratio': 0.25,  # 16000 tokens
-    'low_effort_ratio': 0.125,  # 8000 tokens
-    'verification_ratio': 0.5,  # 32000 tokens for verification
-    'synthesis_ratio': 1.0  # 64000 tokens for synthesis
+    # Fixed reasoning token allocations
+    'low_effort_tokens': 8000,  # Agent 0 (temperature 0.3)
+    'medium_effort_tokens': 16000,  # Agent 1 (temperature 0.6)
+    'high_effort_tokens': 24000,  # Agent 2 (temperature 1.0)
+    'verification_tokens': 8000,  # Fixed low effort for verification consistency
+    'synthesis_tokens': 24000  # Fixed high effort for final synthesis
 }

 def multi_agent_reasoning_system(
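Note that the budget lookups in agent.py switched from config.get(...) with defaults to bare config[...] indexing, so all of these keys must now be present. A small sanity check over the new defaults, copying only the token keys from the diff, confirms each reasoning budget leaves room for a visible answer under the shared cap:

TOKEN_BUDGETS = {
    'low_effort_tokens': 8000,
    'medium_effort_tokens': 16000,
    'high_effort_tokens': 24000,
    'verification_tokens': 8000,
    'synthesis_tokens': 24000,
}
MAX_TOKENS = 32000
for key, budget in TOKEN_BUDGETS.items():
    assert budget < MAX_TOKENS, f"{key} leaves no room for the visible answer"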
@@ -189,11 +189,11 @@ def _synthesize_final_solution(
     )

     try:
-        # Calculate synthesis token budgets
-        synthesis_max_tokens = int(config['max_tokens'] * config['synthesis_ratio'])
-        synthesis_reasoning_tokens = int(synthesis_max_tokens * 0.5)
+        # Use fixed synthesis token budgets
+        synthesis_max_tokens = config['max_tokens']  # Fixed 32k
+        synthesis_reasoning_tokens = config['synthesis_tokens']  # Fixed 24k

-        # Use proportional reasoning effort for synthesis
+        # Use fixed reasoning effort for synthesis
         response = client.chat.completions.create(
             model=model,
             messages=[
@@ -205,7 +205,8 @@ def _synthesize_final_solution(
             timeout=300,
             extra_body={
                 "reasoning": {
-                    "max_tokens": synthesis_reasoning_tokens
+                    "max_tokens": synthesis_reasoning_tokens,
+                    "effort": "high"
                 }
             }
         )
@@ -222,7 +223,8 @@ def _synthesize_final_solution(
         "temperature": 0.3,
         "extra_body": {
             "reasoning": {
-                "max_tokens": synthesis_reasoning_tokens
+                "max_tokens": synthesis_reasoning_tokens,
+                "effort": "high"
             }
         }
     }
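Unlike the hunk above, this one patches a dict of request kwargs rather than a direct create() call, which suggests a fallback request that is assembled first and sent later. The surrounding code is not in the diff, so the expansion below is only an assumed shape:

from openai import OpenAI

client = OpenAI()  # assumes an endpoint that accepts the "reasoning" extension
request_kwargs = {
    "temperature": 0.3,
    "extra_body": {
        "reasoning": {
            "max_tokens": 24000,  # synthesis_tokens from the new DEFAULT_CONFIG
            "effort": "high",
        }
    },
}
# Hypothetical expansion; the actual fallback path is not shown in this commit
response = client.chat.completions.create(
    model="example/reasoning-model",  # placeholder
    messages=[{"role": "user", "content": "Synthesize the verified solutions..."}],
    **request_kwargs,
)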
