fix: use a fixed 32k max_tokens budget with fixed per-effort reasoning token allocations in MARS

codelion · codelion · commit 129f0984dd66 · 2025-09-24T12:24:20.000+08:00
diff --git a/optillm/mars/agent.py b/optillm/mars/agent.py
@@ -34,11 +34,11 @@ def _assign_temperature(self) -> float:
     def _get_reasoning_effort(self) -> str:
         """Get reasoning effort level based on agent temperature"""
         if self.temperature <= 0.4:
-            return "low"  # 12.5% of max_tokens
+            return "low"  # 8k reasoning tokens
         elif self.temperature <= 0.8:
-            return "medium"  # 25% of max_tokens
+            return "medium"  # 16k reasoning tokens
         else:
-            return "high"  # 50% of max_tokens
+            return "high"  # 24k reasoning tokens
 
     def generate_solution(self, problem: str, request_id: str = None) -> Tuple[AgentSolution, int]:
         """Generate a solution for the given problem using reasoning API"""
@@ -51,20 +51,21 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
             problem=problem
         )
 
-        # Configure reasoning parameters based on proportional budgets
+        # Configure reasoning parameters based on fixed budgets
         reasoning_effort = self._get_reasoning_effort()
-        max_tokens = self.config.get('max_tokens', 64000)
+        max_tokens = self.config['max_tokens']  # Fixed 32k
 
-        # Calculate reasoning tokens based on effort level and proportions
+        # Use fixed reasoning tokens based on effort level
         if reasoning_effort == "low":
-            reasoning_tokens = int(max_tokens * self.config.get('low_effort_ratio', 0.125))
+            reasoning_tokens = self.config['low_effort_tokens']  # 8k
         elif reasoning_effort == "medium":
-            reasoning_tokens = int(max_tokens * self.config.get('medium_effort_ratio', 0.25))
+            reasoning_tokens = self.config['medium_effort_tokens']  # 16k
         else:  # high
-            reasoning_tokens = int(max_tokens * self.config.get('high_effort_ratio', 0.5))
+            reasoning_tokens = self.config['high_effort_tokens']  # 24k
 
         reasoning_config = {
-            "max_tokens": reasoning_tokens
+            "max_tokens": reasoning_tokens,
+            "effort": reasoning_effort
         }
 
         try:
@@ -75,7 +76,7 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
                     {"role": "system", "content": MATHEMATICAL_SYSTEM_PROMPT},
                     {"role": "user", "content": exploration_prompt}
                 ],
-                max_tokens=reasoning_tokens + 8000,
+                max_tokens=max_tokens,
                 temperature=self.temperature,
                 timeout=300,  # 5 minute timeout for complex problems
                 extra_body={
@@ -133,10 +134,9 @@ def verify_solution(self, problem: str, solution: str, verifier_id: int, solutio
             solution=solution
         )
 
-        # Calculate verification token budgets
-        max_tokens = self.config.get('max_tokens', 64000)
-        verification_max_tokens = int(max_tokens * self.config.get('verification_ratio', 0.5))
-        verification_reasoning_tokens = int(verification_max_tokens * 0.5)
+        # Use fixed verification token budgets
+        max_tokens = self.config['max_tokens']  # Fixed 32k
+        verification_reasoning_tokens = self.config['verification_tokens']  # Fixed 8k
 
         try:
             response = self.client.chat.completions.create(
@@ -145,12 +145,13 @@ def verify_solution(self, problem: str, solution: str, verifier_id: int, solutio
                     {"role": "system", "content": MATHEMATICAL_SYSTEM_PROMPT},
                     {"role": "user", "content": verification_prompt}
                 ],
-                max_tokens=verification_reasoning_tokens + 8000,
+                max_tokens=max_tokens,
                 temperature=0.1,  # Low temperature for consistent verification
                 timeout=180,
                 extra_body={
                     "reasoning": {
-                        "max_tokens": verification_reasoning_tokens
+                        "max_tokens": verification_reasoning_tokens,
+                        "effort": "low"
                     }
                 }
             )
@@ -195,9 +196,9 @@ def improve_solution(self, problem: str, current_solution: str, feedback: str, i
             issues="\n".join(f"- {issue}" for issue in issues)
         )
 
-        # Calculate improvement token budgets (use high effort for iterations)
-        max_tokens = self.config.get('max_tokens', 64000)
-        improvement_reasoning_tokens = int(max_tokens * self.config.get('high_effort_ratio', 0.5))
+        # Use fixed improvement token budgets (use high effort for iterations)
+        max_tokens = self.config['max_tokens']  # Fixed 32k
+        improvement_reasoning_tokens = self.config['high_effort_tokens']  # Fixed 24k
 
         try:
             response = self.client.chat.completions.create(
@@ -206,12 +207,13 @@ def improve_solution(self, problem: str, current_solution: str, feedback: str, i
                     {"role": "system", "content": MATHEMATICAL_SYSTEM_PROMPT},
                     {"role": "user", "content": improvement_prompt}
                 ],
-                max_tokens=improvement_reasoning_tokens + 8000,
+                max_tokens=max_tokens,
                 temperature=self.temperature * 0.8,  # Slightly lower temperature for improvement
                 timeout=300,
                 extra_body={
                     "reasoning": {
-                        "max_tokens": improvement_reasoning_tokens
+                        "max_tokens": improvement_reasoning_tokens,
+                        "effort": "high"
                     }
                 }
             )
diff --git a/optillm/mars/mars.py b/optillm/mars/mars.py
@@ -15,23 +15,23 @@
 
 logger = logging.getLogger(__name__)
 
-# Default MARS configuration with unified token budget system
+# Default MARS configuration with fixed 32k token budget
 DEFAULT_CONFIG = {
     'num_agents': 3,
     'max_iterations': 5,  # Balanced for quality vs efficiency
     'verification_passes_required': 2,  # Balanced for 5-iteration efficiency
     'consensus_threshold': 2,  # Keep at 2 for 3-agent setup
     'min_verified_solutions': 1,  # Keep minimal requirement
-    'max_tokens': 64000,  # Base token budget
+    'max_tokens': 32000,  # Fixed 32k token budget for all calls
     'max_verification_attempts': 3,
     'early_termination': True,
     'use_reasoning_api': True,
-    # Token budget proportions
-    'high_effort_ratio': 0.5,    # 32000 tokens
-    'medium_effort_ratio': 0.25,  # 16000 tokens
-    'low_effort_ratio': 0.125,   # 8000 tokens
-    'verification_ratio': 0.5,   # 32000 tokens for verification
-    'synthesis_ratio': 1.0       # 64000 tokens for synthesis
+    # Fixed reasoning token allocations
+    'low_effort_tokens': 8000,     # Agent 0 (temperature 0.3)
+    'medium_effort_tokens': 16000, # Agent 1 (temperature 0.6)
+    'high_effort_tokens': 24000,   # Agent 2 (temperature 1.0)
+    'verification_tokens': 8000,   # Fixed low effort for verification consistency
+    'synthesis_tokens': 24000      # Fixed high effort for final synthesis
 }
 
 def multi_agent_reasoning_system(
@@ -189,11 +189,11 @@ def _synthesize_final_solution(
     )
 
     try:
-        # Calculate synthesis token budgets
-        synthesis_max_tokens = int(config['max_tokens'] * config['synthesis_ratio'])
-        synthesis_reasoning_tokens = int(synthesis_max_tokens * 0.5)
+        # Use fixed synthesis token budgets
+        synthesis_max_tokens = config['max_tokens']  # Fixed 32k
+        synthesis_reasoning_tokens = config['synthesis_tokens']  # Fixed 24k
 
-        # Use proportional reasoning effort for synthesis
+        # Use fixed reasoning effort for synthesis
         response = client.chat.completions.create(
             model=model,
             messages=[
@@ -205,7 +205,8 @@ def _synthesize_final_solution(
             timeout=300,
             extra_body={
                 "reasoning": {
-                    "max_tokens": synthesis_reasoning_tokens
+                    "max_tokens": synthesis_reasoning_tokens,
+                    "effort": "high"
                 }
             }
         )
@@ -222,7 +223,8 @@ def _synthesize_final_solution(
                 "temperature": 0.3,
                 "extra_body": {
                     "reasoning": {
-                        "max_tokens": synthesis_reasoning_tokens
+                        "max_tokens": synthesis_reasoning_tokens,
+                        "effort": "high"
                     }
                 }
             }