d

codelion · codelion · commit 5f3bf6964433 · 2025-09-23T19:58:04.000+08:00
diff --git a/optillm/mars/agent.py b/optillm/mars/agent.py
@@ -34,11 +34,11 @@ def _assign_temperature(self) -> float:
     def _get_reasoning_effort(self) -> str:
         """Get reasoning effort level based on agent temperature"""
         if self.temperature <= 0.4:
-            return "low"  # 8k thinking tokens
+            return "low"  # 12.5% of max_tokens
         elif self.temperature <= 0.8:
-            return "medium"  # 16k thinking tokens
+            return "medium"  # 25% of max_tokens
         else:
-            return "high"  # 32k thinking tokens
+            return "high"  # 50% of max_tokens
 
     def generate_solution(self, problem: str, request_id: str = None) -> Tuple[AgentSolution, int]:
         """Generate a solution for the given problem using reasoning API"""
@@ -51,19 +51,21 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
             problem=problem
         )
 
-        # Configure reasoning parameters for OpenRouter
+        # Configure reasoning parameters based on proportional budgets
         reasoning_effort = self._get_reasoning_effort()
-        reasoning_config = {
-            "effort": reasoning_effort
-        }
+        max_tokens = self.config.get('max_tokens', 64000)
 
-        # Add specific token budgets for 3-agent configuration
+        # Calculate reasoning tokens based on effort level and proportions
         if reasoning_effort == "low":
-            reasoning_config["max_tokens"] = 8000  # Agent 0: 8k thinking tokens
+            reasoning_tokens = int(max_tokens * self.config.get('low_effort_ratio', 0.125))
         elif reasoning_effort == "medium":
-            reasoning_config["max_tokens"] = 16000  # Agent 1: 16k thinking tokens
+            reasoning_tokens = int(max_tokens * self.config.get('medium_effort_ratio', 0.25))
         else:  # high
-            reasoning_config["max_tokens"] = 32000  # Agent 2: 32k thinking tokens
+            reasoning_tokens = int(max_tokens * self.config.get('high_effort_ratio', 0.5))
+
+        reasoning_config = {
+            "max_tokens": reasoning_tokens
+        }
 
         try:
             # Make API call with reasoning via extra_body for OpenRouter compatibility
@@ -73,7 +75,7 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
                     {"role": "system", "content": MATHEMATICAL_SYSTEM_PROMPT},
                     {"role": "user", "content": exploration_prompt}
                 ],
-                max_tokens=self.config.get('max_response_tokens', 32768),
+                max_tokens=max_tokens,
                 temperature=self.temperature,
                 timeout=300,  # 5 minute timeout for complex problems
                 extra_body={
@@ -131,19 +133,24 @@ def verify_solution(self, problem: str, solution: str, verifier_id: int, solutio
             solution=solution
         )
 
+        # Calculate verification token budgets
+        max_tokens = self.config.get('max_tokens', 64000)
+        verification_max_tokens = int(max_tokens * self.config.get('verification_ratio', 0.5))
+        verification_reasoning_tokens = int(verification_max_tokens * 0.5)
+
         try:
             response = self.client.chat.completions.create(
                 model=self.model,
                 messages=[
                     {"role": "system", "content": MATHEMATICAL_SYSTEM_PROMPT},
                     {"role": "user", "content": verification_prompt}
                 ],
-                max_tokens=16384,
+                max_tokens=verification_max_tokens,
                 temperature=0.1,  # Low temperature for consistent verification
                 timeout=180,
                 extra_body={
                     "reasoning": {
-                        "effort": "medium"
+                        "max_tokens": verification_reasoning_tokens
                     }
                 }
             )
@@ -188,19 +195,23 @@ def improve_solution(self, problem: str, current_solution: str, feedback: str, i
             issues="\n".join(f"- {issue}" for issue in issues)
         )
 
+        # Calculate improvement token budgets (use high effort for iterations)
+        max_tokens = self.config.get('max_tokens', 64000)
+        improvement_reasoning_tokens = int(max_tokens * self.config.get('high_effort_ratio', 0.5))
+
         try:
             response = self.client.chat.completions.create(
                 model=self.model,
                 messages=[
                     {"role": "system", "content": MATHEMATICAL_SYSTEM_PROMPT},
                     {"role": "user", "content": improvement_prompt}
                 ],
-                max_tokens=32768,
+                max_tokens=max_tokens,
                 temperature=self.temperature * 0.8,  # Slightly lower temperature for improvement
                 timeout=300,
                 extra_body={
                     "reasoning": {
-                        "effort": "high"
+                        "max_tokens": improvement_reasoning_tokens
                     }
                 }
             )
diff --git a/optillm/mars/mars.py b/optillm/mars/mars.py
@@ -15,19 +15,23 @@
 
 logger = logging.getLogger(__name__)
 
-# Default MARS configuration inspired by IMO25 solver
+# Default MARS configuration with unified token budget system
 DEFAULT_CONFIG = {
     'num_agents': 3,
-    'max_iterations': 10,
-    'verification_passes_required': 3,
-    'consensus_threshold': 2,
-    'min_verified_solutions': 1,
-    'thinking_budget_initial': 10000,
-    'thinking_budget_max': 32000,
-    'max_response_tokens': 64000,
+    'max_iterations': 5,  # Balanced for quality vs efficiency
+    'verification_passes_required': 3,  # Restored for better verification
+    'consensus_threshold': 2,  # Keep at 2 for 3-agent setup
+    'min_verified_solutions': 1,  # Keep minimal requirement
+    'max_tokens': 64000,  # Base token budget
     'max_verification_attempts': 10,
     'early_termination': True,
-    'use_reasoning_api': True
+    'use_reasoning_api': True,
+    # Token budget proportions (fractions of max_tokens; counts below assume max_tokens = 64000)
+    'high_effort_ratio': 0.5,    # 32000 tokens
+    'medium_effort_ratio': 0.25,  # 16000 tokens
+    'low_effort_ratio': 0.125,   # 8000 tokens
+    'verification_ratio': 0.5,   # 32000 tokens for verification
+    'synthesis_ratio': 1.0       # 64000 tokens for synthesis
 }
 
 def multi_agent_reasoning_system(
@@ -185,19 +189,23 @@ def _synthesize_final_solution(
     )
 
     try:
-        # Use high reasoning effort for synthesis
+        # Calculate synthesis token budgets
+        synthesis_max_tokens = int(config['max_tokens'] * config['synthesis_ratio'])
+        synthesis_reasoning_tokens = int(synthesis_max_tokens * 0.5)
+
+        # Use a proportional reasoning token budget for synthesis
         response = client.chat.completions.create(
             model=model,
             messages=[
                 {"role": "system", "content": "You are a mathematical synthesis expert."},
                 {"role": "user", "content": synthesis_prompt}
             ],
-            max_tokens=config['max_response_tokens'],
+            max_tokens=synthesis_max_tokens,
             temperature=0.3,  # Lower temperature for synthesis
             timeout=300,
             extra_body={
                 "reasoning": {
-                    "effort": "high"
+                    "max_tokens": synthesis_reasoning_tokens
                 }
             }
         )
@@ -210,11 +218,11 @@ def _synthesize_final_solution(
                     {"role": "system", "content": "You are a mathematical synthesis expert."},
                     {"role": "user", "content": synthesis_prompt}
                 ],
-                "max_tokens": config['max_response_tokens'],
+                "max_tokens": synthesis_max_tokens,
                 "temperature": 0.3,
                 "extra_body": {
                     "reasoning": {
-                        "effort": "high"
+                        "max_tokens": synthesis_reasoning_tokens
                     }
                 }
             }