@@ -34,11 +34,11 @@ def _assign_temperature(self) -> float:
3434 def _get_reasoning_effort (self ) -> str :
3535 """Get reasoning effort level based on agent temperature"""
3636 if self .temperature <= 0.4 :
37- return "low" # 12.5% of max_tokens
37+ return "low" # 8k reasoning tokens
3838 elif self .temperature <= 0.8 :
39- return "medium" # 25% of max_tokens
39+ return "medium" # 16k reasoning tokens
4040 else :
41- return "high" # 50% of max_tokens
41+ return "high" # 24k reasoning tokens
4242
4343 def generate_solution (self , problem : str , request_id : str = None ) -> Tuple [AgentSolution , int ]:
4444 """Generate a solution for the given problem using reasoning API"""
@@ -51,20 +51,21 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
5151 problem = problem
5252 )
5353
54- # Configure reasoning parameters based on proportional budgets
54+ # Configure reasoning parameters based on fixed budgets
5555 reasoning_effort = self ._get_reasoning_effort ()
56- max_tokens = self .config . get ( 'max_tokens' , 64000 )
56+ max_tokens = self .config [ 'max_tokens' ] # Fixed 32k
5757
58- # Calculate reasoning tokens based on effort level and proportions
58+ # Use fixed reasoning tokens based on effort level
5959 if reasoning_effort == "low" :
60- reasoning_tokens = int ( max_tokens * self .config . get ( 'low_effort_ratio' , 0.125 ))
60+ reasoning_tokens = self .config [ 'low_effort_tokens' ] # 8k
6161 elif reasoning_effort == "medium" :
62- reasoning_tokens = int ( max_tokens * self .config . get ( 'medium_effort_ratio' , 0.25 ))
62+ reasoning_tokens = self .config [ 'medium_effort_tokens' ] # 16k
6363 else : # high
64- reasoning_tokens = int ( max_tokens * self .config . get ( 'high_effort_ratio' , 0.5 ))
64+ reasoning_tokens = self .config [ 'high_effort_tokens' ] # 24k
6565
6666 reasoning_config = {
67- "max_tokens" : reasoning_tokens
67+ "max_tokens" : reasoning_tokens ,
68+ "effort" : reasoning_effort
6869 }
6970
7071 try :
@@ -75,7 +76,7 @@ def generate_solution(self, problem: str, request_id: str = None) -> Tuple[Agent
7576 {"role" : "system" , "content" : MATHEMATICAL_SYSTEM_PROMPT },
7677 {"role" : "user" , "content" : exploration_prompt }
7778 ],
78- max_tokens = reasoning_tokens + 8000 ,
79+ max_tokens = max_tokens ,
7980 temperature = self .temperature ,
8081 timeout = 300 , # 5 minute timeout for complex problems
8182 extra_body = {
@@ -133,10 +134,9 @@ def verify_solution(self, problem: str, solution: str, verifier_id: int, solutio
133134 solution = solution
134135 )
135136
136- # Calculate verification token budgets
137- max_tokens = self .config .get ('max_tokens' , 64000 )
138- verification_max_tokens = int (max_tokens * self .config .get ('verification_ratio' , 0.5 ))
139- verification_reasoning_tokens = int (verification_max_tokens * 0.5 )
137+ # Use fixed verification token budgets
138+ max_tokens = self .config ['max_tokens' ] # Fixed 32k
139+ verification_reasoning_tokens = self .config ['verification_tokens' ] # Fixed 8k
140140
141141 try :
142142 response = self .client .chat .completions .create (
@@ -145,12 +145,13 @@ def verify_solution(self, problem: str, solution: str, verifier_id: int, solutio
145145 {"role" : "system" , "content" : MATHEMATICAL_SYSTEM_PROMPT },
146146 {"role" : "user" , "content" : verification_prompt }
147147 ],
148- max_tokens = verification_reasoning_tokens + 8000 ,
148+ max_tokens = max_tokens ,
149149 temperature = 0.1 , # Low temperature for consistent verification
150150 timeout = 180 ,
151151 extra_body = {
152152 "reasoning" : {
153- "max_tokens" : verification_reasoning_tokens
153+ "max_tokens" : verification_reasoning_tokens ,
154+ "effort" : "low"
154155 }
155156 }
156157 )
@@ -195,9 +196,9 @@ def improve_solution(self, problem: str, current_solution: str, feedback: str, i
195196 issues = "\n " .join (f"- { issue } " for issue in issues )
196197 )
197198
198- # Calculate improvement token budgets (use high effort for iterations)
199- max_tokens = self .config . get ( 'max_tokens' , 64000 )
200- improvement_reasoning_tokens = int ( max_tokens * self .config . get ( 'high_effort_ratio' , 0.5 ))
199+ # Use fixed improvement token budgets (use high effort for iterations)
200+ max_tokens = self .config [ 'max_tokens' ] # Fixed 32k
201+ improvement_reasoning_tokens = self .config [ 'high_effort_tokens' ] # Fixed 24k
201202
202203 try :
203204 response = self .client .chat .completions .create (
@@ -206,12 +207,13 @@ def improve_solution(self, problem: str, current_solution: str, feedback: str, i
206207 {"role" : "system" , "content" : MATHEMATICAL_SYSTEM_PROMPT },
207208 {"role" : "user" , "content" : improvement_prompt }
208209 ],
209- max_tokens = improvement_reasoning_tokens + 8000 ,
210+ max_tokens = max_tokens ,
210211 temperature = self .temperature * 0.8 , # Slightly lower temperature for improvement
211212 timeout = 300 ,
212213 extra_body = {
213214 "reasoning" : {
214- "max_tokens" : improvement_reasoning_tokens
215+ "max_tokens" : improvement_reasoning_tokens ,
216+ "effort" : "high"
215217 }
216218 }
217219 )
0 commit comments