diff --git a/README.md b/README.md
index bc93de2..77f1563 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,28 @@ Integrated with a Python interpreter, CodeAct can execute code actions and dynam
 
 **Feb 2, 2024**: CodeAct is released!
+
+## Supported LLM Providers
+
+CodeAct supports multiple LLM providers for agent evaluation:
+
+| Provider | Agent Class | Models |
+|----------|------------|--------|
+| OpenAI | `OpenAILMAgent` | GPT-3.5-Turbo, GPT-4, etc. |
+| Anthropic | `ClaudeLMAgent` | Claude 2, Claude Instant |
+| Google | `BardLMAgent` | PaLM |
+| [MiniMax](https://platform.minimaxi.com/) | `MiniMaxLMAgent` | MiniMax-M2.7, MiniMax-M2.7-highspeed |
+| vLLM (self-hosted) | `VLLMAgent` | Any OpenAI-compatible model |
+
+### Using MiniMax
+
+To use [MiniMax](https://platform.minimaxi.com/) models, set your API key:
+
+```bash
+export MINIMAX_API_KEY="your-api-key"
+```
+
+MiniMax models are configured in `EVALUATED_MODEL_LIST` with `agent_class: "MiniMaxLMAgent"`. The agent uses MiniMax's OpenAI-compatible API endpoint (`https://api.minimax.io/v1`) and automatically clamps the sampling temperature into the `[0.01, 1.0]` range the API accepts.
 
 ## Why CodeAct?
 
 Our extensive analysis of 17 LLMs on API-Bank and a newly curated benchmark [M3ToolEval](docs/EVALUATION.md) shows that CodeAct outperforms widely used alternatives like Text and JSON (up to 20% higher success rate). Please check our paper for more detailed analysis!
diff --git a/mint/agents/__init__.py b/mint/agents/__init__.py
index c1d5651..af8bb59 100644
--- a/mint/agents/__init__.py
+++ b/mint/agents/__init__.py
@@ -6,3 +6,5 @@
 from .vllm_feedback_agent import VLLMFeedbackAgent
 from .vllm_agent import VLLMAgent
 from .claude_agent import ClaudeLMAgent
+from .minimax_lm_agent import MiniMaxLMAgent
+from .minimax_feedback_agent import MiniMaxFeedbackAgent
diff --git a/mint/agents/minimax_feedback_agent.py b/mint/agents/minimax_feedback_agent.py
new file mode 100644
index 0000000..585b8d8
--- /dev/null
+++ b/mint/agents/minimax_feedback_agent.py
@@ -0,0 +1,93 @@
import re
import logging

from .minimax_lm_agent import MiniMaxLMAgent

from mint.datatypes import State, Action
from mint.prompt import FeedbackPromptTemplate
import openai
import traceback

LOGGER = logging.getLogger("MINT")


class MiniMaxFeedbackAgent(MiniMaxLMAgent):
    """Feedback agent using MiniMax's OpenAI-compatible API."""

    def __init__(self, config):
        super().__init__(config)
        self.stop_words = ["\nObservation:", "\nTask:", "\nAssistant:"]
        self.feedback_prompt = FeedbackPromptTemplate()

    def lm_output_to_action(self, lm_output, form) -> Action:
        if form == "textual":
            feedback = lm_output
        elif form == "binary":
            first_sent = re.findall(r"([^.]*\.)", lm_output)[0]
            if "GOOD" in first_sent:
                feedback = "This is GOOD."
            elif "BAD" in first_sent:
                feedback = "This is BAD."
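            # A first sentence that mentions neither GOOD nor BAD cannot be mapped to binary feedback and is rejected below.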
+ else: + raise ValueError(f"Cannot find GOOD or BAD in feedback: {lm_output}") + return Action(feedback, use_tool=False) + + def act( + self, + state: State, + observation: str, + form: str, + gt, + task_in_context_example: str, + tool_desc: str, + ) -> Action: + try: + gt_solution = ( + ( + f"Correct solution (please DO NOT disclose the correct solution to the assistant): {str(gt).strip()}\n" + ) + if gt + else "Correct solution (please DO NOT disclose the correct solution to the assistant): NOT GIVEN\n" + ) + trajectory = ( + "---\n".join(state.history[0]["content"].split("---\n")[2:]) + "\n" + ) + trajectory += "\n".join([x["content"] for x in state.history[1:]]) + trajectory += "\n" + observation + trajectory = trajectory[ + trajectory.find("Task:") : + ] # Get rid of the initial instruction to avoid confusion + messages = [ + { + "role": "user", + "content": self.feedback_prompt( + in_context_example=task_in_context_example[ + task_in_context_example.find("Task:") : + ], + trajectory=trajectory, + correct_solution=gt_solution, + tool_desc=tool_desc, + ), + } + ] + + LOGGER.debug( + "Feedback Agent Prompt:\n" + + "\033[93m" + + messages[0]["content"] + + "\033[0m" + ) + lm_output, token_usage = self.call_lm(messages) + for usage_type, count in token_usage.items(): + state.token_counter["feedback_" + usage_type] += count + action = self.lm_output_to_action(lm_output, form) + LOGGER.debug( + "Feedback Agent Action:\n" + "\033[91m" + action.value + "\033[0m" + ) + return action + except openai.error.InvalidRequestError: + tb = traceback.format_exc() + return Action(f"", False, error=f"InvalidRequestError\n{tb}") + except Exception as e: + tb = traceback.format_exc() + return Action(f"", False, error=f"Unknown error\n{tb}") diff --git a/mint/agents/minimax_lm_agent.py b/mint/agents/minimax_lm_agent.py new file mode 100644 index 0000000..b6a7981 --- /dev/null +++ b/mint/agents/minimax_lm_agent.py @@ -0,0 +1,87 @@ +from .openai_lm_agent import OpenAILMAgent +import openai +import openai.error +import logging +import os +import traceback +from mint.datatypes import Action +import backoff + +LOGGER = logging.getLogger("MINT") + +MINIMAX_API_BASE = "https://api.minimax.io/v1" + + +class MiniMaxLMAgent(OpenAILMAgent): + """LLM agent using MiniMax's OpenAI-compatible API. + + MiniMax provides large language models (M2.7, M2.5) accessible via + an OpenAI-compatible chat completions endpoint. + + Config: + model_name: MiniMax model name (e.g. "MiniMax-M2.7") + chat_mode: Must be True (MiniMax only supports chat completions) + max_tokens: Maximum tokens to generate (default: 512) + temperature: Sampling temperature, clamped to [0.01, 1.0] (default: 0) + """ + + def __init__(self, config): + super().__init__(config) + self.api_key = os.environ.get("MINIMAX_API_KEY", None) + if not self.api_key: + raise ValueError( + "MINIMAX_API_KEY environment variable is required. 
" + "Get your API key at https://platform.minimaxi.com/" + ) + self.api_base = MINIMAX_API_BASE + + def _clamp_temperature(self, temperature): + """Clamp temperature to MiniMax's accepted range [0.01, 1.0].""" + if temperature <= 0: + return 0.01 + return min(temperature, 1.0) + + @backoff.on_exception( + backoff.fibo, + ( + openai.error.APIError, + openai.error.Timeout, + openai.error.RateLimitError, + openai.error.ServiceUnavailableError, + openai.error.APIConnectionError, + ), + ) + def call_lm(self, messages): + temperature = self._clamp_temperature( + self.config.get("temperature", 0) + ) + response = openai.ChatCompletion.create( + model=self.config["model_name"], + messages=messages, + max_tokens=self.config.get("max_tokens", 512), + temperature=temperature, + stop=self.stop_words, + api_base=self.api_base, + api_key=self.api_key, + ) + # Convert OpenAIObject to plain dict for token counting + usage_obj = response["usage"] + usage = {} + for k in usage_obj: + try: + usage[k] = int(usage_obj[k]) + except (TypeError, ValueError): + pass + return response.choices[0].message["content"], usage + + def act(self, state): + messages = state.history + try: + lm_output, token_usage = self.call_lm(messages) + for usage_type, count in token_usage.items(): + state.token_counter[usage_type] += count + action = self.lm_output_to_action(lm_output) + return action + except openai.error.InvalidRequestError: + tb = traceback.format_exc() + return Action(f"", False, error=f"InvalidRequestError\n{tb}") diff --git a/mint/configs/config_variables.py b/mint/configs/config_variables.py index 7c50813..f109b4d 100644 --- a/mint/configs/config_variables.py +++ b/mint/configs/config_variables.py @@ -155,6 +155,24 @@ "temperature": 0.0, }, }, + { + "agent_class": "MiniMaxLMAgent", + "config": { + "model_name": "MiniMax-M2.7", + "chat_mode": True, + "max_tokens": 1024, + "temperature": 0.0, + }, + }, + { + "agent_class": "MiniMaxLMAgent", + "config": { + "model_name": "MiniMax-M2.7-highspeed", + "chat_mode": True, + "max_tokens": 1024, + "temperature": 0.0, + }, + }, # { # "agent_class": "VLLMAgent", # "config": { diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_minimax_feedback_agent.py b/tests/test_minimax_feedback_agent.py new file mode 100644 index 0000000..745cd67 --- /dev/null +++ b/tests/test_minimax_feedback_agent.py @@ -0,0 +1,211 @@ +"""Unit tests for MiniMaxFeedbackAgent.""" +import os +import pytest +from unittest.mock import patch, MagicMock +from collections import defaultdict + +from mint.agents.minimax_feedback_agent import MiniMaxFeedbackAgent +from mint.datatypes import Action, State + + +@pytest.fixture +def feedback_config(): + return { + "model_name": "MiniMax-M2.7", + "chat_mode": True, + "max_tokens": 1024, + "temperature": 0.0, + } + + +@pytest.fixture +def agent(feedback_config): + with patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key-123"}): + return MiniMaxFeedbackAgent(feedback_config) + + +@pytest.fixture +def mock_state(): + state = State( + history=[ + { + "role": "user", + "content": "System instructions\n---\nExamples\n---\nTask: Solve 2+2", + }, + {"role": "assistant", "content": "Let me think about this."}, + ] + ) + return state + + +class TestMiniMaxFeedbackAgentInit: + def test_init_sets_feedback_stop_words(self, agent): + assert "\nObservation:" in agent.stop_words + assert "\nTask:" in agent.stop_words + assert "\nAssistant:" in agent.stop_words + + def test_init_has_feedback_prompt(self, agent): + 
assert agent.feedback_prompt is not None + + def test_inherits_minimax_api_config(self, agent): + assert agent.api_base == "https://api.minimax.io/v1" + assert agent.api_key == "test-key-123" + + +class TestFeedbackLMOutputToAction: + def test_textual_feedback(self, agent): + action = agent.lm_output_to_action( + "The approach is correct but could be improved.", "textual" + ) + assert isinstance(action, Action) + assert action.value == "The approach is correct but could be improved." + assert action.use_tool is False + + def test_binary_feedback_good(self, agent): + action = agent.lm_output_to_action( + "This is GOOD. The solution is correct.", "binary" + ) + assert action.value == "This is GOOD." + + def test_binary_feedback_bad(self, agent): + action = agent.lm_output_to_action( + "This is BAD. The approach is wrong.", "binary" + ) + assert action.value == "This is BAD." + + def test_binary_feedback_invalid(self, agent): + with pytest.raises(ValueError, match="Cannot find GOOD or BAD in feedback"): + agent.lm_output_to_action( + "This is NEUTRAL. I'm not sure.", "binary" + ) + + +class TestFeedbackAct: + @patch("mint.agents.minimax_lm_agent.openai.ChatCompletion.create") + def test_act_textual_feedback(self, mock_create, agent, mock_state): + mock_response = MagicMock() + mock_response.choices = [ + MagicMock( + message={"content": "The solution looks correct."} + ) + ] + mock_response.__getitem__ = lambda self, key: { + "usage": {"prompt_tokens": 50, "completion_tokens": 10, "total_tokens": 60} + }[key] + mock_create.return_value = mock_response + + action = agent.act( + state=mock_state, + observation="Result: 4", + form="textual", + gt="4", + task_in_context_example="Task: Example task", + tool_desc="Python tool", + ) + + assert isinstance(action, Action) + assert action.value == "The solution looks correct." + assert action.use_tool is False + assert mock_state.token_counter["feedback_prompt_tokens"] == 50 + + @patch("mint.agents.minimax_lm_agent.openai.ChatCompletion.create") + def test_act_binary_feedback(self, mock_create, agent, mock_state): + mock_response = MagicMock() + mock_response.choices = [ + MagicMock(message={"content": "This is GOOD. Well done."}) + ] + mock_response.__getitem__ = lambda self, key: { + "usage": {"prompt_tokens": 50, "completion_tokens": 10, "total_tokens": 60} + }[key] + mock_create.return_value = mock_response + + action = agent.act( + state=mock_state, + observation="Result: 4", + form="binary", + gt="4", + task_in_context_example="Task: Example task", + tool_desc="Python tool", + ) + + assert action.value == "This is GOOD." + + @patch("mint.agents.minimax_lm_agent.openai.ChatCompletion.create") + def test_act_without_gt(self, mock_create, agent, mock_state): + mock_response = MagicMock() + mock_response.choices = [ + MagicMock(message={"content": "Feedback without GT."}) + ] + mock_response.__getitem__ = lambda self, key: { + "usage": {"prompt_tokens": 30, "completion_tokens": 5, "total_tokens": 35} + }[key] + mock_create.return_value = mock_response + + action = agent.act( + state=mock_state, + observation="Result: unknown", + form="textual", + gt=None, + task_in_context_example="Task: Example task", + tool_desc="Python tool", + ) + + assert action.value == "Feedback without GT." 
+ + @patch("mint.agents.minimax_lm_agent.openai.ChatCompletion.create") + def test_act_invalid_request_error(self, mock_create, agent, mock_state): + import openai.error + mock_create.side_effect = openai.error.InvalidRequestError( + "context length exceeded", "" + ) + + action = agent.act( + state=mock_state, + observation="Result: 4", + form="textual", + gt="4", + task_in_context_example="Task: Example", + tool_desc="Python tool", + ) + + assert action.error is not None + assert "InvalidRequestError" in action.error + + @patch("mint.agents.minimax_lm_agent.openai.ChatCompletion.create") + def test_act_unknown_error(self, mock_create, agent, mock_state): + mock_create.side_effect = RuntimeError("Something broke") + + action = agent.act( + state=mock_state, + observation="Result: 4", + form="textual", + gt="4", + task_in_context_example="Task: Example", + tool_desc="Python tool", + ) + + assert action.error is not None + assert "Unknown error" in action.error + + @patch("mint.agents.minimax_lm_agent.openai.ChatCompletion.create") + def test_act_tracks_feedback_tokens(self, mock_create, agent, mock_state): + mock_response = MagicMock() + mock_response.choices = [ + MagicMock(message={"content": "Good work."}) + ] + mock_response.__getitem__ = lambda self, key: { + "usage": {"prompt_tokens": 100, "completion_tokens": 20, "total_tokens": 120} + }[key] + mock_create.return_value = mock_response + + agent.act( + state=mock_state, + observation="Result: 4", + form="textual", + gt="4", + task_in_context_example="Task: Example", + tool_desc="Python tool", + ) + + assert mock_state.token_counter["feedback_prompt_tokens"] == 100 + assert mock_state.token_counter["feedback_completion_tokens"] == 20 diff --git a/tests/test_minimax_integration.py b/tests/test_minimax_integration.py new file mode 100644 index 0000000..360569d --- /dev/null +++ b/tests/test_minimax_integration.py @@ -0,0 +1,199 @@ +"""Integration tests for MiniMax agents. + +These tests verify the MiniMax agents integrate correctly with the +code-act framework's agent factory and configuration system. +Requires MINIMAX_API_KEY to be set for live API tests. 
+""" +import os +import pytest +from unittest.mock import patch, MagicMock + +import mint.agents as agents +from mint.agents.minimax_lm_agent import MiniMaxLMAgent +from mint.agents.minimax_feedback_agent import MiniMaxFeedbackAgent +from mint.configs.config_variables import EVALUATED_MODEL_LIST +from mint.datatypes import State + + +class TestAgentFactoryIntegration: + """Test that MiniMax agents work with the dynamic agent instantiation pattern.""" + + def test_minimax_agent_in_evaluated_model_list(self): + minimax_entries = [ + e for e in EVALUATED_MODEL_LIST + if e["agent_class"] == "MiniMaxLMAgent" + ] + assert len(minimax_entries) >= 2 + model_names = [e["config"]["model_name"] for e in minimax_entries] + assert "MiniMax-M2.7" in model_names + assert "MiniMax-M2.7-highspeed" in model_names + + def test_dynamic_instantiation_lm_agent(self): + """Test the getattr(agents, agent_class)(config) pattern from main.py.""" + agent_config = { + "agent_class": "MiniMaxLMAgent", + "config": { + "model_name": "MiniMax-M2.7", + "chat_mode": True, + "max_tokens": 512, + "temperature": 0.0, + }, + } + with patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key"}): + agent_cls = getattr(agents, agent_config["agent_class"]) + agent = agent_cls(agent_config["config"]) + assert isinstance(agent, MiniMaxLMAgent) + + def test_dynamic_instantiation_feedback_agent(self): + agent_config = { + "agent_class": "MiniMaxFeedbackAgent", + "config": { + "model_name": "MiniMax-M2.7", + "chat_mode": True, + "max_tokens": 1024, + "temperature": 0.0, + }, + } + with patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key"}): + agent_cls = getattr(agents, agent_config["agent_class"]) + agent = agent_cls(agent_config["config"]) + assert isinstance(agent, MiniMaxFeedbackAgent) + + def test_all_evaluated_models_instantiable(self): + """Verify all MiniMax entries in EVALUATED_MODEL_LIST can be instantiated.""" + for entry in EVALUATED_MODEL_LIST: + if entry["agent_class"] == "MiniMaxLMAgent": + with patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key"}): + agent = getattr(agents, entry["agent_class"])(entry["config"]) + assert agent.api_base == "https://api.minimax.io/v1" + + +class TestEndToEndFlow: + """Test the full agent flow: instantiate -> act -> get action.""" + + @patch("mint.agents.minimax_lm_agent.openai.ChatCompletion.create") + def test_full_act_flow(self, mock_create): + mock_response = MagicMock() + mock_response.choices = [ + MagicMock( + message={"content": "I'll solve this step by step.\n```python\nprint(2+2)\n```"} + ) + ] + mock_response.__getitem__ = lambda self, key: { + "usage": {"prompt_tokens": 50, "completion_tokens": 20, "total_tokens": 70} + }[key] + mock_create.return_value = mock_response + + config = { + "model_name": "MiniMax-M2.7", + "chat_mode": True, + "max_tokens": 512, + "temperature": 0.5, + } + with patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key"}): + agent = MiniMaxLMAgent(config) + + state = State( + history=[ + { + "role": "user", + "content": "System prompt\n---\nExamples\n---\nTask: What is 2+2?", + } + ] + ) + + action = agent.act(state) + + # Verify action + assert action.use_tool is True # no tag + assert "python" in action.value + # Verify token tracking + assert state.token_counter["total_tokens"] == 70 + # Verify API call parameters + call_kwargs = mock_create.call_args.kwargs + assert call_kwargs["api_base"] == "https://api.minimax.io/v1" + assert call_kwargs["temperature"] == 0.5 + + @patch("mint.agents.minimax_lm_agent.openai.ChatCompletion.create") + def 
test_multi_turn_conversation(self, mock_create):
        mock_response = MagicMock()
        mock_response.choices = [
            MagicMock(message={"content": "<solution>4</solution>"})
        ]
        mock_response.__getitem__ = lambda self, key: {
            "usage": {"prompt_tokens": 80, "completion_tokens": 10, "total_tokens": 90}
        }[key]
        mock_create.return_value = mock_response

        config = {"model_name": "MiniMax-M2.7", "chat_mode": True}
        with patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key"}):
            agent = MiniMaxLMAgent(config)

        state = State(
            history=[
                {"role": "user", "content": "System\n---\nEx\n---\nTask: 2+2"},
                {"role": "assistant", "content": "Let me compute this."},
                {"role": "user", "content": "Observation:\n4\nYou have 4 steps left."},
            ]
        )

        action = agent.act(state)
        assert action.use_tool is False  # has <solution>
        assert "<solution>" in action.value

        # Verify all 3 messages were passed
        call_kwargs = mock_create.call_args.kwargs
        assert len(call_kwargs["messages"]) == 3


@pytest.mark.skipif(
    not os.environ.get("MINIMAX_API_KEY"),
    reason="MINIMAX_API_KEY not set"
)
class TestLiveAPI:
    """Live API tests - only run when MINIMAX_API_KEY is available."""

    def test_live_chat_completion(self):
        config = {
            "model_name": "MiniMax-M2.7",
            "chat_mode": True,
            "max_tokens": 50,
            "temperature": 0.01,
        }
        agent = MiniMaxLMAgent(config)
        state = State(
            history=[{"role": "user", "content": "Reply with exactly: hello"}]
        )
        action = agent.act(state)
        assert action.error is None
        assert len(action.value) > 0

    def test_live_m27_highspeed(self):
        config = {
            "model_name": "MiniMax-M2.7-highspeed",
            "chat_mode": True,
            "max_tokens": 50,
            "temperature": 0.01,
        }
        agent = MiniMaxLMAgent(config)
        state = State(
            history=[{"role": "user", "content": "Reply with exactly: world"}]
        )
        action = agent.act(state)
        assert action.error is None
        assert len(action.value) > 0

    def test_live_token_tracking(self):
        config = {
            "model_name": "MiniMax-M2.7",
            "chat_mode": True,
            "max_tokens": 30,
            "temperature": 0.01,
        }
        agent = MiniMaxLMAgent(config)
        state = State(
            history=[{"role": "user", "content": "Say hi"}]
        )
        agent.act(state)
        assert state.token_counter["prompt_tokens"] > 0
        assert state.token_counter["completion_tokens"] > 0
diff --git a/tests/test_minimax_lm_agent.py b/tests/test_minimax_lm_agent.py
new file mode 100644
index 0000000..e01ada9
--- /dev/null
+++ b/tests/test_minimax_lm_agent.py
@@ -0,0 +1,232 @@
"""Unit tests for MiniMaxLMAgent."""
import os
import pytest
from unittest.mock import patch, MagicMock
from collections import defaultdict

from mint.agents.minimax_lm_agent import MiniMaxLMAgent, MINIMAX_API_BASE
from mint.datatypes import Action, State


@pytest.fixture
def base_config():
    return {
        "model_name": "MiniMax-M2.7",
        "chat_mode": True,
        "max_tokens": 512,
        "temperature": 0.0,
    }


@pytest.fixture
def agent(base_config):
    with patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key-123"}):
        return MiniMaxLMAgent(base_config)


@pytest.fixture
def mock_state():
    state = State(
        history=[
            {"role": "user", "content": "What is 2 + 2?"},
        ]
    )
    return state


class TestMiniMaxLMAgentInit:
    def test_init_with_api_key(self, base_config):
        with patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key"}):
            agent = MiniMaxLMAgent(base_config)
        assert agent.api_key == "test-key"
        assert agent.api_base == MINIMAX_API_BASE
        assert agent.config["model_name"] == "MiniMax-M2.7"

    def test_init_missing_api_key(self, base_config):
        with patch.dict(os.environ, {}, clear=True):
            os.environ.pop("MINIMAX_API_KEY", None)
            with pytest.raises(ValueError, match="MINIMAX_API_KEY"):
                MiniMaxLMAgent(base_config)

    def test_init_missing_model_name(self):
        config = {"chat_mode": True}
        with patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key"}):
            with pytest.raises(AssertionError):
                MiniMaxLMAgent(config)

    def test_api_base_url(self, agent):
        assert agent.api_base == "https://api.minimax.io/v1"

    def test_stop_words_inherited(self, agent):
        assert "\nObservation:" in agent.stop_words
        assert "\nExpert feedback:" in agent.stop_words
        assert "\nTask:" in agent.stop_words
        assert "\n---" in agent.stop_words


class TestTemperatureClamping:
    def test_clamp_zero_temperature(self, agent):
        assert agent._clamp_temperature(0) == 0.01

    def test_clamp_negative_temperature(self, agent):
        assert agent._clamp_temperature(-1.0) == 0.01

    def test_clamp_valid_temperature(self, agent):
        assert agent._clamp_temperature(0.5) == 0.5

    def test_clamp_max_temperature(self, agent):
        assert agent._clamp_temperature(1.0) == 1.0

    def test_clamp_over_max_temperature(self, agent):
        assert agent._clamp_temperature(2.0) == 1.0

    def test_clamp_small_positive(self, agent):
        assert agent._clamp_temperature(0.01) == 0.01

    def test_clamp_boundary_value(self, agent):
        assert agent._clamp_temperature(0.001) == 0.001


class TestCallLM:
    @patch("mint.agents.minimax_lm_agent.openai.ChatCompletion.create")
    def test_call_lm_success(self, mock_create, agent):
        mock_response = MagicMock()
        mock_response.choices = [
            MagicMock(message={"content": "The answer is 4."})
        ]
        mock_response.__getitem__ = lambda self, key: {
            "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}
        }[key]
        mock_create.return_value = mock_response

        messages = [{"role": "user", "content": "What is 2 + 2?"}]
        content, usage = agent.call_lm(messages)

        assert content == "The answer is 4."
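        # call_lm should have forwarded the request to MiniMax's endpoint with the 0.0 temperature clamped up to 0.01.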
+ mock_create.assert_called_once_with( + model="MiniMax-M2.7", + messages=messages, + max_tokens=512, + temperature=0.01, # clamped from 0.0 + stop=agent.stop_words, + api_base=MINIMAX_API_BASE, + api_key="test-key-123", + ) + + @patch("mint.agents.minimax_lm_agent.openai.ChatCompletion.create") + def test_call_lm_uses_config_temperature(self, mock_create, base_config): + base_config["temperature"] = 0.7 + with patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key"}): + agent = MiniMaxLMAgent(base_config) + + mock_response = MagicMock() + mock_response.choices = [MagicMock(message={"content": "Response"})] + mock_response.__getitem__ = lambda self, key: { + "usage": {"prompt_tokens": 5, "completion_tokens": 3, "total_tokens": 8} + }[key] + mock_create.return_value = mock_response + + agent.call_lm([{"role": "user", "content": "Hi"}]) + call_args = mock_create.call_args + assert call_args.kwargs["temperature"] == 0.7 + + @patch("mint.agents.minimax_lm_agent.openai.ChatCompletion.create") + def test_call_lm_custom_max_tokens(self, mock_create, base_config): + base_config["max_tokens"] = 1024 + with patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key"}): + agent = MiniMaxLMAgent(base_config) + + mock_response = MagicMock() + mock_response.choices = [MagicMock(message={"content": "Response"})] + mock_response.__getitem__ = lambda self, key: { + "usage": {"prompt_tokens": 5, "completion_tokens": 3, "total_tokens": 8} + }[key] + mock_create.return_value = mock_response + + agent.call_lm([{"role": "user", "content": "Hi"}]) + call_args = mock_create.call_args + assert call_args.kwargs["max_tokens"] == 1024 + + +class TestAct: + @patch("mint.agents.minimax_lm_agent.openai.ChatCompletion.create") + def test_act_success(self, mock_create, agent, mock_state): + mock_response = MagicMock() + mock_response.choices = [ + MagicMock(message={"content": "The answer is 4."}) + ] + mock_response.__getitem__ = lambda self, key: { + "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15} + }[key] + mock_create.return_value = mock_response + + action = agent.act(mock_state) + assert isinstance(action, Action) + assert action.value == "The answer is 4." 
        assert action.use_tool is True  # no <solution> tag
        assert mock_state.token_counter["prompt_tokens"] == 10
        assert mock_state.token_counter["completion_tokens"] == 5

    @patch("mint.agents.minimax_lm_agent.openai.ChatCompletion.create")
    def test_act_with_solution_tag(self, mock_create, agent, mock_state):
        mock_response = MagicMock()
        mock_response.choices = [
            MagicMock(message={"content": "<solution>4</solution>"})
        ]
        mock_response.__getitem__ = lambda self, key: {
            "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}
        }[key]
        mock_create.return_value = mock_response

        action = agent.act(mock_state)
        assert action.use_tool is False  # has <solution> tag

    @patch("mint.agents.minimax_lm_agent.openai.ChatCompletion.create")
    def test_act_invalid_request_error(self, mock_create, agent, mock_state):
        import openai.error
        mock_create.side_effect = openai.error.InvalidRequestError(
            "context length exceeded", ""
        )
        action = agent.act(mock_state)
        assert action.error is not None
        assert "InvalidRequestError" in action.error

    @patch("mint.agents.minimax_lm_agent.openai.ChatCompletion.create")
    def test_act_tracks_token_usage(self, mock_create, agent, mock_state):
        mock_response = MagicMock()
        mock_response.choices = [MagicMock(message={"content": "Response"})]
        mock_response.__getitem__ = lambda self, key: {
            "usage": {"prompt_tokens": 20, "completion_tokens": 10, "total_tokens": 30}
        }[key]
        mock_create.return_value = mock_response

        agent.act(mock_state)
        assert mock_state.token_counter["prompt_tokens"] == 20
        assert mock_state.token_counter["completion_tokens"] == 10
        assert mock_state.token_counter["total_tokens"] == 30


class TestModelNames:
    @patch("mint.agents.minimax_lm_agent.openai.ChatCompletion.create")
    def test_m27_model(self, mock_create):
        config = {"model_name": "MiniMax-M2.7", "chat_mode": True}
        with patch.dict(os.environ, {"MINIMAX_API_KEY": "key"}):
            agent = MiniMaxLMAgent(config)
        assert agent.config["model_name"] == "MiniMax-M2.7"

    @patch("mint.agents.minimax_lm_agent.openai.ChatCompletion.create")
    def test_m27_highspeed_model(self, mock_create):
        config = {"model_name": "MiniMax-M2.7-highspeed", "chat_mode": True}
        with patch.dict(os.environ, {"MINIMAX_API_KEY": "key"}):
            agent = MiniMaxLMAgent(config)
        assert agent.config["model_name"] == "MiniMax-M2.7-highspeed"


class TestModuleExports:
    def test_import_from_agents(self):
        from mint.agents import MiniMaxLMAgent
        assert MiniMaxLMAgent is not None

    def test_import_from_agents_feedback(self):
        from mint.agents import MiniMaxFeedbackAgent
        assert MiniMaxFeedbackAgent is not None