diff --git a/src/lighteval/models/endpoints/litellm_model.py b/src/lighteval/models/endpoints/litellm_model.py
index 87332d1d7..9d0a45154 100644
--- a/src/lighteval/models/endpoints/litellm_model.py
+++ b/src/lighteval/models/endpoints/litellm_model.py
@@ -38,6 +38,9 @@
 
 
 logger = logging.getLogger(__name__)
+# HTTP status codes for errors that a retry cannot fix (401 unauthorized,
+# 403 forbidden, 404 not found); __call_api re-raises these immediately.
+NON_RETRIABLE_STATUS_CODES = frozenset({401, 403, 404})
 
 if is_package_available("litellm"):
     import litellm
@@ -242,6 +245,10 @@ def __call_api(self, prompt, return_logits, max_new_tokens, num_samples, stop_se
                         logger.warning(f"{error_string}. Returning empty response.")
                         return LitellmModelResponse()
             except Exception as e:
+                # Fail fast: backing off and retrying cannot fix these errors, so
+                # propagate the original exception instead of sleeping.
+                if getattr(e, "status_code", None) in NON_RETRIABLE_STATUS_CODES:
+                    raise
                 wait_time = min(
                     64, self.API_RETRY_SLEEP * (self.API_RETRY_MULTIPLIER**attempt)
                 )  # Exponential backoff with max 64s
diff --git a/tests/unit/models/endpoints/test_litellm_model.py b/tests/unit/models/endpoints/test_litellm_model.py
new file mode 100644
index 000000000..00deffd85
--- /dev/null
+++ b/tests/unit/models/endpoints/test_litellm_model.py
@@ -0,0 +1,131 @@
+# MIT License
+
+# Copyright (c) 2026 The HuggingFace Team
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from types import SimpleNamespace
+from unittest.mock import Mock
+
+import pytest
+
+
+pytest.importorskip("litellm")
+
+from lighteval.models.endpoints.litellm_model import (
+    NON_RETRIABLE_STATUS_CODES,
+    LiteLLMClient,
+    LiteLLMModelConfig,
+    litellm,
+)
+
+
+def make_response(content: str):
+    """Build a minimal stand-in response exposing ``choices[0].message.content``."""
+    return SimpleNamespace(choices=[SimpleNamespace(message=SimpleNamespace(content=content))])
+
+
+def make_client(cache_dir: str, api_max_retry: int = 3) -> LiteLLMClient:
+    """Create a ``LiteLLMClient`` with a dummy API key and the given cache dir and retry budget."""
+    return LiteLLMClient(
+        LiteLLMModelConfig(
+            model_name="openai/gpt-4o-mini",
+            provider="openai",
+            api_key="test-key",
+            api_max_retry=api_max_retry,
+            cache_dir=cache_dir,
+        )
+    )
+
+
+class TestLiteLLMClientRetries:
+    """Retry and fail-fast behaviour of ``LiteLLMClient.__call_api``."""
+
+    def test_rate_limit_error_still_retries(self, monkeypatch, tmp_path):
+        """Rate-limit errors are retried, sleeping between attempts, until a call succeeds."""
+        client = make_client(str(tmp_path), api_max_retry=3)
+        calls = []
+        responses = [
+            litellm.RateLimitError(
+                message="rate limited",
+                llm_provider="openai",
+                model="openai/gpt-4o-mini",
+            ),
+            litellm.RateLimitError(
+                message="rate limited",
+                llm_provider="openai",
+                model="openai/gpt-4o-mini",
+            ),
+            make_response("ok"),
+        ]
+
+        def fake_completion(**kwargs):
+            calls.append(kwargs)
+            response = responses.pop(0)
+            if isinstance(response, Exception):
+                raise response
+            return response
+
+        monkeypatch.setattr("lighteval.models.endpoints.litellm_model.litellm.completion", fake_completion)
+        sleep_calls = []
+        monkeypatch.setattr("lighteval.models.endpoints.litellm_model.time.sleep", sleep_calls.append)
+
+        # ``__call_api`` is name-mangled, hence the ``_LiteLLMClient`` prefix.
+        response = client._LiteLLMClient__call_api(
+            prompt=[{"role": "user", "content": "hi"}],
+            return_logits=False,
+            max_new_tokens=10,
+            num_samples=1,
+            stop_sequence=None,
+        )
+
+        assert response.choices[0].message.content == "ok"
+        assert len(calls) == 3
+        assert sleep_calls == [1.0, 2.0]
+
+    @pytest.mark.parametrize("status_code", sorted(NON_RETRIABLE_STATUS_CODES))
+    def test_non_retriable_status_code_fails_fast(self, monkeypatch, tmp_path, status_code):
+        """Each non-retriable status code is re-raised after one call, without sleeping."""
+        client = make_client(str(tmp_path))
+        calls = []
+
+        class FakeException(Exception):
+            def __init__(self, message):
+                super().__init__(message)
+                self.status_code = status_code
+
+        def fake_completion(**kwargs):
+            calls.append(kwargs)
+            raise FakeException("non-retriable error")
+
+        monkeypatch.setattr("lighteval.models.endpoints.litellm_model.litellm.completion", fake_completion)
+        sleep = Mock()
+        monkeypatch.setattr("lighteval.models.endpoints.litellm_model.time.sleep", sleep)
+
+        with pytest.raises(FakeException):
+            client._LiteLLMClient__call_api(
+                prompt=[{"role": "user", "content": "hi"}],
+                return_logits=False,
+                max_new_tokens=10,
+                num_samples=1,
+                stop_sequence=None,
+            )
+
+        assert len(calls) == 1
+        sleep.assert_not_called()