Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ cobertura.ser
*.vi
*~
*.sass-cache
__pycache__

# Dreamweaver added files
_notes
Expand Down
33 changes: 33 additions & 0 deletions toolium/test/utils/ai_utils/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""
Copyright 2026 Telefónica Innovación Digital, S.L.
This file is part of Toolium.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import pytest

from toolium.driver_wrappers_pool import DriverWrappersPool


@pytest.fixture(scope='module', autouse=True)
def configure_default_openai_model():
    """
    Module-scoped autouse fixture that sets the default OpenAI model.

    Ensures the 'AI' section exists in the default driver wrapper config and
    sets 'openai_model' to 'gpt-4.1-mini' once for every test in the module.
    """
    wrapper_config = DriverWrappersPool.get_default_wrapper().config
    try:
        wrapper_config.add_section('AI')
    except Exception:
        # Section already present — nothing to do
        pass
    wrapper_config.set('AI', 'openai_model', 'gpt-4.1-mini')
7 changes: 4 additions & 3 deletions toolium/test/utils/ai_utils/test_ai_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@

import pytest

from toolium.test.utils.ai_utils.common import (
configure_default_openai_model, # noqa: F401, fixture needed to set the OpenAI model for all tests in this module
)
from toolium.utils.ai_utils.ai_agent import create_react_agent, execute_agent

# Global variable to keep track of mock responses in the agent
Expand Down Expand Up @@ -68,9 +71,7 @@ def tv_recommendations(user_question): # noqa: ARG001

@pytest.mark.skipif(not os.getenv('AZURE_OPENAI_API_KEY'), reason='AZURE_OPENAI_API_KEY environment variable not set')
def test_react_agent():
agent = create_react_agent(
TV_CONTENT_SYSTEM_MESSAGE, tool_method=tv_recommendations, provider='azure', model_name='gpt-4o-mini'
)
agent = create_react_agent(TV_CONTENT_SYSTEM_MESSAGE, tool_method=tv_recommendations, provider='azure')
agent_results = execute_agent(agent)

# Check if the agent's final response contains a valid JSON with the expected structure and analyze the result
Expand Down
18 changes: 5 additions & 13 deletions toolium/test/utils/ai_utils/test_answer_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
Field = None

from toolium.driver_wrappers_pool import DriverWrappersPool
from toolium.test.utils.ai_utils.common import (
configure_default_openai_model, # noqa: F401, fixture needed to set the OpenAI model for all tests in this module
)
from toolium.utils.ai_utils.evaluate_answer import assert_answer_evaluation, get_answer_evaluation_with_azure_openai

test_data_get = [
Expand Down Expand Up @@ -68,9 +71,8 @@
test_data_get,
)
def test_get_answer_evaluation_no_format_with_azure_openai(llm_answer, reference_answer, question):
model = 'gpt4o'
similarity, response = get_answer_evaluation_with_azure_openai(
llm_answer=llm_answer, reference_answer=reference_answer, question=question, model_name=model
llm_answer=llm_answer, reference_answer=reference_answer, question=question
)
assert isinstance(similarity, float), 'Similarity should be a float'
assert isinstance(response['explanation'], str), 'Explanation should be a string'
Expand All @@ -90,12 +92,10 @@ class SimilarityEvaluation(BaseModel):
similarity: float = Field(description='Similarity score between 0.0 and 1.0', ge=0.0, le=1.0)
explanation: str = Field(description='Brief justification for the similarity score')

model = 'gpt4o'
similarity, response = get_answer_evaluation_with_azure_openai(
llm_answer=llm_answer,
reference_answer=reference_answer,
question=question,
model_name=model,
response_format=SimilarityEvaluation,
)
assert isinstance(similarity, float), 'Similarity should be a float'
Expand Down Expand Up @@ -133,12 +133,10 @@ class AnswerEval(BaseModel):
'Only answer 5 if the answer is completely relevant to the question and gives no additional information.'
)

model = 'gpt4o'
similarity, response = get_answer_evaluation_with_azure_openai(
llm_answer=llm_answer,
reference_answer=reference_answer,
question=question,
model_name=model,
response_format=AnswerEval,
)
assert isinstance(similarity, float), 'Similarity should be a float'
Expand Down Expand Up @@ -178,12 +176,10 @@ class AnswerEval(BaseModel):
'Only answer 5 if the answer is completely relevant to the question and gives no additional information.'
)

model = 'gpt4o'
similarity, response = get_answer_evaluation_with_azure_openai(
llm_answer=llm_answer,
reference_answer=reference_answer,
question=question,
model_name=model,
response_format=AnswerEval,
)
assert isinstance(similarity, float), 'Similarity should be a float'
Expand All @@ -197,13 +193,11 @@ class AnswerEval(BaseModel):
test_data_assert,
)
def test_assert_answer_with_azure_openai(llm_answer, reference_answer, question, expected_low):
model = 'gpt4o'
provider = 'azure_openai'
provider = 'azure'
assert_answer_evaluation(
llm_answer=llm_answer,
reference_answers=reference_answer,
question=question,
model_name=model,
threshold=expected_low,
provider=provider,
)
Expand All @@ -221,11 +215,9 @@ def test_assert_answer_from_config(llm_answer, reference_answer, question, expec
except Exception:
pass
config.set('AI', 'provider', 'azure')
model = 'gpt4o'
assert_answer_evaluation(
llm_answer=llm_answer,
reference_answers=reference_answer,
question=question,
model_name=model,
threshold=expected_low,
)
17 changes: 3 additions & 14 deletions toolium/test/utils/ai_utils/test_text_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,11 @@

import pytest

from toolium.driver_wrappers_pool import DriverWrappersPool
from toolium.test.utils.ai_utils.common import (
configure_default_openai_model, # noqa: F401, fixture needed to set the OpenAI model for all tests in this module
)
from toolium.utils.ai_utils.text_analysis import get_text_criteria_analysis


def configure_default_openai_model():
    """
    Configure OpenAI model used in unit tests

    Ensures the 'AI' section exists in the default driver wrapper config and
    sets 'openai_model' to 'gpt-4.1-mini' for the tests in this module.
    """
    config = DriverWrappersPool.get_default_wrapper().config
    try:
        config.add_section('AI')
    except Exception:
        # Section already exists — safe to ignore
        pass
    config.set('AI', 'openai_model', 'gpt-4.1-mini')


get_analysis_examples = (
('How are you today?', ['is a greeting phrase', 'is a question'], 0.7, 1),
('Today is sunny', ['is an affirmation', 'talks about the weather'], 0.7, 1),
Expand Down
19 changes: 3 additions & 16 deletions toolium/test/utils/ai_utils/test_text_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,16 @@
import pytest

from toolium.driver_wrappers_pool import DriverWrappersPool
from toolium.test.utils.ai_utils.common import (
configure_default_openai_model, # noqa: F401, fixture needed to set the OpenAI model for all tests in this module
)
from toolium.utils.ai_utils.text_similarity import (
assert_text_similarity,
get_text_similarity_with_azure_openai,
get_text_similarity_with_sentence_transformers,
get_text_similarity_with_spacy,
)


def configure_default_openai_model():
    """
    Configure OpenAI model used in unit tests

    Ensures the 'AI' section exists in the default driver wrapper config and
    sets 'openai_model' to 'gpt-4o-mini' for the tests in this module.
    """
    config = DriverWrappersPool.get_default_wrapper().config
    try:
        config.add_section('AI')
    except Exception:
        # Section already exists — safe to ignore
        pass
    config.set('AI', 'openai_model', 'gpt-4o-mini')


get_similarity_examples = (
('Today it will be sunny', 'Today it will be sunny', 0.9, 1),
('Today is sunny', 'Today it will be sunny', 0.7, 1),
Expand Down Expand Up @@ -78,7 +68,6 @@ def test_get_text_similarity_with_sentence_transformers(input_text, expected_tex
get_openai_similarity_examples,
)
def test_get_text_similarity_with_azure_openai(input_text, expected_text, expected_low, expected_high):
configure_default_openai_model()
similarity = get_text_similarity_with_azure_openai(input_text, expected_text)
assert expected_low <= similarity <= expected_high

Expand All @@ -104,7 +93,6 @@ def test_assert_text_similarity_with_sentence_transformers_passed(input_text, ex
@pytest.mark.skipif(not os.getenv('AZURE_OPENAI_API_KEY'), reason='AZURE_OPENAI_API_KEY environment variable not set')
@pytest.mark.parametrize(('input_text', 'expected_text', 'threshold'), assert_similarity_passed_examples)
def test_assert_text_similarity_with_openai_passed(input_text, expected_text, threshold):
configure_default_openai_model()
assert_text_similarity(input_text, expected_text, threshold=threshold, similarity_method='azure_openai')


Expand Down Expand Up @@ -143,7 +131,6 @@ def test_assert_text_similarity_with_sentence_transformers_failed(input_text, ex
@pytest.mark.skipif(not os.getenv('AZURE_OPENAI_API_KEY'), reason='AZURE_OPENAI_API_KEY environment variable not set')
@pytest.mark.parametrize(('input_text', 'expected_text', 'threshold'), assert_openai_similarity_failed_examples)
def test_assert_text_similarity_with_openai_failed(input_text, expected_text, threshold):
configure_default_openai_model()
with pytest.raises(AssertionError) as excinfo:
assert_text_similarity(input_text, expected_text, threshold=threshold, similarity_method='azure_openai')
assert str(excinfo.value).startswith('Similarity between received and expected texts is below threshold')
Expand Down
Loading