diff --git a/eval_protocol/pytest/__init__.py b/eval_protocol/pytest/__init__.py index ca0f7feb..8a31438a 100644 --- a/eval_protocol/pytest/__init__.py +++ b/eval_protocol/pytest/__init__.py @@ -1,8 +1,9 @@ from .default_agent_rollout_processor import default_agent_rollout_processor from .default_no_op_rollout_process import default_no_op_rollout_processor from .default_single_turn_rollout_process import default_single_turn_rollout_processor -from .pytest_utils import evaluate, evaluation_test +from .evaluation_test import evaluation_test from .types import RolloutProcessor, RolloutProcessorConfig +from .utils import evaluate __all__ = [ "default_agent_rollout_processor", diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py new file mode 100644 index 00000000..8cfd2e1a --- /dev/null +++ b/eval_protocol/pytest/evaluation_test.py @@ -0,0 +1,243 @@ +import inspect +from typing import Any, Callable, Dict, List, Optional + +import pytest + +from eval_protocol.models import EvaluationRow +from eval_protocol.pytest.default_no_op_rollout_process import default_no_op_rollout_processor +from eval_protocol.pytest.types import ( + Dataset, + DatasetPathParam, + EvaluationTestMode, + InputMessagesParam, + InputParam, + ModelParam, + RolloutProcessor, + RolloutProcessorConfig, + TestFunction, +) +from eval_protocol.pytest.utils import ( + AggregationMethod, + aggregate, + create_dynamically_parameterized_wrapper, + execute_function, +) + +from ..common_utils import load_jsonl + + +def evaluation_test( + *, + model: List[ModelParam], + input_messages: Optional[List[InputMessagesParam]] = None, + input_dataset: Optional[List[DatasetPathParam]] = None, + dataset_adapter: Optional[Callable[[List[Dict[str, Any]]], Dataset]] = lambda x: x, + input_params: Optional[List[InputParam]] = None, + rollout_processor: RolloutProcessor = default_no_op_rollout_processor, + aggregation_method: AggregationMethod = "mean", + threshold_of_success: 
Optional[float] = None, + num_runs: int = 1, + max_dataset_rows: Optional[int] = None, + mcp_config_path: Optional[str] = None, + mode: EvaluationTestMode = "batch", +) -> Callable[ + [TestFunction], + TestFunction, +]: + """Decorator to create pytest-based evaluation tests. + + Args: + model: Model identifiers to query. + input_messages: Messages to send to the model. This is useful if you + don't have a dataset but can hard-code the messages. Will be passed as + "input_dataset" to the test function. + input_dataset: Paths to JSONL datasets. This is useful if you have a + dataset already. Provide a dataset_adapter to convert the input dataset + to a list of EvaluationRows if you have a custom dataset format. + dataset_adapter: Function to convert the input dataset to a list of + EvaluationRows. This is useful if you have a custom dataset format. + input_params: Generation parameters for the model. + rollout_processor: Function used to perform the rollout. + aggregation_method: How to aggregate scores across rows. + threshold_of_success: If set, fail the test if the aggregated score is + below this threshold. + num_runs: Number of times to repeat the evaluation. + max_dataset_rows: Limit dataset to the first N rows. + mcp_config_path: Path to MCP config file that follows MCPMultiClientConfiguration schema + mode: Evaluation mode. "batch" (default) expects test function to handle + full dataset. "pointwise" applies test function to each row. 
If your evaluation requires + the full rollout of all rows to compute the score, use "batch"; if each row can be scored independently, use "pointwise". + """ + + def decorator( + test_func: TestFunction, + ): + sig = inspect.signature(test_func) + + # For pointwise/rowwise mode, we expect a different signature + if mode == "pointwise": + # Pointwise mode: function should accept messages and other row-level params + if "row" not in sig.parameters: + raise ValueError(f"In pointwise mode, your eval function must have a parameter named 'row'") + + # validate that "Row" is of type EvaluationRow + if sig.parameters["row"].annotation is not EvaluationRow: + raise ValueError(f"In pointwise mode, the 'row' parameter must be of type EvaluationRow") + + # validate that the function has a return type of EvaluationRow + if sig.return_annotation is not EvaluationRow: + raise ValueError("In pointwise mode, your eval function must return an EvaluationRow instance") + else: + # Batch mode: function should accept input_dataset and model + if "rows" not in sig.parameters: + raise ValueError("In batch mode, your eval function must have a parameter named 'rows'") + + # validate that "Rows" is of type List[EvaluationRow] + if sig.parameters["rows"].annotation is not List[EvaluationRow]: + raise ValueError(f"In batch mode, the 'rows' parameter must be of type List[EvaluationRow]") + + # validate that the function has a return type of List[EvaluationRow] + if sig.return_annotation is not List[EvaluationRow]: + raise ValueError("In batch mode, your eval function must return a list of EvaluationRow instances") + + def execute_with_params( + test_func: TestFunction, + row: EvaluationRow | None = None, + input_dataset: List[EvaluationRow] | None = None, + ): + kwargs = {} + if input_dataset is not None: + kwargs["rows"] = input_dataset + if row is not None: + kwargs["row"] = row + return execute_function(test_func, **kwargs) + + # Calculate all possible combinations of parameters + def generate_combinations(): + combinations = [] + + # Handle optional
parameters with defaults + datasets: List[Optional[DatasetPathParam]] = input_dataset if input_dataset is not None else [None] # type: ignore + params: List[Optional[InputParam]] = input_params if input_params is not None else [None] # type: ignore + messages: List[Optional[InputMessagesParam]] = input_messages if input_messages is not None else [None] # type: ignore + + # Generate all combinations + for m in model: + for ds in datasets: + for ip in params: + for im in messages: + # Skip combinations that don't make sense + # If we have a dataset, we should have params for rollout + if ds is not None and ip is None: + continue + # If we have messages but no dataset, that's fine + # If we have no dataset and no messages, that's also fine + combinations.append((m, ds, ip, im)) + + return combinations + + combinations = generate_combinations() + + # Create parameter tuples for pytest.mark.parametrize + param_tuples = [] + for combo in combinations: + model_name, dataset, params, messages = combo + param_tuple = [model_name] + if input_dataset is not None: + param_tuple.append(dataset) + if input_params is not None: + param_tuple.append(params) + if input_messages is not None: + param_tuple.append(messages) + param_tuples.append(tuple(param_tuple)) + + # For batch mode, use the original parameter names + test_param_names = ["model"] + if input_dataset is not None: + test_param_names.append("dataset_path") + if input_params is not None: + test_param_names.append("input_params") + if input_messages is not None: + test_param_names.append("input_messages") + + # Create wrapper function with exact signature that pytest expects + def create_wrapper_with_signature(): + # Create the function body that will be used + def wrapper_body(**kwargs): + model_name = kwargs["model"] + + # Handle dataset loading + if "dataset_path" in kwargs and kwargs["dataset_path"] is not None: + data = load_jsonl(kwargs["dataset_path"]) + if max_dataset_rows is not None: + data = 
data[:max_dataset_rows] + data = dataset_adapter(data) + elif "input_messages" in kwargs and kwargs["input_messages"] is not None: + data: List[EvaluationRow] = [EvaluationRow(messages=kwargs["input_messages"])] + else: + raise ValueError("No input dataset or input messages provided") + + input_dataset: List[EvaluationRow] = [] + config = RolloutProcessorConfig( + model=model_name, + input_params=kwargs.get("input_params") or {}, + mcp_config_path=mcp_config_path or "", + initial_messages=kwargs.get("input_messages") if "input_messages" in kwargs else [], + ) + for row in data: + processed: List[EvaluationRow] = execute_function(rollout_processor, row=row, config=config) + input_dataset.extend(processed) + + all_results: List[EvaluationRow] = [] + for _ in range(num_runs): + if mode == "pointwise": + # Pointwise mode: apply the evaluator function to each row + for row in input_dataset: + result = execute_with_params( + test_func, + row=row, + ) + if result is None or not isinstance(result, EvaluationRow): + raise ValueError( + f"Test function {test_func.__name__} did not return an EvaluationRow instance. You must return an EvaluationRow instance from your test function decorated with @evaluation_test." + ) + all_results.append(result) + else: + # Batch mode: call the test function with the full dataset + results = execute_with_params( + test_func, + input_dataset=input_dataset, + ) + if results is None: + raise ValueError( + f"Test function {test_func.__name__} did not return an EvaluationRow instance. You must return an EvaluationRow instance from your test function decorated with @evaluation_test." + ) + if not isinstance(results, list): + raise ValueError( + f"Test function {test_func.__name__} did not return a list of EvaluationRow instances. You must return a list of EvaluationRow instances from your test function decorated with @evaluation_test." + ) + if not results: + raise ValueError( + f"Test function {test_func.__name__} returned an empty list. 
You must return a non-empty list of EvaluationRow instances from your test function decorated with @evaluation_test." + ) + if not all(isinstance(r, EvaluationRow) for r in results): + raise ValueError( + f"Test function {test_func.__name__} returned a list containing non-EvaluationRow instances. You must return a list of EvaluationRow instances from your test function decorated with @evaluation_test." + ) + all_results.extend(results) + + scores = [r.evaluation_result.score for r in all_results if r.evaluation_result] + agg_score = aggregate(scores, aggregation_method) + if threshold_of_success is not None: + assert ( + agg_score >= threshold_of_success + ), f"Aggregated score {agg_score:.3f} below threshold {threshold_of_success}" + + return create_dynamically_parameterized_wrapper(test_func, wrapper_body, test_param_names) + + wrapper = create_wrapper_with_signature() + wrapper = pytest.mark.parametrize(test_param_names, param_tuples)(wrapper) + + return wrapper + + return decorator diff --git a/eval_protocol/pytest/pytest_utils.py b/eval_protocol/pytest/pytest_utils.py deleted file mode 100644 index afb0b8f4..00000000 --- a/eval_protocol/pytest/pytest_utils.py +++ /dev/null @@ -1,311 +0,0 @@ -import asyncio -import inspect -from typing import Any, Callable, Dict, List, Optional - -import pytest - -from eval_protocol.pytest.default_no_op_rollout_process import default_no_op_rollout_processor -from eval_protocol.pytest.types import ( - Dataset, - DatasetPathParam, - InputMessagesParam, - InputParam, - ModelParam, - RolloutProcessor, - RolloutProcessorConfig, - TestFunction, -) - -from ..common_utils import load_jsonl -from ..models import EvaluateResult, EvaluationRow - - -def evaluate( - rows: List[EvaluationRow], reward_fn: Callable[..., EvaluateResult], **kwargs: Any -) -> List[EvaluationRow]: - """Apply a reward function to each row and attach the result.""" - evaluated: List[EvaluationRow] = [] - for row in rows: - result = reward_fn(messages=row.messages, 
ground_truth=row.ground_truth, **kwargs) - row.evaluation_result = result - evaluated.append(row) - return evaluated - - -def _aggregate(scores: List[float], method: str) -> float: - if not scores: - return 0.0 - if method == "mean": - return sum(scores) / len(scores) - if method == "max": - return max(scores) - if method == "min": - return min(scores) - raise ValueError(f"Unknown aggregation method: {method}") - - -def evaluation_test( - *, - model: List[ModelParam], - input_messages: Optional[List[InputMessagesParam]] = None, - input_dataset: Optional[List[DatasetPathParam]] = None, - dataset_adapter: Optional[Callable[[List[Dict[str, Any]]], Dataset]] = lambda x: x, - input_params: Optional[List[InputParam]] = None, - rollout_processor: RolloutProcessor = default_no_op_rollout_processor, - aggregation_method: str = "mean", - threshold_of_success: Optional[float] = None, - num_runs: int = 1, - max_dataset_rows: Optional[int] = None, - mcp_config_path: Optional[str] = None, -) -> Callable[ - [TestFunction], - TestFunction, -]: - """Decorator to create pytest-based evaluation tests. - - Args: - model: Model identifiers to query. - input_messages: Messages to send to the model. This is useful if you - don't have a dataset but can hard-code the messages. Will be passed as - "input_dataset" to the test function. - input_dataset: Paths to JSONL datasets. This is useful if you have a - dataset already. Provide a dataset_adapter to convert the input dataset - to a list of EvaluationRows if you have a custom dataset format. - dataset_adapter: Function to convert the input dataset to a list of - EvaluationRows. This is useful if you have a custom dataset format. - input_params: Generation parameters for the model. - rollout_processor: Function used to perform the rollout. - aggregation_method: How to aggregate scores across rows. - threshold_of_success: If set, fail the test if the aggregated score is - below this threshold. 
- num_runs: Number of times to repeat the evaluation. - max_dataset_rows: Limit dataset to the first N rows. - - Usage: - With an input dataset and input params, the test function will be called with the following arguments: - - ```python - @evaluation_test( - model=["gpt-4o", "gpt-4o-mini"], - input_dataset=["data/test.jsonl"], - input_params=[{"temperature": 0.5}], - rollout_processor=default_rollout_processor, - aggregation_method="mean", - ) - def test_func(dataset_path: str, model_name: str, input_params: Dict[str, Any]): - pass - ``` - - Without an input dataset and input params, the test function will be called with the following arguments: - - ```python - @evaluation_test( - model=["gpt-4o", "gpt-4o-mini"], - ) - def test_func(model_name: str): - pass - ``` - - With model and input_messages, the test function will be called with the following arguments: - - ```python - @evaluation_test( - model=["gpt-4o", "gpt-4o-mini"], - input_messages=[{"role": "user", "content": "Hello, how are you?"}], - ) - def test_func(model_name: str, input_messages: List[List[Message]]): - pass - ``` - """ - - def decorator( - test_func: TestFunction, - ): - # Check if the function is async - is_async = inspect.iscoroutinefunction(test_func) - - sig = inspect.signature(test_func) - if "input_dataset" not in sig.parameters: - raise ValueError("test_func must have a parameter named 'input_dataset'") - - if "model" not in sig.parameters: - raise ValueError("test_func must have a parameter named 'model'") - - def execute_with_params( - test_func: TestFunction, - model: str, - input_dataset: List[EvaluationRow] | None = None, - input_params: InputParam | None = None, - ): - kwargs = {} - if input_dataset is not None: - kwargs["input_dataset"] = list(input_dataset) - if input_params is not None: - kwargs["input_params"] = input_params - if model is not None: - kwargs["model"] = model - if is_async: - # Handle async functions with proper event loop management - try: - loop = 
asyncio.get_event_loop() - if not loop.is_closed(): - # Use existing loop - task = loop.create_task(test_func(**kwargs)) - results = loop.run_until_complete(task) - else: - # Loop is closed, create a new one - results = asyncio.run(test_func(**kwargs)) - except RuntimeError: - # No event loop or other issues, create a new one - results = asyncio.run(test_func(**kwargs)) - else: - results = test_func(**kwargs) - return results - - # Calculate all possible combinations of parameters - def generate_combinations(model: List[ModelParam]): - combinations = [] - - # Always include models - model_list = model - - # Handle optional parameters with defaults - datasets: List[Optional[DatasetPathParam]] = input_dataset if input_dataset is not None else [None] # type: ignore - params: List[Optional[InputParam]] = input_params if input_params is not None else [None] # type: ignore - messages: List[Optional[InputMessagesParam]] = input_messages if input_messages is not None else [None] # type: ignore - - # Generate all combinations - for m in model_list: - for ds in datasets: - for ip in params: - for im in messages: - # Skip combinations that don't make sense - # If we have a dataset, we should have params for rollout - if ds is not None and ip is None: - continue - # If we have messages but no dataset, that's fine - # If we have no dataset and no messages, that's also fine - combinations.append((m, ds, ip, im)) - - return combinations - - combinations = generate_combinations(model) - - # Create parameter tuples for pytest.mark.parametrize - param_tuples = [] - for combo in combinations: - model_name, dataset, params, messages = combo - param_tuple = [model_name] - if input_dataset is not None: - param_tuple.append(dataset) - if input_params is not None: - param_tuple.append(params) - if input_messages is not None: - param_tuple.append(messages) - param_tuples.append(tuple(param_tuple)) - - # Determine the parameter names for the test function - test_param_names = ["model"] - if 
input_dataset is not None: - test_param_names.append("dataset_path") - if input_params is not None: - test_param_names.append("input_params") - if input_messages is not None: - test_param_names.append("input_messages") - - # Create wrapper function with exact signature that pytest expects - def create_wrapper_with_signature(): - # Create the function body that will be used - def wrapper_body(**kwargs): - model_name = kwargs["model"] - - # Handle dataset loading - if "dataset_path" in kwargs and kwargs["dataset_path"] is not None: - data = load_jsonl(kwargs["dataset_path"]) - if max_dataset_rows is not None: - data = data[:max_dataset_rows] - data = dataset_adapter(data) - elif "input_messages" in kwargs and kwargs["input_messages"] is not None: - data: List[EvaluationRow] = [EvaluationRow(messages=kwargs["input_messages"])] - else: - raise ValueError("No input dataset or input messages provided") - - input_dataset: List[EvaluationRow] = [] - config = RolloutProcessorConfig( - model=model_name, - input_params=kwargs.get("input_params") or {}, - mcp_config_path=mcp_config_path or "", - initial_messages=kwargs.get("input_messages") if "input_messages" in kwargs else [], - ) - for row in data: - is_async = inspect.iscoroutinefunction(rollout_processor) - if is_async: - try: - loop = asyncio.get_event_loop() - if not loop.is_closed(): - # Use existing loop - task = loop.create_task(rollout_processor(row, config=config)) - processed: List[EvaluationRow] = loop.run_until_complete(task) - else: - processed: List[EvaluationRow] = asyncio.run(rollout_processor(row, config=config)) - except RuntimeError: - # No event loop or other issues, create a new one - processed: List[EvaluationRow] = asyncio.run(rollout_processor(row, config=config)) - else: - processed: List[EvaluationRow] = rollout_processor(row, config=config) - input_dataset.extend(processed) - - all_results: List[EvaluationRow] = [] - for _ in range(num_runs): - # Each run reuses the same processed rows - results = 
execute_with_params( - test_func, - model=model_name, - input_dataset=input_dataset, - input_params=kwargs.get("input_params") if "input_params" in kwargs else None, - ) - if results is None: - raise ValueError( - f"Test function {test_func.__name__} did not return an EvaluationRow instance. You must return an EvaluationRow instance from your test function decorated with @evaluation_test." - ) - if not isinstance(results, list): - raise ValueError( - f"Test function {test_func.__name__} did not return a list of EvaluationRow instances. You must return a list of EvaluationRow instances from your test function decorated with @evaluation_test." - ) - if not results: - raise ValueError( - f"Test function {test_func.__name__} returned an empty list. You must return a non-empty list of EvaluationRow instances from your test function decorated with @evaluation_test." - ) - if not all(isinstance(r, EvaluationRow) for r in results): - raise ValueError( - f"Test function {test_func.__name__} returned a list containing non-EvaluationRow instances. You must return a list of EvaluationRow instances from your test function decorated with @evaluation_test." 
- ) - all_results.extend(results) - - scores = [r.evaluation_result.score for r in all_results if r.evaluation_result] - agg_score = _aggregate(scores, aggregation_method) - if threshold_of_success is not None: - assert ( - agg_score >= threshold_of_success - ), f"Aggregated score {agg_score:.3f} below threshold {threshold_of_success}" - - # Create a function with the exact signature pytest expects without using exec - from functools import wraps - - @wraps(test_func) - def wrapper(**kwargs): - return wrapper_body(**kwargs) - - parameters = [ - inspect.Parameter(name, inspect.Parameter.POSITIONAL_OR_KEYWORD) for name in test_param_names - ] - wrapper.__signature__ = inspect.Signature(parameters) - - return wrapper - - wrapper = create_wrapper_with_signature() - wrapper = pytest.mark.parametrize(test_param_names, param_tuples)(wrapper) - - return wrapper - - return decorator diff --git a/eval_protocol/pytest/types.py b/eval_protocol/pytest/types.py index 8850a5ea..a1e124c8 100644 --- a/eval_protocol/pytest/types.py +++ b/eval_protocol/pytest/types.py @@ -3,7 +3,7 @@ """ from dataclasses import dataclass -from typing import Any, Callable, Dict, List +from typing import Any, Callable, Dict, List, Literal from ..models import EvaluationRow, Message @@ -13,6 +13,17 @@ InputMessagesParam = List[Message] Dataset = List[EvaluationRow] + +EvaluationTestMode = Literal["batch", "pointwise"] +""" +"batch": (default) expects test function to handle full dataset. +"pointwise": applies test function to each row. + +How to choose between "batch" and "pointwise": +If your evaluation requires the rollout of all rows to be passed into your eval to compute the score, use "batch". +If your evaluation can be computed pointwise, use "pointwise" as EP can pipeline the rollouts and evals to be faster.
+""" + """ Test function types """ diff --git a/eval_protocol/pytest/utils.py b/eval_protocol/pytest/utils.py new file mode 100644 index 00000000..fccf5f81 --- /dev/null +++ b/eval_protocol/pytest/utils.py @@ -0,0 +1,99 @@ +import asyncio +import inspect +from typing import Any, Callable, List, Literal + +from ..models import EvaluateResult, EvaluationRow + + +def execute_function(func: Callable, **kwargs) -> Any: + """ + Execute a function with proper async handling. + + This is a pure function that handles both async and non-async function execution + with proper event loop management for async functions. + + Args: + func: The function to execute + **kwargs: Arguments to pass to the function + + Returns: + The result of the function execution + """ + is_async = asyncio.iscoroutinefunction(func) + if is_async: + # Handle async functions with proper event loop management + try: + loop = asyncio.get_event_loop() + if not loop.is_closed(): + # Use existing loop + task = loop.create_task(func(**kwargs)) + results = loop.run_until_complete(task) + else: + # Loop is closed, create a new one + results = asyncio.run(func(**kwargs)) + except RuntimeError: + # No event loop or other issues, create a new one + results = asyncio.run(func(**kwargs)) + else: + results = func(**kwargs) + return results + + +def evaluate( + rows: List[EvaluationRow], reward_fn: Callable[..., EvaluateResult], **kwargs: Any +) -> List[EvaluationRow]: + """Apply a reward function to each row and attach the result.""" + evaluated: List[EvaluationRow] = [] + for row in rows: + result = reward_fn(messages=row.messages, ground_truth=row.ground_truth, **kwargs) + row.evaluation_result = result + evaluated.append(row) + return evaluated + + +AggregationMethod = Literal["mean", "max", "min"] + + +def aggregate(scores: List[float], method: AggregationMethod) -> float: + if not scores: + return 0.0 + if method == "mean": + return sum(scores) / len(scores) + if method == "max": + return max(scores) + if 
method == "min": + return min(scores) + raise ValueError(f"Unknown aggregation method: {method}") + + +def create_dynamically_parameterized_wrapper(test_func, wrapper_body, test_param_names): + """ + Creates a wrapper function with dynamic parameters for pytest parameterization. + + This function takes a test function and creates a wrapper that: + 1. Preserves the original function's metadata using functools.wraps + 2. Creates a new function signature with the specified parameter names that maps to pytest.mark.parametrize decorator + 3. Returns a callable that can be used with pytest.mark.parametrize + + The function signature is dynamically created to match the parameter names expected by + pytest.mark.parametrize, ensuring that pytest can properly map the test parameters + to the function arguments. + + Args: + test_func: The original test function to wrap + wrapper_body: The function body that contains the actual test logic + test_param_names: List of parameter names for the dynamic signature + + Returns: + A wrapper function with the specified parameter signature that calls wrapper_body + """ + from functools import wraps + + @wraps(test_func) + def wrapper(**kwargs): + return wrapper_body(**kwargs) + + parameters = [inspect.Parameter(name, inspect.Parameter.POSITIONAL_OR_KEYWORD) for name in test_param_names] + wrapper.__signature__ = inspect.Signature(parameters) + + return wrapper diff --git a/pyproject.toml b/pyproject.toml index f7095386..dcc10ba4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,7 @@ dev = [ "ipykernel>=6.30.0", "jupyter>=1.1.1", "pip>=25.1.1", + "haikus==0.3.8", ] trl = [ "torch>=1.9", @@ -119,3 +120,9 @@ line_length = 119 [tool.uv.sources] tau2 = { git = "https://github.com/sierra-research/tau2-bench.git" } + +[dependency-groups] +dev = [ + "haikus==0.3.8", + "pytest>=8.4.1", +] diff --git a/tests/pytest/helper/word_count_to_evaluation_row.py b/tests/pytest/helper/word_count_to_evaluation_row.py new file mode 100644 index 
00000000..f0517dd0 --- /dev/null +++ b/tests/pytest/helper/word_count_to_evaluation_row.py @@ -0,0 +1,14 @@ +from typing import Any, Dict, List + +from eval_protocol.models import EvaluationRow, Message + + +def word_count_to_evaluation_row(data: List[Dict[str, Any]]) -> List[EvaluationRow]: + """Convert word count dataset format to EvaluationRow for word_count evaluation.""" + return [ + EvaluationRow( + messages=[Message(role="user", content=row["user_query"])], + ground_truth=row["ground_truth_for_eval"] + ) + for row in data + ] \ No newline at end of file diff --git a/tests/pytest/test_markdown_highlighting.py b/tests/pytest/test_markdown_highlighting.py index d0029c17..e85f0742 100644 --- a/tests/pytest/test_markdown_highlighting.py +++ b/tests/pytest/test_markdown_highlighting.py @@ -4,7 +4,6 @@ This test demonstrates how to check if model responses contain the required number of highlighted sections. """ -import json import re from typing import Any, Dict, List, Optional @@ -15,56 +14,48 @@ def markdown_dataset_to_evaluation_row(data: List[Dict[str, Any]]) -> List[EvaluationRow]: """ Convert entries from markdown dataset to EvaluationRow objects. - """ + """ return [ - EvaluationRow( - messages=[Message(role="user", content=row["prompt"])], - ground_truth=str(row["num_highlights"]) - ) + EvaluationRow(messages=[Message(role="user", content=row["prompt"])], ground_truth=str(row["num_highlights"])) for row in data ] -def markdown_format_evaluate(messages: List[Message], ground_truth: Optional[str]=None, **kwargs) -> EvaluateResult: +def markdown_format_evaluate(messages: List[Message], ground_truth: Optional[str] = None, **kwargs) -> EvaluateResult: """ Evaluation function that checks if the model's response contains the required number of formatted sections.
""" - + assistant_response = messages[-1].content - + if not assistant_response: - return EvaluateResult( - score=0.0, - reason="❌ No assistant response found" - ) - + return EvaluateResult(score=0.0, reason="❌ No assistant response found") + required_highlights = int(ground_truth) # Check if the response contains the required number of formatted sections # e.g. **bold** or *italic* - + actual_count = 0 highlights = re.findall(r"\*[^\n\*]*\*", assistant_response) double_highlights = re.findall(r"\*\*[^\n\*]*\*\*", assistant_response) - + for highlight in highlights: if highlight.strip("*").strip(): actual_count += 1 for highlight in double_highlights: if highlight.removeprefix("**").removesuffix("**").strip(): actual_count += 1 - + meets_requirement = actual_count >= required_highlights - + if meets_requirement: return EvaluateResult( - score=1.0, - reason=f"✅ Found {actual_count} highlighted sections (required: {required_highlights})" + score=1.0, reason=f"✅ Found {actual_count} highlighted sections (required: {required_highlights})" ) else: return EvaluateResult( - score=0.0, - reason=f"❌ Only found {actual_count} highlighted sections (required: {required_highlights})" + score=0.0, reason=f"❌ Only found {actual_count} highlighted sections (required: {required_highlights})" ) @@ -72,13 +63,13 @@ def markdown_format_evaluate(messages: List[Message], ground_truth: Optional[str input_dataset=["tests/pytest/data/markdown_dataset.jsonl"], dataset_adapter=markdown_dataset_to_evaluation_row, model=["accounts/fireworks/models/llama-v3p1-8b-instruct"], - input_params=[{"temperature": 0.0, "max_tokens": 4096}], + input_params=[{"temperature": 0.0, "max_tokens": 4096}], threshold_of_success=1.0, rollout_processor=default_single_turn_rollout_processor, - num_runs=1 + num_runs=1, ) -def test_markdown_highlighting_evaluation(input_dataset, input_params, model): +def test_markdown_highlighting_evaluation(rows: List[EvaluationRow]) -> List[EvaluationRow]: """ Test markdown 
highlighting validation using batch mode with evaluate(). """ - return evaluate(input_dataset, markdown_format_evaluate) \ No newline at end of file + return evaluate(rows, markdown_format_evaluate) diff --git a/tests/pytest/test_pytest_async.py b/tests/pytest/test_pytest_async.py index e37d43a2..620683e1 100644 --- a/tests/pytest/test_pytest_async.py +++ b/tests/pytest/test_pytest_async.py @@ -1,6 +1,6 @@ from typing import List -from eval_protocol.models import EvaluationRow +from eval_protocol.models import EvaluationRow, Message from eval_protocol.pytest import evaluation_test from examples.math_example.main import evaluate as math_evaluate @@ -8,14 +8,14 @@ @evaluation_test( input_messages=[ [ - {"role": "user", "content": "What is the capital of France?"}, + Message(role="user", content="What is the capital of France?"), ], [ - {"role": "user", "content": "What is the capital of the moon?"}, + Message(role="user", content="What is the capital of the moon?"), ], ], model=["accounts/fireworks/models/kimi-k2-instruct"], ) -async def test_pytest_async(input_dataset: List[EvaluationRow], model): +async def test_pytest_async(rows: List[EvaluationRow]) -> List[EvaluationRow]: """Run math evaluation on sample dataset using pytest interface.""" - return input_dataset + return rows diff --git a/tests/pytest/test_pytest_default_agent_rollout_processor.py b/tests/pytest/test_pytest_default_agent_rollout_processor.py index e18dcf32..06762046 100644 --- a/tests/pytest/test_pytest_default_agent_rollout_processor.py +++ b/tests/pytest/test_pytest_default_agent_rollout_processor.py @@ -1,24 +1,24 @@ from datetime import datetime from typing import List -from eval_protocol.models import EvaluationRow, Message +from eval_protocol.models import Message, EvaluationRow from eval_protocol.pytest import default_agent_rollout_processor, evaluation_test @evaluation_test( input_messages=[ [ - { - "role": "user", - "content": "Can you give a summary of the past week in the 'general, 
model-requests, bug-reports, questions, and feature-requests' channels. For EVERY message or thread has not been resolved, please list them at the end of your response in a table. Be sure to include the exact message, severity, and current status so far. Current Date & Time: {current_date_time}".format( + Message( + role="user", + content="Can you give a summary of the past week in the 'general, model-requests, bug-reports, questions, and feature-requests' channels. For EVERY message or thread has not been resolved, please list them at the end of your response in a table. Be sure to include the exact message, severity, and current status so far. Current Date & Time: {current_date_time}".format( current_date_time=datetime.now().strftime("%B %d, %Y at %I:%M %p") ), - } + ) ] ], rollout_processor=default_agent_rollout_processor, model=["fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"], ) -def test_pytest_default_agent_rollout_processor(input_dataset: List[EvaluationRow], model): +def test_pytest_default_agent_rollout_processor(rows: List[EvaluationRow]) -> List[EvaluationRow]: """Run math evaluation on sample dataset using pytest interface.""" - return input_dataset + return rows diff --git a/tests/pytest/test_pytest_input_messages.py b/tests/pytest/test_pytest_input_messages.py index c6f8d52a..c5a59fd8 100644 --- a/tests/pytest/test_pytest_input_messages.py +++ b/tests/pytest/test_pytest_input_messages.py @@ -1,18 +1,18 @@ from typing import List -from eval_protocol.models import EvaluationRow +from eval_protocol.models import Message, EvaluationRow from eval_protocol.pytest import default_single_turn_rollout_processor, evaluation_test @evaluation_test( input_messages=[ [ - {"role": "user", "content": "What is the capital of France?"}, + Message(role="user", content="What is the capital of France?"), ] ], model=["accounts/fireworks/models/kimi-k2-instruct"], rollout_processor=default_single_turn_rollout_processor, ) -def 
test_input_messages_in_decorator(input_dataset: List[EvaluationRow], model): +def test_input_messages_in_decorator(rows: List[EvaluationRow]) -> List[EvaluationRow]: """Run math evaluation on sample dataset using pytest interface.""" - return input_dataset + return rows diff --git a/tests/pytest/test_pytest_math_example.py b/tests/pytest/test_pytest_math_example.py index 267c4705..367794a0 100644 --- a/tests/pytest/test_pytest_math_example.py +++ b/tests/pytest/test_pytest_math_example.py @@ -1,3 +1,5 @@ +from typing import List +from eval_protocol.models import EvaluationRow from eval_protocol.pytest import default_single_turn_rollout_processor, evaluate, evaluation_test from examples.math_example.main import evaluate as math_evaluate from tests.pytest.helper.gsm8k_to_evaluation_row import gsm8k_to_evaluation_row @@ -12,6 +14,6 @@ threshold_of_success=0.0, rollout_processor=default_single_turn_rollout_processor, ) -def test_math_dataset(input_dataset, input_params, model): +def test_math_dataset(rows: List[EvaluationRow]) -> List[EvaluationRow]: """Run math evaluation on sample dataset using pytest interface.""" - return evaluate(input_dataset, math_evaluate) + return evaluate(rows, math_evaluate) diff --git a/tests/pytest/test_pytest_math_format_length.py b/tests/pytest/test_pytest_math_format_length.py index 589685ab..ba5dd60b 100644 --- a/tests/pytest/test_pytest_math_format_length.py +++ b/tests/pytest/test_pytest_math_format_length.py @@ -1,3 +1,5 @@ +from typing import List +from eval_protocol.models import EvaluationRow from eval_protocol.pytest import default_single_turn_rollout_processor, evaluate, evaluation_test from examples.math_with_format_and_length.main import evaluate as math_fl_evaluate from tests.pytest.helper.gsm8k_to_evaluation_row import gsm8k_to_evaluation_row @@ -12,6 +14,6 @@ threshold_of_success=0.0, rollout_processor=default_single_turn_rollout_processor, ) -def test_math_format_length_dataset(input_dataset, input_params, model): +def 
test_math_format_length_dataset(rows: List[EvaluationRow]) -> List[EvaluationRow]: """Run math with format and length evaluation on sample dataset.""" - return evaluate(input_dataset, math_fl_evaluate) + return evaluate(rows, math_fl_evaluate) diff --git a/tests/pytest/test_pytest_word_count_example.py b/tests/pytest/test_pytest_word_count_example.py new file mode 100644 index 00000000..a0cb908c --- /dev/null +++ b/tests/pytest/test_pytest_word_count_example.py @@ -0,0 +1,83 @@ +from haikus import haikus + +from eval_protocol.models import EvaluateResult, EvaluationRow, MetricResult +from eval_protocol.pytest import default_single_turn_rollout_processor, evaluation_test +from tests.pytest.helper.word_count_to_evaluation_row import word_count_to_evaluation_row + + +@evaluation_test( + input_dataset=["development/gsm8k_sample.jsonl"], + dataset_adapter=word_count_to_evaluation_row, + model=["accounts/fireworks/models/kimi-k2-instruct"], + input_params=[{"temperature": 0.0}], + max_dataset_rows=5, + threshold_of_success=0.3, # Reasonable threshold for word count evaluation + rollout_processor=default_single_turn_rollout_processor, + mode="pointwise", # Use pointwise mode for elegant row-by-row evaluation +) +def test_word_count_evaluate(row: EvaluationRow) -> EvaluationRow: + """ + Pointwise word count evaluator - just the core evaluation logic. + Everything else (models, datasets, thresholds) is parameterized in the decorator. + + NOTE: This function does not make any sense since it just counts the number + of words in the last message and computes some haiku analysis but only uses + the word count to compute the score. But tests/shows how to write a + pointwise evaluation function. 
+ """ + if not row.messages: + return EvaluateResult(score=0.0, reason="No messages found", is_score_valid=False) + + last_message = row.messages[-1] + content = last_message.content if last_message and last_message.content else "" + + # Word count logic + word_count = len(content.split()) + word_count_score = min(word_count / 100, 1.0) + + # Haiku analysis logic + haiku_lines = content.splitlines() + haiku_analysis_data = {} + haiku_metric_score = 0.0 + haiku_metric_reason = "Content not suitable for haiku analysis." + haiku_metric_valid = False + + if len(haiku_lines) in [3, 5]: + try: + analysis = haikus(haiku_lines) + haiku_analysis_data = analysis + kigo = analysis.get("kigo", []) + haiku_type = analysis.get("type", "unknown") + + if kigo: + haiku_metric_score = 1.0 + elif haiku_type not in ["unknown", "error"]: + haiku_metric_score = 0.5 + + haiku_metric_reason = f"Haiku analysis - Type: {haiku_type}, Kigo: {', '.join(kigo) if kigo else 'None'}" + haiku_metric_valid = True + except Exception as e: + haiku_metric_reason = f"Haiku analysis failed: {str(e)}" + haiku_metric_valid = False + + # Combine metrics + metrics = { + "word_count": MetricResult( + score=word_count_score, + is_score_valid=word_count > 0, + reason=f"Word count: {word_count}", + ), + "haiku_analysis": MetricResult( + score=haiku_metric_score, + is_score_valid=haiku_metric_valid, + reason=haiku_metric_reason, + data=haiku_analysis_data, + ), + } + + row.evaluation_result = EvaluateResult( + score=word_count_score, + reason=f"Word count: {word_count}. 
{haiku_metric_reason}", + metrics=metrics, + ) + return row diff --git a/uv.lock b/uv.lock index 431722cf..4a5fdba8 100644 --- a/uv.lock +++ b/uv.lock @@ -953,6 +953,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, ] +[[package]] +name = "deprecated" +version = "1.2.18" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/97/06afe62762c9a8a86af0cfb7bfdab22a43ad17138b07af5b1a58442690a2/deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d", size = 2928744, upload-time = "2025-01-27T10:46:25.7Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998, upload-time = "2025-01-27T10:46:09.186Z" }, +] + [[package]] name = "dill" version = "0.3.8" @@ -1073,6 +1085,7 @@ dev = [ { name = "docker" }, { name = "e2b" }, { name = "flake8" }, + { name = "haikus" }, { name = "ipykernel" }, { name = "isort" }, { name = "jupyter" }, @@ -1108,6 +1121,12 @@ trl = [ { name = "trl" }, ] +[package.dev-dependencies] +dev = [ + { name = "haikus" }, + { name = "pytest" }, +] + [package.metadata] requires-dist = [ { name = "accelerate", marker = "extra == 'trl'", specifier = ">=0.28.0" }, @@ -1119,8 +1138,8 @@ requires-dist = [ { name = "black", marker = "extra == 'dev'", specifier = ">=21.5b2" }, { name = "build", marker = "extra == 'dev'" }, { name = "dataclasses-json", specifier = ">=0.5.7" }, - { name = "deepdiff", specifier = ">=6.0.0" }, { name = "datasets" }, + { 
name = "deepdiff", specifier = ">=6.0.0" }, { name = "docker", marker = "extra == 'dev'", specifier = "==7.1.0" }, { name = "docstring-parser", specifier = ">=0.15" }, { name = "e2b", marker = "extra == 'dev'" }, @@ -1129,6 +1148,7 @@ requires-dist = [ { name = "flake8", marker = "extra == 'dev'", specifier = ">=3.9.2" }, { name = "fsspec" }, { name = "gymnasium", specifier = ">=0.29.0" }, + { name = "haikus", marker = "extra == 'dev'", specifier = "==0.3.8" }, { name = "httpx", specifier = ">=0.24.0" }, { name = "hydra-core", specifier = ">=1.3.2" }, { name = "ipykernel", specifier = ">=6.30.0" }, @@ -1175,6 +1195,12 @@ requires-dist = [ ] provides-extras = ["dev", "trl", "openevals", "fireworks"] +[package.metadata.requires-dev] +dev = [ + { name = "haikus", specifier = "==0.3.8" }, + { name = "pytest", specifier = ">=8.4.1" }, +] + [[package]] name = "exceptiongroup" version = "1.3.0" @@ -1554,6 +1580,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/9e/984486f2d0a0bd2b024bf4bc1c62688fcafa9e61991f041fb0e2def4a982/h2-4.2.0-py3-none-any.whl", hash = "sha256:479a53ad425bb29af087f3458a61d30780bc818e4ebcf01f0b536ba916462ed0", size = 60957, upload-time = "2025-02-01T11:02:26.481Z" }, ] +[[package]] +name = "haikus" +version = "0.3.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pykakasi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7a/c2/2309ca1210318e3fc66007d0c8b1c3f959d9b3432d17a8b17a55fc6e145a/haikus-0.3.8.tar.gz", hash = "sha256:0e59cf8bfae8faa51965a9b39d60aa511e68f053a53a1fd956e391e26dbb796e", size = 96508, upload-time = "2024-01-17T04:42:32.572Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/21/89a60826a8973ba43bb24ed3747f26fa93467586bb79703670140901120e/haikus-0.3.8-py3-none-any.whl", hash = "sha256:fc5566b062db047a8128db38a32be4195fcc84482f0f279dad34d825c5ee1799", size = 98532, upload-time = "2024-01-17T04:42:28.572Z" }, +] + [[package]] name = "hf-xet" version = 
"1.1.5" @@ -1850,6 +1888,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/11/114d0a5f4dabbdcedc1125dee0888514c3c3b16d3e9facad87ed96fad97c/isort-6.0.1-py3-none-any.whl", hash = "sha256:2dc5d7f65c9678d94c88dfc29161a320eec67328bc97aad576874cb4be1e9615", size = 94186, upload-time = "2025-02-26T21:13:14.911Z" }, ] +[[package]] +name = "jaconv" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d2/e1/670cefc7f00b0e1890e114a37a98ea425f7e06131342aeb9636856ac663c/jaconv-0.4.0.tar.gz", hash = "sha256:32da74b247f276e09a52d6b35c153df2387965cb85a6f034cc8af21d446f8161", size = 17402, upload-time = "2024-07-25T16:35:24.75Z" } + [[package]] name = "jaraco-classes" version = "3.4.0" @@ -4072,6 +4116,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "pykakasi" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deprecated" }, + { name = "jaconv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ad/32/2a8e213fd744459a03864af7cf4c6142ee061fc915757c8152d147b16015/pykakasi-2.3.0.tar.gz", hash = "sha256:fa052a8e63f59fb8d6569abbe719a8c9f9daf15ed27a67a56ab1705f0f67b0a1", size = 21752447, upload-time = "2024-06-24T04:57:31.233Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0f/e8/11644fe823e05c583b330e9fb81e3e8fc5d079036512a8300fc157be349d/pykakasi-2.3.0-py3-none-any.whl", hash = "sha256:26d21b090048ff45c6a4d8e962426b7951767216008ec30358e8a9d74af77f29", size = 2395003, upload-time = "2024-06-24T04:57:18.101Z" }, +] + [[package]] name = "pyproject-hooks" version = "1.2.0" @@ -5406,6 +5463,70 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083, upload-time = "2024-12-07T15:28:26.465Z" }, ] +[[package]] +name = "wrapt" +version = "1.17.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/fc/e91cc220803d7bc4db93fb02facd8461c37364151b8494762cc88b0fbcef/wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3", size = 55531, upload-time = "2025-01-14T10:35:45.465Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/d1/1daec934997e8b160040c78d7b31789f19b122110a75eca3d4e8da0049e1/wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984", size = 53307, upload-time = "2025-01-14T10:33:13.616Z" }, + { url = "https://files.pythonhosted.org/packages/1b/7b/13369d42651b809389c1a7153baa01d9700430576c81a2f5c5e460df0ed9/wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22", size = 38486, upload-time = "2025-01-14T10:33:15.947Z" }, + { url = "https://files.pythonhosted.org/packages/62/bf/e0105016f907c30b4bd9e377867c48c34dc9c6c0c104556c9c9126bd89ed/wrapt-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7", size = 38777, upload-time = "2025-01-14T10:33:17.462Z" }, + { url = "https://files.pythonhosted.org/packages/27/70/0f6e0679845cbf8b165e027d43402a55494779295c4b08414097b258ac87/wrapt-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c", size = 83314, upload-time = "2025-01-14T10:33:21.282Z" }, + { url = 
"https://files.pythonhosted.org/packages/0f/77/0576d841bf84af8579124a93d216f55d6f74374e4445264cb378a6ed33eb/wrapt-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72", size = 74947, upload-time = "2025-01-14T10:33:24.414Z" }, + { url = "https://files.pythonhosted.org/packages/90/ec/00759565518f268ed707dcc40f7eeec38637d46b098a1f5143bff488fe97/wrapt-1.17.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061", size = 82778, upload-time = "2025-01-14T10:33:26.152Z" }, + { url = "https://files.pythonhosted.org/packages/f8/5a/7cffd26b1c607b0b0c8a9ca9d75757ad7620c9c0a9b4a25d3f8a1480fafc/wrapt-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2", size = 81716, upload-time = "2025-01-14T10:33:27.372Z" }, + { url = "https://files.pythonhosted.org/packages/7e/09/dccf68fa98e862df7e6a60a61d43d644b7d095a5fc36dbb591bbd4a1c7b2/wrapt-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c", size = 74548, upload-time = "2025-01-14T10:33:28.52Z" }, + { url = "https://files.pythonhosted.org/packages/b7/8e/067021fa3c8814952c5e228d916963c1115b983e21393289de15128e867e/wrapt-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62", size = 81334, upload-time = "2025-01-14T10:33:29.643Z" }, + { url = "https://files.pythonhosted.org/packages/4b/0d/9d4b5219ae4393f718699ca1c05f5ebc0c40d076f7e65fd48f5f693294fb/wrapt-1.17.2-cp310-cp310-win32.whl", hash = "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563", size = 36427, upload-time = "2025-01-14T10:33:30.832Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/6a/c5a83e8f61aec1e1aeef939807602fb880e5872371e95df2137142f5c58e/wrapt-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f", size = 38774, upload-time = "2025-01-14T10:33:32.897Z" }, + { url = "https://files.pythonhosted.org/packages/cd/f7/a2aab2cbc7a665efab072344a8949a71081eed1d2f451f7f7d2b966594a2/wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58", size = 53308, upload-time = "2025-01-14T10:33:33.992Z" }, + { url = "https://files.pythonhosted.org/packages/50/ff/149aba8365fdacef52b31a258c4dc1c57c79759c335eff0b3316a2664a64/wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda", size = 38488, upload-time = "2025-01-14T10:33:35.264Z" }, + { url = "https://files.pythonhosted.org/packages/65/46/5a917ce85b5c3b490d35c02bf71aedaa9f2f63f2d15d9949cc4ba56e8ba9/wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438", size = 38776, upload-time = "2025-01-14T10:33:38.28Z" }, + { url = "https://files.pythonhosted.org/packages/ca/74/336c918d2915a4943501c77566db41d1bd6e9f4dbc317f356b9a244dfe83/wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a", size = 83776, upload-time = "2025-01-14T10:33:40.678Z" }, + { url = "https://files.pythonhosted.org/packages/09/99/c0c844a5ccde0fe5761d4305485297f91d67cf2a1a824c5f282e661ec7ff/wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000", size = 75420, upload-time = "2025-01-14T10:33:41.868Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/b0/9fc566b0fe08b282c850063591a756057c3247b2362b9286429ec5bf1721/wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6", size = 83199, upload-time = "2025-01-14T10:33:43.598Z" }, + { url = "https://files.pythonhosted.org/packages/9d/4b/71996e62d543b0a0bd95dda485219856def3347e3e9380cc0d6cf10cfb2f/wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b", size = 82307, upload-time = "2025-01-14T10:33:48.499Z" }, + { url = "https://files.pythonhosted.org/packages/39/35/0282c0d8789c0dc9bcc738911776c762a701f95cfe113fb8f0b40e45c2b9/wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662", size = 75025, upload-time = "2025-01-14T10:33:51.191Z" }, + { url = "https://files.pythonhosted.org/packages/4f/6d/90c9fd2c3c6fee181feecb620d95105370198b6b98a0770cba090441a828/wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72", size = 81879, upload-time = "2025-01-14T10:33:52.328Z" }, + { url = "https://files.pythonhosted.org/packages/8f/fa/9fb6e594f2ce03ef03eddbdb5f4f90acb1452221a5351116c7c4708ac865/wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317", size = 36419, upload-time = "2025-01-14T10:33:53.551Z" }, + { url = "https://files.pythonhosted.org/packages/47/f8/fb1773491a253cbc123c5d5dc15c86041f746ed30416535f2a8df1f4a392/wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3", size = 38773, upload-time = "2025-01-14T10:33:56.323Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/bd/ab55f849fd1f9a58ed7ea47f5559ff09741b25f00c191231f9f059c83949/wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925", size = 53799, upload-time = "2025-01-14T10:33:57.4Z" }, + { url = "https://files.pythonhosted.org/packages/53/18/75ddc64c3f63988f5a1d7e10fb204ffe5762bc663f8023f18ecaf31a332e/wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392", size = 38821, upload-time = "2025-01-14T10:33:59.334Z" }, + { url = "https://files.pythonhosted.org/packages/48/2a/97928387d6ed1c1ebbfd4efc4133a0633546bec8481a2dd5ec961313a1c7/wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40", size = 38919, upload-time = "2025-01-14T10:34:04.093Z" }, + { url = "https://files.pythonhosted.org/packages/73/54/3bfe5a1febbbccb7a2f77de47b989c0b85ed3a6a41614b104204a788c20e/wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d", size = 88721, upload-time = "2025-01-14T10:34:07.163Z" }, + { url = "https://files.pythonhosted.org/packages/25/cb/7262bc1b0300b4b64af50c2720ef958c2c1917525238d661c3e9a2b71b7b/wrapt-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b", size = 80899, upload-time = "2025-01-14T10:34:09.82Z" }, + { url = "https://files.pythonhosted.org/packages/2a/5a/04cde32b07a7431d4ed0553a76fdb7a61270e78c5fd5a603e190ac389f14/wrapt-1.17.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98", size = 89222, upload-time = "2025-01-14T10:34:11.258Z" }, + { url = 
"https://files.pythonhosted.org/packages/09/28/2e45a4f4771fcfb109e244d5dbe54259e970362a311b67a965555ba65026/wrapt-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82", size = 86707, upload-time = "2025-01-14T10:34:12.49Z" }, + { url = "https://files.pythonhosted.org/packages/c6/d2/dcb56bf5f32fcd4bd9aacc77b50a539abdd5b6536872413fd3f428b21bed/wrapt-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae", size = 79685, upload-time = "2025-01-14T10:34:15.043Z" }, + { url = "https://files.pythonhosted.org/packages/80/4e/eb8b353e36711347893f502ce91c770b0b0929f8f0bed2670a6856e667a9/wrapt-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9", size = 87567, upload-time = "2025-01-14T10:34:16.563Z" }, + { url = "https://files.pythonhosted.org/packages/17/27/4fe749a54e7fae6e7146f1c7d914d28ef599dacd4416566c055564080fe2/wrapt-1.17.2-cp312-cp312-win32.whl", hash = "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9", size = 36672, upload-time = "2025-01-14T10:34:17.727Z" }, + { url = "https://files.pythonhosted.org/packages/15/06/1dbf478ea45c03e78a6a8c4be4fdc3c3bddea5c8de8a93bc971415e47f0f/wrapt-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991", size = 38865, upload-time = "2025-01-14T10:34:19.577Z" }, + { url = "https://files.pythonhosted.org/packages/ce/b9/0ffd557a92f3b11d4c5d5e0c5e4ad057bd9eb8586615cdaf901409920b14/wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125", size = 53800, upload-time = "2025-01-14T10:34:21.571Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ef/8be90a0b7e73c32e550c73cfb2fa09db62234227ece47b0e80a05073b375/wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash 
= "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998", size = 38824, upload-time = "2025-01-14T10:34:22.999Z" }, + { url = "https://files.pythonhosted.org/packages/36/89/0aae34c10fe524cce30fe5fc433210376bce94cf74d05b0d68344c8ba46e/wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5", size = 38920, upload-time = "2025-01-14T10:34:25.386Z" }, + { url = "https://files.pythonhosted.org/packages/3b/24/11c4510de906d77e0cfb5197f1b1445d4fec42c9a39ea853d482698ac681/wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8", size = 88690, upload-time = "2025-01-14T10:34:28.058Z" }, + { url = "https://files.pythonhosted.org/packages/71/d7/cfcf842291267bf455b3e266c0c29dcb675b5540ee8b50ba1699abf3af45/wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6", size = 80861, upload-time = "2025-01-14T10:34:29.167Z" }, + { url = "https://files.pythonhosted.org/packages/d5/66/5d973e9f3e7370fd686fb47a9af3319418ed925c27d72ce16b791231576d/wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc", size = 89174, upload-time = "2025-01-14T10:34:31.702Z" }, + { url = "https://files.pythonhosted.org/packages/a7/d3/8e17bb70f6ae25dabc1aaf990f86824e4fd98ee9cadf197054e068500d27/wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2", size = 86721, upload-time = "2025-01-14T10:34:32.91Z" }, + { url = "https://files.pythonhosted.org/packages/6f/54/f170dfb278fe1c30d0ff864513cff526d624ab8de3254b20abb9cffedc24/wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = 
"sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b", size = 79763, upload-time = "2025-01-14T10:34:34.903Z" }, + { url = "https://files.pythonhosted.org/packages/4a/98/de07243751f1c4a9b15c76019250210dd3486ce098c3d80d5f729cba029c/wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504", size = 87585, upload-time = "2025-01-14T10:34:36.13Z" }, + { url = "https://files.pythonhosted.org/packages/f9/f0/13925f4bd6548013038cdeb11ee2cbd4e37c30f8bfd5db9e5a2a370d6e20/wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a", size = 36676, upload-time = "2025-01-14T10:34:37.962Z" }, + { url = "https://files.pythonhosted.org/packages/bf/ae/743f16ef8c2e3628df3ddfd652b7d4c555d12c84b53f3d8218498f4ade9b/wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845", size = 38871, upload-time = "2025-01-14T10:34:39.13Z" }, + { url = "https://files.pythonhosted.org/packages/3d/bc/30f903f891a82d402ffb5fda27ec1d621cc97cb74c16fea0b6141f1d4e87/wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192", size = 56312, upload-time = "2025-01-14T10:34:40.604Z" }, + { url = "https://files.pythonhosted.org/packages/8a/04/c97273eb491b5f1c918857cd26f314b74fc9b29224521f5b83f872253725/wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b", size = 40062, upload-time = "2025-01-14T10:34:45.011Z" }, + { url = "https://files.pythonhosted.org/packages/4e/ca/3b7afa1eae3a9e7fefe499db9b96813f41828b9fdb016ee836c4c379dadb/wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0", size = 40155, upload-time = "2025-01-14T10:34:47.25Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/be/7c1baed43290775cb9030c774bc53c860db140397047cc49aedaf0a15477/wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306", size = 113471, upload-time = "2025-01-14T10:34:50.934Z" }, + { url = "https://files.pythonhosted.org/packages/32/98/4ed894cf012b6d6aae5f5cc974006bdeb92f0241775addad3f8cd6ab71c8/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb", size = 101208, upload-time = "2025-01-14T10:34:52.297Z" }, + { url = "https://files.pythonhosted.org/packages/ea/fd/0c30f2301ca94e655e5e057012e83284ce8c545df7661a78d8bfca2fac7a/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681", size = 109339, upload-time = "2025-01-14T10:34:53.489Z" }, + { url = "https://files.pythonhosted.org/packages/75/56/05d000de894c4cfcb84bcd6b1df6214297b8089a7bd324c21a4765e49b14/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6", size = 110232, upload-time = "2025-01-14T10:34:55.327Z" }, + { url = "https://files.pythonhosted.org/packages/53/f8/c3f6b2cf9b9277fb0813418e1503e68414cd036b3b099c823379c9575e6d/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6", size = 100476, upload-time = "2025-01-14T10:34:58.055Z" }, + { url = "https://files.pythonhosted.org/packages/a7/b1/0bb11e29aa5139d90b770ebbfa167267b1fc548d2302c30c8f7572851738/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f", size = 106377, upload-time = "2025-01-14T10:34:59.3Z" }, + 
{ url = "https://files.pythonhosted.org/packages/6a/e1/0122853035b40b3f333bbb25f1939fc1045e21dd518f7f0922b60c156f7c/wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555", size = 37986, upload-time = "2025-01-14T10:35:00.498Z" }, + { url = "https://files.pythonhosted.org/packages/09/5e/1655cf481e079c1f22d0cabdd4e51733679932718dc23bf2db175f329b76/wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c", size = 40750, upload-time = "2025-01-14T10:35:03.378Z" }, + { url = "https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594, upload-time = "2025-01-14T10:35:44.018Z" }, +] + [[package]] name = "wsproto" version = "1.2.0"