From 20843dfef9796b92d35f5de8e71263dea860ac69 Mon Sep 17 00:00:00 2001
From: Sara Robinson
Date: Fri, 30 Jan 2026 11:05:44 -0800
Subject: [PATCH] chore: Resolve all evals mypy errors

PiperOrigin-RevId: 863313260
---
 vertexai/_genai/_evals_common.py          | 12 ++---
 vertexai/_genai/_evals_metric_handlers.py |  8 +--
 vertexai/_genai/evals.py                  | 64 +++++++++++++----------
 3 files changed, 47 insertions(+), 37 deletions(-)

diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py
index 5d9a854b81..f33320324a 100644
--- a/vertexai/_genai/_evals_common.py
+++ b/vertexai/_genai/_evals_common.py
@@ -71,7 +71,7 @@
 
 
 @contextlib.contextmanager
-def _temp_logger_level(logger_name: str, level: int):
+def _temp_logger_level(logger_name: str, level: int) -> None:  # type: ignore[misc]
     """Temporarily sets the level of a logger."""
     logger_instance = logging.getLogger(logger_name)
     original_level = logger_instance.getEffectiveLevel()
@@ -95,7 +95,7 @@ def _get_api_client_with_location(
             location,
             api_client.location,
         )
-    return vertexai.Client(
+    return vertexai.Client(  # type: ignore[no-any-return]
         project=api_client.project,
         location=location,
         credentials=api_client._credentials,
@@ -1798,10 +1798,10 @@ def _convert_evaluation_run_results(
     api_client: BaseApiClient,
     evaluation_run_results: types.EvaluationRunResults,
     inference_configs: Optional[dict[str, types.EvaluationRunInferenceConfig]] = None,
-) -> Union[list[types.EvaluationItem], types.EvaluationResult]:
+) -> Optional[types.EvaluationResult]:
     """Retrieves an EvaluationItem from the EvaluationRunResults."""
     if not evaluation_run_results or not evaluation_run_results.evaluation_set:
-        return []
+        return None
 
     evals_module = evals.Evals(api_client_=api_client)
     eval_set = evals_module.get_evaluation_set(
@@ -1823,10 +1823,10 @@ async def _convert_evaluation_run_results_async(
     api_client: BaseApiClient,
     evaluation_run_results: types.EvaluationRunResults,
     inference_configs: Optional[dict[str, types.EvaluationRunInferenceConfig]] = None,
-) -> Union[list[types.EvaluationItem], types.EvaluationResult]:
+) -> Optional[types.EvaluationResult]:
     """Retrieves an EvaluationItem from the EvaluationRunResults."""
     if not evaluation_run_results or not evaluation_run_results.evaluation_set:
-        return []
+        return None
 
     evals_module = evals.AsyncEvals(api_client_=api_client)
     eval_set = await evals_module.get_evaluation_set(
diff --git a/vertexai/_genai/_evals_metric_handlers.py b/vertexai/_genai/_evals_metric_handlers.py
index 9475846fdb..9f4494c74d 100644
--- a/vertexai/_genai/_evals_metric_handlers.py
+++ b/vertexai/_genai/_evals_metric_handlers.py
@@ -617,7 +617,7 @@ def _build_pointwise_input(
 
     def _add_autorater_config(self, payload: dict[str, Any]) -> None:
         """Adds autorater config to the request payload if specified."""
-        autorater_config = {}
+        autorater_config: dict[str, Any] = {}
         if self.metric.judge_model:
             autorater_config["autorater_model"] = self.metric.judge_model
         if self.metric.judge_model_generation_config:
@@ -625,7 +625,7 @@ def _add_autorater_config(self, payload: dict[str, Any]) -> None:
                 self.metric.judge_model_generation_config
             )
         if self.metric.judge_model_sampling_count:
-            autorater_config["sampling_count"] = self.metric.judge_model_sampling_count  # type: ignore[assignment]
+            autorater_config["sampling_count"] = self.metric.judge_model_sampling_count
 
         if not autorater_config:
             return
@@ -989,11 +989,11 @@ def _build_request_payload(
             agent_data=PredefinedMetricHandler._eval_case_to_agent_data(eval_case),
         )
 
-        request_payload = {
+        request_payload: dict[str, Any] = {
            "instance": instance_payload,
        }
 
-        autorater_config = {}
+        autorater_config: dict[str, Any] = {}
         if self.metric.judge_model:
             autorater_config["autorater_model"] = self.metric.judge_model
         if self.metric.judge_model_generation_config:
diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py
index 67bbf44ffa..3632628b87 100644
--- a/vertexai/_genai/evals.py
+++ b/vertexai/_genai/evals.py
@@ -36,7 +36,7 @@
 try:
     from google.adk.agents import LlmAgent
 except ImportError:
-    LlmAgent = None  # type: ignore[assignment]
+    LlmAgent = None
 
 logger = logging.getLogger("vertexai_genai.evals")
 
@@ -1216,10 +1216,10 @@ def evaluate(
             types.EvaluationDatasetOrDict,
             list[types.EvaluationDatasetOrDict],
         ],
-        metrics: list[types.MetricOrDict] = None,
+        metrics: Optional[list[types.MetricOrDict]] = None,
         location: Optional[str] = None,
         config: Optional[types.EvaluateMethodConfigOrDict] = None,
-        **kwargs,
+        **kwargs: Any,
     ) -> types.EvaluationResult:
         """Evaluates candidate responses in the provided dataset(s) using the specified metrics.
 
@@ -1625,24 +1625,28 @@ def create_evaluation_run(
             raise ValueError(
                 "At most one of agent_info or inference_configs can be provided."
             )
-        if agent_info and isinstance(agent_info, dict):
-            agent_info = types.evals.AgentInfo.model_validate(agent_info)
-        if type(dataset).__name__ == "EvaluationDataset":
+        agent_info_pydantic: types.evals.AgentInfo = types.evals.AgentInfo()
+        if agent_info:
+            if isinstance(agent_info, dict):
+                agent_info_pydantic = types.evals.AgentInfo.model_validate(agent_info)
+            else:
+                agent_info_pydantic = agent_info
+        if isinstance(dataset, types.EvaluationDataset):
             if dataset.eval_dataset_df is None:
                 raise ValueError(
                     "EvaluationDataset must have eval_dataset_df populated."
                 )
-            if (
+            if agent_info_pydantic is not None and (
                 dataset.candidate_name
-                and agent_info
-                and agent_info.name
-                and dataset.candidate_name != agent_info.name
+                and agent_info_pydantic
+                and agent_info_pydantic.name
+                and dataset.candidate_name != agent_info_pydantic.name
             ):
                 logger.warning(
                     "Evaluation dataset candidate_name and agent_info.name are different. Please make sure this is intended."
                 )
-            elif dataset.candidate_name is None and agent_info:
-                dataset.candidate_name = agent_info.name
+            elif dataset.candidate_name is None and agent_info_pydantic:
+                dataset.candidate_name = agent_info_pydantic.name
             eval_set = _evals_common._create_evaluation_set_from_dataframe(
                 self._api_client, dest, dataset.eval_dataset_df, dataset.candidate_name
             )
@@ -1656,20 +1660,26 @@ def create_evaluation_run(
         evaluation_config = types.EvaluationRunConfig(
             output_config=output_config, metrics=resolved_metrics
         )
-        if agent_info:
+        if agent_info_pydantic and agent_info_pydantic.name is not None:
             inference_configs = {}
-            inference_configs[agent_info.name] = types.EvaluationRunInferenceConfig(
-                agent_config=types.EvaluationRunAgentConfig(
-                    developer_instruction=genai_types.Content(
-                        parts=[genai_types.Part(text=agent_info.instruction)]
-                    ),
-                    tools=agent_info.tool_declarations,
+            inference_configs[agent_info_pydantic.name] = (
+                types.EvaluationRunInferenceConfig(
+                    agent_config=types.EvaluationRunAgentConfig(
+                        developer_instruction=genai_types.Content(
+                            parts=[
+                                genai_types.Part(text=agent_info_pydantic.instruction)
+                            ]
+                        ),
+                        tools=agent_info_pydantic.tool_declarations,
+                    )
                 )
             )
-            if agent_info.agent_resource_name:
+            if agent_info_pydantic.agent_resource_name:
                 labels = labels or {}
                 labels["vertex-ai-evaluation-agent-engine-id"] = (
-                    agent_info.agent_resource_name.split("reasoningEngines/")[-1]
+                    agent_info_pydantic.agent_resource_name.split("reasoningEngines/")[
+                        -1
+                    ]
                 )
         if not name:
             name = f"evaluation_run_{uuid.uuid4()}"
@@ -2487,12 +2497,12 @@ async def create_evaluation_run(
             )
         if agent_info and isinstance(agent_info, dict):
             agent_info = types.evals.AgentInfo.model_validate(agent_info)
-        if type(dataset).__name__ == "EvaluationDataset":
+        if isinstance(dataset, types.EvaluationDataset):
             if dataset.eval_dataset_df is None:
                 raise ValueError(
                     "EvaluationDataset must have eval_dataset_df populated."
                 )
-            if (
+            if agent_info is not None and (
                 dataset.candidate_name
                 and agent_info.name
                 and dataset.candidate_name != agent_info.name
@@ -2515,7 +2525,7 @@ async def create_evaluation_run(
         evaluation_config = types.EvaluationRunConfig(
             output_config=output_config, metrics=resolved_metrics
         )
-        if agent_info:
+        if agent_info and agent_info.name is not None:
             inference_configs = {}
             inference_configs[agent_info.name] = types.EvaluationRunInferenceConfig(
                 agent_config=types.EvaluationRunAgentConfig(
@@ -2533,7 +2543,7 @@ async def create_evaluation_run(
         if not name:
             name = f"evaluation_run_{uuid.uuid4()}"
 
-        result = await self._create_evaluation_run(  # type: ignore[no-any-return]
+        result = await self._create_evaluation_run(
            name=name,
            display_name=display_name or name,
            data_source=dataset,
@@ -2645,7 +2655,7 @@ async def create_evaluation_item(
         Returns:
             The evaluation item.
         """
-        result = await self._create_evaluation_item(  # type: ignore[no-any-return]
+        result = await self._create_evaluation_item(
            evaluation_item_type=evaluation_item_type,
             gcs_uri=gcs_uri,
             display_name=display_name,
@@ -2676,7 +2686,7 @@ async def create_evaluation_set(
         Returns:
             The evaluation set.
         """
-        result = await self._create_evaluation_set(  # type: ignore[no-any-return]
+        result = await self._create_evaluation_set(
             evaluation_items=evaluation_items,
             display_name=display_name,
             config=config,