From bc4a23b5ff80a9202e94ad0b30731f08404e3086 Mon Sep 17 00:00:00 2001 From: Tung Hoang Date: Sun, 7 Jun 2026 12:00:01 -0700 Subject: [PATCH] fix(optimization): Handle failed inference cases gracefully in GEPA pipeline Fixes three cascading crashes when eval cases fail during GEPA optimization: 1. Initialize inferences to empty list on inference failure - Prevents TypeError when iterating over None - Maintains a consistent type contract (inferences is always a list) - File: local_eval_service.py 2. Use .get() for score lookup in GEPA adapter - Gracefully handles missing scores from failed eval cases - Defaults to 0.0 (conservative default penalizing failing prompts) - Allows optimization to continue with successful cases - File: gepa_root_agent_prompt_optimizer.py 3. Add None check before rounding scores - Handles failed cases whose score is None by reporting 0.0 instead of rounding - Avoids the TypeError that round(None, 2) raises on every Python 3 version - File: local_eval_sampler.py These minimal changes enable graceful degradation when transient failures occur (rate limits, timeouts, API errors), allowing GEPA optimization to run to completion using the eval cases that succeeded. 
Fixes #6004 Related: #5876, #5115, #5403, PR #5878 --- src/google/adk/evaluation/local_eval_service.py | 1 + src/google/adk/optimization/gepa_root_agent_prompt_optimizer.py | 2 +- src/google/adk/optimization/local_eval_sampler.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/google/adk/evaluation/local_eval_service.py b/src/google/adk/evaluation/local_eval_service.py index 1a032bad64..9f759979c9 100644 --- a/src/google/adk/evaluation/local_eval_service.py +++ b/src/google/adk/evaluation/local_eval_service.py @@ -534,4 +534,5 @@ async def _perform_inference_single_eval_item( ) inference_result.status = InferenceStatus.FAILURE inference_result.error_message = str(e) + inference_result.inferences = [] # Ensure inferences is not None on failure return inference_result diff --git a/src/google/adk/optimization/gepa_root_agent_prompt_optimizer.py b/src/google/adk/optimization/gepa_root_agent_prompt_optimizer.py index e9b82cdd50..5f0eb92d17 100644 --- a/src/google/adk/optimization/gepa_root_agent_prompt_optimizer.py +++ b/src/google/adk/optimization/gepa_root_agent_prompt_optimizer.py @@ -147,7 +147,7 @@ def evaluate( trajectories = [] for example_id in batch: - score = result.scores[example_id] + score = result.scores.get(example_id, 0.0) scores.append(score) eval_data = result.data.get(example_id, {}) if result.data else {} diff --git a/src/google/adk/optimization/local_eval_sampler.py b/src/google/adk/optimization/local_eval_sampler.py index b00c34280f..e5d9a41f6c 100644 --- a/src/google/adk/optimization/local_eval_sampler.py +++ b/src/google/adk/optimization/local_eval_sampler.py @@ -289,7 +289,7 @@ def _extract_eval_data( for eval_metric_result in per_invocation_result.eval_metric_results: eval_metric_results.append({ "metric_name": eval_metric_result.metric_name, - "score": round(eval_metric_result.score, 2), # accurate enough + "score": round(eval_metric_result.score, 2) if eval_metric_result.score is not None else 0.0, "eval_status": 
eval_metric_result.eval_status.name, }) per_invocation_result_dict = {