From 931ab68f1f794f193f25c95fbe4c11a413934914 Mon Sep 17 00:00:00 2001
From: "hanzhi.421"
Date: Fri, 10 Apr 2026 14:48:55 +0800
Subject: [PATCH] fix: ark_llm api_key handling and enable responses without caching

---
 veadk/agent.py          |  4 ++++
 veadk/models/ark_llm.py | 22 ++++++++++++++++++----
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/veadk/agent.py b/veadk/agent.py
index abfc668c..32320078 100644
--- a/veadk/agent.py
+++ b/veadk/agent.py
@@ -87,6 +87,8 @@ class Agent(LlmAgent):
         example_store (Optional[BaseExampleProvider]): Example store for providing example Q/A.
         enable_shadowchar (bool): Whether to enable shadow character for the agent.
         enable_dynamic_load_skills (bool): Whether to enable dynamic loading of skills.
+        enable_responses_cache (bool): Whether the Ark Responses API should reuse
+            `previous_response_id` and caching for multi-turn continuation.
     """
 
     model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")
@@ -118,6 +120,7 @@ class Agent(LlmAgent):
     tracers: list[BaseTracer] = []
 
     enable_responses: bool = False
+    enable_responses_cache: bool = True
 
     context_cache_config: Optional[ContextCacheConfig] = None
 
@@ -194,6 +197,7 @@ def model_post_init(self, __context: Any) -> None:
                 model=f"{self.model_provider}/{self.model_name}",
                 api_key=self.model_api_key,
                 api_base=self.model_api_base,
+                enable_responses_cache=self.enable_responses_cache,
                 **self.model_extra_config,
             )
         else:
diff --git a/veadk/models/ark_llm.py b/veadk/models/ark_llm.py
index 3fd4d035..5dc02258 100644
--- a/veadk/models/ark_llm.py
+++ b/veadk/models/ark_llm.py
@@ -481,10 +481,17 @@ def _remove_caching(request_data: dict) -> None:
     request_data.pop("caching", None)
 
 
-def request_reorganization_by_ark(request_data: Dict) -> Dict:
+def request_reorganization_by_ark(
+    request_data: Dict, enable_responses_cache: bool = True
+) -> Dict:
     # 1. model provider
     request_data = get_model_without_provider(request_data)
 
+    if not enable_responses_cache:
+        request_data.pop("previous_response_id", None)
+        _remove_caching(request_data)
+        request_data.pop("store", None)
+
     # 2. filtered input
     request_data["input"] = filtered_inputs(
         request_data.get("input"),
@@ -672,7 +679,9 @@ async def aresponses(
     ) -> Union[ArkTypeResponse, AsyncStream[ResponseStreamEvent]]:
         # 1. Get request params
         api_base = kwargs.pop("api_base", DEFAULT_VIDEO_MODEL_API_BASE)
-        api_key = kwargs.pop("api_key", settings.model.api_key)
+        api_key = kwargs.pop("api_key", None)
+        if api_key is None:
+            api_key = settings.model.api_key
 
         # 2. Call openai responses
         client = AsyncArk(
@@ -689,6 +698,7 @@ class ArkLlm(Gemini):
     llm_client: ArkLlmClient = Field(default_factory=ArkLlmClient)
     _additional_args: Dict[str, Any] = None
     use_interactions_api: bool = True
+    enable_responses_cache: bool = True
 
     def __init__(self, **kwargs):
         # adk version check
@@ -699,12 +709,14 @@ def __init__(self, **kwargs):
                 "`pip install -U 'google-adk>=1.21.0'`"
             )
         super().__init__(**kwargs)
+        self.enable_responses_cache = kwargs.get("enable_responses_cache", True)
         drop_params = kwargs.pop("drop_params", None)
         self._additional_args = dict(kwargs)
         self._additional_args.pop("llm_client", None)
         self._additional_args.pop("messages", None)
         self._additional_args.pop("tools", None)
         self._additional_args.pop("stream", None)
+        self._additional_args.pop("enable_responses_cache", None)
         if drop_params is not None:
             self._additional_args["drop_params"] = drop_params
 
@@ -733,7 +745,7 @@ async def generate_content_async(
         # ------------------------------------------------------ #
         # get previous_response_id
         previous_response_id = None
-        if llm_request.previous_interaction_id:
+        if self.enable_responses_cache and llm_request.previous_interaction_id:
            previous_response_id = llm_request.previous_interaction_id
         responses_args = {
             "model": self.model,
@@ -786,7 +798,9 @@ async def generate_content_async(
     async def generate_content_via_responses(
         self, responses_args: dict, stream: bool = False
     ):
-        responses_args = request_reorganization_by_ark(responses_args)
+        responses_args = request_reorganization_by_ark(
+            responses_args, enable_responses_cache=self.enable_responses_cache
+        )
         if stream:
             responses_args["stream"] = True
         async for part in await self.llm_client.aresponses(**responses_args):
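
Reviewer note (not part of the patch): a minimal usage sketch of the new flag,
assuming the Agent constructor accepts the fields shown in veadk/agent.py above
and that enable_responses=True selects the Responses code path, as model_post_init
suggests. The provider and model names below are placeholder assumptions, not
values from this repository.

    from veadk.agent import Agent

    # With the cache disabled, request_reorganization_by_ark strips
    # "previous_response_id", the "caching" key (via _remove_caching), and
    # "store" from every Responses request, so each turn resends the full
    # history instead of continuing server-side from the previous response.
    agent = Agent(
        model_provider="openai",       # placeholder provider
        model_name="some-ark-model",   # placeholder model name
        enable_responses=True,         # route calls through the Responses API
        enable_responses_cache=False,  # exercise the new no-caching path
    )

The api_key change is independent: previously an explicit api_key=None kwarg
was passed through as None; now None falls back to settings.model.api_key.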