From 026bf9082f6102f2b7bce8b7cde37f9062701742 Mon Sep 17 00:00:00 2001 From: Stiaan Smit Date: Fri, 3 Apr 2026 18:21:51 +0000 Subject: [PATCH] feat(gemini): add service_tier support for Flex Inference Add serviceTier provider option to Gemini Text, Stream, and Structured handlers, enabling Google's Flex Inference API. This provides 50% cost reduction for latency-tolerant workloads. Follows the same pattern as the existing OpenAI service_tier implementation. Passes service_tier at the top level of the Gemini REST request body. Ref: https://ai.google.dev/gemini-api/docs/flex-inference --- src/Providers/Gemini/Handlers/Stream.php | 1 + src/Providers/Gemini/Handlers/Structured.php | 1 + src/Providers/Gemini/Handlers/Text.php | 1 + tests/Providers/Gemini/GeminiStreamTest.php | 24 ++++++++++++ .../Providers/Gemini/GeminiStructuredTest.php | 29 ++++++++++++++ tests/Providers/Gemini/GeminiTextTest.php | 38 +++++++++++++++++++ 6 files changed, 94 insertions(+) diff --git a/src/Providers/Gemini/Handlers/Stream.php b/src/Providers/Gemini/Handlers/Stream.php index e7acfad33..0471a9e1c 100644 --- a/src/Providers/Gemini/Handlers/Stream.php +++ b/src/Providers/Gemini/Handlers/Stream.php @@ -507,6 +507,7 @@ protected function sendRequest(Request $request): Response 'tools' => $tools !== [] ? $tools : null, 'tool_config' => $request->toolChoice() ? ToolChoiceMap::map($request->toolChoice()) : null, 'safetySettings' => $providerOptions['safetySettings'] ?? null, + 'service_tier' => $providerOptions['serviceTier'] ?? null, ]) ); diff --git a/src/Providers/Gemini/Handlers/Structured.php b/src/Providers/Gemini/Handlers/Structured.php index 6d4870f59..43e164ccd 100644 --- a/src/Providers/Gemini/Handlers/Structured.php +++ b/src/Providers/Gemini/Handlers/Structured.php @@ -137,6 +137,7 @@ public function sendRequest(Request $request): array 'tools' => $tools !== [] ? $tools : null, 'tool_config' => $request->toolChoice() ? 
ToolChoiceMap::map($request->toolChoice()) : null, 'safetySettings' => $providerOptions['safetySettings'] ?? null, + 'service_tier' => $providerOptions['serviceTier'] ?? null, ]) ); diff --git a/src/Providers/Gemini/Handlers/Text.php b/src/Providers/Gemini/Handlers/Text.php index fce051b67..017e1f445 100644 --- a/src/Providers/Gemini/Handlers/Text.php +++ b/src/Providers/Gemini/Handlers/Text.php @@ -119,6 +119,7 @@ protected function sendRequest(Request $request): ClientResponse 'tools' => $tools !== [] ? $tools : null, 'tool_config' => $request->toolChoice() ? ToolChoiceMap::map($request->toolChoice()) : null, 'safetySettings' => $providerOptions['safetySettings'] ?? null, + 'service_tier' => $providerOptions['serviceTier'] ?? null, ]) ); diff --git a/tests/Providers/Gemini/GeminiStreamTest.php b/tests/Providers/Gemini/GeminiStreamTest.php index 92e15dae3..fc68e6a43 100644 --- a/tests/Providers/Gemini/GeminiStreamTest.php +++ b/tests/Providers/Gemini/GeminiStreamTest.php @@ -440,3 +440,27 @@ expect($toolCalls[1]->reasoningId)->not->toBeNull(); expect($toolCalls[0]->reasoningId)->toBe($toolCalls[1]->reasoningId); }); + +it('passes service_tier in the request body for streaming', function (): void { + FixtureResponse::fakeResponseSequence('*', 'gemini/stream-basic-text'); + + $response = Prism::text() + ->using(Provider::Gemini, 'gemini-2.5-flash') + ->withPrompt('Summarize this document.') + ->withProviderOptions(['serviceTier' => 'flex']) + ->asStream(); + + // Iterate the stream to completion; events are discarded because only the outgoing request is asserted on + foreach ($response as $event) { + // Intentionally empty: nothing is read from $event + } + + Http::assertSent(function (Request $request): true { + $data = $request->data(); + + expect($data)->toHaveKey('service_tier') + ->and($data['service_tier'])->toBe('flex'); + + return true; + }); +}); diff --git a/tests/Providers/Gemini/GeminiStructuredTest.php b/tests/Providers/Gemini/GeminiStructuredTest.php index c05af645f..9e5c2df58 100644 --- a/tests/Providers/Gemini/GeminiStructuredTest.php +++ 
b/tests/Providers/Gemini/GeminiStructuredTest.php @@ -404,3 +404,32 @@ expect($response->steps[0]->additionalContent['thoughtSummaries'])->toBeArray(); expect($response->steps[0]->additionalContent['thoughtSummaries'][0])->toContain('Let me think about'); }); + +it('passes service_tier in the request body for structured output', function (): void { + FixtureResponse::fakeResponseSequence('*', 'gemini/generate-structured'); + + $schema = new ObjectSchema( + 'output', + 'the output object', + [ + new StringSchema('weather', 'The weather forecast'), + ], + ['weather'] + ); + + Prism::structured() + ->using(Provider::Gemini, 'gemini-2.5-flash') + ->withSchema($schema) + ->withPrompt('What is the weather?') + ->withProviderOptions(['serviceTier' => 'flex']) + ->asStructured(); + + Http::assertSent(function (Request $request): true { + $data = $request->data(); + + expect($data)->toHaveKey('service_tier') + ->and($data['service_tier'])->toBe('flex'); + + return true; + }); +}); diff --git a/tests/Providers/Gemini/GeminiTextTest.php b/tests/Providers/Gemini/GeminiTextTest.php index 354d86cb8..5af317cb7 100644 --- a/tests/Providers/Gemini/GeminiTextTest.php +++ b/tests/Providers/Gemini/GeminiTextTest.php @@ -620,3 +620,41 @@ function (Request $request): bool { }); }); }); + +describe('Flex Inference for Gemini', function (): void { + it('passes service_tier in the request body', function (): void { + FixtureResponse::fakeResponseSequence('*', 'gemini/generate-text-with-a-prompt'); + + Prism::text() + ->using(Provider::Gemini, 'gemini-2.5-flash') + ->withPrompt('Summarize this document.') + ->withProviderOptions(['serviceTier' => 'flex']) + ->asText(); + + Http::assertSent(function (Request $request): true { + $data = $request->data(); + + expect($data)->toHaveKey('service_tier') + ->and($data['service_tier'])->toBe('flex'); + + return true; + }); + }); + + it('does not include service_tier when not set', function (): void { + FixtureResponse::fakeResponseSequence('*', 
'gemini/generate-text-with-a-prompt'); + + Prism::text() + ->using(Provider::Gemini, 'gemini-2.5-flash') + ->withPrompt('Hello') + ->asText(); + + Http::assertSent(function (Request $request): true { + $data = $request->data(); + + expect($data)->not->toHaveKey('service_tier'); + + return true; + }); + }); +});