From c5da0f9307b4121f506fe47eb3bd2de29907c8b9 Mon Sep 17 00:00:00 2001 From: Corie Watson Date: Tue, 6 Jan 2026 15:09:49 +0000 Subject: [PATCH 01/12] feat(anthropic): prompt caching --- js/plugins/anthropic/src/runner/base.ts | 26 ++--- js/plugins/anthropic/src/runner/beta.ts | 16 +--- js/plugins/anthropic/src/runner/stable.ts | 62 ++++++------ js/testapps/anthropic/package.json | 1 + js/testapps/anthropic/src/long-text.txt | 55 +++++++++++ .../anthropic/src/stable/prompt_caching.ts | 94 +++++++++++++++++++ 6 files changed, 193 insertions(+), 61 deletions(-) create mode 100644 js/testapps/anthropic/src/long-text.txt create mode 100644 js/testapps/anthropic/src/stable/prompt_caching.ts diff --git a/js/plugins/anthropic/src/runner/base.ts b/js/plugins/anthropic/src/runner/base.ts index e6b7132e28..4a3a4ba1e6 100644 --- a/js/plugins/anthropic/src/runner/base.ts +++ b/js/plugins/anthropic/src/runner/base.ts @@ -395,39 +395,31 @@ export abstract class BaseRunner { * toAnthropicMessageContent implementation. */ protected toAnthropicMessages(messages: MessageData[]): { - system?: string; + system?: RunnerContentBlockParam[]; messages: RunnerMessageParam[]; } { - let system: string | undefined; + let system: RunnerContentBlockParam[] | undefined; if (messages[0]?.role === 'system') { const systemMessage = messages[0]; - const textParts: string[] = []; + messages = messages.slice(1); for (const part of systemMessage.content ?? []) { - if (part.text) { - textParts.push(part.text); - } else if (part.media || part.toolRequest || part.toolResponse) { + if (part.media || part.toolRequest || part.toolResponse) { throw new Error( 'System messages can only contain text content. Media, tool requests, and tool responses are not supported in system messages.' ); } } - // Concatenate multiple text parts into a single string. - // Note: The Anthropic SDK supports system as string | Array, - // so we could alternatively preserve the multi-part structure as: - // system = textParts.map(text => ({ type: 'text', text })) - // However, concatenation is simpler and maintains semantic equivalence while - // keeping the cache control logic straightforward in the concrete runners. - system = textParts.length > 0 ? textParts.join('\n\n') : undefined; + system = systemMessage.content.map((part) => + this.toAnthropicMessageContent(part) + ); } - const messagesToIterate = - system !== undefined ? messages.slice(1) : messages; const anthropicMsgs: RunnerMessageParam[] = []; - for (const message of messagesToIterate) { + for (const message of messages) { const msg = new GenkitMessage(message); // Detect tool message kind from Genkit Parts (no SDK typing needed) @@ -449,7 +441,7 @@ export abstract class BaseRunner { anthropicMsgs.push({ role, content }); } - return { system, messages: anthropicMsgs }; + return { system: system, messages: anthropicMsgs }; } /** diff --git a/js/plugins/anthropic/src/runner/beta.ts b/js/plugins/anthropic/src/runner/beta.ts index 099a589909..73e4b50a89 100644 --- a/js/plugins/anthropic/src/runner/beta.ts +++ b/js/plugins/anthropic/src/runner/beta.ts @@ -307,20 +307,6 @@ export class BetaRunner extends BaseRunner { const mappedModelName = request.config?.version ?? extractVersion(model, modelName); - let betaSystem: BetaMessageCreateParamsNonStreaming['system']; - - if (system !== undefined) { - betaSystem = cacheSystemPrompt - ? [ - { - type: 'text' as const, - text: system, - cache_control: { type: 'ephemeral' as const }, - }, - ] - : system; - } - const thinkingConfig = this.toAnthropicThinkingConfig( request.config?.thinking ) as BetaMessageCreateParams['thinking'] | undefined; @@ -342,7 +328,7 @@ export class BetaRunner extends BaseRunner { max_tokens: request.config?.maxOutputTokens ?? this.DEFAULT_MAX_OUTPUT_TOKENS, messages, - system: betaSystem, + system: system as BetaTextBlockParam[], stop_sequences: request.config?.stopSequences, temperature: request.config?.temperature, top_k: topK, diff --git a/js/plugins/anthropic/src/runner/stable.ts b/js/plugins/anthropic/src/runner/stable.ts index 1496029ebd..54f50af2d7 100644 --- a/js/plugins/anthropic/src/runner/stable.ts +++ b/js/plugins/anthropic/src/runner/stable.ts @@ -107,6 +107,9 @@ export class Runner extends BaseRunner { type: 'text', text: part.text, citations: null, + cache_control: part.metadata?.cache_control as + | TextBlockParam['cache_control'] + | null, }; } @@ -115,6 +118,9 @@ export class Runner extends BaseRunner { return { type: 'document', source: this.toPdfDocumentSource(part.media), + cache_control: part.metadata?.cache_control as + | DocumentBlockParam['cache_control'] + | null, }; } @@ -127,6 +133,9 @@ export class Runner extends BaseRunner { data: source.data, media_type: source.mediaType, }, + cache_control: part.metadata?.cache_control as + | ImageBlockParam['cache_control'] + | null, }; } return { @@ -135,6 +144,9 @@ export class Runner extends BaseRunner { type: 'url', url: source.url, }, + cache_control: part.metadata?.cache_control as + | ImageBlockParam['cache_control'] + | null, }; } @@ -151,6 +163,9 @@ export class Runner extends BaseRunner { id: part.toolRequest.ref, name: part.toolRequest.name, input: part.toolRequest.input, + cache_control: part.metadata?.cache_control as + | ToolUseBlockParam['cache_control'] + | null, }; } @@ -166,6 +181,9 @@ export class Runner extends BaseRunner { type: 'tool_result', tool_use_id: part.toolResponse.ref, content: [this.toAnthropicToolResponseContent(part)], + cache_control: part.metadata?.cache_control as + | ToolResultBlockParam['cache_control'] + | null, }; } @@ -192,19 +210,6 @@ export class Runner extends BaseRunner { const mappedModelName = request.config?.version ?? extractVersion(model, modelName); - const systemValue = - system === undefined - ? undefined - : cacheSystemPrompt - ? [ - { - type: 'text' as const, - text: system, - cache_control: { type: 'ephemeral' as const }, - }, - ] - : system; - const thinkingConfig = this.toAnthropicThinkingConfig( request.config?.thinking ) as MessageCreateParams['thinking'] | undefined; @@ -226,7 +231,7 @@ export class Runner extends BaseRunner { max_tokens: request.config?.maxOutputTokens ?? this.DEFAULT_MAX_OUTPUT_TOKENS, messages, - system: systemValue, + system: system as TextBlockParam[], stop_sequences: request.config?.stopSequences, temperature: request.config?.temperature, top_k: topK, @@ -257,19 +262,6 @@ export class Runner extends BaseRunner { const mappedModelName = request.config?.version ?? extractVersion(model, modelName); - const systemValue = - system === undefined - ? undefined - : cacheSystemPrompt - ? [ - { - type: 'text' as const, - text: system, - cache_control: { type: 'ephemeral' as const }, - }, - ] - : system; - const thinkingConfig = this.toAnthropicThinkingConfig( request.config?.thinking ) as MessageCreateParams['thinking'] | undefined; @@ -292,7 +284,7 @@ export class Runner extends BaseRunner { request.config?.maxOutputTokens ?? this.DEFAULT_MAX_OUTPUT_TOKENS, messages, stream: true, - system: systemValue, + system: system as TextBlockParam[], stop_sequences: request.config?.stopSequences, temperature: request.config?.temperature, top_k: topK, @@ -311,6 +303,7 @@ export class Runner extends BaseRunner { body: MessageCreateParamsNonStreaming, abortSignal: AbortSignal ): Promise { + console.log('body in createMessage', JSON.stringify(body, null, 2)); return await this.client.messages.create(body, { signal: abortSignal }); } @@ -441,7 +434,9 @@ export class Runner extends BaseRunner { }; case 'text': - return { text: contentBlock.text }; + return { + text: contentBlock.text, + }; case 'thinking': return this.createThinkingPart( @@ -498,6 +493,15 @@ export class Runner extends BaseRunner { usage: { inputTokens: response.usage.input_tokens, outputTokens: response.usage.output_tokens, + custom: { + cache_creation_input_tokens: + response.usage.cache_creation_input_tokens ?? 0, + cache_read_input_tokens: response.usage.cache_read_input_tokens ?? 0, + ephemeral_5m_input_tokens: + response.usage.cache_creation?.ephemeral_5m_input_tokens ?? 0, + ephemeral_1h_input_tokens: + response.usage.cache_creation?.ephemeral_1h_input_tokens ?? 0, + }, }, custom: response, }; diff --git a/js/testapps/anthropic/package.json b/js/testapps/anthropic/package.json index 0f7ac15001..032e39ab7a 100644 --- a/js/testapps/anthropic/package.json +++ b/js/testapps/anthropic/package.json @@ -19,6 +19,7 @@ "dev:stable:pdf": "genkit start -- npx tsx --watch src/stable/pdf.ts", "dev:stable:tools": "genkit start -- npx tsx --watch src/stable/tools.ts", "dev:stable:vision": "genkit start -- npx tsx --watch src/stable/vision.ts", + "dev:stable:prompt-caching": "genkit start -- npx tsx --watch src/stable/prompt_caching.ts", "genkit:dev": "cross-env GENKIT_ENV=dev npm run dev:stable", "genkit:start": "cross-env GENKIT_ENV=dev genkit start -- tsx --watch src/stable/basic.ts", "dev": "export GENKIT_RUNTIME_ID=$(openssl rand -hex 8) && node lib/stable/basic.js 2>&1" diff --git a/js/testapps/anthropic/src/long-text.txt b/js/testapps/anthropic/src/long-text.txt new file mode 100644 index 0000000000..cd345a6221 --- /dev/null +++ b/js/testapps/anthropic/src/long-text.txt @@ -0,0 +1,55 @@ +On the morning the town decided to rename its streets, nothing about the air suggested history was about to be edited with a clipboard and a pen. The sky looked the way it always did in that part of the world: a pale, cautious blue that never committed to drama, never rose to the kind of brilliance that made people stop in the middle of errands. It was a sky that tolerated routines. Beneath it, the town behaved like an organism that had memorized its own pulse. Doors opened. Kettles boiled. Shoes thudded against porch steps. Somewhere a radio announced the time with an oddly triumphant beep, as if the minute itself were an accomplishment. + +It began, as these things often do, with an official notice taped to the glass of the bakery. Not pinned to a board, not placed politely on a counter. Taped, right at eye level, where the cinnamon rolls usually performed their most persuasive work. The notice used a font that tried to look friendly while still remaining unmistakably bureaucratic. It stated, with calm confidence, that several street names would be updated to better reflect the community’s “evolving identity,” and that residents should consult the attached list to see whether their address was affected. It offered no explanation for what an evolving identity looked like on a map, but it did provide a phone number for questions, and it did so in bold, like a promise. + +People read it the way they read weather warnings: with a half-belief that inconvenience is always aimed at someone else. They leaned in, squinting, the smell of warm sugar and yeast in their noses, scanning down the page for familiar words. Some smiled with relief, as if having your street left untouched was proof of personal virtue. Others frowned, lips pursed, seeing their daily geography suddenly rendered provisional. When you live somewhere long enough, you stop thinking of street names as labels and start thinking of them as part of the local climate. You don’t say you live on “Maple Avenue” the way you say you wear a blue shirt. You say it like it’s a small fact that holds other facts in place: the direction the wind hits your porch, the way the afternoon light lands on the mailbox, the route the neighborhood cats prefer. + +By midmorning, the news had traveled to the hardware store, the laundromat, the small library with the stubborn carpet pattern that looked like it had been designed to hide secrets. The librarian, a woman with a silver braid and the quiet authority of someone who had shushed generations, had already printed extra copies of the list. She laid them out near the return slot, next to a jar of “lost buttons,” as if street names and stray buttons belonged to the same category of misplaced objects. + +The list itself was long enough to suggest ambition. It included some changes that felt harmless, almost cosmetic. “Pine Lane” would become “Pine Walk,” a shift as gentle as changing your shoes without changing your outfit. “Harrison Street” would become “Harbor Street,” which people assumed was a typo until they remembered there was a pond nearby, and that the town occasionally liked to exaggerate its geography into something grander. But some changes were unmistakably different. “Foundry Road,” named after a building that had been torn down decades ago, would become “Sunrise Road,” as if the town wanted to replace industry with optimism. “Old Quarry Path” would become “Pebble Way,” which sounded like a children’s book. And then there were names nobody recognized at all: “Linden Spiral,” “Ciderglass Terrace,” “Juniper Echo.” They sounded like marketing campaigns for scented candles, or the kinds of titles poets chose when they wanted to be gently mysterious. + +At first, people responded with the usual mixture of annoyance and humor. Someone joked that the town council must have hired a fantasy novelist. Someone else said it was probably a ploy to confuse delivery drivers and reduce the amount of junk mail. A teenager, hearing that “Birch Street” might become “Birch River,” asked whether they were finally getting an actual river, and when told no, shrugged with the disappointment of someone who has learned not to trust labels. + +But behind the jokes was a quieter discomfort, the sense that something stable was being rearranged. Because changing street names isn’t like repainting a bench. It forces you to rewrite small scripts you didn’t know you’d memorized. It changes how you tell someone to find you. It changes how you picture home. It changes the way you remember: “We used to walk down Elm…” becomes a sentence that needs correction, like a story interrupted. + +The town held a meeting that evening in the gymnasium of the local school, where the air always smelled faintly of varnish and old basketballs. Folding chairs formed loose rows. A microphone was set up at the front, and it squealed once, as if to remind everyone who had the power to make noise official. The council members sat behind a long table with a cloth draped over it, the kind of cloth that tried to make a temporary arrangement seem ceremonial. There was a pitcher of water and plastic cups. There were nameplates. The nameplates were crisp and newly printed, which felt, to some, like a bad omen. + +The mayor—whose voice was naturally loud in a way that made you suspect she talked to birds the same way she talked to people—opened the meeting with a speech about “modernization” and “community coherence.” She spoke about honoring local heritage while embracing new stories. She said the current names had “grown inconsistent.” She said the town’s map should “read like a narrative.” Nobody knew what that meant, but it sounded impressive enough that some people nodded, as if they understood narrative maps the way they understood potholes. + +Then the questions began. + +A man with a baseball cap stood and asked who had chosen the new names. A council member with a neat mustache said a committee had been formed. The man asked who was on the committee. The council member said it included “a range of stakeholders.” Someone laughed, not loudly, but sharply, like a cough. The man asked why “Foundry Road” needed to become “Sunrise Road.” The council member explained that the town wanted names that inspired “forward-thinking feelings.” The man said he felt forward-thinking enough without his road pretending to be the dawn. + +A woman in a green sweater asked about mail delivery. The mayor said the post office had been informed, and there would be a “grace period.” The woman asked how long the grace period would last. The mayor said the details were being worked out. Someone muttered that details always got worked out after people got annoyed. The librarian raised her hand and asked whether the old names would be archived anywhere for historical reference. The mayor brightened, happy to answer a question that sounded supportive, and said yes, there would be a “heritage display.” People imagined a small plaque somewhere nobody visited. + +As the meeting continued, a strange thing happened. The more the council explained, the less satisfied people became—not just because of the practical inconvenience, but because the explanations felt like they belonged to a different town. The language of “identity” and “narrative” sounded like it had been imported from somewhere else, a place where towns existed as concepts rather than as streets you swept and sidewalks you shoveled. The residents weren’t against change, exactly. They just wanted change to have a recognizable reason, the way winter has a reason, the way a tree grows because it cannot do otherwise. + +Near the end of the meeting, a young man—new to town, with the earnestness of someone who still believes meetings can be solved—stood and said he actually liked the new names. He said “Juniper Echo” sounded beautiful. He said it made the town feel “alive.” Some people rolled their eyes. A few looked thoughtful. Beauty is not a negligible argument, but it’s an argument that makes practical people nervous, because it can’t be measured in inches or minutes or dollars. The young man added, quickly, that he understood the inconvenience, but maybe the town was allowed to reinvent itself sometimes. An older woman near the back said, “We reinvent ourselves every time the roof leaks.” The room laughed, and for a moment, the tension softened into something almost friendly. + +The council promised to consider feedback. People left with pamphlets and uneaten cookies. Outside, the night had cooled, and streetlights cast yellow pools that made the sidewalks look like they had been painted. A couple of neighbors stood near the entrance and compared the new names to the old ones as if they were swapping stories about distant relatives. Someone said, “Ciderglass Terrace? What does that even mean?” Someone else said, “It means whoever named it has never waited for a snowplow.” + +Over the next weeks, the town began to change in small, almost comical ways. New street signs appeared, their metal posts shining too brightly at first, like teeth that hadn’t yet learned how to belong in a mouth. Some people refused to use the new names out of stubborn principle. Others used them with a hint of performance, as if daring the syllables to feel natural. Children adapted fastest, because children treat names like costumes: something you can try on and forget about without mourning. Delivery drivers adapted slowly, because their work depended on the old map’s logic, and because GPS systems lagged behind human decisions like a stubborn animal. + +Conversations gained new pauses. “I live on—well, it used to be—now it’s…” People developed a habit of adding parenthetical directions, as if language alone could no longer do the job. The town’s Facebook group (which had previously been dedicated mostly to lost pets and debates about leaf blowers) became a battlefield of opinions. Some posts were angry. Some were sarcastic. Some were surprisingly poetic, with people arguing that certain names had a “sound” that matched the street’s personality. Someone claimed “Pebble Way” was insulting because the road was full of potholes, not pebbles. Someone else said maybe the name was aspirational. + +And then, quietly, without any committee announcing it, residents began to make their own hybrid system. They would say “Sunrise—old Foundry” or “Linden Spiral, near where Harrison used to be.” They created a linguistic bridge between past and present, the way you might keep an old nickname for someone even after they insist they’ve grown out of it. This bridging wasn’t official, but it was effective. It allowed people to keep their memories intact while still complying with new mail labels. It was messy and human and, ironically, more coherent than the town’s attempt at narrative. + +The most interesting changes weren’t on the signs. They were in the way people started to talk about place, and therefore about themselves. A street name is a shared reference point, a tiny agreement that says: when I speak this word, you will picture the same corner of the world. Changing the word changes the agreement, and agreements are the invisible architecture of community. In the awkwardness of learning new names, people were forced to acknowledge how much they relied on shared language. They were forced to admit that home wasn’t only a structure of wood and paint, but also a structure of words. + +One afternoon, the librarian walked down what was now called “Juniper Echo,” carrying a stack of returned books in a canvas bag. The street had no junipers, at least none she could see, and no echo unless you counted the way her footsteps bounced off parked cars. She paused by one of the new signs and read it again, slowly, as if the letters might rearrange themselves into something familiar. A boy on a bicycle stopped nearby and asked her what it meant. She said she didn’t know. The boy said he liked it anyway. He said it sounded like a secret. Then he pedaled off, and the librarian watched him go, thinking how children accept secrets more easily than explanations. + +That evening, she added a small section to the library’s local history shelf. She labeled it “Former Street Names and Their Stories.” She included photocopies of old maps, newspaper clippings about the foundry that used to exist, a photo of the quarry workers standing in front of their tools, faces squinting in sunlight. She wrote short notes explaining how “Maple Avenue” got its name, and why “Old Quarry Path” had mattered even after the quarry was gone. She didn’t do this out of rebellion, exactly, but out of care. Names change. People forget. Libraries, at their best, are places where forgetting is slowed down. + +Weeks became months. The new names started to feel less like intruders and more like furniture—strange at first, but eventually part of the room. Some residents admitted, reluctantly, that “Harbor Street” did sound nicer than “Harrison Street,” which had always been confused with “Harris Street” anyway. Others never forgave “Ciderglass Terrace,” but they still wrote it on forms. The town’s identity did evolve, if only because anything that survives time evolves by definition. + +And yet, the old names didn’t vanish. They remained in conversation, in jokes, in the way people gave directions: “Turn left where Maple used to be.” They remained in the minds of those who had lived long enough to have a relationship with the past version of the map. They remained in the way the town’s oldest residents sometimes paused, searching for the “right” word and finding two words instead. + +If you visited the town later, you might not notice anything unusual. You would see neat signs, consistent fonts, a map that indeed read like someone’s idea of a story. You might compliment “Juniper Echo” without realizing it had once been something ordinary like “Third Street.” You might assume the town had always spoken this way. But if you listened carefully—at the bakery, at the hardware store, at the library—you would hear the faint double-language of a place in transition. You would hear people carrying two maps at once: the one on paper and the one in memory. + +And perhaps you would realize that this is how communities work in general. Not with clean edits, not with perfect coherence, but with overlap and improvisation. The official world loves the idea of replacing, of wiping away old labels and setting down new ones as if the earth were a chalkboard. The lived world prefers layering. It keeps the old underneath, not to resist the new, but to give it depth. A renamed street is still the same stretch of pavement, still lined with the same trees and mailboxes and worn places where bicycles cut the corner. But the name—like a story—tells you what people want to notice. + +In that sense, the town had not simply changed its streets. It had revealed something about the way it wanted to be perceived. It wanted to sound hopeful, natural, maybe even a little enchanting. It wanted its map to feel like a poem you could walk through. Whether that desire was wise or ridiculous depended on who you asked, and on whether you valued accuracy over aspiration. But either way, the town had made a choice, and choices leave traces. + +One year after the renaming, a small article appeared in the local paper. It interviewed residents about the changes. Some people had forgotten they ever disliked the new names. Others still insisted they were nonsense. The article ended with a quote from the librarian, who said, “A name is a handle we use to lift a place into our minds. If you change the handle, you don’t change the place, but you do change how we carry it.” + +The paper printed that sentence in italics, as if it were wiser than the rest. And maybe it was. Or maybe it was simply true in a way that felt comforting: the reminder that even when the labels shift, the act of living continues—kettles boiling, shoes on porch steps, radios announcing time as if time needs announcing. Under the same pale sky, people found their way home, sometimes by the new names, sometimes by the old ones, and often by a mixture of both, because that is what humans do when the world is rewritten: they keep walking, and they keep speaking, and they make the new language fit the shape of their lives. \ No newline at end of file diff --git a/js/testapps/anthropic/src/stable/prompt_caching.ts b/js/testapps/anthropic/src/stable/prompt_caching.ts new file mode 100644 index 0000000000..efb1318bfb --- /dev/null +++ b/js/testapps/anthropic/src/stable/prompt_caching.ts @@ -0,0 +1,94 @@ +/** + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { anthropic } from '@genkit-ai/anthropic'; +import { promises as fs } from 'fs'; +import { genkit } from 'genkit'; +import path from 'path'; + +const ai = genkit({ + plugins: [ + // Configure the plugin with environment-driven API key + anthropic(), + ], +}); + +const longTextPath = path.join(__dirname, '../long-text.txt'); + +ai.defineFlow('caching system prompt', async () => { + const longTextBuffer = await fs.readFile(longTextPath); + const longText = longTextBuffer.toString('utf-8'); + + const response = await ai.generate({ + model: anthropic.model('claude-sonnet-4-5'), + system: { + text: `You are a friendly Claude assistant. Greet the user briefly. You will be given a long text to read and answer questions about it. + ${longText}`, + metadata: { + cache_control: { + type: 'ephemeral', + ttl: '5m', + }, + }, + }, + messages: [ + { + role: 'user', + content: [ + { text: 'What is the main idea of the text?' }, + ], + }, + ], + }); + + return { + text: response.text, + usage: response.usage, + }; +}); + +ai.defineFlow('caching user prompt', async () => { + const longTextBuffer = await fs.readFile(longTextPath); + const longText = longTextBuffer.toString('utf-8'); + + const response = await ai.generate({ + model: anthropic.model('claude-sonnet-4-5'), + system: { + text: 'You are a friendly Claude assistant. Greet the user briefly. You will be given a long text to read and answer questions about it.', + }, + messages: [ + { + role: 'user', + content: [ + { + text: longText, + metadata: { + cache_control: { + type: 'ephemeral', + ttl: '5m', + }, + }, + }, + ], + }, + ], + }); + + return { + text: response.text, + usage: response.usage, + }; +}); From 2f85c216a117453d309821115ae2ad1a67855131 Mon Sep 17 00:00:00 2001 From: Corie Watson Date: Tue, 6 Jan 2026 15:13:13 +0000 Subject: [PATCH 02/12] chore: format --- js/testapps/anthropic/src/stable/prompt_caching.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/js/testapps/anthropic/src/stable/prompt_caching.ts b/js/testapps/anthropic/src/stable/prompt_caching.ts index efb1318bfb..3a456d62ee 100644 --- a/js/testapps/anthropic/src/stable/prompt_caching.ts +++ b/js/testapps/anthropic/src/stable/prompt_caching.ts @@ -47,9 +47,7 @@ ai.defineFlow('caching system prompt', async () => { messages: [ { role: 'user', - content: [ - { text: 'What is the main idea of the text?' }, - ], + content: [{ text: 'What is the main idea of the text?' }], }, ], }); From bbf1b451c53c6cd156b99a0ea0a915aae4b13b74 Mon Sep 17 00:00:00 2001 From: Corie Watson Date: Tue, 6 Jan 2026 16:48:52 +0000 Subject: [PATCH 03/12] refactor(anthropic): remove `cacheSystemPrompt` --- js/plugins/anthropic/src/index.ts | 14 +++----------- js/plugins/anthropic/src/models.ts | 8 +------- js/plugins/anthropic/src/runner/base.ts | 22 ++++------------------ js/plugins/anthropic/src/runner/beta.ts | 21 +++------------------ js/plugins/anthropic/src/runner/stable.ts | 6 ++---- js/plugins/anthropic/src/types.ts | 2 -- 6 files changed, 13 insertions(+), 60 deletions(-) diff --git a/js/plugins/anthropic/src/index.ts b/js/plugins/anthropic/src/index.ts index d5a0fef9cb..33f1321dba 100644 --- a/js/plugins/anthropic/src/index.ts +++ b/js/plugins/anthropic/src/index.ts @@ -33,8 +33,6 @@ import { } from './models.js'; import { InternalPluginOptions, PluginOptions, __testClient } from './types.js'; -const PROMPT_CACHING_BETA_HEADER_VALUE = 'prompt-caching-2024-07-31'; - /** * Gets or creates an Anthropic client instance. * Supports test client injection for internal testing. @@ -53,11 +51,7 @@ function getAnthropicClient(options?: PluginOptions): Anthropic { 'Please pass in the API key or set the ANTHROPIC_API_KEY environment variable' ); } - const defaultHeaders: Record = {}; - if (options?.cacheSystemPrompt) { - defaultHeaders['anthropic-beta'] = PROMPT_CACHING_BETA_HEADER_VALUE; - } - return new Anthropic({ apiKey, defaultHeaders }); + return new Anthropic({ apiKey }); } /** @@ -71,7 +65,7 @@ function getAnthropicClient(options?: PluginOptions): Anthropic { * - anthropic: The main plugin function to interact with the Anthropic AI. * * Usage: - * To use the Claude models, initialize the anthropic plugin inside `genkit()` and pass the configuration options. If no API key is provided in the options, the environment variable `ANTHROPIC_API_KEY` must be set. If you want to cache the system prompt, set `cacheSystemPrompt` to `true`. **Note:** Prompt caching is in beta and may change. To learn more, see https://docs.anthropic.com/en/docs/prompt-caching. + * To use the Claude models, initialize the anthropic plugin inside `genkit()` and pass the configuration options. If no API key is provided in the options, the environment variable `ANTHROPIC_API_KEY` must be set. * * Example: * ``` @@ -80,7 +74,7 @@ function getAnthropicClient(options?: PluginOptions): Anthropic { * * const ai = genkit({ * plugins: [ - * anthropic({ apiKey: 'your-api-key', cacheSystemPrompt: false }) + * anthropic({ apiKey: 'your-api-key' }) * ... // other plugins * ] * }); @@ -103,7 +97,6 @@ function anthropicPlugin(options?: PluginOptions): GenkitPluginV2 { const action = claudeModel({ name, client, - cacheSystemPrompt: options?.cacheSystemPrompt, defaultApiVersion, }); actions.push(action); @@ -117,7 +110,6 @@ function anthropicPlugin(options?: PluginOptions): GenkitPluginV2 { return claudeModel({ name: modelName, client, - cacheSystemPrompt: options?.cacheSystemPrompt, defaultApiVersion, }); } diff --git a/js/plugins/anthropic/src/models.ts b/js/plugins/anthropic/src/models.ts index ba03c15910..cb0e23f48e 100644 --- a/js/plugins/anthropic/src/models.ts +++ b/js/plugins/anthropic/src/models.ts @@ -272,12 +272,7 @@ export function claudeModelReference( export function claudeModel( params: ClaudeModelParams ): ModelAction { - const { - name, - client: runnerClient, - cacheSystemPrompt: cachePrompt, - defaultApiVersion: apiVersion, - } = params; + const { name, client: runnerClient, defaultApiVersion: apiVersion } = params; // Use supported model ref if available, otherwise create generic model ref const knownModelRef = KNOWN_CLAUDE_MODELS[name]; let modelInfo = knownModelRef @@ -297,7 +292,6 @@ export function claudeModel( { name, client: runnerClient, - cacheSystemPrompt: cachePrompt, defaultApiVersion: apiVersion, }, configSchema diff --git a/js/plugins/anthropic/src/runner/base.ts b/js/plugins/anthropic/src/runner/base.ts index 4a3a4ba1e6..75c2eaf8db 100644 --- a/js/plugins/anthropic/src/runner/base.ts +++ b/js/plugins/anthropic/src/runner/base.ts @@ -62,7 +62,6 @@ const ANTHROPIC_THINKING_CUSTOM_KEY = 'anthropicThinking'; export abstract class BaseRunner { protected name: string; protected client: Anthropic; - protected cacheSystemPrompt?: boolean; /** * Default maximum output tokens for Claude models when not specified in the request. @@ -72,7 +71,6 @@ export abstract class BaseRunner { constructor(params: ClaudeRunnerParams) { this.name = params.name; this.client = params.client; - this.cacheSystemPrompt = params.cacheSystemPrompt; } /** @@ -459,28 +457,24 @@ export abstract class BaseRunner { * Converts an Anthropic request to a non-streaming Anthropic API request body. * @param modelName The name of the Anthropic model to use. * @param request The Genkit GenerateRequest to convert. - * @param cacheSystemPrompt Whether to cache the system prompt. * @returns The converted Anthropic API non-streaming request body. * @throws An error if an unsupported output format is requested. */ protected abstract toAnthropicRequestBody( modelName: string, - request: GenerateRequest, - cacheSystemPrompt?: boolean + request: GenerateRequest ): RunnerRequestBody; /** * Converts an Anthropic request to a streaming Anthropic API request body. * @param modelName The name of the Anthropic model to use. * @param request The Genkit GenerateRequest to convert. - * @param cacheSystemPrompt Whether to cache the system prompt. * @returns The converted Anthropic API streaming request body. * @throws An error if an unsupported output format is requested. */ protected abstract toAnthropicStreamingRequestBody( modelName: string, - request: GenerateRequest, - cacheSystemPrompt?: boolean + request: GenerateRequest ): RunnerStreamingRequestBody; protected abstract createMessage( @@ -512,11 +506,7 @@ export abstract class BaseRunner { const { streamingRequested, sendChunk, abortSignal } = options; if (streamingRequested) { - const body = this.toAnthropicStreamingRequestBody( - this.name, - request, - this.cacheSystemPrompt - ); + const body = this.toAnthropicStreamingRequestBody(this.name, request); const stream = this.streamMessages(body, abortSignal); for await (const event of stream) { const part = this.toGenkitPart(event); @@ -531,11 +521,7 @@ export abstract class BaseRunner { return this.toGenkitResponse(finalMessage); } - const body = this.toAnthropicRequestBody( - this.name, - request, - this.cacheSystemPrompt - ); + const body = this.toAnthropicRequestBody(this.name, request); const response = await this.createMessage(body, abortSignal); return this.toGenkitResponse(response); } diff --git a/js/plugins/anthropic/src/runner/beta.ts b/js/plugins/anthropic/src/runner/beta.ts index 73e4b50a89..b772623442 100644 --- a/js/plugins/anthropic/src/runner/beta.ts +++ b/js/plugins/anthropic/src/runner/beta.ts @@ -299,8 +299,7 @@ export class BetaRunner extends BaseRunner { */ protected toAnthropicRequestBody( modelName: string, - request: GenerateRequest, - cacheSystemPrompt?: boolean + request: GenerateRequest ): BetaMessageCreateParamsNonStreaming { const model = KNOWN_CLAUDE_MODELS[modelName]; const { system, messages } = this.toAnthropicMessages(request.messages); @@ -357,27 +356,13 @@ export class BetaRunner extends BaseRunner { */ protected toAnthropicStreamingRequestBody( modelName: string, - request: GenerateRequest, - cacheSystemPrompt?: boolean + request: GenerateRequest ): BetaMessageCreateParamsStreaming { const model = KNOWN_CLAUDE_MODELS[modelName]; const { system, messages } = this.toAnthropicMessages(request.messages); const mappedModelName = request.config?.version ?? extractVersion(model, modelName); - const betaSystem = - system === undefined - ? undefined - : cacheSystemPrompt - ? [ - { - type: 'text' as const, - text: system, - cache_control: { type: 'ephemeral' as const }, - }, - ] - : system; - const thinkingConfig = this.toAnthropicThinkingConfig( request.config?.thinking ) as BetaMessageCreateParams['thinking'] | undefined; @@ -400,7 +385,7 @@ export class BetaRunner extends BaseRunner { request.config?.maxOutputTokens ?? this.DEFAULT_MAX_OUTPUT_TOKENS, messages, stream: true, - system: betaSystem, + system: system as BetaTextBlockParam[], stop_sequences: request.config?.stopSequences, temperature: request.config?.temperature, top_k: topK, diff --git a/js/plugins/anthropic/src/runner/stable.ts b/js/plugins/anthropic/src/runner/stable.ts index 54f50af2d7..e14c0d3505 100644 --- a/js/plugins/anthropic/src/runner/stable.ts +++ b/js/plugins/anthropic/src/runner/stable.ts @@ -196,8 +196,7 @@ export class Runner extends BaseRunner { protected toAnthropicRequestBody( modelName: string, - request: GenerateRequest, - cacheSystemPrompt?: boolean + request: GenerateRequest ): MessageCreateParamsNonStreaming { if (request.output?.format && request.output.format !== 'text') { throw new Error( @@ -248,8 +247,7 @@ export class Runner extends BaseRunner { protected toAnthropicStreamingRequestBody( modelName: string, - request: GenerateRequest, - cacheSystemPrompt?: boolean + request: GenerateRequest ): MessageCreateParamsStreaming { if (request.output?.format && request.output.format !== 'text') { throw new Error( diff --git a/js/plugins/anthropic/src/types.ts b/js/plugins/anthropic/src/types.ts index 2f61464a10..a0cb709a2c 100644 --- a/js/plugins/anthropic/src/types.ts +++ b/js/plugins/anthropic/src/types.ts @@ -31,7 +31,6 @@ export const __testClient = Symbol('testClient'); */ export interface PluginOptions { apiKey?: string; - cacheSystemPrompt?: boolean; /** Default API surface for all requests unless overridden per-request. */ apiVersion?: 'stable' | 'beta'; } @@ -50,7 +49,6 @@ export interface InternalPluginOptions extends PluginOptions { interface ClaudeHelperParamsBase { name: string; client: Anthropic; - cacheSystemPrompt?: boolean; defaultApiVersion?: 'stable' | 'beta'; } From affeef6fe0faa3932a5eff7d3dcff9f1c9cdc8bc Mon Sep 17 00:00:00 2001 From: Corie Watson Date: Tue, 6 Jan 2026 17:14:07 +0000 Subject: [PATCH 04/12] fix(anthropic): tests --- .../anthropic/tests/beta_runner_test.ts | 73 ---- js/plugins/anthropic/tests/execution_test.ts | 12 +- js/plugins/anthropic/tests/index_test.ts | 12 +- .../anthropic/tests/stable_runner_test.ts | 311 ++++++++---------- 4 files changed, 155 insertions(+), 253 deletions(-) diff --git a/js/plugins/anthropic/tests/beta_runner_test.ts b/js/plugins/anthropic/tests/beta_runner_test.ts index 0d549b938c..38af9b618a 100644 --- a/js/plugins/anthropic/tests/beta_runner_test.ts +++ b/js/plugins/anthropic/tests/beta_runner_test.ts @@ -26,7 +26,6 @@ describe('BetaRunner.toAnthropicMessageContent', () => { return new BetaRunner({ name: 'anthropic/claude-3-5-haiku', client: createMockAnthropicClient(), - cacheSystemPrompt: false, }); } @@ -460,7 +459,6 @@ describe('BetaRunner', () => { const runner = new BetaRunner({ name: 'claude-3-5-haiku', client: mockClient as Anthropic, - cacheSystemPrompt: true, }) as any; const request = { @@ -542,77 +540,6 @@ describe('BetaRunner', () => { assert.deepStrictEqual(disabledBody.thinking, { type: 'disabled' }); }); - it('should concatenate multiple text parts in system message', () => { - const mockClient = createMockAnthropicClient(); - const runner = new BetaRunner({ - name: 'claude-3-5-haiku', - client: mockClient as Anthropic, - }) as any; - - const request = { - messages: [ - { - role: 'system', - content: [ - { text: 'You are a helpful assistant.' }, - { text: 'Always be concise.' }, - { text: 'Use proper grammar.' }, - ], - }, - { role: 'user', content: [{ text: 'Hi' }] }, - ], - output: { format: 'text' }, - } satisfies any; - - const body = runner.toAnthropicRequestBody( - 'claude-3-5-haiku', - request, - false - ); - - assert.strictEqual( - body.system, - 'You are a helpful assistant.\n\nAlways be concise.\n\nUse proper grammar.' - ); - }); - - it('should concatenate multiple text parts in system message with caching', () => { - const mockClient = createMockAnthropicClient(); - const runner = new BetaRunner({ - name: 'claude-3-5-haiku', - client: mockClient as Anthropic, - }) as any; - - const request = { - messages: [ - { - role: 'system', - content: [ - { text: 'You are a helpful assistant.' }, - { text: 'Always be concise.' }, - ], - }, - { role: 'user', content: [{ text: 'Hi' }] }, - ], - output: { format: 'text' }, - } satisfies any; - - const body = runner.toAnthropicRequestBody( - 'claude-3-5-haiku', - request, - true - ); - - assert.ok(Array.isArray(body.system)); - assert.deepStrictEqual(body.system, [ - { - type: 'text', - text: 'You are a helpful assistant.\n\nAlways be concise.', - cache_control: { type: 'ephemeral' }, - }, - ]); - }); - it('should throw error if system message contains media', () => { const mockClient = createMockAnthropicClient(); const runner = new BetaRunner({ diff --git a/js/plugins/anthropic/tests/execution_test.ts b/js/plugins/anthropic/tests/execution_test.ts index ae7b6a85e7..dacde4b426 100644 --- a/js/plugins/anthropic/tests/execution_test.ts +++ b/js/plugins/anthropic/tests/execution_test.ts @@ -186,10 +186,14 @@ describe('Model Execution Integration Tests', () => { assert.strictEqual(createStub.mock.calls.length, 1); const apiRequest = createStub.mock.calls[0].arguments[0]; assert.ok(apiRequest.system, 'System prompt should be set'); - assert.strictEqual( - apiRequest.system, - 'You are a pirate. Respond like a pirate.' - ); + assert.deepStrictEqual(apiRequest.system, [ + { + type: 'text', + text: 'You are a pirate. Respond like a pirate.', + citations: null, + cache_control: undefined, + }, + ]); assert.strictEqual( apiRequest.messages.length, 1, diff --git a/js/plugins/anthropic/tests/index_test.ts b/js/plugins/anthropic/tests/index_test.ts index 9a5271e131..328123c04b 100644 --- a/js/plugins/anthropic/tests/index_test.ts +++ b/js/plugins/anthropic/tests/index_test.ts @@ -254,10 +254,9 @@ describe('Anthropic resolve helpers', () => { assert.strictEqual(referenceAny.config?.temperature, 0.25); }); - it('should apply system prompt caching when cacheSystemPrompt is true', async () => { + it('should apply system prompt caching when caching is enabled', async () => { const mockClient = createMockAnthropicClient(); const plugin = anthropic({ - cacheSystemPrompt: true, [__testClient]: mockClient, } as PluginOptions); @@ -270,7 +269,12 @@ describe('Anthropic resolve helpers', () => { messages: [ { role: 'system', - content: [{ text: 'You are helpful.' }], + content: [ + { + text: 'You are helpful.', + metadata: { cache_control: { type: 'ephemeral', ttl: '5m' } }, + }, + ], }, ], }, @@ -281,6 +285,6 @@ describe('Anthropic resolve helpers', () => { assert.strictEqual(createStub.mock.calls.length, 1); const requestBody = createStub.mock.calls[0].arguments[0]; assert.ok(Array.isArray(requestBody.system)); - assert.strictEqual(requestBody.system[0].cache_control.type, 'ephemeral'); + assert.strictEqual(requestBody.system[0].cache_control?.type, 'ephemeral'); }); }); diff --git a/js/plugins/anthropic/tests/stable_runner_test.ts b/js/plugins/anthropic/tests/stable_runner_test.ts index 72797251b9..39f34c70ae 100644 --- a/js/plugins/anthropic/tests/stable_runner_test.ts +++ b/js/plugins/anthropic/tests/stable_runner_test.ts @@ -57,13 +57,11 @@ type RunnerProtectedMethods = { toAnthropicTool: (tool: ToolDefinition) => any; toAnthropicRequestBody: ( modelName: string, - request: GenerateRequest, - cacheSystemPrompt?: boolean + request: GenerateRequest ) => any; toAnthropicStreamingRequestBody: ( modelName: string, - request: GenerateRequest, - cacheSystemPrompt?: boolean + request: GenerateRequest ) => any; fromAnthropicContentBlockChunk: ( event: MessageStreamEvent @@ -183,6 +181,7 @@ describe('toAnthropicMessageContent', () => { type: 'url', url: 'https://example.com/image.png', }, + cache_control: undefined, }); }); @@ -201,6 +200,7 @@ describe('toAnthropicMessageContent', () => { media_type: 'application/pdf', data: 'JVBERi0xLjQKJ', }, + cache_control: undefined, }); }); @@ -218,6 +218,7 @@ describe('toAnthropicMessageContent', () => { type: 'url', url: 'https://example.com/document.pdf', }, + cache_control: undefined, }); }); }); @@ -228,7 +229,7 @@ describe('toAnthropicMessages', () => { inputMessages: MessageData[]; expectedOutput: { messages: MessageParam[]; - system?: string; + system?: MessageParam['content']; }; }[] = [ { @@ -257,6 +258,7 @@ describe('toAnthropicMessages', () => { id: 'toolu_01A09q90qw90lq917835lq9', name: 'tellAFunnyJoke', input: { topic: 'bob' }, + cache_control: undefined, }, ], }, @@ -288,6 +290,7 @@ describe('toAnthropicMessages', () => { { type: 'tool_result', tool_use_id: 'call_SVDpFV2l2fW88QRFtv85FWwM', + cache_control: undefined, content: [ { type: 'text', @@ -328,6 +331,7 @@ describe('toAnthropicMessages', () => { { type: 'tool_result', tool_use_id: 'call_SVDpFV2l2fW88QRFtv85FWwM', + cache_control: undefined, content: [ { type: 'image', @@ -370,6 +374,7 @@ describe('toAnthropicMessages', () => { { type: 'tool_result', tool_use_id: 'call_SVDpFV2l2fW88QRFtv85FWwM', + cache_control: undefined, content: [ { type: 'image', @@ -402,6 +407,7 @@ describe('toAnthropicMessages', () => { text: 'hi', type: 'text', citations: null, + cache_control: undefined, }, ], role: 'user', @@ -412,6 +418,7 @@ describe('toAnthropicMessages', () => { text: 'how can I help you?', type: 'text', citations: null, + cache_control: undefined, }, ], role: 'assistant', @@ -422,6 +429,7 @@ describe('toAnthropicMessages', () => { text: 'I am testing', type: 'text', citations: null, + cache_control: undefined, }, ], role: 'user', @@ -444,12 +452,20 @@ describe('toAnthropicMessages', () => { text: 'hi', type: 'text', citations: null, + cache_control: undefined, }, ], role: 'user', }, ], - system: 'You are an helpful assistant', + system: [ + { + type: 'text', + text: 'You are an helpful assistant', + citations: null, + cache_control: undefined, + }, + ], }, }, { @@ -476,6 +492,7 @@ describe('toAnthropicMessages', () => { text: 'describe the following image:', type: 'text', citations: null, + cache_control: undefined, }, { source: { @@ -484,6 +501,7 @@ describe('toAnthropicMessages', () => { media_type: 'image/gif', }, type: 'image', + cache_control: undefined, }, ], role: 'user', @@ -518,6 +536,7 @@ describe('toAnthropicMessages', () => { media_type: 'application/pdf', data: 'JVBERi0xLjQKJ', }, + cache_control: undefined, }, ], role: 'user', @@ -551,6 +570,7 @@ describe('toAnthropicMessages', () => { type: 'url', url: 'https://example.com/document.pdf', }, + cache_control: undefined, }, ], role: 'user', @@ -589,6 +609,7 @@ describe('toAnthropicMessages', () => { text: 'Analyze this PDF and image:', type: 'text', citations: null, + cache_control: undefined, }, { type: 'document', @@ -597,6 +618,7 @@ describe('toAnthropicMessages', () => { media_type: 'application/pdf', data: 'JVBERi0xLjQKJ', }, + cache_control: undefined, }, { source: { @@ -605,6 +627,7 @@ describe('toAnthropicMessages', () => { media_type: 'image/png', }, type: 'image', + cache_control: undefined, }, ], role: 'user', @@ -862,6 +885,12 @@ describe('fromAnthropicResponse', () => { usage: { inputTokens: 10, outputTokens: 20, + custom: { + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + ephemeral_5m_input_tokens: 0, + ephemeral_1h_input_tokens: 0, + }, }, }, }, @@ -911,6 +940,12 @@ describe('fromAnthropicResponse', () => { usage: { inputTokens: 10, outputTokens: 20, + custom: { + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + ephemeral_5m_input_tokens: 0, + ephemeral_1h_input_tokens: 0, + }, }, }, }, @@ -964,6 +999,7 @@ describe('toAnthropicRequestBody', () => { text: 'Tell a joke about dogs.', type: 'text', citations: null, + cache_control: undefined, }, ], role: 'user', @@ -998,6 +1034,7 @@ describe('toAnthropicRequestBody', () => { text: 'Tell a joke about dogs.', type: 'text', citations: null, + cache_control: undefined, }, ], role: 'user', @@ -1043,76 +1080,15 @@ describe('toAnthropicRequestBody', () => { }); it('should apply system prompt caching when enabled', () => { - const request: GenerateRequest = { - messages: [ - { role: 'system', content: [{ text: 'You are a helpful assistant' }] }, - { role: 'user', content: [{ text: 'Hi' }] }, - ], - output: { format: 'text' }, - }; - - // Test with caching enabled - const outputWithCaching = testRunner.toAnthropicRequestBody( - 'claude-3-5-haiku', - request, - true - ); - assert.deepStrictEqual(outputWithCaching.system, [ - { - type: 'text', - text: 'You are a helpful assistant', - cache_control: { type: 'ephemeral' }, - }, - ]); - - // Test with caching disabled - const outputWithoutCaching = testRunner.toAnthropicRequestBody( - 'claude-3-5-haiku', - request, - false - ); - assert.strictEqual( - outputWithoutCaching.system, - 'You are a helpful assistant' - ); - }); - - it('should concatenate multiple text parts in system message', () => { const request: GenerateRequest = { messages: [ { role: 'system', content: [ - { text: 'You are a helpful assistant.' }, - { text: 'Always be concise.' }, - { text: 'Use proper grammar.' }, - ], - }, - { role: 'user', content: [{ text: 'Hi' }] }, - ], - output: { format: 'text' }, - }; - - const output = testRunner.toAnthropicRequestBody( - 'claude-3-5-haiku', - request, - false - ); - - assert.strictEqual( - output.system, - 'You are a helpful assistant.\n\nAlways be concise.\n\nUse proper grammar.' - ); - }); - - it('should concatenate multiple text parts in system message with caching', () => { - const request: GenerateRequest = { - messages: [ - { - role: 'system', - content: [ - { text: 'You are a helpful assistant.' }, - { text: 'Always be concise.' }, + { + text: 'You are a helpful assistant', + metadata: { cache_control: { type: 'ephemeral' } }, + }, ], }, { role: 'user', content: [{ text: 'Hi' }] }, @@ -1120,17 +1096,17 @@ describe('toAnthropicRequestBody', () => { output: { format: 'text' }, }; - const output = testRunner.toAnthropicRequestBody( + // Test with caching enabled + const outputWithCaching = testRunner.toAnthropicRequestBody( 'claude-3-5-haiku', - request, - true + request ); - - assert.deepStrictEqual(output.system, [ + assert.deepStrictEqual(outputWithCaching.system, [ { type: 'text', - text: 'You are a helpful assistant.\n\nAlways be concise.', + text: 'You are a helpful assistant', cache_control: { type: 'ephemeral' }, + citations: null, }, ]); }); @@ -1156,8 +1132,7 @@ describe('toAnthropicRequestBody', () => { }; assert.throws( - () => - testRunner.toAnthropicRequestBody('claude-3-5-haiku', request, false), + () => testRunner.toAnthropicRequestBody('claude-3-5-haiku', request), /System messages can only contain text content/ ); }); @@ -1178,8 +1153,7 @@ describe('toAnthropicRequestBody', () => { }; assert.throws( - () => - testRunner.toAnthropicRequestBody('claude-3-5-haiku', request, false), + () => testRunner.toAnthropicRequestBody('claude-3-5-haiku', request), /System messages can only contain text content/ ); }); @@ -1200,8 +1174,7 @@ describe('toAnthropicRequestBody', () => { }; assert.throws( - () => - testRunner.toAnthropicRequestBody('claude-3-5-haiku', request, false), + () => testRunner.toAnthropicRequestBody('claude-3-5-haiku', request), /System messages can only contain text content/ ); }); @@ -1227,7 +1200,15 @@ describe('toAnthropicStreamingRequestBody', () => { it('should support system prompt caching in streaming mode', () => { const request: GenerateRequest = { messages: [ - { role: 'system', content: [{ text: 'You are a helpful assistant' }] }, + { + role: 'system', + content: [ + { + text: 'You are a helpful assistant', + metadata: { cache_control: { type: 'ephemeral' } }, + }, + ], + }, { role: 'user', content: [{ text: 'Hello' }] }, ], output: { format: 'text' }, @@ -1235,27 +1216,30 @@ describe('toAnthropicStreamingRequestBody', () => { const outputWithCaching = testRunner.toAnthropicStreamingRequestBody( 'claude-3-5-haiku', - request, - true + request ); assert.deepStrictEqual(outputWithCaching.system, [ { type: 'text', text: 'You are a helpful assistant', cache_control: { type: 'ephemeral' }, + citations: null, }, ]); assert.strictEqual(outputWithCaching.stream, true); const outputWithoutCaching = testRunner.toAnthropicStreamingRequestBody( 'claude-3-5-haiku', - request, - false - ); - assert.strictEqual( - outputWithoutCaching.system, - 'You are a helpful assistant' + request ); + assert.deepStrictEqual(outputWithoutCaching.system, [ + { + type: 'text', + text: 'You are a helpful assistant', + citations: null, + cache_control: { type: 'ephemeral' }, + }, + ]); assert.strictEqual(outputWithoutCaching.stream, true); }); }); @@ -1509,7 +1493,6 @@ describe('claudeRunner param object', () => { { name: 'claude-3-5-haiku', client: mockClient, - cacheSystemPrompt: true, }, AnthropicConfigSchema ); @@ -1593,7 +1576,6 @@ describe('claudeModel', () => { name: 'claude-3-5-haiku', client: mockClient, defaultApiVersion: 'beta', - cacheSystemPrompt: true, }); const abortSignal = new AbortController().signal; @@ -2165,46 +2147,41 @@ describe('Runner request bodies and error branches', () => { const runner = new Runner({ name: 'claude-3-5-haiku', client: mockClient, - cacheSystemPrompt: true, }) as Runner & RunnerProtectedMethods; - const body = runner['toAnthropicRequestBody']( - 'claude-3-5-haiku', - { - messages: [ - { - role: 'system', - content: [{ text: 'You are helpful.' }], - }, - { - role: 'user', - content: [{ text: 'Tell me a joke' }], - }, - ], - config: { - maxOutputTokens: 256, - topK: 3, - topP: 0.75, - temperature: 0.6, - stopSequences: ['END'], - metadata: { user_id: 'user-xyz' }, - tool_choice: { type: 'auto' }, - thinking: { enabled: true, budgetTokens: 2048 }, + const body = runner['toAnthropicRequestBody']('claude-3-5-haiku', { + messages: [ + { + role: 'system', + content: [{ text: 'You are helpful.' }], }, - tools: [ - { - name: 'get_weather', - description: 'Returns the weather', - inputSchema: { type: 'object' }, - }, - ], - } as unknown as GenerateRequest, - true - ); + { + role: 'user', + content: [{ text: 'Tell me a joke' }], + }, + ], + config: { + maxOutputTokens: 256, + topK: 3, + topP: 0.75, + temperature: 0.6, + stopSequences: ['END'], + metadata: { user_id: 'user-xyz' }, + tool_choice: { type: 'auto' }, + thinking: { enabled: true, budgetTokens: 2048 }, + }, + tools: [ + { + name: 'get_weather', + description: 'Returns the weather', + inputSchema: { type: 'object' }, + }, + ], + } as unknown as GenerateRequest); assert.strictEqual(body.model, 'claude-3-5-haiku-20241022'); assert.ok(Array.isArray(body.system)); - assert.strictEqual(body.system?.[0].cache_control?.type, 'ephemeral'); + assert.strictEqual(body.system?.[0].cache_control?.type, undefined); assert.strictEqual(body.max_tokens, 256); assert.strictEqual(body.top_k, 3); assert.strictEqual(body.top_p, 0.75); @@ -2224,42 +2201,37 @@ describe('Runner request bodies and error branches', () => { const runner = new Runner({ name: 'claude-3-5-haiku', client: mockClient, - cacheSystemPrompt: true, }) as Runner & RunnerProtectedMethods; - const body = runner['toAnthropicStreamingRequestBody']( - 'claude-3-5-haiku', - { - messages: [ - { - role: 'system', - content: [{ text: 'Stay brief.' }], - }, - { - role: 'user', - content: [{ text: 'Summarize the weather.' }], - }, - ], - config: { - maxOutputTokens: 64, - topK: 2, - topP: 0.6, - temperature: 0.4, - stopSequences: ['STOP'], - metadata: { user_id: 'user-abc' }, - tool_choice: { type: 'any' }, - thinking: { enabled: true, budgetTokens: 1536 }, + const body = runner['toAnthropicStreamingRequestBody']('claude-3-5-haiku', { + messages: [ + { + role: 'system', + content: [{ text: 'Stay brief.' }], }, - tools: [ - { - name: 'summarize_weather', - description: 'Summarizes a forecast', - inputSchema: { type: 'object' }, - }, - ], - } as unknown as GenerateRequest, - true - ); + { + role: 'user', + content: [{ text: 'Summarize the weather.' }], + }, + ], + config: { + maxOutputTokens: 64, + topK: 2, + topP: 0.6, + temperature: 0.4, + stopSequences: ['STOP'], + metadata: { user_id: 'user-abc' }, + tool_choice: { type: 'any' }, + thinking: { enabled: true, budgetTokens: 1536 }, + }, + tools: [ + { + name: 'summarize_weather', + description: 'Summarizes a forecast', + inputSchema: { type: 'object' }, + }, + ], + } as unknown as GenerateRequest); assert.strictEqual(body.stream, true); assert.ok(Array.isArray(body.system)); @@ -2284,16 +2256,12 @@ describe('Runner request bodies and error branches', () => { client: mockClient, }) as Runner & RunnerProtectedMethods; - const body = runner['toAnthropicRequestBody']( - 'claude-3-5-haiku', - { - messages: [], - config: { - thinking: { enabled: false }, - }, - } as unknown as GenerateRequest, - false - ); + const body = runner['toAnthropicRequestBody']('claude-3-5-haiku', { + messages: [], + config: { + thinking: { enabled: false }, + }, + } as unknown as GenerateRequest); assert.deepStrictEqual(body.thinking, { type: 'disabled' }); }); @@ -2303,7 +2271,6 @@ describe('Runner request bodies and error branches', () => { const runner = new Runner({ name: 'claude-3-5-haiku', client: mockClient, - cacheSystemPrompt: false, }) as Runner & RunnerProtectedMethods; assert.throws( From 2b9352dfed2e60b1a3eae164532ae338b09642a7 Mon Sep 17 00:00:00 2001 From: Corie Watson Date: Wed, 7 Jan 2026 09:28:57 +0000 Subject: [PATCH 05/12] fix(anthropic): testapps --- js/testapps/anthropic/src/beta/additional_params.ts | 1 - js/testapps/anthropic/src/beta/basic.ts | 1 - js/testapps/anthropic/src/beta/effort.ts | 1 - js/testapps/anthropic/src/beta/structured_output.ts | 1 - 4 files changed, 4 deletions(-) diff --git a/js/testapps/anthropic/src/beta/additional_params.ts b/js/testapps/anthropic/src/beta/additional_params.ts index 2e443fc01a..5a21441f8d 100644 --- a/js/testapps/anthropic/src/beta/additional_params.ts +++ b/js/testapps/anthropic/src/beta/additional_params.ts @@ -22,7 +22,6 @@ const ai = genkit({ // Default all flows in this sample to the beta surface anthropic({ apiVersion: 'beta', - cacheSystemPrompt: true, apiKey: process.env.ANTHROPIC_API_KEY, }), ], diff --git a/js/testapps/anthropic/src/beta/basic.ts b/js/testapps/anthropic/src/beta/basic.ts index f9841f4c6d..198c0c1377 100644 --- a/js/testapps/anthropic/src/beta/basic.ts +++ b/js/testapps/anthropic/src/beta/basic.ts @@ -22,7 +22,6 @@ const ai = genkit({ // Default all flows in this sample to the beta surface anthropic({ apiVersion: 'beta', - cacheSystemPrompt: true, apiKey: process.env.ANTHROPIC_API_KEY, }), ], diff --git a/js/testapps/anthropic/src/beta/effort.ts b/js/testapps/anthropic/src/beta/effort.ts index 03e83a60af..a96641f4bc 100644 --- a/js/testapps/anthropic/src/beta/effort.ts +++ b/js/testapps/anthropic/src/beta/effort.ts @@ -22,7 +22,6 @@ const ai = genkit({ // Default all flows in this sample to the beta surface anthropic({ apiVersion: 'beta', - cacheSystemPrompt: true, apiKey: process.env.ANTHROPIC_API_KEY, }), ], diff --git a/js/testapps/anthropic/src/beta/structured_output.ts b/js/testapps/anthropic/src/beta/structured_output.ts index dbd3f5ca17..9d4d8412f8 100644 --- a/js/testapps/anthropic/src/beta/structured_output.ts +++ b/js/testapps/anthropic/src/beta/structured_output.ts @@ -22,7 +22,6 @@ const ai = genkit({ // Default all flows in this sample to the beta surface anthropic({ apiVersion: 'beta', - cacheSystemPrompt: true, apiKey: process.env.ANTHROPIC_API_KEY, }), ], From 5e6aa929f3f4dd61770a41d75d4f7c51c70fad8c Mon Sep 17 00:00:00 2001 From: Corie Watson Date: Wed, 7 Jan 2026 11:31:23 +0000 Subject: [PATCH 06/12] fix(anthropic): address feedback problems --- js/plugins/anthropic/src/runner/base.ts | 2 +- js/plugins/anthropic/src/runner/stable.ts | 32 +++++++++-------------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/js/plugins/anthropic/src/runner/base.ts b/js/plugins/anthropic/src/runner/base.ts index 75c2eaf8db..508918374e 100644 --- a/js/plugins/anthropic/src/runner/base.ts +++ b/js/plugins/anthropic/src/runner/base.ts @@ -439,7 +439,7 @@ export abstract class BaseRunner { anthropicMsgs.push({ role, content }); } - return { system: system, messages: anthropicMsgs }; + return { system, messages: anthropicMsgs }; } /** diff --git a/js/plugins/anthropic/src/runner/stable.ts b/js/plugins/anthropic/src/runner/stable.ts index e14c0d3505..b6317f325d 100644 --- a/js/plugins/anthropic/src/runner/stable.ts +++ b/js/plugins/anthropic/src/runner/stable.ts @@ -107,9 +107,9 @@ export class Runner extends BaseRunner { type: 'text', text: part.text, citations: null, - cache_control: part.metadata?.cache_control as - | TextBlockParam['cache_control'] - | null, + // This is intentional. `part.metadata?.cache_control` is unknown, and casting it to the relevant type of the property makes it more robust to Anthropic SDK API changes. + cache_control: part.metadata + ?.cache_control as TextBlockParam['cache_control'], }; } @@ -118,9 +118,8 @@ export class Runner extends BaseRunner { return { type: 'document', source: this.toPdfDocumentSource(part.media), - cache_control: part.metadata?.cache_control as - | DocumentBlockParam['cache_control'] - | null, + cache_control: part.metadata + ?.cache_control as DocumentBlockParam['cache_control'], }; } @@ -133,9 +132,8 @@ export class Runner extends BaseRunner { data: source.data, media_type: source.mediaType, }, - cache_control: part.metadata?.cache_control as - | ImageBlockParam['cache_control'] - | null, + cache_control: part.metadata + ?.cache_control as ImageBlockParam['cache_control'], }; } return { @@ -144,9 +142,8 @@ export class Runner extends BaseRunner { type: 'url', url: source.url, }, - cache_control: part.metadata?.cache_control as - | ImageBlockParam['cache_control'] - | null, + cache_control: part.metadata + ?.cache_control as ImageBlockParam['cache_control'], }; } @@ -163,9 +160,8 @@ export class Runner extends BaseRunner { id: part.toolRequest.ref, name: part.toolRequest.name, input: part.toolRequest.input, - cache_control: part.metadata?.cache_control as - | ToolUseBlockParam['cache_control'] - | null, + cache_control: part.metadata + ?.cache_control as ToolUseBlockParam['cache_control'], }; } @@ -181,9 +177,8 @@ export class Runner extends BaseRunner { type: 'tool_result', tool_use_id: part.toolResponse.ref, content: [this.toAnthropicToolResponseContent(part)], - cache_control: part.metadata?.cache_control as - | ToolResultBlockParam['cache_control'] - | null, + cache_control: part.metadata + ?.cache_control as ToolResultBlockParam['cache_control'], }; } @@ -301,7 +296,6 @@ export class Runner extends BaseRunner { body: MessageCreateParamsNonStreaming, abortSignal: AbortSignal ): Promise { - console.log('body in createMessage', JSON.stringify(body, null, 2)); return await this.client.messages.create(body, { signal: abortSignal }); } From 297b446b50bee551a83f9bb9998b705164b49778 Mon Sep 17 00:00:00 2001 From: Corie Watson Date: Wed, 7 Jan 2026 15:42:37 +0000 Subject: [PATCH 07/12] chore(anthropic): add satisfies type to testapp --- js/pnpm-lock.yaml | 32 +++++++++++-------- js/testapps/anthropic/package.json | 1 + .../anthropic/src/stable/prompt_caching.ts | 3 +- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/js/pnpm-lock.yaml b/js/pnpm-lock.yaml index 7f3123818d..1838dc8088 100644 --- a/js/pnpm-lock.yaml +++ b/js/pnpm-lock.yaml @@ -153,6 +153,10 @@ importers: zod-to-json-schema: specifier: ^3.22.4 version: 3.24.5(zod@3.25.67) + optionalDependencies: + '@genkit-ai/firebase': + specifier: ^1.16.1 + version: 1.16.1(@google-cloud/firestore@7.11.1(encoding@0.1.13))(encoding@0.1.13)(firebase-admin@13.6.0(encoding@0.1.13))(firebase@11.9.1)(genkit@1.27.0(@google-cloud/firestore@7.11.1(encoding@0.1.13))(encoding@0.1.13)(firebase-admin@13.6.0(encoding@0.1.13))(firebase@11.9.1)) devDependencies: '@types/express': specifier: ^4.17.21 @@ -178,10 +182,6 @@ importers: typescript: specifier: ^4.9.0 version: 4.9.5 - optionalDependencies: - '@genkit-ai/firebase': - specifier: ^1.16.1 - version: 1.16.1(@google-cloud/firestore@7.11.1(encoding@0.1.13))(encoding@0.1.13)(firebase-admin@13.6.0(encoding@0.1.13))(firebase@11.9.1)(genkit@1.27.0(@google-cloud/firestore@7.11.1(encoding@0.1.13))(encoding@0.1.13)(firebase-admin@13.6.0(encoding@0.1.13))(firebase@11.9.1)) doc-snippets: dependencies: @@ -961,7 +961,7 @@ importers: version: 0.24.3(encoding@0.1.13) '@anthropic-ai/vertex-sdk': specifier: ^0.4.0 - version: 0.4.3(encoding@0.1.13) + version: 0.4.3(encoding@0.1.13)(zod@3.25.67) '@google-cloud/aiplatform': specifier: ^3.23.0 version: 3.35.0(encoding@0.1.13) @@ -986,6 +986,13 @@ importers: openai: specifier: ^4.52.7 version: 4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.67) + optionalDependencies: + '@google-cloud/bigquery': + specifier: ^7.8.0 + version: 7.9.4(encoding@0.1.13) + firebase-admin: + specifier: '>=12.2' + version: 13.4.0(encoding@0.1.13) devDependencies: '@types/node': specifier: ^20.11.16 @@ -1017,16 +1024,12 @@ importers: typescript: specifier: ^4.9.0 version: 4.9.5 - optionalDependencies: - '@google-cloud/bigquery': - specifier: ^7.8.0 - version: 7.9.4(encoding@0.1.13) - firebase-admin: - specifier: '>=12.2' - version: 13.4.0(encoding@0.1.13) testapps/anthropic: dependencies: + '@anthropic-ai/sdk': + specifier: ^0.71.2 + version: 0.71.2(zod@3.25.67) '@genkit-ai/anthropic': specifier: workspace:* version: link:../../plugins/anthropic @@ -9072,13 +9075,14 @@ snapshots: transitivePeerDependencies: - encoding - '@anthropic-ai/vertex-sdk@0.4.3(encoding@0.1.13)': + '@anthropic-ai/vertex-sdk@0.4.3(encoding@0.1.13)(zod@3.25.67)': dependencies: - '@anthropic-ai/sdk': 0.24.3(encoding@0.1.13) + '@anthropic-ai/sdk': 0.71.2(zod@3.25.67) google-auth-library: 9.15.1(encoding@0.1.13) transitivePeerDependencies: - encoding - supports-color + - zod '@apidevtools/json-schema-ref-parser@9.1.2': dependencies: diff --git a/js/testapps/anthropic/package.json b/js/testapps/anthropic/package.json index 032e39ab7a..de46ed96ff 100644 --- a/js/testapps/anthropic/package.json +++ b/js/testapps/anthropic/package.json @@ -32,6 +32,7 @@ "author": "", "license": "Apache-2.0", "dependencies": { + "@anthropic-ai/sdk": "^0.71.2", "@genkit-ai/anthropic": "workspace:*", "genkit": "workspace:*" }, diff --git a/js/testapps/anthropic/src/stable/prompt_caching.ts b/js/testapps/anthropic/src/stable/prompt_caching.ts index 3a456d62ee..f80dbb7865 100644 --- a/js/testapps/anthropic/src/stable/prompt_caching.ts +++ b/js/testapps/anthropic/src/stable/prompt_caching.ts @@ -14,6 +14,7 @@ * limitations under the License. */ +import { CacheControlEphemeral } from '@anthropic-ai/sdk/resources'; import { anthropic } from '@genkit-ai/anthropic'; import { promises as fs } from 'fs'; import { genkit } from 'genkit'; @@ -77,7 +78,7 @@ ai.defineFlow('caching user prompt', async () => { cache_control: { type: 'ephemeral', ttl: '5m', - }, + } satisfies CacheControlEphemeral, }, }, ], From 04eaf0463b540f45024111b813cfd69996031c79 Mon Sep 17 00:00:00 2001 From: Corie Watson Date: Wed, 7 Jan 2026 16:04:29 +0000 Subject: [PATCH 08/12] fix(anthropic): pnpm-lock.yaml --- js/pnpm-lock.yaml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/js/pnpm-lock.yaml b/js/pnpm-lock.yaml index 1838dc8088..89c405e82d 100644 --- a/js/pnpm-lock.yaml +++ b/js/pnpm-lock.yaml @@ -961,7 +961,7 @@ importers: version: 0.24.3(encoding@0.1.13) '@anthropic-ai/vertex-sdk': specifier: ^0.4.0 - version: 0.4.3(encoding@0.1.13)(zod@3.25.67) + version: 0.4.3(encoding@0.1.13) '@google-cloud/aiplatform': specifier: ^3.23.0 version: 3.35.0(encoding@0.1.13) @@ -992,7 +992,7 @@ importers: version: 7.9.4(encoding@0.1.13) firebase-admin: specifier: '>=12.2' - version: 13.4.0(encoding@0.1.13) + version: 13.6.0(encoding@0.1.13) devDependencies: '@types/node': specifier: ^20.11.16 @@ -9075,14 +9075,13 @@ snapshots: transitivePeerDependencies: - encoding - '@anthropic-ai/vertex-sdk@0.4.3(encoding@0.1.13)(zod@3.25.67)': + '@anthropic-ai/vertex-sdk@0.4.3(encoding@0.1.13)': dependencies: - '@anthropic-ai/sdk': 0.71.2(zod@3.25.67) + '@anthropic-ai/sdk': 0.24.3(encoding@0.1.13) google-auth-library: 9.15.1(encoding@0.1.13) transitivePeerDependencies: - encoding - supports-color - - zod '@apidevtools/json-schema-ref-parser@9.1.2': dependencies: From bb96ebd15b87768ed4829a4683cf5ef2884f0ec5 Mon Sep 17 00:00:00 2001 From: Corie Watson Date: Mon, 12 Jan 2026 12:19:35 +0000 Subject: [PATCH 09/12] feat(testapps/anthropic): caching tool flow, caching pdf flow, caching image flow --- .../anthropic/src/stable/prompt_caching.ts | 129 +++++++++++++++++- 1 file changed, 127 insertions(+), 2 deletions(-) diff --git a/js/testapps/anthropic/src/stable/prompt_caching.ts b/js/testapps/anthropic/src/stable/prompt_caching.ts index f80dbb7865..b6b90c725b 100644 --- a/js/testapps/anthropic/src/stable/prompt_caching.ts +++ b/js/testapps/anthropic/src/stable/prompt_caching.ts @@ -14,7 +14,6 @@ * limitations under the License. */ -import { CacheControlEphemeral } from '@anthropic-ai/sdk/resources'; import { anthropic } from '@genkit-ai/anthropic'; import { promises as fs } from 'fs'; import { genkit } from 'genkit'; @@ -78,7 +77,7 @@ ai.defineFlow('caching user prompt', async () => { cache_control: { type: 'ephemeral', ttl: '5m', - } satisfies CacheControlEphemeral, + }, }, }, ], @@ -91,3 +90,129 @@ ai.defineFlow('caching user prompt', async () => { usage: response.usage, }; }); + +ai.defineFlow('caching image prompt', async () => { + const imagePath = path.join(__dirname, 'sample-image.png'); + const imageBuffer = await fs.readFile(imagePath); + const imageBase64 = imageBuffer.toString('base64'); + + const longTextBuffer = await fs.readFile(longTextPath); + const longText = longTextBuffer.toString('utf-8'); + + const response = await ai.generate({ + model: anthropic.model('claude-sonnet-4-5'), + system: { + text: 'Does the following text match the image?', + }, + messages: [ + { + role: 'user', + content: [ + { + text: longText, + }, + { + media: { + url: `data:image/png;base64,${imageBase64}`, + contentType: 'image/png', + }, + metadata: { + cache_control: { + type: 'ephemeral', + ttl: '5m', + }, + }, + }, + ], + }, + ], + }); + + return { + text: response.text, + usage: response.usage, + }; +}); + +ai.defineFlow('caching pdf prompt', async () => { + const pdfPath = path.join(__dirname, '../attention-first-page.pdf'); + const pdfBuffer = await fs.readFile(pdfPath); + const pdfBase64 = pdfBuffer.toString('base64'); + + const response = await ai.generate({ + model: anthropic.model('claude-sonnet-4-5'), + system: { + text: 'You are a Claude assistant. Analyze the following PDF document and describe what you see briefly.', + }, + messages: [ + { + role: 'user', + content: [ + { + text: 'Are the contents of these PDF documents the same?', + }, + { + media: { + url: `data:application/pdf;base64,${pdfBase64}`, + contentType: 'application/pdf', + }, + metadata: { + cache_control: { + type: 'ephemeral', + ttl: '5m', + }, + }, + }, + ], + }, + ], + }); + + return { + text: response.text, + usage: response.usage, + }; +}); + +ai.defineFlow('caching with tool call', async () => { + const longTextBuffer = await fs.readFile(longTextPath); + const longText = longTextBuffer.toString('utf-8'); + + const response = await ai.generate({ + model: anthropic.model('claude-sonnet-4-5'), + system: { + text: `You are a friendly Claude assistant. Greet the user briefly. You will be given a long text to read and answer questions about it. + ${longText}`, + metadata: { + cache_control: { + type: 'ephemeral', + ttl: '5m', + }, + }, + }, + messages: [ + { + role: 'user', + content: [ + { + text: 'Search the web for the definition of the word longest word in the text that begins with the letter "P"', + }, + ], + }, + ], + config: { + tools: [ + { + type: 'web_search_20250305', + name: 'web_search', + max_uses: 5, + }, + ], + }, + }); + + return { + text: response.text, + usage: response.usage, + }; +}); From 9b0a5bf36b87723ca5aa6e1149f8ce892f8ec4fc Mon Sep 17 00:00:00 2001 From: Corie Watson Date: Mon, 12 Jan 2026 14:56:31 +0000 Subject: [PATCH 10/12] chore(testapps/anthropic): update readme --- js/testapps/anthropic/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/js/testapps/anthropic/README.md b/js/testapps/anthropic/README.md index b209958e68..c2911726be 100644 --- a/js/testapps/anthropic/README.md +++ b/js/testapps/anthropic/README.md @@ -42,6 +42,7 @@ src/ - `pnpm run dev:stable:webp` – Start Dev UI for WEBP image handling demo. - `pnpm run dev:stable:pdf` – Start Dev UI for PDF document processing demo. - `pnpm run dev:stable:vision` – Start Dev UI for image/vision analysis demo. +- `pnpm run dev:stable:prompt-caching` – Start Dev UI for prompt caching demo. ## Flows @@ -72,4 +73,11 @@ Each source file defines flows that can be invoked from the Dev UI or the Genkit - `stable-vision-base64` – Analyze an image from a local file (base64 encoded) - `stable-vision-conversation` – Multi-turn conversation about an image +### Prompt Caching +- `stable-prompt-caching-system` – Caches a long system prompt. +- `stable-prompt-caching-user` – Applies caching to user-provided text input. +- `stable-prompt-caching-image` – Caches prompts that combine text and an image. +- `stable-prompt-caching-pdf` – Caches prompts including PDF file content. +- `stable-prompt-caching-with-tool-call` – Demonstrates caching with tool-assisted prompts. + Example: `genkit flow:run anthropic-stable-hello` From 9bae325e9d036bf0a3f1c32672d36cafa07ae0ec Mon Sep 17 00:00:00 2001 From: Corie Watson Date: Mon, 12 Jan 2026 16:00:33 +0000 Subject: [PATCH 11/12] chore(plugins/anthropic): update readme --- js/plugins/anthropic/README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/js/plugins/anthropic/README.md b/js/plugins/anthropic/README.md index 99bb31e688..bf49607ca0 100644 --- a/js/plugins/anthropic/README.md +++ b/js/plugins/anthropic/README.md @@ -81,6 +81,27 @@ console.log(response.reasoning); // Summarized thinking steps When thinking is enabled, request bodies sent through the plugin include the `thinking` payload (`{ type: 'enabled', budget_tokens: … }`) that Anthropic's API expects, and streamed responses deliver `reasoning` parts as they arrive so you can render the chain-of-thought incrementally. +### Prompt Caching + +You can cache prompts by adding `cache_control` metadata to the prompt. You can define this for system messages, user messages, tools, and media. + +```typescript +const response = await ai.generate({ + prompt: 'What is the weather in Tokyo?', + messages: [ + { + role: 'user', + content: [{ text: 'What is the main idea of the text?' }], + metadata: { + cache_control: { type: 'ephemeral', ttl: '5m' }, // TTL options of either '5m' or '1h' + }, + }, + ], +}); +``` + +Note: Caching is only used when the prompt exceeds a certain token length. This token length is documented in the [Anthropic API documentation](https://platform.claude.com/docs/en/build-with-claude/prompt-caching). + ### Beta API Limitations The beta API surface provides access to experimental features, but some server-managed tool blocks are not yet supported by this plugin. The following beta API features will cause an error if encountered: From b36498f2f953c762e839a7371532d39818fe30d5 Mon Sep 17 00:00:00 2001 From: Corie Watson Date: Mon, 19 Jan 2026 14:27:15 +0000 Subject: [PATCH 12/12] chore(anthropic): address feedback --- js/plugins/anthropic/README.md | 11 +++++----- .../anthropic/tests/stable_runner_test.ts | 20 +++---------------- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/js/plugins/anthropic/README.md b/js/plugins/anthropic/README.md index bf49607ca0..104c58b7e5 100644 --- a/js/plugins/anthropic/README.md +++ b/js/plugins/anthropic/README.md @@ -87,14 +87,15 @@ You can cache prompts by adding `cache_control` metadata to the prompt. You can ```typescript const response = await ai.generate({ - prompt: 'What is the weather in Tokyo?', messages: [ { role: 'user', - content: [{ text: 'What is the main idea of the text?' }], - metadata: { - cache_control: { type: 'ephemeral', ttl: '5m' }, // TTL options of either '5m' or '1h' - }, + content: [{ + text: 'What is the main idea of the text?', + metadata: { + cache_control: { type: 'ephemeral', ttl: '5m' }, // TTL options of either '5m' or '1h' + }, + }], }, ], }); diff --git a/js/plugins/anthropic/tests/stable_runner_test.ts b/js/plugins/anthropic/tests/stable_runner_test.ts index 39f34c70ae..0c62afcc35 100644 --- a/js/plugins/anthropic/tests/stable_runner_test.ts +++ b/js/plugins/anthropic/tests/stable_runner_test.ts @@ -1214,11 +1214,11 @@ describe('toAnthropicStreamingRequestBody', () => { output: { format: 'text' }, }; - const outputWithCaching = testRunner.toAnthropicStreamingRequestBody( + const output = testRunner.toAnthropicStreamingRequestBody( 'claude-3-5-haiku', request ); - assert.deepStrictEqual(outputWithCaching.system, [ + assert.deepStrictEqual(output.system, [ { type: 'text', text: 'You are a helpful assistant', @@ -1226,21 +1226,7 @@ describe('toAnthropicStreamingRequestBody', () => { citations: null, }, ]); - assert.strictEqual(outputWithCaching.stream, true); - - const outputWithoutCaching = testRunner.toAnthropicStreamingRequestBody( - 'claude-3-5-haiku', - request - ); - assert.deepStrictEqual(outputWithoutCaching.system, [ - { - type: 'text', - text: 'You are a helpful assistant', - citations: null, - cache_control: { type: 'ephemeral' }, - }, - ]); - assert.strictEqual(outputWithoutCaching.stream, true); + assert.strictEqual(output.stream, true); }); });