Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions js/plugins/anthropic/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,28 @@ console.log(response.reasoning); // Summarized thinking steps

When thinking is enabled, request bodies sent through the plugin include the `thinking` payload (`{ type: 'enabled', budget_tokens: … }`) that Anthropic's API expects, and streamed responses deliver `reasoning` parts as they arrive so you can render the chain-of-thought incrementally.

### Prompt Caching

You can cache prompts by adding `cache_control` metadata to individual prompt parts. This is supported on system messages, user messages, tools, and media.

```typescript
const response = await ai.generate({
messages: [
{
role: 'user',
content: [{
text: 'What is the main idea of the text?',
metadata: {
cache_control: { type: 'ephemeral', ttl: '5m' }, // TTL options of either '5m' or '1h'
},
}],
},
],
});
```

Note: Caching only takes effect once a prompt meets the minimum cacheable token length, which varies by model and is documented in the [Anthropic API documentation](https://platform.claude.com/docs/en/build-with-claude/prompt-caching).

### Beta API Limitations

The beta API surface provides access to experimental features, but some server-managed tool blocks are not yet supported by this plugin. The following beta API features will cause an error if encountered:
Expand Down
14 changes: 3 additions & 11 deletions js/plugins/anthropic/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ import {
} from './models.js';
import { InternalPluginOptions, PluginOptions, __testClient } from './types.js';

const PROMPT_CACHING_BETA_HEADER_VALUE = 'prompt-caching-2024-07-31';

/**
* Gets or creates an Anthropic client instance.
* Supports test client injection for internal testing.
Expand All @@ -53,11 +51,7 @@ function getAnthropicClient(options?: PluginOptions): Anthropic {
'Please pass in the API key or set the ANTHROPIC_API_KEY environment variable'
);
}
const defaultHeaders: Record<string, string> = {};
if (options?.cacheSystemPrompt) {
defaultHeaders['anthropic-beta'] = PROMPT_CACHING_BETA_HEADER_VALUE;
}
return new Anthropic({ apiKey, defaultHeaders });
return new Anthropic({ apiKey });
}

/**
Expand All @@ -71,7 +65,7 @@ function getAnthropicClient(options?: PluginOptions): Anthropic {
* - anthropic: The main plugin function to interact with the Anthropic AI.
*
* Usage:
* To use the Claude models, initialize the anthropic plugin inside `genkit()` and pass the configuration options. If no API key is provided in the options, the environment variable `ANTHROPIC_API_KEY` must be set. If you want to cache the system prompt, set `cacheSystemPrompt` to `true`. **Note:** Prompt caching is in beta and may change. To learn more, see https://docs.anthropic.com/en/docs/prompt-caching.
* To use the Claude models, initialize the anthropic plugin inside `genkit()` and pass the configuration options. If no API key is provided in the options, the environment variable `ANTHROPIC_API_KEY` must be set.
*
* Example:
* ```
Expand All @@ -80,7 +74,7 @@ function getAnthropicClient(options?: PluginOptions): Anthropic {
*
* const ai = genkit({
* plugins: [
* anthropic({ apiKey: 'your-api-key', cacheSystemPrompt: false })
* anthropic({ apiKey: 'your-api-key' })
* ... // other plugins
* ]
* });
Expand All @@ -103,7 +97,6 @@ function anthropicPlugin(options?: PluginOptions): GenkitPluginV2 {
const action = claudeModel({
name,
client,
cacheSystemPrompt: options?.cacheSystemPrompt,
defaultApiVersion,
});
actions.push(action);
Expand All @@ -117,7 +110,6 @@ function anthropicPlugin(options?: PluginOptions): GenkitPluginV2 {
return claudeModel({
name: modelName,
client,
cacheSystemPrompt: options?.cacheSystemPrompt,
defaultApiVersion,
});
}
Expand Down
8 changes: 1 addition & 7 deletions js/plugins/anthropic/src/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -272,12 +272,7 @@ export function claudeModelReference(
export function claudeModel(
params: ClaudeModelParams
): ModelAction<z.ZodTypeAny> {
const {
name,
client: runnerClient,
cacheSystemPrompt: cachePrompt,
defaultApiVersion: apiVersion,
} = params;
const { name, client: runnerClient, defaultApiVersion: apiVersion } = params;
// Use supported model ref if available, otherwise create generic model ref
const knownModelRef = KNOWN_CLAUDE_MODELS[name];
let modelInfo = knownModelRef
Expand All @@ -297,7 +292,6 @@ export function claudeModel(
{
name,
client: runnerClient,
cacheSystemPrompt: cachePrompt,
defaultApiVersion: apiVersion,
},
configSchema
Expand Down
46 changes: 12 additions & 34 deletions js/plugins/anthropic/src/runner/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ const ANTHROPIC_THINKING_CUSTOM_KEY = 'anthropicThinking';
export abstract class BaseRunner<ApiTypes extends RunnerTypes> {
protected name: string;
protected client: Anthropic;
protected cacheSystemPrompt?: boolean;

/**
* Default maximum output tokens for Claude models when not specified in the request.
Expand All @@ -72,7 +71,6 @@ export abstract class BaseRunner<ApiTypes extends RunnerTypes> {
constructor(params: ClaudeRunnerParams) {
this.name = params.name;
this.client = params.client;
this.cacheSystemPrompt = params.cacheSystemPrompt;
}

/**
Expand Down Expand Up @@ -395,39 +393,31 @@ export abstract class BaseRunner<ApiTypes extends RunnerTypes> {
* toAnthropicMessageContent implementation.
*/
protected toAnthropicMessages(messages: MessageData[]): {
system?: string;
system?: RunnerContentBlockParam<ApiTypes>[];
messages: RunnerMessageParam<ApiTypes>[];
} {
let system: string | undefined;
let system: RunnerContentBlockParam<ApiTypes>[] | undefined;
Comment thread
dackers86 marked this conversation as resolved.

if (messages[0]?.role === 'system') {
const systemMessage = messages[0];
const textParts: string[] = [];
messages = messages.slice(1);

for (const part of systemMessage.content ?? []) {
if (part.text) {
textParts.push(part.text);
} else if (part.media || part.toolRequest || part.toolResponse) {
if (part.media || part.toolRequest || part.toolResponse) {
throw new Error(
'System messages can only contain text content. Media, tool requests, and tool responses are not supported in system messages.'
);
}
}

// Concatenate multiple text parts into a single string.
// Note: The Anthropic SDK supports system as string | Array<TextBlockParam>,
// so we could alternatively preserve the multi-part structure as:
// system = textParts.map(text => ({ type: 'text', text }))
// However, concatenation is simpler and maintains semantic equivalence while
// keeping the cache control logic straightforward in the concrete runners.
system = textParts.length > 0 ? textParts.join('\n\n') : undefined;
system = systemMessage.content.map((part) =>
this.toAnthropicMessageContent(part)
);
}

const messagesToIterate =
system !== undefined ? messages.slice(1) : messages;
const anthropicMsgs: RunnerMessageParam<ApiTypes>[] = [];

for (const message of messagesToIterate) {
for (const message of messages) {
const msg = new GenkitMessage(message);

// Detect tool message kind from Genkit Parts (no SDK typing needed)
Expand Down Expand Up @@ -467,28 +457,24 @@ export abstract class BaseRunner<ApiTypes extends RunnerTypes> {
* Converts an Anthropic request to a non-streaming Anthropic API request body.
* @param modelName The name of the Anthropic model to use.
* @param request The Genkit GenerateRequest to convert.
* @param cacheSystemPrompt Whether to cache the system prompt.
* @returns The converted Anthropic API non-streaming request body.
* @throws An error if an unsupported output format is requested.
*/
protected abstract toAnthropicRequestBody(
modelName: string,
request: GenerateRequest<typeof AnthropicConfigSchema>,
cacheSystemPrompt?: boolean
request: GenerateRequest<typeof AnthropicConfigSchema>
): RunnerRequestBody<ApiTypes>;

/**
* Converts an Anthropic request to a streaming Anthropic API request body.
* @param modelName The name of the Anthropic model to use.
* @param request The Genkit GenerateRequest to convert.
* @param cacheSystemPrompt Whether to cache the system prompt.
* @returns The converted Anthropic API streaming request body.
* @throws An error if an unsupported output format is requested.
*/
protected abstract toAnthropicStreamingRequestBody(
modelName: string,
request: GenerateRequest<typeof AnthropicConfigSchema>,
cacheSystemPrompt?: boolean
request: GenerateRequest<typeof AnthropicConfigSchema>
): RunnerStreamingRequestBody<ApiTypes>;

protected abstract createMessage(
Expand Down Expand Up @@ -520,11 +506,7 @@ export abstract class BaseRunner<ApiTypes extends RunnerTypes> {
const { streamingRequested, sendChunk, abortSignal } = options;

if (streamingRequested) {
const body = this.toAnthropicStreamingRequestBody(
this.name,
request,
this.cacheSystemPrompt
);
const body = this.toAnthropicStreamingRequestBody(this.name, request);
const stream = this.streamMessages(body, abortSignal);
for await (const event of stream) {
const part = this.toGenkitPart(event);
Expand All @@ -539,11 +521,7 @@ export abstract class BaseRunner<ApiTypes extends RunnerTypes> {
return this.toGenkitResponse(finalMessage);
}

const body = this.toAnthropicRequestBody(
this.name,
request,
this.cacheSystemPrompt
);
const body = this.toAnthropicRequestBody(this.name, request);
const response = await this.createMessage(body, abortSignal);
return this.toGenkitResponse(response);
}
Expand Down
37 changes: 4 additions & 33 deletions js/plugins/anthropic/src/runner/beta.ts
Original file line number Diff line number Diff line change
Expand Up @@ -299,28 +299,13 @@ export class BetaRunner extends BaseRunner<BetaRunnerTypes> {
*/
protected toAnthropicRequestBody(
modelName: string,
request: GenerateRequest<typeof AnthropicConfigSchema>,
cacheSystemPrompt?: boolean
request: GenerateRequest<typeof AnthropicConfigSchema>
): BetaMessageCreateParamsNonStreaming {
const model = KNOWN_CLAUDE_MODELS[modelName];
const { system, messages } = this.toAnthropicMessages(request.messages);
const mappedModelName =
request.config?.version ?? extractVersion(model, modelName);

let betaSystem: BetaMessageCreateParamsNonStreaming['system'];

if (system !== undefined) {
betaSystem = cacheSystemPrompt
? [
{
type: 'text' as const,
text: system,
cache_control: { type: 'ephemeral' as const },
},
]
: system;
}

const thinkingConfig = this.toAnthropicThinkingConfig(
request.config?.thinking
) as BetaMessageCreateParams['thinking'] | undefined;
Expand All @@ -342,7 +327,7 @@ export class BetaRunner extends BaseRunner<BetaRunnerTypes> {
max_tokens:
request.config?.maxOutputTokens ?? this.DEFAULT_MAX_OUTPUT_TOKENS,
messages,
system: betaSystem,
system: system as BetaTextBlockParam[],
stop_sequences: request.config?.stopSequences,
temperature: request.config?.temperature,
top_k: topK,
Expand Down Expand Up @@ -371,27 +356,13 @@ export class BetaRunner extends BaseRunner<BetaRunnerTypes> {
*/
protected toAnthropicStreamingRequestBody(
modelName: string,
request: GenerateRequest<typeof AnthropicConfigSchema>,
cacheSystemPrompt?: boolean
request: GenerateRequest<typeof AnthropicConfigSchema>
): BetaMessageCreateParamsStreaming {
const model = KNOWN_CLAUDE_MODELS[modelName];
const { system, messages } = this.toAnthropicMessages(request.messages);
const mappedModelName =
request.config?.version ?? extractVersion(model, modelName);

const betaSystem =
system === undefined
? undefined
: cacheSystemPrompt
? [
{
type: 'text' as const,
text: system,
cache_control: { type: 'ephemeral' as const },
},
]
: system;

const thinkingConfig = this.toAnthropicThinkingConfig(
request.config?.thinking
) as BetaMessageCreateParams['thinking'] | undefined;
Expand All @@ -414,7 +385,7 @@ export class BetaRunner extends BaseRunner<BetaRunnerTypes> {
request.config?.maxOutputTokens ?? this.DEFAULT_MAX_OUTPUT_TOKENS,
messages,
stream: true,
system: betaSystem,
system: system as BetaTextBlockParam[],
stop_sequences: request.config?.stopSequences,
temperature: request.config?.temperature,
top_k: topK,
Expand Down
Loading