Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions js/plugins/anthropic/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,28 @@ console.log(response.reasoning); // Summarized thinking steps

When thinking is enabled, request bodies sent through the plugin include the `thinking` payload (`{ type: 'enabled', budget_tokens: … }`) that Anthropic's API expects, and streamed responses deliver `reasoning` parts as they arrive so you can render the chain-of-thought incrementally.

### Prompt Caching

You can cache prompts by adding `cache_control` metadata to individual prompt parts. This is supported on system messages, user messages, tools, and media.

```typescript
const response = await ai.generate({
messages: [
{
role: 'user',
content: [{
text: 'What is the main idea of the text?',
metadata: {
cache_control: { type: 'ephemeral', ttl: '5m' }, // TTL options of either '5m' or '1h'
},
}],
},
],
});
```

Note: Caching only takes effect once a prompt meets the minimum cacheable token length, which varies by model and is documented in the [Anthropic API documentation](https://platform.claude.com/docs/en/build-with-claude/prompt-caching).

### Beta API Limitations

The beta API surface provides access to experimental features, but some server-managed tool blocks are not yet supported by this plugin. The following beta API features will cause an error if encountered:
Expand Down
14 changes: 3 additions & 11 deletions js/plugins/anthropic/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ import {
} from './models.js';
import { InternalPluginOptions, PluginOptions, __testClient } from './types.js';

const PROMPT_CACHING_BETA_HEADER_VALUE = 'prompt-caching-2024-07-31';

/**
* Gets or creates an Anthropic client instance.
* Supports test client injection for internal testing.
Expand All @@ -53,11 +51,7 @@ function getAnthropicClient(options?: PluginOptions): Anthropic {
'Please pass in the API key or set the ANTHROPIC_API_KEY environment variable'
);
}
const defaultHeaders: Record<string, string> = {};
if (options?.cacheSystemPrompt) {
defaultHeaders['anthropic-beta'] = PROMPT_CACHING_BETA_HEADER_VALUE;
}
return new Anthropic({ apiKey, defaultHeaders });
return new Anthropic({ apiKey });
}

/**
Expand All @@ -71,7 +65,7 @@ function getAnthropicClient(options?: PluginOptions): Anthropic {
* - anthropic: The main plugin function to interact with the Anthropic AI.
*
* Usage:
* To use the Claude models, initialize the anthropic plugin inside `genkit()` and pass the configuration options. If no API key is provided in the options, the environment variable `ANTHROPIC_API_KEY` must be set. If you want to cache the system prompt, set `cacheSystemPrompt` to `true`. **Note:** Prompt caching is in beta and may change. To learn more, see https://docs.anthropic.com/en/docs/prompt-caching.
* To use the Claude models, initialize the anthropic plugin inside `genkit()` and pass the configuration options. If no API key is provided in the options, the environment variable `ANTHROPIC_API_KEY` must be set.
*
* Example:
* ```
Expand All @@ -80,7 +74,7 @@ function getAnthropicClient(options?: PluginOptions): Anthropic {
*
* const ai = genkit({
* plugins: [
* anthropic({ apiKey: 'your-api-key', cacheSystemPrompt: false })
* anthropic({ apiKey: 'your-api-key' })
* ... // other plugins
* ]
* });
Expand All @@ -103,7 +97,6 @@ function anthropicPlugin(options?: PluginOptions): GenkitPluginV2 {
const action = claudeModel({
name,
client,
cacheSystemPrompt: options?.cacheSystemPrompt,
defaultApiVersion,
});
actions.push(action);
Expand All @@ -117,7 +110,6 @@ function anthropicPlugin(options?: PluginOptions): GenkitPluginV2 {
return claudeModel({
name: modelName,
client,
cacheSystemPrompt: options?.cacheSystemPrompt,
defaultApiVersion,
});
}
Expand Down
8 changes: 1 addition & 7 deletions js/plugins/anthropic/src/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -272,12 +272,7 @@ export function claudeModelReference(
export function claudeModel(
params: ClaudeModelParams
): ModelAction<z.ZodTypeAny> {
const {
name,
client: runnerClient,
cacheSystemPrompt: cachePrompt,
defaultApiVersion: apiVersion,
} = params;
const { name, client: runnerClient, defaultApiVersion: apiVersion } = params;
// Use supported model ref if available, otherwise create generic model ref
const knownModelRef = KNOWN_CLAUDE_MODELS[name];
let modelInfo = knownModelRef
Expand All @@ -297,7 +292,6 @@ export function claudeModel(
{
name,
client: runnerClient,
cacheSystemPrompt: cachePrompt,
defaultApiVersion: apiVersion,
},
configSchema
Expand Down
46 changes: 12 additions & 34 deletions js/plugins/anthropic/src/runner/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ const ANTHROPIC_THINKING_CUSTOM_KEY = 'anthropicThinking';
export abstract class BaseRunner<ApiTypes extends RunnerTypes> {
protected name: string;
protected client: Anthropic;
protected cacheSystemPrompt?: boolean;

/**
* Default maximum output tokens for Claude models when not specified in the request.
Expand All @@ -72,7 +71,6 @@ export abstract class BaseRunner<ApiTypes extends RunnerTypes> {
constructor(params: ClaudeRunnerParams) {
this.name = params.name;
this.client = params.client;
this.cacheSystemPrompt = params.cacheSystemPrompt;
}

/**
Expand Down Expand Up @@ -395,39 +393,31 @@ export abstract class BaseRunner<ApiTypes extends RunnerTypes> {
* toAnthropicMessageContent implementation.
*/
protected toAnthropicMessages(messages: MessageData[]): {
system?: string;
system?: RunnerContentBlockParam<ApiTypes>[];
messages: RunnerMessageParam<ApiTypes>[];
} {
let system: string | undefined;
let system: RunnerContentBlockParam<ApiTypes>[] | undefined;
Comment thread
dackers86 marked this conversation as resolved.

if (messages[0]?.role === 'system') {
const systemMessage = messages[0];
const textParts: string[] = [];
messages = messages.slice(1);

for (const part of systemMessage.content ?? []) {
if (part.text) {
textParts.push(part.text);
} else if (part.media || part.toolRequest || part.toolResponse) {
if (part.media || part.toolRequest || part.toolResponse) {
throw new Error(
'System messages can only contain text content. Media, tool requests, and tool responses are not supported in system messages.'
);
}
}

// Concatenate multiple text parts into a single string.
// Note: The Anthropic SDK supports system as string | Array<TextBlockParam>,
// so we could alternatively preserve the multi-part structure as:
// system = textParts.map(text => ({ type: 'text', text }))
// However, concatenation is simpler and maintains semantic equivalence while
// keeping the cache control logic straightforward in the concrete runners.
system = textParts.length > 0 ? textParts.join('\n\n') : undefined;
system = systemMessage.content.map((part) =>
this.toAnthropicMessageContent(part)
);
}

const messagesToIterate =
system !== undefined ? messages.slice(1) : messages;
const anthropicMsgs: RunnerMessageParam<ApiTypes>[] = [];

for (const message of messagesToIterate) {
for (const message of messages) {
const msg = new GenkitMessage(message);

// Detect tool message kind from Genkit Parts (no SDK typing needed)
Expand Down Expand Up @@ -467,28 +457,24 @@ export abstract class BaseRunner<ApiTypes extends RunnerTypes> {
* Converts an Anthropic request to a non-streaming Anthropic API request body.
* @param modelName The name of the Anthropic model to use.
* @param request The Genkit GenerateRequest to convert.
* @param cacheSystemPrompt Whether to cache the system prompt.
* @returns The converted Anthropic API non-streaming request body.
* @throws An error if an unsupported output format is requested.
*/
protected abstract toAnthropicRequestBody(
modelName: string,
request: GenerateRequest<typeof AnthropicConfigSchema>,
cacheSystemPrompt?: boolean
request: GenerateRequest<typeof AnthropicConfigSchema>
): RunnerRequestBody<ApiTypes>;

/**
* Converts an Anthropic request to a streaming Anthropic API request body.
* @param modelName The name of the Anthropic model to use.
* @param request The Genkit GenerateRequest to convert.
* @param cacheSystemPrompt Whether to cache the system prompt.
* @returns The converted Anthropic API streaming request body.
* @throws An error if an unsupported output format is requested.
*/
protected abstract toAnthropicStreamingRequestBody(
modelName: string,
request: GenerateRequest<typeof AnthropicConfigSchema>,
cacheSystemPrompt?: boolean
request: GenerateRequest<typeof AnthropicConfigSchema>
): RunnerStreamingRequestBody<ApiTypes>;

protected abstract createMessage(
Expand Down Expand Up @@ -520,11 +506,7 @@ export abstract class BaseRunner<ApiTypes extends RunnerTypes> {
const { streamingRequested, sendChunk, abortSignal } = options;

if (streamingRequested) {
const body = this.toAnthropicStreamingRequestBody(
this.name,
request,
this.cacheSystemPrompt
);
const body = this.toAnthropicStreamingRequestBody(this.name, request);
const stream = this.streamMessages(body, abortSignal);
for await (const event of stream) {
const part = this.toGenkitPart(event);
Expand All @@ -539,11 +521,7 @@ export abstract class BaseRunner<ApiTypes extends RunnerTypes> {
return this.toGenkitResponse(finalMessage);
}

const body = this.toAnthropicRequestBody(
this.name,
request,
this.cacheSystemPrompt
);
const body = this.toAnthropicRequestBody(this.name, request);
const response = await this.createMessage(body, abortSignal);
return this.toGenkitResponse(response);
}
Expand Down
37 changes: 4 additions & 33 deletions js/plugins/anthropic/src/runner/beta.ts
Original file line number Diff line number Diff line change
Expand Up @@ -299,28 +299,13 @@ export class BetaRunner extends BaseRunner<BetaRunnerTypes> {
*/
protected toAnthropicRequestBody(
modelName: string,
request: GenerateRequest<typeof AnthropicConfigSchema>,
cacheSystemPrompt?: boolean
request: GenerateRequest<typeof AnthropicConfigSchema>
): BetaMessageCreateParamsNonStreaming {
const model = KNOWN_CLAUDE_MODELS[modelName];
const { system, messages } = this.toAnthropicMessages(request.messages);
const mappedModelName =
request.config?.version ?? extractVersion(model, modelName);

let betaSystem: BetaMessageCreateParamsNonStreaming['system'];

if (system !== undefined) {
betaSystem = cacheSystemPrompt
? [
{
type: 'text' as const,
text: system,
cache_control: { type: 'ephemeral' as const },
},
]
: system;
}

const thinkingConfig = this.toAnthropicThinkingConfig(
request.config?.thinking
) as BetaMessageCreateParams['thinking'] | undefined;
Expand All @@ -342,7 +327,7 @@ export class BetaRunner extends BaseRunner<BetaRunnerTypes> {
max_tokens:
request.config?.maxOutputTokens ?? this.DEFAULT_MAX_OUTPUT_TOKENS,
messages,
system: betaSystem,
system: system as BetaTextBlockParam[],
stop_sequences: request.config?.stopSequences,
temperature: request.config?.temperature,
top_k: topK,
Expand Down Expand Up @@ -371,27 +356,13 @@ export class BetaRunner extends BaseRunner<BetaRunnerTypes> {
*/
protected toAnthropicStreamingRequestBody(
modelName: string,
request: GenerateRequest<typeof AnthropicConfigSchema>,
cacheSystemPrompt?: boolean
request: GenerateRequest<typeof AnthropicConfigSchema>
): BetaMessageCreateParamsStreaming {
const model = KNOWN_CLAUDE_MODELS[modelName];
const { system, messages } = this.toAnthropicMessages(request.messages);
const mappedModelName =
request.config?.version ?? extractVersion(model, modelName);

const betaSystem =
system === undefined
? undefined
: cacheSystemPrompt
? [
{
type: 'text' as const,
text: system,
cache_control: { type: 'ephemeral' as const },
},
]
: system;

const thinkingConfig = this.toAnthropicThinkingConfig(
request.config?.thinking
) as BetaMessageCreateParams['thinking'] | undefined;
Expand All @@ -414,7 +385,7 @@ export class BetaRunner extends BaseRunner<BetaRunnerTypes> {
request.config?.maxOutputTokens ?? this.DEFAULT_MAX_OUTPUT_TOKENS,
messages,
stream: true,
system: betaSystem,
system: system as BetaTextBlockParam[],
stop_sequences: request.config?.stopSequences,
temperature: request.config?.temperature,
top_k: topK,
Expand Down
Loading