diff --git a/README.md b/README.md index 29ae3b5e..b3de58f3 100644 --- a/README.md +++ b/README.md @@ -184,7 +184,7 @@ object Main: response.body match { case Right(messageResponse) => messageResponse.content.foreach { - case ContentBlock.TextContent(text, _) => println(text) + case ContentBlock.TextContent(text, _, _) => println(text) case _ => // Handle other content types if needed } println(s"Usage: ${messageResponse.usage}") @@ -290,6 +290,7 @@ val request = MessageRequest( stopSequences = Some(List("\n\n")), // Stop generation at sequences system = Some("Be concise and helpful."), - tools = Some(tools) // Tool calling support + tools = Some(tools), // Tool calling support + cacheControl = Some(CacheControl.Ephemeral()) // Optional cache control ) ``` @@ -391,7 +392,7 @@ object StructuredOutputExample: response.body match { case Right(messageResponse) => messageResponse.content.foreach { - case ContentBlock.TextContent(text, _) => + case ContentBlock.TextContent(text, _, _) => println("Structured JSON output:") println(text) diff --git a/claude/src/main/scala/sttp/ai/claude/ClaudeSyncClient.scala b/claude/src/main/scala/sttp/ai/claude/ClaudeSyncClient.scala index 0da80e84..0f6b2e26 100644 --- a/claude/src/main/scala/sttp/ai/claude/ClaudeSyncClient.scala +++ b/claude/src/main/scala/sttp/ai/claude/ClaudeSyncClient.scala @@ -25,7 +25,7 @@ class ClaudeSyncClient(config: ClaudeConfig, backend: SyncBackend = DefaultSyncB val response = createMessage(withSchema) - val text = response.content.collect { case ContentBlock.TextContent(t, _) => t }.mkString + val text = response.content.collect { case ContentBlock.TextContent(t, _, _) => t }.mkString try SnakePickle.read[T](text) catch { diff --git a/claude/src/main/scala/sttp/ai/claude/agent/ClaudeAgent.scala b/claude/src/main/scala/sttp/ai/claude/agent/ClaudeAgent.scala index 9dea06e9..c4ef213d 100644 --- a/claude/src/main/scala/sttp/ai/claude/agent/ClaudeAgent.scala +++ b/claude/src/main/scala/sttp/ai/claude/agent/ClaudeAgent.scala @@ -112,7 +112,7 @@ private[claude] 
class ClaudeAgentBackend[F[_]]( monad.flatMap(monad.map(client.createMessage(request).send(backend))(_.body)) { case Right(response) => val textContent = response.content - .collectFirst { case ContentBlock.TextContent(text, _) => text } + .collectFirst { case ContentBlock.TextContent(text, _, _) => text } .getOrElse("") val toolCalls = response.content.collect { case ContentBlock.ToolUseContent(id, name, input) => diff --git a/claude/src/main/scala/sttp/ai/claude/models/CacheControl.scala b/claude/src/main/scala/sttp/ai/claude/models/CacheControl.scala new file mode 100644 index 00000000..5b69d851 --- /dev/null +++ b/claude/src/main/scala/sttp/ai/claude/models/CacheControl.scala @@ -0,0 +1,18 @@ +package sttp.ai.claude.models + +import sttp.ai.core.json.SnakePickle +import upickle.implicits.key + +@key("type") +sealed trait CacheControl + +object CacheControl { + @key("ephemeral") + final case class Ephemeral(ttl: Option[String] = None) extends CacheControl + + object Ephemeral { + implicit val rw: SnakePickle.ReadWriter[Ephemeral] = SnakePickle.macroRW + } + + implicit val rw: SnakePickle.ReadWriter[CacheControl] = SnakePickle.macroRW +} diff --git a/claude/src/main/scala/sttp/ai/claude/models/ContentBlock.scala b/claude/src/main/scala/sttp/ai/claude/models/ContentBlock.scala index 5176ad3d..8525b12a 100644 --- a/claude/src/main/scala/sttp/ai/claude/models/ContentBlock.scala +++ b/claude/src/main/scala/sttp/ai/claude/models/ContentBlock.scala @@ -11,7 +11,8 @@ sealed trait ContentBlock { object ContentBlock { @key("text") - case class TextContent(text: String, citations: Option[List[Citation]] = None) extends ContentBlock { + case class TextContent(text: String, citations: Option[List[Citation]] = None, cacheControl: Option[CacheControl] = None) + extends ContentBlock { val `type`: String = "text" } @@ -21,7 +22,7 @@ object ContentBlock { } @key("image") - case class ImageContent(source: ImageSource) extends ContentBlock { + case class ImageContent(source: 
ImageSource, cacheControl: Option[CacheControl] = None) extends ContentBlock { val `type`: String = "image" } @@ -38,7 +39,8 @@ object ContentBlock { case class ToolResultContent( toolUseId: String, content: String, - isError: Option[Boolean] = None + isError: Option[Boolean] = None, + cacheControl: Option[CacheControl] = None ) extends ContentBlock { val `type`: String = "tool_result" } @@ -48,7 +50,8 @@ object ContentBlock { source: DocumentSource, title: Option[String] = None, context: Option[String] = None, - citations: Option[CitationsConfig] = None + citations: Option[CitationsConfig] = None, + cacheControl: Option[CacheControl] = None ) extends ContentBlock { val `type`: String = "document" } @@ -75,7 +78,8 @@ object ContentBlock { url: String, title: String, pageAge: Option[String] = None, - encryptedContent: Option[String] = None + encryptedContent: Option[String] = None, + cacheControl: Option[CacheControl] = None ) { val `type`: String = "web_search_result" } diff --git a/claude/src/main/scala/sttp/ai/claude/models/Tool.scala b/claude/src/main/scala/sttp/ai/claude/models/Tool.scala index f3ac14f0..8d2807fd 100644 --- a/claude/src/main/scala/sttp/ai/claude/models/Tool.scala +++ b/claude/src/main/scala/sttp/ai/claude/models/Tool.scala @@ -9,7 +9,8 @@ sealed trait Tool case class ToolInputSchema( `type`: String, properties: Map[String, PropertySchema], - required: Option[List[String]] = None + required: Option[List[String]] = None, + cacheControl: Option[CacheControl] = None ) case class PropertySchema( diff --git a/claude/src/main/scala/sttp/ai/claude/models/Usage.scala b/claude/src/main/scala/sttp/ai/claude/models/Usage.scala index ed547657..42b4efcf 100644 --- a/claude/src/main/scala/sttp/ai/claude/models/Usage.scala +++ b/claude/src/main/scala/sttp/ai/claude/models/Usage.scala @@ -4,9 +4,12 @@ import sttp.ai.core.json.SnakePickle.{macroRW, ReadWriter} case class Usage( inputTokens: Int, - outputTokens: Int + outputTokens: Int, + cacheReadInputTokens: 
Option[Int] = None, + cacheCreationInputTokens: Option[Int] = None ) { - def totalTokens: Int = inputTokens + outputTokens + def totalInputTokens: Int = inputTokens + cacheReadInputTokens.getOrElse(0) + cacheCreationInputTokens.getOrElse(0) + def totalTokens: Int = totalInputTokens + outputTokens } object Usage { diff --git a/claude/src/main/scala/sttp/ai/claude/requests/MessageRequest.scala b/claude/src/main/scala/sttp/ai/claude/requests/MessageRequest.scala index f7718104..6b86eab7 100644 --- a/claude/src/main/scala/sttp/ai/claude/requests/MessageRequest.scala +++ b/claude/src/main/scala/sttp/ai/claude/requests/MessageRequest.scala @@ -1,6 +1,6 @@ package sttp.ai.claude.requests -import sttp.ai.claude.models.{Effort, Message, OutputConfig, OutputFormat, Tool} +import sttp.ai.claude.models.{CacheControl, Effort, Message, OutputConfig, OutputFormat, Tool} import sttp.ai.core.json.SnakePickle.{macroRW, ReadWriter} case class MessageRequest( @@ -14,7 +14,8 @@ case class MessageRequest( stopSequences: Option[List[String]] = None, stream: Option[Boolean] = None, tools: Option[List[Tool]] = None, - outputConfig: Option[OutputConfig] = None + outputConfig: Option[OutputConfig] = None, + cacheControl: Option[CacheControl] = None ) { def usesStructuredOutput: Boolean = outputConfig.exists(_.format.exists(_.isInstanceOf[OutputFormat.JsonSchema])) @@ -23,6 +24,9 @@ case class MessageRequest( this.copy(outputConfig = Some(updated)) } + def withCacheControl(cacheControl: CacheControl): MessageRequest = + this.copy(cacheControl = Some(cacheControl)) + def withEffort(effort: Effort): MessageRequest = { val updated = outputConfig.getOrElse(OutputConfig()).copy(effort = Some(effort)) this.copy(outputConfig = Some(updated)) diff --git a/claude/src/test/resources/Readme-prefix.md b/claude/src/test/resources/Readme-prefix.md new file mode 100644 index 00000000..5a2684b7 --- /dev/null +++ b/claude/src/test/resources/Readme-prefix.md @@ -0,0 +1,655 @@ 
+![sttp-ai](https://github.com/softwaremill/sttp-ai/raw/master/banner.png) + +[![Ideas, suggestions, problems, questions](https://img.shields.io/badge/Discourse-ask%20question-blue)](https://softwaremill.community/c/open-source) +[![CI](https://github.com/softwaremill/sttp-ai/workflows/CI/badge.svg)](https://github.com/softwaremill/sttp-ai/actions?query=workflow%3ACI+branch%3Amaster) + +[![sttp.ai:core](https://maven-badges.sml.io/sonatype-central/com.softwaremill.sttp.ai/core_3/badge.svg?subject=sttp.ai:core)](https://maven-badges.sml.io/sonatype-central/com.softwaremill.sttp.ai/core_3/) +[![sttp.ai:openai](https://maven-badges.sml.io/sonatype-central/com.softwaremill.sttp.ai/openai_3/badge.svg?subject=sttp.ai:openai)](https://maven-badges.sml.io/sonatype-central/com.softwaremill.sttp.ai/openai_3/) +[![sttp.ai:claude](https://maven-badges.sml.io/sonatype-central/com.softwaremill.sttp.ai/claude_3/badge.svg?subject=sttp.ai:claude)](https://maven-badges.sml.io/sonatype-central/com.softwaremill.sttp.ai/claude_3/) + +sttp is a family of Scala HTTP-related projects, and currently includes: + +* [sttp client](https://github.com/softwaremill/sttp): The Scala HTTP client you always wanted! +* [sttp tapir](https://github.com/softwaremill/tapir): Typed API descRiptions +* sttp ai: this project. Non-official Scala client wrapper for OpenAI, Claude (Anthropic), and OpenAI-compatible APIs. Use the power of ChatGPT and Claude inside your code! 
+ +## Table of Contents + +- [Intro](#intro) +- [Quickstart](#quickstart) + - [OpenAI/OpenAI-compatible APIs](#for-openaiopenai-compatible-apis) + - [Claude (Anthropic) API](#for-claude-anthropic-api) +- [OpenAI API](#openai-api) + - [Basic Usage](#basic-usage-openai) + - [Streaming](#streaming-openai) + - [Structured Outputs/JSON Schema](#structured-outputsjson-schema-support) + - [Function/Tool Calling](#generating-json-schema-from-case-class) +- [Claude API](#claude-api) + - [Features](#claude-features) + - [Basic Usage](#basic-usage-claude) + - [Configuration](#claude-configuration) + - [Messages API](#claude-messages-api) + - [Structured Outputs](#claude-structured-outputs) + - [Tool Calling](#claude-tool-calling) + - [Streaming](#claude-streaming) + - [Models API](#claude-models-api) + - [Error Handling](#claude-error-handling) + - [Key Differences from OpenAI](#key-differences-from-openai-api) + - [Synchronous Claude Client](#synchronous-claude-client) +- [Agent Loop](#agent-loop) + - [Exception Handling](#exception-handling) +- [OpenAI-Compatible APIs](#openai-compatible-apis) +- [Examples](#examples) +- [Contributing](#contributing) +- [Commercial Support](#commercial-support) +- [Copyright](#copyright) + +## Intro + +sttp-ai uses sttp client to describe requests and responses used in OpenAI, Claude (Anthropic), and OpenAI-compatible endpoints. 
+ +## Quickstart + +### For OpenAI/OpenAI-compatible APIs + +Add the following dependency: + +```sbt +"com.softwaremill.sttp.ai" %% "openai" % "0.4.14" +``` + +### For Claude (Anthropic) API + +Add the following dependency: + +```sbt +"com.softwaremill.sttp.ai" %% "claude" % "0.4.14" + +// For streaming support, add one or more: +"com.softwaremill.sttp.ai" %% "claude-streaming-fs2" % "0.4.14" // cats-effect/fs2 +"com.softwaremill.sttp.ai" %% "claude-streaming-zio" % "0.4.14" // ZIO +"com.softwaremill.sttp.ai" %% "claude-streaming-akka" % "0.4.14" // Akka Streams (Scala 2.13 only) +"com.softwaremill.sttp.ai" %% "claude-streaming-pekko" % "0.4.14" // Pekko Streams +"com.softwaremill.sttp.ai" %% "claude-streaming-ox" % "0.4.14" // Ox direct-style (Scala 3 only) +``` + +sttp-ai is available for Scala 2.13 and Scala 3. + +## OpenAI API + +OpenAI API Official Documentation: https://platform.openai.com/docs/api-reference/completions + +Examples are runnable using [scala-cli](https://scala-cli.virtuslab.org). 
+ +### Basic Usage (OpenAI) + +```scala mdoc:compile-only +//> using dep com.softwaremill.sttp.ai::openai:0.4.14 + +import sttp.ai.openai.OpenAISyncClient +import sttp.ai.openai.requests.completions.chat.ChatRequestResponseData.ChatResponse +import sttp.ai.openai.requests.completions.chat.ChatRequestBody.{ChatBody, ChatCompletionModel} +import sttp.ai.openai.requests.completions.chat.message.* + +object Main: + def main(args: Array[String]): Unit = + val apiKey = System.getenv("OPENAI_KEY") + val openAI = OpenAISyncClient(apiKey) + + // Create body of Chat Completions Request + val bodyMessages: Seq[Message] = Seq( + Message.UserMessage( + content = Content.TextContent("Hello!"), + ) + ) + + // use ChatCompletionModel.CustomChatCompletionModel("gpt-some-future-version") + // for models not yet supported here + val chatRequestBody: ChatBody = ChatBody( + model = ChatCompletionModel.GPT4oMini, + messages = bodyMessages + ) + + // be aware that calling `createChatCompletion` may throw an OpenAIException + // e.g. AuthenticationException, RateLimitException and many more + val chatResponse: ChatResponse = openAI.createChatCompletion(chatRequestBody) + + println(chatResponse) + /* + ChatResponse( + chatcmpl-79shQITCiqTHFlI9tgElqcbMTJCLZ,chat.completion, + 1682589572, + gpt-4o-mini, + Usage(10,10,20), + List( + Choices( + Message(assistant, Hello there! How can I assist you today?), stop, 0) + ) + ) + */ +``` + +## Claude API + +This module provides **native support for Anthropic's Claude API** within the sttp-ai library. Unlike OpenAI compatibility layers, this provides direct access to Claude's unique features and API structure. 
+ +### Claude Features + +- ✅ **Native Claude API support** - Direct Claude API integration, not a compatibility layer +- ✅ **ContentBlock structure** - Support for Claude's rich message content blocks (text, images) +- ✅ **Proper Authentication** - Uses `x-api-key` and `anthropic-version` headers +- ✅ **Messages API** - Complete `/v1/messages` endpoint implementation +- ✅ **Models API** - List available Claude models via `/v1/models` +- ✅ **Streaming Support** - Server-Sent Events streaming for all effect systems (fs2, ZIO, Akka, Pekko, Ox) +- ✅ **Tool Calling** - Native Claude tool calling support +- ✅ **Structured Outputs** - Beta support for JSON schema validation (Claude 4.1+ models) +- ✅ **Image Support** - Multi-modal inputs via ContentBlock with base64 encoding +- ✅ **Comprehensive Error Handling** - Claude-specific exception hierarchy +- ✅ **System Messages** - Proper system message handling via `system` parameter +- ✅ **Cross-platform** - Support for Scala 2.13 and Scala 3 + +### Basic Usage (Claude) + +```scala mdoc:compile-only +//> using dep com.softwaremill.sttp.ai::claude:0.4.14 + +import sttp.ai.claude.* +import sttp.ai.claude.config.ClaudeConfig +import sttp.ai.claude.models.{ContentBlock, Message} +import sttp.ai.claude.requests.MessageRequest +import sttp.client4.* + +object Main: + def main(args: Array[String]): Unit = + // Create an instance of ClaudeClient using your Anthropic API key + // Set ANTHROPIC_API_KEY environment variable or pass it directly + val config = ClaudeConfig.fromEnv // reads ANTHROPIC_API_KEY + val backend: SyncBackend = DefaultSyncBackend() + val client = ClaudeClient(config) + + // Create a simple message + val messages = List( + Message.user(List(ContentBlock.text("Hello Claude! 
What's the weather like today?"))) + ) + + val request = MessageRequest.simple( + model = "claude-3-haiku-20240307", // Fast, cost-effective model + messages = messages, + maxTokens = 500 + ) + + // Send the request (returns Either[ClaudeException, MessageResponse]) + val response = client.createMessage(request).send(backend) + + response.body match { + case Right(messageResponse) => + messageResponse.content.foreach { + case ContentBlock.TextContent(text, _) => println(text) + case _ => // Handle other content types if needed + } + println(s"Usage: ${messageResponse.usage}") + case Left(error) => + println(s"Claude API Error: ${error.getMessage}") + } + + backend.close() +``` + +**Key differences from OpenAI:** +- Uses `ContentBlock` instead of simple strings for rich content (text, images) +- Separate system parameter instead of system role messages +- Different authentication headers (`x-api-key` + `anthropic-version`) +- Native Claude model names (e.g., `claude-3-haiku-20240307`) + +### Claude Configuration + +```scala +case class ClaudeConfig( + apiKey: String, // Your Anthropic API key + anthropicVersion: String = "2023-06-01", // API version header + baseUrl: Uri = "https://api.anthropic.com", // API base URL + timeout: Duration = 60.seconds, // Request timeout + maxRetries: Int = 3, // Max retry attempts + organization: Option[String] = None // Optional organization ID +) +``` + +**Environment Variables:** +- `ANTHROPIC_API_KEY` - Your API key (required) +- `ANTHROPIC_VERSION` - API version (optional, defaults to "2023-06-01") +- `ANTHROPIC_BASE_URL` - Custom base URL (optional) + +### Claude Messages API + +#### Basic Text Conversation + +```scala +val messages = List( + Message.user(List(ContentBlock.text("What is the capital of France?"))), + Message.assistant(List(ContentBlock.text("The capital of France is Paris."))), + Message.user(List(ContentBlock.text("What about Italy?"))) +) + +val request = MessageRequest.simple( + model = 
"claude-3-sonnet-20240229", + messages = messages, + maxTokens = 1000 +) +``` + +#### System Messages + +Unlike OpenAI, Claude uses a separate `system` parameter instead of system role messages: + +```scala +val request = MessageRequest.withSystem( + model = "claude-3-sonnet-20240229", + system = "You are a helpful assistant that always responds in French.", + messages = List(Message.user(List(ContentBlock.text("Hello!")))), + maxTokens = 1000 +) +``` + +#### Image Support + +```scala +import java.util.Base64 +import java.nio.file.{Files, Paths} + +// Read and encode image +val imageBytes = Files.readAllBytes(Paths.get("image.jpg")) +val base64Image = Base64.getEncoder.encodeToString(imageBytes) + +val messages = List( + Message.user(List( + ContentBlock.text("What do you see in this image?"), + ContentBlock.image( + mediaType = "image/jpeg", + data = base64Image + ) + )) +) + +val request = MessageRequest.simple( + model = "claude-3-sonnet-20240229", + messages = messages, + maxTokens = 1000 +) +``` + +#### Advanced Parameters + +```scala +val request = MessageRequest( + model = "claude-3-sonnet-20240229", + messages = messages, + maxTokens = 4000, + temperature = Some(0.7), // Creativity (0.0 - 1.0) + topP = Some(0.9), // Nucleus sampling + topK = Some(40), // Top-k sampling + stopSequences = Some(List("\n\n")), // Stop generation at sequences + system = Some("Be concise and helpful."), + tools = Some(tools) // Tool calling support +) +``` + +### Claude Structured Outputs + +Claude's structured output feature (currently in beta) allows you to enforce that the model's response follows a specific JSON schema. This is useful for getting consistently formatted responses for data extraction, API responses, and structured data processing. + +**Model Support:** +- ✅ **Supported models**: Claude 4.1+ models (`claude-sonnet-4-1-20250514`, `claude-opus-4-1-20250514`, etc.) 
+- ❌ **Legacy models**: Claude 3.x series don't support structured outputs +- ✅ **Forward compatibility**: Unknown/future models default to supported + +#### Typed responses with `createMessageAs[T]` + +For the shortest path, use `ClaudeSyncClient.createMessageAs[T]` — the response schema is derived from `T` via Tapir, set on the request automatically, and the model's response is parsed back into `T` via uPickle. + +```scala mdoc:compile-only +//> using dep com.softwaremill.sttp.ai::claude:0.4.14 + +import sttp.ai.claude.ClaudeSyncClient +import sttp.ai.claude.models.Message +import sttp.ai.claude.requests.MessageRequest +import sttp.ai.core.json.SnakePickle +import sttp.tapir.Schema + +case class Language(name: String, paradigm: String, summary: String) derives SnakePickle.ReadWriter, Schema +case class LanguageList(languages: List[Language]) derives SnakePickle.ReadWriter, Schema + +object Main: + def main(args: Array[String]): Unit = + val claude = ClaudeSyncClient.fromEnv + try { + val request = MessageRequest.simple( + model = "claude-haiku-4-5-20251001", + messages = List(Message.user( + "List 10 well-known programming languages. For each, give the dominant paradigm and a one-sentence summary." + )), + maxTokens = 1500 + ) + val result: LanguageList = claude.createMessageAs[LanguageList](request) + result.languages.foreach(l => println(s"${l.name} [${l.paradigm}] — ${l.summary}")) + } finally claude.close() +``` + +`T` must have both a `sttp.tapir.Schema[T]` (for schema generation) and a `SnakePickle.ReadWriter[T]` (for parsing) — the `derives` clause supplies both in Scala 3. 
+ +#### Basic Structured Output Example + +```scala mdoc:compile-only +//> using dep com.softwaremill.sttp.ai::claude:0.4.14 +//> using dep com.softwaremill.sttp.tapir::tapir-core:1.11.7 + +import sttp.ai.claude.* +import sttp.ai.claude.config.ClaudeConfig +import sttp.ai.claude.models.{ContentBlock, Message, OutputFormat} +import sttp.ai.claude.requests.MessageRequest +import sttp.ai.core.json.SnakePickle +import sttp.apispec.{Schema => ASchema} +import sttp.client4.* +import sttp.tapir.Schema +import sttp.tapir.docs.apispec.schema.TapirSchemaToJsonSchema +import sttp.tapir.generic.auto.* + +object StructuredOutputExample: + // Define case class with Schema derivation + case class PersonInfo( + name: String, + age: Int, + occupation: String, + skills: List[String] + ) + + object PersonInfo: + given SnakePickle.Reader[PersonInfo] = SnakePickle.macroR[PersonInfo] + + def main(args: Array[String]): Unit = + val config = ClaudeConfig.fromEnv + val backend: SyncBackend = DefaultSyncBackend() + val client = ClaudeClient(config) + + // Generate JSON schema from case class using Tapir + val tapirSchema = implicitly[Schema[PersonInfo]] + val jsonSchema: ASchema = TapirSchemaToJsonSchema(tapirSchema, markOptionsAsNullable = true) + + val outputFormat = OutputFormat.JsonSchema(jsonSchema) + + val messages = List( + Message.user(List(ContentBlock.text( + "Extract information about John, a 30-year-old software engineer who knows Python and Scala." 
+ ))) + ) + + val request = MessageRequest + .simple("claude-sonnet-4-5-20250514", messages, 500) + .withStructuredOutput(outputFormat) + + val response = client.createMessage(request).send(backend) + + response.body match { + case Right(messageResponse) => + messageResponse.content.foreach { + case ContentBlock.TextContent(text, _) => + println("Structured JSON output:") + println(text) + + // Parse JSON response back to case class + val personInfo = SnakePickle.read[PersonInfo](text) + println(s"Parsed: ${personInfo.name}, age ${personInfo.age}, ${personInfo.occupation}") + println(s"Skills: ${personInfo.skills.mkString(", ")}") + case _ => // Handle other content types + } + case Left(error) => + println(s"Error: ${error.getMessage}") + } + + backend.close() +``` + +#### Manual Schema Definition + +If you prefer not to use Tapir, you can define schemas manually: + +```scala +import sttp.apispec.{Schema => ASchema, SchemaType} +import scala.collection.immutable.ListMap + +val schema: ASchema = ASchema(SchemaType.Object).copy( + properties = ListMap( + "summary" -> ASchema(SchemaType.String), + "confidence" -> ASchema(SchemaType.Number).copy(minimum = Some(0), maximum = Some(1)) + ), + required = List("summary", "confidence") +) +val outputFormat = OutputFormat.JsonSchema(schema) +``` + +**Important Notes:** +- Structured outputs require Claude 4.1+ models (`claude-sonnet-4-1-*`, `claude-opus-4-1-*`, etc.) 
+- Legacy models will throw `UnsupportedModelForStructuredOutputException` +- The beta feature uses `anthropic-beta: structured-outputs-2025-11-13` header automatically +- Unknown/future models default to supporting structured outputs for forward compatibility +- JSON schemas must be valid and follow standard JSON Schema specifications + +### Claude Tool Calling + +#### Custom Tools + +Define your own tools that Claude calls and your application executes: + +```scala +import sttp.ai.claude.models.{Tool, ToolInputSchema, PropertySchema} + +val weatherTool = Tool( + name = "get_weather", + description = "Get current weather for a location", + inputSchema = ToolInputSchema( + `type` = "object", + properties = Map( + "location" -> PropertySchema(`type` = "string", description = Some("City name")), + "unit" -> PropertySchema(`type` = "string", `enum` = Some(List("celsius", "fahrenheit"))) + ), + required = Some(List("location")) + ) +) + +val request = MessageRequest.withTools( + model = "claude-3-sonnet-20240229", + messages = List(Message.user(List(ContentBlock.text("What's the weather in Paris?")))), + maxTokens = 1000, + tools = List(weatherTool) +) +``` + +#### Predefined Tools + +Currently supported: + +- **`Tool.WebSearch`** (`web_search_20250305`) + +```scala +import sttp.ai.claude.models.{ContentBlock, Message, Tool} +import sttp.ai.claude.requests.MessageRequest + +val request = MessageRequest.withTools( + model = "claude-sonnet-4-5-20250514", + messages = List(Message.user(List(ContentBlock.text("What was the most recent SpaceX launch?")))), + maxTokens = 1024, + tools = List(Tool.WebSearch.default) +) + +val response = client.createMessage(request) + +response.content.foreach { + case t: ContentBlock.TextContent => println(t.text) + case s: ContentBlock.ServerToolUseContent => + println(s"Searched for: ${s.input.get("query").map(_.str).getOrElse("")}") + case r: ContentBlock.WebSearchToolResultContent => + r.content match { + case 
ContentBlock.WebSearchToolResult.Results(items) => + items.foreach(it => println(s"- ${it.title} — ${it.url}")) + case ContentBlock.WebSearchToolResult.Error(code) => + println(s"Web search failed: $code") + } + case _ => () +} +``` + +Both custom and predefined tools can be passed in the same `tools` list. + +### Claude Streaming + +#### Using fs2 (cats-effect) + +```scala +import sttp.ai.claude.streaming.fs2.* +import sttp.client4.httpclient.fs2.HttpClientFs2Backend +import cats.effect.IO + +val backend = HttpClientFs2Backend[IO]() + +// Extension method for streaming +val streamRequest = client.createMessageAsBinaryStream(backend.capabilities.streams, request) + +streamRequest + .send(backend) + .map(_.map(_.parseSSE.parseClaudeStreamResponse)) + .flatMap { + case Right(stream) => + stream + .evalTap(response => IO.println(response.delta.text.getOrElse(""))) + .compile + .drain + case Left(error) => + IO.println(s"Error: $error") + } +``` + +#### Using ZIO + +```scala +import sttp.ai.claude.streaming.zio.* +import sttp.client4.httpclient.zio.HttpClientZioBackend +import zio.* + +val backend = HttpClientZioBackend() + +val program = for { + streamRequest <- ZIO.succeed(client.createMessageAsBinaryStream(backend.capabilities.streams, request)) + result <- streamRequest.send(backend) + _ <- result match { + case Right(stream) => + stream + .parseSSE + .parseClaudeStreamResponse + .tap(response => Console.printLine(response.delta.text.getOrElse(""))) + .runDrain + case Left(error) => + Console.printLine(s"Error: $error") + } +} yield () +``` + +#### Using Ox (Scala 3) + +```scala +import sttp.ai.claude.streaming.ox.* +import sttp.client4.ox.OxHttpClientBackend +import ox.* + +val backend = OxHttpClientBackend() + +val streamRequest = client.createMessageAsBinaryStream(backend.capabilities.streams, request) + +val result = streamRequest.send(backend) +result match { + case Right(stream) => + stream + .parseSSE + .parseClaudeStreamResponse + .tap(response => 
println(response.delta.text.getOrElse(""))) + .runDrain() + case Left(error) => + println(s"Error: $error") +} +``` + +### Claude Models API + +```scala +val modelsRequest = client.listModels() +val models = modelsRequest.send(backend) + +models match { + case Right(response) => + response.data.foreach(model => println(s"${model.id} - ${model.displayName.getOrElse("N/A")}")) + case Left(error) => + println(s"Error: $error") +} +``` + +**Common Claude models** (use `listModels()` for current list): + +- `claude-3-sonnet-20240229` - Balanced performance and speed +- `claude-3-opus-20240229` - Highest capability model +- `claude-3-haiku-20240307` - Fastest model +- `claude-instant-1.2` - Legacy fast model + +### Claude Error Handling + +Claude-specific exception hierarchy: + +```scala +import sttp.ai.claude.ClaudeExceptions.* + +client.createMessage(request).send(backend) match { + case Right(response) => // Success + handleResponse(response) + case Left(error) => error match { + case _: AuthenticationException => // Invalid API key + println("Authentication failed - check your API key") + case _: RateLimitException => // Rate limited + println("Rate limited - please wait before retrying") + case _: InvalidRequestException => // Malformed request + println("Invalid request - check your parameters") + case _: PermissionException => // Access denied + println("Permission denied for this resource") + case _: APIException => // Other API error + println(s"API error: ${error.getMessage}") + case _: DeserializationClaudeException => // JSON parsing error + println("Failed to parse response") + } +} +``` + +### Key Differences from OpenAI API + +| Feature | Claude API | OpenAI API | +|---------|------------|------------| +| **Message Content** | `ContentBlock` arrays | Simple strings | +| **System Messages** | `system` parameter | Role-based message | +| **Authentication** | `x-api-key` + `anthropic-version` headers | `Authorization` header | +| **Image Input** | 
ContentBlock with base64 | URL or base64 in content | +| **Tool Calling** | Native tool structure | Function calling | +| **Streaming** | Server-Sent Events | Server-Sent Events | +| **Model Names** | `claude-3-sonnet-20240229` | `gpt-4` | + +### Synchronous Claude Client + +For blocking operations, use `ClaudeSyncClient`: + +```scala +import sttp.ai.claude.ClaudeSyncClient + +val syncClient = new ClaudeSyncClient(config) + +// Throws ClaudeException on error +try { + val response = syncClient.createMessage(request) + println(response.content.head.text.getOrElse("")) +} catch { + case e: ClaudeException => println(s"Error: ${e.getMessage}") +} +``` \ No newline at end of file diff --git a/claude/src/test/scala/sttp/ai/claude/integration/ClaudeIntegrationSpec.scala b/claude/src/test/scala/sttp/ai/claude/integration/ClaudeIntegrationSpec.scala index d18607bf..94ed8099 100644 --- a/claude/src/test/scala/sttp/ai/claude/integration/ClaudeIntegrationSpec.scala +++ b/claude/src/test/scala/sttp/ai/claude/integration/ClaudeIntegrationSpec.scala @@ -1,6 +1,7 @@ package sttp.ai.claude.integration import org.scalatest.BeforeAndAfterAll +import org.scalatest.Inside.inside import org.scalatest.concurrent.Eventually import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers @@ -135,7 +136,7 @@ class ClaudeIntegrationSpec extends AnyFlatSpec with Matchers with BeforeAndAfte response.role shouldBe "assistant" response.content should not be empty // The response should contain the answer to 2+2 - val textContent = response.content.collectFirst { case ContentBlock.TextContent(text, _) => + val textContent = response.content.collectFirst { case ContentBlock.TextContent(text, _, _) => text } textContent should be(defined) @@ -172,7 +173,7 @@ class ClaudeIntegrationSpec extends AnyFlatSpec with Matchers with BeforeAndAfte response.role shouldBe "assistant" response.content should not be empty // Claude should acknowledge the image in some way - val 
textContent = response.content.collectFirst { case ContentBlock.TextContent(text, _) => + val textContent = response.content.collectFirst { case ContentBlock.TextContent(text, _, _) => text } textContent should be(defined) @@ -208,7 +209,7 @@ class ClaudeIntegrationSpec extends AnyFlatSpec with Matchers with BeforeAndAfte response.role shouldBe "assistant" response.content should not be empty // Claude should acknowledge the image in some way - val textContent = response.content.collectFirst { case ContentBlock.TextContent(text, _) => + val textContent = response.content.collectFirst { case ContentBlock.TextContent(text, _, _) => text } textContent should be(defined) @@ -248,8 +249,8 @@ class ClaudeIntegrationSpec extends AnyFlatSpec with Matchers with BeforeAndAfte // Claude should either use the tool or explain why it can't val hasToolUse = response.content.exists(_.isInstanceOf[ContentBlock.ToolUseContent]) val hasTextResponse = response.content.exists { - case ContentBlock.TextContent(text, _) => text.toLowerCase.contains("weather") || text.toLowerCase.contains("tool") - case _ => false + case ContentBlock.TextContent(text, _, _) => text.toLowerCase.contains("weather") || text.toLowerCase.contains("tool") + case _ => false } (hasToolUse || hasTextResponse) shouldBe true @@ -364,7 +365,7 @@ class ClaudeIntegrationSpec extends AnyFlatSpec with Matchers with BeforeAndAfte response.role shouldBe "assistant" response.content should not be empty // Claude should respond with some text content - val textContent = response.content.collectFirst { case ContentBlock.TextContent(text, _) => + val textContent = response.content.collectFirst { case ContentBlock.TextContent(text, _, _) => text } textContent should be(defined) @@ -390,7 +391,7 @@ class ClaudeIntegrationSpec extends AnyFlatSpec with Matchers with BeforeAndAfte response.content should not be empty // Extract the text content which should be valid JSON - val textContent = response.content.collectFirst { case 
ContentBlock.TextContent(text, _) => + val textContent = response.content.collectFirst { case ContentBlock.TextContent(text, _, _) => text } textContent should be(defined) @@ -464,4 +465,63 @@ class ClaudeIntegrationSpec extends AnyFlatSpec with Matchers with BeforeAndAfte textBlock.get.citations.get should not be empty () } + + "Claude Cache Control" should "respect cache control settings on request body" in withClient { client => + + val source = scala.io.Source.fromResource("./Readme-prefix.md") + val docText = + try + source.mkString + finally + source.close() + + val firstMessage = Message.user( + List( + ContentBlock.document(docText, title = Some("sttp-ai Readme")), + ContentBlock.text("Summarise this document in one sentence, citing specific claims.") + ) + ) + val firstRequest = MessageRequest + .simple( + model = testModel, + messages = List( + firstMessage + ), + maxTokens = 150 + ) + .withCacheControl(CacheControl.Ephemeral()) + + def secondRequest(response: List[ContentBlock]) = MessageRequest + .simple( + model = testModel, + messages = List( + firstMessage, + Message.assistant(response), + Message.user( + List( + ContentBlock.text("Are you sure you are correct?") + ) + ) + ), + maxTokens = 150 + ) + .copy(cacheControl = Some(CacheControl.Ephemeral())) + + val responseOne = client.createMessage(firstRequest) + responseOne.content should not be empty + + inside((responseOne.usage.cacheCreationInputTokens, responseOne.usage.cacheReadInputTokens)) { + case (Some(creationTokens), Some(readTokens)) => + assert( + creationTokens > 0 || readTokens > 0 + ) // if test is run in a caching window multiple times, it might not write to cache, but read from it instead. 
+ } + + val content = responseOne.content + val responseTwo = client.createMessage(secondRequest(content)) + responseTwo.content should not be empty + inside(responseTwo.usage.cacheReadInputTokens) { case Some(tokens) => + assert(tokens > 0) + } + } } diff --git a/claude/src/test/scala/sttp/ai/claude/unit/requests/MessageRequestSpec.scala b/claude/src/test/scala/sttp/ai/claude/unit/requests/MessageRequestSpec.scala index d29ca32f..a323a389 100644 --- a/claude/src/test/scala/sttp/ai/claude/unit/requests/MessageRequestSpec.scala +++ b/claude/src/test/scala/sttp/ai/claude/unit/requests/MessageRequestSpec.scala @@ -2,7 +2,7 @@ package sttp.ai.claude.unit.requests import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers -import sttp.ai.claude.models.{ContentBlock, Message, OutputFormat} +import sttp.ai.claude.models.{CacheControl, ContentBlock, Message, OutputFormat} import sttp.ai.claude.requests.MessageRequest import sttp.ai.core.json.SnakePickle import sttp.tapir.{Schema => TSchema} @@ -24,6 +24,11 @@ class MessageRequestSpec extends AnyFlatSpec with Matchers { Message.user(List(ContentBlock.TextContent("Hello"))) ) + // not ttl test + val sampleMessagesWithCacheControl: List[Message] = List( + Message.user(List(ContentBlock.TextContent("Hello", cacheControl = Some(CacheControl.Ephemeral())))) + ) + "MessageRequest serialization" should "include output_config with format and schema" in { val outputFormat = OutputFormat.JsonSchema.withTapirSchema[UserProfile] val request = MessageRequest @@ -70,4 +75,14 @@ class MessageRequestSpec extends AnyFlatSpec with Matchers { deserialized.outputConfig.get.format shouldBe defined deserialized.outputConfig.get.format.get shouldBe a[OutputFormat.JsonSchema] } + + it should "round-trip with caching control" in { + val request = MessageRequest + .simple("claude-sonnet-4-5-20250514", sampleMessagesWithCacheControl, 1024) + .withCacheControl(CacheControl.Ephemeral()) + + val json = 
SnakePickle.write(request) + val deserialized = SnakePickle.read[MessageRequest](json) + deserialized shouldEqual request + } } diff --git a/examples/src/main/scala/examples/ClaudeBasicExample.scala b/examples/src/main/scala/examples/ClaudeBasicExample.scala index e2e28f4d..43d00a6f 100644 --- a/examples/src/main/scala/examples/ClaudeBasicExample.scala +++ b/examples/src/main/scala/examples/ClaudeBasicExample.scala @@ -41,8 +41,8 @@ object ClaudeBasicExample extends App { case Right(messageResponse) => println("Claude's response:") messageResponse.content.foreach { - case ContentBlock.TextContent(text, _) => println(text) - case _ => // Handle other content types if needed + case ContentBlock.TextContent(text, _, _) => println(text) + case _ => // Handle other content types if needed } println(s"\nUsage: ${messageResponse.usage}") case Left(error) => @@ -72,8 +72,8 @@ object ClaudeBasicExample extends App { case Right(messageResponse) => println("Claude's follow-up response:") messageResponse.content.foreach { - case ContentBlock.TextContent(text, _) => println(text) - case _ => // Handle other content types if needed + case ContentBlock.TextContent(text, _, _) => println(text) + case _ => // Handle other content types if needed } case Left(error) => println(s"Error: ${error.getMessage}") @@ -95,8 +95,8 @@ object ClaudeBasicExample extends App { case Right(messageResponse) => println("Claude's system-guided response:") messageResponse.content.foreach { - case ContentBlock.TextContent(text, _) => println(text) - case _ => // Handle other content types if needed + case ContentBlock.TextContent(text, _, _) => println(text) + case _ => // Handle other content types if needed } case Left(error) => println(s"Error: ${error.getMessage}") @@ -120,8 +120,8 @@ object ClaudeBasicExample extends App { case Right(messageResponse) => println("Claude's creative response:") messageResponse.content.foreach { - case ContentBlock.TextContent(text, _) => println(text) - case _ => // 
Handle other content types if needed + case ContentBlock.TextContent(text, _, _) => println(text) + case _ => // Handle other content types if needed } case Left(error) => println(s"Error: ${error.getMessage}") diff --git a/examples/src/main/scala/examples/ClaudeImageAnalysisExample.scala b/examples/src/main/scala/examples/ClaudeImageAnalysisExample.scala index a556ffeb..bf113243 100644 --- a/examples/src/main/scala/examples/ClaudeImageAnalysisExample.scala +++ b/examples/src/main/scala/examples/ClaudeImageAnalysisExample.scala @@ -56,8 +56,8 @@ object ClaudeImageAnalysisExample extends App { case Right(messageResponse) => println("Claude's image analysis:") messageResponse.content.foreach { - case ContentBlock.TextContent(text, _) => println(text) - case _ => // Handle other content types if needed + case ContentBlock.TextContent(text, _, _) => println(text) + case _ => // Handle other content types if needed } println(s"\nUsage: ${messageResponse.usage}") case Left(error) => @@ -97,8 +97,8 @@ object ClaudeImageAnalysisExample extends App { case Right(messageResponse) => println("Claude's comparison:") messageResponse.content.foreach { - case ContentBlock.TextContent(text, _) => println(text) - case _ => // Handle other content types if needed + case ContentBlock.TextContent(text, _, _) => println(text) + case _ => // Handle other content types if needed } case Left(error) => println(s"Error: ${error.getMessage}") @@ -131,8 +131,8 @@ object ClaudeImageAnalysisExample extends App { case Right(messageResponse) => println("Claude's technical analysis:") messageResponse.content.foreach { - case ContentBlock.TextContent(text, _) => println(text) - case _ => // Handle other content types if needed + case ContentBlock.TextContent(text, _, _) => println(text) + case _ => // Handle other content types if needed } case Left(error) => println(s"Error: ${error.getMessage}") diff --git a/examples/src/main/scala/examples/ClaudeToolCallingExample.scala 
b/examples/src/main/scala/examples/ClaudeToolCallingExample.scala index 8a2a97c8..8c915e7e 100644 --- a/examples/src/main/scala/examples/ClaudeToolCallingExample.scala +++ b/examples/src/main/scala/examples/ClaudeToolCallingExample.scala @@ -69,7 +69,7 @@ object ClaudeToolCallingExample extends App { case Right(messageResponse) => println("Claude's response:") messageResponse.content.foreach { - case ContentBlock.TextContent(text, _) => + case ContentBlock.TextContent(text, _, _) => println(s"Text: $text") case ContentBlock.ToolUseContent(id, name, input) => println(s"Tool called: $name") @@ -108,7 +108,7 @@ object ClaudeToolCallingExample extends App { case Right(messageResponse) => println("Claude's tool-assisted calculation:") messageResponse.content.foreach { - case ContentBlock.TextContent(text, _) => + case ContentBlock.TextContent(text, _, _) => println(text) case ContentBlock.ToolUseContent(id, name, input) => val result = simulateToolExecution(name, input) diff --git a/openai/src/main/scala/sttp/ai/openai/requests/caching/CacheRetentionPolicy.scala b/openai/src/main/scala/sttp/ai/openai/requests/caching/CacheRetentionPolicy.scala new file mode 100644 index 00000000..56efaae7 --- /dev/null +++ b/openai/src/main/scala/sttp/ai/openai/requests/caching/CacheRetentionPolicy.scala @@ -0,0 +1,20 @@ +package sttp.ai.openai.requests.caching +import sttp.ai.core.json.SnakePickle + +sealed trait CacheRetentionPolicy + +object CacheRetentionPolicy { + case object `24H` extends CacheRetentionPolicy + case object InMemory extends CacheRetentionPolicy + + implicit val writer: SnakePickle.Writer[CacheRetentionPolicy] = SnakePickle.writer[String].comap { + case `24H` => "24h" + case InMemory => "in_memory" + } + + implicit val reader: SnakePickle.Reader[CacheRetentionPolicy] = SnakePickle.reader[String].map { + case "24h" => `24H` + case "in_memory" => InMemory + case other => throw new IllegalArgumentException(s"Unknown cache retention policy: $other") + } +} diff --git 
a/openai/src/main/scala/sttp/ai/openai/requests/completions/chat/ChatRequestBody.scala b/openai/src/main/scala/sttp/ai/openai/requests/completions/chat/ChatRequestBody.scala index 4ad9fdc4..5f1f7c24 100644 --- a/openai/src/main/scala/sttp/ai/openai/requests/completions/chat/ChatRequestBody.scala +++ b/openai/src/main/scala/sttp/ai/openai/requests/completions/chat/ChatRequestBody.scala @@ -2,6 +2,7 @@ package sttp.ai.openai.requests.completions.chat import sttp.apispec.Schema import sttp.ai.core.json.{SerializationHelpers, SnakePickle} +import sttp.ai.openai.requests.caching.CacheRetentionPolicy import sttp.ai.openai.requests.completions.Stop import sttp.ai.openai.requests.completions.chat.message.{Message, Tool, ToolChoice} import sttp.tapir.docs.apispec.schema.TapirSchemaToJsonSchema @@ -98,7 +99,8 @@ object ChatRequestBody { * @param toolChoice * Controls which (if any) function is called by the model. * @param user - * A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + * A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Do not combine with + * promptCacheKey, as they are mutually exclusive. * @param store * Whether or not to store the output of this chat completion request for use in our model distillation or evals products. * @param reasoningEffort @@ -139,6 +141,11 @@ object ChatRequestBody { * ahead of time. This is most common when you are regenerating a file with only minor changes to most of the content. * @param audio * Parameters for audio output. Required when audio output is requested with modalities: ["audio"]. + * @param promptCacheKey + * Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. + * @param promptCacheRetention + * Can be used to specify policy on how long the prompt cache should be retained. Not every model supports every policy; check the API + * documentation for more details.
*/ case class ChatBody( messages: Seq[Message], @@ -167,7 +174,9 @@ object ChatRequestBody { parallelToolCalls: Option[Boolean] = None, streamOptions: Option[StreamOptions] = None, prediction: Option[Prediction] = None, - audio: Option[Audio] = None + audio: Option[Audio] = None, + promptCacheKey: Option[String] = None, + promptCacheRetention: Option[CacheRetentionPolicy] = None ) object ChatBody { diff --git a/openai/src/main/scala/sttp/ai/openai/requests/responses/ResponsesRequestBody.scala b/openai/src/main/scala/sttp/ai/openai/requests/responses/ResponsesRequestBody.scala index f983c20b..22230d62 100644 --- a/openai/src/main/scala/sttp/ai/openai/requests/responses/ResponsesRequestBody.scala +++ b/openai/src/main/scala/sttp/ai/openai/requests/responses/ResponsesRequestBody.scala @@ -2,6 +2,7 @@ package sttp.ai.openai.requests.responses import sttp.apispec.Schema import sttp.ai.core.json.SnakePickle +import sttp.ai.openai.requests.caching.CacheRetentionPolicy import sttp.ai.openai.requests.completions.chat.SchemaSupport import sttp.ai.openai.requests.responses.ResponsesRequestBody.Input import sttp.ai.openai.requests.responses.ResponsesRequestBody.Input.OutputContentItem.OutputText.{Annotation, LogProb} @@ -66,6 +67,9 @@ import ujson.Value * The truncation strategy to use for the model response. Defaults to 'disabled'. * @param user * Deprecated. Use safetyIdentifier and promptCacheKey instead. + * @param promptCacheRetention + * Can be used to specify policy on how long the prompt cache should be retained. Not every model supports every policy; check the API + * documentation for more details.
*/ case class ResponsesRequestBody( background: Option[Boolean] = None, @@ -92,7 +96,8 @@ case class ResponsesRequestBody( topLogprobs: Option[Int] = None, topP: Option[Double] = None, truncation: Option[String] = None, - user: Option[String] = None + user: Option[String] = None, + promptCacheRetention: Option[CacheRetentionPolicy] = None ) object ResponsesRequestBody { diff --git a/openai/src/test/scala/sttp/ai/openai/openai/integration/OpenAIIntegrationSpec.scala b/openai/src/test/scala/sttp/ai/openai/openai/integration/OpenAIIntegrationSpec.scala index a6815574..2598dc99 100644 --- a/openai/src/test/scala/sttp/ai/openai/openai/integration/OpenAIIntegrationSpec.scala +++ b/openai/src/test/scala/sttp/ai/openai/openai/integration/OpenAIIntegrationSpec.scala @@ -1,12 +1,14 @@ package sttp.ai.openai.integration import org.scalatest.BeforeAndAfterAll +import org.scalatest.Inside.inside import org.scalatest.concurrent.Eventually import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers import org.scalatest.time.{Millis, Seconds, Span} import sttp.ai.openai.OpenAIExceptions.OpenAIException import sttp.ai.openai.OpenAISyncClient +import sttp.ai.openai.requests.caching.CacheRetentionPolicy import sttp.ai.openai.requests.completions.chat.ChatRequestBody.{ChatBody, ChatCompletionModel} import sttp.ai.openai.requests.completions.chat.message.{Content, Message} import sttp.ai.openai.requests.embeddings.EmbeddingsRequestBody.{EmbeddingsBody, EmbeddingsInput, EmbeddingsModel} @@ -191,6 +193,32 @@ class OpenAIIntegrationSpec extends AnyFlatSpec with Matchers with BeforeAndAfte () } + it should "properly pass cache control parameters" in withClient { client => + // given + val messages = Seq( + Message.UserMessage( + content = Content.TextContent("Hi") + ) + ) + + val chatBody = ChatBody( + model = ChatCompletionModel.GPT4oMini, + messages = messages, + maxTokens = Some(50), + promptCacheKey = Some("sttp-ai-test-cache-key"), + promptCacheRetention = 
Some(CacheRetentionPolicy.InMemory) + ) + + // when + val response = client.createChatCompletion(chatBody) + + // then + inside(response.usage.promptTokensDetails.flatMap(_.cachedTokens)) { case Some(tokens) => + tokens should be > 0 + } + + } + "OpenAI Responses API" should "create, retrieve, list input items, and delete a model response successfully" in withClient { client => // given