diff --git a/packages/types/src/providers/baseten.ts b/packages/types/src/providers/baseten.ts
index 27b8cbff4ac..6c275cda2c0 100644
--- a/packages/types/src/providers/baseten.ts
+++ b/packages/types/src/providers/baseten.ts
@@ -83,6 +83,28 @@ export const basetenModels = {
 		description:
 			"DeepSeek's hybrid reasoning model with efficient long context scaling with GPT-5 level performance",
 	},
+	"deepseek-ai/DeepSeek-V4-Pro": {
+		maxTokens: 384_000,
+		contextWindow: 1_000_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		inputPrice: 1.74,
+		outputPrice: 3.48,
+		cacheWritesPrice: 1.74,
+		cacheReadsPrice: 0.145,
+		description: "DeepSeek V4 Pro",
+	},
+	"deepseek-ai/DeepSeek-V4-Flash": {
+		maxTokens: 384_000,
+		contextWindow: 1_000_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		inputPrice: 0.14,
+		outputPrice: 0.28,
+		cacheWritesPrice: 0.14,
+		cacheReadsPrice: 0.028,
+		description: "DeepSeek V4 Flash",
+	},
 	"openai/gpt-oss-120b": {
 		maxTokens: 16_384,
 		contextWindow: 128_072,
diff --git a/packages/types/src/providers/deepseek.ts b/packages/types/src/providers/deepseek.ts
index 40722471cb8..0e7267a3aab 100644
--- a/packages/types/src/providers/deepseek.ts
+++ b/packages/types/src/providers/deepseek.ts
@@ -32,6 +32,30 @@ export const deepSeekModels = {
 		cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit) - Updated Dec 9, 2025
 		description: `DeepSeek-V3.2 (Thinking Mode) achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 8K output tokens. Supports JSON output, tool calls, and chat prefix completion (beta).`,
 	},
+	"deepseek-v4-pro": {
+		maxTokens: 384_000,
+		contextWindow: 1_000_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		preserveReasoning: true,
+		inputPrice: 1.74, // $1.74 per million tokens (cache miss)
+		outputPrice: 3.48, // $3.48 per million tokens
+		cacheWritesPrice: 1.74, // $1.74 per million tokens (cache miss)
+		cacheReadsPrice: 0.145, // $0.145 per million tokens (cache hit)
+		description: `DeepSeek V4 Pro`,
+	},
+	"deepseek-v4-flash": {
+		maxTokens: 384_000,
+		contextWindow: 1_000_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		preserveReasoning: true,
+		inputPrice: 0.14, // $0.14 per million tokens (cache miss)
+		outputPrice: 0.28, // $0.28 per million tokens
+		cacheWritesPrice: 0.14, // $0.14 per million tokens (cache miss)
+		cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit)
+		description: `DeepSeek V4 Flash`,
+	},
 } as const satisfies Record<string, ModelInfo>
 
 // https://api-docs.deepseek.com/quick_start/parameter_settings
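Reviewer note: both catalogs above price in USD per million tokens, with cache reads well below the cache-miss input rate (0.145 vs 1.74 for V4 Pro). A minimal sketch of how fields like these typically roll up into a per-request cost; the `ModelPricing` shape mirrors the entries above, but `estimateCostUsd` is illustrative and not part of this PR:

```typescript
// Illustrative only: how per-million-token pricing fields combine into a
// request cost. The field names mirror the diff; the helper is hypothetical.
interface ModelPricing {
	inputPrice: number // $/M tokens, cache miss
	outputPrice: number // $/M tokens
	cacheWritesPrice?: number // $/M tokens written to the prompt cache
	cacheReadsPrice?: number // $/M tokens served from the prompt cache
}

function estimateCostUsd(
	p: ModelPricing,
	usage: { input: number; output: number; cacheWrites: number; cacheReads: number },
): number {
	const perM = (tokens: number, price?: number) => (tokens / 1_000_000) * (price ?? 0)
	return (
		perM(usage.input, p.inputPrice) +
		perM(usage.output, p.outputPrice) +
		perM(usage.cacheWrites, p.cacheWritesPrice) +
		perM(usage.cacheReads, p.cacheReadsPrice)
	)
}

// e.g. V4 Flash: 100K fresh input + 400K cache reads + 8K output
// = 0.014 + 0.0112 + 0.00224 ≈ $0.027
```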
diff --git a/packages/types/src/providers/fireworks.ts b/packages/types/src/providers/fireworks.ts
index c9017c54cde..fb839c07e1c 100644
--- a/packages/types/src/providers/fireworks.ts
+++ b/packages/types/src/providers/fireworks.ts
@@ -13,6 +13,8 @@ export type FireworksModelId =
 	| "accounts/fireworks/models/deepseek-v3"
 	| "accounts/fireworks/models/deepseek-v3p1"
 	| "accounts/fireworks/models/deepseek-v3p2"
+	| "accounts/fireworks/models/deepseek-v4-pro"
+	| "accounts/fireworks/models/deepseek-v4-flash"
 	| "accounts/fireworks/models/glm-4p5"
 	| "accounts/fireworks/models/glm-4p5-air"
 	| "accounts/fireworks/models/glm-4p6"
@@ -200,6 +202,28 @@ export const fireworksModels = {
 		description:
 			"DeepSeek V3.2 is the latest iteration of the V3 model family with enhanced reasoning capabilities, improved code generation, and better instruction following.",
 	},
+	"accounts/fireworks/models/deepseek-v4-pro": {
+		maxTokens: 384_000,
+		contextWindow: 1_000_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		inputPrice: 1.74,
+		outputPrice: 3.48,
+		cacheWritesPrice: 1.74,
+		cacheReadsPrice: 0.145,
+		description: "DeepSeek V4 Pro",
+	},
+	"accounts/fireworks/models/deepseek-v4-flash": {
+		maxTokens: 384_000,
+		contextWindow: 1_000_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		inputPrice: 0.14,
+		outputPrice: 0.28,
+		cacheWritesPrice: 0.14,
+		cacheReadsPrice: 0.028,
+		description: "DeepSeek V4 Flash",
+	},
 	"accounts/fireworks/models/glm-4p7": {
 		maxTokens: 25344,
 		contextWindow: 198000,
diff --git a/packages/types/src/providers/sambanova.ts b/packages/types/src/providers/sambanova.ts
index 624a7eb8c77..8b2f88ea589 100644
--- a/packages/types/src/providers/sambanova.ts
+++ b/packages/types/src/providers/sambanova.ts
@@ -7,6 +7,8 @@ export type SambaNovaModelId =
 	| "DeepSeek-R1"
 	| "DeepSeek-V3-0324"
 	| "DeepSeek-V3.1"
+	| "DeepSeek-V4-Pro"
+	| "DeepSeek-V4-Flash"
 	| "Llama-4-Maverick-17B-128E-Instruct"
 	| "Qwen3-32B"
 	| "gpt-oss-120b"
@@ -60,6 +62,28 @@ export const sambaNovaModels = {
 		outputPrice: 4.5,
 		description: "DeepSeek V3.1 model with 32K context window.",
 	},
+	"DeepSeek-V4-Pro": {
+		maxTokens: 384_000,
+		contextWindow: 1_000_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		inputPrice: 1.74,
+		outputPrice: 3.48,
+		cacheWritesPrice: 1.74,
+		cacheReadsPrice: 0.145,
+		description: "DeepSeek V4 Pro",
+	},
+	"DeepSeek-V4-Flash": {
+		maxTokens: 384_000,
+		contextWindow: 1_000_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		inputPrice: 0.14,
+		outputPrice: 0.28,
+		cacheWritesPrice: 0.14,
+		cacheReadsPrice: 0.028,
+		description: "DeepSeek V4 Flash",
+	},
 	"Llama-4-Maverick-17B-128E-Instruct": {
 		maxTokens: 8192,
 		contextWindow: 131072,
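Note on the SambaNova entries: the catalog keys must match the `SambaNovaModelId` union for the `satisfies` check to pass, so the Flash entry follows the provider's bare `DeepSeek-V4-Flash` convention rather than the `deepseek-ai/...` prefix Baseten uses (the original patch had the Baseten-style key here, which would not type-check). A simplified sketch of that compile-time guard, with demo names standing in for the real `ModelInfo`:

```typescript
// Simplified sketch of the pattern the provider catalogs rely on. The real
// ModelInfo type has more fields; DemoModelId/DemoModelInfo are stand-ins.
type DemoModelId = "DeepSeek-V4-Pro" | "DeepSeek-V4-Flash"

interface DemoModelInfo {
	maxTokens: number
	contextWindow: number
}

export const demoModels = {
	"DeepSeek-V4-Pro": { maxTokens: 384_000, contextWindow: 1_000_000 },
	"DeepSeek-V4-Flash": { maxTokens: 384_000, contextWindow: 1_000_000 },
	// "deepseek-ai/DeepSeek-V4-Flash": { ... } // <- would be a type error here
} as const satisfies Record<DemoModelId, DemoModelInfo>
```

An out-of-union key fails at build time rather than surfacing later as a runtime lookup miss.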
diff --git a/src/api/providers/__tests__/fireworks.spec.ts b/src/api/providers/__tests__/fireworks.spec.ts
index 79f69f868b1..6641292f538 100644
--- a/src/api/providers/__tests__/fireworks.spec.ts
+++ b/src/api/providers/__tests__/fireworks.spec.ts
@@ -245,6 +245,52 @@ describe("FireworksHandler", () => {
 		)
 	})
 
+	it("should return DeepSeek V4 Pro model with correct configuration", () => {
+		const testModelId: FireworksModelId = "accounts/fireworks/models/deepseek-v4-pro"
+		const handlerWithModel = new FireworksHandler({
+			apiModelId: testModelId,
+			fireworksApiKey: "test-fireworks-api-key",
+		})
+		const model = handlerWithModel.getModel()
+		expect(model.id).toBe(testModelId)
+		expect(model.info).toEqual(
+			expect.objectContaining({
+				maxTokens: 384_000,
+				contextWindow: 1_000_000,
+				supportsImages: false,
+				supportsPromptCache: true,
+				inputPrice: 1.74,
+				outputPrice: 3.48,
+				cacheWritesPrice: 1.74,
+				cacheReadsPrice: 0.145,
+				description: expect.stringContaining("DeepSeek V4 Pro"),
+			}),
+		)
+	})
+
+	it("should return DeepSeek V4 Flash model with correct configuration", () => {
+		const testModelId: FireworksModelId = "accounts/fireworks/models/deepseek-v4-flash"
+		const handlerWithModel = new FireworksHandler({
+			apiModelId: testModelId,
+			fireworksApiKey: "test-fireworks-api-key",
+		})
+		const model = handlerWithModel.getModel()
+		expect(model.id).toBe(testModelId)
+		expect(model.info).toEqual(
+			expect.objectContaining({
+				maxTokens: 384_000,
+				contextWindow: 1_000_000,
+				supportsImages: false,
+				supportsPromptCache: true,
+				inputPrice: 0.14,
+				outputPrice: 0.28,
+				cacheWritesPrice: 0.14,
+				cacheReadsPrice: 0.028,
+				description: expect.stringContaining("DeepSeek V4 Flash"),
+			}),
+		)
+	})
+
 	it("should return GLM-4.5 model with correct configuration", () => {
 		const testModelId: FireworksModelId = "accounts/fireworks/models/glm-4p5"
 		const handlerWithModel = new FireworksHandler({
diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts
index 73b542dbc73..f1f0cbe2cff 100644
--- a/src/api/providers/__tests__/openai.spec.ts
+++ b/src/api/providers/__tests__/openai.spec.ts
@@ -1139,6 +1139,79 @@ describe("OpenAiHandler", () => {
 			)
 		})
 	})
+
+	describe("DeepSeek V4 / R1 Compatibility", () => {
+		it("should inject extra_body and reasoning_effort for DeepSeek R1 format", async () => {
+			const r1Options: ApiHandlerOptions = {
+				...mockOptions,
+				openAiR1FormatEnabled: true,
+				openAiModelId: "deepseek-reasoner",
+				openAiBaseUrl: "https://api.deepseek.com/v1",
+				openAiCustomModelInfo: {
+					contextWindow: 64_000,
+					supportsReasoningEffort: true,
+					reasoningEffort: "xhigh",
+					supportsPromptCache: false,
+				},
+			}
+			const r1Handler = new OpenAiHandler(r1Options)
+			const stream = r1Handler.createMessage("system", [{ role: "user", content: "test" }])
+
+			for await (const _ of stream) {
+			}
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					reasoning_effort: "max", // xhigh -> max for deepseek
+					extra_body: {
+						thinking: { type: "enabled" },
+					},
+					temperature: undefined, // disabled for thinking mode
+				}),
+				expect.any(Object),
+			)
+
+			// T005 Verify preserveReasoning is enabled
+			const model = r1Handler.getModel()
+			expect(model.info.preserveReasoning).toBe(true)
+		})
+
+		it("should NOT inject R1 parameters when R1 format is disabled, even if the model ID contains deepseek", async () => {
+			const standardOptions: ApiHandlerOptions = {
+				...mockOptions,
+				openAiR1FormatEnabled: false,
+				openAiModelId: "deepseek-chat", // standard model
+			}
+			const standardHandler = new OpenAiHandler(standardOptions)
+			const stream = standardHandler.createMessage("system", [{ role: "user", content: "test" }])
+
+			for await (const _ of stream) {
+			}
+
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).not.toHaveProperty("extra_body")
+			expect(callArgs.temperature).toBeDefined()
+		})
+
+		it("should use standard OpenAI protocol when R1 format is disabled", async () => {
+			const standardOptions: ApiHandlerOptions = {
+				...mockOptions,
+				openAiR1FormatEnabled: false,
+				openAiModelId: "gpt-4o",
+			}
+			const standardHandler = new OpenAiHandler(standardOptions)
+			const stream = standardHandler.createMessage("system", [{ role: "user", content: "test" }])
+
+			for await (const _ of stream) {
+			}
+
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).not.toHaveProperty("extra_body")
+			expect(callArgs).not.toHaveProperty("reasoning_effort")
+			// Standard model should have temperature 0 by default
+			expect(callArgs.temperature).toBe(0)
+		})
+	})
 })
 
 describe("getOpenAiModels", () => {
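The assertions above pin down an effort-mapping rule that is easy to miss in the handler diff below: on DeepSeek V4 endpoints, `"xhigh"` becomes `"max"` and everything else is clamped to `"high"`. Extracted as a standalone sketch (names are illustrative; in the PR the logic lives inline in `OpenAiHandler.createMessage`):

```typescript
// Standalone sketch of the reasoning-effort mapping exercised by the tests.
type ReasoningEffort = "low" | "medium" | "high" | "xhigh"

function toDeepSeekReasoningEffort(effort: ReasoningEffort | undefined, isDeepSeekV4: boolean) {
	// Non-V4 R1-format endpoints get the configured value passed through.
	if (!isDeepSeekV4) return effort
	// DeepSeek V4 endpoints (as assumed by this PR) accept "max" rather than
	// "xhigh"; any other setting is clamped to "high".
	return effort === "xhigh" ? "max" : "high"
}

// toDeepSeekReasoningEffort("xhigh", true)   -> "max"
// toDeepSeekReasoningEffort("medium", true)  -> "high"
// toDeepSeekReasoningEffort("medium", false) -> "medium"
```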
diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts
index 84cd557de05..bced110a39b 100644
--- a/src/api/providers/deepseek.ts
+++ b/src/api/providers/deepseek.ts
@@ -55,8 +55,11 @@ export class DeepSeekHandler extends OpenAiHandler {
 		const modelId = this.options.apiModelId ?? deepSeekDefaultModelId
 		const { info: modelInfo } = this.getModel()
 
-		// Check if this is a thinking-enabled model (deepseek-reasoner)
-		const isThinkingModel = modelId.includes("deepseek-reasoner")
+		// Check if this is a thinking-enabled model (deepseek-reasoner, deepseek-v4-pro, deepseek-v4-flash)
+		const isThinkingModel =
+			modelId.includes("deepseek-reasoner") ||
+			modelId.includes("deepseek-v4-pro") ||
+			modelId.includes("deepseek-v4-flash")
 
 		// Convert messages to R1 format (merges consecutive same-role messages)
 		// This is required for DeepSeek which does not support successive messages with the same role
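The detection added above is substring-based rather than an exact-match allowlist. A table-driven equivalent, shown only to make the matching rule explicit:

```typescript
// Equivalent of the isThinkingModel check above, as a table-driven sketch.
const THINKING_MODEL_MARKERS = ["deepseek-reasoner", "deepseek-v4-pro", "deepseek-v4-flash"] as const

function isThinkingModel(modelId: string): boolean {
	return THINKING_MODEL_MARKERS.some((marker) => modelId.includes(marker))
}
```

Substring matching means dated variants (say, a hypothetical `deepseek-v4-pro-2026-01-15`) would keep thinking mode without a code change; the trade-off is that any future model ID containing one of these markers is opted in silently.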
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index 33b29abcafe..1008ecc5c4e 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -25,6 +25,16 @@
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 import { getApiRequestTimeout } from "./utils/timeout-config"
 import { handleOpenAIError } from "./utils/openai-error-handler"
 
+// Custom type for OpenAI params to support DeepSeek's thinking mode and other non-standard features
+type OpenAiChatCompletionParams = (
+	| OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming
+	| OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
+) & {
+	extra_body?: {
+		thinking?: { type: "enabled" | "disabled" }
+	}
+}
+
 // TODO: Rename this to OpenAICompatibleHandler. Also, I think the
 // `OpenAINativeHandler` can subclass from this, since it's obviously
 // compatible with the OpenAI API. We can also rename it to `OpenAIHandler`.
@@ -89,7 +99,11 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
 		const modelId = this.options.openAiModelId ?? ""
 		const enabledR1Format = this.options.openAiR1FormatEnabled ?? false
 		const isAzureAiInference = this._isAzureAiInference(modelUrl)
-		const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format
+		const deepseekReasoner =
+			modelId.includes("deepseek-reasoner") ||
+			modelId.includes("deepseek-v4-pro") ||
+			modelId.includes("deepseek-v4-flash") ||
+			enabledR1Format
 
 		if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) {
 			yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages, metadata)
@@ -105,7 +119,9 @@
 		let convertedMessages
 
 		if (deepseekReasoner) {
-			convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
+			convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages], {
+				mergeToolResultText: true,
+			})
 		} else {
 			if (modelInfo.supportsPromptCache) {
 				systemMessage = {
@@ -152,13 +168,31 @@
 		const isGrokXAI = this._isGrokXAI(this.options.openAiBaseUrl)
 
-		const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
+		const isDeepSeekV4 =
+			deepseekReasoner && (this._isDeepSeek(this.options.openAiBaseUrl) || modelId.includes("deepseek"))
+
+		const requestOptions: OpenAiChatCompletionParams = {
 			model: modelId,
-			temperature: this.options.modelTemperature ?? (deepseekReasoner ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0),
+			temperature: deepseekReasoner
+				? undefined
+				: (this.options.modelTemperature ?? 0),
 			messages: convertedMessages,
 			stream: true as const,
 			...(isGrokXAI ? {} : { stream_options: { include_usage: true } }),
 			...(reasoning && reasoning),
+			...(deepseekReasoner && {
+				reasoning_effort: (isDeepSeekV4
+					? modelInfo.reasoningEffort === "xhigh"
+						? "max"
+						: "high"
+					: modelInfo.reasoningEffort) as OpenAI.Chat.Completions.ChatCompletionCreateParams["reasoning_effort"],
+				extra_body: {
+					thinking: { type: "enabled" },
+				},
+				top_p: undefined,
+				presence_penalty: undefined,
+				frequency_penalty: undefined,
+			}),
 			tools: this.convertToolsForOpenAI(metadata?.tools),
 			tool_choice: metadata?.tool_choice,
 			parallel_tool_calls: metadata?.parallelToolCalls ?? true,
@@ -167,12 +201,12 @@
 		// Add max_tokens if needed
 		this.addMaxTokensIfNeeded(requestOptions, modelInfo)
 
-		let stream
+		let stream: AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>
 		try {
-			stream = await this.client.chat.completions.create(
-				requestOptions,
+			stream = (await this.client.chat.completions.create(
+				requestOptions as OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming,
 				isAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {},
-			)
+			)) as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>
 		} catch (error) {
 			throw handleOpenAIError(error, this.providerName)
 		}
@@ -221,26 +255,45 @@
 			yield this.processUsageMetrics(lastUsage, modelInfo)
 		}
 	} else {
-		const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
+		const isDeepSeekV4 =
+			deepseekReasoner && (this._isDeepSeek(this.options.openAiBaseUrl) || modelId.includes("deepseek"))
+
+		const requestOptions: OpenAiChatCompletionParams = {
 			model: modelId,
 			messages: deepseekReasoner
-				? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
+				? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages], {
+						mergeToolResultText: true,
+					})
 				: [systemMessage, ...convertToOpenAiMessages(messages)],
 			// Tools are always present (minimum ALWAYS_AVAILABLE_TOOLS)
 			tools: this.convertToolsForOpenAI(metadata?.tools),
 			tool_choice: metadata?.tool_choice,
 			parallel_tool_calls: metadata?.parallelToolCalls ?? true,
+			...(deepseekReasoner && {
+				reasoning_effort: (isDeepSeekV4
+					? modelInfo.reasoningEffort === "xhigh"
+						? "max"
+						: "high"
+					: modelInfo.reasoningEffort) as OpenAI.Chat.Completions.ChatCompletionCreateParams["reasoning_effort"],
+				extra_body: {
+					thinking: { type: "enabled" },
+				},
+				temperature: undefined,
+				top_p: undefined,
+				presence_penalty: undefined,
+				frequency_penalty: undefined,
+			}),
 		}
 
 		// Add max_tokens if needed
 		this.addMaxTokensIfNeeded(requestOptions, modelInfo)
 
-		let response
+		let response: OpenAI.Chat.Completions.ChatCompletion
 		try {
-			response = await this.client.chat.completions.create(
-				requestOptions,
+			response = (await this.client.chat.completions.create(
+				requestOptions as OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming,
 				this._isAzureAiInference(modelUrl) ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {},
-			)
+			)) as OpenAI.Chat.Completions.ChatCompletion
 		} catch (error) {
 			throw handleOpenAIError(error, this.providerName)
 		}
@@ -289,7 +342,22 @@
 			settings: this.options,
 			defaultTemperature: 0,
 		})
-		return { id, info, ...params }
+
+		const enabledR1Format = this.options.openAiR1FormatEnabled ?? false
+		const deepseekReasoner =
+			id.includes("deepseek-reasoner") ||
+			id.includes("deepseek-v4-pro") ||
+			id.includes("deepseek-v4-flash") ||
+			enabledR1Format
+
+		return {
+			id,
+			info: {
+				...info,
+				...(deepseekReasoner && { preserveReasoning: true }),
+			},
+			...params,
+		}
 	}
 
 	async completePrompt(prompt: string): Promise<string> {
@@ -509,6 +577,11 @@
 		return urlHost.includes("x.ai")
 	}
 
+	private _isDeepSeek(baseUrl?: string): boolean {
+		const urlHost = this._getUrlHost(baseUrl)
+		return urlHost === "deepseek.com" || urlHost.endsWith(".deepseek.com")
+	}
+
 	protected _isAzureAiInference(baseUrl?: string): boolean {
 		const urlHost = this._getUrlHost(baseUrl)
 		return urlHost.endsWith(".services.ai.azure.com")
diff --git a/src/api/transform/openai-format.ts b/src/api/transform/openai-format.ts
index 8974dd599ba..35963433351 100644
--- a/src/api/transform/openai-format.ts
+++ b/src/api/transform/openai-format.ts
@@ -317,6 +317,9 @@ export function convertToOpenAiMessages(
 			if (mapped) {
 				;(baseMessage as any).reasoning_details = mapped
 			}
+			if (messageWithDetails.reasoning_content !== undefined) {
+				;(baseMessage as any).reasoning_content = messageWithDetails.reasoning_content
+			}
 		}
 
 		openAiMessages.push(baseMessage)
@@ -494,6 +497,11 @@ export function convertToOpenAiMessages(
 			baseMessage.reasoning_details = mapped
 		}
 
+		// Pass through reasoning_content for DeepSeek V4/R1 thinking mode
+		if (messageWithDetails.reasoning_content !== undefined) {
+			;(baseMessage as any).reasoning_content = messageWithDetails.reasoning_content
+		}
+
 		// Add tool_calls after reasoning_details
 		// Cannot be an empty array. API expects an array with minimum length 1, and will respond with an error if it's empty
 		if (tool_calls.length > 0) {
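For reviewers, the rough shape of the streaming request body the new code emits for a V4 thinking model. This is a hand-written approximation, not captured output, and the final wire format depends on how the OpenAI SDK serializes the non-standard `extra_body` key:

```typescript
// Approximate request body for a DeepSeek V4 thinking model (values abridged).
const examplePayload = {
	model: "deepseek-v4-pro",
	stream: true,
	stream_options: { include_usage: true },
	// R1-merged history: the system prompt is folded into the first user turn
	messages: [{ role: "user", content: "..." }],
	reasoning_effort: "max", // "xhigh" mapped for DeepSeek V4
	extra_body: { thinking: { type: "enabled" } },
	// Sampling knobs are set to undefined so the SDK omits them entirely:
	temperature: undefined,
	top_p: undefined,
	presence_penalty: undefined,
	frequency_penalty: undefined,
}
```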
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index 005bb0f292b..b68c5dcaf0a 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -4469,6 +4469,12 @@ export class Task extends EventEmitter implements TaskLike {
 		const cleanConversationHistory: (Anthropic.Messages.MessageParam | ReasoningItemForRequest)[] = []
 
+		// Check if the current model requires reasoning to be preserved (e.g., DeepSeek V4/R1).
+		// When preserveReasoning is true, ALL assistant messages must include reasoning_content
+		// to satisfy the API protocol. Messages from other models (e.g., Gemini) that lack
+		// reasoning_content need to be backfilled with a placeholder.
+		const shouldPreserveForApi = this.api.getModel().info.preserveReasoning === true
+
 		for (const msg of messages) {
 			// Standalone reasoning: send encrypted, skip plain text
 			if (msg.type === "reasoning") {
@@ -4498,8 +4504,11 @@ export class Task extends EventEmitter implements TaskLike {
 				const [first, ...rest] = contentArray
 
 				// Check if this message has reasoning_details (OpenRouter format for Gemini 3, etc.)
-				const msgWithDetails = msg
-				if (msgWithDetails.reasoning_details && Array.isArray(msgWithDetails.reasoning_details)) {
+				const msgWithDetails = msg as any
+				if (
+					(msgWithDetails.reasoning_details && Array.isArray(msgWithDetails.reasoning_details)) ||
+					msgWithDetails.reasoning_content
+				) {
 					// Build the assistant message with reasoning_details
 					let assistantContent: Anthropic.Messages.MessageParam["content"]
@@ -4515,7 +4524,12 @@ export class Task extends EventEmitter implements TaskLike {
 					cleanConversationHistory.push({
 						role: "assistant",
 						content: assistantContent,
-						reasoning_details: msgWithDetails.reasoning_details,
+						...(msgWithDetails.reasoning_details && {
+							reasoning_details: msgWithDetails.reasoning_details,
+						}),
+						...(msgWithDetails.reasoning_content && {
+							reasoning_content: msgWithDetails.reasoning_content,
+						}),
 					} as any)
 
 					continue
@@ -4587,10 +4601,22 @@ export class Task extends EventEmitter implements TaskLike {
 			// Default path for regular messages (no embedded reasoning)
 			if (msg.role) {
-				cleanConversationHistory.push({
+				const baseMessage: any = {
 					role: msg.role,
 					content: msg.content as Anthropic.Messages.ContentBlockParam[] | string,
-				})
+				}
+
+				// DeepSeek thinking mode requires ALL assistant messages to have reasoning_content.
+				// If a message from another model (e.g., Gemini) lacks it, add a placeholder
+				// to prevent 400 errors when switching back to DeepSeek mid-conversation.
+				if (msg.role === "assistant" && shouldPreserveForApi) {
+					// Use a space instead of an empty string. Some proxies/gateways or
+					// internal SDK logic may silently strip empty string fields, causing
+					// DeepSeek to reject the request with 400.
+					baseMessage.reasoning_content = " "
+				}
+
+				cleanConversationHistory.push(baseMessage)
 			}
 		}
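A condensed sketch of the backfill rule Task.ts now applies (types simplified from the diff; the real code works on `Anthropic.Messages.MessageParam` history entries inside the loop above):

```typescript
// Simplified stand-in for the history entries Task.ts rewrites.
type HistoryMessage = { role: "user" | "assistant"; content: unknown; reasoning_content?: string }

function backfillReasoning(messages: HistoryMessage[], preserveReasoning: boolean): HistoryMessage[] {
	if (!preserveReasoning) return messages
	return messages.map((msg) =>
		msg.role === "assistant" && !msg.reasoning_content
			? { ...msg, reasoning_content: " " } // single space: empty strings may be stripped in transit
			: msg,
	)
}
```

The single-space placeholder matters: per the comment in the diff, an empty string risks being stripped by proxies or SDK serialization, which would reproduce the exact 400 error the backfill exists to prevent.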