From 28c6d2770738bbb2e8080e4284cb59a45f7615a2 Mon Sep 17 00:00:00 2001 From: RISC-1145 Date: Fri, 24 Apr 2026 18:21:12 +0800 Subject: [PATCH 1/2] Add support for Deepseek V4 Pro and Deepseek V4 Flash --- packages/types/src/providers/baseten.ts | 22 +++++++++ packages/types/src/providers/deepseek.ts | 24 ++++++++++ packages/types/src/providers/fireworks.ts | 24 ++++++++++ packages/types/src/providers/sambanova.ts | 23 ++++++++++ src/api/providers/__tests__/fireworks.spec.ts | 46 +++++++++++++++++++ src/api/providers/deepseek.ts | 7 ++- 6 files changed, 144 insertions(+), 2 deletions(-) diff --git a/packages/types/src/providers/baseten.ts b/packages/types/src/providers/baseten.ts index 27b8cbff4ac..dbb9dcb2d7e 100644 --- a/packages/types/src/providers/baseten.ts +++ b/packages/types/src/providers/baseten.ts @@ -83,6 +83,28 @@ export const basetenModels = { description: "DeepSeek's hybrid reasoning model with efficient long context scaling with GPT-5 level performance", }, + "deepseek-ai/DeepSeek-V4-Pro": { + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 12, + outputPrice: 24, + cacheWritesPrice: 0, + cacheReadsPrice: 1, + description: "DeepSeek V4 Pro", + }, + "deepseek-ai/DeepSeek-V4-Flash": { + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 0.2, + outputPrice: 1, + cacheWritesPrice: 0, + cacheReadsPrice: 1, + description: "DeepSeek V4 Flash", + }, "openai/gpt-oss-120b": { maxTokens: 16_384, contextWindow: 128_072, diff --git a/packages/types/src/providers/deepseek.ts b/packages/types/src/providers/deepseek.ts index 40722471cb8..449a4c299e2 100644 --- a/packages/types/src/providers/deepseek.ts +++ b/packages/types/src/providers/deepseek.ts @@ -32,6 +32,30 @@ export const deepSeekModels = { cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit) - Updated Dec 9, 2025 description: `DeepSeek-V3.2 (Thinking Mode) achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 8K output tokens. Supports JSON output, tool calls, and chat prefix completion (beta).`, }, + "deepseek-v4-pro": { + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + preserveReasoning: true, + inputPrice: 12, + outputPrice: 24, + cacheWritesPrice: 0, + cacheReadsPrice: 1, + description: `DeepSeek V4 Pro`, + }, + "deepseek-v4-flash": { + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + preserveReasoning: true, + inputPrice: 0.2, + outputPrice: 1, + cacheWritesPrice: 0, + cacheReadsPrice: 1, + description: `DeepSeek V4 Flash`, + }, } as const satisfies Record // https://api-docs.deepseek.com/quick_start/parameter_settings diff --git a/packages/types/src/providers/fireworks.ts b/packages/types/src/providers/fireworks.ts index c9017c54cde..c7d794e9709 100644 --- a/packages/types/src/providers/fireworks.ts +++ b/packages/types/src/providers/fireworks.ts @@ -13,6 +13,8 @@ export type FireworksModelId = | "accounts/fireworks/models/deepseek-v3" | "accounts/fireworks/models/deepseek-v3p1" | "accounts/fireworks/models/deepseek-v3p2" + | "accounts/fireworks/models/deepseek-v4-pro" + | "accounts/fireworks/models/deepseek-v4-flash" | "accounts/fireworks/models/glm-4p5" | "accounts/fireworks/models/glm-4p5-air" | "accounts/fireworks/models/glm-4p6" @@ -200,6 +202,28 @@ export const fireworksModels = { description: "DeepSeek V3.2 is the latest iteration of the V3 model family with enhanced reasoning capabilities, improved code generation, and better instruction following.", }, + "accounts/fireworks/models/deepseek-v4-pro": { + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 12, + outputPrice: 24, + cacheWritesPrice: 0, + cacheReadsPrice: 1, + description: "DeepSeek V4 Pro", + }, + "accounts/fireworks/models/deepseek-v4-flash": { + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 0.2, + outputPrice: 1, + cacheWritesPrice: 0, + cacheReadsPrice: 1, + description: "DeepSeek V4 Flash", + }, "accounts/fireworks/models/glm-4p7": { maxTokens: 25344, contextWindow: 198000, diff --git a/packages/types/src/providers/sambanova.ts b/packages/types/src/providers/sambanova.ts index 624a7eb8c77..426204800ca 100644 --- a/packages/types/src/providers/sambanova.ts +++ b/packages/types/src/providers/sambanova.ts @@ -7,6 +7,7 @@ export type SambaNovaModelId = | "DeepSeek-R1" | "DeepSeek-V3-0324" | "DeepSeek-V3.1" + | "DeepSeek-V4-Pro" | "Llama-4-Maverick-17B-128E-Instruct" | "Qwen3-32B" | "gpt-oss-120b" @@ -60,6 +61,28 @@ export const sambaNovaModels = { outputPrice: 4.5, description: "DeepSeek V3.1 model with 32K context window.", }, + "DeepSeek-V4-Pro": { + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 12, + outputPrice: 24, + cacheWritesPrice: 0, + cacheReadsPrice: 1, + description: "DeepSeek V4 Pro", + }, + "deepseek-ai/DeepSeek-V4-Flash": { + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 0.2, + outputPrice: 1, + cacheWritesPrice: 0, + cacheReadsPrice: 1, + description: "DeepSeek V4 Flash", + }, "Llama-4-Maverick-17B-128E-Instruct": { maxTokens: 8192, contextWindow: 131072, diff --git a/src/api/providers/__tests__/fireworks.spec.ts b/src/api/providers/__tests__/fireworks.spec.ts index 79f69f868b1..898297353d0 100644 --- a/src/api/providers/__tests__/fireworks.spec.ts +++ b/src/api/providers/__tests__/fireworks.spec.ts @@ -245,6 +245,52 @@ describe("FireworksHandler", () => { ) }) + it("should return DeepSeek V4 Pro model with correct configuration", () => { + const testModelId: FireworksModelId = "accounts/fireworks/models/deepseek-v4-pro" + const handlerWithModel = new FireworksHandler({ + apiModelId: testModelId, + fireworksApiKey: "test-fireworks-api-key", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual( + expect.objectContaining({ + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 12, + outputPrice: 24, + cacheWritesPrice: 0, + cacheReadsPrice: 1, + description: expect.stringContaining("DeepSeek V4 Pro"), + }), + ) + }) + + it("should return DeepSeek V4 Flash model with correct configuration", () => { + const testModelId: FireworksModelId = "accounts/fireworks/models/deepseek-v4-flash" + const handlerWithModel = new FireworksHandler({ + apiModelId: testModelId, + fireworksApiKey: "test-fireworks-api-key", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual( + expect.objectContaining({ + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 0.2, + outputPrice: 1, + cacheWritesPrice: 0, + cacheReadsPrice: 1, + description: expect.stringContaining("DeepSeek V4 Flash"), + }), + ) + }) + it("should return GLM-4.5 model with correct configuration", () => { const testModelId: FireworksModelId = "accounts/fireworks/models/glm-4p5" const handlerWithModel = new FireworksHandler({ diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index 84cd557de05..bced110a39b 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -55,8 +55,11 @@ export class DeepSeekHandler extends OpenAiHandler { const modelId = this.options.apiModelId ?? deepSeekDefaultModelId const { info: modelInfo } = this.getModel() - // Check if this is a thinking-enabled model (deepseek-reasoner) - const isThinkingModel = modelId.includes("deepseek-reasoner") + // Check if this is a thinking-enabled model (deepseek-reasoner, deepseek-v4-pro, deepseek-v4-flash) + const isThinkingModel = + modelId.includes("deepseek-reasoner") || + modelId.includes("deepseek-v4-pro") || + modelId.includes("deepseek-v4-flash") // Convert messages to R1 format (merges consecutive same-role messages) // This is required for DeepSeek which does not support successive messages with the same role From aec3f1184ca71d3b39d53e983504cc2808c42b0c Mon Sep 17 00:00:00 2001 From: RISC-1145 Date: Fri, 24 Apr 2026 19:51:15 +0800 Subject: [PATCH 2/2] fix: correct DeepSeek V4 model pricing to accurate USD values --- packages/types/src/providers/baseten.ts | 16 ++++++++-------- packages/types/src/providers/deepseek.ts | 16 ++++++++-------- packages/types/src/providers/fireworks.ts | 16 ++++++++-------- packages/types/src/providers/sambanova.ts | 16 ++++++++-------- src/api/providers/__tests__/fireworks.spec.ts | 16 ++++++++-------- 5 files changed, 40 insertions(+), 40 deletions(-) diff --git a/packages/types/src/providers/baseten.ts b/packages/types/src/providers/baseten.ts index dbb9dcb2d7e..6c275cda2c0 100644 --- a/packages/types/src/providers/baseten.ts +++ b/packages/types/src/providers/baseten.ts @@ -88,10 +88,10 @@ export const basetenModels = { contextWindow: 1_000_000, supportsImages: false, supportsPromptCache: true, - inputPrice: 12, - outputPrice: 24, - cacheWritesPrice: 0, - cacheReadsPrice: 1, + inputPrice: 1.74, + outputPrice: 3.48, + cacheWritesPrice: 1.74, + cacheReadsPrice: 0.145, description: "DeepSeek V4 Pro", }, "deepseek-ai/DeepSeek-V4-Flash": { @@ -99,10 +99,10 @@ export const basetenModels = { contextWindow: 1_000_000, supportsImages: false, supportsPromptCache: true, - inputPrice: 0.2, - outputPrice: 1, - cacheWritesPrice: 0, - cacheReadsPrice: 1, + inputPrice: 0.14, + outputPrice: 0.28, + cacheWritesPrice: 0.14, + cacheReadsPrice: 0.028, description: "DeepSeek V4 Flash", }, "openai/gpt-oss-120b": { diff --git a/packages/types/src/providers/deepseek.ts b/packages/types/src/providers/deepseek.ts index 449a4c299e2..0e7267a3aab 100644 --- a/packages/types/src/providers/deepseek.ts +++ b/packages/types/src/providers/deepseek.ts @@ -38,10 +38,10 @@ export const deepSeekModels = { supportsImages: false, supportsPromptCache: true, preserveReasoning: true, - inputPrice: 12, - outputPrice: 24, - cacheWritesPrice: 0, - cacheReadsPrice: 1, + inputPrice: 1.74, // $1.74 per million tokens (cache miss) + outputPrice: 3.48, // $3.48 per million tokens + cacheWritesPrice: 1.74, // $1.74 per million tokens (cache miss) + cacheReadsPrice: 0.145, // $0.145 per million tokens (cache hit) description: `DeepSeek V4 Pro`, }, "deepseek-v4-flash": { @@ -50,10 +50,10 @@ export const deepSeekModels = { supportsImages: false, supportsPromptCache: true, preserveReasoning: true, - inputPrice: 0.2, - outputPrice: 1, - cacheWritesPrice: 0, - cacheReadsPrice: 1, + inputPrice: 0.14, // $0.14 per million tokens (cache miss) + outputPrice: 0.28, // $0.28 per million tokens + cacheWritesPrice: 0.14, // $0.14 per million tokens (cache miss) + cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit) description: `DeepSeek V4 Flash`, }, } as const satisfies Record diff --git a/packages/types/src/providers/fireworks.ts b/packages/types/src/providers/fireworks.ts index c7d794e9709..fb839c07e1c 100644 --- a/packages/types/src/providers/fireworks.ts +++ b/packages/types/src/providers/fireworks.ts @@ -207,10 +207,10 @@ export const fireworksModels = { contextWindow: 1_000_000, supportsImages: false, supportsPromptCache: true, - inputPrice: 12, - outputPrice: 24, - cacheWritesPrice: 0, - cacheReadsPrice: 1, + inputPrice: 1.74, + outputPrice: 3.48, + cacheWritesPrice: 1.74, + cacheReadsPrice: 0.145, description: "DeepSeek V4 Pro", }, "accounts/fireworks/models/deepseek-v4-flash": { @@ -218,10 +218,10 @@ export const fireworksModels = { contextWindow: 1_000_000, supportsImages: false, supportsPromptCache: true, - inputPrice: 0.2, - outputPrice: 1, - cacheWritesPrice: 0, - cacheReadsPrice: 1, + inputPrice: 0.14, + outputPrice: 0.28, + cacheWritesPrice: 0.14, + cacheReadsPrice: 0.028, description: "DeepSeek V4 Flash", }, "accounts/fireworks/models/glm-4p7": { diff --git a/packages/types/src/providers/sambanova.ts b/packages/types/src/providers/sambanova.ts index 426204800ca..8b2f88ea589 100644 --- a/packages/types/src/providers/sambanova.ts +++ b/packages/types/src/providers/sambanova.ts @@ -66,10 +66,10 @@ export const sambaNovaModels = { contextWindow: 1_000_000, supportsImages: false, supportsPromptCache: true, - inputPrice: 12, - outputPrice: 24, - cacheWritesPrice: 0, - cacheReadsPrice: 1, + inputPrice: 1.74, + outputPrice: 3.48, + cacheWritesPrice: 1.74, + cacheReadsPrice: 0.145, description: "DeepSeek V4 Pro", }, "deepseek-ai/DeepSeek-V4-Flash": { @@ -77,10 +77,10 @@ export const sambaNovaModels = { contextWindow: 1_000_000, supportsImages: false, supportsPromptCache: true, - inputPrice: 0.2, - outputPrice: 1, - cacheWritesPrice: 0, - cacheReadsPrice: 1, + inputPrice: 0.14, + outputPrice: 0.28, + cacheWritesPrice: 0.14, + cacheReadsPrice: 0.028, description: "DeepSeek V4 Flash", }, "Llama-4-Maverick-17B-128E-Instruct": { diff --git a/src/api/providers/__tests__/fireworks.spec.ts b/src/api/providers/__tests__/fireworks.spec.ts index 898297353d0..6641292f538 100644 --- a/src/api/providers/__tests__/fireworks.spec.ts +++ b/src/api/providers/__tests__/fireworks.spec.ts @@ -259,10 +259,10 @@ describe("FireworksHandler", () => { contextWindow: 1_000_000, supportsImages: false, supportsPromptCache: true, - inputPrice: 12, - outputPrice: 24, - cacheWritesPrice: 0, - cacheReadsPrice: 1, + inputPrice: 1.74, + outputPrice: 3.48, + cacheWritesPrice: 1.74, + cacheReadsPrice: 0.145, description: expect.stringContaining("DeepSeek V4 Pro"), }), ) @@ -282,10 +282,10 @@ describe("FireworksHandler", () => { contextWindow: 1_000_000, supportsImages: false, supportsPromptCache: true, - inputPrice: 0.2, - outputPrice: 1, - cacheWritesPrice: 0, - cacheReadsPrice: 1, + inputPrice: 0.14, + outputPrice: 0.28, + cacheWritesPrice: 0.14, + cacheReadsPrice: 0.028, description: expect.stringContaining("DeepSeek V4 Flash"), }), )