Skip to content

Commit e882b9d

Browse files
committed
Add DeepSeek V4 models
1 parent ad25634 commit e882b9d

9 files changed

Lines changed: 447 additions & 15 deletions

File tree

.changeset/deepseek-v4-models.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"roo-code": patch
3+
---
4+
5+
Add DeepSeek V4 Flash and Pro model definitions with thinking mode controls and a chat quick selector for reasoning effort.

packages/types/src/providers/deepseek.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,34 @@ export const deepSeekModels = {
3232
cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit) - Updated Dec 9, 2025
3333
description: `DeepSeek-V3.2 (Thinking Mode) achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 8K output tokens. Supports JSON output, tool calls, and chat prefix completion (beta).`,
3434
},
35+
"deepseek-v4-flash": {
36+
maxTokens: 384_000, // 384K max output
37+
contextWindow: 1_000_000,
38+
supportsImages: false,
39+
supportsPromptCache: true,
40+
preserveReasoning: true,
41+
supportsReasoningEffort: ["disable", "high", "xhigh"],
42+
reasoningEffort: "high",
43+
inputPrice: 0.14, // $0.14 per million tokens (cache miss) - Updated Apr 24, 2026
44+
outputPrice: 0.28, // $0.28 per million tokens - Updated Apr 24, 2026
45+
cacheWritesPrice: 0.14, // $0.14 per million tokens (cache miss) - Updated Apr 24, 2026
46+
cacheReadsPrice: 0.0028, // $0.0028 per million tokens (cache hit) - Updated Apr 24, 2026
47+
description: `DeepSeek-V4 Flash supports 1M context, 384K max output, tool calls, and thinking/non-thinking modes.`,
48+
},
49+
"deepseek-v4-pro": {
50+
maxTokens: 384_000, // 384K max output
51+
contextWindow: 1_000_000,
52+
supportsImages: false,
53+
supportsPromptCache: true,
54+
preserveReasoning: true,
55+
supportsReasoningEffort: ["disable", "high", "xhigh"],
56+
reasoningEffort: "high",
57+
inputPrice: 0.435, // $0.435 per million tokens (cache miss) - Updated Apr 24, 2026
58+
outputPrice: 0.87, // $0.87 per million tokens - Updated Apr 24, 2026
59+
cacheWritesPrice: 0.435, // $0.435 per million tokens (cache miss) - Updated Apr 24, 2026
60+
cacheReadsPrice: 0.003625, // $0.003625 per million tokens (cache hit) - Updated Apr 24, 2026
61+
description: `DeepSeek-V4 Pro supports 1M context, 384K max output, tool calls, and thinking/non-thinking modes.`,
62+
},
3563
} as const satisfies Record<string, ModelInfo>
3664

3765
// https://api-docs.deepseek.com/quick_start/parameter_settings

src/api/providers/__tests__/deepseek.spec.ts

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ vi.mock("openai", () => {
3030
}
3131

3232
// Check if this is a reasoning_content test by looking at model
33-
const isReasonerModel = options.model?.includes("deepseek-reasoner")
33+
const isReasonerModel =
34+
options.model?.includes("deepseek-reasoner") || options.model?.includes("deepseek-v4")
3435
const isToolCallTest = options.tools?.length > 0
3536

3637
// Return async iterator for streaming
@@ -122,7 +123,7 @@ vi.mock("openai", () => {
122123
import OpenAI from "openai"
123124
import type { Anthropic } from "@anthropic-ai/sdk"
124125

125-
import { deepSeekDefaultModelId, DEEP_SEEK_DEFAULT_TEMPERATURE, type ModelInfo } from "@roo-code/types"
126+
import { deepSeekDefaultModelId, deepSeekModels, DEEP_SEEK_DEFAULT_TEMPERATURE, type ModelInfo } from "@roo-code/types"
126127

127128
import type { ApiHandlerOptions } from "../../../shared/api"
128129

@@ -226,6 +227,28 @@ describe("DeepSeekHandler", () => {
226227
expect(model.info.supportsPromptCache).toBe(true)
227228
})
228229

230+
it.each([
231+
["deepseek-v4-flash", deepSeekModels["deepseek-v4-flash"]],
232+
["deepseek-v4-pro", deepSeekModels["deepseek-v4-pro"]],
233+
])("should return correct model info for %s", (modelId, expectedInfo) => {
234+
const handlerWithV4 = new DeepSeekHandler({
235+
...mockOptions,
236+
apiModelId: modelId,
237+
})
238+
239+
const model = handlerWithV4.getModel()
240+
const info = model.info as ModelInfo
241+
242+
expect(model.id).toBe(modelId)
243+
expect(info.maxTokens).toBe(384_000)
244+
expect(info.contextWindow).toBe(1_000_000)
245+
expect(info.supportsPromptCache).toBe(true)
246+
expect(info.preserveReasoning).toBe(true)
247+
expect(info.supportsReasoningEffort).toEqual(["disable", "high", "xhigh"])
248+
expect(info.reasoningEffort).toBe("high")
249+
expect(model.info).toBe(expectedInfo)
250+
})
251+
229252
it("should have preserveReasoning enabled for deepseek-reasoner to support interleaved thinking", () => {
230253
// This is critical for DeepSeek's interleaved thinking mode with tool calls.
231254
// See: https://api-docs.deepseek.com/guides/thinking_mode
@@ -475,6 +498,60 @@ describe("DeepSeekHandler", () => {
475498
expect(callArgs.thinking).toBeUndefined()
476499
})
477500

501+
it("should pass reasoning parameters for DeepSeek V4 models", async () => {
502+
const v4Handler = new DeepSeekHandler({
503+
...mockOptions,
504+
apiModelId: "deepseek-v4-flash",
505+
enableReasoningEffort: true,
506+
reasoningEffort: "high",
507+
})
508+
509+
const stream = v4Handler.createMessage(systemPrompt, messages)
510+
for await (const _chunk of stream) {
511+
// Consume the stream
512+
}
513+
514+
const callArgs = mockCreate.mock.calls[0][0]
515+
expect(callArgs.thinking).toEqual({ type: "enabled" })
516+
expect(callArgs.reasoning_effort).toBe("high")
517+
})
518+
519+
it("should map xhigh reasoning effort to DeepSeek max", async () => {
520+
const v4Handler = new DeepSeekHandler({
521+
...mockOptions,
522+
apiModelId: "deepseek-v4-pro",
523+
enableReasoningEffort: true,
524+
reasoningEffort: "xhigh",
525+
})
526+
527+
const stream = v4Handler.createMessage(systemPrompt, messages)
528+
for await (const _chunk of stream) {
529+
// Consume the stream
530+
}
531+
532+
const callArgs = mockCreate.mock.calls[0][0]
533+
expect(callArgs.thinking).toEqual({ type: "enabled" })
534+
expect(callArgs.reasoning_effort).toBe("max")
535+
})
536+
537+
it("should disable thinking for DeepSeek V4 when reasoning effort is disabled", async () => {
538+
const v4Handler = new DeepSeekHandler({
539+
...mockOptions,
540+
apiModelId: "deepseek-v4-flash",
541+
enableReasoningEffort: true,
542+
reasoningEffort: "disable",
543+
})
544+
545+
const stream = v4Handler.createMessage(systemPrompt, messages)
546+
for await (const _chunk of stream) {
547+
// Consume the stream
548+
}
549+
550+
const callArgs = mockCreate.mock.calls[0][0]
551+
expect(callArgs.thinking).toEqual({ type: "disabled" })
552+
expect(callArgs.reasoning_effort).toBeUndefined()
553+
})
554+
478555
it("should handle tool calls with reasoning_content", async () => {
479556
const reasonerHandler = new DeepSeekHandler({
480557
...mockOptions,

src/api/providers/__tests__/openai.spec.ts

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,78 @@ describe("OpenAiHandler", () => {
391391
expect(callArgs.reasoning_effort).toBeUndefined()
392392
})
393393

394+
it("should include DeepSeek V4 thinking and high reasoning effort for OpenAI-compatible endpoints", async () => {
395+
const deepSeekV4Options: ApiHandlerOptions = {
396+
...mockOptions,
397+
openAiBaseUrl: "https://api.deepseek.com",
398+
openAiModelId: "deepseek-v4-flash",
399+
enableReasoningEffort: true,
400+
reasoningEffort: "high",
401+
openAiCustomModelInfo: {
402+
contextWindow: 1_000_000,
403+
supportsPromptCache: true,
404+
supportsReasoningEffort: ["disable", "high", "xhigh"],
405+
reasoningEffort: "high",
406+
},
407+
}
408+
const deepSeekV4Handler = new OpenAiHandler(deepSeekV4Options)
409+
const stream = deepSeekV4Handler.createMessage(systemPrompt, messages)
410+
for await (const _chunk of stream) {
411+
}
412+
413+
const callArgs = mockCreate.mock.calls[0][0]
414+
expect(callArgs.thinking).toEqual({ type: "enabled" })
415+
expect(callArgs.reasoning_effort).toBe("high")
416+
})
417+
418+
it("should map DeepSeek V4 xhigh reasoning effort to max for OpenAI-compatible endpoints", async () => {
419+
const deepSeekV4Options: ApiHandlerOptions = {
420+
...mockOptions,
421+
openAiBaseUrl: "https://api.deepseek.com",
422+
openAiModelId: "deepseek-v4-pro",
423+
enableReasoningEffort: true,
424+
reasoningEffort: "xhigh",
425+
openAiCustomModelInfo: {
426+
contextWindow: 1_000_000,
427+
supportsPromptCache: true,
428+
supportsReasoningEffort: ["disable", "high", "xhigh"],
429+
reasoningEffort: "high",
430+
},
431+
}
432+
const deepSeekV4Handler = new OpenAiHandler(deepSeekV4Options)
433+
const stream = deepSeekV4Handler.createMessage(systemPrompt, messages)
434+
for await (const _chunk of stream) {
435+
}
436+
437+
const callArgs = mockCreate.mock.calls[0][0]
438+
expect(callArgs.thinking).toEqual({ type: "enabled" })
439+
expect(callArgs.reasoning_effort).toBe("max")
440+
})
441+
442+
it("should disable DeepSeek V4 thinking for OpenAI-compatible endpoints", async () => {
443+
const deepSeekV4Options: ApiHandlerOptions = {
444+
...mockOptions,
445+
openAiBaseUrl: "https://api.deepseek.com",
446+
openAiModelId: "deepseek-v4-flash",
447+
enableReasoningEffort: true,
448+
reasoningEffort: "disable",
449+
openAiCustomModelInfo: {
450+
contextWindow: 1_000_000,
451+
supportsPromptCache: true,
452+
supportsReasoningEffort: ["disable", "high", "xhigh"],
453+
reasoningEffort: "high",
454+
},
455+
}
456+
const deepSeekV4Handler = new OpenAiHandler(deepSeekV4Options)
457+
const stream = deepSeekV4Handler.createMessage(systemPrompt, messages)
458+
for await (const _chunk of stream) {
459+
}
460+
461+
const callArgs = mockCreate.mock.calls[0][0]
462+
expect(callArgs.thinking).toEqual({ type: "disabled" })
463+
expect(callArgs.reasoning_effort).toBeUndefined()
464+
})
465+
394466
it("should include max_tokens when includeMaxTokens is true", async () => {
395467
const optionsWithMaxTokens: ApiHandlerOptions = {
396468
...mockOptions,

src/api/providers/deepseek.ts

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,18 @@ export class DeepSeekHandler extends OpenAiHandler {
5353
metadata?: ApiHandlerCreateMessageMetadata,
5454
): ApiStream {
5555
const modelId = this.options.apiModelId ?? deepSeekDefaultModelId
56-
const { info: modelInfo } = this.getModel()
57-
58-
// Check if this is a thinking-enabled model (deepseek-reasoner)
59-
const isThinkingModel = modelId.includes("deepseek-reasoner")
56+
const { info: modelInfo, reasoning } = this.getModel()
57+
58+
const isDeepSeekV4Model = modelId.includes("deepseek-v4")
59+
const isThinkingModel = modelId.includes("deepseek-reasoner") || isDeepSeekV4Model
60+
const thinkingType =
61+
isDeepSeekV4Model &&
62+
(this.options.reasoningEffort === "disable" || this.options.enableReasoningEffort === false)
63+
? "disabled"
64+
: "enabled"
65+
const selectedReasoningEffort = (reasoning as { reasoning_effort?: string } | undefined)?.reasoning_effort
66+
const reasoningEffort =
67+
selectedReasoningEffort === "xhigh" ? "max" : selectedReasoningEffort === "high" ? "high" : undefined
6068

6169
// Convert messages to R1 format (merges consecutive same-role messages)
6270
// This is required for DeepSeek which does not support successive messages with the same role
@@ -74,8 +82,12 @@ export class DeepSeekHandler extends OpenAiHandler {
7482
messages: convertedMessages,
7583
stream: true as const,
7684
stream_options: { include_usage: true },
77-
// Enable thinking mode for deepseek-reasoner or when tools are used with thinking model
78-
...(isThinkingModel && { thinking: { type: "enabled" } }),
85+
// Enable thinking mode for DeepSeek reasoning models. DeepSeek V4 also supports explicit disabling.
86+
...(isThinkingModel && { thinking: { type: thinkingType } }),
87+
...(reasoningEffort && {
88+
// DeepSeek accepts "max"; the OpenAI SDK type has not caught up yet.
89+
reasoning_effort: reasoningEffort as OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"],
90+
}),
7991
tools: this.convertToolsForOpenAI(metadata?.tools),
8092
tool_choice: metadata?.tool_choice,
8193
parallel_tool_calls: metadata?.parallelToolCalls ?? true,

src/api/providers/openai.ts

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,22 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ".
2525
import { getApiRequestTimeout } from "./utils/timeout-config"
2626
import { handleOpenAIError } from "./utils/openai-error-handler"
2727

28+
type OpenAiCompatibleChatCompletionParamsStreaming = Omit<
29+
OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming,
30+
"reasoning_effort"
31+
> & {
32+
thinking?: { type: "enabled" | "disabled" }
33+
reasoning_effort?: OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"]
34+
}
35+
36+
type OpenAiCompatibleChatCompletionParamsNonStreaming = Omit<
37+
OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming,
38+
"reasoning_effort"
39+
> & {
40+
thinking?: { type: "enabled" | "disabled" }
41+
reasoning_effort?: OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"]
42+
}
43+
2844
// TODO: Rename this to OpenAICompatibleHandler. Also, I think the
2945
// `OpenAINativeHandler` can subclass from this, since it's obviously
3046
// compatible with the OpenAI API. We can also rename it to `OpenAIHandler`.
@@ -89,7 +105,22 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
89105
const modelId = this.options.openAiModelId ?? ""
90106
const enabledR1Format = this.options.openAiR1FormatEnabled ?? false
91107
const isAzureAiInference = this._isAzureAiInference(modelUrl)
92-
const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format
108+
const isDeepSeekV4Model = modelId.includes("deepseek-v4")
109+
const usesDeepSeekThinkingParam = modelId.includes("deepseek-reasoner") || isDeepSeekV4Model
110+
const deepseekReasoner = usesDeepSeekThinkingParam || enabledR1Format
111+
const deepseekThinkingType =
112+
isDeepSeekV4Model &&
113+
(this.options.reasoningEffort === "disable" || this.options.enableReasoningEffort === false)
114+
? "disabled"
115+
: "enabled"
116+
const selectedReasoningEffort = (reasoning as { reasoning_effort?: string } | undefined)?.reasoning_effort
117+
const providerReasoning =
118+
isDeepSeekV4Model && selectedReasoningEffort === "xhigh"
119+
? {
120+
// DeepSeek accepts "max"; the OpenAI SDK type has not caught up yet.
121+
reasoning_effort: "max" as OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"],
122+
}
123+
: reasoning
93124

94125
if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) {
95126
yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages, metadata)
@@ -152,13 +183,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
152183

153184
const isGrokXAI = this._isGrokXAI(this.options.openAiBaseUrl)
154185

155-
const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
186+
const requestOptions: OpenAiCompatibleChatCompletionParamsStreaming = {
156187
model: modelId,
157188
temperature: this.options.modelTemperature ?? (deepseekReasoner ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0),
158189
messages: convertedMessages,
159190
stream: true as const,
160191
...(isGrokXAI ? {} : { stream_options: { include_usage: true } }),
161-
...(reasoning && reasoning),
192+
...(usesDeepSeekThinkingParam && { thinking: { type: deepseekThinkingType } }),
193+
...(providerReasoning && providerReasoning),
162194
tools: this.convertToolsForOpenAI(metadata?.tools),
163195
tool_choice: metadata?.tool_choice,
164196
parallel_tool_calls: metadata?.parallelToolCalls ?? true,
@@ -221,11 +253,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
221253
yield this.processUsageMetrics(lastUsage, modelInfo)
222254
}
223255
} else {
224-
const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
256+
const requestOptions: OpenAiCompatibleChatCompletionParamsNonStreaming = {
225257
model: modelId,
226258
messages: deepseekReasoner
227259
? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
228260
: [systemMessage, ...convertToOpenAiMessages(messages)],
261+
...(usesDeepSeekThinkingParam && { thinking: { type: deepseekThinkingType } }),
262+
...(providerReasoning && providerReasoning),
229263
// Tools are always present (minimum ALWAYS_AVAILABLE_TOOLS)
230264
tools: this.convertToolsForOpenAI(metadata?.tools),
231265
tool_choice: metadata?.tool_choice,

src/shared/api.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ export const shouldUseReasoningEffort = ({
6767
| "low"
6868
| "medium"
6969
| "high"
70+
| "xhigh"
7071
| undefined
7172

7273
// "disable" explicitly omits reasoning
@@ -92,6 +93,7 @@ export const shouldUseReasoningEffort = ({
9293
| "low"
9394
| "medium"
9495
| "high"
96+
| "xhigh"
9597
| undefined
9698
return !!modelDefaultEffort
9799
}

0 commit comments

Comments
 (0)