Add DeepSeek V4 models

avaritiachaos · avaritiachaos · commit e882b9ddcc2a · 2026-04-29T13:14:27.000+08:00
diff --git a/.changeset/deepseek-v4-models.md b/.changeset/deepseek-v4-models.md
@@ -0,0 +1,5 @@
+---
+"roo-code": patch
+---
+
+Add DeepSeek V4 Flash and Pro model definitions with thinking mode controls and a chat quick selector for reasoning effort.
diff --git a/packages/types/src/providers/deepseek.ts b/packages/types/src/providers/deepseek.ts
@@ -32,6 +32,34 @@ export const deepSeekModels = {
 		cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit) - Updated Dec 9, 2025
 		description: `DeepSeek-V3.2 (Thinking Mode) achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 8K output tokens. Supports JSON output, tool calls, and chat prefix completion (beta).`,
 	},
+	"deepseek-v4-flash": {
+		maxTokens: 384_000, // 384K max output
+		contextWindow: 1_000_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		preserveReasoning: true,
+		supportsReasoningEffort: ["disable", "high", "xhigh"],
+		reasoningEffort: "high",
+		inputPrice: 0.14, // $0.14 per million tokens (cache miss) - Updated Apr 24, 2026
+		outputPrice: 0.28, // $0.28 per million tokens - Updated Apr 24, 2026
+		cacheWritesPrice: 0.14, // $0.14 per million tokens (cache miss) - Updated Apr 24, 2026
+		cacheReadsPrice: 0.0028, // $0.0028 per million tokens (cache hit) - Updated Apr 24, 2026
+		description: `DeepSeek-V4 Flash supports 1M context, 384K max output, tool calls, and thinking/non-thinking modes.`,
+	},
+	"deepseek-v4-pro": {
+		maxTokens: 384_000, // 384K max output
+		contextWindow: 1_000_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		preserveReasoning: true,
+		supportsReasoningEffort: ["disable", "high", "xhigh"],
+		reasoningEffort: "high",
+		inputPrice: 0.435, // $0.435 per million tokens (cache miss) - Updated Apr 24, 2026
+		outputPrice: 0.87, // $0.87 per million tokens - Updated Apr 24, 2026
+		cacheWritesPrice: 0.435, // $0.435 per million tokens (cache miss) - Updated Apr 24, 2026
+		cacheReadsPrice: 0.003625, // $0.003625 per million tokens (cache hit) - Updated Apr 24, 2026
+		description: `DeepSeek-V4 Pro supports 1M context, 384K max output, tool calls, and thinking/non-thinking modes.`,
+	},
 } as const satisfies Record<string, ModelInfo>
 
 // https://api-docs.deepseek.com/quick_start/parameter_settings
diff --git a/src/api/providers/__tests__/deepseek.spec.ts b/src/api/providers/__tests__/deepseek.spec.ts
@@ -30,7 +30,8 @@ vi.mock("openai", () => {
 						}
 
 						// Check if this is a reasoning_content test by looking at model
-						const isReasonerModel = options.model?.includes("deepseek-reasoner")
+						const isReasonerModel =
+							options.model?.includes("deepseek-reasoner") || options.model?.includes("deepseek-v4")
 						const isToolCallTest = options.tools?.length > 0
 
 						// Return async iterator for streaming
@@ -122,7 +123,7 @@ vi.mock("openai", () => {
 import OpenAI from "openai"
 import type { Anthropic } from "@anthropic-ai/sdk"
 
-import { deepSeekDefaultModelId, DEEP_SEEK_DEFAULT_TEMPERATURE, type ModelInfo } from "@roo-code/types"
+import { deepSeekDefaultModelId, deepSeekModels, DEEP_SEEK_DEFAULT_TEMPERATURE, type ModelInfo } from "@roo-code/types"
 
 import type { ApiHandlerOptions } from "../../../shared/api"
 
@@ -226,6 +227,28 @@ describe("DeepSeekHandler", () => {
 			expect(model.info.supportsPromptCache).toBe(true)
 		})
 
+		it.each([
+			["deepseek-v4-flash", deepSeekModels["deepseek-v4-flash"]],
+			["deepseek-v4-pro", deepSeekModels["deepseek-v4-pro"]],
+		])("should return correct model info for %s", (modelId, expectedInfo) => {
+			const handlerWithV4 = new DeepSeekHandler({
+				...mockOptions,
+				apiModelId: modelId,
+			})
+
+			const model = handlerWithV4.getModel()
+			const info = model.info as ModelInfo
+
+			expect(model.id).toBe(modelId)
+			expect(info.maxTokens).toBe(384_000)
+			expect(info.contextWindow).toBe(1_000_000)
+			expect(info.supportsPromptCache).toBe(true)
+			expect(info.preserveReasoning).toBe(true)
+			expect(info.supportsReasoningEffort).toEqual(["disable", "high", "xhigh"])
+			expect(info.reasoningEffort).toBe("high")
+			expect(model.info).toBe(expectedInfo)
+		})
+
 		it("should have preserveReasoning enabled for deepseek-reasoner to support interleaved thinking", () => {
 			// This is critical for DeepSeek's interleaved thinking mode with tool calls.
 			// See: https://api-docs.deepseek.com/guides/thinking_mode
@@ -475,6 +498,60 @@ describe("DeepSeekHandler", () => {
 			expect(callArgs.thinking).toBeUndefined()
 		})
 
+		it("should pass reasoning parameters for DeepSeek V4 models", async () => {
+			const v4Handler = new DeepSeekHandler({
+				...mockOptions,
+				apiModelId: "deepseek-v4-flash",
+				enableReasoningEffort: true,
+				reasoningEffort: "high",
+			})
+
+			const stream = v4Handler.createMessage(systemPrompt, messages)
+			for await (const _chunk of stream) {
+				// Consume the stream
+			}
+
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.thinking).toEqual({ type: "enabled" })
+			expect(callArgs.reasoning_effort).toBe("high")
+		})
+
+		it("should map xhigh reasoning effort to DeepSeek max", async () => {
+			const v4Handler = new DeepSeekHandler({
+				...mockOptions,
+				apiModelId: "deepseek-v4-pro",
+				enableReasoningEffort: true,
+				reasoningEffort: "xhigh",
+			})
+
+			const stream = v4Handler.createMessage(systemPrompt, messages)
+			for await (const _chunk of stream) {
+				// Consume the stream
+			}
+
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.thinking).toEqual({ type: "enabled" })
+			expect(callArgs.reasoning_effort).toBe("max")
+		})
+
+		it("should disable thinking for DeepSeek V4 when reasoning effort is disabled", async () => {
+			const v4Handler = new DeepSeekHandler({
+				...mockOptions,
+				apiModelId: "deepseek-v4-flash",
+				enableReasoningEffort: true,
+				reasoningEffort: "disable",
+			})
+
+			const stream = v4Handler.createMessage(systemPrompt, messages)
+			for await (const _chunk of stream) {
+				// Consume the stream
+			}
+
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.thinking).toEqual({ type: "disabled" })
+			expect(callArgs.reasoning_effort).toBeUndefined()
+		})
+
 		it("should handle tool calls with reasoning_content", async () => {
 			const reasonerHandler = new DeepSeekHandler({
 				...mockOptions,
diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts
@@ -391,6 +391,78 @@ describe("OpenAiHandler", () => {
 			expect(callArgs.reasoning_effort).toBeUndefined()
 		})
 
+		it("should include DeepSeek V4 thinking and high reasoning effort for OpenAI-compatible endpoints", async () => {
+			const deepSeekV4Options: ApiHandlerOptions = {
+				...mockOptions,
+				openAiBaseUrl: "https://api.deepseek.com",
+				openAiModelId: "deepseek-v4-flash",
+				enableReasoningEffort: true,
+				reasoningEffort: "high",
+				openAiCustomModelInfo: {
+					contextWindow: 1_000_000,
+					supportsPromptCache: true,
+					supportsReasoningEffort: ["disable", "high", "xhigh"],
+					reasoningEffort: "high",
+				},
+			}
+			const deepSeekV4Handler = new OpenAiHandler(deepSeekV4Options)
+			const stream = deepSeekV4Handler.createMessage(systemPrompt, messages)
+			for await (const _chunk of stream) {
+			}
+
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.thinking).toEqual({ type: "enabled" })
+			expect(callArgs.reasoning_effort).toBe("high")
+		})
+
+		it("should map DeepSeek V4 xhigh reasoning effort to max for OpenAI-compatible endpoints", async () => {
+			const deepSeekV4Options: ApiHandlerOptions = {
+				...mockOptions,
+				openAiBaseUrl: "https://api.deepseek.com",
+				openAiModelId: "deepseek-v4-pro",
+				enableReasoningEffort: true,
+				reasoningEffort: "xhigh",
+				openAiCustomModelInfo: {
+					contextWindow: 1_000_000,
+					supportsPromptCache: true,
+					supportsReasoningEffort: ["disable", "high", "xhigh"],
+					reasoningEffort: "high",
+				},
+			}
+			const deepSeekV4Handler = new OpenAiHandler(deepSeekV4Options)
+			const stream = deepSeekV4Handler.createMessage(systemPrompt, messages)
+			for await (const _chunk of stream) {
+			}
+
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.thinking).toEqual({ type: "enabled" })
+			expect(callArgs.reasoning_effort).toBe("max")
+		})
+
+		it("should disable DeepSeek V4 thinking for OpenAI-compatible endpoints", async () => {
+			const deepSeekV4Options: ApiHandlerOptions = {
+				...mockOptions,
+				openAiBaseUrl: "https://api.deepseek.com",
+				openAiModelId: "deepseek-v4-flash",
+				enableReasoningEffort: true,
+				reasoningEffort: "disable",
+				openAiCustomModelInfo: {
+					contextWindow: 1_000_000,
+					supportsPromptCache: true,
+					supportsReasoningEffort: ["disable", "high", "xhigh"],
+					reasoningEffort: "high",
+				},
+			}
+			const deepSeekV4Handler = new OpenAiHandler(deepSeekV4Options)
+			const stream = deepSeekV4Handler.createMessage(systemPrompt, messages)
+			for await (const _chunk of stream) {
+			}
+
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.thinking).toEqual({ type: "disabled" })
+			expect(callArgs.reasoning_effort).toBeUndefined()
+		})
+
 		it("should include max_tokens when includeMaxTokens is true", async () => {
 			const optionsWithMaxTokens: ApiHandlerOptions = {
 				...mockOptions,
diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts
@@ -53,10 +53,18 @@ export class DeepSeekHandler extends OpenAiHandler {
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
 		const modelId = this.options.apiModelId ?? deepSeekDefaultModelId
-		const { info: modelInfo } = this.getModel()
-
-		// Check if this is a thinking-enabled model (deepseek-reasoner)
-		const isThinkingModel = modelId.includes("deepseek-reasoner")
+		const { info: modelInfo, reasoning } = this.getModel()
+
+		const isDeepSeekV4Model = modelId.includes("deepseek-v4")
+		const isThinkingModel = modelId.includes("deepseek-reasoner") || isDeepSeekV4Model
+		const thinkingType =
+			isDeepSeekV4Model &&
+			(this.options.reasoningEffort === "disable" || this.options.enableReasoningEffort === false)
+				? "disabled"
+				: "enabled"
+		const selectedReasoningEffort = (reasoning as { reasoning_effort?: string } | undefined)?.reasoning_effort
+		const reasoningEffort =
+			selectedReasoningEffort === "xhigh" ? "max" : selectedReasoningEffort === "high" ? "high" : undefined
 
 		// Convert messages to R1 format (merges consecutive same-role messages)
 		// This is required for DeepSeek which does not support successive messages with the same role
@@ -74,8 +82,12 @@ export class DeepSeekHandler extends OpenAiHandler {
 			messages: convertedMessages,
 			stream: true as const,
 			stream_options: { include_usage: true },
-			// Enable thinking mode for deepseek-reasoner or when tools are used with thinking model
-			...(isThinkingModel && { thinking: { type: "enabled" } }),
+			// Enable thinking mode for DeepSeek reasoning models. DeepSeek V4 also supports explicit disabling.
+			...(isThinkingModel && { thinking: { type: thinkingType } }),
+			...(reasoningEffort && {
+				// DeepSeek accepts "max"; the OpenAI SDK type has not caught up yet.
+				reasoning_effort: reasoningEffort as OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"],
+			}),
 			tools: this.convertToolsForOpenAI(metadata?.tools),
 			tool_choice: metadata?.tool_choice,
 			parallel_tool_calls: metadata?.parallelToolCalls ?? true,
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
@@ -25,6 +25,22 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ".
 import { getApiRequestTimeout } from "./utils/timeout-config"
 import { handleOpenAIError } from "./utils/openai-error-handler"
 
+type OpenAiCompatibleChatCompletionParamsStreaming = Omit<
+	OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming,
+	"reasoning_effort"
+> & {
+	thinking?: { type: "enabled" | "disabled" }
+	reasoning_effort?: OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"]
+}
+
+type OpenAiCompatibleChatCompletionParamsNonStreaming = Omit<
+	OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming,
+	"reasoning_effort"
+> & {
+	thinking?: { type: "enabled" | "disabled" }
+	reasoning_effort?: OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"]
+}
+
 // TODO: Rename this to OpenAICompatibleHandler. Also, I think the
 // `OpenAINativeHandler` can subclass from this, since it's obviously
 // compatible with the OpenAI API. We can also rename it to `OpenAIHandler`.
@@ -89,7 +105,22 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		const modelId = this.options.openAiModelId ?? ""
 		const enabledR1Format = this.options.openAiR1FormatEnabled ?? false
 		const isAzureAiInference = this._isAzureAiInference(modelUrl)
-		const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format
+		const isDeepSeekV4Model = modelId.includes("deepseek-v4")
+		const usesDeepSeekThinkingParam = modelId.includes("deepseek-reasoner") || isDeepSeekV4Model
+		const deepseekReasoner = usesDeepSeekThinkingParam || enabledR1Format
+		const deepseekThinkingType =
+			isDeepSeekV4Model &&
+			(this.options.reasoningEffort === "disable" || this.options.enableReasoningEffort === false)
+				? "disabled"
+				: "enabled"
+		const selectedReasoningEffort = (reasoning as { reasoning_effort?: string } | undefined)?.reasoning_effort
+		const providerReasoning =
+			isDeepSeekV4Model && selectedReasoningEffort === "xhigh"
+				? {
+						// DeepSeek accepts "max"; the OpenAI SDK type has not caught up yet.
+						reasoning_effort: "max" as OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"],
+					}
+				: reasoning
 
 		if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) {
 			yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages, metadata)
@@ -152,13 +183,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 
 			const isGrokXAI = this._isGrokXAI(this.options.openAiBaseUrl)
 
-			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
+			const requestOptions: OpenAiCompatibleChatCompletionParamsStreaming = {
 				model: modelId,
 				temperature: this.options.modelTemperature ?? (deepseekReasoner ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0),
 				messages: convertedMessages,
 				stream: true as const,
 				...(isGrokXAI ? {} : { stream_options: { include_usage: true } }),
-				...(reasoning && reasoning),
+				...(usesDeepSeekThinkingParam && { thinking: { type: deepseekThinkingType } }),
+				...(providerReasoning && providerReasoning),
 				tools: this.convertToolsForOpenAI(metadata?.tools),
 				tool_choice: metadata?.tool_choice,
 				parallel_tool_calls: metadata?.parallelToolCalls ?? true,
@@ -221,11 +253,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				yield this.processUsageMetrics(lastUsage, modelInfo)
 			}
 		} else {
-			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
+			const requestOptions: OpenAiCompatibleChatCompletionParamsNonStreaming = {
 				model: modelId,
 				messages: deepseekReasoner
 					? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
 					: [systemMessage, ...convertToOpenAiMessages(messages)],
+				...(usesDeepSeekThinkingParam && { thinking: { type: deepseekThinkingType } }),
+				...(providerReasoning && providerReasoning),
 				// Tools are always present (minimum ALWAYS_AVAILABLE_TOOLS)
 				tools: this.convertToolsForOpenAI(metadata?.tools),
 				tool_choice: metadata?.tool_choice,
diff --git a/src/shared/api.ts b/src/shared/api.ts
@@ -67,6 +67,7 @@ export const shouldUseReasoningEffort = ({
 		| "low"
 		| "medium"
 		| "high"
+		| "xhigh"
 		| undefined
 
 	// "disable" explicitly omits reasoning
@@ -92,6 +93,7 @@ export const shouldUseReasoningEffort = ({
 		| "low"
 		| "medium"
 		| "high"
+		| "xhigh"
 		| undefined
 	return !!modelDefaultEffort
 }
diff --git a/webview-ui/src/components/chat/ChatTextArea.tsx b/webview-ui/src/components/chat/ChatTextArea.tsx
diff --git a/webview-ui/src/components/chat/__tests__/ChatTextArea.spec.tsx b/webview-ui/src/components/chat/__tests__/ChatTextArea.spec.tsx

-Original file line number
+Diff line change
@@ -0,0 +1,5 @@
+---
+"roo-code": patch
+---
+
+Add DeepSeek V4 Flash and Pro model definitions with thinking mode controls and a chat quick selector for reasoning effort.