Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/deepseek-v4-models.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"roo-code": patch
---

Add DeepSeek V4 Flash and Pro model definitions with thinking mode controls and a chat quick selector for reasoning effort.
39 changes: 22 additions & 17 deletions packages/types/src/providers/deepseek.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,36 @@ import type { ModelInfo } from "../model.js"
// continuation within the same turn. See: https://api-docs.deepseek.com/guides/thinking_mode
export type DeepSeekModelId = keyof typeof deepSeekModels

export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat"
export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-v4-flash"

export const deepSeekModels = {
"deepseek-chat": {
maxTokens: 8192, // 8K max output
contextWindow: 128_000,
"deepseek-v4-flash": {
maxTokens: 384_000, // 384K max output
contextWindow: 1_000_000,
supportsImages: false,
supportsPromptCache: true,
inputPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025
outputPrice: 0.42, // $0.42 per million tokens - Updated Dec 9, 2025
cacheWritesPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025
cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit) - Updated Dec 9, 2025
description: `DeepSeek-V3.2 (Non-thinking Mode) achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally. Supports JSON output, tool calls, chat prefix completion (beta), and FIM completion (beta).`,
preserveReasoning: true,
supportsReasoningEffort: ["disable", "high", "xhigh"],
reasoningEffort: "high",
inputPrice: 0.14, // $0.14 per million tokens (cache miss) - Updated Apr 30, 2026
outputPrice: 0.28, // $0.28 per million tokens - Updated Apr 30, 2026
cacheWritesPrice: 0.14, // $0.14 per million tokens (cache miss) - Updated Apr 30, 2026
cacheReadsPrice: 0.0028, // $0.0028 per million tokens (cache hit) - Updated Apr 30, 2026
description: `DeepSeek-V4 Flash supports 1M context, 384K max output, tool calls, and thinking/non-thinking modes.`,
},
"deepseek-reasoner": {
maxTokens: 8192, // 8K max output
contextWindow: 128_000,
"deepseek-v4-pro": {
maxTokens: 384_000, // 384K max output
contextWindow: 1_000_000,
supportsImages: false,
supportsPromptCache: true,
preserveReasoning: true,
inputPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025
outputPrice: 0.42, // $0.42 per million tokens - Updated Dec 9, 2025
cacheWritesPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025
cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit) - Updated Dec 9, 2025
description: `DeepSeek-V3.2 (Thinking Mode) achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 8K output tokens. Supports JSON output, tool calls, and chat prefix completion (beta).`,
supportsReasoningEffort: ["disable", "high", "xhigh"],
reasoningEffort: "high",
inputPrice: 0.435, // $0.435 per million tokens (cache miss, 75% off until May 31, 2026) - Updated Apr 30, 2026
outputPrice: 0.87, // $0.87 per million tokens (75% off until May 31, 2026) - Updated Apr 30, 2026
cacheWritesPrice: 0.435, // $0.435 per million tokens (cache miss, 75% off until May 31, 2026) - Updated Apr 30, 2026
cacheReadsPrice: 0.003625, // $0.003625 per million tokens (cache hit, 75% off until May 31, 2026) - Updated Apr 30, 2026
description: `DeepSeek-V4 Pro supports 1M context, 384K max output, tool calls, and thinking/non-thinking modes.`,
},
} as const satisfies Record<string, ModelInfo>

Expand Down
160 changes: 122 additions & 38 deletions src/api/providers/__tests__/deepseek.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ vi.mock("openai", () => {
}

// Check if this is a reasoning_content test by looking at model
const isReasonerModel = options.model?.includes("deepseek-reasoner")
const isReasonerModel = options.model?.includes("deepseek-v4")
const isToolCallTest = options.tools?.length > 0

// Return async iterator for streaming
Expand Down Expand Up @@ -122,7 +122,7 @@ vi.mock("openai", () => {
import OpenAI from "openai"
import type { Anthropic } from "@anthropic-ai/sdk"

import { deepSeekDefaultModelId, DEEP_SEEK_DEFAULT_TEMPERATURE, type ModelInfo } from "@roo-code/types"
import { deepSeekDefaultModelId, deepSeekModels, DEEP_SEEK_DEFAULT_TEMPERATURE, type ModelInfo } from "@roo-code/types"

import type { ApiHandlerOptions } from "../../../shared/api"

Expand All @@ -135,7 +135,7 @@ describe("DeepSeekHandler", () => {
beforeEach(() => {
mockOptions = {
deepSeekApiKey: "test-api-key",
apiModelId: "deepseek-chat",
apiModelId: "deepseek-v4-flash",
deepSeekBaseUrl: "https://api.deepseek.com",
}
handler = new DeepSeekHandler(mockOptions)
Expand Down Expand Up @@ -206,47 +206,48 @@ describe("DeepSeekHandler", () => {
const model = handler.getModel()
expect(model.id).toBe(mockOptions.apiModelId)
expect(model.info).toBeDefined()
expect(model.info.maxTokens).toBe(8192) // deepseek-chat has 8K max
expect(model.info.contextWindow).toBe(128_000)
expect(model.info.maxTokens).toBe(384_000)
expect(model.info.contextWindow).toBe(1_000_000)
expect(model.info.supportsImages).toBe(false)
expect(model.info.supportsPromptCache).toBe(true) // Should be true now
expect(model.info.supportsPromptCache).toBe(true)
})

it("should return correct model info for deepseek-reasoner", () => {
const handlerWithReasoner = new DeepSeekHandler({
it.each([
["deepseek-v4-flash", deepSeekModels["deepseek-v4-flash"]],
["deepseek-v4-pro", deepSeekModels["deepseek-v4-pro"]],
])("should return correct model info for %s", (modelId, expectedInfo) => {
const handlerWithV4 = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-reasoner",
apiModelId: modelId,
})
const model = handlerWithReasoner.getModel()
expect(model.id).toBe("deepseek-reasoner")
expect(model.info).toBeDefined()
expect(model.info.maxTokens).toBe(8192) // deepseek-reasoner has 8K max
expect(model.info.contextWindow).toBe(128_000)
expect(model.info.supportsImages).toBe(false)
expect(model.info.supportsPromptCache).toBe(true)

const model = handlerWithV4.getModel()
const info = model.info as ModelInfo

expect(model.id).toBe(modelId)
expect(info.maxTokens).toBe(384_000)
expect(info.contextWindow).toBe(1_000_000)
expect(info.supportsPromptCache).toBe(true)
expect(info.preserveReasoning).toBe(true)
expect(info.supportsReasoningEffort).toEqual(["disable", "high", "xhigh"])
expect(info.reasoningEffort).toBe("high")
expect(model.info).toBe(expectedInfo)
})

it("should have preserveReasoning enabled for deepseek-reasoner to support interleaved thinking", () => {
it("should have preserveReasoning enabled for DeepSeek V4 models to support interleaved thinking", () => {
// This is critical for DeepSeek's interleaved thinking mode with tool calls.
// See: https://api-docs.deepseek.com/guides/thinking_mode
// The reasoning_content needs to be passed back during tool call continuation
// within the same turn for the model to continue reasoning properly.
const handlerWithReasoner = new DeepSeekHandler({
const handlerWithV4 = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-reasoner",
apiModelId: "deepseek-v4-pro",
})
const model = handlerWithReasoner.getModel()
const model = handlerWithV4.getModel()
// Cast to ModelInfo to access preserveReasoning which is an optional property
expect((model.info as ModelInfo).preserveReasoning).toBe(true)
})

it("should NOT have preserveReasoning enabled for deepseek-chat", () => {
// deepseek-chat doesn't use thinking mode, so no need to preserve reasoning
const model = handler.getModel()
// Cast to ModelInfo to access preserveReasoning which is an optional property
expect((model.info as ModelInfo).preserveReasoning).toBeUndefined()
})

it("should return provided model ID with default model info if model does not exist", () => {
const handlerWithInvalidModel = new DeepSeekHandler({
...mockOptions,
Expand Down Expand Up @@ -419,10 +420,10 @@ describe("DeepSeekHandler", () => {
},
]

it("should handle reasoning_content in streaming responses for deepseek-reasoner", async () => {
it("should handle reasoning_content in streaming responses for DeepSeek V4", async () => {
const reasonerHandler = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-reasoner",
apiModelId: "deepseek-v4-pro",
})

const stream = reasonerHandler.createMessage(systemPrompt, messages)
Expand All @@ -438,10 +439,55 @@ describe("DeepSeekHandler", () => {
expect(reasoningChunks[1].text).toBe(" I'll analyze step by step.")
})

it("should pass thinking parameter for deepseek-reasoner model", async () => {
// Regression guard: a delta whose reasoning_content is the empty string must
// still be emitted as a reasoning chunk — a truthiness check would drop it.
it("should preserve empty reasoning_content chunks for DeepSeek V4", async () => {
// Stub a two-chunk stream: an empty reasoning delta, then a normal
// finish chunk carrying usage so the stream terminates cleanly.
mockCreate.mockImplementationOnce(async () => ({
[Symbol.asyncIterator]: async function* () {
// Chunk 1: reasoning delta with empty-string text.
yield {
choices: [
{
delta: { reasoning_content: "" },
index: 0,
},
],
usage: null,
}
// Chunk 2: empty delta with finish_reason and token usage.
yield {
choices: [
{
delta: {},
index: 0,
finish_reason: "stop",
},
],
usage: {
prompt_tokens: 10,
completion_tokens: 5,
total_tokens: 15,
},
}
},
}))

const reasonerHandler = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-v4-pro",
})

// Collect every streamed chunk from the handler.
const chunks: any[] = []
for await (const chunk of reasonerHandler.createMessage(systemPrompt, messages)) {
chunks.push(chunk)
}

// The empty-text reasoning chunk must survive end-to-end.
expect(chunks.filter((chunk) => chunk.type === "reasoning")).toContainEqual({
type: "reasoning",
text: "",
})
})

it("should pass thinking parameter for DeepSeek V4 models", async () => {
const reasonerHandler = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-reasoner",
apiModelId: "deepseek-v4-pro",
})

const stream = reasonerHandler.createMessage(systemPrompt, messages)
Expand All @@ -459,26 +505,64 @@ describe("DeepSeekHandler", () => {
)
})

it("should NOT pass thinking parameter for deepseek-chat model", async () => {
const chatHandler = new DeepSeekHandler({
// Verifies the request payload sent to the DeepSeek API when reasoning effort
// is enabled: thinking mode on, effort passed through as-is for "high".
it("should pass reasoning parameters for DeepSeek V4 models", async () => {
const v4Handler = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-v4-flash",
enableReasoningEffort: true,
reasoningEffort: "high",
})

const stream = v4Handler.createMessage(systemPrompt, messages)
for await (const _chunk of stream) {
// Consume the stream
}

// Inspect the first (and only) chat.completions.create call.
const callArgs = mockCreate.mock.calls[0][0]
expect(callArgs.thinking).toEqual({ type: "enabled" })
expect(callArgs.reasoning_effort).toBe("high")
})

// Verifies effort translation: the app-level "xhigh" setting is mapped to
// DeepSeek's "max" reasoning_effort value in the outgoing request.
it("should map xhigh reasoning effort to DeepSeek max", async () => {
const v4Handler = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-v4-pro",
enableReasoningEffort: true,
reasoningEffort: "xhigh",
})

const stream = v4Handler.createMessage(systemPrompt, messages)
for await (const _chunk of stream) {
// Consume the stream
}

// Inspect the captured request payload: thinking enabled, effort remapped.
const callArgs = mockCreate.mock.calls[0][0]
expect(callArgs.thinking).toEqual({ type: "enabled" })
expect(callArgs.reasoning_effort).toBe("max")
})

it("should disable thinking for DeepSeek V4 when reasoning effort is disabled", async () => {
const v4Handler = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-chat",
apiModelId: "deepseek-v4-flash",
enableReasoningEffort: true,
reasoningEffort: "disable",
})

const stream = chatHandler.createMessage(systemPrompt, messages)
const stream = v4Handler.createMessage(systemPrompt, messages)
for await (const _chunk of stream) {
// Consume the stream
}

// Verify that the thinking parameter was NOT passed to the API
const callArgs = mockCreate.mock.calls[0][0]
expect(callArgs.thinking).toBeUndefined()
expect(callArgs.thinking).toEqual({ type: "disabled" })
expect(callArgs.reasoning_effort).toBeUndefined()
})

it("should handle tool calls with reasoning_content", async () => {
const reasonerHandler = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-reasoner",
apiModelId: "deepseek-v4-pro",
})

const tools: any[] = [
Expand Down
Loading
Loading