13 changes: 13 additions & 0 deletions packages/types/src/model.ts
@@ -83,6 +83,19 @@ export const modelInfoSchema = z.object({
supportsReasoningBudget: z.boolean().optional(),
// Capability flag to indicate whether the model supports simple on/off binary reasoning
supportsReasoningBinary: z.boolean().optional(),
/**
* Capability flag to indicate whether the model supports interleaved thinking.
* When true, the model emits `reasoning_content` alongside `content` in responses.
* Examples: DeepSeek reasoner, Kimi K2 Thinking, MiniMax M2.
*/
supportsInterleavedThinking: z.boolean().optional(),
/**
* Provider-specific parameters needed to enable interleaved thinking.
* Different providers may use different parameter formats.
* Example: DeepSeek uses `{ thinking: { type: "enabled" } }`.
* This parameter is passed via `extra_body` or similar mechanism.
*/
interleavedThinkingParam: z.record(z.any()).optional(),
// Capability flag to indicate whether the model supports temperature parameter
supportsTemperature: z.boolean().optional(),
defaultTemperature: z.number().optional(),
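As a concrete illustration of how these two schema fields travel together, a provider entry opting into interleaved thinking would set both at once. A minimal sketch (the `deepseek-reasoner` values mirror the DeepSeek change in this PR; the constant name is illustrative):

```ts
import type { ModelInfo } from "@roo-code/types"

// Sketch: a model that emits `reasoning_content` pairs the capability flag
// with the provider-specific enable parameter.
const interleavedThinkingFields: Partial<ModelInfo> = {
	supportsInterleavedThinking: true,
	// Forwarded to the provider via `extra_body` when the request is built.
	interleavedThinkingParam: { thinking: { type: "enabled" } },
}

// Models without the feature simply omit both optional fields.
```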
4 changes: 4 additions & 0 deletions packages/types/src/providers/deepseek.ts
@@ -24,6 +24,10 @@ export const deepSeekModels = {
supportsImages: false,
supportsPromptCache: true,
supportsNativeTools: true,
// Enables interleaved thinking mode (reasoning_content field)
supportsInterleavedThinking: true,
// Parameter passed via extra_body to enable thinking mode
interleavedThinkingParam: { thinking: { type: "enabled" } },
inputPrice: 0.56, // $0.56 per million tokens (cache miss) - Updated Sept 5, 2025
outputPrice: 1.68, // $1.68 per million tokens - Updated Sept 5, 2025
cacheWritesPrice: 0.56, // $0.56 per million tokens (cache miss) - Updated Sept 5, 2025
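For reference, this is roughly the request body DeepSeek should receive once `interleavedThinkingParam` is merged in via `extra_body`. A sketch only: the exact merge behavior is client-specific (the OpenAI Python SDK folds `extra_body` keys into the JSON payload, and an equivalent pass-through is assumed here); field values are illustrative.

```ts
// Assumed wire format after the extra_body merge.
const requestBody = {
	model: "deepseek-reasoner",
	messages: [{ role: "user", content: "..." }],
	stream: true,
	// Injected from interleavedThinkingParam:
	thinking: { type: "enabled" },
}
```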
1 change: 1 addition & 0 deletions packages/types/src/providers/openai.ts
@@ -484,6 +484,7 @@ export const openAiModelInfoSaneDefaults: ModelInfo = {
inputPrice: 0,
outputPrice: 0,
supportsNativeTools: true,
supportsInterleavedThinking: false,
}

// https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
11 changes: 10 additions & 1 deletion src/api/providers/__tests__/deepseek.spec.ts
@@ -70,7 +70,7 @@ vi.mock("openai", () => {
import OpenAI from "openai"
import type { Anthropic } from "@anthropic-ai/sdk"

import { deepSeekDefaultModelId, type ModelInfo } from "@roo-code/types"

import type { ApiHandlerOptions } from "../../../shared/api"

@@ -172,6 +172,15 @@ describe("DeepSeekHandler", () => {
expect(model.info.contextWindow).toBe(128_000)
expect(model.info.supportsImages).toBe(false)
expect(model.info.supportsPromptCache).toBe(true)
// Verify interleaved thinking capability flags
expect((model.info as ModelInfo).supportsInterleavedThinking).toBe(true)
expect((model.info as ModelInfo).interleavedThinkingParam).toEqual({ thinking: { type: "enabled" } })
})

it("should not have interleaved thinking flags for deepseek-chat", () => {
const model = handler.getModel()
expect((model.info as ModelInfo).supportsInterleavedThinking).toBeUndefined()
expect((model.info as ModelInfo).interleavedThinkingParam).toBeUndefined()
})

it("should return provided model ID with default model info if model does not exist", () => {
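A follow-up assertion one might add here is that the parameter actually reaches the mocked client. A sketch, assuming the `vi.mock("openai")` setup above exposes the chat-completions spy as `mockCreate` and that `handler` is configured with `deepseek-reasoner` (both names are hypothetical; the real spy and setup live outside this hunk):

```ts
it("should pass the thinking param via extra_body for deepseek-reasoner", async () => {
	// Drain the stream so the request is actually issued.
	for await (const _chunk of handler.createMessage("system prompt", [])) {
		// no-op
	}
	const requestArgs = mockCreate.mock.calls.at(-1)?.[0]
	expect(requestArgs.extra_body).toEqual({ thinking: { type: "enabled" } })
})
```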
142 changes: 124 additions & 18 deletions src/api/providers/openai.ts
@@ -17,7 +17,8 @@ import { XmlMatcher } from "../../utils/xml-matcher"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { convertToR1Format } from "../transform/r1-format"
import { convertToSimpleMessages } from "../transform/simple-format"
import { isNewUserTurn } from "../transform/detect-turn-boundary"
import { ApiStream, ApiStreamUsageChunk, type ApiStreamToolCallPartialChunk } from "../transform/stream"
import { getModelParams } from "../transform/model-params"

import { DEFAULT_HEADERS } from "./constants"
@@ -85,13 +86,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
messages: Anthropic.Messages.MessageParam[],
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
const { info: modelInfo, reasoning, temperature } = this.getModel()
const modelUrl = this.options.openAiBaseUrl ?? ""
const modelId = this.options.openAiModelId ?? ""
const enabledR1Format = this.options.openAiR1FormatEnabled ?? false
const enabledLegacyFormat = this.options.openAiLegacyFormat ?? false
const isAzureAiInference = this._isAzureAiInference(modelUrl)
const supportsInterleavedThinking = modelInfo?.supportsInterleavedThinking === true
const ark = modelUrl.includes(".volces.com")

if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) {
@@ -107,8 +108,16 @@
if (this.options.openAiStreamingEnabled ?? true) {
let convertedMessages

if (supportsInterleavedThinking) {
// For interleaved thinking models, conditionally clear reasoning_content:
// - Clear for new user turns (preserve only final answers)
// - Preserve during tool call sequences (required by API)
const allMessages: Anthropic.Messages.MessageParam[] = [
{ role: "user" as const, content: systemPrompt },
...messages,
]
const shouldClearReasoning = isNewUserTurn(allMessages)
convertedMessages = convertToR1Format(allMessages, shouldClearReasoning)
} else if (ark || enabledLegacyFormat) {
convertedMessages = [systemMessage, ...convertToSimpleMessages(messages)]
} else {
@@ -159,7 +168,7 @@

const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model: modelId,
temperature,
messages: convertedMessages,
stream: true as const,
...(isGrokXAI ? {} : { stream_options: { include_usage: true } }),
@@ -171,6 +180,12 @@
}),
}

// Add interleaved thinking parameter if supported
if (supportsInterleavedThinking && modelInfo?.interleavedThinkingParam) {
// @ts-ignore - extra_body is not in the SDK type definitions but is accepted by OpenAI-compatible APIs
requestOptions.extra_body = modelInfo.interleavedThinkingParam
}

// Add max_tokens if needed
this.addMaxTokensIfNeeded(requestOptions, modelInfo)

@@ -193,33 +208,84 @@
}) as const,
)

// Accumulation state for interleaved thinking mode
// According to API documentation for interleaved thinking, chunks contain either reasoning_content OR content, not both
// However, tool_calls may appear alongside either reasoning_content or content
let reasoningAccumulator = ""
let isReasoningPhase = true
let hasEmittedReasoning = false

let lastUsage
let finalToolCalls: any[] = []
let toolCallBuffer: ApiStreamToolCallPartialChunk[] = []

for await (const chunk of stream) {
const delta = chunk.choices?.[0]?.delta ?? {}

// Handle reasoning_content accumulation (interleaved thinking mode)
if ("reasoning_content" in delta && delta.reasoning_content) {
reasoningAccumulator += (delta.reasoning_content as string | undefined) || ""
isReasoningPhase = true
// Note: Continue to process tool_calls and usage in same chunk if present
}

// Handle content - if we were in reasoning phase, emit accumulated reasoning first
if (delta.content) {
// Transition from reasoning to content phase
if (isReasoningPhase && reasoningAccumulator && !hasEmittedReasoning) {
yield {
type: "reasoning",
text: reasoningAccumulator,
}
hasEmittedReasoning = true
reasoningAccumulator = ""
}

if ("reasoning_content" in delta && delta.reasoning_content) {
yield {
type: "reasoning",
text: (delta.reasoning_content as string | undefined) || "",
// Emit buffered tool calls before processing content
for (const toolCall of toolCallBuffer) {
yield toolCall
}
toolCallBuffer = []

isReasoningPhase = false

// Process content as usual
for (const chunk of matcher.update(delta.content)) {
yield chunk
}
}

// Handle tool calls (can occur during reasoning or content phase)
// Note: Reasoning may continue after tool calls, so we don't emit reasoning here
// Reasoning will be emitted when transitioning to content phase or at stream end
// Buffer tool calls instead of yielding immediately to ensure reasoning appears first
if (delta.tool_calls) {
for (const toolCall of delta.tool_calls) {
// Track tool calls for debug logging
if (toolCall.index !== undefined) {
if (!finalToolCalls[toolCall.index]) {
finalToolCalls[toolCall.index] = {
id: toolCall.id,
type: toolCall.type,
function: { name: toolCall.function?.name, arguments: "" },
}
}
if (toolCall.function?.name) {
finalToolCalls[toolCall.index].function.name = toolCall.function.name
}
if (toolCall.function?.arguments) {
finalToolCalls[toolCall.index].function.arguments += toolCall.function.arguments
}
}
// Buffer tool calls instead of yielding immediately
// Default index to 0 if undefined (required by type)
toolCallBuffer.push({
type: "tool_call_partial",
index: toolCall.index ?? 0,
id: toolCall.id,
name: toolCall.function?.name,
arguments: toolCall.function?.arguments,
})
}
}

@@ -228,6 +294,22 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
}
}
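// Worked example of the ordering the flush below guarantees (an illustrative
// chunk sequence, not a captured one):
//   delta { reasoning_content: "Check the repo..." } -> accumulated only
//   delta { tool_calls: [{ index: 0, ... }] }        -> buffered only
//   stream ends                                      -> reasoning is emitted first,
//                                                       then the buffered tool calls
// Had content arrived instead, the same flush would run at the phase switch above.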

// Emit any remaining accumulated reasoning content at stream end
// This handles cases where stream ends during reasoning phase
if (reasoningAccumulator && !hasEmittedReasoning) {
yield {
type: "reasoning",
text: reasoningAccumulator,
}
}

// Emit any buffered tool calls after reasoning is emitted
// This ensures reasoning appears before tool calls in the UI
for (const toolCall of toolCallBuffer) {
yield toolCall
}
toolCallBuffer = []

for (const chunk of matcher.final()) {
yield chunk
}
@@ -238,8 +320,18 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
} else {
const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
model: modelId,
messages: supportsInterleavedThinking
? (() => {
// For interleaved thinking models, conditionally clear reasoning_content:
// - Clear for new user turns (preserve only final answers)
// - Preserve during tool call sequences (required by API)
const allMessages: Anthropic.Messages.MessageParam[] = [
{ role: "user" as const, content: systemPrompt },
...messages,
]
const shouldClearReasoning = isNewUserTurn(allMessages)
return convertToR1Format(allMessages, shouldClearReasoning)
})()
: enabledLegacyFormat
? [systemMessage, ...convertToSimpleMessages(messages)]
: [systemMessage, ...convertToOpenAiMessages(messages)],
@@ -250,6 +342,12 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
}),
}

// Add interleaved thinking parameter if supported
if (supportsInterleavedThinking && modelInfo?.interleavedThinkingParam) {
// @ts-ignore - extra_body is not in the SDK type definitions but is accepted by OpenAI-compatible APIs
requestOptions.extra_body = modelInfo.interleavedThinkingParam
}

// Add max_tokens if needed
this.addMaxTokensIfNeeded(requestOptions, modelInfo)

@@ -278,6 +376,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
}
}

// Handle reasoning_content for interleaved thinking models
if (supportsInterleavedThinking && "reasoning_content" in message && message.reasoning_content) {
yield {
type: "reasoning",
text: (message.reasoning_content as string | undefined) || "",
}
}

yield {
type: "text",
text: message?.content || "",
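Both branches above defer the clear-or-preserve decision to `isNewUserTurn` from `src/api/transform/detect-turn-boundary.ts`, whose body is outside this diff. A minimal sketch of the heuristic the comments describe - clear on a fresh user turn, preserve while a tool call sequence is still in flight - under the assumption that the boundary is detected from the final message's shape:

```ts
import type { Anthropic } from "@anthropic-ai/sdk"

// Hypothetical sketch; the real implementation in detect-turn-boundary.ts may differ.
function isNewUserTurnSketch(messages: Anthropic.Messages.MessageParam[]): boolean {
	const last = messages[messages.length - 1]
	if (!last || last.role !== "user") {
		return false
	}
	// A user message carrying tool_result blocks is the tail of an in-flight
	// tool call sequence, so prior reasoning_content must be preserved.
	if (Array.isArray(last.content)) {
		return !last.content.some((block) => block.type === "tool_result")
	}
	// A plain string user message starts a fresh turn; reasoning can be cleared.
	return true
}
```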