13 changes: 13 additions & 0 deletions packages/types/src/model.ts
@@ -83,6 +83,19 @@ export const modelInfoSchema = z.object({
supportsReasoningBudget: z.boolean().optional(),
// Capability flag to indicate whether the model supports simple on/off binary reasoning
supportsReasoningBinary: z.boolean().optional(),
/**
* Capability flag to indicate whether the model supports interleaved thinking.
* When true, the model emits `reasoning_content` alongside `content` in responses.
* Examples: DeepSeek reasoner, Kimi K2 Thinking, MiniMax M2.
*/
supportsInterleavedThinking: z.boolean().optional(),
/**
* Provider-specific parameters needed to enable interleaved thinking.
* Different providers may use different parameter formats.
* Example: DeepSeek uses `{ thinking: { type: "enabled" } }`.
* This parameter is passed via `extra_body` or similar mechanism.
*/
interleavedThinkingParam: z.record(z.any()).optional(),
// Capability flag to indicate whether the model supports temperature parameter
supportsTemperature: z.boolean().optional(),
defaultTemperature: z.number().optional(),
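As a concrete illustration of how these two schema fields travel together, a provider entry opting into interleaved thinking would set both at once. A minimal sketch (the `deepseek-reasoner` values mirror the DeepSeek change in this PR; the constant name is illustrative):

```ts
import type { ModelInfo } from "@roo-code/types"

// Sketch: a model that emits `reasoning_content` pairs the capability flag
// with the provider-specific enable parameter.
const interleavedThinkingFields: Partial<ModelInfo> = {
	supportsInterleavedThinking: true,
	// Forwarded to the provider via `extra_body` when the request is built.
	interleavedThinkingParam: { thinking: { type: "enabled" } },
}

// Models without the feature simply omit both optional fields.
```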
4 changes: 4 additions & 0 deletions packages/types/src/providers/deepseek.ts
@@ -24,6 +24,10 @@ export const deepSeekModels = {
supportsImages: false,
supportsPromptCache: true,
supportsNativeTools: true,
// Enables interleaved thinking mode (reasoning_content field)
supportsInterleavedThinking: true,
// Parameter passed via extra_body to enable thinking mode
interleavedThinkingParam: { thinking: { type: "enabled" } },
inputPrice: 0.56, // $0.56 per million tokens (cache miss) - Updated Sept 5, 2025
outputPrice: 1.68, // $1.68 per million tokens - Updated Sept 5, 2025
cacheWritesPrice: 0.56, // $0.56 per million tokens (cache miss) - Updated Sept 5, 2025
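For reference, this is roughly the request body DeepSeek should receive once `interleavedThinkingParam` is merged in via `extra_body`. A sketch only: the exact merge behavior is client-specific (the OpenAI Python SDK folds `extra_body` keys into the JSON payload, and an equivalent pass-through is assumed here); field values are illustrative.

```ts
// Assumed wire format after the extra_body merge.
const requestBody = {
	model: "deepseek-reasoner",
	messages: [{ role: "user", content: "..." }],
	stream: true,
	// Injected from interleavedThinkingParam:
	thinking: { type: "enabled" },
}
```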
1 change: 1 addition & 0 deletions packages/types/src/providers/openai.ts
@@ -484,6 +484,7 @@ export const openAiModelInfoSaneDefaults: ModelInfo = {
inputPrice: 0,
outputPrice: 0,
supportsNativeTools: true,
supportsInterleavedThinking: false,
}

// https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
11 changes: 10 additions & 1 deletion src/api/providers/__tests__/deepseek.spec.ts
@@ -70,7 +70,7 @@ vi.mock("openai", () => {
import OpenAI from "openai"
import type { Anthropic } from "@anthropic-ai/sdk"

import { deepSeekDefaultModelId, type ModelInfo } from "@roo-code/types"

import type { ApiHandlerOptions } from "../../../shared/api"

@@ -172,6 +172,15 @@ describe("DeepSeekHandler", () => {
expect(model.info.contextWindow).toBe(128_000)
expect(model.info.supportsImages).toBe(false)
expect(model.info.supportsPromptCache).toBe(true)
// Verify interleaved thinking capability flags
expect((model.info as ModelInfo).supportsInterleavedThinking).toBe(true)
expect((model.info as ModelInfo).interleavedThinkingParam).toEqual({ thinking: { type: "enabled" } })
})

it("should not have interleaved thinking flags for deepseek-chat", () => {
const model = handler.getModel()
expect((model.info as ModelInfo).supportsInterleavedThinking).toBeUndefined()
expect((model.info as ModelInfo).interleavedThinkingParam).toBeUndefined()
})

it("should return provided model ID with default model info if model does not exist", () => {
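A follow-up assertion one might add here is that the parameter actually reaches the mocked client. A sketch, assuming the `vi.mock("openai")` setup above exposes the chat-completions spy as `mockCreate` and that `handler` is configured with `deepseek-reasoner` (both names are hypothetical; the real spy and setup live outside this hunk):

```ts
it("should pass the thinking param via extra_body for deepseek-reasoner", async () => {
	// Drain the stream so the request is actually issued.
	for await (const _chunk of handler.createMessage("system prompt", [])) {
		// no-op
	}
	const requestArgs = mockCreate.mock.calls.at(-1)?.[0]
	expect(requestArgs.extra_body).toEqual({ thinking: { type: "enabled" } })
})
```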
142 changes: 124 additions & 18 deletions src/api/providers/openai.ts
@@ -17,7 +17,8 @@ import { XmlMatcher } from "../../utils/xml-matcher"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { convertToR1Format } from "../transform/r1-format"
import { convertToSimpleMessages } from "../transform/simple-format"
import { isNewUserTurn } from "../transform/detect-turn-boundary"
import { ApiStream, ApiStreamUsageChunk, type ApiStreamToolCallPartialChunk } from "../transform/stream"
import { getModelParams } from "../transform/model-params"

import { DEFAULT_HEADERS } from "./constants"
@@ -85,13 +86,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
messages: Anthropic.Messages.MessageParam[],
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
const { info: modelInfo, reasoning, temperature } = this.getModel()
const modelUrl = this.options.openAiBaseUrl ?? ""
const modelId = this.options.openAiModelId ?? ""
const enabledR1Format = this.options.openAiR1FormatEnabled ?? false
const enabledLegacyFormat = this.options.openAiLegacyFormat ?? false
const isAzureAiInference = this._isAzureAiInference(modelUrl)
const supportsInterleavedThinking = modelInfo?.supportsInterleavedThinking === true
const ark = modelUrl.includes(".volces.com")

if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) {
@@ -107,8 +108,16 @@
if (this.options.openAiStreamingEnabled ?? true) {
let convertedMessages

if (supportsInterleavedThinking) {
// For interleaved thinking models, conditionally clear reasoning_content:
// - Clear for new user turns (preserve only final answers)
// - Preserve during tool call sequences (required by API)
const allMessages: Anthropic.Messages.MessageParam[] = [
{ role: "user" as const, content: systemPrompt },
...messages,
]
const shouldClearReasoning = isNewUserTurn(allMessages)
convertedMessages = convertToR1Format(allMessages, shouldClearReasoning)
} else if (ark || enabledLegacyFormat) {
convertedMessages = [systemMessage, ...convertToSimpleMessages(messages)]
} else {
@@ -159,7 +168,7 @@

const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model: modelId,
temperature,
messages: convertedMessages,
stream: true as const,
...(isGrokXAI ? {} : { stream_options: { include_usage: true } }),
@@ -171,6 +180,12 @@
}),
}

// Add interleaved thinking parameter if supported
if (supportsInterleavedThinking && modelInfo?.interleavedThinkingParam) {
// @ts-ignore - extra_body is not in the SDK type definitions but is accepted by OpenAI-compatible APIs
requestOptions.extra_body = modelInfo.interleavedThinkingParam
}

// Add max_tokens if needed
this.addMaxTokensIfNeeded(requestOptions, modelInfo)

@@ -193,33 +208,84 @@
}) as const,
)

// Accumulation state for interleaved thinking mode
// According to API documentation for interleaved thinking, chunks contain either reasoning_content OR content, not both
// However, tool_calls may appear alongside either reasoning_content or content
let reasoningAccumulator = ""
let isReasoningPhase = true
let hasEmittedReasoning = false

let lastUsage
let finalToolCalls: any[] = []
let toolCallBuffer: ApiStreamToolCallPartialChunk[] = []

for await (const chunk of stream) {
const delta = chunk.choices?.[0]?.delta ?? {}

// Handle reasoning_content accumulation (interleaved thinking mode)
if ("reasoning_content" in delta && delta.reasoning_content) {
reasoningAccumulator += (delta.reasoning_content as string | undefined) || ""
isReasoningPhase = true
// Note: Continue to process tool_calls and usage in same chunk if present
}

// Handle content - if we were in reasoning phase, emit accumulated reasoning first
if (delta.content) {
// Transition from reasoning to content phase
if (isReasoningPhase && reasoningAccumulator && !hasEmittedReasoning) {
yield {
type: "reasoning",
text: reasoningAccumulator,
}
hasEmittedReasoning = true
reasoningAccumulator = ""
}

if ("reasoning_content" in delta && delta.reasoning_content) {
yield {
type: "reasoning",
text: (delta.reasoning_content as string | undefined) || "",
// Emit buffered tool calls before processing content
for (const toolCall of toolCallBuffer) {
yield toolCall
}
toolCallBuffer = []

isReasoningPhase = false

// Process content as usual
for (const chunk of matcher.update(delta.content)) {
yield chunk
}
}

// Handle tool calls (can occur during reasoning or content phase)
// Note: Reasoning may continue after tool calls, so we don't emit reasoning here
// Reasoning will be emitted when transitioning to content phase or at stream end
// Buffer tool calls instead of yielding immediately to ensure reasoning appears first
if (delta.tool_calls) {
for (const toolCall of delta.tool_calls) {
// Track tool calls for debug logging
if (toolCall.index !== undefined) {
if (!finalToolCalls[toolCall.index]) {
finalToolCalls[toolCall.index] = {
id: toolCall.id,
type: toolCall.type,
function: { name: toolCall.function?.name, arguments: "" },
}
}
if (toolCall.function?.name) {
finalToolCalls[toolCall.index].function.name = toolCall.function.name
}
if (toolCall.function?.arguments) {
finalToolCalls[toolCall.index].function.arguments += toolCall.function.arguments
}
}
// Buffer tool calls instead of yielding immediately
// Default index to 0 if undefined (required by type)
toolCallBuffer.push({
type: "tool_call_partial",
index: toolCall.index ?? 0,
id: toolCall.id,
name: toolCall.function?.name,
arguments: toolCall.function?.arguments,
})
}
}

@@ -228,6 +294,22 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
}
}
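// Worked example of the ordering the flush below guarantees (an illustrative
// chunk sequence, not a captured one):
//   delta { reasoning_content: "Check the repo..." } -> accumulated only
//   delta { tool_calls: [{ index: 0, ... }] }        -> buffered only
//   stream ends                                      -> reasoning is emitted first,
//                                                       then the buffered tool calls
// Had content arrived instead, the same flush would run at the phase switch above.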

// Emit any remaining accumulated reasoning content at stream end
// This handles cases where stream ends during reasoning phase
if (reasoningAccumulator && !hasEmittedReasoning) {
yield {
type: "reasoning",
text: reasoningAccumulator,
}
}

// Emit any buffered tool calls after reasoning is emitted
// This ensures reasoning appears before tool calls in the UI
for (const toolCall of toolCallBuffer) {
yield toolCall
}
toolCallBuffer = []

for (const chunk of matcher.final()) {
yield chunk
}
@@ -238,8 +320,18 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
} else {
const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
model: modelId,
messages: supportsInterleavedThinking
? (() => {
// For interleaved thinking models, conditionally clear reasoning_content:
// - Clear for new user turns (preserve only final answers)
// - Preserve during tool call sequences (required by API)
const allMessages: Anthropic.Messages.MessageParam[] = [
{ role: "user" as const, content: systemPrompt },
...messages,
]
const shouldClearReasoning = isNewUserTurn(allMessages)
return convertToR1Format(allMessages, shouldClearReasoning)
})()
: enabledLegacyFormat
? [systemMessage, ...convertToSimpleMessages(messages)]
: [systemMessage, ...convertToOpenAiMessages(messages)],
@@ -250,6 +342,12 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
}),
}

// Add interleaved thinking parameter if supported
if (supportsInterleavedThinking && modelInfo?.interleavedThinkingParam) {
// @ts-ignore - extra_body is not in the SDK type definitions but is accepted by OpenAI-compatible APIs
requestOptions.extra_body = modelInfo.interleavedThinkingParam
}

// Add max_tokens if needed
this.addMaxTokensIfNeeded(requestOptions, modelInfo)

@@ -278,6 +376,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
}
}

// Handle reasoning_content for interleaved thinking models
if (supportsInterleavedThinking && "reasoning_content" in message && message.reasoning_content) {
yield {
type: "reasoning",
text: (message.reasoning_content as string | undefined) || "",
}
}

yield {
type: "text",
text: message?.content || "",
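Both branches above defer the clear-or-preserve decision to `isNewUserTurn` from `src/api/transform/detect-turn-boundary.ts`, whose body is outside this diff. A minimal sketch of the heuristic the comments describe - clear on a fresh user turn, preserve while a tool call sequence is still in flight - under the assumption that the boundary is detected from the final message's shape:

```ts
import type { Anthropic } from "@anthropic-ai/sdk"

// Hypothetical sketch; the real implementation in detect-turn-boundary.ts may differ.
function isNewUserTurnSketch(messages: Anthropic.Messages.MessageParam[]): boolean {
	const last = messages[messages.length - 1]
	if (!last || last.role !== "user") {
		return false
	}
	// A user message carrying tool_result blocks is the tail of an in-flight
	// tool call sequence, so prior reasoning_content must be preserved.
	if (Array.isArray(last.content)) {
		return !last.content.some((block) => block.type === "tool_result")
	}
	// A plain string user message starts a fresh turn; reasoning can be cleared.
	return true
}
```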