56 changes: 55 additions & 1 deletion src/common/utils/ai/providerOptions.test.ts
@@ -5,7 +5,11 @@
import type { OpenAIResponsesProviderOptions } from "@ai-sdk/openai";
import { createMuxMessage } from "@/common/types/message";
import { describe, test, expect, mock } from "bun:test";
import { buildProviderOptions } from "./providerOptions";
import {
  buildProviderOptions,
  buildRequestHeaders,
  ANTHROPIC_1M_CONTEXT_HEADER,
} from "./providerOptions";

// Mock the log module to avoid console noise
void mock.module("@/node/services/log", () => ({
@@ -228,3 +232,53 @@ describe("buildProviderOptions - OpenAI", () => {
});
});
});

describe("buildRequestHeaders", () => {
test("should return anthropic-beta header for Opus 4.6 with use1MContext", () => {
const result = buildRequestHeaders("anthropic:claude-opus-4-6", {
anthropic: { use1MContext: true },
});
expect(result).toEqual({ "anthropic-beta": ANTHROPIC_1M_CONTEXT_HEADER });
});

test("should return anthropic-beta header for gateway-routed Anthropic model", () => {
const result = buildRequestHeaders("mux-gateway:anthropic/claude-opus-4-6", {
anthropic: { use1MContext: true },
});
expect(result).toEqual({ "anthropic-beta": ANTHROPIC_1M_CONTEXT_HEADER });
});

test("should return undefined for non-Anthropic model", () => {
const result = buildRequestHeaders("openai:gpt-5.2", {
anthropic: { use1MContext: true },
});
expect(result).toBeUndefined();
});

test("should return undefined when use1MContext is false", () => {
const result = buildRequestHeaders("anthropic:claude-opus-4-6", {
anthropic: { use1MContext: false },
});
expect(result).toBeUndefined();
});

test("should return undefined when no muxProviderOptions provided", () => {
const result = buildRequestHeaders("anthropic:claude-opus-4-6");
expect(result).toBeUndefined();
});

test("should return undefined for unsupported model even with use1MContext", () => {
// claude-opus-4-1 doesn't support 1M context
const result = buildRequestHeaders("anthropic:claude-opus-4-1", {
anthropic: { use1MContext: true },
});
expect(result).toBeUndefined();
});

test("should return header when model is in use1MContextModels list", () => {
const result = buildRequestHeaders("anthropic:claude-opus-4-6", {
anthropic: { use1MContextModels: ["anthropic:claude-opus-4-6"] },
});
expect(result).toEqual({ "anthropic-beta": ANTHROPIC_1M_CONTEXT_HEADER });
});
});
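The gateway-routed case above relies on `normalizeGatewayModel` collapsing the `mux-gateway:` prefix into a canonical `provider:model` id before the provider branch runs. A minimal sketch of that assumed normalization (the real helper lives in `@/common/utils/ai/models` and may differ in detail):

```ts
// Sketch only: illustrates the normalization the gateway test depends on.
// The real implementation is in "@/common/utils/ai/models".
function normalizeGatewayModelSketch(modelString: string): string {
  const GATEWAY_PREFIX = "mux-gateway:";
  if (!modelString.startsWith(GATEWAY_PREFIX)) return modelString;
  // "mux-gateway:anthropic/claude-opus-4-6" -> "anthropic:claude-opus-4-6"
  return modelString.slice(GATEWAY_PREFIX.length).replace("/", ":");
}

normalizeGatewayModelSketch("mux-gateway:anthropic/claude-opus-4-6"); // "anthropic:claude-opus-4-6"
normalizeGatewayModelSketch("openai:gpt-5.2"); // unchanged
```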
41 changes: 38 additions & 3 deletions src/common/utils/ai/providerOptions.ts
@@ -1,7 +1,9 @@
/**
* Provider options builder for AI SDK
* Provider-specific request configuration for AI SDK
*
* Converts unified thinking levels to provider-specific options
* Builds both `providerOptions` (thinking, reasoning) and per-request HTTP
* `headers` (e.g. Anthropic 1M context beta) for streamText(). Both builders
* share the same gateway-normalization logic and provider branching.
*/

import type { AnthropicProviderOptions } from "@ai-sdk/anthropic";
@@ -19,7 +21,7 @@ import {
} from "@/common/types/thinking";
import { log } from "@/node/services/log";
import type { MuxMessage } from "@/common/types/message";
import { normalizeGatewayModel } from "./models";
import { normalizeGatewayModel, supports1MContext } from "./models";

/**
* OpenRouter reasoning options
@@ -340,3 +342,36 @@ export function buildProviderOptions(
log.debug("buildProviderOptions: Unsupported provider", provider);
return {};
}

// ---------------------------------------------------------------------------
// Per-request HTTP headers
// ---------------------------------------------------------------------------

/** Header value for Anthropic 1M context beta */
export const ANTHROPIC_1M_CONTEXT_HEADER = "context-1m-2025-08-07";

/**
* Build per-request HTTP headers for provider-specific features.
*
* These flow through streamText({ headers }) to the provider SDK, which merges
* them with provider-creation-time headers via combineHeaders(). This is the
* single injection site for features like the Anthropic 1M context beta header,
* regardless of whether the model is direct or gateway-routed.
*/
export function buildRequestHeaders(
  modelString: string,
  muxProviderOptions?: MuxProviderOptions
): Record<string, string> | undefined {
  const normalized = normalizeGatewayModel(modelString);
  const [provider] = normalized.split(":", 2);

  if (provider !== "anthropic") return undefined;

  const is1MEnabled =
    ((muxProviderOptions?.anthropic?.use1MContextModels?.includes(normalized) ?? false) ||
      muxProviderOptions?.anthropic?.use1MContext === true) &&
    supports1MContext(normalized);

  if (!is1MEnabled) return undefined;
  return { "anthropic-beta": ANTHROPIC_1M_CONTEXT_HEADER };
}
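Downstream, the returned record is meant to be passed straight to `streamText`, per the docstring above. A rough sketch of the intended call shape — `model` and `messages` are illustrative placeholders, and the type names follow AI SDK v5, so they may need adjusting for other versions:

```ts
import { streamText, type LanguageModel, type ModelMessage } from "ai";
import { buildRequestHeaders } from "./providerOptions";

// Illustrative wrapper, not the actual aiService code path.
function startAnthropicStream(model: LanguageModel, messages: ModelMessage[]) {
  const headers = buildRequestHeaders("anthropic:claude-opus-4-6", {
    anthropic: { use1MContext: true },
  });
  // headers is { "anthropic-beta": "context-1m-2025-08-07" } here; for a
  // non-Anthropic model it would be undefined, which streamText accepts.
  return streamText({ model, messages, headers });
}
```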
10 changes: 8 additions & 2 deletions src/node/services/aiService.ts
@@ -47,7 +47,7 @@ import type { WorkspaceMCPOverrides } from "@/common/types/mcp";
import type { MCPServerManager, MCPWorkspaceStats } from "@/node/services/mcpServerManager";
import { WorkspaceMcpOverridesService } from "./workspaceMcpOverridesService";
import type { TaskService } from "@/node/services/taskService";
import { buildProviderOptions } from "@/common/utils/ai/providerOptions";
import { buildProviderOptions, buildRequestHeaders } from "@/common/utils/ai/providerOptions";
import { sliceMessagesFromLatestCompactionBoundary } from "@/common/utils/messages/compactionBoundary";

import { THINKING_LEVEL_OFF, type ThinkingLevel } from "@/common/types/thinking";
@@ -847,6 +847,11 @@ export class AIService extends EventEmitter {
truncationMode
);

// Build per-request HTTP headers (e.g., anthropic-beta for 1M context).
// This is the single injection site for provider-specific headers, handling
// both direct and gateway-routed models identically.
const requestHeaders = buildRequestHeaders(modelString, effectiveMuxProviderOptions);

// Debug dump: Log the complete LLM request when MUX_DEBUG_LLM_REQUEST is set
if (process.env.MUX_DEBUG_LLM_REQUEST === "1") {
log.info(
@@ -951,7 +956,8 @@
streamToken, // Pass the pre-generated stream token
hasQueuedMessage,
metadata.name,
effectiveThinkingLevel
effectiveThinkingLevel,
requestHeaders
);

if (!streamResult.success) {
22 changes: 10 additions & 12 deletions src/node/services/providerModelFactory.ts
@@ -23,7 +23,7 @@ import { isProviderDisabledInConfig } from "@/common/utils/providers/isProviderD
import type { PolicyService } from "@/node/services/policyService";
import type { ProviderService } from "@/node/services/providerService";
import type { CodexOauthService } from "@/node/services/codexOauthService";
import { normalizeGatewayModel, supports1MContext } from "@/common/utils/ai/models";
import { normalizeGatewayModel } from "@/common/utils/ai/models";
import { MUX_APP_ATTRIBUTION_TITLE, MUX_APP_ATTRIBUTION_URL } from "@/constants/appAttribution";
import { resolveProviderCredentials } from "@/node/utils/providerRequirements";
import {
@@ -228,8 +228,9 @@ export function normalizeAnthropicBaseURL(baseURL: string): string {
return `${trimmed}/v1`;
}

/** Header value for Anthropic 1M context beta */
export const ANTHROPIC_1M_CONTEXT_HEADER = "context-1m-2025-08-07";
// Canonical definition lives in providerOptions; import for local use + re-export for backward compat.
import { ANTHROPIC_1M_CONTEXT_HEADER } from "@/common/utils/ai/providerOptions";
export { ANTHROPIC_1M_CONTEXT_HEADER };

/**
* Build headers for Anthropic provider, optionally including the 1M context beta header.
@@ -485,14 +486,9 @@ export class ProviderModelFactory {
? { ...configWithApiKey, baseURL: normalizeAnthropicBaseURL(effectiveBaseURL) }
: configWithApiKey;

// Add 1M context beta header if requested and model supports it.
// Check both per-model list (use1MContextModels) and legacy global flag (use1MContext).
const fullModelId = `anthropic:${modelId}`;
const is1MEnabled =
((muxProviderOptions?.anthropic?.use1MContextModels?.includes(fullModelId) ?? false) ||
muxProviderOptions?.anthropic?.use1MContext === true) &&
supports1MContext(fullModelId);
const headers = buildAnthropicHeaders(normalizedConfig.headers, is1MEnabled);
// 1M context beta header is injected per-request via buildRequestHeaders() →
// streamText({ headers }), not at provider creation time. This avoids duplicating
// header logic across direct and gateway handlers.

// Lazy-load Anthropic provider to reduce startup time
const { createAnthropic } = await PROVIDER_REGISTRY.anthropic();
@@ -503,7 +499,6 @@
const fetchWithCacheControl = wrapFetchWithAnthropicCacheControl(baseFetch);
const provider = createAnthropic({
...normalizedConfig,
headers,
fetch: fetchWithCacheControl,
});
return Ok(provider(modelId));
@@ -1024,6 +1019,9 @@ export class ProviderModelFactory {
// Use configured baseURL or fall back to default gateway URL
const gatewayBaseURL =
providerConfig.baseURL ?? "https://gateway.mux.coder.com/api/v1/ai-gateway/v1/ai";

// 1M context beta header is injected per-request via buildRequestHeaders() →
// streamText({ headers }), not at provider creation time.
const gateway = createGateway({
apiKey: couponCode,
baseURL: gatewayBaseURL,
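The comments in this file assume the provider SDK merges request-time headers over any creation-time defaults. A sketch of that assumed merge behavior, modeled loosely on the AI SDK's `combineHeaders` (the real utility may treat `undefined` values differently):

```ts
// Assumed merge semantics: later sources win; undefined values are dropped.
function combineHeadersSketch(
  ...sources: Array<Record<string, string | undefined> | undefined>
): Record<string, string> {
  const merged: Record<string, string> = {};
  for (const source of sources) {
    for (const [key, value] of Object.entries(source ?? {})) {
      if (value !== undefined) merged[key] = value;
    }
  }
  return merged;
}

combineHeadersSketch(
  { "x-title": "mux" }, // hypothetical creation-time header
  { "anthropic-beta": "context-1m-2025-08-07" } // per-request, from buildRequestHeaders
);
// => { "x-title": "mux", "anthropic-beta": "context-1m-2025-08-07" }
```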
19 changes: 14 additions & 5 deletions src/node/services/streamManager.ts
@@ -97,6 +97,8 @@ interface StreamRequestConfig {
tools?: Record<string, Tool>;
toolChoice?: StreamToolChoice;
providerOptions?: Record<string, unknown>;
/** Per-request HTTP headers (e.g., anthropic-beta for 1M context). */
headers?: Record<string, string | undefined>;
maxOutputTokens?: number;
hasQueuedMessage?: () => boolean;
}
@@ -929,7 +931,8 @@
providerOptions?: Record<string, unknown>,
maxOutputTokens?: number,
toolPolicy?: ToolPolicy,
hasQueuedMessage?: () => boolean
hasQueuedMessage?: () => boolean,
headers?: Record<string, string | undefined>
): StreamRequestConfig {
// Determine toolChoice based on toolPolicy.
//
@@ -1002,6 +1005,7 @@
tools: finalTools,
toolChoice,
providerOptions: finalProviderOptions,
headers,
maxOutputTokens: effectiveMaxOutputTokens,
hasQueuedMessage,
};
@@ -1049,6 +1053,7 @@
stopWhen: this.createStopWhenCondition(request),
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment
providerOptions: request.providerOptions as any, // Pass provider-specific options (thinking/reasoning config)
headers: request.headers, // Per-request HTTP headers (e.g., anthropic-beta for 1M context)
maxOutputTokens: request.maxOutputTokens,
});
}
@@ -1075,7 +1080,8 @@
toolPolicy?: ToolPolicy,
hasQueuedMessage?: () => boolean,
workspaceName?: string,
thinkingLevel?: string
thinkingLevel?: string,
headers?: Record<string, string | undefined>
): WorkspaceStreamInfo {
// abortController is created and linked to the caller-provided abortSignal in startStream().

@@ -1089,7 +1095,8 @@
providerOptions,
maxOutputTokens,
toolPolicy,
hasQueuedMessage
hasQueuedMessage,
headers
);

// Start streaming - this can throw immediately if API key is missing
@@ -2432,7 +2439,8 @@
providedStreamToken?: StreamToken,
hasQueuedMessage?: () => boolean,
workspaceName?: string,
thinkingLevel?: string
thinkingLevel?: string,
headers?: Record<string, string | undefined>
): Promise<Result<StreamToken, SendMessageError>> {
const typedWorkspaceId = workspaceId as WorkspaceId;

@@ -2507,7 +2515,8 @@
toolPolicy,
hasQueuedMessage,
workspaceName,
thinkingLevel
thinkingLevel,
headers
);

// Guard against a narrow race: