Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/browser/components/ChatInput/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ const ChatInputInner: React.FC<ChatInputProps> = (props) => {
const workspaceIdForUsage = variant === "workspace" ? props.workspaceId : "";
const usage = useWorkspaceUsage(workspaceIdForUsage);
const { has1MContext } = useProviderOptions();
const lastUsage = usage?.liveUsage ?? usage?.lastContextUsage;
const lastUsage = usage?.currentContextUsage;
const usageModel = lastUsage?.model ?? null;
const use1M = has1MContext(usageModel ?? "");
const contextUsageData = useMemo(() => {
Expand Down
37 changes: 13 additions & 24 deletions src/browser/components/RightSidebar/CostsTab.tsx
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import React from "react";
import { useWorkspaceUsage, useWorkspaceConsumers } from "@/browser/stores/WorkspaceStore";
import { getModelStats } from "@/common/utils/tokens/modelStats";
import {
sumUsageHistory,
formatCostWithDollar,
type ChatUsageDisplay,
} from "@/common/utils/tokens/usageAggregator";
import { getSessionCostTotal, formatCostWithDollar } from "@/common/utils/tokens/usageAggregator";
import { getEffectiveContextLimit } from "@/common/utils/tokens/contextLimit";
import { usePersistedState } from "@/browser/hooks/usePersistedState";
import { PREFERRED_COMPACTION_MODEL_KEY } from "@/common/constants/storage";
import { resolveCompactionModel } from "@/browser/utils/messages/compactionModelPreference";
Expand Down Expand Up @@ -69,8 +66,7 @@ const CostsTabComponent: React.FC<CostsTabProps> = ({ workspaceId }) => {
const runtimeConfig = workspaceContext?.workspaceMetadata.get(workspaceId)?.runtimeConfig;

// Get model from context usage for per-model threshold storage
// Use lastContextUsage for context window display (last step's usage)
const contextUsageForModel = usage.liveUsage ?? usage.lastContextUsage;
const contextUsageForModel = usage.currentContextUsage;
const currentModel = contextUsageForModel?.model ?? null;
// Align warning with /compact model resolution so it matches actual compaction behavior.
const effectiveCompactionModel = resolveCompactionModel(preferredCompactionModel) ?? currentModel;
Expand All @@ -81,18 +77,13 @@ const CostsTabComponent: React.FC<CostsTabProps> = ({ workspaceId }) => {

// Session usage for cost calculation
// Uses sessionTotal (pre-computed) + liveCostUsage (cumulative during streaming)
const sessionUsage = React.useMemo(() => {
const parts: ChatUsageDisplay[] = [];
if (usage.sessionTotal) parts.push(usage.sessionTotal);
if (usage.liveCostUsage) parts.push(usage.liveCostUsage);
return parts.length > 0 ? sumUsageHistory(parts) : undefined;
}, [usage.sessionTotal, usage.liveCostUsage]);
const sessionUsage = React.useMemo(
() => getSessionCostTotal(usage.sessionTotal, usage.liveCostUsage),
[usage.sessionTotal, usage.liveCostUsage]
);

const hasUsageData =
usage &&
(usage.sessionTotal !== undefined ||
usage.lastContextUsage !== undefined ||
usage.liveUsage !== undefined);
usage && (usage.sessionTotal !== undefined || usage.currentContextUsage !== undefined);
const hasConsumerData = consumers && (consumers.totalTokens > 0 || consumers.isCalculating);
const hasAnyData = hasUsageData || hasConsumerData;

Expand Down Expand Up @@ -120,7 +111,7 @@ const CostsTabComponent: React.FC<CostsTabProps> = ({ workspaceId }) => {
<div data-testid="context-usage-section" className="mt-2 mb-5">
<div data-testid="context-usage-list" className="flex flex-col gap-3">
{(() => {
const contextUsage = usage.liveUsage ?? usage.lastContextUsage;
const contextUsage = usage.currentContextUsage;
const model = contextUsage?.model ?? "unknown";

const contextUsageData = contextUsage
Expand All @@ -134,12 +125,10 @@ const CostsTabComponent: React.FC<CostsTabProps> = ({ workspaceId }) => {
return undefined;

const thresholdTokens = Math.round((autoCompactThreshold / 100) * maxTokens);
const compactionStats = getModelStats(effectiveCompactionModel);
const compactionMaxTokens =
has1MContext(effectiveCompactionModel) &&
supports1MContext(effectiveCompactionModel)
? 1_000_000
: compactionStats?.max_input_tokens;
const compactionMaxTokens = getEffectiveContextLimit(
effectiveCompactionModel,
has1MContext(effectiveCompactionModel)
);

if (compactionMaxTokens && compactionMaxTokens < thresholdTokens) {
return { compactionModelMaxTokens: compactionMaxTokens, thresholdTokens };
Expand Down
28 changes: 5 additions & 23 deletions src/browser/components/RightSidebar/tabs/TabLabels.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@ import { Tooltip, TooltipContent, TooltipTrigger } from "../../ui/tooltip";
import { FileIcon } from "../../FileIcon";
import { formatTabDuration, type ReviewStats } from "./registry";
import { formatKeybind, KEYBINDS } from "@/browser/utils/ui/keybinds";
import { cn } from "@/common/lib/utils";
import { useWorkspaceUsage, useWorkspaceStatsSnapshot } from "@/browser/stores/WorkspaceStore";
import { sumUsageHistory, type ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
import { getSessionCostTotal, getTotalCost } from "@/common/utils/tokens/usageAggregator";

interface CostsTabLabelProps {
workspaceId: string;
Expand All @@ -29,21 +28,9 @@ export const CostsTabLabel: React.FC<CostsTabLabelProps> = ({ workspaceId }) =>
const usage = useWorkspaceUsage(workspaceId);

const sessionCost = React.useMemo(() => {
const parts: ChatUsageDisplay[] = [];
if (usage.sessionTotal) parts.push(usage.sessionTotal);
if (usage.liveCostUsage) parts.push(usage.liveCostUsage);
if (parts.length === 0) return null;

const aggregated = sumUsageHistory(parts);
if (!aggregated) return null;

const total =
(aggregated.input.cost_usd ?? 0) +
(aggregated.cached.cost_usd ?? 0) +
(aggregated.cacheCreate.cost_usd ?? 0) +
(aggregated.output.cost_usd ?? 0) +
(aggregated.reasoning.cost_usd ?? 0);
return total > 0 ? total : null;
const aggregated = getSessionCostTotal(usage.sessionTotal, usage.liveCostUsage);
const total = getTotalCost(aggregated);
return total && total > 0 ? total : null;
}, [usage.sessionTotal, usage.liveCostUsage]);

return (
Expand All @@ -67,12 +54,7 @@ export const ReviewTabLabel: React.FC<ReviewTabLabelProps> = ({ reviewStats }) =
<>
Review
{reviewStats !== null && reviewStats.total > 0 && (
<span
className={cn(
"text-[10px]",
reviewStats.read === reviewStats.total ? "text-muted" : "text-muted"
)}
>
<span className="text-muted text-[10px]">
{reviewStats.read}/{reviewStats.total}
</span>
)}
Expand Down
16 changes: 10 additions & 6 deletions src/browser/hooks/useContextSwitchWarning.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";
import type { SendMessageOptions } from "@/common/orpc/types";
import type { DisplayedMessage } from "@/common/types/message";
import { useContextSwitchWarning } from "./useContextSwitchWarning";
import { getEffectiveContextLimit } from "@/browser/utils/compaction/contextLimit";
import { getEffectiveContextLimit } from "@/common/utils/tokens/contextLimit";
import {
recordWorkspaceModelChange,
setWorkspaceModelWithOrigin,
Expand Down Expand Up @@ -81,17 +81,21 @@ const createPolicyChurnClient = () => {
return { client, triggerPolicyEvent };
};

const buildUsage = (tokens: number, model?: string): WorkspaceUsageState => ({
totalTokens: tokens,
lastContextUsage: {
const buildUsage = (tokens: number, model?: string): WorkspaceUsageState => {
const contextUsage = {
input: { tokens },
cached: { tokens: 0 },
cacheCreate: { tokens: 0 },
output: { tokens: 0 },
reasoning: { tokens: 0 },
model,
},
});
};
return {
totalTokens: tokens,
lastContextUsage: contextUsage,
currentContextUsage: contextUsage,
};
};

const buildAssistantMessage = (model: string): DisplayedMessage => ({
type: "assistant",
Expand Down
7 changes: 4 additions & 3 deletions src/browser/hooks/useContextSwitchWarning.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import type { AppRouter } from "@/node/orpc/router";
import type { SendMessageOptions } from "@/common/orpc/types";
import type { DisplayedMessage } from "@/common/types/message";
import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";
import { getContextTokens } from "@/common/utils/tokens/usageAggregator";
import { normalizeGatewayModel } from "@/common/utils/ai/models";
import { usePolicy } from "@/browser/contexts/PolicyContext";
import {
Expand All @@ -20,7 +21,7 @@ import {
type ContextSwitchWarning,
} from "@/browser/utils/compaction/contextSwitchCheck";
import { getHigherContextCompactionSuggestion } from "@/browser/utils/compaction/suggestion";
import { getEffectiveContextLimit } from "@/browser/utils/compaction/contextLimit";
import { getEffectiveContextLimit } from "@/common/utils/tokens/contextLimit";
import {
consumeWorkspaceModelChange,
setWorkspaceModelWithOrigin,
Expand Down Expand Up @@ -158,8 +159,8 @@ export function useContextSwitchWarning(
}, [workspaceId, pendingModel, use1M, checkOptions]);

const getCurrentTokens = useCallback(() => {
const usage = workspaceUsage?.liveUsage ?? workspaceUsage?.lastContextUsage;
return usage ? usage.input.tokens + usage.cached.tokens + usage.cacheCreate.tokens : 0;
const usage = workspaceUsage?.currentContextUsage;
return usage ? getContextTokens(usage) : 0;
}, [workspaceUsage]);

const tokens = getCurrentTokens();
Expand Down
5 changes: 4 additions & 1 deletion src/browser/stores/WorkspaceConsumerManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import type { WorkspaceConsumersState } from "./WorkspaceStore";
import type { StreamingMessageAggregator } from "@/browser/utils/messages/StreamingMessageAggregator";
import type { ChatStats } from "@/common/types/chatStats";
import type { MuxMessage } from "@/common/types/message";
import { sliceMessagesFromLatestCompactionBoundary } from "@/common/utils/messages/compactionBoundary";

const TOKENIZER_CANCELLED_MESSAGE = "Cancelled by newer request";

Expand Down Expand Up @@ -197,7 +198,9 @@ export class WorkspaceConsumerManager {
// Run in next tick to avoid blocking caller
void (async () => {
try {
const messages = aggregator.getAllMessages();
// Only count tokens for the current compaction epoch — pre-boundary
// messages carry stale context and inflate the consumer breakdown.
const messages = sliceMessagesFromLatestCompactionBoundary(aggregator.getAllMessages());
const model = aggregator.getCurrentModel() ?? "unknown";

// Calculate in piscina pool with timeout protection
Expand Down
23 changes: 21 additions & 2 deletions src/browser/stores/WorkspaceStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import type {
} from "@/common/types/stream";
import { MapStore } from "./MapStore";
import { createDisplayUsage } from "@/common/utils/tokens/displayUsage";
import { isDurableCompactionBoundaryMarker } from "@/common/utils/messages/compactionBoundary";
import { WorkspaceConsumerManager } from "./WorkspaceConsumerManager";
import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
import { sumUsageHistory } from "@/common/utils/tokens/usageAggregator";
Expand Down Expand Up @@ -203,6 +204,9 @@ export interface WorkspaceUsageState {
liveUsage?: ChatUsageDisplay;
/** Live cost usage during streaming (cumulative across all steps) */
liveCostUsage?: ChatUsageDisplay;
/** Best-available context usage: live when streaming, else last completed.
* Consumers should prefer this over manually deriving liveUsage ?? lastContextUsage. */
currentContextUsage?: ChatUsageDisplay;
}

/**
Expand Down Expand Up @@ -1255,11 +1259,15 @@ export class WorkspaceStore {
sessionTotal.reasoning.tokens
: 0;

// Get last message's context usage (unchanged from before)
// Get last message's context usage — only search within the current
// compaction epoch. Pre-boundary messages carry stale contextUsage from
// before compaction; including them inflates the usage indicator and
// triggers premature auto-compaction.
const messages = aggregator.getAllMessages();
const lastContextUsage = (() => {
for (let i = messages.length - 1; i >= 0; i--) {
const msg = messages[i];
if (isDurableCompactionBoundaryMarker(msg)) break;
if (msg.role === "assistant") {
if (msg.metadata?.compacted) continue;
const rawUsage = msg.metadata?.contextUsage;
Expand Down Expand Up @@ -1298,7 +1306,18 @@ export class WorkspaceStore {
? createDisplayUsage(rawCumulativeUsage, model, rawCumulativeProviderMetadata)
: undefined;

return { sessionTotal, lastRequest, lastContextUsage, totalTokens, liveUsage, liveCostUsage };
// Derived: best-available context usage (live when streaming, else last completed)
const currentContextUsage = liveUsage ?? lastContextUsage;

return {
sessionTotal,
lastRequest,
lastContextUsage,
totalTokens,
liveUsage,
liveCostUsage,
currentContextUsage,
};
});
}

Expand Down
20 changes: 13 additions & 7 deletions src/browser/utils/compaction/autoCompactionCheck.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ const createUsageEntry = (
// Helper to create mock WorkspaceUsageState
const createMockUsage = (
lastEntryTokens: number,
_historicalTokens?: number, // Kept for backward compat but unused (session-usage.json handles historical)
model: string = KNOWN_MODELS.SONNET.id,
liveUsage?: ChatUsageDisplay
): WorkspaceUsageState => {
// Create lastContextUsage representing the most recent context window state
const lastContextUsage = createUsageEntry(lastEntryTokens, model);

return { lastContextUsage, totalTokens: 0, liveUsage };
const currentContextUsage = liveUsage ?? lastContextUsage;
return { lastContextUsage, totalTokens: 0, liveUsage, currentContextUsage };
};

describe("checkAutoCompaction", () => {
Expand Down Expand Up @@ -120,7 +120,8 @@ describe("checkAutoCompaction", () => {
test("handles historical usage correctly - ignores it in calculation", () => {
// Scenario: After compaction, historical = 70K, recent = 5K
// Should calculate based on 5K (2.5%), not 75K (37.5%)
const usage = createMockUsage(5_000, 70_000);
// (session-usage.json handles historical separately; only lastContextUsage matters)
const usage = createMockUsage(5_000);
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);

expect(result.usagePercentage).toBe(2.5);
Expand All @@ -140,6 +141,7 @@ describe("checkAutoCompaction", () => {
};
const usage: WorkspaceUsageState = {
lastContextUsage: usageEntry,
currentContextUsage: usageEntry,
totalTokens: 0,
};

Expand All @@ -163,6 +165,7 @@ describe("checkAutoCompaction", () => {
};
const usage: WorkspaceUsageState = {
lastContextUsage: usageEntry,
currentContextUsage: usageEntry,
totalTokens: 0,
};

Expand Down Expand Up @@ -202,7 +205,7 @@ describe("checkAutoCompaction", () => {
});

test("ignores use1M for models that don't support it (GPT)", () => {
const usage = createMockUsage(100_000, undefined, KNOWN_MODELS.GPT_MINI.id);
const usage = createMockUsage(100_000, KNOWN_MODELS.GPT_MINI.id);
// GPT Mini has 272k context, so 100k = 36.76%
const result = checkAutoCompaction(usage, KNOWN_MODELS.GPT_MINI.id, true);

Expand Down Expand Up @@ -261,6 +264,7 @@ describe("checkAutoCompaction", () => {
};
const usage: WorkspaceUsageState = {
lastContextUsage: zeroEntry,
currentContextUsage: zeroEntry,
totalTokens: 0,
};

Expand Down Expand Up @@ -349,7 +353,7 @@ describe("checkAutoCompaction", () => {
test("shouldForceCompact uses liveUsage when available", () => {
// lastUsage at 50%, liveUsage at 75% - should trigger based on live
const liveUsage = createUsageEntry(150_000); // 75%
const usage = createMockUsage(100_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
const usage = createMockUsage(100_000, KNOWN_MODELS.SONNET.id, liveUsage);
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);

expect(result.shouldForceCompact).toBe(true);
Expand All @@ -367,7 +371,7 @@ describe("checkAutoCompaction", () => {
test("shouldForceCompact respects 1M context mode", () => {
// 75% of 1M = 750k tokens
const liveUsage = createUsageEntry(750_000);
const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
const usage = createMockUsage(50_000, KNOWN_MODELS.SONNET.id, liveUsage);
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, true);

expect(result.shouldForceCompact).toBe(true);
Expand All @@ -378,6 +382,7 @@ describe("checkAutoCompaction", () => {
const usage: WorkspaceUsageState = {
totalTokens: 0,
liveUsage,
currentContextUsage: liveUsage,
};
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);

Expand All @@ -391,6 +396,7 @@ describe("checkAutoCompaction", () => {
const usage: WorkspaceUsageState = {
totalTokens: 0,
liveUsage,
currentContextUsage: liveUsage,
};
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);

Expand All @@ -401,7 +407,7 @@ describe("checkAutoCompaction", () => {
test("shouldShowWarning uses max of last and live usage", () => {
// lastUsage at 50% (below warning), liveUsage at 72% (above warning)
const liveUsage = createUsageEntry(144_000); // 72%
const usage = createMockUsage(100_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
const usage = createMockUsage(100_000, KNOWN_MODELS.SONNET.id, liveUsage);
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);

expect(result.shouldShowWarning).toBe(true); // 72% >= 60%
Expand Down
Loading