coder · ammar-agent · Feb 10, 2026 · Feb 10, 2026 · Feb 10, 2026 · Feb 10, 2026
diff --git a/src/browser/components/ChatInput/index.tsx b/src/browser/components/ChatInput/index.tsx
@@ -472,7 +472,7 @@ const ChatInputInner: React.FC<ChatInputProps> = (props) => {
   const workspaceIdForUsage = variant === "workspace" ? props.workspaceId : "";
   const usage = useWorkspaceUsage(workspaceIdForUsage);
   const { has1MContext } = useProviderOptions();
-  const lastUsage = usage?.liveUsage ?? usage?.lastContextUsage;
+  const lastUsage = usage?.currentContextUsage;
   const usageModel = lastUsage?.model ?? null;
   const use1M = has1MContext(usageModel ?? "");
   const contextUsageData = useMemo(() => {

diff --git a/src/browser/components/RightSidebar/CostsTab.tsx b/src/browser/components/RightSidebar/CostsTab.tsx
@@ -1,11 +1,8 @@
 import React from "react";
 import { useWorkspaceUsage, useWorkspaceConsumers } from "@/browser/stores/WorkspaceStore";
 import { getModelStats } from "@/common/utils/tokens/modelStats";
-import {
-  sumUsageHistory,
-  formatCostWithDollar,
-  type ChatUsageDisplay,
-} from "@/common/utils/tokens/usageAggregator";
+import { getSessionCostTotal, formatCostWithDollar } from "@/common/utils/tokens/usageAggregator";
+import { getEffectiveContextLimit } from "@/common/utils/tokens/contextLimit";
 import { usePersistedState } from "@/browser/hooks/usePersistedState";
 import { PREFERRED_COMPACTION_MODEL_KEY } from "@/common/constants/storage";
 import { resolveCompactionModel } from "@/browser/utils/messages/compactionModelPreference";
@@ -69,8 +66,7 @@ const CostsTabComponent: React.FC<CostsTabProps> = ({ workspaceId }) => {
   const runtimeConfig = workspaceContext?.workspaceMetadata.get(workspaceId)?.runtimeConfig;
 
   // Get model from context usage for per-model threshold storage
-  // Use lastContextUsage for context window display (last step's usage)
-  const contextUsageForModel = usage.liveUsage ?? usage.lastContextUsage;
+  const contextUsageForModel = usage.currentContextUsage;
   const currentModel = contextUsageForModel?.model ?? null;
   // Align warning with /compact model resolution so it matches actual compaction behavior.
   const effectiveCompactionModel = resolveCompactionModel(preferredCompactionModel) ?? currentModel;
@@ -81,18 +77,13 @@ const CostsTabComponent: React.FC<CostsTabProps> = ({ workspaceId }) => {
 
   // Session usage for cost calculation
   // Uses sessionTotal (pre-computed) + liveCostUsage (cumulative during streaming)
-  const sessionUsage = React.useMemo(() => {
-    const parts: ChatUsageDisplay[] = [];
-    if (usage.sessionTotal) parts.push(usage.sessionTotal);
-    if (usage.liveCostUsage) parts.push(usage.liveCostUsage);
-    return parts.length > 0 ? sumUsageHistory(parts) : undefined;
-  }, [usage.sessionTotal, usage.liveCostUsage]);
+  const sessionUsage = React.useMemo(() => {
+    // Persisted session total plus in-flight streaming cost, combined by the shared helper.
+    return getSessionCostTotal(usage.sessionTotal, usage.liveCostUsage);
+  }, [usage.sessionTotal, usage.liveCostUsage]);
 
   const hasUsageData =
-    usage &&
-    (usage.sessionTotal !== undefined ||
-      usage.lastContextUsage !== undefined ||
-      usage.liveUsage !== undefined);
+    usage && (usage.sessionTotal !== undefined || usage.currentContextUsage !== undefined);
   const hasConsumerData = consumers && (consumers.totalTokens > 0 || consumers.isCalculating);
   const hasAnyData = hasUsageData || hasConsumerData;
 
@@ -120,7 +111,7 @@ const CostsTabComponent: React.FC<CostsTabProps> = ({ workspaceId }) => {
         <div data-testid="context-usage-section" className="mt-2 mb-5">
           <div data-testid="context-usage-list" className="flex flex-col gap-3">
             {(() => {
-              const contextUsage = usage.liveUsage ?? usage.lastContextUsage;
+              const contextUsage = usage.currentContextUsage;
               const model = contextUsage?.model ?? "unknown";
 
               const contextUsageData = contextUsage
@@ -134,12 +125,10 @@ const CostsTabComponent: React.FC<CostsTabProps> = ({ workspaceId }) => {
                   return undefined;
 
                 const thresholdTokens = Math.round((autoCompactThreshold / 100) * maxTokens);
-                const compactionStats = getModelStats(effectiveCompactionModel);
-                const compactionMaxTokens =
-                  has1MContext(effectiveCompactionModel) &&
-                  supports1MContext(effectiveCompactionModel)
-                    ? 1_000_000
-                    : compactionStats?.max_input_tokens;
+                const compactionMaxTokens = getEffectiveContextLimit(
+                  effectiveCompactionModel,
+                  has1MContext(effectiveCompactionModel)
+                );
 
                 if (compactionMaxTokens && compactionMaxTokens < thresholdTokens) {
                   return { compactionModelMaxTokens: compactionMaxTokens, thresholdTokens };

diff --git a/src/browser/components/RightSidebar/tabs/TabLabels.tsx b/src/browser/components/RightSidebar/tabs/TabLabels.tsx
@@ -13,9 +13,8 @@ import { Tooltip, TooltipContent, TooltipTrigger } from "../../ui/tooltip";
 import { FileIcon } from "../../FileIcon";
 import { formatTabDuration, type ReviewStats } from "./registry";
 import { formatKeybind, KEYBINDS } from "@/browser/utils/ui/keybinds";
-import { cn } from "@/common/lib/utils";
 import { useWorkspaceUsage, useWorkspaceStatsSnapshot } from "@/browser/stores/WorkspaceStore";
-import { sumUsageHistory, type ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
+import { getSessionCostTotal, getTotalCost } from "@/common/utils/tokens/usageAggregator";
 
 interface CostsTabLabelProps {
   workspaceId: string;
@@ -29,21 +28,9 @@ export const CostsTabLabel: React.FC<CostsTabLabelProps> = ({ workspaceId }) =>
   const usage = useWorkspaceUsage(workspaceId);
 
   const sessionCost = React.useMemo(() => {
-    const parts: ChatUsageDisplay[] = [];
-    if (usage.sessionTotal) parts.push(usage.sessionTotal);
-    if (usage.liveCostUsage) parts.push(usage.liveCostUsage);
-    if (parts.length === 0) return null;
-
-    const aggregated = sumUsageHistory(parts);
-    if (!aggregated) return null;
-
-    const total =
-      (aggregated.input.cost_usd ?? 0) +
-      (aggregated.cached.cost_usd ?? 0) +
-      (aggregated.cacheCreate.cost_usd ?? 0) +
-      (aggregated.output.cost_usd ?? 0) +
-      (aggregated.reasoning.cost_usd ?? 0);
-    return total > 0 ? total : null;
+    const sessionAggregate = getSessionCostTotal(usage.sessionTotal, usage.liveCostUsage);
+    const costUsd = getTotalCost(sessionAggregate);
+    return costUsd && costUsd > 0 ? costUsd : null;
   }, [usage.sessionTotal, usage.liveCostUsage]);
 
   return (
@@ -67,12 +54,7 @@ export const ReviewTabLabel: React.FC<ReviewTabLabelProps> = ({ reviewStats }) =
   <>
     Review
     {reviewStats !== null && reviewStats.total > 0 && (
-      <span
-        className={cn(
-          "text-[10px]",
-          reviewStats.read === reviewStats.total ? "text-muted" : "text-muted"
-        )}
-      >
+      <span className="text-muted text-[10px]">
         {reviewStats.read}/{reviewStats.total}
       </span>
     )}

diff --git a/src/browser/hooks/useContextSwitchWarning.test.ts b/src/browser/hooks/useContextSwitchWarning.test.ts
@@ -8,7 +8,7 @@ import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";
 import type { SendMessageOptions } from "@/common/orpc/types";
 import type { DisplayedMessage } from "@/common/types/message";
 import { useContextSwitchWarning } from "./useContextSwitchWarning";
-import { getEffectiveContextLimit } from "@/browser/utils/compaction/contextLimit";
+import { getEffectiveContextLimit } from "@/common/utils/tokens/contextLimit";
 import {
   recordWorkspaceModelChange,
   setWorkspaceModelWithOrigin,
@@ -81,17 +81,21 @@ const createPolicyChurnClient = () => {
   return { client, triggerPolicyEvent };
 };
 
-const buildUsage = (tokens: number, model?: string): WorkspaceUsageState => ({
-  totalTokens: tokens,
-  lastContextUsage: {
+const buildUsage = (tokens: number, model?: string): WorkspaceUsageState => {
+  const usageEntry = {
     input: { tokens },
     cached: { tokens: 0 },
     cacheCreate: { tokens: 0 },
     output: { tokens: 0 },
     reasoning: { tokens: 0 },
     model,
-  },
-});
+  };
+  return {
+    totalTokens: tokens,
+    lastContextUsage: usageEntry,
+    currentContextUsage: usageEntry,
+  };
+};
 
 const buildAssistantMessage = (model: string): DisplayedMessage => ({
   type: "assistant",

diff --git a/src/browser/hooks/useContextSwitchWarning.ts b/src/browser/hooks/useContextSwitchWarning.ts
@@ -11,6 +11,7 @@ import type { AppRouter } from "@/node/orpc/router";
 import type { SendMessageOptions } from "@/common/orpc/types";
 import type { DisplayedMessage } from "@/common/types/message";
 import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";
+import { getContextTokens } from "@/common/utils/tokens/usageAggregator";
 import { normalizeGatewayModel } from "@/common/utils/ai/models";
 import { usePolicy } from "@/browser/contexts/PolicyContext";
 import {
@@ -20,7 +21,7 @@ import {
   type ContextSwitchWarning,
 } from "@/browser/utils/compaction/contextSwitchCheck";
 import { getHigherContextCompactionSuggestion } from "@/browser/utils/compaction/suggestion";
-import { getEffectiveContextLimit } from "@/browser/utils/compaction/contextLimit";
+import { getEffectiveContextLimit } from "@/common/utils/tokens/contextLimit";
 import {
   consumeWorkspaceModelChange,
   setWorkspaceModelWithOrigin,
@@ -158,8 +159,8 @@ export function useContextSwitchWarning(
   }, [workspaceId, pendingModel, use1M, checkOptions]);
 
   const getCurrentTokens = useCallback(() => {
-    const usage = workspaceUsage?.liveUsage ?? workspaceUsage?.lastContextUsage;
-    return usage ? usage.input.tokens + usage.cached.tokens + usage.cacheCreate.tokens : 0;
+    const current = workspaceUsage?.currentContextUsage;
+    return current ? getContextTokens(current) : 0;
   }, [workspaceUsage]);
 
   const tokens = getCurrentTokens();

diff --git a/src/browser/stores/WorkspaceConsumerManager.ts b/src/browser/stores/WorkspaceConsumerManager.ts
@@ -2,6 +2,7 @@ import type { WorkspaceConsumersState } from "./WorkspaceStore";
 import type { StreamingMessageAggregator } from "@/browser/utils/messages/StreamingMessageAggregator";
 import type { ChatStats } from "@/common/types/chatStats";
 import type { MuxMessage } from "@/common/types/message";
+import { sliceMessagesFromLatestCompactionBoundary } from "@/common/utils/messages/compactionBoundary";
 
 const TOKENIZER_CANCELLED_MESSAGE = "Cancelled by newer request";
 
@@ -197,7 +198,9 @@ export class WorkspaceConsumerManager {
     // Run in next tick to avoid blocking caller
     void (async () => {
       try {
-        const messages = aggregator.getAllMessages();
+        // Only count tokens for the current compaction epoch — pre-boundary
+        // messages carry stale context and inflate the consumer breakdown.
+        const messages = sliceMessagesFromLatestCompactionBoundary(aggregator.getAllMessages());
         const model = aggregator.getCurrentModel() ?? "unknown";
 
         // Calculate in piscina pool with timeout protection

diff --git a/src/browser/stores/WorkspaceStore.ts b/src/browser/stores/WorkspaceStore.ts
@@ -35,6 +35,7 @@ import type {
 } from "@/common/types/stream";
 import { MapStore } from "./MapStore";
 import { createDisplayUsage } from "@/common/utils/tokens/displayUsage";
+import { isDurableCompactionBoundaryMarker } from "@/common/utils/messages/compactionBoundary";
 import { WorkspaceConsumerManager } from "./WorkspaceConsumerManager";
 import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
 import { sumUsageHistory } from "@/common/utils/tokens/usageAggregator";
@@ -203,6 +204,9 @@ export interface WorkspaceUsageState {
   liveUsage?: ChatUsageDisplay;
   /** Live cost usage during streaming (cumulative across all steps) */
   liveCostUsage?: ChatUsageDisplay;
+  /** Best-available context usage: live when streaming, else last completed.
+   *  Consumers should prefer this over manually deriving liveUsage ?? lastContextUsage. */
+  currentContextUsage?: ChatUsageDisplay;
 }
 
 /**
@@ -1255,11 +1259,15 @@ export class WorkspaceStore {
           sessionTotal.reasoning.tokens
         : 0;
 
-      // Get last message's context usage (unchanged from before)
+      // Get last message's context usage — only search within the current
+      // compaction epoch. Pre-boundary messages carry stale contextUsage from
+      // before compaction; including them inflates the usage indicator and
+      // triggers premature auto-compaction.
       const messages = aggregator.getAllMessages();
       const lastContextUsage = (() => {
         for (let i = messages.length - 1; i >= 0; i--) {
           const msg = messages[i];
+          if (isDurableCompactionBoundaryMarker(msg)) break;
           if (msg.role === "assistant") {
             if (msg.metadata?.compacted) continue;
             const rawUsage = msg.metadata?.contextUsage;
@@ -1298,7 +1306,18 @@ export class WorkspaceStore {
           ? createDisplayUsage(rawCumulativeUsage, model, rawCumulativeProviderMetadata)
           : undefined;
 
-      return { sessionTotal, lastRequest, lastContextUsage, totalTokens, liveUsage, liveCostUsage };
+      // Publish one "current" context reading so consumers do not each re-derive
+      // the liveUsage-over-lastContextUsage precedence.
+      return {
+        sessionTotal,
+        lastRequest,
+        lastContextUsage,
+        totalTokens,
+        liveUsage,
+        liveCostUsage,
+        // Live reading while streaming, otherwise the last completed one.
+        currentContextUsage: liveUsage ?? lastContextUsage,
+      };
     });
   }
 

diff --git a/src/browser/utils/compaction/autoCompactionCheck.test.ts b/src/browser/utils/compaction/autoCompactionCheck.test.ts
@@ -28,14 +28,14 @@ const createUsageEntry = (
 // Helper to create mock WorkspaceUsageState
 const createMockUsage = (
   lastEntryTokens: number,
-  _historicalTokens?: number, // Kept for backward compat but unused (session-usage.json handles historical)
   model: string = KNOWN_MODELS.SONNET.id,
   liveUsage?: ChatUsageDisplay
 ): WorkspaceUsageState => {
   // Create lastContextUsage representing the most recent context window state
   const lastContextUsage = createUsageEntry(lastEntryTokens, model);
 
-  return { lastContextUsage, totalTokens: 0, liveUsage };
+  const currentContextUsage = liveUsage ?? lastContextUsage;
+  return { lastContextUsage, totalTokens: 0, liveUsage, currentContextUsage };
 };
 
 describe("checkAutoCompaction", () => {
@@ -120,7 +120,8 @@ describe("checkAutoCompaction", () => {
     test("handles historical usage correctly - ignores it in calculation", () => {
       // Scenario: After compaction, historical = 70K, recent = 5K
       // Should calculate based on 5K (2.5%), not 75K (37.5%)
-      const usage = createMockUsage(5_000, 70_000);
+      // (session-usage.json handles historical separately; only lastContextUsage matters)
+      const usage = createMockUsage(5_000);
       const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);
 
       expect(result.usagePercentage).toBe(2.5);
@@ -140,6 +141,7 @@ describe("checkAutoCompaction", () => {
       };
       const usage: WorkspaceUsageState = {
         lastContextUsage: usageEntry,
+        currentContextUsage: usageEntry,
         totalTokens: 0,
       };
 
@@ -163,6 +165,7 @@ describe("checkAutoCompaction", () => {
       };
       const usage: WorkspaceUsageState = {
         lastContextUsage: usageEntry,
+        currentContextUsage: usageEntry,
         totalTokens: 0,
       };
 
@@ -202,7 +205,7 @@ describe("checkAutoCompaction", () => {
     });
 
     test("ignores use1M for models that don't support it (GPT)", () => {
-      const usage = createMockUsage(100_000, undefined, KNOWN_MODELS.GPT_MINI.id);
+      const usage = createMockUsage(100_000, KNOWN_MODELS.GPT_MINI.id);
       // GPT Mini has 272k context, so 100k = 36.76%
       const result = checkAutoCompaction(usage, KNOWN_MODELS.GPT_MINI.id, true);
 
@@ -261,6 +264,7 @@ describe("checkAutoCompaction", () => {
       };
       const usage: WorkspaceUsageState = {
         lastContextUsage: zeroEntry,
+        currentContextUsage: zeroEntry,
         totalTokens: 0,
       };
 
@@ -349,7 +353,7 @@ describe("checkAutoCompaction", () => {
     test("shouldForceCompact uses liveUsage when available", () => {
       // lastUsage at 50%, liveUsage at 75% - should trigger based on live
       const liveUsage = createUsageEntry(150_000); // 75%
-      const usage = createMockUsage(100_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
+      const usage = createMockUsage(100_000, KNOWN_MODELS.SONNET.id, liveUsage);
       const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);
 
       expect(result.shouldForceCompact).toBe(true);
@@ -367,7 +371,7 @@ describe("checkAutoCompaction", () => {
     test("shouldForceCompact respects 1M context mode", () => {
       // 75% of 1M = 750k tokens
       const liveUsage = createUsageEntry(750_000);
-      const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
+      const usage = createMockUsage(50_000, KNOWN_MODELS.SONNET.id, liveUsage);
       const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, true);
 
       expect(result.shouldForceCompact).toBe(true);
@@ -378,6 +382,7 @@ describe("checkAutoCompaction", () => {
       const usage: WorkspaceUsageState = {
         totalTokens: 0,
         liveUsage,
+        currentContextUsage: liveUsage,
       };
       const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);
 
@@ -391,6 +396,7 @@ describe("checkAutoCompaction", () => {
       const usage: WorkspaceUsageState = {
         totalTokens: 0,
         liveUsage,
+        currentContextUsage: liveUsage,
       };
       const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);
 
@@ -401,7 +407,7 @@ describe("checkAutoCompaction", () => {
     test("shouldShowWarning uses max of last and live usage", () => {
       // lastUsage at 50% (below warning), liveUsage at 72% (above warning)
       const liveUsage = createUsageEntry(144_000); // 72%
-      const usage = createMockUsage(100_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
+      const usage = createMockUsage(100_000, KNOWN_MODELS.SONNET.id, liveUsage);
       const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);
 
       expect(result.shouldShowWarning).toBe(true); // 72% >= 60%