diff --git a/client/src/components/Chat/Input/TokenUsage/index.tsx b/client/src/components/Chat/Input/TokenUsage/index.tsx
index e0140f6f28..d4ba946d31 100644
--- a/client/src/components/Chat/Input/TokenUsage/index.tsx
+++ b/client/src/components/Chat/Input/TokenUsage/index.tsx
@@ -72,11 +72,14 @@ function TokenUsageIndicator({
 /** Config gate kept outside the indicator so disabled deployments mount nothing */
 const TokenUsage = memo(function TokenUsage(props: TokenUsageProps) {
   const { data: startupConfig } = useGetStartupConfig();
-  if (startupConfig?.interface?.contextUsage === false) {
+  /** Wait for config before mounting: while it is still loading, `contextUsage`
+   *  is undefined rather than `false`, so a disabled deployment would briefly
+   *  mount the indicator and fire the token-config query on first load */
+  if (startupConfig == null || startupConfig.interface?.contextUsage === false) {
     return null;
   }
   return (
-    <TokenUsageIndicator {...props} showCost={startupConfig?.interface?.contextCost === true} />
+    <TokenUsageIndicator {...props} showCost={startupConfig.interface?.contextCost === true} />
   );
 });
 
diff --git a/client/src/hooks/Chat/useTokenUsage.ts b/client/src/hooks/Chat/useTokenUsage.ts
index 190c477893..3d80388cec 100644
--- a/client/src/hooks/Chat/useTokenUsage.ts
+++ b/client/src/hooks/Chat/useTokenUsage.ts
@@ -7,12 +7,13 @@ import type { ContextSnapshot, UsageTotals } from '~/store/usage';
 import type { BranchTotals } from '~/utils/tokens';
 import {
   liveTokensFamily,
+  removeUsageAtoms,
   usageTotalsFamily,
   branchTotalsFamily,
   contextSnapshotFamily,
 } from '~/store/usage';
+import { buildIndex, sumBranch, clearIndex, costFromUnits } from '~/utils';
 import { useLatestMessageId } from '~/hooks/Messages/useLatestMessage';
-import { buildIndex, sumBranch, costFromUnits } from '~/utils';
 import { useTokenConfigQuery } from '~/data-provider';
 import useTokenLimits from './useTokenLimits';
 
@@ -120,7 +121,17 @@ export default function useTokenUsage({
       }
       rebuild(event.query.state.data as TMessage[] | undefined);
     });
-    return unsubscribe;
+    return () => {
+      unsubscribe();
+      /** Bound memory to open conversations — drop this one's token index and
+       *  usage atoms on switch/unmount; both rebuild from the query cache on
+       *  return. NEW_CONVO is migrated to its real id by finalizeUsage, so
+       *  leave it alone to avoid racing that handoff. */
+      if (conversationKey !== Constants.NEW_CONVO) {
+        clearIndex(conversationKey);
+        removeUsageAtoms(conversationKey);
+      }
+    };
   }, [conversationKey, queryClient, setBranchTotals]);
 
   useEffect(() => {
diff --git a/client/src/hooks/SSE/useUsageHandler.ts b/client/src/hooks/SSE/useUsageHandler.ts
index 9d5eb9a71b..eece21fe1c 100644
--- a/client/src/hooks/SSE/useUsageHandler.ts
+++ b/client/src/hooks/SSE/useUsageHandler.ts
@@ -172,7 +172,9 @@ export default function useUsageHandler(): UsageHandlers {
       if (!folded || data.usage_type != null) {
         return;
       }
-      confirmedRef.current += data.output_tokens ?? 0;
+      /** Use the repaired completion count (not raw output_tokens) so the
+       *  snapshot gauge keeps the full response for under-reporting providers */
+      confirmedRef.current += normalizeUsageUnits(data).output;
       streamCharsRef.current = 0;
       setLive(getConvoKey(submission), confirmedRef.current);
     };
diff --git a/packages/api/src/endpoints/custom/initialize.spec.ts b/packages/api/src/endpoints/custom/initialize.spec.ts
index 9aa7f0112a..c9d05a3001 100644
--- a/packages/api/src/endpoints/custom/initialize.spec.ts
+++ b/packages/api/src/endpoints/custom/initialize.spec.ts
@@ -376,9 +376,21 @@ describe('initializeCustom – token-config fetch header forwarding', () => {
       } as unknown as BaseInitializeParams['db'],
     };
 
-    const result = (await initializeCustom(params)) as { endpointTokenConfig?: unknown };
+    const result = (await initializeCustom(params)) as {
+      endpointTokenConfig?: Record<string, Record<string, number>>;
+    };
 
     expect(fetchModels).not.toHaveBeenCalled();
-    expect(result.endpointTokenConfig).toEqual(tokenConfig);
+    /** Original rates pass through, plus the billing-shape cache keys so
+     *  getCacheMultiplier (which reads `write`/`read`) finds them */
+    expect(result.endpointTokenConfig?.['gpt-4']).toEqual({
+      prompt: 1.5,
+      completion: 4.5,
+      context: 32000,
+      cacheRead: 0.3,
+      cacheWrite: 1.8,
+      write: 1.8,
+      read: 0.3,
+    });
   });
 });
diff --git a/packages/api/src/endpoints/custom/initialize.ts b/packages/api/src/endpoints/custom/initialize.ts
index 9a9e334dfe..0f83794920 100644
--- a/packages/api/src/endpoints/custom/initialize.ts
+++ b/packages/api/src/endpoints/custom/initialize.ts
@@ -36,6 +36,29 @@ export function getTokenConfigKey(
     : endpoint;
 }
 
+/**
+ * Converts an admin-facing static `tokenConfig` into the shape billing reads.
+ * Per-model rates are copied unchanged; when `cacheWrite` / `cacheRead` are
+ * set, their values are additionally exposed as `write` / `read`, the keys
+ * `getCacheMultiplier` indexes, so cache tokens bill at the configured rate
+ * instead of the prompt-rate fallback.
+ */
+function toBillingTokenConfig(
+  tokenConfig: Record<string, Record<string, number>>,
+): EndpointTokenConfig {
+  const billing: EndpointTokenConfig = {};
+  Object.entries(tokenConfig).forEach(([modelName, modelRates]) => {
+    const { cacheWrite, cacheRead } = modelRates;
+    const billingRates: Record<string, number> = {
+      ...modelRates,
+      ...(cacheWrite != null && { write: cacheWrite }),
+      ...(cacheRead != null && { read: cacheRead }),
+    };
+    billing[modelName] = billingRates as EndpointTokenConfig[string];
+  });
+  return billing;
+}
+
 /**
  * Builds custom options from endpoint configuration
  */
@@ -161,8 +184,12 @@ export async function initializeCustom({
 
   if (hasTokenConfig) {
     /** A static override is authoritative — use it for the agent's billing
-     *  and balance checks, not just the advertised UI token config */
-    endpointTokenConfig = endpointConfig.tokenConfig as EndpointTokenConfig;
+     *  and balance checks, not just the advertised UI token config. Mirror
+     *  the admin-facing `cacheWrite`/`cacheRead` keys onto the `write`/`read`
+     *  keys the billing multiplier reads. */
+    endpointTokenConfig = toBillingTokenConfig(
+      endpointConfig.tokenConfig as Record<string, Record<string, number>>,
+    );
   } else {
     const cachedConfig =
       FetchTokenConfig[endpoint.toLowerCase() as keyof typeof FetchTokenConfig] &&