diff --git a/client/src/components/Chat/Input/TokenUsage/index.tsx b/client/src/components/Chat/Input/TokenUsage/index.tsx index e0140f6f28..d4ba946d31 100644 --- a/client/src/components/Chat/Input/TokenUsage/index.tsx +++ b/client/src/components/Chat/Input/TokenUsage/index.tsx @@ -72,11 +72,14 @@ function TokenUsageIndicator({ /** Config gate kept outside the indicator so disabled deployments mount nothing */ const TokenUsage = memo(function TokenUsage(props: TokenUsageProps) { const { data: startupConfig } = useGetStartupConfig(); - if (startupConfig?.interface?.contextUsage === false) { + /** Wait for config before mounting: until it loads `contextUsage === false` + * reads as undefined, so a disabled deployment would briefly mount the + * indicator and fire the token-config query on first load. + * NOTE(review): the `== null` guard also keeps the indicator unmounted for as + * long as `startupConfig` stays null/undefined — presumably that includes a + * failed config request; confirm that fallback is intended */ + if (startupConfig == null || startupConfig.interface?.contextUsage === false) { return null; } return ( - + ); }); diff --git a/client/src/hooks/Chat/useTokenUsage.ts b/client/src/hooks/Chat/useTokenUsage.ts index 190c477893..3d80388cec 100644 --- a/client/src/hooks/Chat/useTokenUsage.ts +++ b/client/src/hooks/Chat/useTokenUsage.ts @@ -7,12 +7,13 @@ import type { ContextSnapshot, UsageTotals } from '~/store/usage'; import type { BranchTotals } from '~/utils/tokens'; import { liveTokensFamily, + removeUsageAtoms, usageTotalsFamily, branchTotalsFamily, contextSnapshotFamily, } from '~/store/usage'; +import { buildIndex, sumBranch, clearIndex, costFromUnits } from '~/utils'; import { useLatestMessageId } from '~/hooks/Messages/useLatestMessage'; -import { buildIndex, sumBranch, costFromUnits } from '~/utils'; import { useTokenConfigQuery } from '~/data-provider'; import useTokenLimits from './useTokenLimits'; @@ -120,7 +121,17 @@ export default function useTokenUsage({ } rebuild(event.query.state.data as TMessage[] | undefined); }); - return unsubscribe; + return () => { + unsubscribe(); + /** Bound memory to open conversations — drop this one's token index and * 
usage atoms on switch/unmount; both rebuild from the query cache on + * return. NEW_CONVO is migrated to its real id by finalizeUsage, so + * leave it alone to avoid racing that handoff. */ + if (conversationKey !== Constants.NEW_CONVO) { + /** NOTE(review): this cleanup runs whenever the effect re-runs (deps: + * conversationKey, queryClient, setBranchTotals) as well as on unmount. + * Presumably any other mounted consumer keyed by the same conversationKey + * would lose its index/atoms too — confirm there is only one */ + clearIndex(conversationKey); + removeUsageAtoms(conversationKey); + } + }; }, [conversationKey, queryClient, setBranchTotals]); useEffect(() => { diff --git a/client/src/hooks/SSE/useUsageHandler.ts b/client/src/hooks/SSE/useUsageHandler.ts index 9d5eb9a71b..eece21fe1c 100644 --- a/client/src/hooks/SSE/useUsageHandler.ts +++ b/client/src/hooks/SSE/useUsageHandler.ts @@ -172,7 +172,9 @@ export default function useUsageHandler(): UsageHandlers { if (!folded || data.usage_type != null) { return; } - confirmedRef.current += data.output_tokens ?? 0; + /** Use the repaired completion count (not raw output_tokens) so the + * snapshot gauge keeps the full response for under-reporting providers. + * NOTE(review): the replaced line fell back to 0 when output_tokens was + * missing; normalizeUsageUnits is not visible in this hunk — confirm its + * `.output` is always a finite number so the running total cannot go NaN */ + confirmedRef.current += normalizeUsageUnits(data).output; streamCharsRef.current = 0; setLive(getConvoKey(submission), confirmedRef.current); }; diff --git a/packages/api/src/endpoints/custom/initialize.spec.ts b/packages/api/src/endpoints/custom/initialize.spec.ts index 9aa7f0112a..c9d05a3001 100644 --- a/packages/api/src/endpoints/custom/initialize.spec.ts +++ b/packages/api/src/endpoints/custom/initialize.spec.ts @@ -376,9 +376,21 @@ describe('initializeCustom – token-config fetch header forwarding', () => { } as unknown as BaseInitializeParams['db'], }; - const result = (await initializeCustom(params)) as { endpointTokenConfig?: unknown }; + const result = (await initializeCustom(params)) as { + endpointTokenConfig?: Record>; + }; expect(fetchModels).not.toHaveBeenCalled(); - expect(result.endpointTokenConfig).toEqual(tokenConfig); + /** Original rates pass through, plus the billing-shape cache keys so + * getCacheMultiplier (which reads `write`/`read`) finds them */ + expect(result.endpointTokenConfig?.['gpt-4']).toEqual({ + prompt: 1.5, 
+ completion: 4.5, + context: 32000, + cacheRead: 0.3, + cacheWrite: 1.8, + write: 1.8, + read: 0.3, + }); }); }); diff --git a/packages/api/src/endpoints/custom/initialize.ts b/packages/api/src/endpoints/custom/initialize.ts index 9a9e334dfe..0f83794920 100644 --- a/packages/api/src/endpoints/custom/initialize.ts +++ b/packages/api/src/endpoints/custom/initialize.ts @@ -36,6 +36,29 @@ export function getTokenConfigKey( : endpoint; } +/** + * Maps an admin-facing static `tokenConfig` to the billing shape: the UI uses + * `cacheWrite`/`cacheRead`, but `getCacheMultiplier` indexes `write`/`read`. + * Adds those keys (preserving the originals) so cache tokens bill at the + * configured rate instead of the prompt-rate fallback. + * + * Each model's rates are shallow-copied, so the input object is not mutated. + * When a model carries both spellings, the `cacheWrite`/`cacheRead` value + * overwrites any existing `write`/`read` value in the copy. Only null or + * undefined rates are skipped, so a rate of `0` is still mirrored. + * + * @param tokenConfig - Per-model rate maps, keyed by model name. + * @returns A new map holding one copied (and possibly extended) entry per model. + */ +function toBillingTokenConfig( + tokenConfig: Record>, +): EndpointTokenConfig { + const result: EndpointTokenConfig = {}; + for (const [model, rates] of Object.entries(tokenConfig)) { + const mapped = { ...rates } as Record; + if (rates.cacheWrite != null) { + mapped.write = rates.cacheWrite; + } + if (rates.cacheRead != null) { + mapped.read = rates.cacheRead; + } + result[model] = mapped as EndpointTokenConfig[string]; + } + return result; +} + /** * Builds custom options from endpoint configuration */ @@ -161,8 +184,12 @@ export async function initializeCustom({ if (hasTokenConfig) { /** A static override is authoritative — use it for the agent's billing - * and balance checks, not just the advertised UI token config */ - endpointTokenConfig = endpointConfig.tokenConfig as EndpointTokenConfig; + * and balance checks, not just the advertised UI token config. Mirror + * the admin-facing `cacheWrite`/`cacheRead` keys onto the `write`/`read` + * keys the billing multiplier reads. */ + endpointTokenConfig = toBillingTokenConfig( + endpointConfig.tokenConfig as Record>, + ); } else { const cachedConfig = FetchTokenConfig[endpoint.toLowerCase() as keyof typeof FetchTokenConfig] &&