🩹 fix: Repaired Output in Gauge, Cache-Rate Keys, Config Gate, Usage Cleanup

- live/completed gauge counts the repaired completion (normalized output),
  so under-reporting providers don't drop the response from used context
- translate static tokenConfig cacheWrite/cacheRead onto the write/read
  keys getCacheMultiplier reads, so cache tokens bill at the configured
  rate instead of the prompt-rate fallback
- clear the token index and usage atoms when leaving a conversation, so
  visited histories don't accumulate in memory for the tab's lifetime
- wait for startupConfig before mounting the gauge, so a deployment with
  contextUsage disabled never briefly mounts it or fires the token-config
  query on first load
This commit is contained in:
Danny Avila 2026-06-13 13:25:45 -04:00
parent 5777435aef
commit df37d1f0ec
5 changed files with 64 additions and 9 deletions

View file

@ -72,11 +72,14 @@ function TokenUsageIndicator({
/** Config gate kept outside the indicator so disabled deployments mount nothing */
const TokenUsage = memo(function TokenUsage(props: TokenUsageProps) {
const { data: startupConfig } = useGetStartupConfig();
if (startupConfig?.interface?.contextUsage === false) {
/** Wait for config before mounting: until it loads `contextUsage === false`
* reads as undefined, so a disabled deployment would briefly mount the
* indicator and fire the token-config query on first load */
if (startupConfig == null || startupConfig.interface?.contextUsage === false) {
return null;
}
return (
<TokenUsageIndicator {...props} showCost={startupConfig?.interface?.contextCost === true} />
<TokenUsageIndicator {...props} showCost={startupConfig.interface?.contextCost === true} />
);
});

View file

@ -7,12 +7,13 @@ import type { ContextSnapshot, UsageTotals } from '~/store/usage';
import type { BranchTotals } from '~/utils/tokens';
import {
liveTokensFamily,
removeUsageAtoms,
usageTotalsFamily,
branchTotalsFamily,
contextSnapshotFamily,
} from '~/store/usage';
import { buildIndex, sumBranch, clearIndex, costFromUnits } from '~/utils';
import { useLatestMessageId } from '~/hooks/Messages/useLatestMessage';
import { buildIndex, sumBranch, costFromUnits } from '~/utils';
import { useTokenConfigQuery } from '~/data-provider';
import useTokenLimits from './useTokenLimits';
@ -120,7 +121,17 @@ export default function useTokenUsage({
}
rebuild(event.query.state.data as TMessage[] | undefined);
});
return unsubscribe;
return () => {
unsubscribe();
/** Bound memory to open conversations — drop this one's token index and
* usage atoms on switch/unmount; both rebuild from the query cache on
* return. NEW_CONVO is migrated to its real id by finalizeUsage, so
* leave it alone to avoid racing that handoff. */
if (conversationKey !== Constants.NEW_CONVO) {
clearIndex(conversationKey);
removeUsageAtoms(conversationKey);
}
};
}, [conversationKey, queryClient, setBranchTotals]);
useEffect(() => {

View file

@ -172,7 +172,9 @@ export default function useUsageHandler(): UsageHandlers {
if (!folded || data.usage_type != null) {
return;
}
confirmedRef.current += data.output_tokens ?? 0;
/** Use the repaired completion count (not raw output_tokens) so the
* snapshot gauge keeps the full response for under-reporting providers */
confirmedRef.current += normalizeUsageUnits(data).output;
streamCharsRef.current = 0;
setLive(getConvoKey(submission), confirmedRef.current);
};

View file

@ -376,9 +376,21 @@ describe('initializeCustom token-config fetch header forwarding', () => {
} as unknown as BaseInitializeParams['db'],
};
const result = (await initializeCustom(params)) as { endpointTokenConfig?: unknown };
const result = (await initializeCustom(params)) as {
endpointTokenConfig?: Record<string, Record<string, number>>;
};
expect(fetchModels).not.toHaveBeenCalled();
expect(result.endpointTokenConfig).toEqual(tokenConfig);
/** Original rates pass through, plus the billing-shape cache keys so
* getCacheMultiplier (which reads `write`/`read`) finds them */
expect(result.endpointTokenConfig?.['gpt-4']).toEqual({
prompt: 1.5,
completion: 4.5,
context: 32000,
cacheRead: 0.3,
cacheWrite: 1.8,
write: 1.8,
read: 0.3,
});
});
});

View file

@ -36,6 +36,29 @@ export function getTokenConfigKey(
: endpoint;
}
/**
 * Converts a static, admin-authored `tokenConfig` into the shape billing uses.
 *
 * Admin config spells cache rates as `cacheWrite`/`cacheRead`, while
 * `getCacheMultiplier` indexes `write`/`read`. Each model's rates are copied
 * as-is and, whenever a cache rate is present (non-nullish), it is mirrored
 * onto the matching billing key. The input object is never mutated.
 *
 * @param tokenConfig - Per-model rate tables keyed by model name.
 * @returns A new per-model table carrying the original keys plus `write`/`read`.
 */
function toBillingTokenConfig(
  tokenConfig: Record<string, Record<string, number>>,
): EndpointTokenConfig {
  const billing: EndpointTokenConfig = {};
  Object.entries(tokenConfig).forEach(([modelName, modelRates]) => {
    const entry = { ...modelRates } as Record<string, number>;
    const writeRate = modelRates.cacheWrite;
    const readRate = modelRates.cacheRead;
    /** `!= null` on purpose: a configured rate of 0 must still be mirrored */
    if (writeRate != null) {
      entry.write = writeRate;
    }
    if (readRate != null) {
      entry.read = readRate;
    }
    billing[modelName] = entry as EndpointTokenConfig[string];
  });
  return billing;
}
/**
* Builds custom options from endpoint configuration
*/
@ -161,8 +184,12 @@ export async function initializeCustom({
if (hasTokenConfig) {
/** A static override is authoritative — use it for the agent's billing
* and balance checks, not just the advertised UI token config */
endpointTokenConfig = endpointConfig.tokenConfig as EndpointTokenConfig;
* and balance checks, not just the advertised UI token config. Mirror
* the admin-facing `cacheWrite`/`cacheRead` keys onto the `write`/`read`
* keys the billing multiplier reads. */
endpointTokenConfig = toBillingTokenConfig(
endpointConfig.tokenConfig as Record<string, Record<string, number>>,
);
} else {
const cachedConfig =
FetchTokenConfig[endpoint.toLowerCase() as keyof typeof FetchTokenConfig] &&