mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-06-20 20:20:42 +00:00
🩹 fix: Repaired Output in Gauge, Cache-Rate Keys, Config Gate, Usage Cleanup
- The live/completed gauge now counts the repaired completion (normalized output), so under-reporting providers don't drop the response from used context.
- Translate the static tokenConfig `cacheWrite`/`cacheRead` keys onto the `write`/`read` keys that getCacheMultiplier reads, so cache tokens bill at the configured rate instead of the prompt-rate fallback.
- Clear the token index and usage atoms when leaving a conversation, so visited histories don't accumulate in memory for the tab's lifetime.
- Wait for startupConfig before mounting the gauge, so a deployment with contextUsage disabled never briefly mounts it or fires the token-config query on first load.
This commit is contained in:
parent
5777435aef
commit
df37d1f0ec
5 changed files with 64 additions and 9 deletions
|
|
@ -72,11 +72,14 @@ function TokenUsageIndicator({
|
|||
/** Config gate kept outside the indicator so disabled deployments mount nothing */
|
||||
const TokenUsage = memo(function TokenUsage(props: TokenUsageProps) {
|
||||
const { data: startupConfig } = useGetStartupConfig();
|
||||
if (startupConfig?.interface?.contextUsage === false) {
|
||||
/** Wait for config before mounting: until it loads `contextUsage === false`
|
||||
* reads as undefined, so a disabled deployment would briefly mount the
|
||||
* indicator and fire the token-config query on first load */
|
||||
if (startupConfig == null || startupConfig.interface?.contextUsage === false) {
|
||||
return null;
|
||||
}
|
||||
return (
|
||||
<TokenUsageIndicator {...props} showCost={startupConfig?.interface?.contextCost === true} />
|
||||
<TokenUsageIndicator {...props} showCost={startupConfig.interface?.contextCost === true} />
|
||||
);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -7,12 +7,13 @@ import type { ContextSnapshot, UsageTotals } from '~/store/usage';
|
|||
import type { BranchTotals } from '~/utils/tokens';
|
||||
import {
|
||||
liveTokensFamily,
|
||||
removeUsageAtoms,
|
||||
usageTotalsFamily,
|
||||
branchTotalsFamily,
|
||||
contextSnapshotFamily,
|
||||
} from '~/store/usage';
|
||||
import { buildIndex, sumBranch, clearIndex, costFromUnits } from '~/utils';
|
||||
import { useLatestMessageId } from '~/hooks/Messages/useLatestMessage';
|
||||
import { buildIndex, sumBranch, costFromUnits } from '~/utils';
|
||||
import { useTokenConfigQuery } from '~/data-provider';
|
||||
import useTokenLimits from './useTokenLimits';
|
||||
|
||||
|
|
@ -120,7 +121,17 @@ export default function useTokenUsage({
|
|||
}
|
||||
rebuild(event.query.state.data as TMessage[] | undefined);
|
||||
});
|
||||
return unsubscribe;
|
||||
return () => {
|
||||
unsubscribe();
|
||||
/** Bound memory to open conversations — drop this one's token index and
|
||||
* usage atoms on switch/unmount; both rebuild from the query cache on
|
||||
* return. NEW_CONVO is migrated to its real id by finalizeUsage, so
|
||||
* leave it alone to avoid racing that handoff. */
|
||||
if (conversationKey !== Constants.NEW_CONVO) {
|
||||
clearIndex(conversationKey);
|
||||
removeUsageAtoms(conversationKey);
|
||||
}
|
||||
};
|
||||
}, [conversationKey, queryClient, setBranchTotals]);
|
||||
|
||||
useEffect(() => {
|
||||
|
|
|
|||
|
|
@ -172,7 +172,9 @@ export default function useUsageHandler(): UsageHandlers {
|
|||
if (!folded || data.usage_type != null) {
|
||||
return;
|
||||
}
|
||||
confirmedRef.current += data.output_tokens ?? 0;
|
||||
/** Use the repaired completion count (not raw output_tokens) so the
|
||||
* snapshot gauge keeps the full response for under-reporting providers */
|
||||
confirmedRef.current += normalizeUsageUnits(data).output;
|
||||
streamCharsRef.current = 0;
|
||||
setLive(getConvoKey(submission), confirmedRef.current);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -376,9 +376,21 @@ describe('initializeCustom – token-config fetch header forwarding', () => {
|
|||
} as unknown as BaseInitializeParams['db'],
|
||||
};
|
||||
|
||||
const result = (await initializeCustom(params)) as { endpointTokenConfig?: unknown };
|
||||
const result = (await initializeCustom(params)) as {
|
||||
endpointTokenConfig?: Record<string, Record<string, number>>;
|
||||
};
|
||||
|
||||
expect(fetchModels).not.toHaveBeenCalled();
|
||||
expect(result.endpointTokenConfig).toEqual(tokenConfig);
|
||||
/** Original rates pass through, plus the billing-shape cache keys so
|
||||
* getCacheMultiplier (which reads `write`/`read`) finds them */
|
||||
expect(result.endpointTokenConfig?.['gpt-4']).toEqual({
|
||||
prompt: 1.5,
|
||||
completion: 4.5,
|
||||
context: 32000,
|
||||
cacheRead: 0.3,
|
||||
cacheWrite: 1.8,
|
||||
write: 1.8,
|
||||
read: 0.3,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -36,6 +36,29 @@ export function getTokenConfigKey(
|
|||
: endpoint;
|
||||
}
|
||||
|
||||
/**
 * Converts an admin-facing static `tokenConfig` into the shape used for billing.
 *
 * Admins configure cache rates as `cacheWrite`/`cacheRead`, while
 * `getCacheMultiplier` looks rates up under `write`/`read`. Each model's rates
 * are shallow-copied and the cache rates are mirrored onto the billing keys,
 * leaving the admin-facing keys in place, so cache tokens bill at the
 * configured rate instead of the prompt-rate fallback.
 *
 * @param tokenConfig - Per-model rate maps as written in the endpoint config.
 * @returns A new per-model map; the input and its rate objects are not mutated.
 */
function toBillingTokenConfig(
  tokenConfig: Record<string, Record<string, number>>,
): EndpointTokenConfig {
  /** Admin-facing key -> billing key, applied in this order */
  const keyAliases = [
    ['cacheWrite', 'write'],
    ['cacheRead', 'read'],
  ] as const;

  const billingConfig: EndpointTokenConfig = {};
  Object.entries(tokenConfig).forEach(([modelName, adminRates]) => {
    const billingRates: Record<string, number> = { ...adminRates };
    for (const [adminKey, billingKey] of keyAliases) {
      const rate = adminRates[adminKey];
      /** `!= null` rather than truthiness: a configured rate of 0 must still be mirrored */
      if (rate != null) {
        billingRates[billingKey] = rate;
      }
    }
    billingConfig[modelName] = billingRates as EndpointTokenConfig[string];
  });
  return billingConfig;
}
|
||||
|
||||
/**
|
||||
* Builds custom options from endpoint configuration
|
||||
*/
|
||||
|
|
@ -161,8 +184,12 @@ export async function initializeCustom({
|
|||
|
||||
if (hasTokenConfig) {
|
||||
/** A static override is authoritative — use it for the agent's billing
|
||||
* and balance checks, not just the advertised UI token config */
|
||||
endpointTokenConfig = endpointConfig.tokenConfig as EndpointTokenConfig;
|
||||
* and balance checks, not just the advertised UI token config. Mirror
|
||||
* the admin-facing `cacheWrite`/`cacheRead` keys onto the `write`/`read`
|
||||
* keys the billing multiplier reads. */
|
||||
endpointTokenConfig = toBillingTokenConfig(
|
||||
endpointConfig.tokenConfig as Record<string, Record<string, number>>,
|
||||
);
|
||||
} else {
|
||||
const cachedConfig =
|
||||
FetchTokenConfig[endpoint.toLowerCase() as keyof typeof FetchTokenConfig] &&
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue