📊 feat: Granular Tool Token Breakdown with Deferred Splits

This commit is contained in:
Danny Avila 2026-06-11 00:06:55 -04:00
parent e5ab7cf1b1
commit 1843cff325
6 changed files with 127 additions and 7 deletions

View file

@ -215,6 +215,7 @@ describe('usage events through the real agents pipeline', () => {
expect(effectiveInstructionTokens).toBeGreaterThan(0);
expect(remainingContextTokens).toBeGreaterThan(0);
expect(remainingContextTokens).toBeLessThan(contextBudget);
expect(breakdown.toolTokenCounts.add).toBeGreaterThan(0);
}
/** Tool loop grows the context between calls */

View file

@ -1,5 +1,5 @@
import type { TokenUsageView } from '~/hooks/Chat/useTokenUsage';
import { formatTokens, formatCost } from '~/utils';
import { groupToolTokens, formatTokens, formatCost } from '~/utils';
import { useLocalize } from '~/hooks';
interface RowProps {
@ -42,6 +42,22 @@ export default function Breakdown({ view, showCost }: BreakdownProps) {
const messageTokens = Math.max(0, usedTokens - instructionTokens);
const freeTokens = maxTokens != null ? Math.max(0, maxTokens - usedTokens) : null;
const groups =
breakdown?.toolTokenCounts != null
? groupToolTokens(breakdown.toolTokenCounts, breakdown.deferredToolNames)
: null;
const toolRows =
groups == null
? null
: ([
[localize('com_ui_context_tools_system'), groups.system],
[localize('com_ui_context_tools_mcp'), groups.mcp],
[localize('com_ui_skills'), groups.skills],
[localize('com_ui_context_subagents'), groups.subagents],
[localize('com_ui_context_tools_system_deferred'), groups.systemDeferred],
[localize('com_ui_context_tools_mcp_deferred'), groups.mcpDeferred],
] as const);
return (
<div className="w-64 space-y-3" role="region" aria-label={localize('com_ui_context_usage')}>
<div className="flex items-center justify-between">
@ -78,11 +94,18 @@ export default function Breakdown({ view, showCost }: BreakdownProps) {
max={maxTokens}
/>
<Row label={localize('com_ui_context_system')} value={systemTokens} max={maxTokens} />
<Row
label={localize('com_ui_context_tools')}
value={breakdown.toolSchemaTokens}
max={maxTokens}
/>
{toolRows != null ? (
toolRows.map(
([label, value]) =>
value > 0 && <Row key={label} label={label} value={value} max={maxTokens} />,
)
) : (
<Row
label={localize('com_ui_context_tools')}
value={breakdown.toolSchemaTokens}
max={maxTokens}
/>
)}
{breakdown.summaryTokens > 0 && (
<Row
label={localize('com_ui_context_summary')}

View file

@ -887,9 +887,14 @@
"com_ui_context_filter_sort": "Filter and Sort by Context",
"com_ui_context_free": "Free space",
"com_ui_context_messages": "Messages",
"com_ui_context_subagents": "Subagents",
"com_ui_context_summary": "Summary",
"com_ui_context_system": "System prompt",
"com_ui_context_tools": "Tool schemas",
"com_ui_context_tools_mcp": "MCP tools",
"com_ui_context_tools_mcp_deferred": "MCP tools (deferred)",
"com_ui_context_tools_system": "System tools",
"com_ui_context_tools_system_deferred": "System tools (deferred)",
"com_ui_context_unknown": "Context size unknown",
"com_ui_context_usage": "Context usage",
"com_ui_context_usage_label": "Context window: {{0}} of {{1}} tokens used ({{2}}%)",

View file

@ -10,8 +10,10 @@ import {
estimateTokens,
calcUsageCost,
formatCost,
groupToolTokens,
countTrailingOutputChars,
EMPTY_BRANCH,
EMPTY_TOOL_GROUPS,
} from './tokens';
const CONVO = 'convo-1';
@ -154,6 +156,36 @@ describe('formatCost', () => {
});
});
describe('groupToolTokens', () => {
  it('classifies tools into system, mcp, skills, and subagent groups', () => {
    // One tool per display group, with one system tool and one MCP tool deferred.
    const toolTokenCounts = {
      execute_code: 500,
      web_search: 300,
      skill: 200,
      subagent: 150,
      'search_mcp_Google-Workspace': 400,
      fetch_mcp_Github: 250,
    };
    const deferredToolNames = ['fetch_mcp_Github', 'web_search'];
    const expected = {
      system: 500,
      mcp: 400,
      skills: 200,
      subagents: 150,
      systemDeferred: 300,
      mcpDeferred: 250,
    };

    expect(groupToolTokens(toolTokenCounts, deferredToolNames)).toEqual(expected);
  });

  it('returns empty groups without counts and skips zero entries', () => {
    // Missing counts hand back the shared constant itself (identity check).
    const withoutCounts = groupToolTokens(undefined);
    expect(withoutCounts).toBe(EMPTY_TOOL_GROUPS);

    // Zero-token entries contribute nothing, so every group stays at zero.
    const zeroOnly = groupToolTokens({ execute_code: 0 });
    expect(zeroOnly).toEqual(EMPTY_TOOL_GROUPS);
  });
});
describe('countTrailingOutputChars', () => {
const text = (value: string) => ({ type: 'text', text: value });
const think = (value: string) => ({ type: 'think', think: value });

View file

@ -1,4 +1,4 @@
import { Constants } from 'librechat-data-provider';
import { Tools, Constants } from 'librechat-data-provider';
import type { TMessage, TTokenUsageEvent, TModelTokenomics } from 'librechat-data-provider';
export interface TokenEntry {
@ -130,6 +130,61 @@ export function sumBranch(
return { ...totals, tailId };
}
/**
 * Schema-token totals for the tool display groups produced by
 * `groupToolTokens`. Each field is the sum of the per-tool token counts
 * that were classified into that group.
 */
export interface ToolTokenGroups {
/** Tools that match none of the other groups and are not listed as deferred. */
system: number;
/** Tools whose name contains the MCP delimiter and are not listed as deferred. */
mcp: number;
/** Tokens attributed to the skill tool. */
skills: number;
/** Tokens attributed to the subagent tool. */
subagents: number;
/** Deferred counterpart of `system`. */
systemDeferred: number;
/** Deferred counterpart of `mcp`. */
mcpDeferred: number;
}
/**
 * All-zero tool token groups.
 *
 * This single instance is handed out by identity (not copied) whenever there
 * are no counts to classify, so it is frozen: an accidental write through one
 * reference must not change what every other holder sees. Spread it
 * (`{ ...EMPTY_TOOL_GROUPS }`) to obtain a mutable working copy.
 */
export const EMPTY_TOOL_GROUPS: Readonly<ToolTokenGroups> = Object.freeze({
  system: 0,
  mcp: 0,
  skills: 0,
  subagents: 0,
  systemDeferred: 0,
  mcpDeferred: 0,
});
/**
 * Classifies per-tool schema tokens into display groups: built-in system
 * tools, MCP tools, skills, and subagents, with deferred (on-demand) tools
 * split out for the system and MCP groups.
 *
 * @param toolTokenCounts - Token count per tool, keyed by tool name.
 * @param deferredToolNames - Names of tools to count in the deferred groups.
 * @returns Summed tokens per group. When `toolTokenCounts` is `null` or
 * `undefined`, the shared `EMPTY_TOOL_GROUPS` instance itself is returned
 * (callers must treat it as read-only); otherwise a fresh object is returned.
 */
export function groupToolTokens(
  toolTokenCounts?: Record<string, number>,
  deferredToolNames?: readonly string[],
): ToolTokenGroups {
  if (toolTokenCounts == null) {
    return EMPTY_TOOL_GROUPS;
  }

  const deferred = new Set(deferredToolNames ?? []);
  const groups = { ...EMPTY_TOOL_GROUPS };

  for (const [name, tokens] of Object.entries(toolTokenCounts)) {
    // Skip non-positive and non-finite counts: `NaN <= 0` is false, so a plain
    // `<= 0` check would let a NaN through and turn the whole group total into NaN.
    if (!Number.isFinite(tokens) || tokens <= 0) {
      continue;
    }

    // Skills and subagents are matched by exact name and are never split by
    // deferral, so they are checked before the deferred lookup.
    if (name === Tools.skill) {
      groups.skills += tokens;
    } else if (name === Constants.SUBAGENT) {
      groups.subagents += tokens;
    } else if (name.includes(Constants.mcp_delimiter)) {
      if (deferred.has(name)) {
        groups.mcpDeferred += tokens;
      } else {
        groups.mcp += tokens;
      }
    } else if (deferred.has(name)) {
      groups.systemDeferred += tokens;
    } else {
      groups.system += tokens;
    }
  }

  return groups;
}
function getOutputChars(part: unknown): number | null {
if (part == null || typeof part !== 'object') {
return null;

View file

@ -58,6 +58,10 @@ export type TTokenBudgetBreakdown = {
messageCount: number;
messageTokens: number;
availableForMessages: number;
/** Per-tool schema token counts (post-multiplier), keyed by tool name */
toolTokenCounts?: Record<string, number>;
/** Names of counted tools that are deferred (`defer_loading`) and discovered */
deferredToolNames?: string[];
};
/** Per-model-call context snapshot, dispatched after pruning and before the LLM call. */