mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-06-26 17:31:27 +00:00
* 💸 feat: Per-Agent Endpoint Token Config in Multi-Endpoint Billing
Price each collected/emitted usage item with the producing agent's resolved
endpoint token config, instead of the primary agent's for the whole graph.
Previously AgentClient.recordCollectedUsage and the subagent usage emitter used
a single this.options.endpointTokenConfig (the primary's) for every usage item.
A connected agent or subagent on a different custom endpoint that shares a model
id with an entry in the primary's tokenConfig was therefore mis-priced (a model
absent from the primary's tokenConfig already fell back to the built-in rate map — no regression there).
- Tag each usage with its producing agent: ModelEndHandler stamps
usage.agentId = agentContext.agentId; createSubagentUsageSink stamps the
child's subagentAgentId (UsageMetadata gains an optional agentId).
- buildAgentToolContext retains endpointTokenConfig so initialize.js can build
an agentId -> endpointTokenConfig map from agentToolContexts (the one map that
holds every agent, including pure subagents pruned from agentConfigs).
- AgentClient.resolveAgentEndpointTokenConfig(usage) looks up that map by
agentId, falling back to the primary config; used by both the billing path
(new optional resolveEndpointTokenConfig on recordCollectedUsage) and the
subagent cost emitter.
- recordCollectedUsage's resolver is optional and falls back to the batch
endpointTokenConfig, so the shared responses.js/openai.js call sites are
unchanged.
- Tests: two-endpoint graph with a colliding model id prices per-agent; resolver
nullish falls back to batch; subagent sink tags the child agent id.
* fix: Align emit-path cost with per-agent billing; honor known-agent built-in pricing
Addresses Codex review on the per-agent endpoint token config:
- Emit path (callbacks.js) now prices each on_token_usage event with the
producing agent's config (resolved via usageCost.resolveEndpointTokenConfig),
so streamed/persisted metadata.usage.cost matches the per-agent balance
transaction. The agentId tag is resolved server-side and stripped from the
emitted/persisted payload.
- Resolver (resolveAgentTokenConfig) now treats a known agent's config as
authoritative, including undefined → built-in pricing, so a known non-custom
agent in a custom-primary graph is no longer charged the primary's rates.
Only untagged/unknown usage falls back to the primary config.
- endpointTokenConfigByAgentId records every known agent (value may be
undefined) so the resolver distinguishes known-no-rates from unknown.
195 lines
6.3 KiB
JavaScript
195 lines
6.3 KiB
JavaScript
// Stub the modules below (presumably dependencies of `../callbacks` — confirm
// against that module) before requiring the handler under test.
jest.mock('@librechat/data-schemas', () => ({
  logger: { error: jest.fn(), debug: jest.fn() },
}));

jest.mock('@librechat/api', () => ({
  sendEvent: jest.fn(),
  emitEvent: jest.fn(),
  createToolExecuteHandler: jest.fn(),
  // Pass-through stub: returns the usage object it is given, unchanged.
  markSummarizationUsage: (usage) => usage,
}));

jest.mock('~/server/services/Files/Citations', () => ({
  processFileCitations: jest.fn(),
}));

jest.mock('~/server/services/Files/Code/process', () => ({
  processCodeOutput: jest.fn(),
  runPreviewFinalize: jest.fn(),
}));

jest.mock('~/server/services/Files/process', () => ({
  saveBase64Image: jest.fn(),
}));

const { ModelEndHandler } = require('../callbacks');

/**
 * Builds a minimal graph stub for the handler tests.
 *
 * The returned object exposes only `getAgentContext()`, which yields a fresh
 * context with a `vertexai` provider and a Gemini model in `clientOptions`.
 * The context deliberately carries no `agentId`.
 *
 * @returns {{ getAgentContext: () => { provider: string, clientOptions: { model: string } } }}
 */
const buildGraph = () => ({
  getAgentContext: () => ({
    provider: 'vertexai',
    clientOptions: { model: 'gemini-3.1-flash-lite-preview' },
  }),
});

/**
 * Tests for `ModelEndHandler`, driven through `handle('on_chat_model_end', ...)`.
 *
 * Each call passes, in order: the event name, an event payload with an `output`
 * object, a metadata object, and a graph stub exposing `getAgentContext()`.
 *
 * Covered here:
 * - mapping of `output.additional_kwargs.signatures` onto tool call ids in the
 *   `collectedThoughtSignatures` object given to the constructor
 * - tagging of collected and emitted usage with the `agentId` from the graph's
 *   agent context
 * - the constructor's rejection of a non-array `collectedUsage`
 */
describe('ModelEndHandler — Vertex thoughtSignature capture (issue #13006 follow-up)', () => {
  it('maps non-empty signatures onto tool_call_ids in order', async () => {
    const collectedUsage = [];
    const collectedThoughtSignatures = {};
    const handler = new ModelEndHandler(collectedUsage, collectedThoughtSignatures);

    await handler.handle(
      'on_chat_model_end',
      {
        output: {
          usage_metadata: { input_tokens: 10, output_tokens: 5, total_tokens: 15 },
          tool_calls: [
            { id: 'tc_a', name: 'a', args: {} },
            { id: 'tc_b', name: 'b', args: {} },
          ],
          // Three signatures for two tool calls: the empty entry in the middle
          // must be skipped so that 'SIG_B' lands on the second tool call.
          additional_kwargs: { signatures: ['SIG_A', '', 'SIG_B'] },
        },
      },
      { ls_model_name: 'gemini-3.1-flash-lite-preview', user_id: 'u1' },
      buildGraph(),
    );

    expect(collectedThoughtSignatures).toEqual({ tc_a: 'SIG_A', tc_b: 'SIG_B' });
    expect(collectedUsage).toHaveLength(1);
  });

  it('accumulates per-id across multiple model_end events (multi-step tool turn)', async () => {
    const collectedUsage = [];
    const collectedThoughtSignatures = {};
    const handler = new ModelEndHandler(collectedUsage, collectedThoughtSignatures);

    // First event: one tool call with its signature.
    await handler.handle(
      'on_chat_model_end',
      {
        output: {
          usage_metadata: { input_tokens: 5, output_tokens: 5, total_tokens: 10 },
          tool_calls: [{ id: 'tc_step1', name: 'a', args: {} }],
          additional_kwargs: { signatures: ['SIG_step1'] },
        },
      },
      { ls_model_name: 'g', user_id: 'u' },
      buildGraph(),
    );
    // Second event on the same handler: the earlier entry must be retained.
    await handler.handle(
      'on_chat_model_end',
      {
        output: {
          usage_metadata: { input_tokens: 5, output_tokens: 5, total_tokens: 10 },
          tool_calls: [{ id: 'tc_step2', name: 'b', args: {} }],
          additional_kwargs: { signatures: ['SIG_step2'] },
        },
      },
      { ls_model_name: 'g', user_id: 'u' },
      buildGraph(),
    );

    expect(collectedThoughtSignatures).toEqual({
      tc_step1: 'SIG_step1',
      tc_step2: 'SIG_step2',
    });
  });

  it('is a no-op for signatures when collectedThoughtSignatures is null', async () => {
    const collectedUsage = [];
    // No signature store is supplied; the call must still complete.
    const handler = new ModelEndHandler(collectedUsage, null);

    await handler.handle(
      'on_chat_model_end',
      {
        output: {
          usage_metadata: { input_tokens: 5, output_tokens: 5, total_tokens: 10 },
          tool_calls: [{ id: 'tc1', name: 'a', args: {} }],
          additional_kwargs: { signatures: ['SIG'] },
        },
      },
      { ls_model_name: 'g', user_id: 'u' },
      buildGraph(),
    );

    // Usage is still collected even though signatures are not stored.
    expect(collectedUsage).toHaveLength(1);
  });

  it('does not store anything when signatures field is missing (non-Vertex providers)', async () => {
    const collectedUsage = [];
    const collectedThoughtSignatures = {};
    const handler = new ModelEndHandler(collectedUsage, collectedThoughtSignatures);

    await handler.handle(
      'on_chat_model_end',
      {
        output: {
          usage_metadata: { input_tokens: 5, output_tokens: 5, total_tokens: 10 },
          tool_calls: [{ id: 'tc1', name: 'a', args: {} }],
          // Tool calls are present but there is no `signatures` key at all.
          additional_kwargs: {},
        },
      },
      { ls_model_name: 'gpt-4', user_id: 'u' },
      buildGraph(),
    );

    expect(collectedThoughtSignatures).toEqual({});
  });

  it('does not store anything when tool_calls is missing', async () => {
    const collectedUsage = [];
    const collectedThoughtSignatures = {};
    const handler = new ModelEndHandler(collectedUsage, collectedThoughtSignatures);

    await handler.handle(
      'on_chat_model_end',
      {
        output: {
          usage_metadata: { input_tokens: 5, output_tokens: 5, total_tokens: 10 },
          // A signature with no tool call to attach it to.
          additional_kwargs: { signatures: ['SIG_orphan'] },
        },
      },
      { ls_model_name: 'g', user_id: 'u' },
      buildGraph(),
    );

    expect(collectedThoughtSignatures).toEqual({});
  });

  it('tags the producing agent on collected + emitted usage for per-endpoint pricing', async () => {
    const collectedUsage = [];
    const emitUsage = jest.fn();
    const handler = new ModelEndHandler(collectedUsage, null, emitUsage);
    // Unlike buildGraph(), this context carries an agentId.
    const graph = {
      getAgentContext: () => ({
        provider: 'openai',
        agentId: 'agent_sub',
        clientOptions: { model: 'gpt-4' },
      }),
    };

    await handler.handle(
      'on_chat_model_end',
      { output: { usage_metadata: { input_tokens: 10, output_tokens: 5, total_tokens: 15 } } },
      { ls_model_name: 'gpt-4', run_id: 'r1', user_id: 'u1' },
      graph,
    );

    expect(collectedUsage[0].agentId).toBe('agent_sub');
    expect(emitUsage).toHaveBeenCalledWith(expect.objectContaining({ agentId: 'agent_sub' }));
  });

  it('leaves usage untagged when the graph context has no agentId (single-endpoint)', async () => {
    const collectedUsage = [];
    const emitUsage = jest.fn();
    const handler = new ModelEndHandler(collectedUsage, null, emitUsage);

    await handler.handle(
      'on_chat_model_end',
      { output: { usage_metadata: { input_tokens: 10, output_tokens: 5, total_tokens: 15 } } },
      { ls_model_name: 'gemini-3.1-flash-lite-preview', run_id: 'r1', user_id: 'u1' },
      // buildGraph()'s agent context has no agentId.
      buildGraph(),
    );

    expect(collectedUsage[0].agentId).toBeUndefined();
    expect(emitUsage).toHaveBeenCalledWith(expect.objectContaining({ agentId: undefined }));
  });

  it('throws when collectedUsage is not an array (existing contract)', () => {
    expect(() => new ModelEndHandler(null)).toThrow('collectedUsage must be an array');
  });
});