diff --git a/api/app/clients/prompts/formatAgentMessages.spec.js b/api/app/clients/prompts/formatAgentMessages.spec.js
index 87b5a9b7f6..d8e9262ba9 100644
--- a/api/app/clients/prompts/formatAgentMessages.spec.js
+++ b/api/app/clients/prompts/formatAgentMessages.spec.js
@@ -362,4 +362,153 @@ describe('formatAgentMessages', () => {
     );
     expect(hasErrorContent).toBe(false);
   });
+
+  describe('Vertex Gemini thoughtSignatures persistence (issue #13006 follow-up)', () => {
+    const SIG_A = 'AY89a1/sigA==';
+    const SIG_B = 'AY89a1/sigB==';
+
+    it('restores additional_kwargs.signatures onto the AIMessage that owns the tool_call', () => {
+      const payload = [
+        { role: 'user', content: 'list files' },
+        {
+          role: 'assistant',
+          metadata: { thoughtSignatures: { t1: SIG_A } },
+          content: [
+            { type: ContentTypes.TEXT, [ContentTypes.TEXT]: '', tool_call_ids: ['t1'] },
+            {
+              type: ContentTypes.TOOL_CALL,
+              tool_call: { id: 't1', name: 'bash', args: '{}', output: 'ok' },
+            },
+          ],
+        },
+      ];
+
+      const result = formatAgentMessages(payload);
+
+      const assistant = result.find((m) => m instanceof AIMessage);
+      expect(assistant.tool_calls).toHaveLength(1);
+      expect(assistant.additional_kwargs?.signatures).toEqual([SIG_A]);
+    });
+
+    it('attaches signatures per-step in multi-step tool turns (codex review fix)', () => {
+      // Reproduces the Codex P1 concern: an assistant turn where the agent
+      // loop made two LLM cycles, each emitting its own tool_call. Each step
+      // must carry its OWN signature on resume — Vertex validates per-step,
+      // not per-turn.
+      const payload = [
+        { role: 'user', content: 'do two things' },
+        {
+          role: 'assistant',
+          metadata: { thoughtSignatures: { t1: SIG_A, t2: SIG_B } },
+          content: [
+            { type: ContentTypes.TEXT, [ContentTypes.TEXT]: 'first', tool_call_ids: ['t1'] },
+            {
+              type: ContentTypes.TOOL_CALL,
+              tool_call: { id: 't1', name: 'a', args: '{}', output: 'okA' },
+            },
+            { type: ContentTypes.TEXT, [ContentTypes.TEXT]: 'second', tool_call_ids: ['t2'] },
+            {
+              type: ContentTypes.TOOL_CALL,
+              tool_call: { id: 't2', name: 'b', args: '{}', output: 'okB' },
+            },
+          ],
+        },
+      ];
+
+      const result = formatAgentMessages(payload);
+      const aiMessages = result.filter((m) => m instanceof AIMessage);
+      expect(aiMessages).toHaveLength(2);
+      expect(aiMessages[0].tool_calls).toHaveLength(1);
+      expect(aiMessages[0].additional_kwargs?.signatures).toEqual([SIG_A]);
+      expect(aiMessages[1].tool_calls).toHaveLength(1);
+      expect(aiMessages[1].additional_kwargs?.signatures).toEqual([SIG_B]);
+    });
+
+    it('preserves tool_call ordering when signatures are partial', () => {
+      // Mixed case: only some tool_calls have stored signatures. Position-
+      // aligned array (with empty placeholders) lets the agents-side
+      // dispatcher attach the correct signature to the correct functionCall.
+      const payload = [
+        { role: 'user', content: 'two parallel tools' },
+        {
+          role: 'assistant',
+          metadata: { thoughtSignatures: { t2: SIG_B } },
+          content: [
+            { type: ContentTypes.TEXT, [ContentTypes.TEXT]: '', tool_call_ids: ['t1', 't2'] },
+            {
+              type: ContentTypes.TOOL_CALL,
+              tool_call: { id: 't1', name: 'a', args: '{}', output: 'okA' },
+            },
+            {
+              type: ContentTypes.TOOL_CALL,
+              tool_call: { id: 't2', name: 'b', args: '{}', output: 'okB' },
+            },
+          ],
+        },
+      ];
+
+      const result = formatAgentMessages(payload);
+      const assistant = result.find((m) => m instanceof AIMessage);
+      expect(assistant.additional_kwargs?.signatures).toEqual(['', SIG_B]);
+    });
+
+    it('no-op when metadata.thoughtSignatures is absent', () => {
+      const payload = [
+        { role: 'user', content: 'hi' },
+        {
+          role: 'assistant',
+          content: [
+            { type: ContentTypes.TEXT, [ContentTypes.TEXT]: '', tool_call_ids: ['t1'] },
+            {
+              type: ContentTypes.TOOL_CALL,
+              tool_call: { id: 't1', name: 'bash', args: '{}', output: 'ok' },
+            },
+          ],
+        },
+      ];
+
+      const result = formatAgentMessages(payload);
+      const assistant = result.find((m) => m instanceof AIMessage);
+      expect(assistant.additional_kwargs?.signatures).toBeUndefined();
+    });
+
+    it('no-op when assistant message has no tool_calls', () => {
+      const payload = [
+        { role: 'user', content: 'hi' },
+        {
+          role: 'assistant',
+          metadata: { thoughtSignatures: { t1: SIG_A } },
+          content: 'plain text reply',
+        },
+      ];
+
+      const result = formatAgentMessages(payload);
+      const assistant = result.find((m) => m instanceof AIMessage);
+      expect(assistant.additional_kwargs?.signatures).toBeUndefined();
+    });
+
+    it('no-op when no tool_call has a corresponding stored signature', () => {
+      // The persisted map exists but addresses different tool_call_ids
+      // (e.g., the previous turn's signatures, somehow leaked). Don't
+      // fabricate empty arrays onto the AIMessage.
+      const payload = [
+        { role: 'user', content: 'hi' },
+        {
+          role: 'assistant',
+          metadata: { thoughtSignatures: { unrelated_id: SIG_A } },
+          content: [
+            { type: ContentTypes.TEXT, [ContentTypes.TEXT]: '', tool_call_ids: ['t1'] },
+            {
+              type: ContentTypes.TOOL_CALL,
+              tool_call: { id: 't1', name: 'bash', args: '{}', output: 'ok' },
+            },
+          ],
+        },
+      ];
+
+      const result = formatAgentMessages(payload);
+      const assistant = result.find((m) => m instanceof AIMessage);
+      expect(assistant.additional_kwargs?.signatures).toBeUndefined();
+    });
+  });
 });
diff --git a/api/app/clients/prompts/formatMessages.js b/api/app/clients/prompts/formatMessages.js
index 964c7f1800..8435ff5280 100644
--- a/api/app/clients/prompts/formatMessages.js
+++ b/api/app/clients/prompts/formatMessages.js
@@ -156,6 +156,17 @@ const formatAgentMessages = (payload) => {
     let currentContent = [];
     let lastAIMessage = null;
 
+    /**
+     * Every AIMessage produced from this TMessage that received `tool_calls`,
+     * in order. Multi-step tool turns (where the agent loop cycles the LLM
+     * multiple times with intervening tool results) produce one AIMessage per
+     * cycle, each owning a different `tool_call_id`. We attach persisted
+     * Vertex Gemini 3 thought signatures (`metadata.thoughtSignatures`,
+     * keyed by `tool_call_id`) onto each one so every step has its right
+     * signature on resume — Vertex validates per-step, not per-turn
+     * (issue #13006 follow-up).
+     */
+    const toolBearingAIMessages = [];
     let hasReasoning = false;
 
     for (const part of message.content) {
@@ -203,6 +214,9 @@ const formatAgentMessages = (payload) => {
         tool_call.args = args;
 
         lastAIMessage.tool_calls.push(tool_call);
+        if (toolBearingAIMessages[toolBearingAIMessages.length - 1] !== lastAIMessage) {
+          toolBearingAIMessages.push(lastAIMessage);
+        }
 
         // Add the corresponding ToolMessage
         messages.push(
@@ -236,6 +250,26 @@ const formatAgentMessages = (payload) => {
     if (currentContent.length > 0) {
       messages.push(new AIMessage({ content: currentContent }));
     }
+
+    /**
+     * Restore signatures per-step. The persisted shape is
+     * `{ [tool_call_id]: signature }`; for each tool-bearing AIMessage we
+     * build a position-aligned `additional_kwargs.signatures` array (empty
+     * placeholders for tool_calls without a stored signature). Agents'
+     * `fixThoughtSignatures` then dispatches the non-empty entries to
+     * functionCall parts in order — order matches because non-empty
+     * signatures and tool_calls share their original parts ordering.
+     */
+    const sigsByCallId = message.metadata?.thoughtSignatures;
+    if (sigsByCallId && typeof sigsByCallId === 'object' && toolBearingAIMessages.length > 0) {
+      for (const aiMsg of toolBearingAIMessages) {
+        const sigs = aiMsg.tool_calls.map((tc) => sigsByCallId[tc.id] ?? '');
+        if (sigs.some((s) => typeof s === 'string' && s.length > 0)) {
+          aiMsg.additional_kwargs ??= {};
+          aiMsg.additional_kwargs.signatures = sigs;
+        }
+      }
+    }
   }
 
   return messages;
diff --git a/api/server/controllers/agents/__tests__/modelEndHandler.spec.js b/api/server/controllers/agents/__tests__/modelEndHandler.spec.js
new file mode 100644
index 0000000000..fdd2c88b6c
--- /dev/null
+++ b/api/server/controllers/agents/__tests__/modelEndHandler.spec.js
@@ -0,0 +1,156 @@
+jest.mock('@librechat/data-schemas', () => ({
+  logger: { error: jest.fn(), debug: jest.fn() },
+}));
+jest.mock('@librechat/api', () => ({
+  sendEvent: jest.fn(),
+  emitEvent: jest.fn(),
+  createToolExecuteHandler: jest.fn(),
+  markSummarizationUsage: (usage) => usage,
+}));
+jest.mock('~/server/services/Files/Citations', () => ({
+  processFileCitations: jest.fn(),
+}));
+jest.mock('~/server/services/Files/Code/process', () => ({
+  processCodeOutput: jest.fn(),
+  runPreviewFinalize: jest.fn(),
+}));
+jest.mock('~/server/services/Files/process', () => ({
+  saveBase64Image: jest.fn(),
+}));
+
+const { ModelEndHandler } = require('../callbacks');
+
+const buildGraph = () => ({
+  getAgentContext: () => ({
+    provider: 'vertexai',
+    clientOptions: { model: 'gemini-3.1-flash-lite-preview' },
+  }),
+});
+
+describe('ModelEndHandler — Vertex thoughtSignature capture (issue #13006 follow-up)', () => {
+  it('maps non-empty signatures onto tool_call_ids in order', async () => {
+    const collectedUsage = [];
+    const collectedThoughtSignatures = {};
+    const handler = new ModelEndHandler(collectedUsage, collectedThoughtSignatures);
+
+    await handler.handle(
+      'on_chat_model_end',
+      {
+        output: {
+          usage_metadata: { input_tokens: 10, output_tokens: 5, total_tokens: 15 },
+          tool_calls: [
+            { id: 'tc_a', name: 'a', args: {} },
+            { id: 'tc_b', name: 'b', args: {} },
+          ],
+          additional_kwargs: { signatures: ['SIG_A', '', 'SIG_B'] },
+        },
+      },
+      { ls_model_name: 'gemini-3.1-flash-lite-preview', user_id: 'u1' },
+      buildGraph(),
+    );
+
+    expect(collectedThoughtSignatures).toEqual({ tc_a: 'SIG_A', tc_b: 'SIG_B' });
+    expect(collectedUsage).toHaveLength(1);
+  });
+
+  it('accumulates per-id across multiple model_end events (multi-step tool turn)', async () => {
+    const collectedUsage = [];
+    const collectedThoughtSignatures = {};
+    const handler = new ModelEndHandler(collectedUsage, collectedThoughtSignatures);
+
+    await handler.handle(
+      'on_chat_model_end',
+      {
+        output: {
+          usage_metadata: { input_tokens: 5, output_tokens: 5, total_tokens: 10 },
+          tool_calls: [{ id: 'tc_step1', name: 'a', args: {} }],
+          additional_kwargs: { signatures: ['SIG_step1'] },
+        },
+      },
+      { ls_model_name: 'g', user_id: 'u' },
+      buildGraph(),
+    );
+    await handler.handle(
+      'on_chat_model_end',
+      {
+        output: {
+          usage_metadata: { input_tokens: 5, output_tokens: 5, total_tokens: 10 },
+          tool_calls: [{ id: 'tc_step2', name: 'b', args: {} }],
+          additional_kwargs: { signatures: ['SIG_step2'] },
+        },
+      },
+      { ls_model_name: 'g', user_id: 'u' },
+      buildGraph(),
+    );
+
+    expect(collectedThoughtSignatures).toEqual({
+      tc_step1: 'SIG_step1',
+      tc_step2: 'SIG_step2',
+    });
+  });
+
+  it('is a no-op for signatures when collectedThoughtSignatures is null', async () => {
+    const collectedUsage = [];
+    const handler = new ModelEndHandler(collectedUsage, null);
+
+    await handler.handle(
+      'on_chat_model_end',
+      {
+        output: {
+          usage_metadata: { input_tokens: 5, output_tokens: 5, total_tokens: 10 },
+          tool_calls: [{ id: 'tc1', name: 'a', args: {} }],
+          additional_kwargs: { signatures: ['SIG'] },
+        },
+      },
+      { ls_model_name: 'g', user_id: 'u' },
+      buildGraph(),
+    );
+
+    expect(collectedUsage).toHaveLength(1);
+  });
+
+  it('does not store anything when signatures field is missing (non-Vertex providers)', async () => {
+    const collectedUsage = [];
+    const collectedThoughtSignatures = {};
+    const handler = new ModelEndHandler(collectedUsage, collectedThoughtSignatures);
+
+    await handler.handle(
+      'on_chat_model_end',
+      {
+        output: {
+          usage_metadata: { input_tokens: 5, output_tokens: 5, total_tokens: 10 },
+          tool_calls: [{ id: 'tc1', name: 'a', args: {} }],
+          additional_kwargs: {},
+        },
+      },
+      { ls_model_name: 'gpt-4', user_id: 'u' },
+      buildGraph(),
+    );
+
+    expect(collectedThoughtSignatures).toEqual({});
+  });
+
+  it('does not store anything when tool_calls is missing', async () => {
+    const collectedUsage = [];
+    const collectedThoughtSignatures = {};
+    const handler = new ModelEndHandler(collectedUsage, collectedThoughtSignatures);
+
+    await handler.handle(
+      'on_chat_model_end',
+      {
+        output: {
+          usage_metadata: { input_tokens: 5, output_tokens: 5, total_tokens: 10 },
+          additional_kwargs: { signatures: ['SIG_orphan'] },
+        },
+      },
+      { ls_model_name: 'g', user_id: 'u' },
+      buildGraph(),
+    );
+
+    expect(collectedThoughtSignatures).toEqual({});
+  });
+
+  it('throws when collectedUsage is not an array (existing contract)', () => {
+    expect(() => new ModelEndHandler(null)).toThrow('collectedUsage must be an array');
+  });
+});
diff --git a/api/server/controllers/agents/callbacks.js b/api/server/controllers/agents/callbacks.js
index 6390e60666..314ee481a6 100644
--- a/api/server/controllers/agents/callbacks.js
+++ b/api/server/controllers/agents/callbacks.js
@@ -21,12 +21,24 @@ const { saveBase64Image } = require('~/server/services/Files/process');
 class ModelEndHandler {
   /**
    * @param {Array} collectedUsage
+   * @param {Record<string, string> | null} [collectedThoughtSignatures] Map of
+   *   `tool_call_id → thoughtSignature` accumulated across `chat_model_end`
+   *   events. Used to persist Vertex Gemini 3 thought signatures across DB
+   *   round-trips so resumed conversations don't 400 on the next API call.
+   *   Each `model_end` may emit multiple tool calls (one per LLM cycle in a
+   *   tool-using turn); per-id storage preserves the mapping so each tool
+   *   call's signature can be restored onto the right reconstructed
+   *   AIMessage rather than being concentrated on the last one.
+   *   Optional; when `null`, the handler is a no-op for signatures. Non-Vertex
+   *   providers don't emit `additional_kwargs.signatures`, so capture is also
+   *   a no-op for them even when the map is provided.
    */
-  constructor(collectedUsage) {
+  constructor(collectedUsage, collectedThoughtSignatures = null) {
     if (!Array.isArray(collectedUsage)) {
       throw new Error('collectedUsage must be an array');
     }
     this.collectedUsage = collectedUsage;
+    this.collectedThoughtSignatures = collectedThoughtSignatures;
   }
 
   finalize(errorMessage) {
@@ -82,6 +94,30 @@ class ModelEndHandler {
 
       const taggedUsage = markSummarizationUsage(usage, metadata);
       this.collectedUsage.push(taggedUsage);
+
+      /**
+       * `additional_kwargs.signatures` is a flat array indexed by response
+       * part position (text + functionCall interleaved). `tool_calls` is
+       * just the function calls in their original order. Non-empty
+       * signatures correspond 1:1 with `tool_calls` in order — see
+       * `partsToSignatures` in `@langchain/google-common`. Walk both in a
+       * single pass to map each signature onto the right `tool_call.id`.
+       */
+      const signatures = data?.output?.additional_kwargs?.signatures;
+      const toolCalls = data?.output?.tool_calls;
+      if (
+        this.collectedThoughtSignatures &&
+        Array.isArray(signatures) &&
+        Array.isArray(toolCalls)
+      ) {
+        let toolIdx = 0;
+        for (const sig of signatures) {
+          if (typeof sig !== 'string' || sig.length === 0) continue;
+          if (toolIdx >= toolCalls.length) break;
+          const id = toolCalls[toolIdx++]?.id;
+          if (id) this.collectedThoughtSignatures[id] = sig;
+        }
+      }
     } catch (error) {
       logger.error('Error handling model end event:', error);
       return this.finalize(errorMessage);
@@ -183,6 +219,7 @@ function getDefaultHandlers({
   aggregateContent,
   toolEndCallback,
   collectedUsage,
+  collectedThoughtSignatures = null,
   streamId = null,
   toolExecuteOptions = null,
   summarizationOptions = null,
@@ -194,7 +231,7 @@ function getDefaultHandlers({
     );
   }
   const handlers = {
-    [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(collectedUsage),
+    [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(collectedUsage, collectedThoughtSignatures),
     [GraphEvents.TOOL_END]: new ToolEndHandler(toolEndCallback, logger),
     [GraphEvents.ON_RUN_STEP]: {
       /**
@@ -1023,6 +1060,7 @@ function buildSummarizationHandlers({ isStreaming, res }) {
 }
 
 module.exports = {
+  ModelEndHandler,
   agentLogHandler,
   agentLogHandlerObj,
   getDefaultHandlers,
diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js
index 40c4ab6d96..0338918412 100644
--- a/api/server/controllers/agents/client.js
+++ b/api/server/controllers/agents/client.js
@@ -82,6 +82,7 @@ class AgentClient extends BaseClient {
       agentConfigs,
       contentParts,
       collectedUsage,
+      collectedThoughtSignatures,
       artifactPromises,
       maxContextTokens,
       subagentAggregatorsByToolCallId,
@@ -94,6 +95,12 @@ class AgentClient extends BaseClient {
     this.contentParts = contentParts;
     /** @type {Array} */
     this.collectedUsage = collectedUsage;
+    /** Vertex Gemini 3 thought signatures captured during the run, keyed by
+     * `tool_call_id`. Persisted on `responseMessage.metadata.thoughtSignatures`
+     * and restored as `additional_kwargs.signatures` on subsequent turns to
+     * keep tool round-trips valid across DB reconstruction.
+     * @type {Record<string, string> | undefined} */
+    this.collectedThoughtSignatures = collectedThoughtSignatures;
     /** @type {ArtifactPromises} */
     this.artifactPromises = artifactPromises;
     /** Per-request map of `createContentAggregator` instances keyed by
@@ -722,7 +729,11 @@ class AgentClient extends BaseClient {
     });
 
     const completion = filterMalformedContentParts(this.contentParts);
-    return { completion };
+    const signatures = this.collectedThoughtSignatures;
+    if (!signatures || Object.keys(signatures).length === 0) {
+      return { completion };
+    }
+    return { completion, metadata: { thoughtSignatures: signatures } };
   }
 
   /**
diff --git a/api/server/services/Endpoints/agents/initialize.js b/api/server/services/Endpoints/agents/initialize.js
index 36ddafc925..9f24438b5f 100644
--- a/api/server/services/Endpoints/agents/initialize.js
+++ b/api/server/services/Endpoints/agents/initialize.js
@@ -109,6 +109,18 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => {
   /** @type {Array} */
   const collectedUsage = [];
+  /**
+   * Vertex Gemini 3 thought signatures captured from `chat_model_end` events,
+   * keyed by `tool_call_id`. Persisted on
+   * `responseMessage.metadata.thoughtSignatures` so subsequent conversation
+   * turns can restore each signature onto the right reconstructed AIMessage's
+   * `additional_kwargs.signatures` and avoid 400s when resuming after a tool
+   * round-trip without a final text reply. Always allocated; capture path
+   * is a no-op for providers that don't emit signatures (OpenAI, Anthropic,
+   * Bedrock, etc.).
+   * @type {Record<string, string>}
+   */
+  const collectedThoughtSignatures = {};
   /** @type {ArtifactPromises} */
   const artifactPromises = [];
   const { contentParts, aggregateContent } = createContentAggregator();
@@ -215,6 +227,7 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => {
     aggregateContent,
     toolEndCallback,
     collectedUsage,
+    collectedThoughtSignatures,
     streamId,
     subagentAggregatorsByToolCallId,
   });
@@ -780,6 +793,7 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => {
     agentConfigs,
     eventHandlers,
     collectedUsage,
+    collectedThoughtSignatures,
    aggregateContent,
     artifactPromises,
     primeInvokedSkills: handlePrimeInvokedSkills,