diff --git a/api/package.json b/api/package.json index b044b49670..8f6e983b49 100644 --- a/api/package.json +++ b/api/package.json @@ -46,7 +46,7 @@ "@azure/storage-blob": "^12.30.0", "@google/genai": "^2.8.0", "@keyv/redis": "^4.3.3", - "@librechat/agents": "^3.2.52", + "@librechat/agents": "^3.2.53", "@librechat/api": "*", "@librechat/data-schemas": "*", "@microsoft/microsoft-graph-client": "^3.0.7", diff --git a/api/server/controllers/agents/__tests__/jobReplacement.spec.js b/api/server/controllers/agents/__tests__/jobReplacement.spec.js index 7f7a775b75..c4247fcf30 100644 --- a/api/server/controllers/agents/__tests__/jobReplacement.spec.js +++ b/api/server/controllers/agents/__tests__/jobReplacement.spec.js @@ -288,3 +288,331 @@ describe('Job Replacement Detection', () => { }); }); }); + +/** + * HITL terminal-side-effect guards (PR #13942). + * + * Jobs are keyed by streamId == conversationId, so a NEW request REPLACES the running + * one on the same conversation. The replaced generation's tail (its pause attempt, its + * checkpoint prune, its resume catch-path terminal writes) must not clobber the live + * generation's state. Each guard re-reads the live job and compares createdAt against the + * generation's own captured identity before acting. These mirror the predicates in + * client.js (handleRunInterrupt / chatCompletion finally) and resume.js. + */ +describe('HITL Terminal-Side-Effect Guards', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + describe('F22 — pause is skipped when the generation was replaced', () => { + // Mirrors client.js handleRunInterrupt pre-check, run BEFORE approvals.pause. + const shouldPause = async ({ jobCreatedAt, streamId }) => { + if (jobCreatedAt != null) { + const liveJob = await mockGenerationJobManager.getJob(streamId); + if (!liveJob || liveJob.createdAt !== jobCreatedAt) { + return false; + } + } + return true; + }; + + it('does not pause when a newer job replaced this one', async () => { + mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: 2000 }); + expect(await shouldPause({ jobCreatedAt: 1000, streamId: 'c1' })).toBe(false); + }); + + it('does not pause when the job is already gone', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(null); + expect(await shouldPause({ jobCreatedAt: 1000, streamId: 'c1' })).toBe(false); + }); + + it('pauses when this is still the live job', async () => { + mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: 1000 }); + expect(await shouldPause({ jobCreatedAt: 1000, streamId: 'c1' })).toBe(true); + }); + + it('pauses without a lookup when identity is unknown (legacy job)', async () => { + expect(await shouldPause({ jobCreatedAt: null, streamId: 'c1' })).toBe(true); + expect(mockGenerationJobManager.getJob).not.toHaveBeenCalled(); + }); + }); + + describe('F21 — checkpoint prune is skipped when the generation was replaced', () => { + // Mirrors client.js chatCompletion finally: prune only when NOT replaced. + const shouldPrune = async ({ resumableStreamId, jobCreatedAt }) => { + let replaced = false; + if (resumableStreamId && jobCreatedAt != null) { + const liveJob = await mockGenerationJobManager.getJob(resumableStreamId); + replaced = !liveJob || liveJob.createdAt !== jobCreatedAt; + } + return !replaced; + }; + + it('skips the prune when a newer job replaced this one', async () => { + mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: 2000 }); + expect(await shouldPrune({ resumableStreamId: 'c1', jobCreatedAt: 1000 })).toBe(false); + }); + + it('prunes when this is still the live job', async () => { + mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: 1000 }); + expect(await shouldPrune({ resumableStreamId: 'c1', jobCreatedAt: 1000 })).toBe(true); + }); + + it('prunes without a lookup when there is no resumable stream id', async () => { + expect(await shouldPrune({ resumableStreamId: undefined, jobCreatedAt: 1000 })).toBe(true); + expect(mockGenerationJobManager.getJob).not.toHaveBeenCalled(); + }); + }); + + describe('F24 — resume catch-path terminal writes are skipped when replaced', () => { + // Mirrors resume.js: stillLive gate around emitError/completeJob/deleteAgentCheckpoint. + const stillLive = async ({ streamId, jobCreatedAt }) => { + let live = true; + try { + const liveJob = await mockGenerationJobManager.getJob(streamId); + live = !!liveJob && liveJob.createdAt === jobCreatedAt; + } catch { + live = true; // read failed — fail open and run the terminal writes + } + return live; + }; + + it('runs terminal writes when this is still the live job', async () => { + mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: 1000 }); + expect(await stillLive({ streamId: 'c1', jobCreatedAt: 1000 })).toBe(true); + }); + + it('skips terminal writes when a newer job replaced this one', async () => { + mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: 2000 }); + expect(await stillLive({ streamId: 'c1', jobCreatedAt: 1000 })).toBe(false); + }); + + it('fails open (runs terminal writes) when the liveness read throws', async () => { + mockGenerationJobManager.getJob.mockRejectedValue(new Error('store down')); + expect(await stillLive({ streamId: 'c1', jobCreatedAt: 1000 })).toBe(true); + }); + }); + + describe('F23 — resumed turn sources files from the job, not the racy DB row', () => { + // Mirrors resume.js: prefer the body, then job.metadata.userMessage.files, then DB. + const resolveFiles = ({ bodyFiles, metaFiles, dbFiles }) => { + if (Array.isArray(bodyFiles) && bodyFiles.length > 0) { + return bodyFiles; + } + if (Array.isArray(metaFiles) && metaFiles.length > 0) { + return metaFiles; + } + return Array.isArray(dbFiles) && dbFiles.length > 0 ? dbFiles : undefined; + }; + + it('prefers job-metadata files over the DB row (no DB-save race)', () => { + expect( + resolveFiles({ + bodyFiles: [], + metaFiles: [{ file_id: 'meta' }], + dbFiles: [{ file_id: 'db' }], + }), + ).toEqual([{ file_id: 'meta' }]); + }); + + it('falls back to the DB row when the job has no persisted files (older job)', () => { + expect( + resolveFiles({ bodyFiles: [], metaFiles: undefined, dbFiles: [{ file_id: 'db' }] }), + ).toEqual([{ file_id: 'db' }]); + }); + + it('keeps files already present on the resume body', () => { + expect( + resolveFiles({ + bodyFiles: [{ file_id: 'body' }], + metaFiles: [{ file_id: 'meta' }], + dbFiles: [], + }), + ).toEqual([{ file_id: 'body' }]); + }); + }); +}); + +/** + * Round-18 follow-ups to the guards above (Codex review 4594099963). + */ +describe('HITL Resume Fidelity Guards (round 18)', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + describe('G1 — resume re-checks ownership AGAIN right before terminal writes', () => { + // The start-of-finalize guard can go stale across saveMessage + title generation, + // so resume.js re-reads the live job immediately before emitDone/completeJob/prune. + // Same predicate as the catch-path (F24), applied at the success path's second point. + const stillLiveBeforeFinalize = async ({ streamId, jobCreatedAt }) => { + const liveJob = await mockGenerationJobManager.getJob(streamId); + return !!liveJob && liveJob.createdAt === jobCreatedAt; + }; + + it('runs terminal writes when still the live job at the second check', async () => { + mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: 1000 }); + expect(await stillLiveBeforeFinalize({ streamId: 'c1', jobCreatedAt: 1000 })).toBe(true); + }); + + it('skips terminal writes when replaced DURING finalize (after the first check passed)', async () => { + // First check passed earlier with createdAt 1000; a new request replaced it to 2000 + // while saveMessage + title generation awaited. The second check must catch it. + mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: 2000 }); + expect(await stillLiveBeforeFinalize({ streamId: 'c1', jobCreatedAt: 1000 })).toBe(false); + }); + }); + + describe('G2 — uploaded files are seeded into the AWAITED preliminary user message', () => { + // Mirrors getPreliminaryUserMessage: files from the request are persisted on the + // preliminary (awaited, pre-run) metadata so they land before any interrupt emits. + const buildPreliminaryUserMessage = ({ messageId, files }) => { + if (typeof messageId !== 'string' || messageId.length === 0) { + return null; + } + return { + messageId, + ...(Array.isArray(files) && files.length > 0 && { files }), + }; + }; + + it('includes files when the request carries them', () => { + const msg = buildPreliminaryUserMessage({ messageId: 'm1', files: [{ file_id: 'a' }] }); + expect(msg.files).toEqual([{ file_id: 'a' }]); + }); + + it('omits files when none were uploaded (no empty array)', () => { + const msg = buildPreliminaryUserMessage({ messageId: 'm1', files: [] }); + expect(msg).not.toHaveProperty('files'); + }); + }); + + describe('G3 — resume replays pre-pause discovered deferred tools', () => { + // Mirrors createRun's merge: discovered set is union(message-extracted, replayed), + // gated entirely on the agent actually having deferred tools. + const resolveDiscovered = ({ hasAnyDeferredTools, messageExtracted, replayed }) => { + const set = new Set(); + if (hasAnyDeferredTools) { + for (const n of messageExtracted ?? []) { + set.add(n); + } + for (const n of replayed ?? []) { + set.add(n); + } + } + return set; + }; + + it('replays captured names on resume (messages empty) so the paused tool is present', () => { + const set = resolveDiscovered({ + hasAnyDeferredTools: true, + messageExtracted: [], + replayed: ['deep_tool'], + }); + expect(set.has('deep_tool')).toBe(true); + }); + + it('unions replayed names with message-extracted names', () => { + const set = resolveDiscovered({ + hasAnyDeferredTools: true, + messageExtracted: ['from_history'], + replayed: ['deep_tool'], + }); + expect([...set].sort()).toEqual(['deep_tool', 'from_history']); + }); + + it('is inert when the agent has no deferred tools', () => { + const set = resolveDiscovered({ + hasAnyDeferredTools: false, + messageExtracted: ['x'], + replayed: ['deep_tool'], + }); + expect(set.size).toBe(0); + }); + }); + + describe("H3 — resume replays the paused turn's model parameters (ephemeral agents)", () => { + // Mirrors restoreResumeContext: spread persisted model_parameters back onto the body, + // excluding `model` (replayed via the fingerprinted RESUME_CONTEXT_KEYS path). + const replayModelParameters = (body, resumeContext) => { + const params = resumeContext?.model_parameters; + if (params && typeof params === 'object') { + const { model: _model, ...rest } = params; + Object.assign(body, rest); + } + return body; + }; + + it('restores non-default params (temperature, max tokens) onto the resume body', () => { + const body = { conversationId: 'c1', endpoint: 'agents' }; + replayModelParameters(body, { + model_parameters: { model: 'gpt-4o', temperature: 0.2, max_tokens: 1024 }, + }); + expect(body).toMatchObject({ temperature: 0.2, max_tokens: 1024 }); + }); + + it('does NOT overwrite model (kept consistent with the resume fingerprint)', () => { + const body = { model: 'pinned-model' }; + replayModelParameters(body, { model_parameters: { model: 'other-model', temperature: 0.9 } }); + expect(body.model).toBe('pinned-model'); + }); + + it('overwrites a client-supplied param with the captured authoritative value', () => { + const body = { temperature: 1.0 }; // crafted/stale client value + replayModelParameters(body, { model_parameters: { temperature: 0.2 } }); + expect(body.temperature).toBe(0.2); + }); + + it('is a no-op when nothing was captured', () => { + const body = { conversationId: 'c1' }; + replayModelParameters(body, {}); + expect(body).toEqual({ conversationId: 'c1' }); + }); + }); + + describe('J2 — pause unfinished-save is skipped once a fast resume took over', () => { + // Mirrors request.js: only mark the paused row unfinished while the job is STILL paused + // on THIS generation's action. A claim transitions it out of requires_action and a + // replacement bumps createdAt — either means a /resume now owns the row, so marking it + // unfinished would clobber the resumed turn's completed content. Fail open on read error. + const shouldMarkUnfinished = async ({ jobCreatedAt, streamId }) => { + let stillPaused = true; + try { + const liveJob = await mockGenerationJobManager.getJob(streamId); + stillPaused = + !!liveJob && + liveJob.status === 'requires_action' && + (jobCreatedAt == null || liveJob.createdAt === jobCreatedAt); + } catch { + stillPaused = true; + } + return stillPaused; + }; + + it('marks unfinished while still paused on this generation', async () => { + mockGenerationJobManager.getJob.mockResolvedValue({ + status: 'requires_action', + createdAt: 1000, + }); + expect(await shouldMarkUnfinished({ jobCreatedAt: 1000, streamId: 'c1' })).toBe(true); + }); + + it('skips the unfinished-save once a fast resume claimed it (no longer requires_action)', async () => { + mockGenerationJobManager.getJob.mockResolvedValue({ status: 'running', createdAt: 1000 }); + expect(await shouldMarkUnfinished({ jobCreatedAt: 1000, streamId: 'c1' })).toBe(false); + }); + + it('skips the unfinished-save when a newer request replaced the job', async () => { + mockGenerationJobManager.getJob.mockResolvedValue({ + status: 'requires_action', + createdAt: 2000, + }); + expect(await shouldMarkUnfinished({ jobCreatedAt: 1000, streamId: 'c1' })).toBe(false); + }); + + it('fails open (marks unfinished) when the liveness read throws', async () => { + mockGenerationJobManager.getJob.mockRejectedValue(new Error('store down')); + expect(await shouldMarkUnfinished({ jobCreatedAt: 1000, streamId: 'c1' })).toBe(true); + }); + }); +}); diff --git a/api/server/controllers/agents/__tests__/resume.spec.js b/api/server/controllers/agents/__tests__/resume.spec.js new file mode 100644 index 0000000000..dc07de7c7d --- /dev/null +++ b/api/server/controllers/agents/__tests__/resume.spec.js @@ -0,0 +1,1114 @@ +/** + * Integration tests for the HITL resume controller (POST /agents/chat/resume). + * + * Drives the real `ResumeAgentController` end-to-end over supertest with the SDK + * run, durable checkpointer, Mongo, and concurrency cache mocked out. The pure + * decision/liveness helpers (`isPendingActionStale`, `mapToolApprovalResolutions`, + * `findUndecidedToolCalls`, `findDisallowedDecisions`, `buildAbortedResponseMetadata`, + * `sanitizeMessageForTransmit`) run for real via `requireActual`, so the test + * exercises the actual guard ladder and the pause -> approve -> resume -> finalize + * lifecycle rather than re-implemented stubs. + * + * Covers: + * - the authorization / staleness / agent-and-endpoint / actionId guard ladder + * - tool_approval validation (undecided, policy-disallowed decision) + * - ask_user_question answer requirement + * - concurrency gate (429) and the atomic single-winner claim (409) + * - the happy path: ACK, run reconstruction, resumeCompletion, finalize (save the + * now-finished response, emit done, complete job, prune checkpoint) + * - re-pause (no double finalize), abort-during-resume (no double finalize), + * and the resume-failure terminal path + */ + +const express = require('express'); +const request = require('supertest'); +const { Constants } = require('librechat-data-provider'); + +const USER_ID = 'user-1'; +const TENANT_ID = 'tenant-1'; +const AGENT_ID = 'agent-abc'; +const CONVO_ID = 'convo-123'; +const ACTION_ID = 'action-xyz'; +const RESPONSE_MSG_ID = 'resp-1'; +const USER_MSG_ID = 'umsg-1'; +const THREAD_PARENT_ID = 'thread-parent-1'; + +const mockLogger = { + debug: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + info: jest.fn(), +}; + +const mockJobStore = { + getJob: jest.fn(), + updateJob: jest.fn(), +}; + +const mockGenerationJobManager = { + getJob: jest.fn(), + getJobStore: jest.fn(() => mockJobStore), + getResumeState: jest.fn(), + setContentParts: jest.fn(), + emitChunk: jest.fn(), + emitDone: jest.fn(), + emitError: jest.fn(), + completeJob: jest.fn(), + expireApproval: jest.fn(), + approvals: { resolve: jest.fn() }, +}; + +const mockDeleteAgentCheckpoint = jest.fn(); +const mockDecrementPendingRequest = jest.fn(); +const mockCheckAndIncrementPendingRequest = jest.fn(); + +const mockSaveMessage = jest.fn(); +const mockGetConvo = jest.fn(); +const mockGetMessages = jest.fn(); +const mockDisposeClient = jest.fn(); +const mockGetMCPRequestContext = jest.fn(); +const mockCleanupMCPRequestContextForReq = jest.fn(); + +jest.mock('@librechat/data-schemas', () => ({ + ...jest.requireActual('@librechat/data-schemas'), + logger: mockLogger, +})); + +jest.mock('@librechat/api', () => ({ + ...jest.requireActual('@librechat/api'), + GenerationJobManager: mockGenerationJobManager, + deleteAgentCheckpoint: (...args) => mockDeleteAgentCheckpoint(...args), + decrementPendingRequest: (...args) => mockDecrementPendingRequest(...args), + checkAndIncrementPendingRequest: (...args) => mockCheckAndIncrementPendingRequest(...args), +})); + +jest.mock('~/models', () => ({ + saveMessage: (...args) => mockSaveMessage(...args), + getConvo: (...args) => mockGetConvo(...args), + getMessages: (...args) => mockGetMessages(...args), +})); + +jest.mock('~/server/cleanup', () => ({ + disposeClient: (...args) => mockDisposeClient(...args), +})); + +jest.mock('~/server/services/MCPRequestContext', () => ({ + getMCPRequestContext: (...args) => mockGetMCPRequestContext(...args), + cleanupMCPRequestContextForReq: (...args) => mockCleanupMCPRequestContextForReq(...args), +})); + +// Import after mocks +const ResumeAgentController = require('~/server/controllers/agents/resume'); + +/** Drain the microtask + immediate queues so the post-ACK continuation settles. */ +const flush = () => new Promise((resolve) => setImmediate(resolve)); + +/** A live, resolvable paused tool-approval job (single tool call `tc1`). */ +function makeToolApprovalJob(overrides = {}) { + const metaOverrides = overrides.metadata ?? {}; + const pendingOverrides = metaOverrides.pendingAction ?? {}; + return { + status: 'requires_action', + abortController: new AbortController(), + ...overrides, + metadata: { + userId: USER_ID, + tenantId: TENANT_ID, + agent_id: AGENT_ID, + endpoint: 'agents', + responseMessageId: RESPONSE_MSG_ID, + sender: 'TestAgent', + iconURL: 'https://example.com/icon.png', + model: 'claude-test', + isTemporary: false, + userMessage: { + messageId: USER_MSG_ID, + parentMessageId: THREAD_PARENT_ID, + text: 'please run the tool', + }, + ...metaOverrides, + pendingAction: { + actionId: ACTION_ID, + expiresAt: Date.now() + 60_000, + payload: { + type: 'tool_approval', + action_requests: [{ tool_call_id: 'tc1' }], + review_configs: [{ tool_call_id: 'tc1', allowed_decisions: ['approve', 'reject'] }], + }, + ...pendingOverrides, + }, + }, + }; +} + +/** A live, resolvable paused ask-user-question job. */ +function makeAskUserJob(overrides = {}) { + const job = makeToolApprovalJob(overrides); + job.metadata.pendingAction.payload = { + type: 'ask_user_question', + question: 'What should I name the file?', + }; + return job; +} + +/** A mock reconstructed client for the post-ACK path. */ +function makeClient(overrides = {}) { + return { + sender: 'TestAgent', + contentParts: [{ type: 'text', text: 'resumed answer' }], + artifactPromises: [], + pendingApproval: false, + buildResponseMetadata: jest.fn(() => null), + resumeCompletion: jest.fn().mockResolvedValue(undefined), + ...overrides, + }; +} + +describe('ResumeAgentController (POST /agents/chat/resume)', () => { + let app; + let mockInitializeClient; + let mockAddTitle; + let capturedInit; + let settle; + let settled; + + beforeEach(() => { + jest.clearAllMocks(); + + capturedInit = null; + mockCheckAndIncrementPendingRequest.mockResolvedValue({ allowed: true }); + mockDecrementPendingRequest.mockResolvedValue(undefined); + mockDeleteAgentCheckpoint.mockResolvedValue(undefined); + mockCleanupMCPRequestContextForReq.mockResolvedValue(undefined); + mockSaveMessage.mockResolvedValue(undefined); + mockGetConvo.mockResolvedValue(null); + mockGetMessages.mockResolvedValue([]); + mockJobStore.getJob.mockResolvedValue({ tokenUsage: null, contextUsage: null }); + mockJobStore.updateJob.mockResolvedValue(undefined); + mockGenerationJobManager.getResumeState.mockResolvedValue({ aggregatedContent: [] }); + mockGenerationJobManager.emitDone.mockResolvedValue(undefined); + mockGenerationJobManager.emitError.mockResolvedValue(undefined); + mockGenerationJobManager.emitChunk.mockResolvedValue(undefined); + mockGenerationJobManager.completeJob.mockResolvedValue(undefined); + mockGenerationJobManager.approvals.resolve.mockResolvedValue(true); + + // `decrementPendingRequest` runs in the controller's `finally` on every + // post-ACK path, so resolving on it signals the async continuation is done. + settled = new Promise((resolve) => { + settle = resolve; + }); + mockDecrementPendingRequest.mockImplementation(async () => { + settle(); + }); + + mockAddTitle = jest.fn().mockResolvedValue(undefined); + mockInitializeClient = jest.fn(async ({ req }) => { + // Capture the request state the controller seeds BEFORE reconstruction. + capturedInit = { + parentMessageId: req.body.parentMessageId, + files: req.body.files, + conversationCreatedAt: req.conversationCreatedAt, + timezone: req.body.timezone, + }; + return { client: makeClient(), userMCPAuthMap: { server1: { token: 't' } } }; + }); + + app = express(); + app.use(express.json()); + app.use((req, _res, next) => { + req.user = { id: USER_ID, tenantId: TENANT_ID }; + req.config = { + endpoints: { agents: { checkpointer: { type: 'mongo' } } }, + interfaceConfig: {}, + }; + next(); + }); + app.post('/api/agents/chat/resume', (req, res, next) => + ResumeAgentController(req, res, next, mockInitializeClient, mockAddTitle), + ); + }); + + const post = (body) => request(app).post('/api/agents/chat/resume').send(body); + + const approveBody = (extra = {}) => ({ + conversationId: CONVO_ID, + actionId: ACTION_ID, + agent_id: AGENT_ID, + endpoint: 'agents', + decisions: [{ tool_call_id: 'tc1', decision: 'approve' }], + ...extra, + }); + + describe('temporal context restore', () => { + it('restores req.conversationCreatedAt from the convo before initializeClient', async () => { + // Temporal prompt vars must resolve against the paused anchor, not resume wall-clock. + mockGetConvo.mockResolvedValue({ createdAt: new Date('2020-01-02T03:04:05.000Z') }); + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + const res = await post(approveBody()); + expect(res.status).toBe(200); + await settled; + expect(capturedInit.conversationCreatedAt).toBe('2020-01-02T03:04:05.000Z'); + }); + + it('leaves conversationCreatedAt unset when the convo lookup yields nothing', async () => { + mockGetConvo.mockResolvedValue(null); + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + const res = await post(approveBody()); + expect(res.status).toBe(200); + await settled; + expect(capturedInit.conversationCreatedAt).toBeUndefined(); + }); + }); + + describe('MCP request-context lifecycle', () => { + it('pre-seeds the run-scoped MCP context before initializeClient and tears it down after', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + const res = await post(approveBody()); + expect(res.status).toBe(200); + await settled; // the controller's finally has run + + // Seeded with a null `res` + cleanupOnResponse:false so the post-ACK tool load + // finds the existing store instead of getting undefined (res is already finished). + expect(mockGetMCPRequestContext).toHaveBeenCalledWith(expect.anything(), undefined, { + cleanupOnResponse: false, + }); + // ...and seeded BEFORE the client (hence tool loading) is built. + expect(mockGetMCPRequestContext.mock.invocationCallOrder[0]).toBeLessThan( + mockInitializeClient.mock.invocationCallOrder[0], + ); + // ...then torn down exactly once in the finally. + expect(mockCleanupMCPRequestContextForReq).toHaveBeenCalledTimes(1); + }); + }); + + describe('request guards (rejected before claiming the action)', () => { + it('400 when conversationId is missing', async () => { + const res = await post({ actionId: ACTION_ID }); + expect(res.status).toBe(400); + expect(res.body.error).toMatch(/conversationId is required/i); + expect(mockGenerationJobManager.getJob).not.toHaveBeenCalled(); + }); + + it('400 when conversationId is the "new" placeholder', async () => { + const res = await post({ conversationId: 'new', actionId: ACTION_ID }); + expect(res.status).toBe(400); + expect(mockGenerationJobManager.getJob).not.toHaveBeenCalled(); + }); + + it('404 when there is no paused job for the conversation', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(null); + const res = await post(approveBody()); + expect(res.status).toBe(404); + expect(res.body.error).toMatch(/no paused generation/i); + }); + + it('403 when the job belongs to another user', async () => { + mockGenerationJobManager.getJob.mockResolvedValue( + makeToolApprovalJob({ metadata: { userId: 'someone-else' } }), + ); + const res = await post(approveBody()); + expect(res.status).toBe(403); + expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled(); + }); + + it('403 on a tenant mismatch', async () => { + mockGenerationJobManager.getJob.mockResolvedValue( + makeToolApprovalJob({ metadata: { tenantId: 'other-tenant' } }), + ); + const res = await post(approveBody()); + expect(res.status).toBe(403); + }); + + it('403 when the resume omits the paused agent_id', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + const res = await post(approveBody({ agent_id: undefined })); + expect(res.status).toBe(403); + expect(res.body.error).toMatch(/different agent/i); + }); + + it('403 when the resume claims a different agent_id', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + const res = await post(approveBody({ agent_id: 'agent-other' })); + expect(res.status).toBe(403); + expect(res.body.error).toMatch(/different agent/i); + }); + + it('403 when the resume claims a different endpoint', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + const res = await post(approveBody({ endpoint: 'bedrock' })); + expect(res.status).toBe(403); + expect(res.body.error).toMatch(/different endpoint/i); + }); + + it('403 when the resume OMITS the paused endpoint (no fall-through to ephemeral)', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + const res = await post(approveBody({ endpoint: undefined })); + expect(res.status).toBe(403); + expect(res.body.error).toMatch(/different endpoint/i); + expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled(); + }); + + it('409 when the job is not in requires_action (already terminal; no expire)', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob({ status: 'running' })); + const res = await post(approveBody()); + expect(res.status).toBe(409); + expect(res.body.error).toMatch(/no live pending action/i); + // Already resolved/terminal — nothing to expire. + expect(mockGenerationJobManager.expireApproval).not.toHaveBeenCalled(); + }); + + it('409 AND drives expiry when the pending action has expired (stale)', async () => { + const job = makeToolApprovalJob(); + job.metadata.pendingAction.expiresAt = Date.now() - 1_000; + mockGenerationJobManager.getJob.mockResolvedValue(job); + const res = await post(approveBody()); + expect(res.status).toBe(409); + expect(res.body.error).toMatch(/no live pending action/i); + // The stale action is expired NOW (expire CAS + terminal SSE) so an attached SSE + // client gets a terminal event instead of hanging until the periodic sweeper runs. + expect(mockGenerationJobManager.expireApproval).toHaveBeenCalledWith(CONVO_ID, ACTION_ID); + }); + + it('400 when actionId is missing', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + const res = await post(approveBody({ actionId: undefined })); + expect(res.status).toBe(400); + expect(res.body.error).toMatch(/actionId is required/i); + }); + + it('409 when actionId targets a stale action', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + const res = await post(approveBody({ actionId: 'stale-action' })); + expect(res.status).toBe(409); + expect(res.body.error).toMatch(/stale action/i); + }); + + it('400 when a tool call is left undecided', async () => { + const job = makeToolApprovalJob(); + job.metadata.pendingAction.payload.action_requests = [ + { tool_call_id: 'tc1' }, + { tool_call_id: 'tc2' }, + ]; + job.metadata.pendingAction.payload.review_configs = [ + { tool_call_id: 'tc1', allowed_decisions: ['approve', 'reject'] }, + { tool_call_id: 'tc2', allowed_decisions: ['approve', 'reject'] }, + ]; + mockGenerationJobManager.getJob.mockResolvedValue(job); + const res = await post(approveBody()); // only decides tc1 + expect(res.status).toBe(400); + expect(res.body.error).toMatch(/must be decided/i); + expect(res.body.undecided).toEqual(['tc2']); + expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled(); + }); + + it('403 when a decision is not permitted by the tool policy', async () => { + const job = makeToolApprovalJob(); + // Policy restricts tc1 to reject only; an `approve` must be refused. + job.metadata.pendingAction.payload.review_configs = [ + { tool_call_id: 'tc1', allowed_decisions: ['reject'] }, + ]; + mockGenerationJobManager.getJob.mockResolvedValue(job); + const res = await post(approveBody()); + expect(res.status).toBe(403); + expect(res.body.error).toMatch(/not permitted/i); + expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled(); + }); + + it('400 when an edit decision omits editedArguments', async () => { + const job = makeToolApprovalJob(); + job.metadata.pendingAction.payload.review_configs = [ + { tool_call_id: 'tc1', allowed_decisions: ['approve', 'edit'] }, + ]; + mockGenerationJobManager.getJob.mockResolvedValue(job); + const res = await post( + approveBody({ decisions: [{ tool_call_id: 'tc1', decision: 'edit' }] }), + ); + expect(res.status).toBe(400); + expect(res.body.error).toMatch(/editedArguments/i); + expect(res.body.incomplete).toEqual(['tc1']); + expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled(); + }); + + it('400 when a respond decision omits responseText', async () => { + const job = makeToolApprovalJob(); + job.metadata.pendingAction.payload.review_configs = [ + { tool_call_id: 'tc1', allowed_decisions: ['approve', 'respond'] }, + ]; + mockGenerationJobManager.getJob.mockResolvedValue(job); + const res = await post( + approveBody({ decisions: [{ tool_call_id: 'tc1', decision: 'respond' }] }), + ); + expect(res.status).toBe(400); + expect(res.body.error).toMatch(/responseText/i); + }); + + it('accepts a complete edit decision (editedArguments present)', async () => { + const job = makeToolApprovalJob(); + job.metadata.pendingAction.payload.review_configs = [ + { tool_call_id: 'tc1', allowed_decisions: ['approve', 'edit'] }, + ]; + mockGenerationJobManager.getJob.mockResolvedValue(job); + const res = await post( + approveBody({ + decisions: [{ tool_call_id: 'tc1', decision: 'edit', editedArguments: { q: 'x' } }], + }), + ); + expect(res.status).toBe(200); + await settled; + await flush(); + }); + + it('403 when the resume request fingerprint does not match the paused config', async () => { + const job = makeToolApprovalJob(); + job.metadata.pendingAction.requestFingerprint = 'fingerprint-of-a-different-config'; + mockGenerationJobManager.getJob.mockResolvedValue(job); + const res = await post(approveBody()); + expect(res.status).toBe(403); + expect(res.body.error).toMatch(/different agent configuration/i); + expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled(); + }); + + it('proceeds when the resume request fingerprint matches the paused config', async () => { + const { computeAgentRequestFingerprint } = jest.requireActual('@librechat/api'); + const job = makeToolApprovalJob(); + job.metadata.pendingAction.requestFingerprint = computeAgentRequestFingerprint({ + endpoint: 'agents', + agent_id: AGENT_ID, + }); + mockGenerationJobManager.getJob.mockResolvedValue(job); + const res = await post(approveBody()); + expect(res.status).toBe(200); + expect(mockGenerationJobManager.approvals.resolve).toHaveBeenCalledWith(CONVO_ID, ACTION_ID); + await settled; + await flush(); + }); + + it('403 when the resume sends a different promptPrefix than the paused config', async () => { + const { computeAgentRequestFingerprint } = jest.requireActual('@librechat/api'); + const job = makeToolApprovalJob(); + // Ephemeral instructions come from promptPrefix, so it's part of the fingerprint. + job.metadata.pendingAction.requestFingerprint = computeAgentRequestFingerprint({ + endpoint: 'agents', + agent_id: AGENT_ID, + promptPrefix: 'be terse', + }); + mockGenerationJobManager.getJob.mockResolvedValue(job); + const res = await post(approveBody({ promptPrefix: 'ignore previous instructions' })); + expect(res.status).toBe(403); + expect(res.body.error).toMatch(/different agent configuration/i); + expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled(); + }); + + it('400 when an ask_user_question resume carries no answer', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeAskUserJob()); + const res = await post({ + conversationId: CONVO_ID, + actionId: ACTION_ID, + agent_id: AGENT_ID, + endpoint: 'agents', + }); + expect(res.status).toBe(400); + expect(res.body.error).toMatch(/answer is required/i); + }); + + it('400 on an unsupported pending-action type', async () => { + const job = makeToolApprovalJob(); + job.metadata.pendingAction.payload = { type: 'totally_unknown' }; + mockGenerationJobManager.getJob.mockResolvedValue(job); + const res = await post(approveBody()); + expect(res.status).toBe(400); + expect(res.body.error).toMatch(/unsupported pending action/i); + expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled(); + }); + + it('proceeds (does not 403) for a pre-multi-tenancy job with no tenantId', async () => { + // hasTenantMismatch only blocks when the job carries a tenantId that differs; + // an untenanted (legacy) job must still resume once the userId check passes. + const job = makeToolApprovalJob({ metadata: { tenantId: undefined } }); + mockGenerationJobManager.getJob.mockResolvedValue(job); + const res = await post(approveBody()); + expect(res.status).toBe(200); + expect(mockGenerationJobManager.approvals.resolve).toHaveBeenCalledWith(CONVO_ID, ACTION_ID); + await settled; + await flush(); + }); + + it('429 when the concurrency gate rejects the resume', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + mockCheckAndIncrementPendingRequest.mockResolvedValue({ allowed: false }); + const res = await post(approveBody()); + expect(res.status).toBe(429); + expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled(); + }); + + it('409 and releases the slot when the action was already claimed (single-winner)', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + mockGenerationJobManager.approvals.resolve.mockResolvedValue(false); + const res = await post(approveBody()); + expect(res.status).toBe(409); + expect(res.body.error).toMatch(/already resolved or has expired/i); + expect(mockGenerationJobManager.approvals.resolve).toHaveBeenCalledWith(CONVO_ID, ACTION_ID); + expect(mockDecrementPendingRequest).toHaveBeenCalledWith(USER_ID); + expect(mockInitializeClient).not.toHaveBeenCalled(); + }); + + it('releases the slot when the claim itself throws (store error, not a leak)', async () => { + // The increment happens before the claim, which runs before the run's own + // try/finally — a store error here must still release the slot or a retry of the + // still-paused approval gets spuriously 429'd until the counter TTL expires. + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + mockGenerationJobManager.approvals.resolve.mockRejectedValue(new Error('redis down')); + const res = await post(approveBody()); + expect(res.status).toBe(500); + expect(mockDecrementPendingRequest).toHaveBeenCalledWith(USER_ID); + expect(mockInitializeClient).not.toHaveBeenCalled(); + }); + }); + + describe('happy path: approve -> reconstruct -> resume -> finalize', () => { + it('ACKs immediately and claims the action atomically with the submitted actionId', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + const res = await post(approveBody()); + expect(res.status).toBe(200); + expect(res.body).toEqual({ + streamId: CONVO_ID, + conversationId: CONVO_ID, + status: 'resuming', + }); + expect(mockGenerationJobManager.approvals.resolve).toHaveBeenCalledWith(CONVO_ID, ACTION_ID); + await settled; + await flush(); + }); + + it('seeds the thread parent before reconstruction and maps the decision to the SDK', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + await post(approveBody()); + await settled; + await flush(); + + // initializeAgent scopes thread files off req.body.parentMessageId, seeded + // from the paused user message's parent before initializeClient runs. + expect(capturedInit.parentMessageId).toBe(THREAD_PARENT_ID); + + expect(mockInitializeClient).toHaveBeenCalledTimes(1); + const client = await mockInitializeClient.mock.results[0].value.then((r) => r.client); + expect(client.resumeCompletion).toHaveBeenCalledWith( + expect.objectContaining({ + resumeValue: { tc1: { type: 'approve' } }, + userMCPAuthMap: { server1: { token: 't' } }, + }), + ); + }); + + it('restores the paused user message files before reconstruction (execute-code files)', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + // The resume body carries no files; the controller must source them from the + // persisted user message so an approved code/read-file tool keeps its uploads. + mockGetMessages.mockResolvedValue([{ files: [{ file_id: 'f1' }] }]); + + await post(approveBody()); + await settled; + await flush(); + + expect(capturedInit.files).toEqual([{ file_id: 'f1' }]); + }); + + it('ignores client-supplied resume files, sourcing from the paused job (security)', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + // The paused turn's authoritative files (DB row); a crafted client tries to swap them. + mockGetMessages.mockResolvedValue([{ files: [{ file_id: 'paused' }] }]); + + await post(approveBody({ files: [{ file_id: 'attacker-supplied' }] })); + await settled; + await flush(); + + // The crafted client files must NOT reach initializeAgent — only the paused set. + expect(capturedInit.files).toEqual([{ file_id: 'paused' }]); + }); + + it('clears client-supplied resume files when the paused turn had none (security)', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + mockGetMessages.mockResolvedValue([{ files: [] }]); // the paused turn had no files + + await post(approveBody({ files: [{ file_id: 'attacker-supplied' }] })); + await settled; + await flush(); + + expect(capturedInit.files).toEqual([]); + }); + + it('prefers job-metadata files over both the client body and the DB row', async () => { + mockGenerationJobManager.getJob.mockResolvedValue( + makeToolApprovalJob({ + metadata: { + userMessage: { + messageId: USER_MSG_ID, + parentMessageId: THREAD_PARENT_ID, + text: 'x', + files: [{ file_id: 'meta' }], + }, + }, + }), + ); + mockGetMessages.mockResolvedValue([{ files: [{ file_id: 'db' }] }]); + + await post(approveBody({ files: [{ file_id: 'attacker-supplied' }] })); + await settled; + await flush(); + + expect(capturedInit.files).toEqual([{ file_id: 'meta' }]); + }); + + it('carries the restored files onto the final requestMessage (user bubble keeps attachments)', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + // job.metadata.userMessage is persisted without files; the final SSE must still + // carry the restored uploads or the user bubble loses its attachments on resume. + mockGetMessages.mockResolvedValue([{ files: [{ file_id: 'f1', filename: 'a.pdf' }] }]); + + await post(approveBody()); + await settled; + await flush(); + + const [, finalEvent] = mockGenerationJobManager.emitDone.mock.calls[0]; + expect(finalEvent.requestMessage).toMatchObject({ + messageId: USER_MSG_ID, + isCreatedByUser: true, + files: [{ file_id: 'f1', filename: 'a.pdf' }], + }); + }); + + it('persists the finished response, emits done, completes the job, and prunes the checkpoint', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + await post(approveBody()); + await settled; + await flush(); + + expect(mockSaveMessage).toHaveBeenCalledWith( + expect.objectContaining({ userId: USER_ID, isTemporary: false }), + expect.objectContaining({ + messageId: RESPONSE_MSG_ID, + parentMessageId: USER_MSG_ID, + conversationId: CONVO_ID, + content: [{ type: 'text', text: 'resumed answer' }], + unfinished: false, + error: false, + isCreatedByUser: false, + user: USER_ID, + agent_id: AGENT_ID, + }), + expect.objectContaining({ + context: 'api/server/controllers/agents/resume.js - resumed response end', + }), + ); + + // Assert the finalEvent STRUCTURE, not just the hardcoded `final: true` literal — + // a `final: true`-only check would still pass if the entire content / title / + // requestMessage build in finalizeResumedTurn were deleted. + const [doneStreamId, finalEvent] = mockGenerationJobManager.emitDone.mock.calls[0]; + expect(doneStreamId).toBe(CONVO_ID); + expect(finalEvent).toMatchObject({ + final: true, + conversation: { conversationId: CONVO_ID }, + responseMessage: { + messageId: RESPONSE_MSG_ID, + content: [{ type: 'text', text: 'resumed answer' }], + unfinished: false, + }, + requestMessage: { messageId: USER_MSG_ID, isCreatedByUser: true }, + }); + expect(typeof finalEvent.title).toBe('string'); + + expect(mockGenerationJobManager.completeJob).toHaveBeenCalledWith(CONVO_ID); + expect(mockDeleteAgentCheckpoint).toHaveBeenCalledWith(CONVO_ID, { type: 'mongo' }); + expect(mockDecrementPendingRequest).toHaveBeenCalledWith(USER_ID); + expect(mockDisposeClient).toHaveBeenCalledTimes(1); + }); + + it('skips finalization (no save/emitDone/complete) when the job was replaced mid-resume', async () => { + // The paused job has createdAt 1000; a concurrent request reused this conversationId, + // so the live job now has a different createdAt — finalizing would clobber the newer + // turn's job. The finally still runs (slot release), so `settled` resolves. + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob({ createdAt: 1000 })); + mockJobStore.getJob.mockResolvedValue({ + tokenUsage: null, + contextUsage: null, + createdAt: 2000, + }); + await post(approveBody()); + await settled; + await flush(); + + expect(mockSaveMessage).not.toHaveBeenCalled(); + expect(mockGenerationJobManager.emitDone).not.toHaveBeenCalled(); + expect(mockGenerationJobManager.completeJob).not.toHaveBeenCalled(); + }); + + it('does not release the slot in the finally when the client already released it on pause', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + // Simulate handleRunInterrupt having released the concurrency slot on a re-pause. + mockInitializeClient.mockResolvedValue({ + client: makeClient({ pendingRequestReleased: true }), + userMCPAuthMap: {}, + }); + let disposed; + const disposedP = new Promise((resolve) => { + disposed = resolve; + }); + mockDisposeClient.mockImplementation(() => disposed()); + + await post(approveBody()); + await disposedP; + await flush(); + + // The finally must NOT double-release — handleRunInterrupt already did. + expect(mockDecrementPendingRequest).not.toHaveBeenCalled(); + }); + + it('persists tool artifacts produced by the resumed continuation as attachments', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + const artifact = { type: 'image', file_id: 'img-1' }; + // The lean resume path bypasses BaseClient.sendMessage's artifact await, so the + // controller must await client.artifactPromises itself (and drop null results). + mockInitializeClient.mockResolvedValue({ + client: makeClient({ + artifactPromises: [Promise.resolve(artifact), Promise.resolve(null)], + }), + userMCPAuthMap: {}, + }); + + await post(approveBody()); + await settled; + await flush(); + + expect(mockSaveMessage).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ attachments: [artifact] }), + expect.anything(), + ); + }); + + it('falls back to the aggregated store content when the live client content is empty', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + // No live content on the rebuilt client → the saved response must use the + // pre-pause aggregated content from the store, not an empty array. + mockInitializeClient.mockResolvedValue({ + client: makeClient({ contentParts: [] }), + userMCPAuthMap: {}, + }); + mockGenerationJobManager.getResumeState.mockResolvedValue({ + aggregatedContent: [{ type: 'text', text: 'from-store' }], + }); + + await post(approveBody()); + await settled; + await flush(); + + expect(mockSaveMessage).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ content: [{ type: 'text', text: 'from-store' }] }), + expect.anything(), + ); + }); + + it('strips malformed tool_call parts from the saved content', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + mockInitializeClient.mockResolvedValue({ + client: makeClient({ + contentParts: [ + { type: 'text', text: 'kept' }, + { type: 'tool_call' }, // malformed: no tool_call payload — must be filtered + ], + }), + userMCPAuthMap: {}, + }); + + await post(approveBody()); + await settled; + await flush(); + + expect(mockSaveMessage).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ content: [{ type: 'text', text: 'kept' }] }), + expect.anything(), + ); + }); + + it('merges previously persisted attachments with the resumed segment artifacts', async () => { + const priorArtifact = { type: 'image', file_id: 'prior-1' }; + const newArtifact = { type: 'image', file_id: 'new-1' }; + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + // An earlier pause segment already saved an attachment on the response row. + mockGetMessages.mockResolvedValue([{ attachments: [priorArtifact] }]); + mockInitializeClient.mockResolvedValue({ + client: makeClient({ artifactPromises: [Promise.resolve(newArtifact)] }), + userMCPAuthMap: {}, + }); + + await post(approveBody()); + await settled; + await flush(); + + expect(mockSaveMessage).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ attachments: [priorArtifact, newArtifact] }), + expect.anything(), + ); + }); + + it('persists the resumed run context calibration (contextMeta) onto the saved response', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + const contextMeta = { calibrationRatio: 0.8 }; + mockInitializeClient.mockResolvedValue({ + client: makeClient({ contextMeta }), + userMCPAuthMap: {}, + }); + + await post(approveBody()); + await settled; + await flush(); + + expect(mockSaveMessage).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ contextMeta }), + expect.anything(), + ); + }); + + it('carries manualSkills/alwaysAppliedSkills onto the resumed requestMessage', async () => { + const job = makeToolApprovalJob(); + job.metadata.userMessage.manualSkills = ['skill-a']; + job.metadata.userMessage.alwaysAppliedSkills = ['skill-b']; + mockGenerationJobManager.getJob.mockResolvedValue(job); + + await post(approveBody()); + await settled; + await flush(); + + const [, finalEvent] = mockGenerationJobManager.emitDone.mock.calls[0]; + expect(finalEvent.requestMessage).toMatchObject({ + manualSkills: ['skill-a'], + alwaysAppliedSkills: ['skill-b'], + }); + }); + + it('attaches client response metadata to the saved message when present', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + const contextUsage = { tokenCount: 1234 }; + mockInitializeClient.mockResolvedValue({ + client: makeClient({ buildResponseMetadata: jest.fn(() => ({ contextUsage })) }), + userMCPAuthMap: {}, + }); + + await post(approveBody()); + await settled; + await flush(); + + expect(mockSaveMessage).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ metadata: expect.objectContaining({ contextUsage }) }), + expect.anything(), + ); + }); + + it('resumes an ask_user_question with the free-form answer', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeAskUserJob()); + const res = await post({ + conversationId: CONVO_ID, + actionId: ACTION_ID, + agent_id: AGENT_ID, + endpoint: 'agents', + answer: 'call it report.pdf', + }); + expect(res.status).toBe(200); + await settled; + await flush(); + + const client = await mockInitializeClient.mock.results[0].value.then((r) => r.client); + expect(client.resumeCompletion).toHaveBeenCalledWith( + expect.objectContaining({ resumeValue: { answer: 'call it report.pdf' } }), + ); + expect(mockGenerationJobManager.completeJob).toHaveBeenCalledWith(CONVO_ID); + }); + + it('generates a title for a first-turn pause before completing the stream', async () => { + const job = makeToolApprovalJob(); + job.metadata.userMessage.parentMessageId = Constants.NO_PARENT; + mockGenerationJobManager.getJob.mockResolvedValue(job); + mockGetConvo.mockResolvedValue({ title: 'New Chat' }); + + await post(approveBody()); + await settled; + await flush(); + + expect(mockAddTitle).toHaveBeenCalledTimes(1); + // Title is emitted (and the job completed) — order matters but both must happen. + expect(mockGenerationJobManager.completeJob).toHaveBeenCalledWith(CONVO_ID); + }); + + it('still finalizes the turn when first-turn title generation throws', async () => { + const job = makeToolApprovalJob(); + job.metadata.userMessage.parentMessageId = Constants.NO_PARENT; + mockGenerationJobManager.getJob.mockResolvedValue(job); + mockGetConvo.mockResolvedValue({ title: 'New Chat' }); + // Title generation is best-effort: a throw must not break the resumed turn. + mockAddTitle.mockRejectedValue(new Error('title service down')); + + await post(approveBody()); + await settled; + await flush(); + + expect(mockLogger.error).toHaveBeenCalled(); + expect(mockSaveMessage).toHaveBeenCalledTimes(1); + expect(mockGenerationJobManager.emitDone).toHaveBeenCalledWith(CONVO_ID, expect.any(Object)); + expect(mockGenerationJobManager.completeJob).toHaveBeenCalledWith(CONVO_ID); + }); + }); + + describe('non-finalizing outcomes', () => { + it('re-pause: does not finalize when the run pauses again', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + mockInitializeClient.mockResolvedValue({ + client: makeClient({ pendingApproval: true }), + userMCPAuthMap: {}, + }); + + const res = await post(approveBody()); + expect(res.status).toBe(200); + await settled; + await flush(); + + // It persists progress (unfinished) but must NOT finalize the turn. + expect(mockSaveMessage).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ unfinished: true }), + expect.anything(), + ); + expect(mockGenerationJobManager.emitDone).not.toHaveBeenCalled(); + expect(mockGenerationJobManager.completeJob).not.toHaveBeenCalled(); + expect(mockDeleteAgentCheckpoint).not.toHaveBeenCalled(); + // The slot is still released and the client disposed. + expect(mockDecrementPendingRequest).toHaveBeenCalledWith(USER_ID); + expect(mockDisposeClient).toHaveBeenCalledTimes(1); + }); + + it('re-pause: persists the segment content (unfinished) so an expiring re-pause keeps it', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + mockInitializeClient.mockResolvedValue({ + client: makeClient({ + pendingApproval: true, + contentParts: [{ type: 'text', text: 'streamed this segment' }], + artifactPromises: [], + }), + userMCPAuthMap: {}, + }); + + const res = await post(approveBody()); + expect(res.status).toBe(200); + await settled; + await flush(); + + expect(mockSaveMessage).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ + content: [{ type: 'text', text: 'streamed this segment' }], + unfinished: true, + }), + expect.objectContaining({ + context: 'api/server/controllers/agents/resume.js - re-pause progress persist', + }), + ); + expect(mockGenerationJobManager.emitDone).not.toHaveBeenCalled(); + }); + + it('re-pause: persists artifacts produced before pausing again (unfinished)', async () => { + const artifact = { type: 'image', file_id: 'seg-1' }; + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + mockInitializeClient.mockResolvedValue({ + client: makeClient({ + pendingApproval: true, + artifactPromises: [Promise.resolve(artifact)], + }), + userMCPAuthMap: {}, + }); + + const res = await post(approveBody()); + expect(res.status).toBe(200); + await settled; + await flush(); + + // No finalize, but the segment's artifact is persisted unfinished so the next + // resume's finalize can merge it (otherwise the fresh client drops it). + expect(mockGenerationJobManager.emitDone).not.toHaveBeenCalled(); + expect(mockSaveMessage).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ attachments: [artifact], unfinished: true }), + expect.objectContaining({ + context: 'api/server/controllers/agents/resume.js - re-pause progress persist', + }), + ); + }); + + it('abort-during-resume: lets the abort route finalize, does not double-save', async () => { + const job = makeToolApprovalJob(); + mockGenerationJobManager.getJob.mockResolvedValue(job); + mockInitializeClient.mockImplementation(async () => { + job.abortController.abort(); + return { client: makeClient(), userMCPAuthMap: {} }; + }); + + const res = await post(approveBody()); + expect(res.status).toBe(200); + await settled; + await flush(); + + expect(mockSaveMessage).not.toHaveBeenCalled(); + expect(mockGenerationJobManager.emitDone).not.toHaveBeenCalled(); + expect(mockGenerationJobManager.completeJob).not.toHaveBeenCalled(); + expect(mockDecrementPendingRequest).toHaveBeenCalledWith(USER_ID); + }); + + it('resume failure: emits an error, finalizes the job, and prunes the checkpoint', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + mockInitializeClient.mockResolvedValue({ + client: makeClient({ + resumeCompletion: jest.fn().mockRejectedValue(new Error('boom')), + }), + userMCPAuthMap: {}, + }); + + const res = await post(approveBody()); + expect(res.status).toBe(200); // already ACKed before the failure + await settled; + await flush(); + + expect(mockGenerationJobManager.emitError).toHaveBeenCalledWith(CONVO_ID, 'boom'); + expect(mockGenerationJobManager.completeJob).toHaveBeenCalledWith(CONVO_ID, 'boom'); + expect(mockDeleteAgentCheckpoint).toHaveBeenCalledWith(CONVO_ID, { type: 'mongo' }); + expect(mockDecrementPendingRequest).toHaveBeenCalledWith(USER_ID); + expect(mockSaveMessage).not.toHaveBeenCalled(); + }); + + it('forces a terminal job state when completeJob also fails during a resume error', async () => { + mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob()); + mockInitializeClient.mockResolvedValue({ + client: makeClient({ + resumeCompletion: jest.fn().mockRejectedValue(new Error('boom')), + }), + userMCPAuthMap: {}, + }); + // The error path's completeJob also fails → last-resort updateJob must force a + // terminal state so the job isn't orphaned in `running`. + mockGenerationJobManager.completeJob.mockRejectedValue(new Error('complete failed')); + + await post(approveBody()); + await settled; + await flush(); + + expect(mockJobStore.updateJob).toHaveBeenCalledWith( + CONVO_ID, + expect.objectContaining({ status: 'error', error: 'Resume failed' }), + ); + expect(mockDecrementPendingRequest).toHaveBeenCalledWith(USER_ID); + }); + }); +}); diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index df2515c2ab..f9432ae821 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -33,6 +33,13 @@ const { GenerationJobManager, getTransactionsConfig, resolveRecursionLimit, + buildPendingAction, + computeAgentRequestFingerprint, + extractDiscoveredToolsFromHistory, + pickResumeContext, + getApprovalTtlMs, + isHITLEnabled, + deleteAgentCheckpoint, getRequestMemories, createMemoryProcessor, agentHasInlineMemoryTools, @@ -54,6 +61,7 @@ const { hasUrlContextTool, appendYouTubeVideoParts, resolveYouTubeInjectionConfig, + decrementPendingRequest, } = require('@librechat/api'); const { Callback, @@ -69,6 +77,7 @@ const { Permissions, VisionModes, ContentTypes, + ApprovalEvents, EModelEndpoint, PermissionTypes, AgentCapabilities, @@ -1165,6 +1174,150 @@ class AgentClient extends BaseClient { * @param {Record>} [params.userMCPAuthMap] * @param {AbortController} [params.abortController] */ + /** + * @deprecated Agent Chain — strip hidden intermediate sequential-agent content + * before persistence, keeping only the last part + tool_call parts. Mirrors the + * chat path so a HITL resume doesn't persist/emit intermediate outputs the + * agent's `hide_sequential_outputs` setting is meant to hide. + */ + applyHideSequentialOutputsFilter() { + if (!this.options.agent?.hide_sequential_outputs || !Array.isArray(this.contentParts)) { + return; + } + this.contentParts = this.contentParts.filter( + (part, index) => + index >= this.contentParts.length - 1 || + part.type === ContentTypes.TOOL_CALL || + part.tool_call_ids, + ); + } + + /** + * Surface any human-in-the-loop interrupt the SDK captured during the most + * recent `processStream` / `resume`. When the run paused for tool approval (or + * an ask-user question), mark the job `requires_action`, persist the pending + * review record, and emit it to live clients — then set `this.pendingApproval` + * so the controller leaves the turn unfinalized for the resume route to continue. + * + * No-op when the run completed without an interrupt, or when the job was aborted + * between the interrupt firing and this mark (a late interrupt must not pause a + * dead job — the atomic `pause` transition returns false and we drop it). + * + * @param {AgentRun} run + * @param {string} [streamId] + */ + async handleRunInterrupt(run, streamId) { + if (!streamId || typeof run?.getInterrupt !== 'function') { + return; + } + const interrupt = run.getInterrupt(); + if (!interrupt?.payload) { + return; + } + + const appConfig = this.options.req?.config; + const checkpointerCfg = appConfig?.endpoints?.[EModelEndpoint.agents]?.checkpointer; + // Persist the resolved model parameters (temperature, max tokens, custom endpoint + // params, …) so an ephemeral-agent resume continues with the SAME settings the run + // paused on. The resume payload omits them and they aren't part of the fingerprint, so + // without this the rebuilt ephemeral run falls back to defaults. (Saved agents source + // these from the DB record server-side, so this is belt-and-suspenders for them.) + const resumeContext = pickResumeContext(this.options.req?.body); + const resolvedModelParameters = this.options.agent?.model_parameters; + if (resolvedModelParameters && typeof resolvedModelParameters === 'object') { + resumeContext.model_parameters = resolvedModelParameters; + } + const pendingAction = buildPendingAction(interrupt.payload, { + streamId, + conversationId: this.conversationId, + // runId mirrors the LangGraph checkpoint namespace when the SDK provides it + // (its documented meaning), falling back to the response message id. + runId: interrupt.checkpointNs ?? this.responseMessageId, + responseMessageId: this.responseMessageId, + interruptId: interrupt.interruptId, + // thread_id was bound to conversationId at run config (config.configurable); + // fall back to it when the SDK doesn't echo threadId on the interrupt. + threadId: interrupt.threadId ?? this.conversationId, + ttlMs: getApprovalTtlMs(checkpointerCfg), + // Pin the graph-determining request fields so resume can't rebuild this paused + // run on a different agent/tool set (esp. ephemeral agents, whose agent_id is + // undefined so the id guard can't tell two configs apart). + requestFingerprint: computeAgentRequestFingerprint(this.options.req?.body ?? {}), + // Persist those same fields verbatim so the resume route can REPLAY them — a + // reload/cross-replica resume can't reconstruct the ephemeral config client-side, + // so the server restores it and rebuilds the same graph (and the fingerprint matches). + resumeContext, + }); + + // Job-replacement guard: streamId == conversationId is reused per conversation, so a + // newer request can replace this run's job. If this (older) run hits an interrupt + // after a replacement, pausing would flip the NEWER job to requires_action with this + // stale run's pending action, blocking fresh work behind the wrong approval. Only + // pause when the live job is still the one THIS run created (mirrors request.js). + if (this.jobCreatedAt != null) { + const liveJob = await GenerationJobManager.getJobStore().getJob(streamId); + if (!liveJob || liveJob.createdAt !== this.jobCreatedAt) { + logger.debug(`[AgentClient] Interrupt fired but job ${streamId} was replaced; not pausing`); + return; + } + } + + const paused = await GenerationJobManager.approvals.pause(streamId, pendingAction); + if (!paused) { + logger.debug( + `[AgentClient] Interrupt fired but job ${streamId} was not running; not pausing`, + ); + return; + } + + // Capture deferred tools discovered (via tool_search) earlier in THIS turn so resume + // can replay them into createRun. The resumed graph is rebuilt with `messages: []` + // (state comes from the checkpoint), so the in-turn tool_search results that mark a + // deferred tool discovered aren't present there — without this the paused deferred + // tool would be missing from the rebuilt schema-only toolMap and resume would fail + // with "unknown tool". Inert for non-deferred turns (the set comes back empty). + try { + const runMessages = + typeof run.getRunMessages === 'function' ? run.getRunMessages() : undefined; + if (Array.isArray(runMessages) && runMessages.length > 0) { + const discovered = extractDiscoveredToolsFromHistory(runMessages); + if (discovered.size > 0) { + await GenerationJobManager.updateMetadata(streamId, { + discoveredTools: Array.from(discovered), + }); + } + } + } catch (err) { + logger.warn( + `[AgentClient] Failed to capture discovered tools for resume on ${streamId}`, + err?.message ?? err, + ); + } + + this.pendingApproval = pendingAction; + // Release the concurrency slot this request held the MOMENT the turn is durably + // paused — before the approval card is emitted — so the user's `/resume` can + // re-acquire one immediately. Otherwise a fast Approve races the HTTP-driver + // teardown (request.js pause branch / resume.js finally) that would otherwise + // release it, and `/resume` 429s under LIMIT_CONCURRENT_MESSAGES. Idempotent via + // the flag; if it fails here, the teardown still releases (it checks the flag). + if (!this.pendingRequestReleased) { + try { + await decrementPendingRequest(this.options.req?.user?.id); + this.pendingRequestReleased = true; + } catch (err) { + logger.error(`[AgentClient] Failed to release request slot on pause ${streamId}`, err); + } + } + await GenerationJobManager.emitChunk(streamId, { + event: ApprovalEvents.ON_PENDING_ACTION, + data: pendingAction, + }); + logger.debug( + `[AgentClient] Paused ${streamId} for ${interrupt.payload.type} (action ${pendingAction.actionId})`, + ); + } + async chatCompletion({ payload, userMCPAuthMap, abortController = null }) { /** @type {Partial} */ let config; @@ -1405,6 +1558,11 @@ class AgentClient extends BaseClient { run = await createRun({ agents, messages, + // This controller implements the full HITL pause/resume lifecycle (handleRunInterrupt + // persists the pending action; the /resume route rebuilds + continues the run), so it + // opts into the tool-approval wiring. Non-resumable callers (OpenAI-compat, Responses) + // leave this off so an approval-gated tool can't pause where there's no resume path. + hitlCapable: true, indexTokenCountMap, initialSummary, initialSessions, @@ -1452,16 +1610,29 @@ class AgentClient extends BaseClient { /** @deprecated Agent Chain */ config.configurable.last_agent_id = agents[agents.length - 1].id; + + // HITL: clear any checkpoint orphaned by a prior paused turn in this + // conversation (one that expired or was aborted while paused) so this fresh + // turn starts clean instead of rehydrating a stale interrupt — thread_id is + // the stable conversationId. No-op when HITL is off or nothing is orphaned. + if (streamId && isHITLEnabled(agentsEConfig?.toolApproval)) { + await deleteAgentCheckpoint(this.conversationId, agentsEConfig?.checkpointer); + } + await run.processStream({ messages }, config, { callbacks: { [Callback.TOOL_ERROR]: logToolError, }, }); + // HITL: if the run paused for tool approval, mark the job + // `requires_action` + emit the prompt and leave the turn unfinalized + // (the resume route continues it). No-op when the run completed. + await this.handleRunInterrupt(run, streamId); + config.signal = null; }; - const hideSequentialOutputs = config.configurable.hide_sequential_outputs; await runAgents(initialMessages); /** @@ -1501,20 +1672,7 @@ class AgentClient extends BaseClient { this.contentParts.unshift(...manualParts); } - /** @deprecated Agent Chain */ - if (hideSequentialOutputs) { - this.contentParts = this.contentParts.filter((part, index) => { - // Include parts that are either: - // 1. At or after the finalContentStart index - // 2. Of type tool_call - // 3. Have tool_call_ids property - return ( - index >= this.contentParts.length - 1 || - part.type === ContentTypes.TOOL_CALL || - part.tool_call_ids - ); - }); - } + this.applyHideSequentialOutputsFilter(); } catch (err) { if (abortController.signal.aborted) { logger.debug( @@ -1584,12 +1742,292 @@ class AgentClient extends BaseClient { this._resolveRun(this.run ?? null); this._resolveRun = null; } + + // HITL: a turn that completed (or errored) without pausing leaves a dead + // checkpoint. thread_id is the conversationId — stable across turns — so it + // MUST be pruned before the next turn, or LangGraph would resume this turn's + // state instead of starting fresh. Skip when paused (the checkpoint is needed + // to resume) or when HITL is off (none was written). The Mongo TTL is the backstop. + const agentsEConfig = appConfig?.endpoints?.[EModelEndpoint.agents]; + if (!this.pendingApproval && isHITLEnabled(agentsEConfig?.toolApproval)) { + try { + // Job-replacement guard: only prune if THIS generation is still the live job. + // A newer request can replace this one on the same conversationId; if this + // (older) run's finally lands after the newer run paused, pruning by + // conversationId would delete the NEWER run's checkpoint and break its /resume. + const resumableStreamId = this.options.req?._resumableStreamId; + let replaced = false; + if (resumableStreamId && this.jobCreatedAt != null) { + const liveJob = await GenerationJobManager.getJobStore().getJob(resumableStreamId); + replaced = !liveJob || liveJob.createdAt !== this.jobCreatedAt; + } + if (replaced) { + logger.debug('[AgentClient] Skipping checkpoint prune — job was replaced', { + streamId: resumableStreamId, + }); + } else { + await deleteAgentCheckpoint(this.conversationId, agentsEConfig?.checkpointer); + } + } catch (err) { + logger.warn('[AgentClient] Failed to prune checkpoint after completion', err); + } + } + run = null; config = null; memoryPromise = null; } } + /** + * Resume a run that paused for human-in-the-loop review. + * + * The original run lives in a detached background task that exits when the run + * pauses, so resume REBUILDS the run on a fresh graph bound to the same + * `thread_id` (= conversationId) and the durable checkpointer. LangGraph rehydrates + * the paused graph state from the checkpoint; `run.resume(value)` re-enters the + * interrupted node with the user's decision. State comes from the checkpoint, so + * no message history is rebuilt here — `createRun` only needs the agent(s) to + * reconstruct the graph structure. + * + * `seedContent` is the content streamed before the pause (the assistant message + + * its tool call). In Redis mode the job store's append log already spans the pause, + * so the finalized message is complete regardless; seeding keeps the in-memory store + * complete too. The run drives events through the same `streamId`, so the client's + * open SSE receives the continuation live. + * + * Unlike `chatCompletion`, this does NOT prune the checkpoint in its `finally` — the + * resume controller owns checkpoint lifecycle (it must also clean up on failures that + * happen before this method runs, and keep the checkpoint on a re-pause). + * + * @param {object} params + * @param {Agents.ToolApprovalDecisionMap | { answer: string }} params.resumeValue + * @param {Array} [params.seedContent] - content aggregated before the pause + * @param {AbortController} [params.abortController] + * @param {Pick} [params.commandOptions] + */ + async resumeCompletion({ + resumeValue, + seedContent = [], + abortController = null, + commandOptions, + userMCPAuthMap, + discoveredToolNames, + }) { + /** @type {Partial} */ + let config; + /** @type {ReturnType} */ + let run; + const appConfig = this.options.req.config; + const balanceConfig = getBalanceConfig(appConfig); + const transactionsConfig = getTransactionsConfig(appConfig); + try { + if (!abortController) { + abortController = new AbortController(); + } + + /** @type {AppConfig['endpoints']['agents']} */ + const agentsEConfig = appConfig.endpoints?.[EModelEndpoint.agents]; + + config = { + runName: 'AgentRun', + configurable: { + thread_id: this.conversationId, + last_agent_index: this.agentConfigs?.size ?? 0, + user_id: this.user ?? this.options.req.user?.id, + hide_sequential_outputs: this.options.agent.hide_sequential_outputs, + requestBody: { + messageId: this.responseMessageId, + conversationId: this.conversationId, + parentMessageId: this.parentMessageId, + }, + user: createSafeUser(this.options.req.user), + }, + recursionLimit: resolveRecursionLimit(agentsEConfig, this.options.agent), + signal: abortController.signal, + streamMode: 'values', + version: 'v2', + }; + + // Seed pre-pause content so the in-memory job store reports the complete turn + // (Redis aggregates across the pause via its append log; this covers in-memory). + if (Array.isArray(seedContent) && seedContent.length > 0) { + this.contentParts.push(...seedContent); + } + + const tokenCounter = createTokenCounter(this.getEncoding()); + const agents = [this.options.agent]; + if (this.agentConfigs && this.agentConfigs.size > 0) { + agents.push(...this.agentConfigs.values()); + } + + // Re-prime skill files invoked in the pre-pause segment (mirrors the normal path's + // `primeInvokedSkills(payload)`), so an approved code/file-backed tool keeps the + // injected skill-file session refs instead of running without them. The pre-pause + // content carries the `skill` tool_calls, so it stands in for the message payload. + let skillSessions; + if ( + typeof this.options.primeInvokedSkills === 'function' && + Array.isArray(seedContent) && + seedContent.length > 0 + ) { + try { + const primed = await this.options.primeInvokedSkills([ + { role: 'assistant', content: seedContent }, + ]); + skillSessions = primed?.initialSessions; + } catch (err) { + logger.warn( + '[api/server/controllers/agents/client.js #resumeCompletion] Failed to re-prime skill sessions', + err?.message ?? err, + ); + } + } + + // Seed code-env / skill tool sessions so an approved code/file/skill-backed tool + // runs with the same uploaded-file context the pre-pause run had — the rebuilt + // graph otherwise has no `Graph.sessions` entries (especially cross-replica). + const initialSessions = buildInitialToolSessions({ skillSessions, agents }); + + run = await createRun({ + agents, + // State (messages, tool calls) is rehydrated from the checkpoint by + // run.resume; createRun only needs the agents to rebuild the graph. + messages: [], + // The resumed run can pause AGAIN (another tool, a follow-up question), and this + // controller owns that lifecycle, so it must keep the HITL wiring on the rebuilt run. + hitlCapable: true, + // Replay deferred tools discovered before the pause. With `messages: []` the + // discovery scan finds nothing, so a deferred tool the paused call targets + // would be absent from the rebuilt toolMap; these names (captured at pause) + // force it back in. Undefined/empty for non-deferred turns — a harmless no-op. + discoveredToolNames, + initialSessions, + runId: this.responseMessageId, + signal: abortController.signal, + customHandlers: this.options.eventHandlers, + requestBody: config.configurable.requestBody, + user: createSafeUser(this.options.req?.user), + tenantId: this.options.req?.user?.tenantId, + summarizationConfig: appConfig?.summarization, + appConfig, + tokenCounter, + subagentUsageSink: createSubagentUsageSink( + this.collectedUsage, + this.buildSubagentUsageEmitter(appConfig), + ), + }); + + if (!run) { + throw new Error('Failed to create run for resume'); + } + + this.run = run; + if (this._resolveRun) { + this._resolveRun(run); + this._resolveRun = null; + } + + const streamId = this.options.req?._resumableStreamId; + // Do NOT cache the rebuilt graph on resume: it was created with `messages: []`, so + // RedisJobStore.getContentParts() (which prefers a cached graph over reconstructing + // from the chunk log) would return only the resumed segment and drop the pre-pause + // assistant/tool-call content on a same-replica reload/status poll. Skipping it makes + // introspection fall back to the durable chunk reconstruction, which is complete. + // `setContentParts` still points the in-memory store at the seeded client content. + if (streamId && this.contentParts) { + GenerationJobManager.setContentParts(streamId, this.contentParts); + } + + // Carry the user's MCP auth into the rebuilt run so an approved MCP tool executes + // with the same OAuth/user credentials it had before the pause. + if (userMCPAuthMap != null) { + config.configurable.userMCPAuthMap = userMCPAuthMap; + } + + /** @deprecated Agent Chain */ + config.configurable.last_agent_id = agents[agents.length - 1].id; + + await run.resume( + resumeValue, + config, + { callbacks: { [Callback.TOOL_ERROR]: logToolError } }, + commandOptions, + ); + + config.signal = null; + + // The model may pause AGAIN (another tool needs approval, or a follow-up + // question). Re-arm the same interrupt gate so the cycle can repeat. + await this.handleRunInterrupt(run, streamId); + + // Mirror chatCompletion: strip hidden intermediate sequential-agent content + // before resume finalize/re-pause persistence reads `this.contentParts`, so a + // resumed sequential chain doesn't persist/emit outputs hide_sequential_outputs + // is meant to hide. + this.applyHideSequentialOutputsFilter(); + } catch (err) { + if (abortController.signal.aborted) { + logger.debug( + '[api/server/controllers/agents/client.js #resumeCompletion] Aborted by user', + { + conversationId: this.conversationId, + name: err?.name, + code: err?.code, + }, + ); + } else { + logger.error( + '[api/server/controllers/agents/client.js #resumeCompletion] Unhandled error', + err, + ); + this.contentParts.push({ + type: ContentTypes.ERROR, + [ContentTypes.ERROR]: `An error occurred while resuming the request${err?.message ? `: ${err.message}` : ''}`, + }); + } + } finally { + const ratio = this.run?.getCalibrationRatio() ?? 0; + if (ratio > 0 && ratio !== 1) { + this.contextMeta = { + calibrationRatio: Math.round(ratio * 1000) / 1000, + encoding: this.getEncoding(), + }; + } else { + this.contextMeta = undefined; + } + + this.finalizeSubagentContent(); + + if (this.pendingSubagentEmits.length > 0) { + await Promise.allSettled(this.pendingSubagentEmits); + this.pendingSubagentEmits = []; + } + + try { + const wasAborted = abortController?.signal?.aborted; + if (!wasAborted) { + await this.recordCollectedUsage({ + context: 'message', + balance: balanceConfig, + transactions: transactionsConfig, + }); + } + } catch (err) { + logger.error( + '[api/server/controllers/agents/client.js #resumeCompletion] Error in cleanup phase', + err, + ); + } + if (this._resolveRun) { + this._resolveRun(this.run ?? null); + this._resolveRun = null; + } + run = null; + config = null; + } + } + /** * Resolves with the agent run once it is initialized, or `null` if * initialization fails. Lets immediate-mode title generation await the run diff --git a/api/server/controllers/agents/client.test.js b/api/server/controllers/agents/client.test.js index 1320e362c1..1646a448a7 100644 --- a/api/server/controllers/agents/client.test.js +++ b/api/server/controllers/agents/client.test.js @@ -48,6 +48,32 @@ jest.mock('~/config', () => ({ })), })); +describe('AgentClient - applyHideSequentialOutputsFilter', () => { + const textPart = (text) => ({ type: ContentTypes.TEXT, text }); + const toolCallPart = (id) => ({ type: ContentTypes.TOOL_CALL, tool_call: { id } }); + + it('keeps only the last part + tool_call parts when hide_sequential_outputs is on', () => { + const ctx = { + options: { agent: { hide_sequential_outputs: true } }, + contentParts: [ + textPart('intermediate'), + toolCallPart('tc1'), + textPart('reasoning'), + textPart('final'), + ], + }; + AgentClient.prototype.applyHideSequentialOutputsFilter.call(ctx); + expect(ctx.contentParts).toEqual([toolCallPart('tc1'), textPart('final')]); + }); + + it('is a no-op when hide_sequential_outputs is off', () => { + const parts = [textPart('a'), textPart('b')]; + const ctx = { options: { agent: { hide_sequential_outputs: false } }, contentParts: parts }; + AgentClient.prototype.applyHideSequentialOutputsFilter.call(ctx); + expect(ctx.contentParts).toEqual([textPart('a'), textPart('b')]); + }); +}); + describe('AgentClient - titleConvo', () => { let client; let mockRun; diff --git a/api/server/controllers/agents/request.js b/api/server/controllers/agents/request.js index 0b8c0cee1a..81213a23d9 100644 --- a/api/server/controllers/agents/request.js +++ b/api/server/controllers/agents/request.js @@ -94,7 +94,10 @@ function getPreliminaryResponseMessageId({ messageId, responseMessageId }) { return `${messageId.replace(/_+$/, '')}_`; } -function getPreliminaryUserMessage({ messageId, parentMessageId, text, quotes }, conversationId) { +function getPreliminaryUserMessage( + { messageId, parentMessageId, text, quotes, files, manualSkills, alwaysAppliedSkills }, + conversationId, +) { if (typeof messageId !== 'string' || messageId.length === 0) { return null; } @@ -113,6 +116,17 @@ function getPreliminaryUserMessage({ messageId, parentMessageId, text, quotes }, conversationId, text, ...(referencedQuotes != null && { quotes: referencedQuotes }), + // Persist the turn's uploaded files on this AWAITED preliminary write so they land on + // job.metadata.userMessage BEFORE the run can reach its first interrupt. onStart's + // later writes are fire-and-forget, so a fast approval could otherwise read the job + // and resume an approved code/read-file tool without the paused turn's uploads. + ...(Array.isArray(files) && files.length > 0 && { files }), + // Carry skill selections so a HITL-resumed turn's reconstructed `requestMessage` + // keeps its skill pills — the client's final handler replaces the user bubble from + // this object, and they'd otherwise vanish until a full reload refetches the row. + ...(Array.isArray(manualSkills) && manualSkills.length > 0 && { manualSkills }), + ...(Array.isArray(alwaysAppliedSkills) && + alwaysAppliedSkills.length > 0 && { alwaysAppliedSkills }), }; } @@ -248,6 +262,12 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit endpoint: endpointOption.endpoint, iconURL: endpointIconURL, model: responseModel, + // Persist the originating agent so a HITL resume can refuse to rebuild this + // paused run on a different agent (see resume.js). + agent_id: endpointOption.agent_id ?? req.body?.agent_id, + // Persist temporary-chat state so a HITL resume keeps the resumed response + // non-persisted instead of trusting the resume request to re-send the flag. + isTemporary: req.body?.isTemporary, responseMessageId: preliminaryResponseMessageId, userMessage: preliminaryUserMessage, }); @@ -344,6 +364,10 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit } client = result.client; + // Tag the client with THIS generation's identity so HITL terminal side-effects + // (pause CAS, checkpoint prune) can tell whether a newer request has since replaced + // this job on the same conversationId before acting on it. + client.jobCreatedAt = jobCreatedAt; // Resolve title timing from the public agents endpoint first, then fall // back to the agent's actual backing provider/custom endpoint. @@ -450,12 +474,44 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit conversationId: userMsg.conversationId, text: userMsg.text, quotes: userMsg.quotes, + // Persist the turn's uploaded files here (authoritative job metadata) so a + // HITL resume sources them from the job, not the user DB row — which the + // approval prompt can race (the row save may still be in flight when a fast + // /resume reads it). Without this an approved tool run can rebuild without the + // paused turn's files. + ...(Array.isArray(req.body?.files) && + req.body.files.length > 0 && { files: req.body.files }), + // Skill selections aren't on `userMsg` yet at onStart (BaseClient adds them + // later), so source them from the request — otherwise this update overwrites + // the preliminary metadata and a HITL-resumed turn loses its skill pills. + ...(Array.isArray(req.body?.manualSkills) && + req.body.manualSkills.length > 0 && { manualSkills: req.body.manualSkills }), + ...(Array.isArray(req.body?.alwaysAppliedSkills) && + req.body.alwaysAppliedSkills.length > 0 && { + alwaysAppliedSkills: req.body.alwaysAppliedSkills, + }), }, }); GenerationJobManager.emitChunk(streamId, { created: true, - message: userMessage, + // Skill selections aren't on `userMessage` yet at onStart (BaseClient adds + // them later), so attach them from the request — this is the message + // `trackUserMessage` persists as the authoritative job.metadata.userMessage, + // and it's what the live client renders the user bubble from. + message: { + ...userMessage, + // Carry files so trackUserMessage (the authoritative writer) persists them on + // job.metadata.userMessage for a HITL resume (see the updateMetadata above). + ...(Array.isArray(req.body?.files) && + req.body.files.length > 0 && { files: req.body.files }), + ...(Array.isArray(req.body?.manualSkills) && + req.body.manualSkills.length > 0 && { manualSkills: req.body.manualSkills }), + ...(Array.isArray(req.body?.alwaysAppliedSkills) && + req.body.alwaysAppliedSkills.length > 0 && { + alwaysAppliedSkills: req.body.alwaysAppliedSkills, + }), + }, streamId, }); }; @@ -503,6 +559,99 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit const response = await sendPromise; + // HITL: the turn paused for human review (see AgentClient.handleRunInterrupt). + // The job is already `requires_action` with the pending action persisted and + // emitted to the client; the resume route owns finishing this turn. Settle the + // in-flight user-message / conversation save, then tear down WITHOUT saving a + // partial response, emitting a terminal event, or completing the job. + if (client?.pendingApproval) { + if (response?.databasePromise) { + try { + await response.databasePromise; + } catch (dbErr) { + logger.error( + '[ResumableAgentController] Error settling databasePromise on HITL pause', + dbErr, + ); + } + delete response.databasePromise; + } + // BaseClient saved the response as completed (unfinished:false), but the turn + // is paused awaiting a decision. Re-mark it unfinished so an expired / never- + // resumed approval doesn't leave a "finished" response in history; the resume + // path overwrites it with the full completed message on success. + if (response?.messageId) { + // Guard against a fast /resume: the user can approve the instant the + // pending-action SSE lands, and resume.js can then claim + finalize — saving + // the COMPLETED response — while we're still awaiting `response.databasePromise` + // above. Marking the row unfinished now would clobber that completed content + // with this stale pre-pause response. Only mark unfinished while the job is + // STILL paused on THIS generation's action: a claim transitions it out of + // `requires_action`, and a replacement bumps `createdAt`. Fail open on a read + // error so a genuinely never-resumed approval isn't left looking "finished". + let stillPaused = true; + try { + const liveJob = await GenerationJobManager.getJob(streamId); + stillPaused = + !!liveJob && + liveJob.status === 'requires_action' && + (client?.jobCreatedAt == null || liveJob.createdAt === client.jobCreatedAt); + } catch (readErr) { + logger.warn( + '[ResumableAgentController] Pause unfinished-save liveness check failed; proceeding', + readErr?.message ?? readErr, + ); + } + if (!stillPaused) { + logger.debug( + `[ResumableAgentController] Skipping pause unfinished-save — ${streamId} already resumed/replaced`, + ); + } else { + try { + await saveMessage( + { + userId, + isTemporary: req?.body?.isTemporary, + interfaceConfig: req?.config?.interfaceConfig, + }, + { + ...response, + endpoint: endpointOption.endpoint, + unfinished: true, + user: userId, + }, + { + context: + 'api/server/controllers/agents/request.js - HITL pause (mark unfinished)', + }, + ); + } catch (saveErr) { + logger.error( + '[ResumableAgentController] Failed to mark paused response unfinished', + saveErr, + ); + } + } + } + titleAbortController.abort(); + acceptsTitleEvents = false; + resolveConvoReady(); + // handleRunInterrupt already released the concurrency slot the moment it paused + // (so a fast /resume isn't 429'd); only release here if that didn't happen. + // Always run the MCP request-context cleanup. + await cleanupMCPRequestContextForReq(req); + if (!client?.pendingRequestReleased) { + await decrementPendingRequest(userId); + } + if (client) { + disposeClient(client); + } + logger.debug( + `[ResumableAgentController] Turn paused for approval; awaiting resume: ${streamId}`, + ); + return; + } + const messageId = response.messageId; const endpoint = endpointOption.endpoint; response.endpoint = endpoint; diff --git a/api/server/controllers/agents/resume.js b/api/server/controllers/agents/resume.js new file mode 100644 index 0000000000..cc7512dc6f --- /dev/null +++ b/api/server/controllers/agents/resume.js @@ -0,0 +1,700 @@ +const { logger } = require('@librechat/data-schemas'); +const { Constants, EModelEndpoint } = require('librechat-data-provider'); +const { + GenerationJobManager, + isPendingActionStale, + mapToolApprovalResolutions, + mapAskUserAnswer, + findUndecidedToolCalls, + findDisallowedDecisions, + findIncompleteDecisions, + computeAgentRequestFingerprint, + deleteAgentCheckpoint, + buildAbortedResponseMetadata, + sanitizeMessageForTransmit, + filterMalformedContentParts, + decrementPendingRequest, + checkAndIncrementPendingRequest, +} = require('@librechat/api'); +const { disposeClient } = require('~/server/cleanup'); +const { + getMCPRequestContext, + cleanupMCPRequestContextForReq, +} = require('~/server/services/MCPRequestContext'); +const { saveMessage, getConvo, getMessages } = require('~/models'); + +/** De-duplicate a merged attachment list by a stable artifact identity. */ +function mergeAttachments(existing, incoming) { + const seen = new Set(); + const out = []; + for (const attachment of [...(existing ?? []), ...(incoming ?? [])]) { + if (!attachment) { + continue; + } + const key = + attachment.file_id ?? + attachment.filepath ?? + attachment.filename ?? + JSON.stringify(attachment); + if (seen.has(key)) { + continue; + } + seen.add(key); + out.push(attachment); + } + return out; +} + +/** + * Resolve the current segment's tool artifacts and merge them with any already + * persisted on the response row. A resumed turn can span multiple pause segments; + * each rebuilt client has its own `artifactPromises`, and the final finalize would + * otherwise OVERWRITE the row's attachments with only the last segment's. Reading + * the persisted row and merging keeps every segment's artifacts on the saved message. + */ +async function resolveAccumulatedAttachments({ client, conversationId, responseMessageId }) { + const promises = Array.isArray(client?.artifactPromises) ? client.artifactPromises : []; + const resolved = promises.length > 0 ? (await Promise.all(promises)).filter(Boolean) : []; + let existing = []; + if (responseMessageId) { + try { + const [row] = await getMessages( + { conversationId, messageId: responseMessageId }, + 'attachments', + ); + existing = Array.isArray(row?.attachments) ? row.attachments : []; + } catch (err) { + logger.warn( + '[ResumeAgentController] Failed to read prior attachments for merge', + err?.message ?? err, + ); + } + } + return mergeAttachments(existing, resolved); +} + +/** Resolve the segment's content for an unfinished save (mirrors finalize's source). */ +async function resolveSegmentContent(client, streamId) { + const liveContent = Array.isArray(client?.contentParts) ? client.contentParts : []; + const rawContent = + liveContent.length > 0 + ? liveContent + : ((await GenerationJobManager.getResumeState(streamId))?.aggregatedContent ?? []); + return filterMalformedContentParts(rawContent); +} + +/** + * A resumed segment that streamed content / produced artifacts and then paused AGAIN + * must persist that progress before returning. The next resume rebuilds a fresh client + * (empty `contentParts`/`artifactPromises`), so without this an approval that later + * expires or is reaped would leave only the EARLIER pause's content on the saved row — + * the user loses everything streamed during this segment. Saved as a partial (`$set`, + * still `unfinished`) so a subsequent successful resume overwrites it on finalize. + */ +async function persistRePauseProgress({ req, client, job, streamId, conversationId }) { + const userId = req.user.id; + const meta = job.metadata ?? {}; + const responseMessageId = meta.responseMessageId ?? client.responseMessageId; + if (!responseMessageId) { + return; + } + const content = await resolveSegmentContent(client, streamId); + const attachments = await resolveAccumulatedAttachments({ + client, + conversationId, + responseMessageId, + }); + if (content.length === 0 && attachments.length === 0) { + return; + } + try { + await saveMessage( + { + userId, + isTemporary: meta.isTemporary ?? req.body?.isTemporary, + interfaceConfig: req?.config?.interfaceConfig, + }, + { + messageId: responseMessageId, + conversationId, + ...(content.length > 0 && { content }), + ...(attachments.length > 0 && { attachments }), + unfinished: true, + user: userId, + }, + { context: 'api/server/controllers/agents/resume.js - re-pause progress persist' }, + ); + } catch (err) { + logger.error('[ResumeAgentController] Failed to persist re-pause progress', err); + } +} + +/** Untenanted jobs (pre-multi-tenancy) remain accessible if the userId check passes. */ +function hasTenantMismatch(job, user) { + return job.metadata?.tenantId != null && job.metadata.tenantId !== user.tenantId; +} + +/** + * Build the SDK resume value from the wire decision payload, validating against the + * pending action. Returns `{ resumeValue }` on success or `{ error }` with an HTTP + * status for the route to surface. + */ +function resolveResumeValue(pendingAction, body) { + const payload = pendingAction.payload; + if (payload?.type === 'tool_approval') { + const resolutions = Array.isArray(body.decisions) ? body.decisions : []; + const undecided = findUndecidedToolCalls(payload, resolutions); + if (undecided.length > 0) { + return { status: 400, error: 'Every paused tool call must be decided', undecided }; + } + // Enforce the policy's per-tool allowed_decisions — a crafted POST must not + // approve a tool the policy restricted to (e.g.) reject/respond. + const disallowed = findDisallowedDecisions(payload, resolutions); + if (disallowed.length > 0) { + return { status: 403, error: 'Decision not permitted for one or more tools', disallowed }; + } + // `edit`/`respond` must carry their payload — otherwise toSdkDecision's defensive + // defaults ({} / '') would resume with an empty input/result the user didn't approve. + const incomplete = findIncompleteDecisions(resolutions); + if (incomplete.length > 0) { + return { + status: 400, + error: 'edit requires editedArguments and respond requires responseText', + incomplete, + }; + } + return { resumeValue: mapToolApprovalResolutions(resolutions) }; + } + if (payload?.type === 'ask_user_question') { + if (typeof body.answer !== 'string' || body.answer.length === 0) { + return { status: 400, error: 'An answer is required' }; + } + return { resumeValue: mapAskUserAnswer({ answer: body.answer }) }; + } + return { status: 400, error: 'Unsupported pending action type' }; +} + +/** + * Finalize a resumed turn that ran to completion: persist the (now complete) + * response message, emit the terminal event over the existing SSE, complete the + * job, and prune the checkpoint. Mirrors the abort route's save shape but for a + * successful finish. Best-effort title generation for a first-turn pause. + */ +async function finalizeResumedTurn({ req, client, job, streamId, conversationId, addTitle }) { + const userId = req.user.id; + const checkpointerCfg = req.config?.endpoints?.[EModelEndpoint.agents]?.checkpointer; + const meta = job.metadata ?? {}; + const userMessage = meta.userMessage; + // The response hangs off the user message; the *user* message's own parent decides + // whether this is the first turn of the conversation (title eligibility). + const parentMessageId = userMessage?.messageId ?? Constants.NO_PARENT; + const isFirstTurn = (userMessage?.parentMessageId ?? Constants.NO_PARENT) === Constants.NO_PARENT; + const responseMessageId = meta.responseMessageId ?? `${userMessage?.messageId ?? 'resumed'}_`; + // Sourced from the paused job (persisted at creation), not the resume body — a + // temporary chat must stay temporary on resume so its messages aren't persisted. + const isTemporary = meta.isTemporary ?? req.body?.isTemporary; + + // Read the raw job data BEFORE completeJob deletes it — its tracked token/context + // usage backs the response message's cost rollup (parity with normal completion). + const jobData = await GenerationJobManager.getJobStore().getJob(streamId); + + // Job-replacement guard (mirrors the normal request path): jobs are keyed by streamId + // (== conversationId), so a new/concurrent request reusing this conversation overwrites + // the record with a fresh createdAt. If that happened while we were resuming, finalizing + // now would emit `done` to / complete / delete the NEWER turn's job. Skip all terminal + // side effects when the job we paused is no longer the live one; the caller's `finally` + // still disposes the client + releases the slot. + if (!jobData || jobData.createdAt !== job.createdAt) { + logger.warn( + `[ResumeAgentController] Skipping resumed finalization — job ${streamId} was replaced`, + ); + return; + } + // Prefer the resumed run's live content: it's complete (seeded with the pre-pause + // content) and avoids a Redis re-read that can race appendChunk writes still in + // flight. Fall back to the aggregated store content only when the live array is empty. + const liveContent = Array.isArray(client?.contentParts) ? client.contentParts : []; + const rawContent = + liveContent.length > 0 + ? liveContent + : ((await GenerationJobManager.getResumeState(streamId))?.aggregatedContent ?? []); + // Parity with the normal agents path (AgentClient strips these before saving): + // drop empty/malformed tool_call parts so a resumed turn can't persist an invalid + // part that breaks reload/rendering. + const content = filterMalformedContentParts(rawContent); + + const responseMessage = { + messageId: responseMessageId, + parentMessageId, + conversationId, + content, + sender: meta.sender ?? client?.sender ?? 'AI', + endpoint: meta.endpoint, + iconURL: meta.iconURL, + model: meta.model, + unfinished: false, + error: false, + isCreatedByUser: false, + user: userId, + }; + if (meta.agent_id ?? req.body?.agent_id) { + responseMessage.agent_id = meta.agent_id ?? req.body.agent_id; + } + // Persist tool artifacts (code files, images, UI resources) the resumed continuation + // produced — BaseClient.sendMessage awaits these before saving, but the lean resume + // path bypasses it, so do it here or they vanish on reload / for late subscribers. + // MERGE with any already on the row (earlier pause segments) rather than overwrite — + // the final segment's client only holds its own segment's artifacts. + const attachments = await resolveAccumulatedAttachments({ + client, + conversationId, + responseMessageId, + }); + if (attachments.length > 0) { + responseMessage.attachments = attachments; + } + + // Response metadata: the resume client only sees POST-resume usage, while the job's + // tracked tokenUsage is cumulative across the pause. Take the cumulative usage (+ + // summary marker) from the job, and contextUsage / thoughtSignatures from the client + // (which the abort-only helper drops). Cumulative usage wins so cost isn't underreported. + const clientMeta = client?.buildResponseMetadata?.() ?? null; + const cumulativeMeta = jobData ? buildAbortedResponseMetadata(jobData) : null; + const responseMetadata = { + ...(clientMeta ?? {}), + ...(cumulativeMeta?.usage ? { usage: cumulativeMeta.usage } : {}), + ...(cumulativeMeta?.summaryUsedTokens != null + ? { summaryUsedTokens: cumulativeMeta.summaryUsedTokens } + : {}), + }; + if (Object.keys(responseMetadata).length > 0) { + responseMessage.metadata = responseMetadata; + } + // Carry the resumed run's context-window calibration (BaseClient.sendMessage persists + // this on the response). Without it, the NEXT turn can't seed its pruner from this + // run and falls back to uncalibrated token accounting. + if (client?.contextMeta != null) { + responseMessage.contextMeta = client.contextMeta; + } + + await saveMessage( + { userId, isTemporary, interfaceConfig: req?.config?.interfaceConfig }, + responseMessage, + { context: 'api/server/controllers/agents/resume.js - resumed response end' }, + ); + + const convo = await getConvo(userId, conversationId); + const conversation = { ...(convo ?? {}), conversationId }; + + // First-turn pause: the title was deferred when the turn paused. Generate it BEFORE + // completing the stream so the `title` event still reaches the live client (emitChunk + // no-ops once completeJob tears down the runtime) and the final event carries the real + // title instead of "New Chat". Best-effort — a failure must not fail the resumed turn. + if ( + addTitle && + isFirstTurn && + !isTemporary && + userMessage?.text && + (!convo || !convo.title || convo.title === 'New Chat') + ) { + try { + await addTitle(req, { + text: userMessage.text, + conversationId, + client, + onTitleGenerated: ({ conversationId: titleConvoId, title }) => { + conversation.title = title; + return GenerationJobManager.emitChunk(streamId, { + event: 'title', + data: { conversationId: titleConvoId, title }, + }); + }, + }); + } catch (err) { + logger.error('[ResumeAgentController] Title generation failed after resume', err); + } + } + conversation.title = conversation.title || 'New Chat'; + + // Re-check ownership immediately before the terminal writes. The start-of-function + // guard can go stale across the awaits above: saveMessage and (first-turn) title + // generation can take long enough for a new request to replace this job on the same + // conversationId (streamId == conversationId). Without this second read, emitDone / + // completeJob / prune below would emit `done` to and tear down the REPLACEMENT job — + // the same hazard the catch-path guard prevents on the failure path. + const liveJobBeforeFinalize = await GenerationJobManager.getJobStore().getJob(streamId); + if (!liveJobBeforeFinalize || liveJobBeforeFinalize.createdAt !== job.createdAt) { + logger.warn( + `[ResumeAgentController] Skipping resumed terminal writes — job ${streamId} was replaced mid-finalize`, + ); + return; + } + + const finalEvent = { + final: true, + conversation, + title: conversation.title, + requestMessage: userMessage + ? sanitizeMessageForTransmit({ + ...userMessage, + conversationId, + isCreatedByUser: true, + // job.metadata.userMessage is persisted without files; carry the restored + // uploads (seeded onto req.body.files before reconstruction) so the final SSE + // doesn't blank the user bubble's attachments — matching the normal path. + ...(Array.isArray(req.body?.files) && req.body.files.length > 0 + ? { files: req.body.files } + : {}), + }) + : null, + responseMessage: { ...responseMessage }, + }; + + await GenerationJobManager.emitDone(streamId, finalEvent); + // Awaited (not fire-and-forget) so the job's terminal write lands before the + // checkpoint prune, and so a failure here doesn't race the controller's error path. + try { + await GenerationJobManager.completeJob(streamId); + } catch (completeErr) { + logger.error('[ResumeAgentController] Failed to complete resumed turn', completeErr); + } + await deleteAgentCheckpoint(conversationId, checkpointerCfg); +} + +/** + * Resume a generation that paused for human-in-the-loop review. + * + * The original run lives in a detached background task that exits when the run + * pauses, so this REBUILDS the run from the durable checkpoint (same `thread_id`) + * and continues it with the user's decision. The continuation streams over the + * client's existing SSE (events flow through the same `streamId`). + * + * Flow: authorize → map decisions → atomically claim the resume (single-winner) → + * ACK → reconstruct the client → `resumeCompletion` → finalize (or re-pause). + * + * Shares chat.js's middleware (auth, agent access, `buildEndpointOption`) so the + * agent/endpoint are reconstructed from the request exactly like a normal turn. + * + * @param {express.Request} req + * @param {express.Response} res + * @param {express.NextFunction} next + * @param {Function} initializeClient + * @param {Function} addTitle + */ +const ResumeAgentController = async (req, res, next, initializeClient, addTitle) => { + const userId = req.user.id; + const { conversationId, actionId } = req.body; + const streamId = conversationId; + + if (!streamId || streamId === 'new') { + return res.status(400).json({ error: 'conversationId is required to resume' }); + } + + const job = await GenerationJobManager.getJob(streamId); + if (!job) { + return res.status(404).json({ error: 'No paused generation for this conversation' }); + } + if (job.metadata?.userId && job.metadata.userId !== userId) { + return res.status(403).json({ error: 'Unauthorized' }); + } + if (hasTenantMismatch(job, req.user)) { + return res.status(403).json({ error: 'Unauthorized' }); + } + + // The resume must rebuild the SAME agent/endpoint that paused. Require an EXACT + // agent_id match when the paused job had one — a request that omits agent_id (or + // claims an ephemeral / non-agents endpoint) must not rebuild the claimed checkpoint + // on a different graph. The conversation's agent is stable, so a correct client always + // sends the right one. + const originalAgentId = job.metadata?.agent_id; + if (originalAgentId && req.body.agent_id !== originalAgentId) { + return res.status(403).json({ error: 'Cannot resume with a different agent' }); + } + // Require an EXACT endpoint match (like agent_id): a request that OMITS endpoint must + // not fall through — the shared chat middleware treats a missing/non-agents endpoint + // as the ephemeral agent, so omitting it could rebuild the claimed checkpoint on a + // different graph. A correct client always echoes the paused endpoint. + const originalEndpoint = job.metadata?.endpoint; + if (originalEndpoint && req.body.endpoint !== originalEndpoint) { + return res.status(403).json({ error: 'Cannot resume on a different endpoint' }); + } + + const pendingAction = job.metadata?.pendingAction; + if (job.status !== 'requires_action') { + return res.status(409).json({ error: 'No live pending action to resume' }); + } + if (isPendingActionStale({ pendingAction })) { + // The action expired between the pending-action SSE and this submit. Drive the expiry + // NOW (expire CAS + terminal SSE) instead of waiting for the periodic sweeper — + // otherwise the job sits `requires_action` with a dead action and any attached SSE + // client never gets a terminal event, so the stream appears to hang even though the + // UI already reported the action as expired. + try { + await GenerationJobManager.expireApproval(streamId, pendingAction?.actionId); + } catch (err) { + logger.warn( + '[ResumeAgentController] Failed to expire stale action on submit', + err?.message ?? err, + ); + } + return res.status(409).json({ error: 'No live pending action to resume' }); + } + // Require the actionId the UI sends: without it, a stale/malformed client could + // resolve whatever action is currently pending (e.g. answer a different question). + if (!actionId) { + return res.status(400).json({ error: 'actionId is required to resume' }); + } + if (pendingAction.actionId !== actionId) { + return res.status(409).json({ error: 'This decision targets a stale action' }); + } + + // Pin the graph identity: the resume must rebuild the SAME agent/graph + tool set the + // run paused on. The agent_id + endpoint guards above cover saved agents; the + // fingerprint additionally catches an ephemeral-agent config swap (its agent_id is + // undefined, so the id guard can't tell two ephemeral configs apart). Enforced only + // when the paused action carries a fingerprint (in-flight pauses from before this + // change won't), and recomputed from the resume body's graph-determining fields. + const pinnedFingerprint = pendingAction.requestFingerprint; + if (pinnedFingerprint && pinnedFingerprint !== computeAgentRequestFingerprint(req.body ?? {})) { + return res.status(403).json({ error: 'Cannot resume with a different agent configuration' }); + } + + const mapped = resolveResumeValue(pendingAction, req.body); + if (mapped.error) { + return res.status(mapped.status).json({ + error: mapped.error, + ...(mapped.undecided && { undecided: mapped.undecided }), + ...(mapped.disallowed && { disallowed: mapped.disallowed }), + ...(mapped.incomplete && { incomplete: mapped.incomplete }), + }); + } + + // Count the resume against the concurrency limit. The original turn released its slot + // when it paused, so resuming must re-acquire one — otherwise pausing several turns + // and resuming them at once would bypass LIMIT_CONCURRENT_MESSAGES. + const { allowed } = await checkAndIncrementPendingRequest(userId); + if (!allowed) { + return res.status(429).json({ error: 'Too many concurrent requests' }); + } + + // Atomically claim the resume. The single winner drives the run; a racing second + // submit (double-click, two tabs) gets false and must not re-drive — that would + // re-execute tools and double-bill. + // + // The claim runs AFTER the slot increment above but BEFORE the run's own try/finally + // that releases it, so a store/Redis error here (unlike the clean `!claimed` branch) + // would leak the concurrency slot until the counter TTL expires — spuriously 429'ing + // the user when they retry the still-paused approval. Release the slot on that path too. + let claimed; + try { + claimed = await GenerationJobManager.approvals.resolve(streamId, pendingAction.actionId); + } catch (err) { + await decrementPendingRequest(userId); + logger.error('[ResumeAgentController] Failed to claim resume', err); + return res.status(500).json({ error: 'Failed to resume' }); + } + if (!claimed) { + await decrementPendingRequest(userId); + return res.status(409).json({ error: 'This action was already resolved or has expired' }); + } + + // Seed the run-scoped MCP request-context store BEFORE the ACK: once `res.json` + // finishes the response, a later `getMCPRequestContext(req, res)` (from tool loading) + // sees `res` as ended and returns undefined, leaving the resumed run without its MCP + // connection store — approved MCP / OAuth-overlay tools would then run without their + // request-scoped connections. Pre-seeding with a null `res` + `cleanupOnResponse:false` + // mirrors the normal stream path (request.js); torn down in the `finally` below. + req._resumableStreamId = streamId; + getMCPRequestContext(req, undefined, { cleanupOnResponse: false }); + + // ACK immediately; the continuation streams over the client's existing SSE. + res.json({ streamId, conversationId, status: 'resuming' }); + + // Seed the original thread parent BEFORE initializeClient: initializeAgent scopes + // thread files / code artifacts off `req.body.parentMessageId`, and the resume body + // doesn't carry it. This is the user message's parent (the thread position); + // `client.parentMessageId` below is a different value — the response's parent, i.e. + // the user message id. + req.body.parentMessageId = job.metadata.userMessage?.parentMessageId ?? Constants.NO_PARENT; + + // Restore the paused user message's OWN uploaded files. initializeAgent rebuilds + // code/file sessions by walking the conversation from `parentMessageId`, but + // execute-code files are excluded from that lookup, so files uploaded on the paused + // turn would be dropped — an approved code/read-file tool would resume without them. + // + // SECURITY: ALWAYS source files from the paused job, never from the `/resume` body. + // `files` is not pinned by the resume fingerprint or replayed via resumeContext, so + // honoring a client-supplied `files` array would let a crafted/buggy client resume an + // approved code/read-file tool against a DIFFERENT file set than the one the user + // approved. A resume reconstructs the SAME paused turn, so there is no legitimate + // reason for the client to supply its own files. Prefer the files persisted on the JOB + // at onStart (race-free), fall back to the DB row for older jobs, and CLEAR otherwise + // so a client-supplied set can never leak through. + const metaFiles = job.metadata.userMessage?.files; + if (Array.isArray(metaFiles) && metaFiles.length > 0) { + req.body.files = metaFiles; + } else { + let restoredFiles = false; + const pausedUserMessageId = job.metadata.userMessage?.messageId; + if (pausedUserMessageId) { + try { + const [row] = await getMessages( + { conversationId, messageId: pausedUserMessageId }, + 'files', + ); + if (Array.isArray(row?.files) && row.files.length > 0) { + req.body.files = row.files; + restoredFiles = true; + } + } catch (err) { + logger.warn( + '[ResumeAgentController] Failed to restore paused user message files', + err?.message ?? err, + ); + } + } + if (!restoredFiles) { + // No paused files (or the lookup failed): drop any client-supplied files so a + // crafted resume body can't inject a file set the paused turn never had. + req.body.files = []; + } + } + + // Restore the conversation's createdAt so temporal prompt vars ({{current_datetime}}, + // {{iso_datetime}}, ...) resolve against the SAME anchor the paused graph used rather + // than the resume wall-clock. initializeAgent reads `req.conversationCreatedAt`; the + // normal path sets it from the convo timestamp (resolveConversationCreatedAt), so mirror + // that here. (The original `timezone` is replayed onto req.body via RESUME_CONTEXT_KEYS.) + try { + const resumedConvo = await getConvo(userId, conversationId); + const createdAt = resumedConvo?.createdAt ? new Date(resumedConvo.createdAt) : null; + if (createdAt && !Number.isNaN(createdAt.getTime())) { + req.conversationCreatedAt = createdAt.toISOString(); + } + } catch (err) { + logger.warn( + '[ResumeAgentController] Failed to restore conversation timestamp anchor', + err?.message ?? err, + ); + } + + let client = null; + try { + const result = await initializeClient({ + req, + res, + endpointOption: req.body.endpointOption, + signal: job.abortController.signal, + }); + client = result.client; + + // Bind the rebuilt client to the in-flight turn's identity (no new user message). + client.conversationId = streamId; + // The resume operates on the SAME job (it moved it running again), so its identity is + // the paused job's createdAt — used by the re-pause CAS pre-check + checkpoint prune to + // avoid acting on a job a newer request has since replaced. + client.jobCreatedAt = job.createdAt; + client.responseMessageId = job.metadata.responseMessageId; + client.parentMessageId = job.metadata.userMessage?.messageId ?? Constants.NO_PARENT; + // Read the pre-pause content BEFORE swapping the store's content reference: the + // in-memory store's setContentParts REPLACES the stored array, so reading the + // resume state afterward would see the new (empty) client array and lose the seed. + const resumeState = await GenerationJobManager.getResumeState(streamId); + const seedContent = resumeState?.aggregatedContent ?? []; + if (client.contentParts) { + GenerationJobManager.setContentParts(streamId, client.contentParts); + } + + await client.resumeCompletion({ + resumeValue: mapped.resumeValue, + seedContent, + abortController: job.abortController, + // Carry the user's MCP auth so approved MCP tools run with their credentials. + userMCPAuthMap: result.userMCPAuthMap, + // Replay deferred tools discovered before the pause (captured at pause). The rebuilt + // graph passes `messages: []`, so without these an approved deferred tool would be + // absent from the schema-only toolMap and resume would fail with "unknown tool". + discoveredToolNames: job.metadata?.discoveredTools, + }); + + // The model may pause AGAIN (another tool, or a follow-up question). The pending + // action is already persisted + emitted; leave the job `requires_action`. + if (client.pendingApproval) { + logger.debug(`[ResumeAgentController] Re-paused for approval: ${streamId}`); + // Persist this segment's content + artifacts before the fresh client (next + // resume) drops them, so an expiring re-pause doesn't lose them; finalize later + // overwrites content and merges attachments onto the saved message. + await persistRePauseProgress({ req, client, job, streamId, conversationId }); + return; + } + + // If the user aborted mid-resume, the abort route already emitted the terminal + // event and finalized the job — don't double-save / double-finalize here. + if (job.abortController.signal.aborted) { + logger.debug( + `[ResumeAgentController] Aborted during resume; abort route finalizes: ${streamId}`, + ); + return; + } + + await finalizeResumedTurn({ req, client, job, streamId, conversationId, addTitle }); + } catch (err) { + logger.error('[ResumeAgentController] Resume failed', err); + // Job-replacement guard (mirrors finalizeResumedTurn's success-path guard): if a + // newer request reused this conversationId while the resume was failing, do NOT emit + // the error to / complete / prune the NEWER turn's job. The finally still releases + // the slot + disposes. Proceed with finalization if the replacement check itself fails. + let stillLive = true; + try { + const liveJob = await GenerationJobManager.getJobStore().getJob(streamId); + stillLive = !!liveJob && liveJob.createdAt === job.createdAt; + } catch (readErr) { + logger.warn('[ResumeAgentController] Replacement check failed; finalizing anyway', readErr); + } + if (!stillLive) { + logger.warn( + `[ResumeAgentController] Skipping failed-resume finalization — job ${streamId} was replaced`, + ); + } else { + try { + await GenerationJobManager.emitError(streamId, err?.message ?? 'Resume failed'); + } catch (emitErr) { + logger.error('[ResumeAgentController] Failed to emit resume error', emitErr); + } + try { + await GenerationJobManager.completeJob(streamId, err?.message ?? 'Resume failed'); + } catch (completeErr) { + logger.error('[ResumeAgentController] Failed to finalize failed resume', completeErr); + // Last resort: force a terminal state so the job isn't orphaned in `running`. + await GenerationJobManager.getJobStore() + .updateJob(streamId, { + status: 'error', + completedAt: Date.now(), + error: 'Resume failed', + }) + .catch((updErr) => + logger.error('[ResumeAgentController] Fallback job finalize failed', updErr), + ); + } + await deleteAgentCheckpoint( + conversationId, + req.config?.endpoints?.[EModelEndpoint.agents]?.checkpointer, + ); + } + } finally { + // Tear down the MCP request-context store seeded before the ACK (parity with + // request.js's finishResumableRequest). No-op if it was never seeded. + await cleanupMCPRequestContextForReq(req); + // Release the concurrency slot taken above — UNLESS handleRunInterrupt already + // released it on a re-pause (so a fast /resume isn't 429'd). On a normal finish or + // error it didn't, so release here. A re-pause re-acquires its own slot next resume. + if (!client?.pendingRequestReleased) { + await decrementPendingRequest(userId); + } + if (client) { + disposeClient(client); + } + } +}; + +module.exports = ResumeAgentController; diff --git a/api/server/middleware/moderateText.js b/api/server/middleware/moderateText.js index 1aed0991ad..3356f75921 100644 --- a/api/server/middleware/moderateText.js +++ b/api/server/middleware/moderateText.js @@ -17,8 +17,17 @@ async function moderateText(req, res, next) { * `getReferencedQuotes` first (matching `BaseClient`); moderating the merged * string also covers content split across a quote and the typed body. The * moderation API accepts an array of inputs. + * + * `answer` covers the HITL resume payload (POST /agents/chat/resume) for an + * ask-user question — the user's free-form text — so it's moderated like a typed + * message. A tool-approval resume carries no user text and is skipped below. */ - const safeText = typeof text === 'string' ? text : ''; + let safeText = ''; + if (typeof text === 'string') { + safeText = text; + } else if (typeof req.body.answer === 'string') { + safeText = req.body.answer; + } const inputs = []; if (safeText.length > 0) { inputs.push(safeText); @@ -28,7 +37,36 @@ async function moderateText(req, res, next) { inputs.push(...quotes); inputs.push(mergeQuotedText(safeText, quotes)); } - const input = inputs.length > 1 ? inputs : (inputs[0] ?? text); + // A tool-approval resume can carry user-authored text in `decisions[]`: the + // `respond` substitute result, a `reject` reason, and `edit`ed tool arguments — + // moderate all of them like typed text (edited args stringified). + if (Array.isArray(req.body.decisions)) { + for (const decision of req.body.decisions) { + if (typeof decision?.responseText === 'string' && decision.responseText.length > 0) { + inputs.push(decision.responseText); + } + if (typeof decision?.reason === 'string' && decision.reason.length > 0) { + inputs.push(decision.reason); + } + if (decision?.editedArguments != null) { + try { + const edited = JSON.stringify(decision.editedArguments); + if (typeof edited === 'string' && edited.length > 0) { + inputs.push(edited); + } + } catch { + /* ignore unstringifiable edited args */ + } + } + } + } + // Nothing to moderate (e.g. a tool-approval resume with no `respond` text) — + // don't post an empty/undefined `input`, which the moderation API rejects and which + // would otherwise deny the request. + if (inputs.length === 0) { + return next(); + } + const input = inputs.length > 1 ? inputs : inputs[0]; const response = await axios.post( process.env.OPENAI_MODERATION_REVERSE_PROXY || 'https://api.openai.com/v1/moderations', diff --git a/api/server/routes/agents/__tests__/abort.spec.js b/api/server/routes/agents/__tests__/abort.spec.js index 9fe007596c..15efb19a39 100644 --- a/api/server/routes/agents/__tests__/abort.spec.js +++ b/api/server/routes/agents/__tests__/abort.spec.js @@ -293,6 +293,40 @@ describe('Agent Abort Endpoint', () => { ); }); + it('saves the aborted partial as temporary from job metadata, not the request body', async () => { + const jobStreamId = 'test-stream-123'; + + mockGenerationJobManager.getJob.mockResolvedValue({ + metadata: { userId: 'test-user-123' }, + }); + + // The job was a temporary chat; the stop button posts only conversationId. + mockGenerationJobManager.abortJob.mockResolvedValue({ + success: true, + jobData: { + userMessage: { messageId: 'user-msg-123' }, + responseMessageId: 'response-msg-456', + conversationId: jobStreamId, + isTemporary: true, + }, + content: [{ type: 'text', text: 'Partial...' }], + text: 'Partial...', + }); + + mockSaveMessage.mockResolvedValue(); + + const response = await request(app) + .post('/api/agents/chat/abort') + .send({ conversationId: jobStreamId }); // no isTemporary in body + + expect(response.status).toBe(200); + expect(mockSaveMessage).toHaveBeenCalledWith( + expect.objectContaining({ isTemporary: true }), + expect.anything(), + expect.anything(), + ); + }); + it('should handle saveMessage errors gracefully', async () => { const jobStreamId = 'test-stream-123'; diff --git a/api/server/routes/agents/chat.js b/api/server/routes/agents/chat.js index 8ffbde6552..9463406652 100644 --- a/api/server/routes/agents/chat.js +++ b/api/server/routes/agents/chat.js @@ -1,5 +1,12 @@ const express = require('express'); -const { createMessageFilterPii, generateCheckAccess, skipAgentCheck } = require('@librechat/api'); +const { logger } = require('@librechat/data-schemas'); +const { + createMessageFilterPii, + generateCheckAccess, + skipAgentCheck, + applyResumeContext, + GenerationJobManager, +} = require('@librechat/api'); const { PermissionTypes, Permissions, PermissionBits } = require('librechat-data-provider'); const { moderateText, @@ -10,6 +17,7 @@ const { } = require('~/server/middleware'); const { initializeClient } = require('~/server/services/Endpoints/agents'); const AgentController = require('~/server/controllers/agents/request'); +const ResumeController = require('~/server/controllers/agents/resume'); const addTitle = require('~/server/services/Endpoints/agents/title'); const { getRoleByName } = require('~/models'); @@ -25,6 +33,45 @@ const checkAgentResourceAccess = canAccessAgentFromBody({ requiredPermission: PermissionBits.VIEW, }); +/** + * Replay the paused turn's graph-determining config onto a resume request BEFORE the + * rest of the chain (PII filter, agent-access, buildEndpointOption) reads it. The client + * can't reliably re-send the ephemeral-agent config after a reload/cross-session, so the + * server restores it from the pending action — the resume then rebuilds the SAME + * agent/graph the run paused on (and a crafted resume can't swap the tool set). No-op for + * every non-resume route. + */ +const restoreResumeContext = async (req, res, next) => { + if (req.path !== '/resume') { + return next(); + } + try { + const streamId = req.body?.conversationId; + if (streamId) { + const job = await GenerationJobManager.getJob(streamId); + const resumeContext = job?.metadata?.pendingAction?.resumeContext; + applyResumeContext(req.body, resumeContext); + // Replay the paused turn's resolved model parameters. Ephemeral agents derive these + // (temperature, max tokens, custom endpoint params) from the request body, which the + // resume payload omits — without this the continuation runs with defaults. They're + // scattered top-level fields (folded into model_parameters by buildOptions' rest + // spread), not part of the RESUME_CONTEXT_KEYS allowlist, so merge them back here. + // Authoritative: overwrites any client-supplied values with the captured set. + // `model` is excluded — it's replayed via RESUME_CONTEXT_KEYS to the exact value the + // resume fingerprint was pinned on, so overwriting it here could trip that check. + const resumedModelParameters = resumeContext?.model_parameters; + if (resumedModelParameters && typeof resumedModelParameters === 'object') { + const { model: _replayedModel, ...replayParams } = resumedModelParameters; + Object.assign(req.body, replayParams); + } + } + } catch (err) { + logger.warn('[agents/chat] Failed to restore resume context', err?.message ?? err); + } + next(); +}; + +router.use(restoreResumeContext); router.use(createMessageFilterPii({ getConfig: (req) => req.config?.messageFilter?.pii })); router.use(moderateText); router.use(checkAgentAccess); @@ -36,6 +83,21 @@ const controller = async (req, res, next) => { await AgentController(req, res, next, initializeClient, addTitle); }; +const resumeController = async (req, res, next) => { + await ResumeController(req, res, next, initializeClient, addTitle); +}; + +/** + * @route POST /resume + * @desc Resume a generation paused for human-in-the-loop review (tool approval or + * ask-user answer). Shares this router's middleware so the agent/endpoint are + * reconstructed from the request exactly like a normal turn. Declared before + * `/:endpoint` so it is not captured as an ephemeral endpoint name. + * @access Private + * @returns {void} + */ +router.post('/resume', resumeController); + /** * @route POST / (regular endpoint) * @desc Chat with an assistant diff --git a/api/server/routes/agents/index.js b/api/server/routes/agents/index.js index 7fb23f6c66..fe36869983 100644 --- a/api/server/routes/agents/index.js +++ b/api/server/routes/agents/index.js @@ -5,6 +5,8 @@ const { hasPersistableAbortContent, buildAbortedResponseMetadata, isPendingActionStale, + isHITLEnabled, + deleteAgentCheckpoint, } = require('@librechat/api'); const { createSseStreamTelemetry } = require('@librechat/api/telemetry'); const { logger } = require('@librechat/data-schemas'); @@ -231,7 +233,7 @@ router.get('/chat/status/:conversationId', async (req, res) => { * @access Private * @description Mounted before chatRouter to bypass buildEndpointOption middleware */ -router.post('/chat/abort', async (req, res) => { +router.post('/chat/abort', configMiddleware, async (req, res) => { logger.debug(`[AgentStream] ========== ABORT ENDPOINT HIT ==========`); logger.debug(`[AgentStream] Method: ${req.method}, Path: ${req.path}`); logger.debug(`[AgentStream] Body:`, req.body); @@ -288,6 +290,17 @@ router.post('/chat/abort', async (req, res) => { abortResultResponseMessageId: abortResult.jobData?.responseMessageId, }); + // HITL: prune the durable checkpoint of a run aborted while paused, so a new turn + // in this conversation can't rehydrate the stale interrupt before the Mongo TTL + // reclaims it (thread_id is the stable conversationId). Idempotent / no-op when + // HITL is off or nothing was written. + const agentsCfg = req.config?.endpoints?.agents; + if (isHITLEnabled(agentsCfg?.toolApproval)) { + await deleteAgentCheckpoint(jobStreamId, agentsCfg?.checkpointer).catch((err) => + logger.error(`[AgentStream] Failed to prune checkpoint on abort: ${jobStreamId}`, err), + ); + } + // CRITICAL: Save partial response BEFORE returning to prevent race condition. // If user sends a follow-up immediately after abort, the parentMessageId must exist in DB. // Only save if we have a valid responseMessageId (skip early aborts before generation started) @@ -327,7 +340,10 @@ router.post('/chat/abort', async (req, res) => { await saveMessage( { userId: req?.user?.id, - isTemporary: req?.body?.isTemporary, + // Source from the job, not the request: the stop button posts only the + // conversationId, so trusting req.body.isTemporary would persist an aborted + // temporary-chat partial as a normal (orphaned) message. + isTemporary: jobData?.isTemporary ?? req?.body?.isTemporary, interfaceConfig: req?.config?.interfaceConfig, }, responseMessage, diff --git a/client/src/components/Chat/Messages/Content/ApprovalContext.tsx b/client/src/components/Chat/Messages/Content/ApprovalContext.tsx new file mode 100644 index 0000000000..aed523c42d --- /dev/null +++ b/client/src/components/Chat/Messages/Content/ApprovalContext.tsx @@ -0,0 +1,292 @@ +import { createContext, useCallback, useContext, useMemo, useRef, useState } from 'react'; +import { Constants } from 'librechat-data-provider'; +import type { Agents } from 'librechat-data-provider'; +import { + useSubmitToolApprovalMutation, + useSubmitAskAnswerMutation, + type ResumeAgentFields, +} from '~/data-provider'; +import { ChatContext } from '~/Providers/ChatContext'; +import { useGetEphemeralAgent } from '~/store/agents'; + +/** Per-action submission lifecycle, surfaced to the cards so they can disable + * controls and explain a terminal outcome. */ +type ActionStatus = 'idle' | 'submitting' | 'submitted' | 'expired' | 'error'; + +interface ApprovalContextValue { + /** Record (or clear) a card's decision for its tool_call within an action. */ + setDecision: ( + actionId: string, + toolCallId: string, + decision: Agents.ToolApprovalResolution | null, + ) => void; + /** Current decision a card holds, if any (drives selected-state styling). */ + getDecision: (actionId: string, toolCallId: string) => Agents.ToolApprovalResolution | undefined; + /** Every recorded decision for an action, in registration order (the submit batch). */ + getDecisions: (actionId: string) => Agents.ToolApprovalResolution[]; + /** Declare that a tool_call belongs to an action so submit can require all. */ + registerToolCall: (actionId: string, toolCallId: string) => void; + /** Drop a tool_call's registration when its card unmounts, so a resolved/removed + * card can't keep `isReady` false and wedge the batch submit. */ + unregisterToolCall: (actionId: string, toolCallId: string) => void; + /** The first-registered tool_call for an action — the single card that owns + * the batch submit button (avoids N buttons across sibling cards). */ + getLeadToolCallId: (actionId: string) => string | undefined; + /** Number of tool calls paused under an action (for a "1 of N" label). */ + getRegisteredCount: (actionId: string) => number; + /** True once every registered tool_call in the action has a decision. */ + isReady: (actionId: string) => boolean; + /** Lifecycle status for an action (so cards can disable / show messages). */ + getStatus: (actionId: string) => ActionStatus; + /** Set an action's submission status (driven by the cards' submit via `useResumeSubmit`). */ + setStatus: (actionId: string, status: ActionStatus) => void; +} + +const ApprovalContext = createContext(null); + +/** Cards call this; outside a provider it degrades to inert no-ops so a tool + * call without an active approval never crashes. */ +export const useApprovalContext = (): ApprovalContextValue => { + const ctx = useContext(ApprovalContext); + return ctx ?? FALLBACK; +}; + +const FALLBACK: ApprovalContextValue = { + setDecision: () => undefined, + getDecision: () => undefined, + getDecisions: () => [], + registerToolCall: () => undefined, + unregisterToolCall: () => undefined, + getLeadToolCallId: () => undefined, + getRegisteredCount: () => 0, + isReady: () => false, + getStatus: () => 'idle', + setStatus: () => undefined, +}; + +const isExpiredError = (error: unknown): boolean => { + const status = (error as { response?: { status?: number } } | undefined)?.response?.status; + return status === 409; +}; + +/** + * Coordinates human-in-the-loop decisions for a single response message. + * + * An action may pause multiple tool calls (same `actionId`); each `ToolApproval` + * card registers its `tool_call_id` and records a decision here, and the lead card + * submits ONCE with the full `decisions[]` covering every paused call (the server + * rejects a partial batch). + * + * Intentionally PURE state — it does NOT read `ChatContext`, the agent store, or + * React Query. Message content renders in places without those providers (shared / + * exported views, tests), so the provider must be safe to mount anywhere. The + * context-dependent submit lives in {@link useResumeSubmit}, which the cards call — + * and the cards only render inside a live chat view where those providers exist. + */ +export default function ApprovalProvider({ children }: { children: React.ReactNode }) { + /** actionId → (tool_call_id → resolution). Mutable ref + a version bump so + * reads are synchronous for `isReady`/submit while renders stay cheap. */ + const decisionsRef = useRef(new Map>()); + const registeredRef = useRef(new Map>()); + const [, bump] = useState(0); + const rerender = useCallback(() => bump((v) => v + 1), []); + const [statusByAction, setStatusByAction] = useState>({}); + + const registerToolCall = useCallback( + (actionId: string, toolCallId: string) => { + const set = registeredRef.current.get(actionId) ?? new Set(); + if (set.has(toolCallId)) { + return; + } + set.add(toolCallId); + registeredRef.current.set(actionId, set); + /** A newly-registered call shifts the lead / "of N" count for sibling + * cards — re-render so they reflect it. */ + rerender(); + }, + [rerender], + ); + + const unregisterToolCall = useCallback( + (actionId: string, toolCallId: string) => { + const set = registeredRef.current.get(actionId); + if (!set || !set.has(toolCallId)) { + return; + } + set.delete(toolCallId); + // Also drop any decision it held so a stale entry can't linger. + decisionsRef.current.get(actionId)?.delete(toolCallId); + if (set.size === 0) { + registeredRef.current.delete(actionId); + decisionsRef.current.delete(actionId); + } + rerender(); + }, + [rerender], + ); + + const getLeadToolCallId = useCallback( + (actionId: string) => registeredRef.current.get(actionId)?.values().next().value, + [], + ); + + const getRegisteredCount = useCallback( + (actionId: string) => registeredRef.current.get(actionId)?.size ?? 0, + [], + ); + + const setDecision = useCallback( + (actionId: string, toolCallId: string, decision: Agents.ToolApprovalResolution | null) => { + const map = decisionsRef.current.get(actionId) ?? new Map(); + if (decision == null) { + map.delete(toolCallId); + } else { + map.set(toolCallId, decision); + } + decisionsRef.current.set(actionId, map); + rerender(); + }, + [rerender], + ); + + const getDecision = useCallback( + (actionId: string, toolCallId: string) => decisionsRef.current.get(actionId)?.get(toolCallId), + [], + ); + + const getDecisions = useCallback( + (actionId: string) => Array.from(decisionsRef.current.get(actionId)?.values() ?? []), + [], + ); + + const isReady = useCallback((actionId: string) => { + const registered = registeredRef.current.get(actionId); + const decided = decisionsRef.current.get(actionId); + if (!registered || registered.size === 0) { + return false; + } + for (const toolCallId of registered) { + if (!decided?.has(toolCallId)) { + return false; + } + } + return true; + }, []); + + const getStatus = useCallback( + (actionId: string): ActionStatus => statusByAction[actionId] ?? 'idle', + [statusByAction], + ); + + const setStatus = useCallback((actionId: string, status: ActionStatus) => { + setStatusByAction((prev) => ({ ...prev, [actionId]: status })); + }, []); + + const value = useMemo( + () => ({ + setDecision, + getDecision, + getDecisions, + registerToolCall, + unregisterToolCall, + getLeadToolCallId, + getRegisteredCount, + isReady, + getStatus, + setStatus, + }), + [ + setDecision, + getDecision, + getDecisions, + registerToolCall, + unregisterToolCall, + getLeadToolCallId, + getRegisteredCount, + isReady, + getStatus, + setStatus, + ], + ); + + return {children}; +} + +/** + * Submit hook for the approval cards. Sources the resume body's agent/endpoint + * fields from the active conversation (so the route's shared `buildEndpointOption` + * middleware reconstructs the same agent) and fires the resume mutation, threading + * the result back into the action's status. + * + * Reads `ChatContext` / the agent store / React Query. The cards render it from + * live chat views but ALSO from contexts without a `ChatContext.Provider` (e.g. a + * subagent tool paused inside a portaled dialog, or a search/citation render that + * passes chat context as a prop), so it reads the context non-throwingly: with no + * conversation, `buildResumeFields` returns null and the controls are inert rather + * than crashing. + */ +export function useResumeSubmit() { + const conversation = useContext(ChatContext)?.conversation; + const getEphemeralAgent = useGetEphemeralAgent(); + const approvalMutation = useSubmitToolApprovalMutation(); + const askMutation = useSubmitAskAnswerMutation(); + const { getDecisions, isReady, setStatus } = useApprovalContext(); + + const buildResumeFields = useCallback((): ResumeAgentFields | null => { + const conversationId = conversation?.conversationId; + if (!conversationId || conversationId === Constants.NEW_CONVO) { + return null; + } + return { + conversationId, + endpoint: conversation?.endpoint, + endpointType: conversation?.endpointType, + agent_id: conversation?.agent_id, + model: conversation?.model, + spec: conversation?.spec, + // Ephemeral agents derive their instructions from promptPrefix — re-send it so + // the resumed run rebuilds the same graph and matches the server fingerprint. + promptPrefix: conversation?.promptPrefix, + ephemeralAgent: getEphemeralAgent(conversationId), + }; + }, [conversation, getEphemeralAgent]); + + const submitToolApproval = useCallback( + (actionId: string) => { + const fields = buildResumeFields(); + const decisions = getDecisions(actionId); + if (!fields || decisions.length === 0 || !isReady(actionId)) { + return; + } + setStatus(actionId, 'submitting'); + approvalMutation.mutate( + { ...fields, actionId, decisions }, + { + onSuccess: () => setStatus(actionId, 'submitted'), + onError: (error) => setStatus(actionId, isExpiredError(error) ? 'expired' : 'error'), + }, + ); + }, + [approvalMutation, buildResumeFields, getDecisions, isReady, setStatus], + ); + + const submitAskAnswer = useCallback( + (actionId: string, answer: string) => { + const fields = buildResumeFields(); + if (!fields || answer.length === 0) { + return; + } + setStatus(actionId, 'submitting'); + askMutation.mutate( + { ...fields, actionId, answer }, + { + onSuccess: () => setStatus(actionId, 'submitted'), + onError: (error) => setStatus(actionId, isExpiredError(error) ? 'expired' : 'error'), + }, + ); + }, + [askMutation, buildResumeFields, setStatus], + ); + + return { submitToolApproval, submitAskAnswer }; +} diff --git a/client/src/components/Chat/Messages/Content/AskUserQuestion.tsx b/client/src/components/Chat/Messages/Content/AskUserQuestion.tsx new file mode 100644 index 0000000000..55f85b26af --- /dev/null +++ b/client/src/components/Chat/Messages/Content/AskUserQuestion.tsx @@ -0,0 +1,93 @@ +import { useState } from 'react'; +import { TriangleAlert } from 'lucide-react'; +import { Button, TextareaAutosize } from '@librechat/client'; +import type { Agents } from 'librechat-data-provider'; +import { useApprovalContext, useResumeSubmit } from './ApprovalContext'; +import { useLocalize } from '~/hooks'; + +/** + * Renders an `ask_user_question` pause: the prompt, optional description, any + * curated option buttons, and a free-form text answer. Each path submits the + * answer through {@link useApprovalContext} to resume the paused run; the + * continuation arrives on the existing SSE. + */ +export default function AskUserQuestion({ + actionId, + question, +}: { + actionId: string; + question: Agents.AskUserQuestionRequest; +}) { + const localize = useLocalize(); + const { getStatus } = useApprovalContext(); + const { submitAskAnswer } = useResumeSubmit(); + const [answer, setAnswer] = useState(''); + + const status = getStatus(actionId); + const locked = status === 'submitting' || status === 'submitted' || status === 'expired'; + + if (status === 'submitted') { + return null; + } + + const trimmed = answer.trim(); + + return ( +
+

{question.question}

+ {question.description != null && question.description.length > 0 && ( +

{question.description}

+ )} + + {question.options != null && question.options.length > 0 && ( +
+ {question.options.map((option) => ( + + ))} +
+ )} + + setAnswer(e.target.value)} + minRows={2} + maxRows={12} + placeholder={localize('com_ui_your_answer')} + className="w-full resize-none rounded-md border border-border-light bg-surface-primary p-2 text-sm" + aria-label={localize('com_ui_your_answer')} + /> + +
+ + {status === 'expired' && ( + + + )} + {status === 'error' && ( + + + )} +
+
+ ); +} diff --git a/client/src/components/Chat/Messages/Content/ContentParts.tsx b/client/src/components/Chat/Messages/Content/ContentParts.tsx index 00569c11f5..148c8a3e2d 100644 --- a/client/src/components/Chat/Messages/Content/ContentParts.tsx +++ b/client/src/components/Chat/Messages/Content/ContentParts.tsx @@ -12,6 +12,7 @@ import { mapAttachments, groupSequentialToolCalls } from '~/utils'; import { MessageContext, SearchContext } from '~/Providers'; import PendingSkillCall from './Parts/PendingSkillCall'; import { EditTextPart, EmptyText } from './Parts'; +import ApprovalProvider from './ApprovalContext'; import MemoryArtifacts from './MemoryArtifacts'; import ToolCallGroup from './ToolCallGroup'; import Container from './Container'; @@ -373,7 +374,7 @@ const ContentParts = memo(function ContentParts({ const hasParallelContent = safeContent.some((part) => part?.groupId != null); if (hasParallelContent) { return ( - <> + {renderPendingSkills()} - + ); } // Sequential content: render parts in order (90% of cases) return ( - - - {renderPendingSkills()} - {showEmptyCursor && ( - - - - )} - {groupedParts.map((group) => { - if (group.type === 'single') { - const { part, idx } = group.part; - return renderPart(part, idx, idx === lastContentIdx); - } - const { groupId } = group; - return ( - p.idx === lastContentIdx)} - renderPart={renderGroupedPart} - lastContentIdx={lastContentIdx} - groupAttachments={group.groupAttachments} - initialExpansionState={toolGroupExpansionRef.current.get(groupId)} - onExpansionChange={(state) => handleGroupExpansionChange(groupId, state)} - /> - ); - })} - + + + + {renderPendingSkills()} + {showEmptyCursor && ( + + + + )} + {groupedParts.map((group) => { + if (group.type === 'single') { + const { part, idx } = group.part; + return renderPart(part, idx, idx === lastContentIdx); + } + const { groupId } = group; + return ( + p.idx === lastContentIdx)} + renderPart={renderGroupedPart} + lastContentIdx={lastContentIdx} + groupAttachments={group.groupAttachments} + initialExpansionState={toolGroupExpansionRef.current.get(groupId)} + onExpansionChange={(state) => handleGroupExpansionChange(groupId, state)} + /> + ); + })} + + ); }); diff --git a/client/src/components/Chat/Messages/Content/Part.tsx b/client/src/components/Chat/Messages/Content/Part.tsx index 6df833370a..6998cdbe5f 100644 --- a/client/src/components/Chat/Messages/Content/Part.tsx +++ b/client/src/components/Chat/Messages/Content/Part.tsx @@ -22,16 +22,19 @@ import { BashCall, SubagentCall, } from './Parts'; +import { getAskUserQuestionPart } from '~/utils/approval'; +import { isBashProgrammaticToolCall } from './routing'; import { ErrorMessage } from './MessageContent'; +import AskUserQuestion from './AskUserQuestion'; import RetrievalCall from './RetrievalCall'; import { getCachedPreview } from '~/utils'; +import ToolApproval from './ToolApproval'; import AgentHandoff from './AgentHandoff'; import CodeAnalyze from './CodeAnalyze'; import Container from './Container'; import WebSearch from './WebSearch'; import ToolCall from './ToolCall'; import Image from './Image'; -import { isBashProgrammaticToolCall } from './routing'; type PartProps = { part?: TMessageContentParts; @@ -58,6 +61,16 @@ const Part = memo(function Part({ return null; } + const askUserQuestion = getAskUserQuestionPart(part); + if (askUserQuestion) { + return ( + + ); + } + if (part.type === ContentTypes.ERROR) { return ( - ); - } else if ( - isToolCall && - (toolCall.name === Tools.execute_code || - toolCall.name === Constants.PROGRAMMATIC_TOOL_CALLING || - toolCall.name === Constants.BASH_PROGRAMMATIC_TOOL_CALLING) - ) { - return ( - - ); - } else if ( - isToolCall && - (toolCall.name === 'image_gen_oai' || - toolCall.name === 'image_edit_oai' || - toolCall.name === 'gemini_image_gen') - ) { - return ( - - ); - } else if (isToolCall && toolCall.name === 'skill') { - return ( - - ); - } else if (isToolCall && toolCall.name === Constants.SUBAGENT) { - /** `subagent_content` is the aggregated content-parts array the - * backend writes onto the tool_call at message-save time so the - * child's activity survives a page refresh. Not present on older - * runs recorded before the persistence path existed — those fall - * back to the Recoil atom (live session) or the raw tool output - * inside `SubagentCall`. */ - const persistedContent = ( - toolCall as unknown as { - subagent_content?: TMessageContentParts[]; + if (isToolCall) { + const card = (() => { + if (isBashProgrammaticToolCall(toolCall.name, toolCall.args)) { + return ( + + ); + } else if ( + toolCall.name === Tools.execute_code || + toolCall.name === Constants.PROGRAMMATIC_TOOL_CALLING || + toolCall.name === Constants.BASH_PROGRAMMATIC_TOOL_CALLING + ) { + return ( + + ); + } else if ( + toolCall.name === 'image_gen_oai' || + toolCall.name === 'image_edit_oai' || + toolCall.name === 'gemini_image_gen' + ) { + return ( + + ); + } else if (toolCall.name === 'skill') { + return ( + + ); + } else if (toolCall.name === Constants.SUBAGENT) { + /** `subagent_content` is the aggregated content-parts array the + * backend writes onto the tool_call at message-save time so the + * child's activity survives a page refresh. Not present on older + * runs recorded before the persistence path existed — those fall + * back to the Recoil atom (live session) or the raw tool output + * inside `SubagentCall`. */ + const persistedContent = ( + toolCall as unknown as { + subagent_content?: TMessageContentParts[]; + } + ).subagent_content; + return ( + + ); + } else if (toolCall.name === 'read_file') { + return ( + + ); + } else if (toolCall.name === 'create_file' || toolCall.name === 'edit_file') { + return ( + + ); + } else if (toolCall.name === Tools.bash_tool) { + return ( + + ); + } else if (toolCall.name === Tools.web_search) { + return ( + + ); + } else if (toolCall.name === 'file_search' || toolCall.name === 'retrieval') { + return ( + + ); + } else if (toolCall.name?.startsWith(Constants.LC_TRANSFER_TO_)) { + return ; } - ).subagent_content; - return ( - - ); - } else if (isToolCall && toolCall.name === 'read_file') { - return ( - - ); - } else if (isToolCall && (toolCall.name === 'create_file' || toolCall.name === 'edit_file')) { - return ( - - ); - } else if (isToolCall && toolCall.name === Tools.bash_tool) { - return ( - - ); - } else if (isToolCall && toolCall.name === Tools.web_search) { - return ( - - ); - } else if (isToolCall && (toolCall.name === 'file_search' || toolCall.name === 'retrieval')) { - return ( - - ); - } else if (isToolCall && toolCall.name?.startsWith(Constants.LC_TRANSFER_TO_)) { - return ; - } else if (isToolCall) { - return ( - - ); + return ( + + ); + })(); + + /** Render approval controls for ANY paused agent tool — not just the generic + * card — so a HITL policy that gates a specialized tool (bash, code, file…) + * still surfaces approve/reject/edit/respond. Only while the call is unresolved + * (no output yet). */ + if (toolCall.approval != null && (toolCall.output?.length ?? 0) === 0) { + return ( + <> + {card} + + + ); + } + return card; } else if (toolCall.type === ToolCallTypes.CODE_INTERPRETER) { const code_interpreter = toolCall[ToolCallTypes.CODE_INTERPRETER]; return ( diff --git a/client/src/components/Chat/Messages/Content/Parts/SubagentCall.tsx b/client/src/components/Chat/Messages/Content/Parts/SubagentCall.tsx index 23f5537e24..69060ca939 100644 --- a/client/src/components/Chat/Messages/Content/Parts/SubagentCall.tsx +++ b/client/src/components/Chat/Messages/Content/Parts/SubagentCall.tsx @@ -3,19 +3,18 @@ import { useRecoilValue } from 'recoil'; import { ContentTypes, EModelEndpoint } from 'librechat-data-provider'; import { ArrowDown, ChevronRight, Maximize2, Minimize2, Users } from 'lucide-react'; import { OGDialog, OGDialogContent, OGDialogTitle, OGDialogDescription } from '@librechat/client'; - -import type { TAttachment, TMessage, TMessageContentParts } from 'librechat-data-provider'; +import type { Agents, TAttachment, TMessage, TMessageContentParts } from 'librechat-data-provider'; import type { PartWithIndex } from '~/components/Chat/Messages/Content/ParallelContent'; import type { SubagentTickerLine } from '~/utils/subagentContent'; - import ToolCallGroup from '~/components/Chat/Messages/Content/ToolCallGroup'; import MarkdownLite from '~/components/Chat/Messages/Content/MarkdownLite'; +import ToolApproval from '~/components/Chat/Messages/Content/ToolApproval'; import { cn, groupSequentialToolCalls, parseToolName } from '~/utils'; import Container from '~/components/Chat/Messages/Content/Container'; import ToolCall from '~/components/Chat/Messages/Content/ToolCall'; import { MessageContext } from '~/Providers/MessageContext'; -import { subagentProgressByToolCallId } from '~/store'; import MessageIcon from '~/components/Share/MessageIcon'; +import { subagentProgressByToolCallId } from '~/store'; import { useAgentsMapContext } from '~/Providers'; import { AttachmentGroup } from './Attachment'; import { useLocalize } from '~/hooks'; @@ -841,15 +840,17 @@ function SubagentDialogPart({ const tc = ( part as { [ContentTypes.TOOL_CALL]?: { + id?: string; args?: string | Record; output?: string; name?: string; progress?: number; + approval?: Agents.ToolCall['approval']; }; } )[ContentTypes.TOOL_CALL]; if (!tc) return null; - return ( + const toolCall = ( ); + // Surface approve/reject/edit controls for a tool paused INSIDE this subagent — + // its tool_call lives in subagent_content, not as a top-level message part, so the + // top-level Part.tsx render never sees it. Only while unresolved (no output yet). + // The dialog portals but React context still flows, so ToolApproval resolves here. + if (tc.approval != null && (tc.output?.length ?? 0) === 0) { + return ( + <> + {toolCall} + + + ); + } + return toolCall; } return null; } diff --git a/client/src/components/Chat/Messages/Content/ToolApproval.tsx b/client/src/components/Chat/Messages/Content/ToolApproval.tsx new file mode 100644 index 0000000000..1068eadd1e --- /dev/null +++ b/client/src/components/Chat/Messages/Content/ToolApproval.tsx @@ -0,0 +1,273 @@ +import { useEffect, useMemo, useState } from 'react'; +import { Button, TextareaAutosize } from '@librechat/client'; +import { Check, X, Pencil, MessageSquare, TriangleAlert } from 'lucide-react'; +import type { Agents } from 'librechat-data-provider'; +import type { TranslationKeys } from '~/hooks'; +import { useApprovalContext, useResumeSubmit } from './ApprovalContext'; +import { useLocalize } from '~/hooks'; +import { cn, logger } from '~/utils'; + +/** + * The resume route rejects an `edit` whose `editedArguments` isn't a plain object + * (`findIncompleteDecisions`), so mirror that here — `JSON.parse` alone also accepts + * `null`/arrays/primitives, which would enable Submit for a value that can only 400. + */ +function isPlainObject(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +type DecisionType = Agents.ToolApprovalDecisionType; + +const DECISION_ICON: Record> = { + approve: Check, + reject: X, + edit: Pencil, + respond: MessageSquare, +}; + +const DECISION_LABEL: Record = { + approve: 'com_ui_approve', + reject: 'com_ui_reject', + edit: 'com_ui_edit', + respond: 'com_ui_respond', +}; + +/** Pretty-print tool args as JSON for the `edit` textarea seed. */ +function seedArgs(args: string | Record | undefined): string { + if (args == null) { + return '{}'; + } + if (typeof args === 'string') { + try { + return JSON.stringify(JSON.parse(args), null, 2); + } catch { + return args; + } + } + try { + return JSON.stringify(args, null, 2); + } catch (e) { + logger.error('ToolApproval - failed to stringify args', e); + return '{}'; + } +} + +/** + * Renders approve / reject / edit / respond controls for a paused tool call, + * scoped to the decisions the server allows. Records its decision in the + * batch {@link useApprovalContext}; the lead card additionally renders the + * single submit button covering every paused call in the action. + */ +export default function ToolApproval({ + approval, + toolCallId, + args, +}: { + approval: NonNullable; + toolCallId: string; + args: string | Record | undefined; +}) { + const localize = useLocalize(); + const { actionId, allowed_decisions: allowedDecisions, description } = approval; + const { + registerToolCall, + unregisterToolCall, + setDecision, + isReady, + getStatus, + getLeadToolCallId, + getRegisteredCount, + } = useApprovalContext(); + const { submitToolApproval } = useResumeSubmit(); + + const [active, setActive] = useState(null); + const [editText, setEditText] = useState(() => seedArgs(args)); + const [responseText, setResponseText] = useState(''); + const [reason, setReason] = useState(''); + + useEffect(() => { + registerToolCall(actionId, toolCallId); + // Drop the registration when this card unmounts (e.g. the tool resolved and the + // card was replaced) so a stale entry can't keep the batch's `isReady` false. + return () => unregisterToolCall(actionId, toolCallId); + }, [registerToolCall, unregisterToolCall, actionId, toolCallId]); + + const status = getStatus(actionId); + const locked = status === 'submitting' || status === 'submitted' || status === 'expired'; + + /** Recompute and store this card's decision whenever inputs change. A null + * resolution (e.g. invalid edit JSON) clears it so submit stays disabled. */ + useEffect(() => { + if (locked) { + return; + } + if (active == null) { + setDecision(actionId, toolCallId, null); + return; + } + if (active === 'approve') { + setDecision(actionId, toolCallId, { tool_call_id: toolCallId, decision: 'approve' }); + return; + } + if (active === 'reject') { + setDecision(actionId, toolCallId, { + tool_call_id: toolCallId, + decision: 'reject', + reason: reason.trim() || undefined, + }); + return; + } + if (active === 'respond') { + const trimmed = responseText.trim(); + setDecision( + actionId, + toolCallId, + trimmed.length > 0 + ? { tool_call_id: toolCallId, decision: 'respond', responseText: trimmed } + : null, + ); + return; + } + if (active === 'edit') { + try { + const parsed = JSON.parse(editText) as unknown; + setDecision( + actionId, + toolCallId, + isPlainObject(parsed) + ? { tool_call_id: toolCallId, decision: 'edit', editedArguments: parsed } + : null, + ); + } catch { + setDecision(actionId, toolCallId, null); + } + } + }, [active, editText, responseText, reason, locked, setDecision, actionId, toolCallId]); + + const editIsValid = useMemo(() => { + if (active !== 'edit') { + return true; + } + try { + return isPlainObject(JSON.parse(editText)); + } catch { + return false; + } + }, [active, editText]); + + const isLead = getLeadToolCallId(actionId) === toolCallId; + const count = getRegisteredCount(actionId); + const ready = isReady(actionId); + + const submitLabel = useMemo(() => { + if (status === 'submitting') { + return localize('com_ui_submitting'); + } + if (count > 1) { + return localize('com_ui_submit_decisions', { 0: count }); + } + return localize('com_ui_submit'); + }, [status, count, localize]); + + if (status === 'submitted') { + return null; + } + + return ( +
+ {description != null && description.length > 0 && ( +

{description}

+ )} +
+ {allowedDecisions.map((decision) => { + const Icon = DECISION_ICON[decision]; + return ( + + ); + })} +
+ + {active === 'edit' && ( +
+ setEditText(e.target.value)} + minRows={3} + maxRows={16} + className={cn( + 'w-full resize-none rounded-md border bg-surface-primary p-2 font-mono text-xs', + editIsValid ? 'border-border-light' : 'border-red-500', + )} + aria-label={localize('com_ui_edit')} + /> + {!editIsValid && ( + {localize('com_ui_invalid_json')} + )} +
+ )} + + {active === 'respond' && ( + setResponseText(e.target.value)} + minRows={2} + maxRows={12} + placeholder={localize('com_ui_tool_response_placeholder')} + className="w-full resize-none rounded-md border border-border-light bg-surface-primary p-2 text-sm" + aria-label={localize('com_ui_respond')} + /> + )} + + {active === 'reject' && ( + setReason(e.target.value)} + minRows={1} + maxRows={6} + placeholder={localize('com_ui_reject_reason_placeholder')} + className="w-full resize-none rounded-md border border-border-light bg-surface-primary p-2 text-sm" + aria-label={localize('com_ui_reject')} + /> + )} + + {isLead && ( +
+ + {status === 'expired' && ( + + + )} + {status === 'error' && ( + + + )} +
+ )} +
+ ); +} diff --git a/client/src/data-provider/SSE/mutations.ts b/client/src/data-provider/SSE/mutations.ts index 0861babbe9..2d95bb377b 100644 --- a/client/src/data-provider/SSE/mutations.ts +++ b/client/src/data-provider/SSE/mutations.ts @@ -1,5 +1,6 @@ import { useMutation } from '@tanstack/react-query'; -import { apiBaseUrl, request } from 'librechat-data-provider'; +import { apiBaseUrl, request, EModelEndpoint } from 'librechat-data-provider'; +import type { Agents, TEphemeralAgent } from 'librechat-data-provider'; export interface AbortStreamParams { /** The stream ID to abort (if known) */ @@ -40,3 +41,102 @@ export function useAbortStreamMutation() { mutationFn: abortStream, }); } + +/** + * Agent/endpoint selection fields the resume route needs so its shared + * `buildEndpointOption` middleware can reconstruct the same agent that paused. + * Mirrors the fields a normal `POST /api/agents/chat` message carries; sourced + * from the active conversation (and the conversation's ephemeral-agent state). + */ +export interface ResumeAgentFields { + conversationId: string; + endpoint?: EModelEndpoint | string | null; + endpointType?: EModelEndpoint | string | null; + agent_id?: string | null; + model?: string | null; + spec?: string | null; + /** Ephemeral agents derive instructions from this; re-sent so resume rebuilds the + * same graph (and matches the server's request fingerprint). */ + promptPrefix?: string | null; + ephemeralAgent?: TEphemeralAgent | null; + isTemporary?: boolean; +} + +/** Successful resume ACK. The continuation streams over the existing SSE. */ +export interface ResumeResponse { + streamId: string; + conversationId: string; + status: 'resuming'; +} + +/** + * Shared base for both resume payloads: the agent selection fields, plus the + * `endpoint` defaulted to `agents` (the resume route lives under the agents + * router and `buildEndpointOption` keys off it). + */ +const buildResumeBase = (fields: ResumeAgentFields) => ({ + ...fields, + endpoint: fields.endpoint ?? EModelEndpoint.agents, +}); + +export interface SubmitToolApprovalParams extends ResumeAgentFields { + /** Identifies the paused action; the server 409s a stale/mismatched id. */ + actionId: string; + /** One entry per paused tool_call_id — the server 400s a partial batch. */ + decisions: Agents.ToolApprovalResolution[]; +} + +/** + * Submit a batch of tool-approval decisions to resume a paused generation. + * POSTs to the shared resume route; the continuation streams over the existing + * SSE connection (this only fires the POST — it does not open a new stream). + */ +export const submitToolApproval = async ( + params: SubmitToolApprovalParams, +): Promise => { + const { actionId, decisions, ...fields } = params; + return request.post(`${apiBaseUrl()}/api/agents/chat/resume`, { + ...buildResumeBase(fields), + actionId, + decisions, + }) as Promise; +}; + +/** + * React Query mutation hook for submitting tool-approval decisions. + * Mirrors {@link useAbortStreamMutation}; the resumed stream arrives on the SSE. + */ +export function useSubmitToolApprovalMutation() { + return useMutation({ + mutationFn: submitToolApproval, + }); +} + +export interface SubmitAskAnswerParams extends ResumeAgentFields { + actionId: string; + /** Free-form answer to the agent's ask-user question. */ + answer: string; +} + +/** + * Submit an ask-user-question answer to resume a paused generation. + * POSTs to the shared resume route; the continuation streams over the existing SSE. + */ +export const submitAskAnswer = async (params: SubmitAskAnswerParams): Promise => { + const { actionId, answer, ...fields } = params; + return request.post(`${apiBaseUrl()}/api/agents/chat/resume`, { + ...buildResumeBase(fields), + actionId, + answer, + }) as Promise; +}; + +/** + * React Query mutation hook for submitting an ask-user answer. + * Mirrors {@link useAbortStreamMutation}; the resumed stream arrives on the SSE. + */ +export function useSubmitAskAnswerMutation() { + return useMutation({ + mutationFn: submitAskAnswer, + }); +} diff --git a/client/src/data-provider/SSE/queries.ts b/client/src/data-provider/SSE/queries.ts index 8cd116394b..3f9830712f 100644 --- a/client/src/data-provider/SSE/queries.ts +++ b/client/src/data-provider/SSE/queries.ts @@ -1,6 +1,6 @@ import { useEffect, useMemo, useState } from 'react'; -import { apiBaseUrl, QueryKeys, request, dataService } from 'librechat-data-provider'; import { useQuery, useQueries, useQueryClient } from '@tanstack/react-query'; +import { apiBaseUrl, QueryKeys, request, dataService } from 'librechat-data-provider'; import type { Agents, TConversation } from 'librechat-data-provider'; import { isNotFoundError, updateConvoInAllQueries } from '~/utils'; import { useGetStartupConfig } from '../Endpoints'; @@ -8,10 +8,13 @@ import { useGetStartupConfig } from '../Endpoints'; export interface StreamStatusResponse { active: boolean; streamId?: string; - status?: 'running' | 'complete' | 'error' | 'aborted'; + status?: 'running' | 'complete' | 'error' | 'aborted' | 'requires_action'; aggregatedContent?: Array<{ type: string; text?: string }>; createdAt?: number; resumeState?: Agents.ResumeState; + /** Live pending approval when `status === 'requires_action'`; mirrors + * `resumeState.pendingAction`, surfaced top-level for the resume-on-load path. */ + pendingAction?: Agents.PendingAction; } export const streamStatusQueryKey = (conversationId: string) => ['streamStatus', conversationId]; diff --git a/client/src/hooks/SSE/__tests__/useResumeOnLoad.spec.tsx b/client/src/hooks/SSE/__tests__/useResumeOnLoad.spec.tsx index fdb6d79c9f..03a5a9b3d1 100644 --- a/client/src/hooks/SSE/__tests__/useResumeOnLoad.spec.tsx +++ b/client/src/hooks/SSE/__tests__/useResumeOnLoad.spec.tsx @@ -261,6 +261,62 @@ describe('useResumeOnLoad', () => { ); }); + it('strips the paused user/assistant rows from submission.messages (no duplicate on resume)', async () => { + const observedSubmissions: Array = []; + mockUseStreamStatus.mockReturnValue({ + isSuccess: true, + isFetching: false, + data: { + active: true, + status: 'running', + streamId: CONVERSATION_ID, + resumeState: { + runSteps: [], + aggregatedContent: [{ type: 'text', text: 'Streaming...' }], + responseMessageId: RESPONSE_MESSAGE_ID, + conversationId: CONVERSATION_ID, + sender: 'Agent', + userMessage: { + messageId: USER_MESSAGE_ID, + parentMessageId: Constants.NO_PARENT, + conversationId: CONVERSATION_ID, + text: 'Hello', + }, + }, + }, + }); + + renderUseResumeOnLoad({ + // The reloaded DB array already holds the paused user row + the partial + // (unfinished) assistant row under the same ids the resume re-supplies. + messages: [ + buildUserMessage(CONVERSATION_ID), + { + messageId: RESPONSE_MESSAGE_ID, + parentMessageId: USER_MESSAGE_ID, + conversationId: CONVERSATION_ID, + text: '', + isCreatedByUser: false, + unfinished: true, + } as TMessage, + ], + onSubmission: (currentSubmission) => observedSubmissions.push(currentSubmission), + }); + + await act(async () => { + await Promise.resolve(); + }); + + const submission = observedSubmissions[observedSubmissions.length - 1]; + const ids = (submission?.messages ?? []).map((m) => m.messageId); + // Stripped from the flat array (re-supplied via the placeholders + final event)... + expect(ids).not.toContain(USER_MESSAGE_ID); + expect(ids).not.toContain(RESPONSE_MESSAGE_ID); + // ...but still carried on the placeholders for re-insertion. + expect(submission?.userMessage?.messageId).toBe(USER_MESSAGE_ID); + expect(submission?.initialResponse?.messageId).toBe(RESPONSE_MESSAGE_ID); + }); + it('restores the branch that owns a pending OAuth resume user message', async () => { const rootUser = buildUserMessage(CONVERSATION_ID, 'root-user'); const branchOneResponse = { diff --git a/client/src/hooks/SSE/useResumableSSE.ts b/client/src/hooks/SSE/useResumableSSE.ts index 501f8a73c5..e898a261f8 100644 --- a/client/src/hooks/SSE/useResumableSSE.ts +++ b/client/src/hooks/SSE/useResumableSSE.ts @@ -12,6 +12,7 @@ import { apiBaseUrl, UsageEvents, createPayload, + ApprovalEvents, ViolationTypes, removeNullishValues, } from 'librechat-data-provider'; @@ -29,10 +30,13 @@ import type { TResData } from '~/common'; import { logger, clearAllDrafts, + applyPendingAction, removeConvoFromAllQueries, upsertConvoInAllQueries, + countTaggedApprovalParts, countTrailingOutputChars, markStreamStartFailedMetadata, + findPendingActionMessageIndex, } from '~/utils'; import { useGetUserBalance, @@ -449,6 +453,9 @@ export default function useResumableSSE( const submissionRef = useRef(null); const optimisticStreamIdsRef = useRef(new Set()); const createdStreamIdsRef = useRef(new Set()); + /** Pending action whose tool-call content part hasn't rendered yet — retried + * on the next frame so a fast pause-before-render race still attaches. */ + const pendingActionRetryRef = useRef(null); const { stepHandler, @@ -509,6 +516,60 @@ export default function useResumableSSE( } }; + /** + * Maps a pending action onto the in-flight response message so the + * approval / ask-user UI renders. Syncs the result back into the step + * handler's map so subsequent deltas build on the approval-tagged content + * rather than clobbering it. + * + * If the mapping is a no-op (the paused tool-call content part hasn't + * rendered yet — a pause-before-render race), retry on subsequent frames + * so the approval still attaches. `ask_user_question` always applies (it + * appends a synthetic part), so only `tool_approval` ever retries. + */ + // Keep retrying across frames (not just once): Recoil/React message updates from + // the preceding created/step events are async and can take several frames under + // load, so a single retry would drop a valid pause and leave the run with no + // approval controls. Bounded so a genuinely-absent target can't spin forever. + const PENDING_ACTION_MAX_RETRY_FRAMES = 120; + const applyPendingActionToMessages = (pendingAction: Agents.PendingAction, attempt = 0) => { + const retryNextFrame = () => { + if (attempt < PENDING_ACTION_MAX_RETRY_FRAMES) { + pendingActionRetryRef.current = requestAnimationFrame(() => + applyPendingActionToMessages(pendingAction, attempt + 1), + ); + } + }; + const messages = getMessages() ?? []; + const index = findPendingActionMessageIndex(messages, pendingAction); + if (index < 0) { + retryNextFrame(); + return; + } + const updated = applyPendingAction(messages[index], pendingAction); + const changed = updated !== messages[index]; + if (changed) { + const nextMessages = [...messages]; + nextMessages[index] = updated; + setMessages(nextMessages); + syncStepMessage(updated); + } + // A `tool_approval` pause can carry several `action_requests` whose tool-call + // parts render on different frames; tagging only the first to arrive would leave + // late siblings with no approval card, and the resume route then 400s the partial + // batch ("every paused tool call must be decided"). Keep retrying (bounded) until + // EVERY paused call is tagged. `ask_user_question` applies its single synthetic + // part in one shot, so it only needs the original "did anything change" retry. + if (pendingAction.payload.type === 'tool_approval') { + const expected = pendingAction.payload.action_requests.length; + if (countTaggedApprovalParts(updated, pendingAction.actionId) < expected) { + retryNextFrame(); + } + } else if (!changed) { + retryNextFrame(); + } + }; + const baseUrl = `${apiBaseUrl()}/api/agents/chat/stream/${encodeURIComponent(currentStreamId)}`; const url = isResume ? `${baseUrl}?resume=true` : baseUrl; logger.log('ResumableSSE', 'Subscribing to stream:', url, { isResume }); @@ -614,6 +675,12 @@ export default function useResumableSSE( return; } + if (data.event === ApprovalEvents.ON_PENDING_ACTION) { + applyPendingActionToMessages(data.data as Agents.PendingAction); + setIsSubmitting(true); + return; + } + if (data.event != null) { if ( data.event === StepEvents.ON_MESSAGE_DELTA || @@ -749,6 +816,15 @@ export default function useResumableSSE( } } + /** + * Re-pause on reconnect: the run is parked on a human-review + * interrupt. Re-apply the pending action so the approval / ask-user + * controls render after a reload or dropped connection. + */ + if (data.resumeState?.pendingAction) { + applyPendingActionToMessages(data.resumeState.pendingAction as Agents.PendingAction); + } + if (data.resumeState?.titleEvent) { titleHandler(data.resumeState.titleEvent); } @@ -763,6 +839,10 @@ export default function useResumableSSE( contextHandler(replayEvent.data, resumeSubmission); } else if (replayEvent.event === UsageEvents.ON_TOKEN_USAGE) { usageHandler(replayEvent.data, resumeSubmission); + } else if (replayEvent.event === ApprovalEvents.ON_PENDING_ACTION) { + // A pause that landed after the resume snapshot must still render its + // controls (mirror the live handler), not fall through to stepHandler. + applyPendingActionToMessages(replayEvent.data as Agents.PendingAction); } else if (replayEvent.event != null) { if ( replayEvent.event === StepEvents.ON_MESSAGE_DELTA || @@ -784,6 +864,12 @@ export default function useResumableSSE( contextHandler(pendingEvent.data, resumeSubmission); } else if (pendingEvent.event === UsageEvents.ON_TOKEN_USAGE) { usageHandler(pendingEvent.data, resumeSubmission); + } else if (pendingEvent.event === ApprovalEvents.ON_PENDING_ACTION) { + // In-memory mode can surface a pause that landed between getResumeState() + // and the subscription here; route it to the same handler as a live event + // so the approval / ask-user controls render (else the stream sits paused + // with no UI until a full status reload). + applyPendingActionToMessages(pendingEvent.data as Agents.PendingAction); } else if (pendingEvent.event != null) { if ( pendingEvent.event === StepEvents.ON_MESSAGE_DELTA || @@ -1289,6 +1375,10 @@ export default function useResumableSSE( clearTimeout(reconnectTimeoutRef.current); reconnectTimeoutRef.current = null; } + if (pendingActionRetryRef.current != null) { + cancelAnimationFrame(pendingActionRetryRef.current); + pendingActionRetryRef.current = null; + } // Reset reconnect counter before closing (so abort handler doesn't think we're reconnecting) reconnectAttemptRef.current = 0; if (sseRef.current) { diff --git a/client/src/hooks/SSE/useResumeOnLoad.ts b/client/src/hooks/SSE/useResumeOnLoad.ts index 97c1bc91c0..abd7f748cf 100644 --- a/client/src/hooks/SSE/useResumeOnLoad.ts +++ b/client/src/hooks/SSE/useResumeOnLoad.ts @@ -3,7 +3,7 @@ import { useSetRecoilState, useRecoilValue, useRecoilCallback } from 'recoil'; import { Constants, tMessageSchema, isAssistantsEndpoint } from 'librechat-data-provider'; import type { TMessage, TConversation, TSubmission, Agents } from 'librechat-data-provider'; import type { StreamStatusResponse } from '~/data-provider'; -import { getBranchSiblingIndexesForTarget } from '~/utils'; +import { getBranchSiblingIndexesForTarget, applyPendingAction } from '~/utils'; import { useStreamStatus } from '~/data-provider'; import store from '~/store'; @@ -130,7 +130,7 @@ function buildSubmissionFromResumeState( // ALWAYS use aggregatedContent from resumeState - it has the latest content from the running job. // DB content may be stale (saved at disconnect, but generation continued). - const initialResponse: TMessage = { + let initialResponse: TMessage = { messageId: existingResponseMessage?.messageId ?? responseMessageId, parentMessageId: existingResponseMessage?.parentMessageId ?? userMessage.messageId, conversationId, @@ -144,14 +144,33 @@ function buildSubmissionFromResumeState( iconURL: preferDefinedString(existingResponseMessage?.iconURL, resumeState.iconURL), } as TMessage; + // Re-paused turn: seed the approval / ask-user controls straight onto the + // placeholder so they render on load without waiting for the SSE sync replay. + if (resumeState.pendingAction) { + initialResponse = applyPendingAction(initialResponse, resumeState.pendingAction); + } + const conversation: TConversation = { conversationId, title: 'Resumed Chat', endpoint: null, } as TConversation; + // On reload, `messages` is the full DB array, which already holds the paused user + // row and the partial (unfinished) assistant row under the same ids that + // `userMessage` / `initialResponse` (and the resume final event's request/response + // messages) re-supply. Strip them so createdHandler/finalHandler — which build + // `[...messages, requestMessage, responseMessage]` — don't append a duplicate pair. + const pausedResponseIdUnpadded = initialResponse.messageId.replace(/_+$/, ''); + const dedupedMessages = messages.filter( + (m) => + m.messageId !== userMessage.messageId && + m.messageId !== initialResponse.messageId && + m.messageId !== pausedResponseIdUnpadded, + ); + return { - messages, + messages: dedupedMessages, userMessage, initialResponse, conversation, diff --git a/client/src/hooks/SSE/useSSE.ts b/client/src/hooks/SSE/useSSE.ts index 8f18f1d556..f1f17eaee1 100644 --- a/client/src/hooks/SSE/useSSE.ts +++ b/client/src/hooks/SSE/useSSE.ts @@ -7,16 +7,23 @@ import { UsageEvents, StepEvents, createPayload, + ApprovalEvents, removeNullishValues, } from 'librechat-data-provider'; -import type { TMessage, TPayload, TSubmission, EventSubmission } from 'librechat-data-provider'; +import type { + Agents, + TMessage, + TPayload, + TSubmission, + EventSubmission, +} from 'librechat-data-provider'; import type { EventHandlerParams } from './useEventHandlers'; import type { TResData } from '~/common'; +import { clearAllDrafts, applyPendingAction, findPendingActionMessageIndex } from '~/utils'; import { useGetStartupConfig, useGetUserBalance } from '~/data-provider'; import { useAuthContext } from '~/hooks/AuthContext'; import useEventHandlers from './useEventHandlers'; import useUsageHandler from './useUsageHandler'; -import { clearAllDrafts } from '~/utils'; import store from '~/store'; type ChatHelpers = Pick< @@ -137,6 +144,18 @@ export default function useSSE( contextHandler(data.data, { ...submission, userMessage }); } else if (data.event === UsageEvents.ON_TOKEN_USAGE) { usageHandler(data.data, { ...submission, userMessage }); + } else if (data.event === ApprovalEvents.ON_PENDING_ACTION) { + const pendingAction = data.data as Agents.PendingAction; + const messages = getMessages() ?? []; + const index = findPendingActionMessageIndex(messages, pendingAction); + if (index >= 0) { + const updated = applyPendingAction(messages[index], pendingAction); + if (updated !== messages[index]) { + const nextMessages = [...messages]; + nextMessages[index] = updated; + setMessages(nextMessages); + } + } } else if (data.event != null) { if ( data.event === StepEvents.ON_MESSAGE_DELTA || diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json index e7af1f886a..1c32ffa291 100644 --- a/client/src/locales/en/translation.json +++ b/client/src/locales/en/translation.json @@ -829,6 +829,9 @@ "com_ui_api_keys_empty_text": "API keys let external applications run your agents through the OpenAI-compatible API. Create a key to get started.", "com_ui_api_keys_empty_title": "Connect your agents to external apps", "com_ui_api_keys_load_error": "Failed to load API keys", + "com_ui_approval_error": "Something went wrong submitting your decision. Please try again.", + "com_ui_approval_expired": "This request expired or was already handled.", + "com_ui_approve": "Approve", "com_ui_archive": "Archive", "com_ui_archive_delete_error": "Failed to delete archived conversation", "com_ui_archive_error": "Failed to archive conversation", @@ -1228,6 +1231,7 @@ "com_ui_initializing": "Initializing...", "com_ui_input": "Input", "com_ui_instructions": "Instructions", + "com_ui_invalid_json": "Invalid JSON", "com_ui_invocation_auto": "Auto", "com_ui_invocation_auto_info": "The skill is automatically applied by the agent when relevant to the conversation", "com_ui_invocation_both": "Both", @@ -1505,6 +1509,8 @@ "com_ui_regenerating": "Regenerating...", "com_ui_region": "Region", "com_ui_reinitialize": "Reinitialize", + "com_ui_reject": "Reject", + "com_ui_reject_reason_placeholder": "Optional: reason for rejecting (shared with the agent)", "com_ui_relevance": "Relevance", "com_ui_remote_access": "Remote Access", "com_ui_remote_agent_role_editor": "Editor", @@ -1534,6 +1540,7 @@ "com_ui_reset_var": "Reset {{0}}", "com_ui_reset_zoom": "Reset Zoom", "com_ui_resource": "resource", + "com_ui_respond": "Respond", "com_ui_response": "Response", "com_ui_result": "Result", "com_ui_result_found": "{{count}} result found", @@ -1794,6 +1801,8 @@ "com_ui_subagent_ticker_writing": "Writing", "com_ui_subagent_waiting": "Waiting for first update…", "com_ui_submit": "Submit", + "com_ui_submit_decisions": "Submit {{0}} decisions", + "com_ui_submitting": "Submitting…", "com_ui_summarized": "Summarized", "com_ui_summarizing": "Summarizing...", "com_ui_support_contact": "Support Contact", @@ -1825,6 +1834,7 @@ "com_ui_tool_name_image_edit": "Image Edit", "com_ui_tool_name_image_gen": "Image Generation", "com_ui_tool_name_web_search": "Web Search", + "com_ui_tool_response_placeholder": "Type a substitute result to return to the agent", "com_ui_tools": "Tools", "com_ui_tools_and_actions": "Tools and Actions", "com_ui_transferred_to": "Transferred to", @@ -1927,6 +1937,7 @@ "com_ui_xhigh": "Extra High", "com_ui_yes": "Yes", "com_ui_you": "You", + "com_ui_your_answer": "Type your answer…", "com_ui_your_api_key": "Your API Key", "com_ui_your_projects": "Your projects", "com_ui_zoom": "Zoom", diff --git a/client/src/utils/approval.spec.ts b/client/src/utils/approval.spec.ts new file mode 100644 index 0000000000..49265a9995 --- /dev/null +++ b/client/src/utils/approval.spec.ts @@ -0,0 +1,299 @@ +import { ContentTypes } from 'librechat-data-provider'; +import type { Agents, TMessage, TMessageContentParts } from 'librechat-data-provider'; +import { + ASK_USER_QUESTION, + applyPendingAction, + countTaggedApprovalParts, + getAskUserQuestionPart, + findPendingActionMessageIndex, +} from './approval'; + +const toolCallPart = (id: string, extra: Record = {}): TMessageContentParts => + ({ + type: ContentTypes.TOOL_CALL, + [ContentTypes.TOOL_CALL]: { id, name: 'search', args: '{}', ...extra }, + }) as unknown as TMessageContentParts; + +const textPart = (text: string): TMessageContentParts => + ({ type: ContentTypes.TEXT, text }) as unknown as TMessageContentParts; + +const msg = (over: Partial = {}): TMessage => + ({ messageId: 'm1', isCreatedByUser: false, content: [], ...over }) as unknown as TMessage; + +const toolApprovalAction = (over: Record = {}): Agents.PendingAction => + ({ + actionId: 'a1', + streamId: 's1', + createdAt: 0, + payload: { + type: 'tool_approval', + action_requests: [ + { name: 'search', arguments: '{}', tool_call_id: 'tc1', description: 'Run search' }, + ], + review_configs: [ + { action_name: 'search', tool_call_id: 'tc1', allowed_decisions: ['approve', 'reject'] }, + ], + }, + ...over, + }) as unknown as Agents.PendingAction; + +const askAction = (over: Record = {}): Agents.PendingAction => + ({ + actionId: 'a1', + streamId: 's1', + createdAt: 0, + payload: { type: 'ask_user_question', question: { question: 'What name?' } }, + ...over, + }) as unknown as Agents.PendingAction; + +const getToolCall = (part: TMessageContentParts | undefined) => + (part as unknown as { tool_call?: Agents.ToolCall & { approval?: unknown } })?.tool_call; + +describe('applyPendingAction — tool_approval', () => { + it('joins by tool_call_id (not position) and sets approval from the matching request + review', () => { + // tc1 is the SECOND part — a by-position join would mis-target the first. + const message = msg({ content: [toolCallPart('tcX'), toolCallPart('tc1')] }); + const result = applyPendingAction(message, toolApprovalAction()); + + expect(result).not.toBe(message); // new reference: something changed + expect(getToolCall(result.content?.[0] as TMessageContentParts)?.approval).toBeUndefined(); + expect(getToolCall(result.content?.[1] as TMessageContentParts)?.approval).toEqual({ + actionId: 'a1', + allowed_decisions: ['approve', 'reject'], + description: 'Run search', + }); + }); + + it('leaves a completed tool call (with output) untouched and returns the same message reference', () => { + const message = msg({ content: [toolCallPart('tc1', { output: 'already ran' })] }); + const result = applyPendingAction(message, toolApprovalAction()); + expect(result).toBe(message); + }); + + it('defaults allowed_decisions to [] when no review config matches the tool call', () => { + const action = toolApprovalAction({ + payload: { + type: 'tool_approval', + action_requests: [{ name: 'search', arguments: '{}', tool_call_id: 'tc1' }], + review_configs: [], // no config for tc1 + }, + }); + const message = msg({ content: [toolCallPart('tc1')] }); + const result = applyPendingAction(message, action); + expect(getToolCall(result.content?.[0] as TMessageContentParts)?.approval).toMatchObject({ + allowed_decisions: [], + }); + }); + + it('returns the same message when content is empty or not an array', () => { + const empty = msg({ content: [] }); + expect(applyPendingAction(empty, toolApprovalAction())).toBe(empty); + const nonArray = msg({ content: undefined as unknown as TMessageContentParts[] }); + expect(applyPendingAction(nonArray, toolApprovalAction())).toBe(nonArray); + }); + + it('returns the same message when no tool call matches the pending request', () => { + const message = msg({ content: [toolCallPart('other')] }); + expect(applyPendingAction(message, toolApprovalAction())).toBe(message); + }); +}); + +describe('countTaggedApprovalParts', () => { + const twoToolAction = () => + toolApprovalAction({ + payload: { + type: 'tool_approval', + action_requests: [ + { name: 'search', arguments: '{}', tool_call_id: 'tc1' }, + { name: 'search', arguments: '{}', tool_call_id: 'tc2' }, + ], + review_configs: [], + }, + }); + + it('counts tagged parts so a partial multi-tool apply is detectable (1 of 2)', () => { + const action = twoToolAction(); + // Only tc1 has rendered when the action is first applied → 1 < 2, retry should continue. + const partial = applyPendingAction(msg({ content: [toolCallPart('tc1')] }), action); + expect(countTaggedApprovalParts(partial, 'a1')).toBe(1); + // Both siblings present → both tagged → retry can stop. + const full = applyPendingAction( + msg({ content: [toolCallPart('tc1'), toolCallPart('tc2')] }), + action, + ); + expect(countTaggedApprovalParts(full, 'a1')).toBe(2); + }); + + it('returns 0 when nothing is tagged or content is not an array', () => { + expect(countTaggedApprovalParts(msg({ content: [toolCallPart('tc1')] }), 'a1')).toBe(0); + expect(countTaggedApprovalParts(msg({ content: [textPart('hi')] }), 'a1')).toBe(0); + expect( + countTaggedApprovalParts( + msg({ content: undefined as unknown as TMessageContentParts[] }), + 'a1', + ), + ).toBe(0); + }); + + it('ignores parts tagged with a different actionId', () => { + const tagged = applyPendingAction( + msg({ content: [toolCallPart('tc1')] }), + toolApprovalAction(), + ); + expect(countTaggedApprovalParts(tagged, 'a1')).toBe(1); + expect(countTaggedApprovalParts(tagged, 'other-action')).toBe(0); + }); +}); + +describe('applyPendingAction — subagent-nested tool calls', () => { + const subagentMsg = (childId: string): TMessage => + msg({ + content: [ + { + type: ContentTypes.TOOL_CALL, + [ContentTypes.TOOL_CALL]: { + id: 'sub1', + name: 'subagent', + args: '{}', + subagent_content: [toolCallPart(childId)], + }, + } as unknown as TMessageContentParts, + ], + }); + + const childAction = () => + toolApprovalAction({ + payload: { + type: 'tool_approval', + action_requests: [{ name: 'search', arguments: '{}', tool_call_id: 'child-tc1' }], + review_configs: [ + { + action_name: 'search', + tool_call_id: 'child-tc1', + allowed_decisions: ['approve', 'reject'], + }, + ], + }, + }); + + it('tags a tool paused inside a subagent and makes it countable', () => { + const message = subagentMsg('child-tc1'); + const result = applyPendingAction(message, childAction()); + expect(result).not.toBe(message); // new reference + + const parentToolCall = getToolCall(result.content?.[0] as TMessageContentParts) as + | { subagent_content?: TMessageContentParts[] } + | undefined; + const nested = parentToolCall?.subagent_content?.[0]; + expect(getToolCall(nested)?.approval).toMatchObject({ actionId: 'a1' }); + // The retry loop's "all tagged" check is now reachable for the nested call. + expect(countTaggedApprovalParts(result, 'a1')).toBe(1); + }); + + it('returns the same message when no nested tool call matches', () => { + const message = subagentMsg('child-other'); + expect(applyPendingAction(message, childAction())).toBe(message); + expect(countTaggedApprovalParts(message, 'a1')).toBe(0); + }); +}); + +describe('applyPendingAction — ask_user_question', () => { + it('appends a synthetic ask-user-question part carrying the actionId and question', () => { + const message = msg({ content: [textPart('hello')] }); + const result = applyPendingAction(message, askAction()); + expect(result.content).toHaveLength(2); + const part = getAskUserQuestionPart(result.content?.[1] as TMessageContentParts); + expect(part?.[ASK_USER_QUESTION]).toMatchObject({ + actionId: 'a1', + question: { question: 'What name?' }, + }); + }); + + it('is idempotent on replay: the same actionId replaces in place rather than stacking', () => { + const once = applyPendingAction(msg({ content: [] }), askAction()); + const twice = applyPendingAction(once, askAction()); + expect(twice.content).toHaveLength(1); // not duplicated + + const other = applyPendingAction(twice, askAction({ actionId: 'a2' })); + expect(other.content).toHaveLength(2); // a different action does append + }); + + it('coerces non-array content to a single-part array', () => { + const message = msg({ content: undefined as unknown as TMessageContentParts[] }); + const result = applyPendingAction(message, askAction()); + expect(result.content).toHaveLength(1); + }); +}); + +describe('applyPendingAction — unsupported type', () => { + it('returns the original message unchanged', () => { + const message = msg({ content: [textPart('hi')] }); + const action = { + actionId: 'a1', + payload: { type: 'mystery' }, + } as unknown as Agents.PendingAction; + expect(applyPendingAction(message, action)).toBe(message); + }); +}); + +describe('getAskUserQuestionPart', () => { + it('returns the typed part for an ask-user-question synthetic part', () => { + const appended = applyPendingAction(msg({ content: [] }), askAction()); + const part = getAskUserQuestionPart(appended.content?.[0] as TMessageContentParts); + expect(part?.type).toBe(ASK_USER_QUESTION); + expect(part?.[ASK_USER_QUESTION].actionId).toBe('a1'); + }); + + it('returns undefined for a non-ask part, undefined, or a wrong type', () => { + expect(getAskUserQuestionPart(toolCallPart('tc1'))).toBeUndefined(); + expect(getAskUserQuestionPart(undefined)).toBeUndefined(); + expect(getAskUserQuestionPart(textPart('x'))).toBeUndefined(); + }); +}); + +describe('findPendingActionMessageIndex', () => { + const assistant = (messageId: string) => msg({ messageId, isCreatedByUser: false }); + const user = (messageId: string) => msg({ messageId, isCreatedByUser: true }); + + it('returns the index of the assistant message matching responseMessageId exactly', () => { + const messages = [user('u1'), assistant('r1'), assistant('r2')]; + const idx = findPendingActionMessageIndex( + messages, + toolApprovalAction({ responseMessageId: 'r1' }), + ); + expect(idx).toBe(1); + }); + + it('returns -1 (retry) when a provided responseMessageId matches only a user message', () => { + // `_` style ids could collide with the user bubble — never resolve to it; + // -1 makes the caller retry on the next frame once the assistant placeholder renders. + const messages = [user('shared'), assistant('r-last')]; + const idx = findPendingActionMessageIndex( + messages, + toolApprovalAction({ responseMessageId: 'shared' }), + ); + expect(idx).toBe(-1); + }); + + it('returns -1 (retry) when a provided responseMessageId is not found at all', () => { + // Provided-but-absent means the in-flight assistant placeholder is not in the list + // yet — defer rather than attach the prompt/approval to a prior reply. + const messages = [assistant('r1'), user('u1'), assistant('r2')]; + const idx = findPendingActionMessageIndex( + messages, + toolApprovalAction({ responseMessageId: 'missing' }), + ); + expect(idx).toBe(-1); + }); + + it('falls back to the last assistant message only when no responseMessageId is provided', () => { + const messages = [assistant('r1'), user('u1'), assistant('r2')]; + const idx = findPendingActionMessageIndex(messages, toolApprovalAction()); + expect(idx).toBe(2); + }); + + it('returns -1 for an empty list or when no assistant message exists', () => { + expect(findPendingActionMessageIndex([], toolApprovalAction())).toBe(-1); + expect(findPendingActionMessageIndex([user('u1')], toolApprovalAction())).toBe(-1); + }); +}); diff --git a/client/src/utils/approval.ts b/client/src/utils/approval.ts new file mode 100644 index 0000000000..6fdf29b8d9 --- /dev/null +++ b/client/src/utils/approval.ts @@ -0,0 +1,273 @@ +import { ContentTypes } from 'librechat-data-provider'; +import type { Agents, TMessage, TMessageContentParts } from 'librechat-data-provider'; + +/** + * UI-only content-part type used to render an `ask_user_question` pause inline + * with the assistant's other content parts. It rides on the standard `content` + * array (which `Agents.MessageContentComplex` allows for arbitrary `type` + * strings), so it survives SSE, sync, and DB rehydration without a new wire + * type or an extra field on `TMessage`. + */ +export const ASK_USER_QUESTION = 'ask_user_question' as const; + +/** Shape of the synthetic content part carrying an ask-user pending action. */ +export interface AskUserQuestionPart { + type: typeof ASK_USER_QUESTION; + [ASK_USER_QUESTION]: { + actionId: string; + question: Agents.AskUserQuestionRequest; + }; +} + +/** + * The synthetic type isn't in the `ContentTypes` union, so this reads the + * `type` field through a cast rather than a type-predicate (which TS rejects + * because `AskUserQuestionPart` isn't assignable to the strict + * `TMessageContentParts` union). + */ +const isAskUserQuestionPart = (part: TMessageContentParts | undefined): boolean => + (part as { type?: string } | undefined)?.type === ASK_USER_QUESTION && + part != null && + ASK_USER_QUESTION in part; + +const getToolCallId = (part: TMessageContentParts | undefined): string => + (part?.[ContentTypes.TOOL_CALL] as Agents.ToolCall | undefined)?.id ?? ''; + +/** A tool-call value that may carry approval state and/or nested subagent content. */ +type ToolCallWithApproval = Agents.ToolCall & { + approval?: unknown; + subagent_content?: TMessageContentParts[]; +}; + +/** + * Tags one tool-call part with the pending action's approval when it matches an + * `action_request` (joined by `tool_call_id`, NOT position) and is still unresolved. + * + * Recurses into a subagent's `subagent_content`: a tool paused INSIDE a subagent + * lives there, not as a top-level part, so without this the approval never attaches + * and the user gets no controls. Returns a NEW part only when something changed. + */ +function tagApprovalOnPart( + part: TMessageContentParts, + actionId: string, + requestByToolCallId: Map, + reviewByToolCallId: Map, +): { part: TMessageContentParts; changed: boolean } { + if (part?.type !== ContentTypes.TOOL_CALL) { + return { part, changed: false }; + } + const toolCall = part[ContentTypes.TOOL_CALL] as ToolCallWithApproval | undefined; + if (!toolCall) { + return { part, changed: false }; + } + + let nextToolCall = toolCall; + let changed = false; + + // Descend into nested subagent tool calls first. + if (Array.isArray(toolCall.subagent_content) && toolCall.subagent_content.length > 0) { + let nestedChanged = false; + const nextNested = toolCall.subagent_content.map((nestedPart) => { + const res = tagApprovalOnPart(nestedPart, actionId, requestByToolCallId, reviewByToolCallId); + if (res.changed) { + nestedChanged = true; + } + return res.part; + }); + if (nestedChanged) { + nextToolCall = { ...nextToolCall, subagent_content: nextNested }; + changed = true; + } + } + + // Tag this call itself when it's one of the paused requests and still unresolved + // (an `output` means the pause already resolved — leave it alone). + const toolCallId = getToolCallId(part); + const request = toolCallId ? requestByToolCallId.get(toolCallId) : undefined; + if (request && (nextToolCall.output?.length ?? 0) === 0) { + const reviewConfig = reviewByToolCallId.get(toolCallId); + nextToolCall = { + ...nextToolCall, + approval: { + actionId, + allowed_decisions: reviewConfig?.allowed_decisions ?? [], + description: request.description, + }, + }; + changed = true; + } + + if (!changed) { + return { part, changed: false }; + } + return { + part: { ...part, [ContentTypes.TOOL_CALL]: nextToolCall } as TMessageContentParts, + changed: true, + }; +} + +/** + * Maps a tool-approval pending action onto a message's tool-call content parts, + * including tool calls nested inside subagents (see {@link tagApprovalOnPart}). + * + * Returns a NEW message only when something changed (referential stability lets + * React bail out of needless re-renders); otherwise the original is returned. + */ +function applyToolApproval( + message: TMessage, + actionId: string, + payload: Agents.ToolApprovalInterruptPayload, +): TMessage { + const content = message.content; + if (!Array.isArray(content) || content.length === 0) { + return message; + } + + const reviewByToolCallId = new Map(); + for (const config of payload.review_configs) { + reviewByToolCallId.set(config.tool_call_id, config); + } + const requestByToolCallId = new Map(); + for (const requestItem of payload.action_requests) { + requestByToolCallId.set(requestItem.tool_call_id, requestItem); + } + + let changed = false; + const nextContent = content.map((part) => { + const res = tagApprovalOnPart(part, actionId, requestByToolCallId, reviewByToolCallId); + if (res.changed) { + changed = true; + } + return res.part; + }); + + if (!changed) { + return message; + } + return { ...message, content: nextContent }; +} + +/** + * Appends (or refreshes) an ask-user-question content part for the pending + * action. Idempotent: replaces an existing part with the same `actionId` rather + * than stacking duplicates on reconnect/replay. + */ +function applyAskUserQuestion( + message: TMessage, + actionId: string, + payload: Agents.AskUserQuestionInterruptPayload, +): TMessage { + const content = Array.isArray(message.content) ? message.content : []; + const askPart = { + type: ASK_USER_QUESTION, + [ASK_USER_QUESTION]: { actionId, question: payload.question }, + } as unknown as TMessageContentParts; + + const existingIdx = content.findIndex( + (part) => + isAskUserQuestionPart(part) && + (part as unknown as AskUserQuestionPart)[ASK_USER_QUESTION].actionId === actionId, + ); + if (existingIdx >= 0) { + const nextContent = [...content]; + nextContent[existingIdx] = askPart; + return { ...message, content: nextContent }; + } + return { ...message, content: [...content, askPart] }; +} + +/** + * Applies a {@link Agents.PendingAction} onto the target response message, + * dispatching on the interrupt type. Pure — returns a new message only when the + * mapping actually changed something. + */ +export function applyPendingAction( + message: TMessage, + pendingAction: Agents.PendingAction, +): TMessage { + const { payload, actionId } = pendingAction; + if (payload.type === 'tool_approval') { + return applyToolApproval(message, actionId, payload); + } + if (payload.type === 'ask_user_question') { + return applyAskUserQuestion(message, actionId, payload); + } + return message; +} + +/** + * Counts the tool-call content parts already tagged with this action's approval — + * i.e. how many of a tool-approval pending action's `action_requests` have rendered + * and been mapped. A multi-tool pause can render its sibling cards across several + * frames, so the SSE retry compares this against `action_requests.length` to know + * whether EVERY paused call is tagged yet. + */ +export function countTaggedApprovalParts(message: TMessage, actionId: string): number { + const content = message.content; + if (!Array.isArray(content)) { + return 0; + } + const countIn = (parts: TMessageContentParts[]): number => { + let count = 0; + for (const part of parts) { + if (part?.type !== ContentTypes.TOOL_CALL) { + continue; + } + const toolCall = part[ContentTypes.TOOL_CALL] as ToolCallWithApproval | undefined; + if ((toolCall?.approval as { actionId?: string } | undefined)?.actionId === actionId) { + count += 1; + } + // Nested subagent tool calls count too, so the retry loop's "all tagged" check + // is reachable for a tool paused inside a subagent. + if (Array.isArray(toolCall?.subagent_content)) { + count += countIn(toolCall.subagent_content); + } + } + return count; + }; + return countIn(content); +} + +/** Returns the ask-user-question synthetic part when `part` is one, else undefined. */ +export function getAskUserQuestionPart( + part: TMessageContentParts | undefined, +): AskUserQuestionPart | undefined { + return isAskUserQuestionPart(part) ? (part as unknown as AskUserQuestionPart) : undefined; +} + +/** + * Resolves the assistant response message a pending action targets within + * `messages`. Returns the index, or -1 when the assistant placeholder isn't present + * yet (the caller retries on the next frame). + * + * Only ever matches an ASSISTANT message. The `responseMessageId` for a fresh turn + * is the user message id with a trailing underscore (`_`), so a naive + * underscore-strip would resolve to the just-created USER message before the + * assistant placeholder exists — appending the prompt to the wrong bubble and never + * triggering the retry. Matching strictly on assistant messages avoids that. + */ +export function findPendingActionMessageIndex( + messages: TMessage[], + pendingAction: Agents.PendingAction, +): number { + const isAssistant = (message: TMessage | undefined) => message?.isCreatedByUser === false; + const { responseMessageId } = pendingAction; + if (responseMessageId) { + // When the id is provided, ONLY an exact assistant match counts. A miss means the + // assistant placeholder for this turn hasn't been inserted yet — return -1 so the + // caller retries on the next frame. Falling back to the last assistant here would + // attach the prompt/approval to a PRIOR reply (applyAskUserQuestion always appends), + // and the retry would never run. The id is the in-flight response, so once it renders + // the retry resolves it. + return messages.findIndex( + (message) => message.messageId === responseMessageId && isAssistant(message), + ); + } + /** No responseMessageId: best-effort to the last assistant (the in-flight placeholder). */ + for (let i = messages.length - 1; i >= 0; i--) { + if (isAssistant(messages[i])) { + return i; + } + } + return -1; +} diff --git a/client/src/utils/index.ts b/client/src/utils/index.ts index ea322b047f..43dc5baeea 100644 --- a/client/src/utils/index.ts +++ b/client/src/utils/index.ts @@ -38,6 +38,7 @@ export * from './previewCache'; export * from './groupToolCalls'; export * from './toolLabels'; export * from './favoritesError'; +export * from './approval'; export { default as cn } from './cn'; export { default as logger } from './logger'; export { default as getLoginError } from './getLoginError'; diff --git a/package-lock.json b/package-lock.json index e61012372b..e7ac78345b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -61,7 +61,7 @@ "@azure/storage-blob": "^12.30.0", "@google/genai": "^2.8.0", "@keyv/redis": "^4.3.3", - "@librechat/agents": "^3.2.52", + "@librechat/agents": "^3.2.53", "@librechat/api": "*", "@librechat/data-schemas": "*", "@microsoft/microsoft-graph-client": "^3.0.7", @@ -9511,13 +9511,13 @@ } }, "node_modules/@langchain/langgraph": { - "version": "1.4.5", - "resolved": "https://registry.npmjs.org/@langchain/langgraph/-/langgraph-1.4.5.tgz", - "integrity": "sha512-V+o29JPBaMoK/e+8R/m81XaC8h5iNuwWymvgLFhXfJbf7E2xt2mQUkcVXTi4cudGRHbRd14kidCpfaQbfPoYCw==", + "version": "1.4.7", + "resolved": "https://registry.npmjs.org/@langchain/langgraph/-/langgraph-1.4.7.tgz", + "integrity": "sha512-2tcyf3QGC7v89kqSxMCtRvzg/3L/4yHtOaWC49A8KieCciWJs7LGaxHoPB6QRxXyUgyR+Zg9Q1ss/XJIE+JuSQ==", "license": "MIT", "dependencies": { - "@langchain/langgraph-checkpoint": "^1.1.2", - "@langchain/langgraph-sdk": "~1.9.24", + "@langchain/langgraph-checkpoint": "^1.1.3", + "@langchain/langgraph-sdk": "~1.9.25", "@langchain/protocol": "^0.0.18", "@standard-schema/spec": "1.1.0" }, @@ -9526,19 +9526,13 @@ }, "peerDependencies": { "@langchain/core": "^1.1.48", - "zod": "^3.25.32 || ^4.2.0", - "zod-to-json-schema": "^3.x" - }, - "peerDependenciesMeta": { - "zod-to-json-schema": { - "optional": true - } + "zod": "^3.25.32 || ^4.2.0" } }, "node_modules/@langchain/langgraph-checkpoint": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@langchain/langgraph-checkpoint/-/langgraph-checkpoint-1.1.2.tgz", - "integrity": "sha512-m5Xd7W3G9JrlEhFZ5WAcqZPgE46R9gr1gFDFaVqEKeuwin3tgEp0jlPbru+iFXCug338DcQjFS/Kuuci21ydvw==", + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@langchain/langgraph-checkpoint/-/langgraph-checkpoint-1.1.3.tgz", + "integrity": "sha512-wgzdQNeEsdw1e+4lvlj0tdq/RYR/k1vPin10g0ymGoehZDDgd9nvIllGXSXN4TFgF9sf5qQP/KTkOcLfeseIhA==", "license": "MIT", "engines": { "node": ">=18" @@ -9547,10 +9541,72 @@ "@langchain/core": "^1.1.48" } }, + "node_modules/@langchain/langgraph-checkpoint-mongodb": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/@langchain/langgraph-checkpoint-mongodb/-/langgraph-checkpoint-mongodb-1.4.0.tgz", + "integrity": "sha512-CFTrK7LrhyjotGn2YEyqYQIT1YDnw8rGC3ZTApkPvLHAW1BpVwC5N2WhA/StGOCqbCNx1HZkskLtBFhjkcKGwA==", + "license": "MIT", + "dependencies": { + "mongodb": "^6.21.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@langchain/core": "^1.1.44", + "@langchain/langgraph-checkpoint": "^1.0.0" + } + }, + "node_modules/@langchain/langgraph-checkpoint-mongodb/node_modules/mongodb": { + "version": "6.21.0", + "resolved": "https://registry.npmjs.org/mongodb/-/mongodb-6.21.0.tgz", + "integrity": "sha512-URyb/VXMjJ4da46OeSXg+puO39XH9DeQpWCslifrRn9JWugy0D+DvvBvkm2WxmHe61O/H19JM66p1z7RHVkZ6A==", + "license": "Apache-2.0", + "dependencies": { + "@mongodb-js/saslprep": "^1.3.0", + "bson": "^6.10.4", + "mongodb-connection-string-url": "^3.0.2" + }, + "engines": { + "node": ">=16.20.1" + }, + "peerDependencies": { + "@aws-sdk/credential-providers": "^3.188.0", + "@mongodb-js/zstd": "^1.1.0 || ^2.0.0", + "gcp-metadata": "^5.2.0", + "kerberos": "^2.0.1", + "mongodb-client-encryption": ">=6.0.0 <7", + "snappy": "^7.3.2", + "socks": "^2.7.1" + }, + "peerDependenciesMeta": { + "@aws-sdk/credential-providers": { + "optional": true + }, + "@mongodb-js/zstd": { + "optional": true + }, + "gcp-metadata": { + "optional": true + }, + "kerberos": { + "optional": true + }, + "mongodb-client-encryption": { + "optional": true + }, + "snappy": { + "optional": true + }, + "socks": { + "optional": true + } + } + }, "node_modules/@langchain/langgraph-sdk": { - "version": "1.9.24", - "resolved": "https://registry.npmjs.org/@langchain/langgraph-sdk/-/langgraph-sdk-1.9.24.tgz", - "integrity": "sha512-WhM6QdxNipndQjl5nkvqnBt9Wl16oO2p0KiVhndAFLJMwO3bZLEx++lwtbqUFQu1sHyNxiWixgRGm8qZsuHCeA==", + "version": "1.9.25", + "resolved": "https://registry.npmjs.org/@langchain/langgraph-sdk/-/langgraph-sdk-1.9.25.tgz", + "integrity": "sha512-mRKW8zyQUaHox+HirRFMRrPqOvNbQI3xeXDt6kkk4PbBg77V92bsO1WzUVNrmJ81zCkvxyOrWSK8D6ioCj0a8A==", "license": "MIT", "dependencies": { "@langchain/protocol": "^0.0.18", @@ -9823,9 +9879,9 @@ } }, "node_modules/@librechat/agents": { - "version": "3.2.52", - "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.2.52.tgz", - "integrity": "sha512-NS/yXn412h1/JZ5LQpDGxIpW0Pb3r6zDKpAyH/U88CLcSAXwStivU7JouAphtdemJOGQpmGSv7IXpBM7wY0tOw==", + "version": "3.2.53", + "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.2.53.tgz", + "integrity": "sha512-GQ4n14AUUzISgcQMCCBkDfiUEWWujWOoeaIx1D9BgaYo6k1IW/mV1epthpGYbARQNaM7FiKVJFINs3CMJXfBXA==", "license": "MIT", "dependencies": { "@anthropic-ai/sdk": "^0.103.0", @@ -9838,7 +9894,7 @@ "@langchain/google-gauth": "2.2.0", "@langchain/google-genai": "2.2.0", "@langchain/google-vertexai": "2.2.0", - "@langchain/langgraph": "^1.4.5", + "@langchain/langgraph": "^1.4.6", "@langchain/mistralai": "^1.2.0", "@langchain/openai": "1.5.3", "@langchain/textsplitters": "^1.0.1", @@ -42288,6 +42344,9 @@ "name": "@librechat/api", "version": "1.7.34", "license": "ISC", + "dependencies": { + "@langchain/langgraph-checkpoint-mongodb": "^1.4.0" + }, "devDependencies": { "@babel/preset-env": "^7.29.5", "@babel/preset-react": "^7.18.6", @@ -42346,7 +42405,7 @@ "@azure/storage-blob": "^12.30.0", "@google/genai": "^2.8.0", "@keyv/redis": "^4.3.3", - "@librechat/agents": "^3.2.52", + "@librechat/agents": "^3.2.53", "@librechat/data-schemas": "*", "@modelcontextprotocol/sdk": "^1.29.0", "@opentelemetry/api": "^1.9.0", diff --git a/packages/api/package.json b/packages/api/package.json index 35f0d42ef2..0e7c0f098f 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -113,7 +113,7 @@ "@azure/storage-blob": "^12.30.0", "@google/genai": "^2.8.0", "@keyv/redis": "^4.3.3", - "@librechat/agents": "^3.2.52", + "@librechat/agents": "^3.2.53", "@librechat/data-schemas": "*", "@modelcontextprotocol/sdk": "^1.29.0", "@opentelemetry/api": "^1.9.0", @@ -163,5 +163,8 @@ "undici": "^7.24.1", "yauzl": "^3.2.1", "zod": "^3.22.4" + }, + "dependencies": { + "@langchain/langgraph-checkpoint-mongodb": "^1.4.0" } } diff --git a/packages/api/src/agents/__tests__/run-summarization.test.ts b/packages/api/src/agents/__tests__/run-summarization.test.ts index e4b3a85dbb..d67d904b58 100644 --- a/packages/api/src/agents/__tests__/run-summarization.test.ts +++ b/packages/api/src/agents/__tests__/run-summarization.test.ts @@ -69,6 +69,11 @@ jest.mock('@librechat/agents', () => { }; }); +// Stub the durable checkpointer so the HITL-enabled path doesn't need a live Mongo. +jest.mock('~/agents/checkpointer', () => ({ + getAgentCheckpointer: jest.fn().mockResolvedValue({}), +})); + import { Run } from '@librechat/agents'; /** Minimal RunAgent factory */ @@ -1853,3 +1858,149 @@ describe('toolOutputReferences gating', () => { expect(callArgs).not.toHaveProperty('toolOutputReferences'); }); }); + +// --------------------------------------------------------------------------- +// Suite: deferred-tool replay on HITL resume (Codex G3) +// +// The resume path rebuilds the graph with `messages: []` (state comes from the +// durable checkpoint), so the in-turn `tool_search` results that mark a deferred +// tool discovered aren't on the critical path. createRun's `discoveredToolNames` +// input replays those names — captured at pause — so the paused deferred tool is +// promoted back into `toolDefinitions` (and `defer_loading` flipped) and is present +// in the rebuilt schema-only toolMap. Without it, the approved tool would be missing +// and resume would fail with "unknown tool". +// --------------------------------------------------------------------------- +describe('createRun deferred-tool replay (HITL resume)', () => { + /** Agent whose discoverable `deep_tool` lives ONLY in the registry (deferred). */ + const makeDeferredAgent = (registryExtra: Array<[string, Record]> = []) => { + const toolRegistry = new Map>([ + ['deep_tool', { name: 'deep_tool', defer_loading: true }], + ...registryExtra, + ]); + return makeAgent({ + hasDeferredTools: true, + // tool_search is in definitions; the discoverable deep_tool is NOT (deferred). + toolDefinitions: [{ name: 'tool_search' }], + toolRegistry, + }); + }; + + const captureAgents = async ( + agent: ReturnType, + extra: Record, + ) => { + const signal = new AbortController().signal; + await createRun({ + agents: [agent] as never, + signal, + streaming: true, + streamUsage: true, + ...extra, + }); + const createMock = Run.create as jest.Mock; + const callArgs = createMock.mock.calls[0][0]; + return callArgs.graphConfig.agents as Array>; + }; + + const defNames = (agents: Array>): string[] => + (agents[0].toolDefinitions as Array<{ name: string }>).map((d) => d.name); + + it('promotes a replayed discovered tool into toolDefinitions when messages is empty (resume)', async () => { + const agents = await captureAgents(makeDeferredAgent(), { + messages: [], + discoveredToolNames: ['deep_tool'], + }); + expect(defNames(agents)).toContain('deep_tool'); + }); + + it('does NOT include the deferred tool without replayed names (the bug being fixed)', async () => { + const agents = await captureAgents(makeDeferredAgent(), { messages: [] }); + expect(defNames(agents)).not.toContain('deep_tool'); + }); + + it('flips defer_loading=false on the replayed tool so the model binds it', async () => { + const agents = await captureAgents(makeDeferredAgent(), { + messages: [], + discoveredToolNames: ['deep_tool'], + }); + const registry = agents[0].toolRegistry as Map; + expect(registry.get('deep_tool')?.defer_loading).toBe(false); + }); + + it('unions replayed names with names extracted from message history', async () => { + const toolSearchResult = { + _getType: () => 'tool', + name: 'tool_search', + content: JSON.stringify({ tools: [{ name: 'from_history' }] }), + }; + const agents = await captureAgents( + makeDeferredAgent([['from_history', { name: 'from_history', defer_loading: true }]]), + { messages: [toolSearchResult], discoveredToolNames: ['deep_tool'] }, + ); + const names = defNames(agents); + expect(names).toContain('deep_tool'); // replayed + expect(names).toContain('from_history'); // extracted from messages + }); + + it('ignores replayed names when the agent has no deferred tools (inert)', async () => { + const agents = await captureAgents( + makeAgent({ hasDeferredTools: false, toolDefinitions: [], toolRegistry: new Map() }), + { messages: [], discoveredToolNames: ['deep_tool'] }, + ); + expect(defNames(agents)).not.toContain('deep_tool'); + }); +}); + +// --------------------------------------------------------------------------- +// Suite: HITL wiring gated to resumable callers (Codex J3) +// +// The tool-approval wiring (humanInTheLoop switch + PreToolUse hook) must engage ONLY for +// callers that implement the pause/resume lifecycle. AgentClient passes hitlCapable: true; +// the OpenAI-compatible + Responses controllers don't, so an approval-gated tool can't +// pause on a route with no approval surface or resume endpoint. +// --------------------------------------------------------------------------- +describe('HITL wiring is gated on hitlCapable', () => { + const hitlAppConfig = { + config: {}, + fileStrategy: FileSources.local, + imageOutputType: 'png', + endpoints: { + [EModelEndpoint.agents]: { toolApproval: { enabled: true } }, + }, + } as unknown as AppConfig; + + const runAndGetConfig = async (extra: Record) => { + await createRun({ + agents: [makeAgent()] as never, + signal: new AbortController().signal, + appConfig: hitlAppConfig, + streaming: true, + streamUsage: true, + ...extra, + }); + const createMock = Run.create as jest.Mock; + return createMock.mock.calls[0][0] as Record; + }; + + it('attaches humanInTheLoop when the caller is hitlCapable and approval is enabled', async () => { + const config = await runAndGetConfig({ hitlCapable: true }); + expect(config.humanInTheLoop).toBeDefined(); + expect(config.hooks).toBeDefined(); + }); + + it('does NOT attach HITL for a non-resumable caller even when approval is enabled', async () => { + const config = await runAndGetConfig({ hitlCapable: false }); + expect(config).not.toHaveProperty('humanInTheLoop'); + expect(config.graphConfig).toBeDefined(); + // No checkpointer either — the run is identical to the no-HITL path. + expect( + (config.graphConfig as { compileOptions?: { checkpointer?: unknown } }).compileOptions + ?.checkpointer, + ).toBeUndefined(); + }); + + it('defaults to non-HITL when hitlCapable is omitted', async () => { + const config = await runAndGetConfig({}); + expect(config).not.toHaveProperty('humanInTheLoop'); + }); +}); diff --git a/packages/api/src/agents/checkpointer.integration.spec.ts b/packages/api/src/agents/checkpointer.integration.spec.ts new file mode 100644 index 0000000000..ae10502608 --- /dev/null +++ b/packages/api/src/agents/checkpointer.integration.spec.ts @@ -0,0 +1,109 @@ +import mongoose from 'mongoose'; +import { MongoMemoryServer } from 'mongodb-memory-server'; +import { emptyCheckpoint } from '@langchain/langgraph-checkpoint'; +import { + getAgentCheckpointer, + deleteAgentCheckpoint, + __resetCheckpointerForTests, +} from './checkpointer'; + +/** + * Integration tests for the durable Mongo checkpointer seam, against a real + * (in-memory) MongoDB. The unit spec covers config/selection with no connection; + * this proves the part that actually matters for correctness — that a checkpoint + * written for a thread can be read back and that `deleteAgentCheckpoint` truly + * prunes it (the cross-turn isolation guarantee), scoped to a single thread. + */ + +const MONGO_CFG = { type: 'mongo' as const, ttl: 3600 }; + +/** Minimal LangGraph put() args for an empty checkpoint under a thread. */ +function putArgs(threadId: string) { + const config = { configurable: { thread_id: threadId, checkpoint_ns: '' } }; + const metadata = { source: 'input' as const, step: -1, writes: null, parents: {} }; + return { config, checkpoint: emptyCheckpoint(), metadata }; +} + +const readConfig = (threadId: string) => ({ + configurable: { thread_id: threadId, checkpoint_ns: '' }, +}); + +let mongoServer: MongoMemoryServer; + +beforeAll(async () => { + mongoServer = await MongoMemoryServer.create(); + await mongoose.connect(mongoServer.getUri()); +}, 60000); + +afterAll(async () => { + await mongoose.disconnect(); + await mongoServer.stop(); +}); + +beforeEach(() => { + // Force a fresh saver build (+ setup) against the live connection each test. + __resetCheckpointerForTests(); +}); + +afterEach(async () => { + await mongoose.connection.dropDatabase(); +}); + +describe('checkpointer (mongodb-memory-server integration)', () => { + it('builds a real MongoDBSaver when Mongo is connected', async () => { + const saver = await getAgentCheckpointer(MONGO_CFG); + expect(saver).toBeDefined(); + // setup() created the checkpoint collection with a TTL index on `upserted_at`. + const indexes = await mongoose.connection.db!.collection('agent_checkpoints').indexes(); + const ttlIndex = indexes.find((idx) => idx.expireAfterSeconds != null); + expect(ttlIndex).toBeDefined(); + expect(ttlIndex?.expireAfterSeconds).toBe(3600); + }); + + it('returns undefined for the memory type (SDK MemorySaver fallback) even when connected', async () => { + expect(await getAgentCheckpointer({ type: 'memory' })).toBeUndefined(); + }); + + it('memoizes one saver per resolved config', async () => { + const a = await getAgentCheckpointer(MONGO_CFG); + const b = await getAgentCheckpointer(MONGO_CFG); + expect(a).toBe(b); + }); + + it('deleteAgentCheckpoint prunes a thread’s persisted checkpoint', async () => { + const saver = await getAgentCheckpointer(MONGO_CFG); + expect(saver).toBeDefined(); + + const threadId = `convo-${new mongoose.Types.ObjectId().toString()}`; + const { config, checkpoint, metadata } = putArgs(threadId); + await saver!.put(config, checkpoint, metadata); + + // The checkpoint is durably readable before pruning… + expect(await saver!.getTuple(readConfig(threadId))).toBeDefined(); + + await deleteAgentCheckpoint(threadId, MONGO_CFG); + + // …and gone after (so turn N+1 on the same conversationId can't rehydrate it). + expect(await saver!.getTuple(readConfig(threadId))).toBeUndefined(); + }); + + it('prunes only the targeted thread, leaving other conversations intact', async () => { + const saver = await getAgentCheckpointer(MONGO_CFG); + const threadA = `convo-${new mongoose.Types.ObjectId().toString()}`; + const threadB = `convo-${new mongoose.Types.ObjectId().toString()}`; + + const a = putArgs(threadA); + const b = putArgs(threadB); + await saver!.put(a.config, a.checkpoint, a.metadata); + await saver!.put(b.config, b.checkpoint, b.metadata); + + await deleteAgentCheckpoint(threadA, MONGO_CFG); + + expect(await saver!.getTuple(readConfig(threadA))).toBeUndefined(); + expect(await saver!.getTuple(readConfig(threadB))).toBeDefined(); + }); + + it('deleteAgentCheckpoint is a no-op for an undefined threadId', async () => { + await expect(deleteAgentCheckpoint(undefined, MONGO_CFG)).resolves.toBeUndefined(); + }); +}); diff --git a/packages/api/src/agents/checkpointer.spec.ts b/packages/api/src/agents/checkpointer.spec.ts new file mode 100644 index 0000000000..cfadc2df73 --- /dev/null +++ b/packages/api/src/agents/checkpointer.spec.ts @@ -0,0 +1,72 @@ +import { + resolveCheckpointerConfig, + getApprovalTtlMs, + getAgentCheckpointer, + deleteAgentCheckpoint, + DEFAULT_CHECKPOINT_TTL_SECONDS, + __resetCheckpointerForTests, +} from './checkpointer'; + +beforeEach(() => { + __resetCheckpointerForTests(); +}); + +describe('resolveCheckpointerConfig', () => { + test('applies defaults when nothing is configured', () => { + expect(resolveCheckpointerConfig(undefined)).toEqual({ + type: 'mongo', + ttlSeconds: DEFAULT_CHECKPOINT_TTL_SECONDS, + checkpointCollectionName: 'agent_checkpoints', + checkpointWritesCollectionName: 'agent_checkpoint_writes', + }); + }); + + test('honors explicit type, ttl, and collection overrides', () => { + expect( + resolveCheckpointerConfig({ + type: 'memory', + ttl: 60, + checkpointCollectionName: 'cp', + checkpointWritesCollectionName: 'cpw', + }), + ).toEqual({ + type: 'memory', + ttlSeconds: 60, + checkpointCollectionName: 'cp', + checkpointWritesCollectionName: 'cpw', + }); + }); + + test('falls back to the default ttl for non-positive values', () => { + expect(resolveCheckpointerConfig({ ttl: 0 }).ttlSeconds).toBe(DEFAULT_CHECKPOINT_TTL_SECONDS); + expect(resolveCheckpointerConfig({ ttl: -5 }).ttlSeconds).toBe(DEFAULT_CHECKPOINT_TTL_SECONDS); + }); +}); + +describe('getApprovalTtlMs', () => { + test('converts the resolved ttl to milliseconds', () => { + expect(getApprovalTtlMs(undefined)).toBe(DEFAULT_CHECKPOINT_TTL_SECONDS * 1000); + expect(getApprovalTtlMs({ ttl: 60 })).toBe(60_000); + }); +}); + +describe('getAgentCheckpointer', () => { + test('returns undefined for the in-memory type (SDK MemorySaver fallback)', async () => { + await expect(getAgentCheckpointer({ type: 'memory' })).resolves.toBeUndefined(); + }); + + test('returns undefined when Mongo is not connected', async () => { + // No mongoose connection is established in the unit test env (readyState 0). + await expect(getAgentCheckpointer(undefined)).resolves.toBeUndefined(); + }); +}); + +describe('deleteAgentCheckpoint', () => { + test('is a no-op (no throw) for a missing threadId', async () => { + await expect(deleteAgentCheckpoint(undefined)).resolves.toBeUndefined(); + }); + + test('is a no-op (no throw) when no durable saver is available', async () => { + await expect(deleteAgentCheckpoint('conversation-1')).resolves.toBeUndefined(); + }); +}); diff --git a/packages/api/src/agents/checkpointer.ts b/packages/api/src/agents/checkpointer.ts new file mode 100644 index 0000000000..6a99a616aa --- /dev/null +++ b/packages/api/src/agents/checkpointer.ts @@ -0,0 +1,171 @@ +import mongoose from 'mongoose'; +import { logger } from '@librechat/data-schemas'; +import { MongoDBSaver } from '@langchain/langgraph-checkpoint-mongodb'; +import type { TCheckpointerConfig } from 'librechat-data-provider'; + +/** + * Durable checkpointing for human-in-the-loop (HITL) resume. + * + * This is the seam between LibreChat and LangGraph's checkpoint machinery. A run + * that pauses for tool approval suspends its graph state to a checkpoint; resuming + * rebuilds that state on a *fresh* `Run` (see `agents/run.ts`), which only works if + * the checkpoint outlives the original request — across a restart, or on another + * replica. So HITL needs a durable saver, not the SDK's process-local `MemorySaver`. + * + * Two adapters sit behind the one interface ({@link getAgentCheckpointer}): + * - `MongoDBSaver` over the app's existing Mongo connection (the default), and + * - `undefined`, which lets the SDK install its own in-process `MemorySaver` + * (single-process / dev, or whenever Mongo isn't ready yet). + * + * Storage is bounded two ways: a Mongo TTL index reclaims runs that are never + * resolved ({@link DEFAULT_CHECKPOINT_TTL_SECONDS}), and {@link deleteAgentCheckpoint} + * prunes a thread's checkpoints eagerly on every terminal transition. + */ + +/** Default approval window and checkpoint TTL: 24h. */ +export const DEFAULT_CHECKPOINT_TTL_SECONDS = 86400; + +const DEFAULT_CHECKPOINT_COLLECTION = 'agent_checkpoints'; +const DEFAULT_CHECKPOINT_WRITES_COLLECTION = 'agent_checkpoint_writes'; + +/** Checkpointer settings with all defaults applied. */ +export interface ResolvedCheckpointerConfig { + type: 'mongo' | 'memory'; + /** Approval window / TTL in seconds. */ + ttlSeconds: number; + checkpointCollectionName: string; + checkpointWritesCollectionName: string; +} + +/** + * Apply defaults to the YAML `endpoints.agents.checkpointer` block. Mirrors + * {@link resolveRecursionLimit} — the schema stays descriptive, defaults live here. + */ +export function resolveCheckpointerConfig( + cfg: TCheckpointerConfig | undefined, +): ResolvedCheckpointerConfig { + return { + type: cfg?.type ?? 'mongo', + ttlSeconds: + typeof cfg?.ttl === 'number' && cfg.ttl > 0 ? cfg.ttl : DEFAULT_CHECKPOINT_TTL_SECONDS, + checkpointCollectionName: cfg?.checkpointCollectionName ?? DEFAULT_CHECKPOINT_COLLECTION, + checkpointWritesCollectionName: + cfg?.checkpointWritesCollectionName ?? DEFAULT_CHECKPOINT_WRITES_COLLECTION, + }; +} + +/** Approval-window milliseconds from the resolved config; drives pending-action expiry. */ +export function getApprovalTtlMs(cfg: TCheckpointerConfig | undefined): number { + return resolveCheckpointerConfig(cfg).ttlSeconds * 1000; +} + +/** + * One saver per process, built lazily on first use so `setup()` (index creation) + * runs exactly once. Keyed by the resolved settings so a config change rebuilds. + */ +let saverPromise: Promise | undefined; +let cachedKey: string | undefined; + +function settingsKey(resolved: ResolvedCheckpointerConfig): string { + return `${resolved.checkpointCollectionName}|${resolved.checkpointWritesCollectionName}|${resolved.ttlSeconds}`; +} + +/** + * The durable saver to hand to `graphConfig.compileOptions.checkpointer`, or + * `undefined` to let the SDK fall back to its in-process `MemorySaver`. + * + * Returns `undefined` (without caching) when the config selects `memory` or when + * Mongo isn't connected yet, so a later run retries once the connection is up. + * The SDK types the checkpointer as `unknown`, so a `MongoDBSaver` passes directly. + */ +export async function getAgentCheckpointer( + cfg: TCheckpointerConfig | undefined, +): Promise { + const resolved = resolveCheckpointerConfig(cfg); + if (resolved.type === 'memory') { + return undefined; + } + if (mongoose.connection.readyState !== 1) { + logger.warn( + '[checkpointer] Mongoose not connected; HITL runs will use an in-process checkpointer this turn (paused runs will not survive a restart or resolve on another replica).', + ); + return undefined; + } + + const key = settingsKey(resolved); + if (!saverPromise || cachedKey !== key) { + cachedKey = key; + saverPromise = buildMongoSaver(resolved); + } + return saverPromise; +} + +async function buildMongoSaver( + resolved: ResolvedCheckpointerConfig, +): Promise { + try { + const saver = new MongoDBSaver({ + // mongoose vends the live MongoClient; reuse it instead of opening a second + // connection. The driver type is structurally identical but resolves to a + // different `mongodb` copy than checkpoint-mongodb's, hence the cast. + client: mongoose.connection.getClient() as unknown as ConstructorParameters< + typeof MongoDBSaver + >[0]['client'], + checkpointCollectionName: resolved.checkpointCollectionName, + checkpointWritesCollectionName: resolved.checkpointWritesCollectionName, + // TTL index on `upserted_at`: an unresolved paused run is reclaimed after the + // approval window, so a forgotten approval can never leak checkpoints forever. + ttl: resolved.ttlSeconds, + }); + const errors = await saver.setup(); + if (errors.length > 0) { + logger.warn( + '[checkpointer] MongoDBSaver.setup() reported errors (checkpoint indexes may be incomplete):', + errors, + ); + } + logger.info('[checkpointer] Durable Mongo checkpointer ready for HITL resume'); + return saver; + } catch (err) { + // Reset so a later run can retry rather than being stuck on a failed build. + saverPromise = undefined; + cachedKey = undefined; + logger.error( + '[checkpointer] Failed to initialize Mongo checkpointer; falling back to in-process checkpointer:', + err, + ); + return undefined; + } +} + +/** + * Prune a thread's checkpoints on a terminal transition — natural completion, + * abort, or expiry — so the durable store stays bounded. The TTL index is the + * safety net; this is the eager cleanup. No-op in memory mode or before any run + * has built the saver (nothing to delete). + * + * @param threadId - the LangGraph `thread_id` (LibreChat's conversationId). + */ +export async function deleteAgentCheckpoint( + threadId: string | undefined, + cfg?: TCheckpointerConfig, +): Promise { + if (!threadId) { + return; + } + const saver = await getAgentCheckpointer(cfg); + if (!saver) { + return; + } + try { + await saver.deleteThread(threadId); + } catch (err) { + logger.warn(`[checkpointer] Failed to delete checkpoints for thread ${threadId}:`, err); + } +} + +/** Test-only: drop the memoized saver so a fresh build is forced. */ +export function __resetCheckpointerForTests(): void { + saverPromise = undefined; + cachedKey = undefined; +} diff --git a/packages/api/src/agents/hitl/index.ts b/packages/api/src/agents/hitl/index.ts index 62d5b849e4..690ccb1828 100644 --- a/packages/api/src/agents/hitl/index.ts +++ b/packages/api/src/agents/hitl/index.ts @@ -1 +1,3 @@ export * from './policy'; +export * from './runtime'; +export * from './resume'; diff --git a/packages/api/src/agents/hitl/policy.spec.ts b/packages/api/src/agents/hitl/policy.spec.ts index 369922a5ff..579b0503ef 100644 --- a/packages/api/src/agents/hitl/policy.spec.ts +++ b/packages/api/src/agents/hitl/policy.spec.ts @@ -1,12 +1,41 @@ import type { Agents, TToolApprovalPolicy } from 'librechat-data-provider'; import { + resolveToolApprovalPolicy, isHITLEnabled, mapToolApprovalPolicy, buildToolApprovalPayload, buildAskUserQuestionPayload, buildPendingAction, + computeAgentRequestFingerprint, + pickResumeContext, + applyResumeContext, } from './policy'; +describe('resolveToolApprovalPolicy', () => { + test('returns the endpoint policy unchanged (single layer wired today)', () => { + const endpoint: TToolApprovalPolicy = { enabled: true, mode: 'default', deny: ['rm'] }; + // Identity, not a copy — the resolver is a passthrough until more layers ship. + expect(resolveToolApprovalPolicy({ endpoint })).toBe(endpoint); + }); + + test('returns undefined when there is no endpoint policy', () => { + expect(resolveToolApprovalPolicy({})).toBeUndefined(); + expect(resolveToolApprovalPolicy({ endpoint: undefined })).toBeUndefined(); + }); + + test('ignores the reserved agent/skills layers for now (behaviour-preserving)', () => { + const endpoint: TToolApprovalPolicy = { enabled: true, mode: 'bypass' }; + const resolved = resolveToolApprovalPolicy({ + endpoint, + agent: { enabled: true, mode: 'default', ask: ['shell'] }, + skills: [{ deny: ['delete_*'] }], + }); + // Until merge lands, the result must still be exactly the endpoint policy so + // enabling the seam can't change runtime behaviour. + expect(resolved).toBe(endpoint); + }); +}); + describe('isHITLEnabled', () => { test('default-off when no policy configured', () => { expect(isHITLEnabled(undefined)).toBe(false); @@ -227,3 +256,178 @@ describe('buildPendingAction', () => { expect(action.expiresAt).toBeLessThanOrEqual(after); }); }); + +describe('computeAgentRequestFingerprint', () => { + it('is stable for the same graph-determining fields (ignoring other body keys)', () => { + const a = computeAgentRequestFingerprint({ + endpoint: 'agents', + agent_id: 'agent-1', + model: 'gpt', + }); + // Extra/unknown fields on the body must not change the fingerprint. + const b = computeAgentRequestFingerprint({ + endpoint: 'agents', + agent_id: 'agent-1', + model: 'gpt', + ...({ conversationId: 'c', decisions: [] } as Record), + }); + expect(a).toBe(b); + }); + + it('differs when a graph-determining field changes', () => { + const base = { endpoint: 'agents', agent_id: 'agent-1', model: 'gpt' }; + expect(computeAgentRequestFingerprint(base)).not.toBe( + computeAgentRequestFingerprint({ ...base, model: 'other' }), + ); + expect(computeAgentRequestFingerprint(base)).not.toBe( + computeAgentRequestFingerprint({ ...base, agent_id: 'agent-2' }), + ); + }); + + it('differs when promptPrefix changes (ephemeral instructions)', () => { + const base = { endpoint: 'agents', promptPrefix: 'be terse' }; + expect(computeAgentRequestFingerprint(base)).not.toBe( + computeAgentRequestFingerprint({ ...base, promptPrefix: 'be verbose' }), + ); + // null vs absent are treated the same + expect(computeAgentRequestFingerprint({ endpoint: 'agents' })).toBe( + computeAgentRequestFingerprint({ endpoint: 'agents', promptPrefix: null }), + ); + }); + + it('normalizes ephemeralAgent so key/array order does not matter', () => { + const x = computeAgentRequestFingerprint({ + endpoint: 'agents', + ephemeralAgent: { mcp: ['b', 'a'], execute_code: true }, + }); + const y = computeAgentRequestFingerprint({ + endpoint: 'agents', + ephemeralAgent: { execute_code: true, mcp: ['a', 'b'] }, + }); + expect(x).toBe(y); + }); + + it('distinguishes a different ephemeral capability set (the swap it guards against)', () => { + const a = computeAgentRequestFingerprint({ + endpoint: 'agents', + ephemeralAgent: { execute_code: true }, + }); + const b = computeAgentRequestFingerprint({ + endpoint: 'agents', + ephemeralAgent: { execute_code: false, mcp: ['evil'] }, + }); + expect(a).not.toBe(b); + }); +}); + +describe('pickResumeContext / applyResumeContext', () => { + it('picks only the graph-determining fields (incl. addedConvo + timezone), dropping unrelated keys', () => { + const ctx = pickResumeContext({ + endpoint: 'agents', + agent_id: 'a1', + model: 'gpt', + promptPrefix: 'be terse', + ephemeralAgent: { execute_code: true }, + addedConvo: { agent_id: 'secondary' }, + // Feeds temporal prompt vars; must round-trip so resume compiles the same prompt. + timezone: 'America/New_York', + // Graph-determining: skill allowed-tools union into the tool set. + manualSkills: ['code-reviewer'], + conversationId: 'c', + decisions: [], + actionId: 'x', + }); + expect(ctx).toEqual({ + endpoint: 'agents', + agent_id: 'a1', + model: 'gpt', + promptPrefix: 'be terse', + ephemeralAgent: { execute_code: true }, + addedConvo: { agent_id: 'secondary' }, + timezone: 'America/New_York', + manualSkills: ['code-reviewer'], + }); + }); + + it('replays a dropped manualSkills and drops a client-injected one', () => { + // Reload case: the resume client lost manualSkills; the server restores it. + const restored: Record = { conversationId: 'c', actionId: 'x' }; + applyResumeContext(restored, { endpoint: 'agents', manualSkills: ['code-reviewer'] }); + expect(restored.manualSkills).toEqual(['code-reviewer']); + // Security: a paused turn with no manual skill can't be made to inject one. + const injected: Record = { conversationId: 'c', manualSkills: ['evil-skill'] }; + applyResumeContext(injected, { endpoint: 'agents', agent_id: 'a1' }); + expect('manualSkills' in injected).toBe(false); + }); + + it('omits absent (undefined) fields but keeps explicit null', () => { + const ctx = pickResumeContext({ endpoint: 'agents', ephemeralAgent: null }); + expect(ctx).toEqual({ endpoint: 'agents', ephemeralAgent: null }); + expect('agent_id' in ctx).toBe(false); + }); + + it('replays the persisted context onto a body, overwriting what the client sent', () => { + // The reload case: the client lost ephemeralAgent (null); the server restores it. + const body: Record = { + conversationId: 'c', + actionId: 'x', + ephemeralAgent: null, + promptPrefix: 'tampered', + }; + applyResumeContext(body, { + endpoint: 'agents', + ephemeralAgent: { execute_code: true, mcp: ['srv'] }, + promptPrefix: 'original', + }); + expect(body.ephemeralAgent).toEqual({ execute_code: true, mcp: ['srv'] }); + expect(body.promptPrefix).toBe('original'); + expect(body.endpoint).toBe('agents'); + // Non-context fields are untouched. + expect(body.conversationId).toBe('c'); + expect(body.actionId).toBe('x'); + }); + + it('drops graph-determining fields the client sent that the persisted context lacks', () => { + // Security: the paused turn carried no addedConvo/spec, so a crafted resume must not + // be able to inject them (addedConvo isn't covered by the fingerprint). Any + // RESUME_CONTEXT_KEY absent from the persisted context is cleared from the body. + const body: Record = { + conversationId: 'c', + actionId: 'x', + addedConvo: { agent_id: 'injected-secondary' }, + spec: 'injected-spec', + }; + applyResumeContext(body, { endpoint: 'agents', agent_id: 'a1' }); + // Persisted keys are restored... + expect(body.endpoint).toBe('agents'); + expect(body.agent_id).toBe('a1'); + // ...and client-injected graph-determining fields absent from the context are gone. + expect('addedConvo' in body).toBe(false); + expect('spec' in body).toBe(false); + // Non-context fields are untouched. + expect(body.conversationId).toBe('c'); + expect(body.actionId).toBe('x'); + }); + + it('is a no-op for a null/undefined context', () => { + const body: Record = { ephemeralAgent: null }; + applyResumeContext(body, undefined); + expect(body.ephemeralAgent).toBeNull(); + }); + + it('round-trips through buildPendingAction so replay restores the original body', () => { + const original = { + endpoint: 'agents', + ephemeralAgent: { execute_code: true }, + promptPrefix: 'p', + }; + const action = buildPendingAction( + { type: 'ask_user_question', question: { question: 'q' } } as Agents.HumanInterruptPayload, + { streamId: 's', resumeContext: pickResumeContext(original) }, + ); + const reloadedBody: Record = { ephemeralAgent: null }; + applyResumeContext(reloadedBody, action.resumeContext); + expect(reloadedBody.ephemeralAgent).toEqual({ execute_code: true }); + expect(reloadedBody.promptPrefix).toBe('p'); + }); +}); diff --git a/packages/api/src/agents/hitl/policy.ts b/packages/api/src/agents/hitl/policy.ts index bfe0543c7c..795b435aff 100644 --- a/packages/api/src/agents/hitl/policy.ts +++ b/packages/api/src/agents/hitl/policy.ts @@ -1,4 +1,4 @@ -import { randomUUID } from 'crypto'; +import { randomUUID, createHash } from 'crypto'; import type { Agents, TToolApprovalPolicy } from 'librechat-data-provider'; import type { ToolPolicyConfig } from '@librechat/agents'; @@ -11,6 +11,54 @@ import type { ToolPolicyConfig } from '@librechat/agents'; */ const DEFAULT_REVIEW_DECISIONS: Agents.ToolApprovalDecisionType[] = ['approve', 'reject', 'edit']; +/** + * Layered sources that combine into the effective tool-approval policy for a turn. + * + * Only {@link ToolApprovalPolicyLayers.endpoint} is consumed today; `agent` and + * `skills` are reserved seams so future per-agent / per-skill plumbing lands in + * {@link resolveToolApprovalPolicy} rather than being threaded through the run + * call site. + */ +export interface ToolApprovalPolicyLayers { + /** + * App/endpoint policy — `endpoints.agents.toolApproval` from librechat.yaml. + * The baseline, and the sole owner of the `enabled` kill switch. + */ + endpoint?: TToolApprovalPolicy; + /** + * Per-agent override (not yet wired). Layered over `endpoint` to refine + * `mode`/`allow`/`deny`/`ask`/`reason` for a specific agent. Must NOT flip + * `enabled` — enablement stays endpoint-level by design. + */ + agent?: TToolApprovalPolicy; + /** + * Skill-contributed policy (not yet wired). May only TIGHTEN — contribute + * `ask`/`deny` entries — never grant `bypass` or widen `allow`, so a selected + * skill can never silently auto-approve a tool. + */ + skills?: TToolApprovalPolicy[]; +} + +/** + * Resolve the effective tool-approval policy for a turn from its layered sources. + * + * This is the single seam where policy sources combine, kept out of the run call + * site so adding per-agent or per-skill policy later is a change to ONE function + * rather than to `createRun`. Intended precedence once those layers are wired: + * - `endpoint` is the baseline and owns the `enabled` kill switch; + * - `agent` overrides `mode`/`allow`/`deny`/`ask`/`reason`; + * - `skills` may only tighten (add `ask`/`deny`), never loosen. + * + * Today only `endpoint` is consumed, so the result is identical to reading + * `endpoints.agents.toolApproval` directly — `agent`/`skills` are accepted but + * not yet merged. Behaviour-preserving until those layers ship. + */ +export function resolveToolApprovalPolicy( + layers: ToolApprovalPolicyLayers, +): TToolApprovalPolicy | undefined { + return layers.endpoint; +} + /** * Whether the HITL machinery should run for this policy. * @@ -121,6 +169,127 @@ export interface PendingActionContext { interruptId?: string; /** LangGraph `thread_id` (`RunInterruptResult.threadId`) for cross-process resume. */ threadId?: string; + /** Fingerprint of the graph-determining request fields; see {@link computeAgentRequestFingerprint}. */ + requestFingerprint?: string; + /** Graph-determining fields to replay on resume; see {@link RESUME_CONTEXT_KEYS}. */ + resumeContext?: Record; +} + +/** Request fields that decide which agent/graph + tool set a turn runs. */ +export interface AgentRequestFingerprintFields { + endpoint?: string | null; + endpointType?: string | null; + agent_id?: string | null; + model?: string | null; + spec?: string | null; + /** Ephemeral agents derive their system instructions from this; pin it too. */ + promptPrefix?: string | null; + ephemeralAgent?: Record | null; +} + +/** Stable, order-independent serialization of the ephemeral capability config. */ +function normalizeEphemeralAgent(ephemeral: Record | null | undefined): unknown { + if (ephemeral == null || typeof ephemeral !== 'object') { + return null; + } + const out: Record = {}; + for (const key of Object.keys(ephemeral).sort()) { + const value = ephemeral[key]; + out[key] = Array.isArray(value) ? [...value].sort() : value; + } + return out; +} + +/** + * Fingerprint the request fields that determine which agent/graph + tool set a turn + * runs. Persisted on the pending action at pause time and recomputed on resume; a + * mismatch means the resume would rebuild a DIFFERENT graph. This is the guard that + * catches an ephemeral-agent config swap — those have an undefined `agent_id`, so the + * id check alone can't tell two ephemeral configs apart. + */ +/** + * Request fields that determine the agent/graph + tool set, persisted with the pending + * action so the resume can REPLAY them server-side. The client can't reliably re-send + * these after a reload (e.g. the ephemeralAgent state resets), so replaying them from + * the job guarantees the rebuilt run is the SAME graph the pause used — durable resume + * works across reloads/replicas, and a crafted resume can't swap the tool set. + */ +export const RESUME_CONTEXT_KEYS = [ + 'endpoint', + 'endpointType', + 'agent_id', + 'spec', + 'model', + 'promptPrefix', + 'ephemeralAgent', + // The agents build reads addedConvo into endpointOption to add parallel/secondary + // agents; the resume POST can't reconstruct it, so replay it from the paused request. + 'addedConvo', + // Feeds temporal prompt vars ({{current_datetime}} etc.) via initializeAgent. The + // resume POST omits it, so without replay a different-tz client (or none) compiles a + // different system prompt than the paused graph. Replay-only — not in the fingerprint. + 'timezone', + // Manually-selected skills union their allowed-tools into the tool set before tools + // load (initializeAgent → resolveManualSkills), so they're graph-determining. The + // resume POST can't reliably re-send them after a reload; replay them, and the + // delete-absent half of applyResumeContext stops a crafted resume from injecting a + // different skill's tools (manualSkills isn't covered by the fingerprint). Replay-only. + // (alwaysAppliedSkills is NOT here — it's resolved server-side from the DB, not req.body.) + 'manualSkills', +] as const; + +export type ResumeContext = Partial>; + +/** Extract the graph-determining fields from a request body for durable replay. */ +export function pickResumeContext(body: Record | undefined | null): ResumeContext { + const ctx: ResumeContext = {}; + if (body == null) { + return ctx; + } + for (const key of RESUME_CONTEXT_KEYS) { + if (body[key] !== undefined) { + ctx[key] = body[key]; + } + } + return ctx; +} + +/** + * Replay a persisted resume context onto a request body so the rebuilt run matches the + * paused one. Every graph-determining field is forced to the persisted value: a key the + * context HAS overwrites whatever the client sent; a key it LACKS is deleted from the + * body. The delete is the security half — without it, a field the paused turn never + * carried (e.g. `addedConvo`, which {@link computeAgentRequestFingerprint} does NOT + * cover) could be injected by a crafted resume to rebuild the paused single-agent + * checkpoint as a different multi-agent graph/tool set. + */ +export function applyResumeContext( + body: Record | undefined | null, + ctx: ResumeContext | undefined | null, +): void { + if (body == null || ctx == null) { + return; + } + for (const key of RESUME_CONTEXT_KEYS) { + if (ctx[key] !== undefined) { + body[key] = ctx[key]; + } else { + delete body[key]; + } + } +} + +export function computeAgentRequestFingerprint(fields: AgentRequestFingerprintFields): string { + const canonical = JSON.stringify({ + endpoint: fields.endpoint ?? null, + endpointType: fields.endpointType ?? null, + agent_id: fields.agent_id ?? null, + model: fields.model ?? null, + spec: fields.spec ?? null, + promptPrefix: fields.promptPrefix ?? null, + ephemeralAgent: normalizeEphemeralAgent(fields.ephemeralAgent), + }); + return createHash('sha256').update(canonical).digest('hex'); } /** @@ -146,5 +315,7 @@ export function buildPendingAction( expiresAt: typeof ctx.ttlMs === 'number' ? createdAt + ctx.ttlMs : undefined, interruptId: ctx.interruptId, threadId: ctx.threadId, + requestFingerprint: ctx.requestFingerprint, + resumeContext: ctx.resumeContext, }; } diff --git a/packages/api/src/agents/hitl/resume.spec.ts b/packages/api/src/agents/hitl/resume.spec.ts new file mode 100644 index 0000000000..f164366cf7 --- /dev/null +++ b/packages/api/src/agents/hitl/resume.spec.ts @@ -0,0 +1,171 @@ +import type { Agents } from 'librechat-data-provider'; +import { + mapToolApprovalResolutions, + mapAskUserAnswer, + findUndecidedToolCalls, + findDisallowedDecisions, + findIncompleteDecisions, +} from './resume'; + +describe('mapToolApprovalResolutions', () => { + test('maps each decision type to the SDK discriminated shape, keyed by tool_call_id', () => { + const resolutions: Agents.ToolApprovalResolution[] = [ + { tool_call_id: 'a', decision: 'approve' }, + { tool_call_id: 'b', decision: 'reject', reason: 'no' }, + { tool_call_id: 'c', decision: 'edit', editedArguments: { x: 1 } }, + { tool_call_id: 'd', decision: 'respond', responseText: 'done' }, + ]; + + expect(mapToolApprovalResolutions(resolutions)).toEqual({ + a: { type: 'approve' }, + b: { type: 'reject', reason: 'no' }, + c: { type: 'edit', updatedInput: { x: 1 } }, + d: { type: 'respond', responseText: 'done' }, + }); + }); + + test('defaults missing edit args to {} and missing respond text to "" rather than throwing', () => { + const resolutions: Agents.ToolApprovalResolution[] = [ + { tool_call_id: 'a', decision: 'edit' }, + { tool_call_id: 'b', decision: 'respond' }, + ]; + expect(mapToolApprovalResolutions(resolutions)).toEqual({ + a: { type: 'edit', updatedInput: {} }, + b: { type: 'respond', responseText: '' }, + }); + }); + + test('fails closed (reject) on an unrecognized decision', () => { + const resolutions = [ + { tool_call_id: 'a', decision: 'nonsense' as Agents.ToolApprovalDecisionType }, + ]; + expect(mapToolApprovalResolutions(resolutions)).toEqual({ + a: { type: 'reject', reason: 'Unrecognized approval decision' }, + }); + }); + + test('last write wins when the same tool_call_id appears twice', () => { + const resolutions: Agents.ToolApprovalResolution[] = [ + { tool_call_id: 'a', decision: 'approve' }, + { tool_call_id: 'a', decision: 'reject' }, + ]; + expect(mapToolApprovalResolutions(resolutions)).toEqual({ a: { type: 'reject' } }); + }); +}); + +describe('mapAskUserAnswer', () => { + test('passes the answer through unchanged', () => { + expect(mapAskUserAnswer({ answer: 'staging' })).toEqual({ answer: 'staging' }); + }); +}); + +describe('findUndecidedToolCalls', () => { + const payload: Agents.ToolApprovalInterruptPayload = { + type: 'tool_approval', + action_requests: [ + { tool_call_id: 'a', name: 'read', arguments: {} }, + { tool_call_id: 'b', name: 'write', arguments: {} }, + ], + review_configs: [], + }; + + test('returns the tool_call_ids with no decision', () => { + expect(findUndecidedToolCalls(payload, [{ tool_call_id: 'a', decision: 'approve' }])).toEqual([ + 'b', + ]); + }); + + test('returns [] when every requested tool call is decided', () => { + expect( + findUndecidedToolCalls(payload, [ + { tool_call_id: 'a', decision: 'approve' }, + { tool_call_id: 'b', decision: 'reject' }, + ]), + ).toEqual([]); + }); + + test('ignores resolutions for tool calls not in the action', () => { + expect( + findUndecidedToolCalls(payload, [ + { tool_call_id: 'a', decision: 'approve' }, + { tool_call_id: 'b', decision: 'approve' }, + { tool_call_id: 'z', decision: 'approve' }, + ]), + ).toEqual([]); + }); +}); + +describe('findDisallowedDecisions', () => { + const payload: Agents.ToolApprovalInterruptPayload = { + type: 'tool_approval', + action_requests: [ + { tool_call_id: 'a', name: 'read', arguments: {} }, + { tool_call_id: 'b', name: 'write', arguments: {} }, + ], + review_configs: [ + { action_name: 'read', tool_call_id: 'a', allowed_decisions: ['approve', 'reject'] }, + { action_name: 'write', tool_call_id: 'b', allowed_decisions: ['reject', 'respond'] }, + ], + }; + + test('returns [] when every decision is permitted by its review config', () => { + expect( + findDisallowedDecisions(payload, [ + { tool_call_id: 'a', decision: 'approve' }, + { tool_call_id: 'b', decision: 'respond', responseText: 'x' }, + ]), + ).toEqual([]); + }); + + test('flags a decision the policy does not allow for that tool', () => { + // `b` is restricted to reject/respond — approving it must be rejected. + expect( + findDisallowedDecisions(payload, [ + { tool_call_id: 'a', decision: 'approve' }, + { tool_call_id: 'b', decision: 'approve' }, + ]), + ).toEqual(['b']); + }); + + test('fails closed for a tool_call_id with no matching review config', () => { + expect(findDisallowedDecisions(payload, [{ tool_call_id: 'z', decision: 'approve' }])).toEqual([ + 'z', + ]); + }); +}); + +describe('findIncompleteDecisions', () => { + it('flags an edit decision without editedArguments', () => { + expect(findIncompleteDecisions([{ tool_call_id: 'a', decision: 'edit' }])).toEqual(['a']); + }); + + it('flags an edit decision whose editedArguments is not a plain object', () => { + expect( + findIncompleteDecisions([ + { + tool_call_id: 'a', + decision: 'edit', + editedArguments: [] as unknown as Record, + }, + ]), + ).toEqual(['a']); + }); + + it('flags a respond decision without responseText (or empty)', () => { + expect(findIncompleteDecisions([{ tool_call_id: 'a', decision: 'respond' }])).toEqual(['a']); + expect( + findIncompleteDecisions([{ tool_call_id: 'b', decision: 'respond', responseText: '' }]), + ).toEqual(['b']); + }); + + it('accepts complete edit/respond and ignores approve/reject', () => { + expect( + findIncompleteDecisions([ + { tool_call_id: 'a', decision: 'edit', editedArguments: { q: 1 } }, + { tool_call_id: 'b', decision: 'respond', responseText: 'done' }, + { tool_call_id: 'c', decision: 'approve' }, + { tool_call_id: 'd', decision: 'reject', reason: 'no' }, + ]), + ).toEqual([]); + }); +}); diff --git a/packages/api/src/agents/hitl/resume.ts b/packages/api/src/agents/hitl/resume.ts new file mode 100644 index 0000000000..3f34331002 --- /dev/null +++ b/packages/api/src/agents/hitl/resume.ts @@ -0,0 +1,119 @@ +import type { + ToolApprovalDecision, + ToolApprovalDecisionMap, + AskUserQuestionResolution, +} from '@librechat/agents'; +import type { Agents } from 'librechat-data-provider'; + +/** + * Translate the host-facing approval wire format into the SDK's resume value. + * + * The wire format ({@link Agents.ToolApprovalResolution}) is shaped for the UI — + * a flat `decision` string plus optional `editedArguments` / `responseText`. The + * SDK consumes a discriminated {@link ToolApprovalDecision} per tool call. This is + * the single adapter between the two; the resume route maps once, here, instead of + * branching on `decision` at the call site. + * + * Returns the map form (keyed by `tool_call_id`) so a batch that calls the same + * tool twice resolves unambiguously — by-position ordering breaks with duplicates. + */ +export function mapToolApprovalResolutions( + resolutions: readonly Agents.ToolApprovalResolution[], +): ToolApprovalDecisionMap { + const decisions: ToolApprovalDecisionMap = {}; + for (const resolution of resolutions) { + decisions[resolution.tool_call_id] = toSdkDecision(resolution); + } + return decisions; +} + +function toSdkDecision(resolution: Agents.ToolApprovalResolution): ToolApprovalDecision { + switch (resolution.decision) { + case 'approve': + return { type: 'approve' }; + case 'reject': + return { type: 'reject', reason: resolution.reason }; + case 'edit': + // `editedArguments` is required for edit on the wire; default to {} so a + // malformed payload re-runs the tool with empty args rather than throwing. + return { type: 'edit', updatedInput: resolution.editedArguments ?? {} }; + case 'respond': + return { type: 'respond', responseText: resolution.responseText ?? '' }; + default: + // Unknown decision (forward-compat / malformed): fail closed by rejecting, + // never by silently approving a tool the user didn't sanction. + return { type: 'reject', reason: 'Unrecognized approval decision' }; + } +} + +/** Translate the ask-user wire answer into the SDK's resume value. */ +export function mapAskUserAnswer( + resolution: Agents.AskUserQuestionResolution, +): AskUserQuestionResolution { + return { answer: resolution.answer }; +} + +/** + * Validate that a set of resolutions covers exactly the tool calls a pending + * `tool_approval` action is waiting on. Returns the list of `tool_call_id`s that + * were requested but not decided (empty when the batch is fully resolved), so the + * resume route can 400 a partial submission instead of driving a half-decided run. + */ +export function findUndecidedToolCalls( + payload: Agents.ToolApprovalInterruptPayload, + resolutions: readonly Agents.ToolApprovalResolution[], +): string[] { + const decided = new Set(resolutions.map((r) => r.tool_call_id)); + return payload.action_requests.map((a) => a.tool_call_id).filter((id) => !decided.has(id)); +} + +/** + * Enforce the policy's per-tool `allowed_decisions`. Returns the `tool_call_id`s + * whose submitted decision is NOT one the interrupt's `review_configs` permits for + * that tool — so the resume route can reject a crafted request that, e.g., approves + * a tool the policy restricted to `reject`/`respond`. A resolution for a tool with + * no matching review_config (shouldn't happen) is treated as disallowed (fail closed). + */ +export function findDisallowedDecisions( + payload: Agents.ToolApprovalInterruptPayload, + resolutions: readonly Agents.ToolApprovalResolution[], +): string[] { + const allowedByToolCallId = new Map>(); + for (const config of payload.review_configs) { + allowedByToolCallId.set(config.tool_call_id, new Set(config.allowed_decisions)); + } + return resolutions + .filter((r) => !allowedByToolCallId.get(r.tool_call_id)?.has(r.decision)) + .map((r) => r.tool_call_id); +} + +/** + * Enforce that `edit` and `respond` decisions carry their required payload. Returns + * the `tool_call_id`s whose decision is structurally incomplete: + * - `edit` without an object `editedArguments`, or + * - `respond` without a non-empty `responseText`. + * + * Without this, {@link toSdkDecision}'s defensive defaults (`{}` / `''`) would turn a + * crafted or buggy submission into an empty tool input or an empty synthetic result — + * resuming the run with behavior the user never actually approved. The route rejects + * these (400) rather than mapping them. + */ +export function findIncompleteDecisions( + resolutions: readonly Agents.ToolApprovalResolution[], +): string[] { + return resolutions + .filter((r) => { + if (r.decision === 'edit') { + return ( + r.editedArguments == null || + typeof r.editedArguments !== 'object' || + Array.isArray(r.editedArguments) + ); + } + if (r.decision === 'respond') { + return typeof r.responseText !== 'string' || r.responseText.length === 0; + } + return false; + }) + .map((r) => r.tool_call_id); +} diff --git a/packages/api/src/agents/hitl/runtime.spec.ts b/packages/api/src/agents/hitl/runtime.spec.ts new file mode 100644 index 0000000000..79d4ef05f6 --- /dev/null +++ b/packages/api/src/agents/hitl/runtime.spec.ts @@ -0,0 +1,29 @@ +import { HookRegistry } from '@librechat/agents'; +import { buildHITLRunWiring } from './runtime'; + +describe('buildHITLRunWiring', () => { + test('returns undefined when HITL is disabled (the default)', () => { + expect(buildHITLRunWiring(undefined)).toBeUndefined(); + expect(buildHITLRunWiring({})).toBeUndefined(); + expect(buildHITLRunWiring({ enabled: false })).toBeUndefined(); + expect(buildHITLRunWiring({ mode: 'default', allow: ['read_*'] })).toBeUndefined(); + }); + + test('returns the run wiring when enabled', () => { + const wiring = buildHITLRunWiring({ enabled: true }); + expect(wiring).toBeDefined(); + expect(wiring?.humanInTheLoop).toEqual({ enabled: true }); + expect(wiring?.hooks).toBeInstanceOf(HookRegistry); + }); + + test('registers exactly one PreToolUse policy hook', () => { + const wiring = buildHITLRunWiring({ enabled: true, mode: 'bypass', allow: ['x'] }); + const matchers = wiring?.hooks.getMatchers('PreToolUse') ?? []; + expect(matchers).toHaveLength(1); + }); + + test('an enabled policy with no lists still wires (every tool falls through to ask)', () => { + const wiring = buildHITLRunWiring({ enabled: true }); + expect(wiring?.hooks.getMatchers('PreToolUse')).toHaveLength(1); + }); +}); diff --git a/packages/api/src/agents/hitl/runtime.ts b/packages/api/src/agents/hitl/runtime.ts new file mode 100644 index 0000000000..bbc350fc3d --- /dev/null +++ b/packages/api/src/agents/hitl/runtime.ts @@ -0,0 +1,41 @@ +import { HookRegistry, createToolPolicyHook } from '@librechat/agents'; +import type { TToolApprovalPolicy } from 'librechat-data-provider'; +import { isHITLEnabled, mapToolApprovalPolicy } from './policy'; + +/** + * The HITL fragment spread onto a `RunConfig` when tool approval is enabled. + * + * Kept as one object so the run seam attaches the opt-in switch and the policy + * hook together — they're meaningless apart. The checkpointer is resolved + * separately (it's an async, process-wide singleton) and merged into + * `graphConfig.compileOptions` at the call site. + */ +export interface HITLRunWiring { + humanInTheLoop: { enabled: true }; + hooks: HookRegistry; +} + +/** + * Assemble the run-level HITL wiring for a tool-approval policy, or `undefined` + * when HITL is disabled (the default) — in which case the run attaches nothing + * and behaves exactly as it did before this feature. + * + * The returned `hooks` registry carries a single `PreToolUse` policy hook built + * from {@link mapToolApprovalPolicy}. An enabled policy with no allow/deny/ask + * lists falls through to `mode: 'default'`, i.e. every tool prompts — the safe + * default for "HITL on, nothing else specified". + */ +export function buildHITLRunWiring( + policy: TToolApprovalPolicy | undefined, +): HITLRunWiring | undefined { + if (!isHITLEnabled(policy)) { + return undefined; + } + + const registry = new HookRegistry(); + registry.register('PreToolUse', { + hooks: [createToolPolicyHook(mapToolApprovalPolicy(policy) ?? {})], + }); + + return { humanInTheLoop: { enabled: true }, hooks: registry }; +} diff --git a/packages/api/src/agents/index.ts b/packages/api/src/agents/index.ts index a94eb39d9d..28c70bb538 100644 --- a/packages/api/src/agents/index.ts +++ b/packages/api/src/agents/index.ts @@ -3,6 +3,7 @@ export * from './attachments'; export * from './chain'; export * from './client'; export * from './config'; +export * from './checkpointer'; export * from './contact'; export * from './context'; export * from './discovery'; diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index e23f8270e9..810b56e1e3 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -36,9 +36,12 @@ import type { SubagentUsageEvent } from '~/agents/usage'; import type * as t from '~/types'; import { getLLMConfig as getAnthropicLLMConfig } from '~/endpoints/anthropic/llm'; import { getProviderConfig } from '~/endpoints/config/providers'; +import { resolveToolApprovalPolicy } from '~/agents/hitl/policy'; import { extractDefaultParams } from '~/endpoints/openai/llm'; import { resolveHeaders, createSafeUser } from '~/utils/env'; +import { getAgentCheckpointer } from '~/agents/checkpointer'; import { getOpenAIConfig } from '~/endpoints/openai/config'; +import { buildHITLRunWiring } from '~/agents/hitl/runtime'; import { buildLangfuseConfig } from '~/langfuse/config'; import { resolveConfigHeaders } from '~/utils/headers'; import { applyTestRunHook } from '~/agents/testHook'; @@ -870,6 +873,7 @@ export async function createRun({ signal, agents, messages, + discoveredToolNames, requestBody, user, tenantId, @@ -882,6 +886,7 @@ export async function createRun({ calibrationRatio, appConfig, subagentUsageSink, + hitlCapable = false, streaming = true, streamUsage = true, }: { @@ -895,6 +900,16 @@ export async function createRun({ tenantId?: string; /** Message history for extracting previously discovered tools */ messages?: BaseMessage[]; + /** + * Pre-discovered deferred-tool names to force-load directly, bypassing message + * extraction. The HITL resume path rebuilds the graph with `messages: []` (state + * comes from the durable checkpoint), so the in-turn `tool_search` results that + * would normally mark a deferred tool discovered aren't present — without this the + * paused tool would be absent from the rebuilt schema-only toolMap and resume would + * fail with "unknown tool". Captured at pause via `extractDiscoveredToolsFromHistory` + * and replayed here. Merged with (not replacing) any names extracted from `messages`. + */ + discoveredToolNames?: string[]; summarizationConfig?: SummarizationConfig; /** Cross-run summary from formatAgentMessages, forwarded to AgentContext */ initialSummary?: { text: string; tokenCount: number }; @@ -914,6 +929,15 @@ export async function createRun({ * Switch to the `RunConfig` pick once the dependency is bumped. */ subagentUsageSink?: (event: SubagentUsageEvent) => void; + /** + * Whether the caller implements the HITL pause/resume lifecycle (inspects + * `run.getInterrupt()`, persists a pending action, exposes a resume route). Gates the + * tool-approval wiring: only AgentClient (chat + resume) sets this. The OpenAI-compatible + * and Responses controllers leave it false, so an approval-gated tool can't pause on a + * route that has no approval surface or resume endpoint (it would otherwise emit a normal + * final response / `[DONE]` with the tool call left unresolved). + */ + hitlCapable?: boolean; } & Pick< RunConfig, 'tokenCounter' | 'customHandlers' | 'indexTokenCountMap' | 'initialSessions' @@ -928,10 +952,22 @@ export async function createRun({ */ const hasAnyDeferredTools = agents.some((agent) => agent.hasDeferredTools === true); - const discoveredTools = - hasAnyDeferredTools && messages?.length - ? extractDiscoveredToolsFromHistory(messages) - : new Set(); + const discoveredTools = new Set(); + if (hasAnyDeferredTools) { + // Normal path: extract from this run's message history (tool_search results). + if (messages?.length) { + for (const name of extractDiscoveredToolsFromHistory(messages)) { + discoveredTools.add(name); + } + } + // Resume path: replay names captured at pause, since `messages` is empty (the + // paused run's tool_search results live only in the checkpoint, not here). + if (discoveredToolNames?.length) { + for (const name of discoveredToolNames) { + discoveredTools.add(name); + } + } + } const buildAgentInput = (agent: RunAgent, opts: { isSubagent?: boolean } = {}): AgentInputs => { const isSubagent = opts.isSubagent === true; @@ -1134,6 +1170,34 @@ export async function createRun({ */ const enableToolOutputReferences = anyAgentHasCodeEnv(agents); + /** + * Human-in-the-loop tool approval — OFF by default. When the agents endpoint + * opts in (`toolApproval.enabled`), attach the `PreToolUse` policy hook + the + * `humanInTheLoop` switch, and bind a durable checkpointer so a run that pauses + * for review can be rebuilt and resumed on any worker (see `agents/checkpointer.ts` + * and the resume route). When disabled, nothing attaches and the run is identical + * to before this feature shipped. + */ + const agentsEndpointConfig = appConfig?.endpoints?.[EModelEndpoint.agents]; + // Resolve the effective policy through the single seam so per-agent / per-skill + // sources can layer in later without touching this call site (see + // `resolveToolApprovalPolicy`). Only the endpoint layer is wired today, so this + // is identical to reading `toolApproval` directly. + const toolApprovalPolicy = resolveToolApprovalPolicy({ + endpoint: agentsEndpointConfig?.toolApproval, + }); + // Gate HITL to callers that actually implement the pause/resume lifecycle. The + // OpenAI-compatible + Responses controllers also call createRun/processStream but never + // inspect `run.getInterrupt()` or persist a pending action — so an approval-gated tool + // would pause with no approval surface or resume endpoint, and the route would emit a + // normal final response / `[DONE]` with the tool call dangling. Only AgentClient (chat + + // resume) passes `hitlCapable`; without it the run is identical to the no-HITL path. + const hitl = hitlCapable ? buildHITLRunWiring(toolApprovalPolicy) : undefined; + if (hitl) { + const checkpointer = await getAgentCheckpointer(agentsEndpointConfig?.checkpointer); + graphConfig.compileOptions = { ...graphConfig.compileOptions, checkpointer }; + } + /** * Built as a variable (not an inline literal) so the extra * `subagentUsageSink` field passes assignability against SDK versions @@ -1159,6 +1223,10 @@ export async function createRun({ ...(enableToolOutputReferences && { toolOutputReferences: { enabled: true }, }), + // HITL opt-in: the `humanInTheLoop` switch + the PreToolUse policy hook. Spread + // here (not just `compileOptions.checkpointer` above) so an `ask` decision raises + // a real interrupt — without these the run would never pause. Absent when disabled. + ...(hitl && { humanInTheLoop: hitl.humanInTheLoop, hooks: hitl.hooks }), }; const run = await Run.create(runConfig); diff --git a/packages/api/src/middleware/messageFilterPii.spec.ts b/packages/api/src/middleware/messageFilterPii.spec.ts index c67227ff2d..fc5c4f3ff6 100644 --- a/packages/api/src/middleware/messageFilterPii.spec.ts +++ b/packages/api/src/middleware/messageFilterPii.spec.ts @@ -89,6 +89,47 @@ describe('messageFilterPii middleware', () => { expect(capturedRes.status).toBe(400); }); + const SK = 'sk-proj-FAKE1234567890ABCDEF'; + + it('rejects a resume ask-user answer containing a blocked token', () => { + const { capturedRes, nextCalls } = runMiddleware({}, { answer: `the key is ${SK}` }); + expect(nextCalls).toBe(0); + expect(capturedRes.status).toBe(400); + }); + + it('rejects a tool-approval decision responseText containing a blocked token', () => { + const { capturedRes, nextCalls } = runMiddleware( + {}, + { decisions: [{ tool_call_id: 'a', decision: 'respond', responseText: `use ${SK}` }] }, + ); + expect(nextCalls).toBe(0); + expect(capturedRes.status).toBe(400); + }); + + it('rejects a reject-reason containing a blocked token', () => { + const { capturedRes, nextCalls } = runMiddleware( + {}, + { decisions: [{ tool_call_id: 'a', decision: 'reject', reason: `leaked ${SK}` }] }, + ); + expect(nextCalls).toBe(0); + expect(capturedRes.status).toBe(400); + }); + + it('rejects edited tool arguments containing a blocked token (stringified)', () => { + const { capturedRes, nextCalls } = runMiddleware( + {}, + { decisions: [{ tool_call_id: 'a', decision: 'edit', editedArguments: { token: SK } }] }, + ); + expect(nextCalls).toBe(0); + expect(capturedRes.status).toBe(400); + }); + + it('passes a clean resume answer through', () => { + const { capturedRes, nextCalls } = runMiddleware({}, { answer: 'name it report.pdf' }); + expect(nextCalls).toBe(1); + expect(capturedRes.status).toBeUndefined(); + }); + it('honors a starterPatterns subset (sk passes when only bearer is enabled)', () => { const { capturedRes, nextCalls } = runMiddleware( { starterPatterns: ['bearer_header'] }, diff --git a/packages/api/src/middleware/messageFilterPii.ts b/packages/api/src/middleware/messageFilterPii.ts index ff322cba8f..b2140950c9 100644 --- a/packages/api/src/middleware/messageFilterPii.ts +++ b/packages/api/src/middleware/messageFilterPii.ts @@ -141,6 +141,36 @@ export function createMessageFilterPii(options: CreateMessageFilterPiiOptions): candidates.push(...quotes); candidates.push(mergeQuotedText(text, quotes)); } + /** + * The shared `/agents/chat/resume` route carries user-authored text in different + * fields than a typed message: an ask-user `answer`, and a tool-approval decision's + * `respond` text, `reject` reason, and edited tool arguments. Scan them too — else a + * blocked token could ride a resume payload straight back into the model/tool, + * bypassing the filter the typed path enforces. + */ + if (typeof req.body?.answer === 'string' && req.body.answer.length > 0) { + candidates.push(req.body.answer); + } + if (Array.isArray(req.body?.decisions)) { + for (const decision of req.body.decisions) { + if (typeof decision?.responseText === 'string' && decision.responseText.length > 0) { + candidates.push(decision.responseText); + } + if (typeof decision?.reason === 'string' && decision.reason.length > 0) { + candidates.push(decision.reason); + } + if (decision?.editedArguments != null) { + try { + const edited = JSON.stringify(decision.editedArguments); + if (edited.length > 0) { + candidates.push(edited); + } + } catch { + /* ignore unstringifiable edited args */ + } + } + } + } if (candidates.length === 0) { next(); return; diff --git a/packages/api/src/stream/GenerationJobManager.ts b/packages/api/src/stream/GenerationJobManager.ts index 1986641189..1509335276 100644 --- a/packages/api/src/stream/GenerationJobManager.ts +++ b/packages/api/src/stream/GenerationJobManager.ts @@ -2,6 +2,7 @@ import { logger, getTenantId, SYSTEM_TENANT_ID } from '@librechat/data-schemas'; import { Constants, UsageEvents, + ApprovalEvents, parseTextParts, reconcileContextUsage, promptTokensFromUsage, @@ -28,12 +29,15 @@ import { setGenerationJobsInFlight, recordGenerationJob, } from '~/app/metrics'; +import { isPendingActionStale, isPendingActionExpired } from './interfaces/IJobStore'; import { InMemoryEventTransport } from './implementations/InMemoryEventTransport'; import { InMemoryJobStore } from './implementations/InMemoryJobStore'; import { filterPersistableAbortContent } from './abortContent'; -import { isPendingActionStale } from './interfaces/IJobStore'; import { ApprovalLifecycle } from './ApprovalLifecycle'; +/** Terminal error surfaced to a client still attached when its approval window lapses. */ +const APPROVAL_EXPIRED_ERROR = 'Approval expired before a decision was made'; + /** Error surfaced to any client still attached when a stale/hung job is reaped. */ const REAPED_JOB_ERROR = 'Generation timed out'; const OAUTH_TOOL_CALL_PREFIX = `oauth${Constants.mcp_delimiter}`; @@ -492,6 +496,14 @@ class GenerationJobManagerClass { iconURL: jobData.iconURL, model: jobData.model, promptTokens: jobData.promptTokens, + // Surface the originating agent so the resume route can refuse to rebuild a + // paused run on a different agent. + agent_id: jobData.agent_id, + // Surface whether the turn was temporary so a resume keeps it non-persisted. + isTemporary: jobData.isTemporary, + // Surface deferred tools discovered before the pause so the resume route can + // replay them into createRun (the rebuilt graph passes `messages: []`). + discoveredTools: jobData.discoveredTools, // Surface the pending review so status/resume routes built on the // facade can render the prompt for a `requires_action` job. pendingAction: jobData.pendingAction, @@ -1052,6 +1064,31 @@ class GenerationJobManagerClass { skipBufferReplay: true, }); + // Close the snapshot→subscribe race: getResumeState() snapshots BEFORE we attach the + // subscription, so a pause that becomes durable in that window is in neither + // resumeState.pendingAction nor (Redis mode) pendingEvents — and trackReplayEvent does + // not persist approval events — leaving the client attached to a paused job with no + // approval UI. Re-read the live job AFTER subscribing; if it is now requires_action and + // the snapshot didn't already carry the action, surface it as a pending event so the + // approval prompt renders. Idempotent: a pause landing AFTER attach is delivered live + // too, and the client's handler just sets the current action, so a duplicate is benign. + if (!resumeState?.pendingAction) { + const liveJob = await this.jobStore.getJob(streamId); + if ( + liveJob?.status === 'requires_action' && + liveJob.pendingAction != null && + !isPendingActionStale(liveJob) + ) { + pendingEvents = [ + ...pendingEvents, + { + event: ApprovalEvents.ON_PENDING_ACTION, + data: liveJob.pendingAction as unknown as Record, + }, + ]; + } + } + return { subscription, resumeState, pendingEvents }; } @@ -1351,6 +1388,11 @@ class GenerationJobManagerClass { } const { message } = event; + const extra = message as { + manualSkills?: string[]; + alwaysAppliedSkills?: string[]; + files?: unknown[]; + }; const updates: Partial = { createdEventEmitted: true, userMessage: { @@ -1359,6 +1401,19 @@ class GenerationJobManagerClass { conversationId: message.conversationId, text: message.text, quotes: message.quotes, + // Persist the turn's uploaded files so a HITL resume sources them from the job + // (this authoritative writer), not a user DB row whose save can still be racing + // the approval prompt. + ...(Array.isArray(extra.files) && extra.files.length > 0 && { files: extra.files }), + // Carry skill selections so a HITL resume's reconstructed requestMessage keeps + // its pills — this is the authoritative writer of job.metadata.userMessage and + // would otherwise drop them (the emitted created message includes them). + ...(Array.isArray(extra.manualSkills) && + extra.manualSkills.length > 0 && { manualSkills: extra.manualSkills }), + ...(Array.isArray(extra.alwaysAppliedSkills) && + extra.alwaysAppliedSkills.length > 0 && { + alwaysAppliedSkills: extra.alwaysAppliedSkills, + }), }, }; @@ -1398,9 +1453,18 @@ class GenerationJobManagerClass { if (metadata.model) { updates.model = metadata.model; } + if (metadata.agent_id) { + updates.agent_id = metadata.agent_id; + } + if (metadata.isTemporary !== undefined) { + updates.isTemporary = metadata.isTemporary; + } if (metadata.promptTokens !== undefined) { updates.promptTokens = metadata.promptTokens; } + if (metadata.discoveredTools) { + updates.discoveredTools = metadata.discoveredTools; + } await this.jobStore.updateJob(streamId, updates); } @@ -1598,7 +1662,97 @@ class GenerationJobManagerClass { * Cleanup expired jobs. * Also cleans up any orphaned runtime state, buffers, and event transport entries. */ + /** + * Expire any locally-tracked approval whose window has lapsed: drive the atomic + * `requires_action → aborted` transition and, if this caller won it, emit a + * terminal error so a connected SSE client closes. Only streams this replica has + * runtime for are scanned — those are exactly the ones with a client subscribed + * here; a paused job on another replica is finalized by that replica's sweep (and + * the store's own cleanup). The durable checkpoint is reclaimed by its Mongo TTL + * index, which shares the approval window, so no cross-layer delete is needed here. + */ + /** + * Expire a single observed-stale pending approval NOW (immediate, not via the periodic + * sweep): run the `requires_action → aborted` CAS — pinned to `actionId` so a concurrent + * resolve + re-pause on a fresh action isn't aborted — and, on success, emit the terminal + * `APPROVAL_EXPIRED_ERROR` so any attached SSE client gets a terminal event instead of a + * hung stream. Used by the periodic sweeper and by the resume route, which observes a + * just-expired action when the user submits a decision after the TTL lapsed. Returns true + * if this call expired the action. + */ + async expireApproval(streamId: string, actionId?: string): Promise { + const expired = await this._approvals.expire(streamId, actionId); + if (!expired) { + return false; + } + try { + await this.emitError(streamId, APPROVAL_EXPIRED_ERROR); + } catch (err) { + logger.error(`[GenerationJobManager] Failed to notify expired approval ${streamId}`, err); + } + this.runningJobs.delete(streamId); + return true; + } + + private async expireStaleApprovals(): Promise { + let changed = false; + for (const streamId of this.runtimeState.keys()) { + let job: SerializableJobData | null; + try { + job = await this.jobStore.getJob(streamId); + } catch (err) { + logger.error( + `[GenerationJobManager] Failed to read job during approval expiry sweep: ${streamId}`, + err, + ); + continue; + } + // Loser-replica relay: in a multi-replica deployment another replica's store + // cleanup (`cleanupRequiresActionIndex`) can win the requires_action → aborted + // approval-expiry CAS — it sets the hash error but cannot emit (the store has no + // event transport). A client subscribed on THIS replica would then never get a + // terminal event until the reap path. If the job is already aborted *for approval + // expiry* and we haven't emitted here, relay the terminal error to our subscriber. + // The `errorEvent` flag (set by emitError) keeps this idempotent vs the win path. + const runtime = this.runtimeState.get(streamId); + if ( + job?.status === 'aborted' && + job.error === APPROVAL_EXPIRED_ERROR && + !runtime?.errorEvent + ) { + try { + await this.emitError(streamId, APPROVAL_EXPIRED_ERROR); + } catch (err) { + logger.error(`[GenerationJobManager] Failed to relay expired approval ${streamId}`, err); + } + changed = this.runningJobs.delete(streamId) || changed; + continue; + } + if (!job || job.status !== 'requires_action' || !isPendingActionExpired(job)) { + continue; + } + // Pass the OBSERVED action id so the expire CAS only fires for the action we read + // as stale. Between this read and the CAS, the user could resolve it and the run + // re-pause on a fresh action; without the id, the CAS (status-only) would abort + // that valid new pause and leave it terminal. + const didExpire = await this.expireApproval(streamId, job.pendingAction?.actionId); + if (!didExpire) { + continue; + } + changed = true; + logger.debug(`[GenerationJobManager] Expired pending approval: ${streamId}`); + } + if (changed) { + this.syncRunningJobMetrics(); + } + } + private async cleanup(): Promise { + // Finalize approvals whose window lapsed before the store's own cleanup, so a + // client still attached to a paused stream gets a terminal event instead of a + // connection that hangs open until it gives up. + await this.expireStaleApprovals(); + const count = await this.jobStore.cleanup(); let runningJobsChanged = false; diff --git a/packages/api/src/stream/__tests__/RedisJobStore.stream_integration.spec.ts b/packages/api/src/stream/__tests__/RedisJobStore.stream_integration.spec.ts index 55229ebc5e..5dd3cfb164 100644 --- a/packages/api/src/stream/__tests__/RedisJobStore.stream_integration.spec.ts +++ b/packages/api/src/stream/__tests__/RedisJobStore.stream_integration.spec.ts @@ -287,6 +287,184 @@ describe('RedisJobStore Integration Tests', () => { await store.destroy(); }); + test('appendChunk preserves a paused job’s extended TTL (does not reset to running)', async () => { + if (!ioredisClient) { + return; + } + + const { RedisJobStore } = await import('../implementations/RedisJobStore'); + const store = new RedisJobStore(ioredisClient, { runningTtl: 60 }); + await store.initialize(); + + const streamId = `paused-chunk-ttl-${Date.now()}`; + const chunkKey = `stream:{${streamId}}:chunks`; + + await store.createJob(streamId, 'user-1', streamId); + await store.appendChunk(streamId, { event: 'on_message_delta', data: { text: 'hello' } }); + + // Pause: transitionStatus extends the chunk-key TTL to the long approval window. + await store.transitionStatus(streamId, { + from: 'running', + to: 'requires_action', + patch: { pendingAction: buildPendingAction(streamId) }, + }); + expect(await ioredisClient.ttl(chunkKey)).toBeGreaterThan(60); + + // The on_pending_action chunk is appended AFTER the pause. The bug Codex flagged was + // that appendChunk unconditionally reset the TTL back to the (short) running TTL, + // evicting the pre-pause content before resume. It must now leave the long TTL intact. + await store.appendChunk(streamId, { + event: 'on_pending_action', + data: buildPendingAction(streamId), + }); + expect(await ioredisClient.ttl(chunkKey)).toBeGreaterThan(60); + // The chunk was still appended (XADD ran), so resume can read the full stream. + expect(await ioredisClient.xlen(chunkKey)).toBeGreaterThanOrEqual(2); + + await store.destroy(); + }); + + test('saveRunSteps preserves a paused job’s extended TTL (does not reset to running)', async () => { + if (!ioredisClient) { + return; + } + + const { RedisJobStore } = await import('../implementations/RedisJobStore'); + const store = new RedisJobStore(ioredisClient, { runningTtl: 60 }); + await store.initialize(); + + const streamId = `paused-runsteps-ttl-${Date.now()}`; + const runStepsKey = `stream:{${streamId}}:runsteps`; + + await store.createJob(streamId, 'user-1', streamId); + await store.saveRunSteps!(streamId, [{ id: 'step-1', type: 'tool_call' }] as never); + + // Pause: transitionStatus extends the run-steps key TTL to the long approval window. + await store.transitionStatus(streamId, { + from: 'running', + to: 'requires_action', + patch: { pendingAction: buildPendingAction(streamId) }, + }); + expect(await ioredisClient.ttl(runStepsKey)).toBeGreaterThan(60); + + // A run-step save landing AFTER the pause must NOT reset the key to the running TTL, + // or a reload of the still-live approval after that window loses the tool timeline. + await store.saveRunSteps!(streamId, [ + { id: 'step-1', type: 'tool_call' }, + { id: 'step-2', type: 'tool_call' }, + ] as never); + expect(await ioredisClient.ttl(runStepsKey)).toBeGreaterThan(60); + // The save still landed, so resume can read the full timeline. + const steps = await store.getRunSteps(streamId); + expect(steps.length).toBe(2); + + await store.destroy(); + }); + + test('appendChunk gives the approval TTL when the chunk key did not exist at pause time', async () => { + if (!ioredisClient) { + return; + } + + const { RedisJobStore } = await import('../implementations/RedisJobStore'); + const store = new RedisJobStore(ioredisClient, { runningTtl: 60 }); + await store.initialize(); + + const streamId = `paused-no-chunk-${Date.now()}`; + const chunkKey = `stream:{${streamId}}:chunks`; + + // The job pauses BEFORE any chunk was persisted (ask-user pause with no prior + // delta, or the first appendChunk still in flight because emitChunk is + // fire-and-forget). The pause's `EXPIRE chunks` is a no-op because the key + // does not exist yet, so the chunk stream carries no extended TTL. + await store.createJob(streamId, 'user-1', streamId); + await store.transitionStatus(streamId, { + from: 'running', + to: 'requires_action', + patch: { pendingAction: buildPendingAction(streamId) }, + }); + expect(await ioredisClient.exists(chunkKey)).toBe(0); + + // The first chunk lands AFTER the pause. The bug Codex re-raised: appendChunk + // would create the stream with only the short running TTL (60s), so the + // aggregated tool-call content is evicted before the 24h approval window ends. + // The fix reads the paused window from the job key and bumps the chunk TTL to it. + await store.appendChunk(streamId, { + event: 'on_pending_action', + data: buildPendingAction(streamId), + }); + + expect(await ioredisClient.ttl(chunkKey)).toBeGreaterThan(60); + expect(await ioredisClient.xlen(chunkKey)).toBeGreaterThanOrEqual(1); + + await store.destroy(); + }); + + test('appendChunk keeps a normally-running job on the short running TTL (no inflation)', async () => { + if (!ioredisClient) { + return; + } + + const { RedisJobStore } = await import('../implementations/RedisJobStore'); + const store = new RedisJobStore(ioredisClient, { runningTtl: 60 }); + await store.initialize(); + + const streamId = `running-no-inflate-${Date.now()}`; + const chunkKey = `stream:{${streamId}}:chunks`; + + // A normal running job: the job key carries the running TTL (set by createJob), + // NOT the long approval window. appendChunk must settle the chunk TTL on the + // short running TTL — never max it against the job key — so a live stream is + // not inflated to the 24h approval window. + await store.createJob(streamId, 'user-1', streamId); + await store.appendChunk(streamId, { + event: 'on_message_delta', + data: { text: 'hello' }, + }); + + const ttl = await ioredisClient.ttl(chunkKey); + expect(ttl).toBeGreaterThan(0); + expect(ttl).toBeLessThanOrEqual(60); + + await store.destroy(); + }); + + test('createJob clears stale per-turn identity (agent_id, isTemporary) from a reused hash', async () => { + if (!ioredisClient) { + return; + } + + const { RedisJobStore } = await import('../implementations/RedisJobStore'); + const store = new RedisJobStore(ioredisClient); + await store.initialize(); + + const streamId = `stale-agent-${Date.now()}`; + // Turn 1: a saved agent in a temporary chat that discovered a deferred tool. + await store.createJob(streamId, 'user-1', streamId); + await store.updateJob(streamId, { + agent_id: 'saved-agent-1', + isTemporary: true, + discoveredTools: ['deep_tool'], + }); + const turn1 = await store.getJob(streamId); + expect(turn1?.agent_id).toBe('saved-agent-1'); + expect(turn1?.isTemporary).toBe(true); + expect(turn1?.discoveredTools).toEqual(['deep_tool']); + + // Turn 2 on the SAME conversation switches to an ephemeral / non-temporary turn. + // The hash is keyed by conversationId, so without clearing, the old agent_id and + // isTemporary would survive — the resume guard would reject the valid pause as a + // different agent, and the resumed response would be saved as temporary. The stale + // discoveredTools would also force-load deferred tools this turn never discovered. + await store.createJob(streamId, 'user-1', streamId); + const turn2 = await store.getJob(streamId); + expect(turn2?.agent_id).toBeUndefined(); + expect(turn2?.isTemporary).toBeUndefined(); + expect(turn2?.discoveredTools).toBeUndefined(); + + await store.destroy(); + }); + test('should not drop paused jobs from user tracking when cleanup sees a stale running index', async () => { if (!ioredisClient) { return; diff --git a/packages/api/src/stream/__tests__/pendingAction.spec.ts b/packages/api/src/stream/__tests__/pendingAction.spec.ts index de4136a513..4edfed9619 100644 --- a/packages/api/src/stream/__tests__/pendingAction.spec.ts +++ b/packages/api/src/stream/__tests__/pendingAction.spec.ts @@ -172,6 +172,21 @@ describe('ApprovalLifecycle via GenerationJobManager.approvals (in-memory)', () expect(await manager.approvals.expire(streamId)).toBe(false); }); + test('a mismatched expectedActionId no-ops (protects a re-paused action from a stale sweep)', async () => { + const streamId = 'stream-expire-mismatch'; + await manager.createJob(streamId, 'user-1'); + await manager.approvals.pause(streamId, buildAction(streamId, { actionId: 'action-A' })); + + // A sweep that observed an OLDER (now-resolved) action must not abort the current + // pause — its CAS only fires when the live pendingActionId still matches. + expect(await manager.approvals.expire(streamId, 'stale-other-action')).toBe(false); + expect(await manager.getJobStatus(streamId)).toBe('requires_action'); + + // The matching id still expires it. + expect(await manager.approvals.expire(streamId, 'action-A')).toBe(true); + expect(await manager.getJobStatus(streamId)).toBe('aborted'); + }); + test('sets completedAt so terminal cleanup can reclaim the job', async () => { const streamId = 'stream-expire-completed'; await manager.createJob(streamId, 'user-1'); @@ -249,3 +264,88 @@ describe('InMemoryJobStore — approval expiry cleanup', () => { expect(await store.getJob('s1')).toBeNull(); }); }); + +describe('GenerationJobManager HITL resume metadata (round 19)', () => { + let manager: GenerationJobManagerClass; + + beforeEach(() => { + manager = new GenerationJobManagerClass(); + manager.configure({ + jobStore: new InMemoryJobStore({ ttlAfterComplete: 60000 }), + eventTransport: new InMemoryEventTransport(), + isRedis: false, + cleanupOnComplete: false, + }); + manager.initialize(); + }); + + afterEach(async () => { + await manager.destroy(); + }); + + function buildAction(streamId: string) { + const payload = buildToolApprovalPayload([ + { name: 'shell', arguments: { command: 'ls' }, tool_call_id: 'call_abc' }, + ]); + return buildPendingAction(payload, { + streamId, + conversationId: streamId, + runId: 'run-1', + responseMessageId: 'msg-1', + }); + } + + // H1: round-18 captured discoveredTools but the metadata allowlists (updateMetadata, + // Redis deserialize, buildJobFacade) dropped it, so resume replayed `undefined`. + test('updateMetadata persists discoveredTools and the job facade exposes them', async () => { + const streamId = 'stream-discovered'; + await manager.createJob(streamId, 'user-1'); + await manager.updateMetadata(streamId, { discoveredTools: ['deep_tool', 'other_tool'] }); + const job = await manager.getJob(streamId); + expect(job?.metadata.discoveredTools).toEqual(['deep_tool', 'other_tool']); + }); + + // H4: a pause that lands AFTER the resume snapshot but before the subscription must + // still reach the client. subscribeWithResume re-reads the live job and surfaces it. + test('subscribeWithResume surfaces a pause that the resume snapshot missed', async () => { + const streamId = 'stream-race'; + await manager.createJob(streamId, 'user-1'); + const action = buildAction(streamId); + await manager.approvals.pause(streamId, action); + + // Simulate the snapshot being taken BEFORE the pause: drop pendingAction from the + // resume state even though the live job is now requires_action. + const realGetResumeState = manager.getResumeState.bind(manager); + jest.spyOn(manager, 'getResumeState').mockImplementation(async (id: string) => { + const state = await realGetResumeState(id); + return state ? { ...state, pendingAction: undefined } : state; + }); + + const result = await manager.subscribeWithResume(streamId, () => {}); + const pending = result.pendingEvents.find( + (e) => 'event' in e && e.event === 'on_pending_action', + ); + expect(pending).toBeDefined(); + expect((pending as unknown as { data: { actionId: string } }).data.actionId).toBe( + action.actionId, + ); + result.subscription?.unsubscribe(); + }); + + // H4 negative: when the snapshot already carried the action, the re-read is skipped + // (the client gets it via resumeState.pendingAction) — no duplicate pending event. + test('does not re-surface the pending action when the snapshot already carried it', async () => { + const streamId = 'stream-norace'; + await manager.createJob(streamId, 'user-1'); + const action = buildAction(streamId); + await manager.approvals.pause(streamId, action); + + const result = await manager.subscribeWithResume(streamId, () => {}); + const pendingCount = result.pendingEvents.filter( + (e) => 'event' in e && e.event === 'on_pending_action', + ).length; + expect(pendingCount).toBe(0); + expect(result.resumeState?.pendingAction?.actionId).toBe(action.actionId); + result.subscription?.unsubscribe(); + }); +}); diff --git a/packages/api/src/stream/implementations/RedisJobStore.ts b/packages/api/src/stream/implementations/RedisJobStore.ts index 32d78ee487..6453ab4dcb 100644 --- a/packages/api/src/stream/implementations/RedisJobStore.ts +++ b/packages/api/src/stream/implementations/RedisJobStore.ts @@ -39,6 +39,71 @@ const JOB_CAS_LUA = 'redis.call("EXPIRE", KEYS[1], ttl) ' + 'return 1'; +/** + * XADD a chunk + set the chunk-stream TTL to the right window WITHOUT ever shrinking it. + * + * During a live stream the running TTL is refreshed on every chunk. But a job paused + * for HITL review must keep its chunk stream alive for the whole approval window, not + * the ~20m running TTL — otherwise the pre-pause aggregated content (tool call + earlier + * text) is evicted before the user resolves and `getResumeState()` loses it. + * + * `transitionStatus` extends the chunk-key TTL to the approval window at pause time, but + * that alone is not enough: + * 1. The pause's `EXPIRE chunks` is a no-op if the chunk key does not exist yet — and + * `appendChunk` is fire-and-forget, so the first chunk's XADD can land AFTER the + * pause, or an ask-user pause can occur before any chunk was ever persisted. + * 2. The `on_pending_action` chunk (and any chunk that races in after the pause) would + * otherwise reset an already-extended TTL back to the short running TTL. + * So this script derives the target window itself: the running TTL normally, but when the + * job hash is paused (`status == "requires_action"`) it takes the larger of the running + * TTL and the job key's own remaining TTL (which `transitionStatus` set to the approval + * window). It only ever EXTENDS — `cur < target` — so a normally-running stream keeps the + * round-10 extend-only behavior and is never inflated to the approval window. + * + * Reading the paused window from the job key (rather than always max-ing against it) is + * what keeps a normal running run on the short TTL: TTL(jobKey) is only the long approval + * window while paused; for a running job the job key carries the running TTL, so target + * stays `run`. + * + * KEYS: [chunks, job] + * ARGV: [eventJson, runningTtl] + */ +const CHUNK_APPEND_LUA = + 'redis.call("XADD", KEYS[1], "*", "event", ARGV[1]) ' + + 'local run = tonumber(ARGV[2]) ' + + 'local target = run ' + + 'if redis.call("HGET", KEYS[2], "status") == "requires_action" then ' + + 'local jt = redis.call("TTL", KEYS[2]) ' + + 'if jt > target then target = jt end ' + + 'end ' + + 'local cur = redis.call("TTL", KEYS[1]) ' + + 'if cur < target then redis.call("EXPIRE", KEYS[1], target) end ' + + 'return 1'; + +/** + * Persist the run-step timeline with the same paused-window TTL as the chunk stream. + * `saveRunSteps` SETs (overwrites) the whole array, so unlike the chunk append there's no + * prior key TTL worth preserving — but the write must still extend to the APPROVAL window + * when the job is paused (`status == "requires_action"`). Otherwise a run-step save that + * lands at/after a fast pause resets the key to the short running TTL, and a reload of a + * still-live approval after that window loses the tool/run-step timeline even though the + * approval remains resumable. Reads the paused window from the job key (which + * `transitionStatus` set); a normally-running job keeps the short running TTL. + * + * KEYS: [runSteps, job] + * ARGV: [runStepsJson, runningTtl] + */ +const RUNSTEPS_SAVE_LUA = + 'redis.call("SET", KEYS[1], ARGV[1]) ' + + 'local run = tonumber(ARGV[2]) ' + + 'local target = run ' + + 'if redis.call("HGET", KEYS[2], "status") == "requires_action" then ' + + 'local jt = redis.call("TTL", KEYS[2]) ' + + 'if jt > target then target = jt end ' + + 'end ' + + 'redis.call("EXPIRE", KEYS[1], target) ' + + 'return 1'; + /** Decision kinds the SDK can emit, used to sanity-check persisted records. */ const KNOWN_INTERRUPT_TYPES = new Set(['tool_approval', 'ask_user_question']); @@ -206,14 +271,27 @@ export class RedisJobStore implements IJobStore { const key = KEYS.job(streamId); const userJobsKey = KEYS.userJobs(userId, tenantId); - // A reused streamId overlays onto any existing hash, so paused-run fields - // from a prior generation could survive. Drop the HITL fields so the fresh - // running job never exposes stale approval metadata and cleanup keys off the - // new createdAt rather than a leftover lastActiveAt. + // A reused streamId overlays onto any existing hash, so per-turn fields from a + // prior generation could survive. Drop the HITL fields so the fresh running job + // never exposes stale approval metadata and cleanup keys off the new createdAt + // rather than a leftover lastActiveAt. `agent_id` is included because + // updateMetadata only writes it when truthy — without clearing it here, a + // conversation that switches from a saved agent to an ephemeral/no-agent turn + // would keep the old agent_id and the resume guard would reject the valid pause. const staleHitlFields: Array = [ 'pendingAction', 'pendingActionId', 'lastActiveAt', + 'agent_id', + // Same reasoning as agent_id: updateMetadata only writes isTemporary when the new + // metadata carries it, so a prior temporary turn's isTemporary=1 would otherwise + // survive and a later non-temporary resume would save its response as temporary. + 'isTemporary', + // Same reasoning again: handleRunInterrupt only writes discoveredTools when THIS + // turn discovered ≥1 deferred tool, so a replacement turn that later pauses without + // its own discovery would otherwise inherit the prior run's tool names and force-load + // deferred tools it never discovered on resume. + 'discoveredTools', ]; // For cluster mode, we can't pipeline keys on different slots @@ -630,6 +708,12 @@ export class RedisJobStore implements IJobStore { error: 'Approval expired before a decision was made', completedAt: Date.now(), }, + // Scope the CAS to the action we observed as stale: if the user resolved it + // and the run re-paused on a fresh action between the read and here, the + // pendingActionId no longer matches and this no-ops instead of aborting the + // valid new pause. (Undefined for a missing/malformed pendingAction — nothing + // to protect — so it falls back to the status-only check.) + expectActionId: job.pendingAction?.actionId, }); return 1; } @@ -937,14 +1021,24 @@ export class RedisJobStore implements IJobStore { */ async appendChunk(streamId: string, event: unknown): Promise { const key = KEYS.chunks(streamId); - // Pipeline XADD + EXPIRE in a single round-trip. - // EXPIRE is O(1) and idempotent — refreshing TTL on every chunk is better than - // only setting it once, since the original approach could let the TTL expire - // during long-running streams. - const pipeline = this.redis.pipeline(); - pipeline.xadd(key, '*', 'event', JSON.stringify(event)); - pipeline.expire(key, this.ttl.running); - await pipeline.exec(); + const jobKey = KEYS.job(streamId); + // XADD + derive-and-extend-only EXPIRE in a single atomic eval. Refreshing the TTL on + // every chunk (vs only once) keeps the key alive through long streams, but it must + // NEVER shrink an already-longer TTL — a paused (requires_action) job needs this key + // to live for the whole approval window, and the on_pending_action append (or any + // chunk that lands after the pause) would otherwise reset it to the short running TTL. + // The script reads the paused window from the job key, so it bumps to the approval TTL + // even when the pause's own EXPIRE no-op'd because this key didn't exist yet, while a + // normally-running run still settles on the short running TTL. Both keys share the + // {streamId} hash tag, so the 2-key eval stays on one slot under Redis Cluster. + await this.redis.eval( + CHUNK_APPEND_LUA, + 2, + key, + jobKey, + JSON.stringify(event), + String(this.ttl.running), + ); } /** @@ -970,11 +1064,20 @@ export class RedisJobStore implements IJobStore { } /** - * Save run steps for resume state. + * Save run steps for resume state. Uses the paused-window TTL script so a run-step save + * landing at/after a HITL pause extends to the approval window instead of resetting the + * key to the short running TTL (which would drop the tool timeline on a reload of a + * still-live approval — mirrors the chunk-stream no-shrink behavior). */ async saveRunSteps(streamId: string, runSteps: Agents.RunStep[]): Promise { - const key = KEYS.runSteps(streamId); - await this.redis.set(key, JSON.stringify(runSteps), 'EX', this.ttl.running); + await this.redis.eval( + RUNSTEPS_SAVE_LUA, + 2, + KEYS.runSteps(streamId), + KEYS.job(streamId), + JSON.stringify(runSteps), + String(this.ttl.running), + ); } // ===== Consumer Group Methods ===== @@ -1215,6 +1318,10 @@ export class RedisJobStore implements IJobStore { iconURL: data.iconURL || undefined, model: data.model || undefined, promptTokens: data.promptTokens ? parseInt(data.promptTokens, 10) : undefined, + agent_id: data.agent_id || undefined, + isTemporary: data.isTemporary != null ? data.isTemporary === '1' : undefined, + // Deferred tools discovered before a HITL pause; replayed into createRun on resume. + discoveredTools: data.discoveredTools ? JSON.parse(data.discoveredTools) : undefined, titleEvent: data.titleEvent || undefined, replayEvents: data.replayEvents || undefined, contextUsage: data.contextUsage || undefined, diff --git a/packages/api/src/stream/interfaces/IJobStore.ts b/packages/api/src/stream/interfaces/IJobStore.ts index f3dcb752bf..9e55d543e1 100644 --- a/packages/api/src/stream/interfaces/IJobStore.ts +++ b/packages/api/src/stream/interfaces/IJobStore.ts @@ -32,11 +32,25 @@ export interface SerializableJobData { /** Quoted excerpts referenced on this turn, carried so resumable/aborted * reconstructions of the user message keep their `MessageQuotes`. */ quotes?: string[]; + /** Skill selections, carried so a HITL-resumed turn's requestMessage keeps its pills. */ + manualSkills?: string[]; + alwaysAppliedSkills?: string[]; + /** Uploaded files for the turn, carried so a HITL resume sources them from the job + * rather than a user DB row whose save can still be racing the approval prompt. */ + files?: unknown[]; }; /** Response message ID for reconnection */ responseMessageId?: string; + /** + * Deferred-tool names discovered (via `tool_search`) before a HITL pause, captured + * so a resume can replay them into `createRun` — the rebuilt graph uses `messages: []` + * (state comes from the checkpoint), so without these the paused deferred tool would be + * absent from the schema-only toolMap and resume would fail with "unknown tool". + */ + discoveredTools?: string[]; + /** Whether the user-message created event has been emitted */ createdEventEmitted?: boolean; @@ -67,6 +81,20 @@ export interface SerializableJobData { model?: string; promptTokens?: number; + /** + * Agent that initiated the run. Persisted so a HITL resume can verify it rebuilds + * the SAME agent that paused — resuming Agent A's checkpoint on Agent B's graph + * would mis-execute the paused tool calls. + */ + agent_id?: string; + + /** + * Whether the originating turn was a temporary (non-persisted) chat. Persisted so + * a HITL resume keeps the resumed response temporary instead of saving it — the + * resume request can't be trusted to re-send the flag. + */ + isTemporary?: boolean; + /** * Set when status is `requires_action`. Describes the human review the * run is waiting on. Cleared by the resume path before the job returns to `running`. diff --git a/packages/api/src/types/stream.ts b/packages/api/src/types/stream.ts index d43a1112f1..47a94d6ead 100644 --- a/packages/api/src/types/stream.ts +++ b/packages/api/src/types/stream.ts @@ -20,6 +20,16 @@ export interface GenerationJobMetadata { model?: string; /** Prompt token count for abort token spending */ promptTokens?: number; + /** Agent that initiated the run; a HITL resume verifies it rebuilds the same agent. */ + agent_id?: string; + /** Whether the originating turn was a temporary chat; a HITL resume keeps it so. */ + isTemporary?: boolean; + /** + * Deferred-tool names discovered (via `tool_search`) before a HITL pause. A resume + * replays these into `createRun` because the rebuilt graph uses `messages: []`, so + * without them the paused deferred tool would be missing from the schema-only toolMap. + */ + discoveredTools?: string[]; /** Set when the job is paused for human review (status === 'requires_action') */ pendingAction?: Agents.PendingAction; } diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index 01db2feeed..fefa3305a5 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -801,6 +801,45 @@ export const toolApprovalPolicySchema = z export type TToolApprovalPolicy = z.infer; +/** + * Durable checkpointer backing human-in-the-loop resume. + * + * When `toolApproval.enabled` is true, a run that pauses for review suspends its + * LangGraph state to a checkpoint; resuming rebuilds that state on a *fresh* `Run` + * — possibly on a different replica, or the same worker after a restart. That only + * works if the checkpoint outlives the original request, so HITL needs a durable + * saver, not the SDK's process-local `MemorySaver` fallback. + * + * Defaults are zero-config: with `toolApproval.enabled` on and no `checkpointer` + * block, LibreChat persists checkpoints to its primary MongoDB, so resume works + * across replicas out of the box. + * + * - `type: 'mongo'` (default) — persist to the app database; survives restarts and + * resolves on any replica. A TTL index reclaims runs that are never resolved. + * - `type: 'memory'` — process-local only. Paused runs do NOT survive a restart and + * can only be resolved on the originating worker. Single-process / dev only. + */ +export const checkpointerTypeSchema = z.enum(['mongo', 'memory']); +export type TCheckpointerType = z.infer; + +export const checkpointerSchema = z + .object({ + type: checkpointerTypeSchema.optional(), + /** + * Approval window, in seconds: how long a paused run waits for a decision + * before it is reclaimed. Drives both the Mongo TTL index on checkpoints and + * the pending-action expiry, keeping the two layers in lockstep. Defaults to + * 86400 (24h). Raise it for longer review windows. + */ + ttl: z.number().int().positive().optional(), + /** Advanced: override the Mongo collection names used for checkpoints. */ + checkpointCollectionName: z.string().optional(), + checkpointWritesCollectionName: z.string().optional(), + }) + .optional(); + +export type TCheckpointerConfig = z.infer; + export const agentsEndpointSchema = baseEndpointSchema .omit({ baseURL: true }) .merge( @@ -825,6 +864,9 @@ export const agentsEndpointSchema = baseEndpointSchema remoteApi: remoteApiSchema.optional(), /** Human-in-the-loop tool approval policy. Off by default. */ toolApproval: toolApprovalPolicySchema, + /** Durable checkpointer backing HITL resume. Defaults to the app's MongoDB + * when `toolApproval.enabled` is set; ignored otherwise. */ + checkpointer: checkpointerSchema, }), ) .default({ diff --git a/packages/data-provider/src/types/agents.ts b/packages/data-provider/src/types/agents.ts index 6318dd651f..0e7463f924 100644 --- a/packages/data-provider/src/types/agents.ts +++ b/packages/data-provider/src/types/agents.ts @@ -217,6 +217,11 @@ export namespace Agents { parentMessageId?: string; conversationId?: string; text?: string; + /** Skill selections on the turn, carried so a HITL-resumed turn's reconstructed + * requestMessage keeps its skill pills (they aren't on the DB row the client refetches + * until reload). */ + manualSkills?: string[]; + alwaysAppliedSkills?: string[]; } /** State data sent to reconnecting clients */ @@ -403,6 +408,21 @@ export namespace Agents { * worker that didn't originate the run. */ threadId?: string; + /** + * Fingerprint of the request fields that determine the agent/graph + tool set + * (endpoint, agent_id, model, spec, ephemeralAgent), captured at pause time. The + * resume route recomputes it from the resume request and rejects a mismatch — the + * guard that catches an ephemeral-agent config swap, where `agent_id` is undefined + * so the id check can't. + */ + requestFingerprint?: string; + /** + * Graph-determining request fields (endpoint, agent_id, model, spec, promptPrefix, + * ephemeralAgent) captured at pause. The resume route REPLAYS these onto the request + * before rebuilding the run, so a reload/cross-replica resume — where the client can + * no longer reconstruct the ephemeral config — still rebuilds the same agent/graph. + */ + resumeContext?: Record; } /** diff --git a/packages/data-provider/src/types/runs.ts b/packages/data-provider/src/types/runs.ts index 1fdc07cd81..da386ac5f4 100644 --- a/packages/data-provider/src/types/runs.ts +++ b/packages/data-provider/src/types/runs.ts @@ -48,6 +48,15 @@ export enum UsageEvents { ON_TOKEN_USAGE = 'on_token_usage', } +/** + * Human-in-the-loop event names. Streamed to live clients when a run pauses for + * tool approval (or an ask-user question). Reconnecting clients instead read the + * same record from `resumeState.pendingAction` on the sync event / status route. + */ +export enum ApprovalEvents { + ON_PENDING_ACTION = 'on_pending_action', +} + /** Mirrors TokenBudgetBreakdown from @librechat/agents (data-provider cannot import it). */ export type TTokenBudgetBreakdown = { maxContextTokens: number;