diff --git a/api/package.json b/api/package.json
index b044b49670..8f6e983b49 100644
--- a/api/package.json
+++ b/api/package.json
@@ -46,7 +46,7 @@
     "@azure/storage-blob": "^12.30.0",
     "@google/genai": "^2.8.0",
     "@keyv/redis": "^4.3.3",
-    "@librechat/agents": "^3.2.52",
+    "@librechat/agents": "^3.2.53",
     "@librechat/api": "*",
     "@librechat/data-schemas": "*",
     "@microsoft/microsoft-graph-client": "^3.0.7",
diff --git a/api/server/controllers/agents/__tests__/jobReplacement.spec.js b/api/server/controllers/agents/__tests__/jobReplacement.spec.js
index 7f7a775b75..c4247fcf30 100644
--- a/api/server/controllers/agents/__tests__/jobReplacement.spec.js
+++ b/api/server/controllers/agents/__tests__/jobReplacement.spec.js
@@ -288,3 +288,331 @@ describe('Job Replacement Detection', () => {
     });
   });
 });
+
+/**
+ * HITL terminal-side-effect guards (PR #13942).
+ *
+ * Jobs are keyed by streamId == conversationId, so a NEW request REPLACES the running
+ * one on the same conversation. The replaced generation's tail (its pause attempt, its
+ * checkpoint prune, its resume catch-path terminal writes) must not clobber the live
+ * generation's state. Each guard re-reads the live job and compares createdAt against the
+ * generation's own captured identity before acting. These mirror the predicates in
+ * client.js (handleRunInterrupt / chatCompletion finally) and resume.js.
+ */
+describe('HITL Terminal-Side-Effect Guards', () => {
+  beforeEach(() => {
+    jest.clearAllMocks();
+  });
+
+  describe('F22 — pause is skipped when the generation was replaced', () => {
+    // Mirrors client.js handleRunInterrupt pre-check, run BEFORE approvals.pause.
+    const shouldPause = async ({ jobCreatedAt, streamId }) => {
+      if (jobCreatedAt != null) {
+        const liveJob = await mockGenerationJobManager.getJob(streamId);
+        if (!liveJob || liveJob.createdAt !== jobCreatedAt) {
+          return false;
+        }
+      }
+      return true;
+    };
+
+    it('does not pause when a newer job replaced this one', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: 2000 });
+      expect(await shouldPause({ jobCreatedAt: 1000, streamId: 'c1' })).toBe(false);
+    });
+
+    it('does not pause when the job is already gone', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(null);
+      expect(await shouldPause({ jobCreatedAt: 1000, streamId: 'c1' })).toBe(false);
+    });
+
+    it('pauses when this is still the live job', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: 1000 });
+      expect(await shouldPause({ jobCreatedAt: 1000, streamId: 'c1' })).toBe(true);
+    });
+
+    it('pauses without a lookup when identity is unknown (legacy job)', async () => {
+      expect(await shouldPause({ jobCreatedAt: null, streamId: 'c1' })).toBe(true);
+      expect(mockGenerationJobManager.getJob).not.toHaveBeenCalled();
+    });
+  });
+
+  describe('F21 — checkpoint prune is skipped when the generation was replaced', () => {
+    // Mirrors client.js chatCompletion finally: prune only when NOT replaced.
+    const shouldPrune = async ({ resumableStreamId, jobCreatedAt }) => {
+      let replaced = false;
+      if (resumableStreamId && jobCreatedAt != null) {
+        const liveJob = await mockGenerationJobManager.getJob(resumableStreamId);
+        replaced = !liveJob || liveJob.createdAt !== jobCreatedAt;
+      }
+      return !replaced;
+    };
+
+    it('skips the prune when a newer job replaced this one', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: 2000 });
+      expect(await shouldPrune({ resumableStreamId: 'c1', jobCreatedAt: 1000 })).toBe(false);
+    });
+
+    it('prunes when this is still the live job', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: 1000 });
+      expect(await shouldPrune({ resumableStreamId: 'c1', jobCreatedAt: 1000 })).toBe(true);
+    });
+
+    it('prunes without a lookup when there is no resumable stream id', async () => {
+      expect(await shouldPrune({ resumableStreamId: undefined, jobCreatedAt: 1000 })).toBe(true);
+      expect(mockGenerationJobManager.getJob).not.toHaveBeenCalled();
+    });
+  });
+
+  describe('F24 — resume catch-path terminal writes are skipped when replaced', () => {
+    // Mirrors resume.js: stillLive gate around emitError/completeJob/deleteAgentCheckpoint.
+    const stillLive = async ({ streamId, jobCreatedAt }) => {
+      let live = true;
+      try {
+        const liveJob = await mockGenerationJobManager.getJob(streamId);
+        live = !!liveJob && liveJob.createdAt === jobCreatedAt;
+      } catch {
+        live = true; // read failed — fail open and run the terminal writes
+      }
+      return live;
+    };
+
+    it('runs terminal writes when this is still the live job', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: 1000 });
+      expect(await stillLive({ streamId: 'c1', jobCreatedAt: 1000 })).toBe(true);
+    });
+
+    it('skips terminal writes when a newer job replaced this one', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: 2000 });
+      expect(await stillLive({ streamId: 'c1', jobCreatedAt: 1000 })).toBe(false);
+    });
+
+    it('fails open (runs terminal writes) when the liveness read throws', async () => {
+      mockGenerationJobManager.getJob.mockRejectedValue(new Error('store down'));
+      expect(await stillLive({ streamId: 'c1', jobCreatedAt: 1000 })).toBe(true);
+    });
+  });
+
+  describe('F23 — resumed turn sources files from the job, not the racy DB row', () => {
+    // Mirrors resume.js: prefer the body, then job.metadata.userMessage.files, then DB.
+    const resolveFiles = ({ bodyFiles, metaFiles, dbFiles }) => {
+      if (Array.isArray(bodyFiles) && bodyFiles.length > 0) {
+        return bodyFiles;
+      }
+      if (Array.isArray(metaFiles) && metaFiles.length > 0) {
+        return metaFiles;
+      }
+      return Array.isArray(dbFiles) && dbFiles.length > 0 ? dbFiles : undefined;
+    };
+
+    it('prefers job-metadata files over the DB row (no DB-save race)', () => {
+      expect(
+        resolveFiles({
+          bodyFiles: [],
+          metaFiles: [{ file_id: 'meta' }],
+          dbFiles: [{ file_id: 'db' }],
+        }),
+      ).toEqual([{ file_id: 'meta' }]);
+    });
+
+    it('falls back to the DB row when the job has no persisted files (older job)', () => {
+      expect(
+        resolveFiles({ bodyFiles: [], metaFiles: undefined, dbFiles: [{ file_id: 'db' }] }),
+      ).toEqual([{ file_id: 'db' }]);
+    });
+
+    it('keeps files already present on the resume body', () => {
+      expect(
+        resolveFiles({
+          bodyFiles: [{ file_id: 'body' }],
+          metaFiles: [{ file_id: 'meta' }],
+          dbFiles: [],
+        }),
+      ).toEqual([{ file_id: 'body' }]);
+    });
+  });
+});
+
+/**
+ * Round-18 follow-ups to the guards above (Codex review 4594099963).
+ */
+describe('HITL Resume Fidelity Guards (round 18)', () => {
+  beforeEach(() => {
+    jest.clearAllMocks();
+  });
+
+  describe('G1 — resume re-checks ownership AGAIN right before terminal writes', () => {
+    // The start-of-finalize guard can go stale across saveMessage + title generation,
+    // so resume.js re-reads the live job immediately before emitDone/completeJob/prune.
+    // Same predicate as the catch-path (F24), applied at the success path's second point.
+    const stillLiveBeforeFinalize = async ({ streamId, jobCreatedAt }) => {
+      const liveJob = await mockGenerationJobManager.getJob(streamId);
+      return !!liveJob && liveJob.createdAt === jobCreatedAt;
+    };
+
+    it('runs terminal writes when still the live job at the second check', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: 1000 });
+      expect(await stillLiveBeforeFinalize({ streamId: 'c1', jobCreatedAt: 1000 })).toBe(true);
+    });
+
+    it('skips terminal writes when replaced DURING finalize (after the first check passed)', async () => {
+      // First check passed earlier with createdAt 1000; a new request replaced it to 2000
+      // while saveMessage + title generation awaited. The second check must catch it.
+      mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: 2000 });
+      expect(await stillLiveBeforeFinalize({ streamId: 'c1', jobCreatedAt: 1000 })).toBe(false);
+    });
+  });
+
+  describe('G2 — uploaded files are seeded into the AWAITED preliminary user message', () => {
+    // Mirrors getPreliminaryUserMessage: files from the request are persisted on the
+    // preliminary (awaited, pre-run) metadata so they land before any interrupt emits.
+    const buildPreliminaryUserMessage = ({ messageId, files }) => {
+      if (typeof messageId !== 'string' || messageId.length === 0) {
+        return null;
+      }
+      return {
+        messageId,
+        ...(Array.isArray(files) && files.length > 0 && { files }),
+      };
+    };
+
+    it('includes files when the request carries them', () => {
+      const msg = buildPreliminaryUserMessage({ messageId: 'm1', files: [{ file_id: 'a' }] });
+      expect(msg.files).toEqual([{ file_id: 'a' }]);
+    });
+
+    it('omits files when none were uploaded (no empty array)', () => {
+      const msg = buildPreliminaryUserMessage({ messageId: 'm1', files: [] });
+      expect(msg).not.toHaveProperty('files');
+    });
+  });
+
+  describe('G3 — resume replays pre-pause discovered deferred tools', () => {
+    // Mirrors createRun's merge: discovered set is union(message-extracted, replayed),
+    // gated entirely on the agent actually having deferred tools.
+    const resolveDiscovered = ({ hasAnyDeferredTools, messageExtracted, replayed }) => {
+      const set = new Set();
+      if (hasAnyDeferredTools) {
+        for (const n of messageExtracted ?? []) {
+          set.add(n);
+        }
+        for (const n of replayed ?? []) {
+          set.add(n);
+        }
+      }
+      return set;
+    };
+
+    it('replays captured names on resume (messages empty) so the paused tool is present', () => {
+      const set = resolveDiscovered({
+        hasAnyDeferredTools: true,
+        messageExtracted: [],
+        replayed: ['deep_tool'],
+      });
+      expect(set.has('deep_tool')).toBe(true);
+    });
+
+    it('unions replayed names with message-extracted names', () => {
+      const set = resolveDiscovered({
+        hasAnyDeferredTools: true,
+        messageExtracted: ['from_history'],
+        replayed: ['deep_tool'],
+      });
+      expect([...set].sort()).toEqual(['deep_tool', 'from_history']);
+    });
+
+    it('is inert when the agent has no deferred tools', () => {
+      const set = resolveDiscovered({
+        hasAnyDeferredTools: false,
+        messageExtracted: ['x'],
+        replayed: ['deep_tool'],
+      });
+      expect(set.size).toBe(0);
+    });
+  });
+
+  describe("H3 — resume replays the paused turn's model parameters (ephemeral agents)", () => {
+    // Mirrors restoreResumeContext: spread persisted model_parameters back onto the body,
+    // excluding `model` (replayed via the fingerprinted RESUME_CONTEXT_KEYS path).
+    const replayModelParameters = (body, resumeContext) => {
+      const params = resumeContext?.model_parameters;
+      if (params && typeof params === 'object') {
+        const { model: _model, ...rest } = params;
+        Object.assign(body, rest);
+      }
+      return body;
+    };
+
+    it('restores non-default params (temperature, max tokens) onto the resume body', () => {
+      const body = { conversationId: 'c1', endpoint: 'agents' };
+      replayModelParameters(body, {
+        model_parameters: { model: 'gpt-4o', temperature: 0.2, max_tokens: 1024 },
+      });
+      expect(body).toMatchObject({ temperature: 0.2, max_tokens: 1024 });
+    });
+
+    it('does NOT overwrite model (kept consistent with the resume fingerprint)', () => {
+      const body = { model: 'pinned-model' };
+      replayModelParameters(body, { model_parameters: { model: 'other-model', temperature: 0.9 } });
+      expect(body.model).toBe('pinned-model');
+    });
+
+    it('overwrites a client-supplied param with the captured authoritative value', () => {
+      const body = { temperature: 1.0 }; // crafted/stale client value
+      replayModelParameters(body, { model_parameters: { temperature: 0.2 } });
+      expect(body.temperature).toBe(0.2);
+    });
+
+    it('is a no-op when nothing was captured', () => {
+      const body = { conversationId: 'c1' };
+      replayModelParameters(body, {});
+      expect(body).toEqual({ conversationId: 'c1' });
+    });
+  });
+
+  describe('J2 — pause unfinished-save is skipped once a fast resume took over', () => {
+    // Mirrors request.js: only mark the paused row unfinished while the job is STILL paused
+    // on THIS generation's action. A claim transitions it out of requires_action and a
+    // replacement bumps createdAt — either means a /resume now owns the row, so marking it
+    // unfinished would clobber the resumed turn's completed content. Fail open on read error.
+    const shouldMarkUnfinished = async ({ jobCreatedAt, streamId }) => {
+      let stillPaused = true;
+      try {
+        const liveJob = await mockGenerationJobManager.getJob(streamId);
+        stillPaused =
+          !!liveJob &&
+          liveJob.status === 'requires_action' &&
+          (jobCreatedAt == null || liveJob.createdAt === jobCreatedAt);
+      } catch {
+        stillPaused = true;
+      }
+      return stillPaused;
+    };
+
+    it('marks unfinished while still paused on this generation', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue({
+        status: 'requires_action',
+        createdAt: 1000,
+      });
+      expect(await shouldMarkUnfinished({ jobCreatedAt: 1000, streamId: 'c1' })).toBe(true);
+    });
+
+    it('skips the unfinished-save once a fast resume claimed it (no longer requires_action)', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue({ status: 'running', createdAt: 1000 });
+      expect(await shouldMarkUnfinished({ jobCreatedAt: 1000, streamId: 'c1' })).toBe(false);
+    });
+
+    it('skips the unfinished-save when a newer request replaced the job', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue({
+        status: 'requires_action',
+        createdAt: 2000,
+      });
+      expect(await shouldMarkUnfinished({ jobCreatedAt: 1000, streamId: 'c1' })).toBe(false);
+    });
+
+    it('fails open (marks unfinished) when the liveness read throws', async () => {
+      mockGenerationJobManager.getJob.mockRejectedValue(new Error('store down'));
+      expect(await shouldMarkUnfinished({ jobCreatedAt: 1000, streamId: 'c1' })).toBe(true);
+    });
+  });
+});
diff --git a/api/server/controllers/agents/__tests__/resume.spec.js b/api/server/controllers/agents/__tests__/resume.spec.js
new file mode 100644
index 0000000000..dc07de7c7d
--- /dev/null
+++ b/api/server/controllers/agents/__tests__/resume.spec.js
@@ -0,0 +1,1114 @@
+/**
+ * Integration tests for the HITL resume controller (POST /agents/chat/resume).
+ *
+ * Drives the real `ResumeAgentController` end-to-end over supertest with the SDK
+ * run, durable checkpointer, Mongo, and concurrency cache mocked out. The pure
+ * decision/liveness helpers (`isPendingActionStale`, `mapToolApprovalResolutions`,
+ * `findUndecidedToolCalls`, `findDisallowedDecisions`, `buildAbortedResponseMetadata`,
+ * `sanitizeMessageForTransmit`) run for real via `requireActual`, so the test
+ * exercises the actual guard ladder and the pause -> approve -> resume -> finalize
+ * lifecycle rather than re-implemented stubs.
+ *
+ * Covers:
+ *  - the authorization / staleness / agent-and-endpoint / actionId guard ladder
+ *  - tool_approval validation (undecided, policy-disallowed decision)
+ *  - ask_user_question answer requirement
+ *  - concurrency gate (429) and the atomic single-winner claim (409)
+ *  - the happy path: ACK, run reconstruction, resumeCompletion, finalize (save the
+ *    now-finished response, emit done, complete job, prune checkpoint)
+ *  - re-pause (no double finalize), abort-during-resume (no double finalize),
+ *    and the resume-failure terminal path
+ */
+
+const express = require('express');
+const request = require('supertest');
+const { Constants } = require('librechat-data-provider');
+
+const USER_ID = 'user-1';
+const TENANT_ID = 'tenant-1';
+const AGENT_ID = 'agent-abc';
+const CONVO_ID = 'convo-123';
+const ACTION_ID = 'action-xyz';
+const RESPONSE_MSG_ID = 'resp-1';
+const USER_MSG_ID = 'umsg-1';
+const THREAD_PARENT_ID = 'thread-parent-1';
+
+const mockLogger = {
+  debug: jest.fn(),
+  warn: jest.fn(),
+  error: jest.fn(),
+  info: jest.fn(),
+};
+
+const mockJobStore = {
+  getJob: jest.fn(),
+  updateJob: jest.fn(),
+};
+
+const mockGenerationJobManager = {
+  getJob: jest.fn(),
+  getJobStore: jest.fn(() => mockJobStore),
+  getResumeState: jest.fn(),
+  setContentParts: jest.fn(),
+  emitChunk: jest.fn(),
+  emitDone: jest.fn(),
+  emitError: jest.fn(),
+  completeJob: jest.fn(),
+  expireApproval: jest.fn(),
+  approvals: { resolve: jest.fn() },
+};
+
+const mockDeleteAgentCheckpoint = jest.fn();
+const mockDecrementPendingRequest = jest.fn();
+const mockCheckAndIncrementPendingRequest = jest.fn();
+
+const mockSaveMessage = jest.fn();
+const mockGetConvo = jest.fn();
+const mockGetMessages = jest.fn();
+const mockDisposeClient = jest.fn();
+const mockGetMCPRequestContext = jest.fn();
+const mockCleanupMCPRequestContextForReq = jest.fn();
+
+jest.mock('@librechat/data-schemas', () => ({
+  ...jest.requireActual('@librechat/data-schemas'),
+  logger: mockLogger,
+}));
+
+jest.mock('@librechat/api', () => ({
+  ...jest.requireActual('@librechat/api'),
+  GenerationJobManager: mockGenerationJobManager,
+  deleteAgentCheckpoint: (...args) => mockDeleteAgentCheckpoint(...args),
+  decrementPendingRequest: (...args) => mockDecrementPendingRequest(...args),
+  checkAndIncrementPendingRequest: (...args) => mockCheckAndIncrementPendingRequest(...args),
+}));
+
+jest.mock('~/models', () => ({
+  saveMessage: (...args) => mockSaveMessage(...args),
+  getConvo: (...args) => mockGetConvo(...args),
+  getMessages: (...args) => mockGetMessages(...args),
+}));
+
+jest.mock('~/server/cleanup', () => ({
+  disposeClient: (...args) => mockDisposeClient(...args),
+}));
+
+jest.mock('~/server/services/MCPRequestContext', () => ({
+  getMCPRequestContext: (...args) => mockGetMCPRequestContext(...args),
+  cleanupMCPRequestContextForReq: (...args) => mockCleanupMCPRequestContextForReq(...args),
+}));
+
+// Import after mocks
+const ResumeAgentController = require('~/server/controllers/agents/resume');
+
+/** Drain the microtask + immediate queues so the post-ACK continuation settles. */
+const flush = () => new Promise((resolve) => setImmediate(resolve));
+
+/** A live, resolvable paused tool-approval job (single tool call `tc1`). */
+function makeToolApprovalJob(overrides = {}) {
+  const metaOverrides = overrides.metadata ?? {};
+  const pendingOverrides = metaOverrides.pendingAction ?? {};
+  return {
+    status: 'requires_action',
+    abortController: new AbortController(),
+    ...overrides,
+    metadata: {
+      userId: USER_ID,
+      tenantId: TENANT_ID,
+      agent_id: AGENT_ID,
+      endpoint: 'agents',
+      responseMessageId: RESPONSE_MSG_ID,
+      sender: 'TestAgent',
+      iconURL: 'https://example.com/icon.png',
+      model: 'claude-test',
+      isTemporary: false,
+      userMessage: {
+        messageId: USER_MSG_ID,
+        parentMessageId: THREAD_PARENT_ID,
+        text: 'please run the tool',
+      },
+      ...metaOverrides,
+      pendingAction: {
+        actionId: ACTION_ID,
+        expiresAt: Date.now() + 60_000,
+        payload: {
+          type: 'tool_approval',
+          action_requests: [{ tool_call_id: 'tc1' }],
+          review_configs: [{ tool_call_id: 'tc1', allowed_decisions: ['approve', 'reject'] }],
+        },
+        ...pendingOverrides,
+      },
+    },
+  };
+}
+
+/** A live, resolvable paused ask-user-question job. */
+function makeAskUserJob(overrides = {}) {
+  const job = makeToolApprovalJob(overrides);
+  job.metadata.pendingAction.payload = {
+    type: 'ask_user_question',
+    question: 'What should I name the file?',
+  };
+  return job;
+}
+
+/** A mock reconstructed client for the post-ACK path. */
+function makeClient(overrides = {}) {
+  return {
+    sender: 'TestAgent',
+    contentParts: [{ type: 'text', text: 'resumed answer' }],
+    artifactPromises: [],
+    pendingApproval: false,
+    buildResponseMetadata: jest.fn(() => null),
+    resumeCompletion: jest.fn().mockResolvedValue(undefined),
+    ...overrides,
+  };
+}
+
+describe('ResumeAgentController (POST /agents/chat/resume)', () => {
+  let app;
+  let mockInitializeClient;
+  let mockAddTitle;
+  let capturedInit;
+  let settle;
+  let settled;
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+
+    capturedInit = null;
+    mockCheckAndIncrementPendingRequest.mockResolvedValue({ allowed: true });
+    mockDecrementPendingRequest.mockResolvedValue(undefined);
+    mockDeleteAgentCheckpoint.mockResolvedValue(undefined);
+    mockCleanupMCPRequestContextForReq.mockResolvedValue(undefined);
+    mockSaveMessage.mockResolvedValue(undefined);
+    mockGetConvo.mockResolvedValue(null);
+    mockGetMessages.mockResolvedValue([]);
+    mockJobStore.getJob.mockResolvedValue({ tokenUsage: null, contextUsage: null });
+    mockJobStore.updateJob.mockResolvedValue(undefined);
+    mockGenerationJobManager.getResumeState.mockResolvedValue({ aggregatedContent: [] });
+    mockGenerationJobManager.emitDone.mockResolvedValue(undefined);
+    mockGenerationJobManager.emitError.mockResolvedValue(undefined);
+    mockGenerationJobManager.emitChunk.mockResolvedValue(undefined);
+    mockGenerationJobManager.completeJob.mockResolvedValue(undefined);
+    mockGenerationJobManager.approvals.resolve.mockResolvedValue(true);
+
+    // `decrementPendingRequest` runs in the controller's `finally` on every
+    // post-ACK path, so resolving on it signals the async continuation is done.
+    settled = new Promise((resolve) => {
+      settle = resolve;
+    });
+    mockDecrementPendingRequest.mockImplementation(async () => {
+      settle();
+    });
+
+    mockAddTitle = jest.fn().mockResolvedValue(undefined);
+    mockInitializeClient = jest.fn(async ({ req }) => {
+      // Capture the request state the controller seeds BEFORE reconstruction.
+      capturedInit = {
+        parentMessageId: req.body.parentMessageId,
+        files: req.body.files,
+        conversationCreatedAt: req.conversationCreatedAt,
+        timezone: req.body.timezone,
+      };
+      return { client: makeClient(), userMCPAuthMap: { server1: { token: 't' } } };
+    });
+
+    app = express();
+    app.use(express.json());
+    app.use((req, _res, next) => {
+      req.user = { id: USER_ID, tenantId: TENANT_ID };
+      req.config = {
+        endpoints: { agents: { checkpointer: { type: 'mongo' } } },
+        interfaceConfig: {},
+      };
+      next();
+    });
+    app.post('/api/agents/chat/resume', (req, res, next) =>
+      ResumeAgentController(req, res, next, mockInitializeClient, mockAddTitle),
+    );
+  });
+
+  const post = (body) => request(app).post('/api/agents/chat/resume').send(body);
+
+  const approveBody = (extra = {}) => ({
+    conversationId: CONVO_ID,
+    actionId: ACTION_ID,
+    agent_id: AGENT_ID,
+    endpoint: 'agents',
+    decisions: [{ tool_call_id: 'tc1', decision: 'approve' }],
+    ...extra,
+  });
+
+  describe('temporal context restore', () => {
+    it('restores req.conversationCreatedAt from the convo before initializeClient', async () => {
+      // Temporal prompt vars must resolve against the paused anchor, not resume wall-clock.
+      mockGetConvo.mockResolvedValue({ createdAt: new Date('2020-01-02T03:04:05.000Z') });
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      const res = await post(approveBody());
+      expect(res.status).toBe(200);
+      await settled;
+      expect(capturedInit.conversationCreatedAt).toBe('2020-01-02T03:04:05.000Z');
+    });
+
+    it('leaves conversationCreatedAt unset when the convo lookup yields nothing', async () => {
+      mockGetConvo.mockResolvedValue(null);
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      const res = await post(approveBody());
+      expect(res.status).toBe(200);
+      await settled;
+      expect(capturedInit.conversationCreatedAt).toBeUndefined();
+    });
+  });
+
+  describe('MCP request-context lifecycle', () => {
+    it('pre-seeds the run-scoped MCP context before initializeClient and tears it down after', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      const res = await post(approveBody());
+      expect(res.status).toBe(200);
+      await settled; // the controller's finally has run
+
+      // Seeded with a null `res` + cleanupOnResponse:false so the post-ACK tool load
+      // finds the existing store instead of getting undefined (res is already finished).
+      expect(mockGetMCPRequestContext).toHaveBeenCalledWith(expect.anything(), undefined, {
+        cleanupOnResponse: false,
+      });
+      // ...and seeded BEFORE the client (hence tool loading) is built.
+      expect(mockGetMCPRequestContext.mock.invocationCallOrder[0]).toBeLessThan(
+        mockInitializeClient.mock.invocationCallOrder[0],
+      );
+      // ...then torn down exactly once in the finally.
+      expect(mockCleanupMCPRequestContextForReq).toHaveBeenCalledTimes(1);
+    });
+  });
+
+  describe('request guards (rejected before claiming the action)', () => {
+    it('400 when conversationId is missing', async () => {
+      const res = await post({ actionId: ACTION_ID });
+      expect(res.status).toBe(400);
+      expect(res.body.error).toMatch(/conversationId is required/i);
+      expect(mockGenerationJobManager.getJob).not.toHaveBeenCalled();
+    });
+
+    it('400 when conversationId is the "new" placeholder', async () => {
+      const res = await post({ conversationId: 'new', actionId: ACTION_ID });
+      expect(res.status).toBe(400);
+      expect(mockGenerationJobManager.getJob).not.toHaveBeenCalled();
+    });
+
+    it('404 when there is no paused job for the conversation', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(null);
+      const res = await post(approveBody());
+      expect(res.status).toBe(404);
+      expect(res.body.error).toMatch(/no paused generation/i);
+    });
+
+    it('403 when the job belongs to another user', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(
+        makeToolApprovalJob({ metadata: { userId: 'someone-else' } }),
+      );
+      const res = await post(approveBody());
+      expect(res.status).toBe(403);
+      expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled();
+    });
+
+    it('403 on a tenant mismatch', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(
+        makeToolApprovalJob({ metadata: { tenantId: 'other-tenant' } }),
+      );
+      const res = await post(approveBody());
+      expect(res.status).toBe(403);
+    });
+
+    it('403 when the resume omits the paused agent_id', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      const res = await post(approveBody({ agent_id: undefined }));
+      expect(res.status).toBe(403);
+      expect(res.body.error).toMatch(/different agent/i);
+    });
+
+    it('403 when the resume claims a different agent_id', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      const res = await post(approveBody({ agent_id: 'agent-other' }));
+      expect(res.status).toBe(403);
+      expect(res.body.error).toMatch(/different agent/i);
+    });
+
+    it('403 when the resume claims a different endpoint', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      const res = await post(approveBody({ endpoint: 'bedrock' }));
+      expect(res.status).toBe(403);
+      expect(res.body.error).toMatch(/different endpoint/i);
+    });
+
+    it('403 when the resume OMITS the paused endpoint (no fall-through to ephemeral)', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      const res = await post(approveBody({ endpoint: undefined }));
+      expect(res.status).toBe(403);
+      expect(res.body.error).toMatch(/different endpoint/i);
+      expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled();
+    });
+
+    it('409 when the job is not in requires_action (already terminal; no expire)', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob({ status: 'running' }));
+      const res = await post(approveBody());
+      expect(res.status).toBe(409);
+      expect(res.body.error).toMatch(/no live pending action/i);
+      // Already resolved/terminal — nothing to expire.
+      expect(mockGenerationJobManager.expireApproval).not.toHaveBeenCalled();
+    });
+
+    it('409 AND drives expiry when the pending action has expired (stale)', async () => {
+      const job = makeToolApprovalJob();
+      job.metadata.pendingAction.expiresAt = Date.now() - 1_000;
+      mockGenerationJobManager.getJob.mockResolvedValue(job);
+      const res = await post(approveBody());
+      expect(res.status).toBe(409);
+      expect(res.body.error).toMatch(/no live pending action/i);
+      // The stale action is expired NOW (expire CAS + terminal SSE) so an attached SSE
+      // client gets a terminal event instead of hanging until the periodic sweeper runs.
+      expect(mockGenerationJobManager.expireApproval).toHaveBeenCalledWith(CONVO_ID, ACTION_ID);
+    });
+
+    it('400 when actionId is missing', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      const res = await post(approveBody({ actionId: undefined }));
+      expect(res.status).toBe(400);
+      expect(res.body.error).toMatch(/actionId is required/i);
+    });
+
+    it('409 when actionId targets a stale action', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      const res = await post(approveBody({ actionId: 'stale-action' }));
+      expect(res.status).toBe(409);
+      expect(res.body.error).toMatch(/stale action/i);
+    });
+
+    it('400 when a tool call is left undecided', async () => {
+      const job = makeToolApprovalJob();
+      job.metadata.pendingAction.payload.action_requests = [
+        { tool_call_id: 'tc1' },
+        { tool_call_id: 'tc2' },
+      ];
+      job.metadata.pendingAction.payload.review_configs = [
+        { tool_call_id: 'tc1', allowed_decisions: ['approve', 'reject'] },
+        { tool_call_id: 'tc2', allowed_decisions: ['approve', 'reject'] },
+      ];
+      mockGenerationJobManager.getJob.mockResolvedValue(job);
+      const res = await post(approveBody()); // only decides tc1
+      expect(res.status).toBe(400);
+      expect(res.body.error).toMatch(/must be decided/i);
+      expect(res.body.undecided).toEqual(['tc2']);
+      expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled();
+    });
+
+    it('403 when a decision is not permitted by the tool policy', async () => {
+      const job = makeToolApprovalJob();
+      // Policy restricts tc1 to reject only; an `approve` must be refused.
+      job.metadata.pendingAction.payload.review_configs = [
+        { tool_call_id: 'tc1', allowed_decisions: ['reject'] },
+      ];
+      mockGenerationJobManager.getJob.mockResolvedValue(job);
+      const res = await post(approveBody());
+      expect(res.status).toBe(403);
+      expect(res.body.error).toMatch(/not permitted/i);
+      expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled();
+    });
+
+    it('400 when an edit decision omits editedArguments', async () => {
+      const job = makeToolApprovalJob();
+      job.metadata.pendingAction.payload.review_configs = [
+        { tool_call_id: 'tc1', allowed_decisions: ['approve', 'edit'] },
+      ];
+      mockGenerationJobManager.getJob.mockResolvedValue(job);
+      const res = await post(
+        approveBody({ decisions: [{ tool_call_id: 'tc1', decision: 'edit' }] }),
+      );
+      expect(res.status).toBe(400);
+      expect(res.body.error).toMatch(/editedArguments/i);
+      expect(res.body.incomplete).toEqual(['tc1']);
+      expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled();
+    });
+
+    it('400 when a respond decision omits responseText', async () => {
+      const job = makeToolApprovalJob();
+      job.metadata.pendingAction.payload.review_configs = [
+        { tool_call_id: 'tc1', allowed_decisions: ['approve', 'respond'] },
+      ];
+      mockGenerationJobManager.getJob.mockResolvedValue(job);
+      const res = await post(
+        approveBody({ decisions: [{ tool_call_id: 'tc1', decision: 'respond' }] }),
+      );
+      expect(res.status).toBe(400);
+      expect(res.body.error).toMatch(/responseText/i);
+    });
+
+    it('accepts a complete edit decision (editedArguments present)', async () => {
+      const job = makeToolApprovalJob();
+      job.metadata.pendingAction.payload.review_configs = [
+        { tool_call_id: 'tc1', allowed_decisions: ['approve', 'edit'] },
+      ];
+      mockGenerationJobManager.getJob.mockResolvedValue(job);
+      const res = await post(
+        approveBody({
+          decisions: [{ tool_call_id: 'tc1', decision: 'edit', editedArguments: { q: 'x' } }],
+        }),
+      );
+      expect(res.status).toBe(200);
+      await settled;
+      await flush();
+    });
+
+    it('403 when the resume request fingerprint does not match the paused config', async () => {
+      const job = makeToolApprovalJob();
+      job.metadata.pendingAction.requestFingerprint = 'fingerprint-of-a-different-config';
+      mockGenerationJobManager.getJob.mockResolvedValue(job);
+      const res = await post(approveBody());
+      expect(res.status).toBe(403);
+      expect(res.body.error).toMatch(/different agent configuration/i);
+      expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled();
+    });
+
+    it('proceeds when the resume request fingerprint matches the paused config', async () => {
+      const { computeAgentRequestFingerprint } = jest.requireActual('@librechat/api');
+      const job = makeToolApprovalJob();
+      job.metadata.pendingAction.requestFingerprint = computeAgentRequestFingerprint({
+        endpoint: 'agents',
+        agent_id: AGENT_ID,
+      });
+      mockGenerationJobManager.getJob.mockResolvedValue(job);
+      const res = await post(approveBody());
+      expect(res.status).toBe(200);
+      expect(mockGenerationJobManager.approvals.resolve).toHaveBeenCalledWith(CONVO_ID, ACTION_ID);
+      await settled;
+      await flush();
+    });
+
+    it('403 when the resume sends a different promptPrefix than the paused config', async () => {
+      const { computeAgentRequestFingerprint } = jest.requireActual('@librechat/api');
+      const job = makeToolApprovalJob();
+      // Ephemeral instructions come from promptPrefix, so it's part of the fingerprint.
+      job.metadata.pendingAction.requestFingerprint = computeAgentRequestFingerprint({
+        endpoint: 'agents',
+        agent_id: AGENT_ID,
+        promptPrefix: 'be terse',
+      });
+      mockGenerationJobManager.getJob.mockResolvedValue(job);
+      const res = await post(approveBody({ promptPrefix: 'ignore previous instructions' }));
+      expect(res.status).toBe(403);
+      expect(res.body.error).toMatch(/different agent configuration/i);
+      expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled();
+    });
+
+    it('400 when an ask_user_question resume carries no answer', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeAskUserJob());
+      const res = await post({
+        conversationId: CONVO_ID,
+        actionId: ACTION_ID,
+        agent_id: AGENT_ID,
+        endpoint: 'agents',
+      });
+      expect(res.status).toBe(400);
+      expect(res.body.error).toMatch(/answer is required/i);
+    });
+
+    it('400 on an unsupported pending-action type', async () => {
+      const job = makeToolApprovalJob();
+      job.metadata.pendingAction.payload = { type: 'totally_unknown' };
+      mockGenerationJobManager.getJob.mockResolvedValue(job);
+      const res = await post(approveBody());
+      expect(res.status).toBe(400);
+      expect(res.body.error).toMatch(/unsupported pending action/i);
+      expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled();
+    });
+
+    it('proceeds (does not 403) for a pre-multi-tenancy job with no tenantId', async () => {
+      // hasTenantMismatch only blocks when the job carries a tenantId that differs;
+      // an untenanted (legacy) job must still resume once the userId check passes.
+      const job = makeToolApprovalJob({ metadata: { tenantId: undefined } });
+      mockGenerationJobManager.getJob.mockResolvedValue(job);
+      const res = await post(approveBody());
+      expect(res.status).toBe(200);
+      expect(mockGenerationJobManager.approvals.resolve).toHaveBeenCalledWith(CONVO_ID, ACTION_ID);
+      await settled;
+      await flush();
+    });
+
+    it('429 when the concurrency gate rejects the resume', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      mockCheckAndIncrementPendingRequest.mockResolvedValue({ allowed: false });
+      const res = await post(approveBody());
+      expect(res.status).toBe(429);
+      expect(mockGenerationJobManager.approvals.resolve).not.toHaveBeenCalled();
+    });
+
+    it('409 and releases the slot when the action was already claimed (single-winner)', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      mockGenerationJobManager.approvals.resolve.mockResolvedValue(false);
+      const res = await post(approveBody());
+      expect(res.status).toBe(409);
+      expect(res.body.error).toMatch(/already resolved or has expired/i);
+      expect(mockGenerationJobManager.approvals.resolve).toHaveBeenCalledWith(CONVO_ID, ACTION_ID);
+      expect(mockDecrementPendingRequest).toHaveBeenCalledWith(USER_ID);
+      expect(mockInitializeClient).not.toHaveBeenCalled();
+    });
+
+    it('releases the slot when the claim itself throws (store error, not a leak)', async () => {
+      // The increment happens before the claim, which runs before the run's own
+      // try/finally — a store error here must still release the slot or a retry of the
+      // still-paused approval gets spuriously 429'd until the counter TTL expires.
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      mockGenerationJobManager.approvals.resolve.mockRejectedValue(new Error('redis down'));
+      const res = await post(approveBody());
+      expect(res.status).toBe(500);
+      expect(mockDecrementPendingRequest).toHaveBeenCalledWith(USER_ID);
+      expect(mockInitializeClient).not.toHaveBeenCalled();
+    });
+  });
+
+  describe('happy path: approve -> reconstruct -> resume -> finalize', () => {
+    it('ACKs immediately and claims the action atomically with the submitted actionId', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      const res = await post(approveBody());
+      expect(res.status).toBe(200);
+      expect(res.body).toEqual({
+        streamId: CONVO_ID,
+        conversationId: CONVO_ID,
+        status: 'resuming',
+      });
+      expect(mockGenerationJobManager.approvals.resolve).toHaveBeenCalledWith(CONVO_ID, ACTION_ID);
+      await settled;
+      await flush();
+    });
+
+    it('seeds the thread parent before reconstruction and maps the decision to the SDK', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      await post(approveBody());
+      await settled;
+      await flush();
+
+      // initializeAgent scopes thread files off req.body.parentMessageId, seeded
+      // from the paused user message's parent before initializeClient runs.
+      expect(capturedInit.parentMessageId).toBe(THREAD_PARENT_ID);
+
+      expect(mockInitializeClient).toHaveBeenCalledTimes(1);
+      const client = await mockInitializeClient.mock.results[0].value.then((r) => r.client);
+      expect(client.resumeCompletion).toHaveBeenCalledWith(
+        expect.objectContaining({
+          resumeValue: { tc1: { type: 'approve' } },
+          userMCPAuthMap: { server1: { token: 't' } },
+        }),
+      );
+    });
+
+    it('restores the paused user message files before reconstruction (execute-code files)', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      // The resume body carries no files; the controller must source them from the
+      // persisted user message so an approved code/read-file tool keeps its uploads.
+      mockGetMessages.mockResolvedValue([{ files: [{ file_id: 'f1' }] }]);
+
+      await post(approveBody());
+      await settled;
+      await flush();
+
+      expect(capturedInit.files).toEqual([{ file_id: 'f1' }]);
+    });
+
+    it('ignores client-supplied resume files, sourcing from the paused job (security)', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      // The paused turn's authoritative files (DB row); a crafted client tries to swap them.
+      mockGetMessages.mockResolvedValue([{ files: [{ file_id: 'paused' }] }]);
+
+      await post(approveBody({ files: [{ file_id: 'attacker-supplied' }] }));
+      await settled;
+      await flush();
+
+      // The crafted client files must NOT reach initializeAgent — only the paused set.
+      expect(capturedInit.files).toEqual([{ file_id: 'paused' }]);
+    });
+
+    it('clears client-supplied resume files when the paused turn had none (security)', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      mockGetMessages.mockResolvedValue([{ files: [] }]); // the paused turn had no files
+
+      await post(approveBody({ files: [{ file_id: 'attacker-supplied' }] }));
+      await settled;
+      await flush();
+
+      expect(capturedInit.files).toEqual([]);
+    });
+
+    it('prefers job-metadata files over both the client body and the DB row', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(
+        makeToolApprovalJob({
+          metadata: {
+            userMessage: {
+              messageId: USER_MSG_ID,
+              parentMessageId: THREAD_PARENT_ID,
+              text: 'x',
+              files: [{ file_id: 'meta' }],
+            },
+          },
+        }),
+      );
+      mockGetMessages.mockResolvedValue([{ files: [{ file_id: 'db' }] }]);
+
+      await post(approveBody({ files: [{ file_id: 'attacker-supplied' }] }));
+      await settled;
+      await flush();
+
+      expect(capturedInit.files).toEqual([{ file_id: 'meta' }]);
+    });
+
+    it('carries the restored files onto the final requestMessage (user bubble keeps attachments)', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      // job.metadata.userMessage is persisted without files; the final SSE must still
+      // carry the restored uploads or the user bubble loses its attachments on resume.
+      mockGetMessages.mockResolvedValue([{ files: [{ file_id: 'f1', filename: 'a.pdf' }] }]);
+
+      await post(approveBody());
+      await settled;
+      await flush();
+
+      const [, finalEvent] = mockGenerationJobManager.emitDone.mock.calls[0];
+      expect(finalEvent.requestMessage).toMatchObject({
+        messageId: USER_MSG_ID,
+        isCreatedByUser: true,
+        files: [{ file_id: 'f1', filename: 'a.pdf' }],
+      });
+    });
+
+    it('persists the finished response, emits done, completes the job, and prunes the checkpoint', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      await post(approveBody());
+      await settled;
+      await flush();
+
+      expect(mockSaveMessage).toHaveBeenCalledWith(
+        expect.objectContaining({ userId: USER_ID, isTemporary: false }),
+        expect.objectContaining({
+          messageId: RESPONSE_MSG_ID,
+          parentMessageId: USER_MSG_ID,
+          conversationId: CONVO_ID,
+          content: [{ type: 'text', text: 'resumed answer' }],
+          unfinished: false,
+          error: false,
+          isCreatedByUser: false,
+          user: USER_ID,
+          agent_id: AGENT_ID,
+        }),
+        expect.objectContaining({
+          context: 'api/server/controllers/agents/resume.js - resumed response end',
+        }),
+      );
+
+      // Assert the finalEvent STRUCTURE, not just the hardcoded `final: true` literal —
+      // a `final: true`-only check would still pass if the entire content / title /
+      // requestMessage build in finalizeResumedTurn were deleted.
+      const [doneStreamId, finalEvent] = mockGenerationJobManager.emitDone.mock.calls[0];
+      expect(doneStreamId).toBe(CONVO_ID);
+      expect(finalEvent).toMatchObject({
+        final: true,
+        conversation: { conversationId: CONVO_ID },
+        responseMessage: {
+          messageId: RESPONSE_MSG_ID,
+          content: [{ type: 'text', text: 'resumed answer' }],
+          unfinished: false,
+        },
+        requestMessage: { messageId: USER_MSG_ID, isCreatedByUser: true },
+      });
+      expect(typeof finalEvent.title).toBe('string');
+
+      expect(mockGenerationJobManager.completeJob).toHaveBeenCalledWith(CONVO_ID);
+      expect(mockDeleteAgentCheckpoint).toHaveBeenCalledWith(CONVO_ID, { type: 'mongo' });
+      expect(mockDecrementPendingRequest).toHaveBeenCalledWith(USER_ID);
+      expect(mockDisposeClient).toHaveBeenCalledTimes(1);
+    });
+
+    it('skips finalization (no save/emitDone/complete) when the job was replaced mid-resume', async () => {
+      // The paused job has createdAt 1000; a concurrent request reused this conversationId,
+      // so the live job now has a different createdAt — finalizing would clobber the newer
+      // turn's job. The finally still runs (slot release), so `settled` resolves.
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob({ createdAt: 1000 }));
+      mockJobStore.getJob.mockResolvedValue({
+        tokenUsage: null,
+        contextUsage: null,
+        createdAt: 2000,
+      });
+      await post(approveBody());
+      await settled;
+      await flush();
+
+      expect(mockSaveMessage).not.toHaveBeenCalled();
+      expect(mockGenerationJobManager.emitDone).not.toHaveBeenCalled();
+      expect(mockGenerationJobManager.completeJob).not.toHaveBeenCalled();
+    });
+
+    it('does not release the slot in the finally when the client already released it on pause', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      // Simulate handleRunInterrupt having released the concurrency slot on a re-pause.
+      mockInitializeClient.mockResolvedValue({
+        client: makeClient({ pendingRequestReleased: true }),
+        userMCPAuthMap: {},
+      });
+      let disposed;
+      const disposedP = new Promise((resolve) => {
+        disposed = resolve;
+      });
+      mockDisposeClient.mockImplementation(() => disposed());
+
+      await post(approveBody());
+      await disposedP;
+      await flush();
+
+      // The finally must NOT double-release — handleRunInterrupt already did.
+      expect(mockDecrementPendingRequest).not.toHaveBeenCalled();
+    });
+
+    it('persists tool artifacts produced by the resumed continuation as attachments', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      const artifact = { type: 'image', file_id: 'img-1' };
+      // The lean resume path bypasses BaseClient.sendMessage's artifact await, so the
+      // controller must await client.artifactPromises itself (and drop null results).
+      mockInitializeClient.mockResolvedValue({
+        client: makeClient({
+          artifactPromises: [Promise.resolve(artifact), Promise.resolve(null)],
+        }),
+        userMCPAuthMap: {},
+      });
+
+      await post(approveBody());
+      await settled;
+      await flush();
+
+      expect(mockSaveMessage).toHaveBeenCalledWith(
+        expect.anything(),
+        expect.objectContaining({ attachments: [artifact] }),
+        expect.anything(),
+      );
+    });
+
+    it('falls back to the aggregated store content when the live client content is empty', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      // No live content on the rebuilt client → the saved response must use the
+      // pre-pause aggregated content from the store, not an empty array.
+      mockInitializeClient.mockResolvedValue({
+        client: makeClient({ contentParts: [] }),
+        userMCPAuthMap: {},
+      });
+      mockGenerationJobManager.getResumeState.mockResolvedValue({
+        aggregatedContent: [{ type: 'text', text: 'from-store' }],
+      });
+
+      await post(approveBody());
+      await settled;
+      await flush();
+
+      expect(mockSaveMessage).toHaveBeenCalledWith(
+        expect.anything(),
+        expect.objectContaining({ content: [{ type: 'text', text: 'from-store' }] }),
+        expect.anything(),
+      );
+    });
+
+    it('strips malformed tool_call parts from the saved content', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      mockInitializeClient.mockResolvedValue({
+        client: makeClient({
+          contentParts: [
+            { type: 'text', text: 'kept' },
+            { type: 'tool_call' }, // malformed: no tool_call payload — must be filtered
+          ],
+        }),
+        userMCPAuthMap: {},
+      });
+
+      await post(approveBody());
+      await settled;
+      await flush();
+
+      expect(mockSaveMessage).toHaveBeenCalledWith(
+        expect.anything(),
+        expect.objectContaining({ content: [{ type: 'text', text: 'kept' }] }),
+        expect.anything(),
+      );
+    });
+
+    it('merges previously persisted attachments with the resumed segment artifacts', async () => {
+      const priorArtifact = { type: 'image', file_id: 'prior-1' };
+      const newArtifact = { type: 'image', file_id: 'new-1' };
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      // An earlier pause segment already saved an attachment on the response row.
+      mockGetMessages.mockResolvedValue([{ attachments: [priorArtifact] }]);
+      mockInitializeClient.mockResolvedValue({
+        client: makeClient({ artifactPromises: [Promise.resolve(newArtifact)] }),
+        userMCPAuthMap: {},
+      });
+
+      await post(approveBody());
+      await settled;
+      await flush();
+
+      expect(mockSaveMessage).toHaveBeenCalledWith(
+        expect.anything(),
+        expect.objectContaining({ attachments: [priorArtifact, newArtifact] }),
+        expect.anything(),
+      );
+    });
+
+    it('persists the resumed run context calibration (contextMeta) onto the saved response', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      const contextMeta = { calibrationRatio: 0.8 };
+      mockInitializeClient.mockResolvedValue({
+        client: makeClient({ contextMeta }),
+        userMCPAuthMap: {},
+      });
+
+      await post(approveBody());
+      await settled;
+      await flush();
+
+      expect(mockSaveMessage).toHaveBeenCalledWith(
+        expect.anything(),
+        expect.objectContaining({ contextMeta }),
+        expect.anything(),
+      );
+    });
+
+    it('carries manualSkills/alwaysAppliedSkills onto the resumed requestMessage', async () => {
+      const job = makeToolApprovalJob();
+      job.metadata.userMessage.manualSkills = ['skill-a'];
+      job.metadata.userMessage.alwaysAppliedSkills = ['skill-b'];
+      mockGenerationJobManager.getJob.mockResolvedValue(job);
+
+      await post(approveBody());
+      await settled;
+      await flush();
+
+      const [, finalEvent] = mockGenerationJobManager.emitDone.mock.calls[0];
+      expect(finalEvent.requestMessage).toMatchObject({
+        manualSkills: ['skill-a'],
+        alwaysAppliedSkills: ['skill-b'],
+      });
+    });
+
+    it('attaches client response metadata to the saved message when present', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      const contextUsage = { tokenCount: 1234 };
+      mockInitializeClient.mockResolvedValue({
+        client: makeClient({ buildResponseMetadata: jest.fn(() => ({ contextUsage })) }),
+        userMCPAuthMap: {},
+      });
+
+      await post(approveBody());
+      await settled;
+      await flush();
+
+      expect(mockSaveMessage).toHaveBeenCalledWith(
+        expect.anything(),
+        expect.objectContaining({ metadata: expect.objectContaining({ contextUsage }) }),
+        expect.anything(),
+      );
+    });
+
+    it('resumes an ask_user_question with the free-form answer', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeAskUserJob());
+      const res = await post({
+        conversationId: CONVO_ID,
+        actionId: ACTION_ID,
+        agent_id: AGENT_ID,
+        endpoint: 'agents',
+        answer: 'call it report.pdf',
+      });
+      expect(res.status).toBe(200);
+      await settled;
+      await flush();
+
+      const client = await mockInitializeClient.mock.results[0].value.then((r) => r.client);
+      expect(client.resumeCompletion).toHaveBeenCalledWith(
+        expect.objectContaining({ resumeValue: { answer: 'call it report.pdf' } }),
+      );
+      expect(mockGenerationJobManager.completeJob).toHaveBeenCalledWith(CONVO_ID);
+    });
+
+    it('generates a title for a first-turn pause before completing the stream', async () => {
+      const job = makeToolApprovalJob();
+      job.metadata.userMessage.parentMessageId = Constants.NO_PARENT;
+      mockGenerationJobManager.getJob.mockResolvedValue(job);
+      mockGetConvo.mockResolvedValue({ title: 'New Chat' });
+
+      await post(approveBody());
+      await settled;
+      await flush();
+
+      expect(mockAddTitle).toHaveBeenCalledTimes(1);
+      // Title is emitted (and the job completed) — order matters but both must happen.
+      expect(mockGenerationJobManager.completeJob).toHaveBeenCalledWith(CONVO_ID);
+    });
+
+    it('still finalizes the turn when first-turn title generation throws', async () => {
+      const job = makeToolApprovalJob();
+      job.metadata.userMessage.parentMessageId = Constants.NO_PARENT;
+      mockGenerationJobManager.getJob.mockResolvedValue(job);
+      mockGetConvo.mockResolvedValue({ title: 'New Chat' });
+      // Title generation is best-effort: a throw must not break the resumed turn.
+      mockAddTitle.mockRejectedValue(new Error('title service down'));
+
+      await post(approveBody());
+      await settled;
+      await flush();
+
+      expect(mockLogger.error).toHaveBeenCalled();
+      expect(mockSaveMessage).toHaveBeenCalledTimes(1);
+      expect(mockGenerationJobManager.emitDone).toHaveBeenCalledWith(CONVO_ID, expect.any(Object));
+      expect(mockGenerationJobManager.completeJob).toHaveBeenCalledWith(CONVO_ID);
+    });
+  });
+
+  describe('non-finalizing outcomes', () => {
+    it('re-pause: does not finalize when the run pauses again', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      mockInitializeClient.mockResolvedValue({
+        client: makeClient({ pendingApproval: true }),
+        userMCPAuthMap: {},
+      });
+
+      const res = await post(approveBody());
+      expect(res.status).toBe(200);
+      await settled;
+      await flush();
+
+      // It persists progress (unfinished) but must NOT finalize the turn.
+      expect(mockSaveMessage).toHaveBeenCalledWith(
+        expect.anything(),
+        expect.objectContaining({ unfinished: true }),
+        expect.anything(),
+      );
+      expect(mockGenerationJobManager.emitDone).not.toHaveBeenCalled();
+      expect(mockGenerationJobManager.completeJob).not.toHaveBeenCalled();
+      expect(mockDeleteAgentCheckpoint).not.toHaveBeenCalled();
+      // The slot is still released and the client disposed.
+      expect(mockDecrementPendingRequest).toHaveBeenCalledWith(USER_ID);
+      expect(mockDisposeClient).toHaveBeenCalledTimes(1);
+    });
+
+    it('re-pause: persists the segment content (unfinished) so an expiring re-pause keeps it', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      mockInitializeClient.mockResolvedValue({
+        client: makeClient({
+          pendingApproval: true,
+          contentParts: [{ type: 'text', text: 'streamed this segment' }],
+          artifactPromises: [],
+        }),
+        userMCPAuthMap: {},
+      });
+
+      const res = await post(approveBody());
+      expect(res.status).toBe(200);
+      await settled;
+      await flush();
+
+      expect(mockSaveMessage).toHaveBeenCalledWith(
+        expect.anything(),
+        expect.objectContaining({
+          content: [{ type: 'text', text: 'streamed this segment' }],
+          unfinished: true,
+        }),
+        expect.objectContaining({
+          context: 'api/server/controllers/agents/resume.js - re-pause progress persist',
+        }),
+      );
+      expect(mockGenerationJobManager.emitDone).not.toHaveBeenCalled();
+    });
+
+    it('re-pause: persists artifacts produced before pausing again (unfinished)', async () => {
+      const artifact = { type: 'image', file_id: 'seg-1' };
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      mockInitializeClient.mockResolvedValue({
+        client: makeClient({
+          pendingApproval: true,
+          artifactPromises: [Promise.resolve(artifact)],
+        }),
+        userMCPAuthMap: {},
+      });
+
+      const res = await post(approveBody());
+      expect(res.status).toBe(200);
+      await settled;
+      await flush();
+
+      // No finalize, but the segment's artifact is persisted unfinished so the next
+      // resume's finalize can merge it (otherwise the fresh client drops it).
+      expect(mockGenerationJobManager.emitDone).not.toHaveBeenCalled();
+      expect(mockSaveMessage).toHaveBeenCalledWith(
+        expect.anything(),
+        expect.objectContaining({ attachments: [artifact], unfinished: true }),
+        expect.objectContaining({
+          context: 'api/server/controllers/agents/resume.js - re-pause progress persist',
+        }),
+      );
+    });
+
+    it('abort-during-resume: lets the abort route finalize, does not double-save', async () => {
+      const job = makeToolApprovalJob();
+      mockGenerationJobManager.getJob.mockResolvedValue(job);
+      mockInitializeClient.mockImplementation(async () => {
+        job.abortController.abort();
+        return { client: makeClient(), userMCPAuthMap: {} };
+      });
+
+      const res = await post(approveBody());
+      expect(res.status).toBe(200);
+      await settled;
+      await flush();
+
+      expect(mockSaveMessage).not.toHaveBeenCalled();
+      expect(mockGenerationJobManager.emitDone).not.toHaveBeenCalled();
+      expect(mockGenerationJobManager.completeJob).not.toHaveBeenCalled();
+      expect(mockDecrementPendingRequest).toHaveBeenCalledWith(USER_ID);
+    });
+
+    it('resume failure: emits an error, finalizes the job, and prunes the checkpoint', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      mockInitializeClient.mockResolvedValue({
+        client: makeClient({
+          resumeCompletion: jest.fn().mockRejectedValue(new Error('boom')),
+        }),
+        userMCPAuthMap: {},
+      });
+
+      const res = await post(approveBody());
+      expect(res.status).toBe(200); // already ACKed before the failure
+      await settled;
+      await flush();
+
+      expect(mockGenerationJobManager.emitError).toHaveBeenCalledWith(CONVO_ID, 'boom');
+      expect(mockGenerationJobManager.completeJob).toHaveBeenCalledWith(CONVO_ID, 'boom');
+      expect(mockDeleteAgentCheckpoint).toHaveBeenCalledWith(CONVO_ID, { type: 'mongo' });
+      expect(mockDecrementPendingRequest).toHaveBeenCalledWith(USER_ID);
+      expect(mockSaveMessage).not.toHaveBeenCalled();
+    });
+
+    it('forces a terminal job state when completeJob also fails during a resume error', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(makeToolApprovalJob());
+      mockInitializeClient.mockResolvedValue({
+        client: makeClient({
+          resumeCompletion: jest.fn().mockRejectedValue(new Error('boom')),
+        }),
+        userMCPAuthMap: {},
+      });
+      // The error path's completeJob also fails → last-resort updateJob must force a
+      // terminal state so the job isn't orphaned in `running`.
+      mockGenerationJobManager.completeJob.mockRejectedValue(new Error('complete failed'));
+
+      await post(approveBody());
+      await settled;
+      await flush();
+
+      expect(mockJobStore.updateJob).toHaveBeenCalledWith(
+        CONVO_ID,
+        expect.objectContaining({ status: 'error', error: 'Resume failed' }),
+      );
+      expect(mockDecrementPendingRequest).toHaveBeenCalledWith(USER_ID);
+    });
+  });
+});
diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js
index df2515c2ab..f9432ae821 100644
--- a/api/server/controllers/agents/client.js
+++ b/api/server/controllers/agents/client.js
@@ -33,6 +33,13 @@ const {
   GenerationJobManager,
   getTransactionsConfig,
   resolveRecursionLimit,
+  buildPendingAction,
+  computeAgentRequestFingerprint,
+  extractDiscoveredToolsFromHistory,
+  pickResumeContext,
+  getApprovalTtlMs,
+  isHITLEnabled,
+  deleteAgentCheckpoint,
   getRequestMemories,
   createMemoryProcessor,
   agentHasInlineMemoryTools,
@@ -54,6 +61,7 @@ const {
   hasUrlContextTool,
   appendYouTubeVideoParts,
   resolveYouTubeInjectionConfig,
+  decrementPendingRequest,
 } = require('@librechat/api');
 const {
   Callback,
@@ -69,6 +77,7 @@ const {
   Permissions,
   VisionModes,
   ContentTypes,
+  ApprovalEvents,
   EModelEndpoint,
   PermissionTypes,
   AgentCapabilities,
@@ -1165,6 +1174,150 @@ class AgentClient extends BaseClient {
    * @param {Record<string, Record<string, string>>} [params.userMCPAuthMap]
    * @param {AbortController} [params.abortController]
    */
+  /**
+   * @deprecated Agent Chain — strip hidden intermediate sequential-agent content
+   * before persistence, keeping only the last part + tool_call parts. Mirrors the
+   * chat path so a HITL resume doesn't persist/emit intermediate outputs the
+   * agent's `hide_sequential_outputs` setting is meant to hide.
+   */
+  applyHideSequentialOutputsFilter() {
+    if (!this.options.agent?.hide_sequential_outputs || !Array.isArray(this.contentParts)) {
+      return;
+    }
+    this.contentParts = this.contentParts.filter(
+      (part, index) =>
+        index >= this.contentParts.length - 1 ||
+        part.type === ContentTypes.TOOL_CALL ||
+        part.tool_call_ids,
+    );
+  }
+
+  /**
+   * Surface any human-in-the-loop interrupt the SDK captured during the most
+   * recent `processStream` / `resume`. When the run paused for tool approval (or
+   * an ask-user question), mark the job `requires_action`, persist the pending
+   * review record, and emit it to live clients — then set `this.pendingApproval`
+   * so the controller leaves the turn unfinalized for the resume route to continue.
+   *
+   * No-op when the run completed without an interrupt, or when the job was aborted
+   * between the interrupt firing and this mark (a late interrupt must not pause a
+   * dead job — the atomic `pause` transition returns false and we drop it).
+   *
+   * @param {AgentRun} run
+   * @param {string} [streamId]
+   */
+  async handleRunInterrupt(run, streamId) {
+    if (!streamId || typeof run?.getInterrupt !== 'function') {
+      return;
+    }
+    const interrupt = run.getInterrupt();
+    if (!interrupt?.payload) {
+      return;
+    }
+
+    const appConfig = this.options.req?.config;
+    const checkpointerCfg = appConfig?.endpoints?.[EModelEndpoint.agents]?.checkpointer;
+    // Persist the resolved model parameters (temperature, max tokens, custom endpoint
+    // params, …) so an ephemeral-agent resume continues with the SAME settings the run
+    // paused on. The resume payload omits them and they aren't part of the fingerprint, so
+    // without this the rebuilt ephemeral run falls back to defaults. (Saved agents source
+    // these from the DB record server-side, so this is belt-and-suspenders for them.)
+    const resumeContext = pickResumeContext(this.options.req?.body);
+    const resolvedModelParameters = this.options.agent?.model_parameters;
+    if (resolvedModelParameters && typeof resolvedModelParameters === 'object') {
+      resumeContext.model_parameters = resolvedModelParameters;
+    }
+    const pendingAction = buildPendingAction(interrupt.payload, {
+      streamId,
+      conversationId: this.conversationId,
+      // runId mirrors the LangGraph checkpoint namespace when the SDK provides it
+      // (its documented meaning), falling back to the response message id.
+      runId: interrupt.checkpointNs ?? this.responseMessageId,
+      responseMessageId: this.responseMessageId,
+      interruptId: interrupt.interruptId,
+      // thread_id was bound to conversationId at run config (config.configurable);
+      // fall back to it when the SDK doesn't echo threadId on the interrupt.
+      threadId: interrupt.threadId ?? this.conversationId,
+      ttlMs: getApprovalTtlMs(checkpointerCfg),
+      // Pin the graph-determining request fields so resume can't rebuild this paused
+      // run on a different agent/tool set (esp. ephemeral agents, whose agent_id is
+      // undefined so the id guard can't tell two configs apart).
+      requestFingerprint: computeAgentRequestFingerprint(this.options.req?.body ?? {}),
+      // Persist those same fields verbatim so the resume route can REPLAY them — a
+      // reload/cross-replica resume can't reconstruct the ephemeral config client-side,
+      // so the server restores it and rebuilds the same graph (and the fingerprint matches).
+      resumeContext,
+    });
+
+    // Job-replacement guard: streamId == conversationId is reused per conversation, so a
+    // newer request can replace this run's job. If this (older) run hits an interrupt
+    // after a replacement, pausing would flip the NEWER job to requires_action with this
+    // stale run's pending action, blocking fresh work behind the wrong approval. Only
+    // pause when the live job is still the one THIS run created (mirrors request.js).
+    if (this.jobCreatedAt != null) {
+      const liveJob = await GenerationJobManager.getJobStore().getJob(streamId);
+      if (!liveJob || liveJob.createdAt !== this.jobCreatedAt) {
+        logger.debug(`[AgentClient] Interrupt fired but job ${streamId} was replaced; not pausing`);
+        return;
+      }
+    }
+
+    const paused = await GenerationJobManager.approvals.pause(streamId, pendingAction);
+    if (!paused) {
+      logger.debug(
+        `[AgentClient] Interrupt fired but job ${streamId} was not running; not pausing`,
+      );
+      return;
+    }
+
+    // Capture deferred tools discovered (via tool_search) earlier in THIS turn so resume
+    // can replay them into createRun. The resumed graph is rebuilt with `messages: []`
+    // (state comes from the checkpoint), so the in-turn tool_search results that mark a
+    // deferred tool discovered aren't present there — without this the paused deferred
+    // tool would be missing from the rebuilt schema-only toolMap and resume would fail
+    // with "unknown tool". Inert for non-deferred turns (the set comes back empty).
+    try {
+      const runMessages =
+        typeof run.getRunMessages === 'function' ? run.getRunMessages() : undefined;
+      if (Array.isArray(runMessages) && runMessages.length > 0) {
+        const discovered = extractDiscoveredToolsFromHistory(runMessages);
+        if (discovered.size > 0) {
+          await GenerationJobManager.updateMetadata(streamId, {
+            discoveredTools: Array.from(discovered),
+          });
+        }
+      }
+    } catch (err) {
+      logger.warn(
+        `[AgentClient] Failed to capture discovered tools for resume on ${streamId}`,
+        err?.message ?? err,
+      );
+    }
+
+    this.pendingApproval = pendingAction;
+    // Release the concurrency slot this request held the MOMENT the turn is durably
+    // paused — before the approval card is emitted — so the user's `/resume` can
+    // re-acquire one immediately. Otherwise a fast Approve races the HTTP-driver
+    // teardown (request.js pause branch / resume.js finally) that would otherwise
+    // release it, and `/resume` 429s under LIMIT_CONCURRENT_MESSAGES. Idempotent via
+    // the flag; if it fails here, the teardown still releases (it checks the flag).
+    if (!this.pendingRequestReleased) {
+      try {
+        await decrementPendingRequest(this.options.req?.user?.id);
+        this.pendingRequestReleased = true;
+      } catch (err) {
+        logger.error(`[AgentClient] Failed to release request slot on pause ${streamId}`, err);
+      }
+    }
+    await GenerationJobManager.emitChunk(streamId, {
+      event: ApprovalEvents.ON_PENDING_ACTION,
+      data: pendingAction,
+    });
+    logger.debug(
+      `[AgentClient] Paused ${streamId} for ${interrupt.payload.type} (action ${pendingAction.actionId})`,
+    );
+  }
+
   async chatCompletion({ payload, userMCPAuthMap, abortController = null }) {
     /** @type {Partial<GraphRunnableConfig>} */
     let config;
@@ -1405,6 +1558,11 @@ class AgentClient extends BaseClient {
         run = await createRun({
           agents,
           messages,
+          // This controller implements the full HITL pause/resume lifecycle (handleRunInterrupt
+          // persists the pending action; the /resume route rebuilds + continues the run), so it
+          // opts into the tool-approval wiring. Non-resumable callers (OpenAI-compat, Responses)
+          // leave this off so an approval-gated tool can't pause where there's no resume path.
+          hitlCapable: true,
           indexTokenCountMap,
           initialSummary,
           initialSessions,
@@ -1452,16 +1610,29 @@ class AgentClient extends BaseClient {
 
         /** @deprecated Agent Chain */
         config.configurable.last_agent_id = agents[agents.length - 1].id;
+
+        // HITL: clear any checkpoint orphaned by a prior paused turn in this
+        // conversation (one that expired or was aborted while paused) so this fresh
+        // turn starts clean instead of rehydrating a stale interrupt — thread_id is
+        // the stable conversationId. No-op when HITL is off or nothing is orphaned.
+        if (streamId && isHITLEnabled(agentsEConfig?.toolApproval)) {
+          await deleteAgentCheckpoint(this.conversationId, agentsEConfig?.checkpointer);
+        }
+
         await run.processStream({ messages }, config, {
           callbacks: {
             [Callback.TOOL_ERROR]: logToolError,
           },
         });
 
+        // HITL: if the run paused for tool approval, mark the job
+        // `requires_action` + emit the prompt and leave the turn unfinalized
+        // (the resume route continues it). No-op when the run completed.
+        await this.handleRunInterrupt(run, streamId);
+
         config.signal = null;
       };
 
-      const hideSequentialOutputs = config.configurable.hide_sequential_outputs;
       await runAgents(initialMessages);
 
       /**
@@ -1501,20 +1672,7 @@ class AgentClient extends BaseClient {
         this.contentParts.unshift(...manualParts);
       }
 
-      /** @deprecated Agent Chain */
-      if (hideSequentialOutputs) {
-        this.contentParts = this.contentParts.filter((part, index) => {
-          // Include parts that are either:
-          // 1. At or after the finalContentStart index
-          // 2. Of type tool_call
-          // 3. Have tool_call_ids property
-          return (
-            index >= this.contentParts.length - 1 ||
-            part.type === ContentTypes.TOOL_CALL ||
-            part.tool_call_ids
-          );
-        });
-      }
+      this.applyHideSequentialOutputsFilter();
     } catch (err) {
       if (abortController.signal.aborted) {
         logger.debug(
@@ -1584,12 +1742,292 @@ class AgentClient extends BaseClient {
         this._resolveRun(this.run ?? null);
         this._resolveRun = null;
       }
+
+      // HITL: a turn that completed (or errored) without pausing leaves a dead
+      // checkpoint. thread_id is the conversationId — stable across turns — so it
+      // MUST be pruned before the next turn, or LangGraph would resume this turn's
+      // state instead of starting fresh. Skip when paused (the checkpoint is needed
+      // to resume) or when HITL is off (none was written). The Mongo TTL is the backstop.
+      const agentsEConfig = appConfig?.endpoints?.[EModelEndpoint.agents];
+      if (!this.pendingApproval && isHITLEnabled(agentsEConfig?.toolApproval)) {
+        try {
+          // Job-replacement guard: only prune if THIS generation is still the live job.
+          // A newer request can replace this one on the same conversationId; if this
+          // (older) run's finally lands after the newer run paused, pruning by
+          // conversationId would delete the NEWER run's checkpoint and break its /resume.
+          const resumableStreamId = this.options.req?._resumableStreamId;
+          let replaced = false;
+          if (resumableStreamId && this.jobCreatedAt != null) {
+            const liveJob = await GenerationJobManager.getJobStore().getJob(resumableStreamId);
+            replaced = !liveJob || liveJob.createdAt !== this.jobCreatedAt;
+          }
+          if (replaced) {
+            logger.debug('[AgentClient] Skipping checkpoint prune — job was replaced', {
+              streamId: resumableStreamId,
+            });
+          } else {
+            await deleteAgentCheckpoint(this.conversationId, agentsEConfig?.checkpointer);
+          }
+        } catch (err) {
+          logger.warn('[AgentClient] Failed to prune checkpoint after completion', err);
+        }
+      }
+
       run = null;
       config = null;
       memoryPromise = null;
     }
   }
 
+  /**
+   * Resume a run that paused for human-in-the-loop review.
+   *
+   * The original run lives in a detached background task that exits when the run
+   * pauses, so resume REBUILDS the run on a fresh graph bound to the same
+   * `thread_id` (= conversationId) and the durable checkpointer. LangGraph rehydrates
+   * the paused graph state from the checkpoint; `run.resume(value)` re-enters the
+   * interrupted node with the user's decision. State comes from the checkpoint, so
+   * no message history is rebuilt here — `createRun` only needs the agent(s) to
+   * reconstruct the graph structure.
+   *
+   * `seedContent` is the content streamed before the pause (the assistant message +
+   * its tool call). In Redis mode the job store's append log already spans the pause,
+   * so the finalized message is complete regardless; seeding keeps the in-memory store
+   * complete too. The run drives events through the same `streamId`, so the client's
+   * open SSE receives the continuation live.
+   *
+   * Unlike `chatCompletion`, this does NOT prune the checkpoint in its `finally` — the
+   * resume controller owns checkpoint lifecycle (it must also clean up on failures that
+   * happen before this method runs, and keep the checkpoint on a re-pause).
+   *
+   * @param {object} params
+   * @param {Agents.ToolApprovalDecisionMap | { answer: string }} params.resumeValue
+   * @param {Array} [params.seedContent] - content aggregated before the pause
+   * @param {AbortController} [params.abortController]
+   * @param {Pick<import('@langchain/langgraph').Command, 'update' | 'goto'>} [params.commandOptions]
+   */
+  async resumeCompletion({
+    resumeValue,
+    seedContent = [],
+    abortController = null,
+    commandOptions,
+    userMCPAuthMap,
+    discoveredToolNames,
+  }) {
+    /** @type {Partial<GraphRunnableConfig>} */
+    let config;
+    /** @type {ReturnType<createRun>} */
+    let run;
+    const appConfig = this.options.req.config;
+    const balanceConfig = getBalanceConfig(appConfig);
+    const transactionsConfig = getTransactionsConfig(appConfig);
+    try {
+      if (!abortController) {
+        abortController = new AbortController();
+      }
+
+      /** @type {AppConfig['endpoints']['agents']} */
+      const agentsEConfig = appConfig.endpoints?.[EModelEndpoint.agents];
+
+      config = {
+        runName: 'AgentRun',
+        configurable: {
+          thread_id: this.conversationId,
+          last_agent_index: this.agentConfigs?.size ?? 0,
+          user_id: this.user ?? this.options.req.user?.id,
+          hide_sequential_outputs: this.options.agent.hide_sequential_outputs,
+          requestBody: {
+            messageId: this.responseMessageId,
+            conversationId: this.conversationId,
+            parentMessageId: this.parentMessageId,
+          },
+          user: createSafeUser(this.options.req.user),
+        },
+        recursionLimit: resolveRecursionLimit(agentsEConfig, this.options.agent),
+        signal: abortController.signal,
+        streamMode: 'values',
+        version: 'v2',
+      };
+
+      // Seed pre-pause content so the in-memory job store reports the complete turn
+      // (Redis aggregates across the pause via its append log; this covers in-memory).
+      if (Array.isArray(seedContent) && seedContent.length > 0) {
+        this.contentParts.push(...seedContent);
+      }
+
+      const tokenCounter = createTokenCounter(this.getEncoding());
+      const agents = [this.options.agent];
+      if (this.agentConfigs && this.agentConfigs.size > 0) {
+        agents.push(...this.agentConfigs.values());
+      }
+
+      // Re-prime skill files invoked in the pre-pause segment (mirrors the normal path's
+      // `primeInvokedSkills(payload)`), so an approved code/file-backed tool keeps the
+      // injected skill-file session refs instead of running without them. The pre-pause
+      // content carries the `skill` tool_calls, so it stands in for the message payload.
+      let skillSessions;
+      if (
+        typeof this.options.primeInvokedSkills === 'function' &&
+        Array.isArray(seedContent) &&
+        seedContent.length > 0
+      ) {
+        try {
+          const primed = await this.options.primeInvokedSkills([
+            { role: 'assistant', content: seedContent },
+          ]);
+          skillSessions = primed?.initialSessions;
+        } catch (err) {
+          logger.warn(
+            '[api/server/controllers/agents/client.js #resumeCompletion] Failed to re-prime skill sessions',
+            err?.message ?? err,
+          );
+        }
+      }
+
+      // Seed code-env / skill tool sessions so an approved code/file/skill-backed tool
+      // runs with the same uploaded-file context the pre-pause run had — the rebuilt
+      // graph otherwise has no `Graph.sessions` entries (especially cross-replica).
+      const initialSessions = buildInitialToolSessions({ skillSessions, agents });
+
+      run = await createRun({
+        agents,
+        // State (messages, tool calls) is rehydrated from the checkpoint by
+        // run.resume; createRun only needs the agents to rebuild the graph.
+        messages: [],
+        // The resumed run can pause AGAIN (another tool, a follow-up question), and this
+        // controller owns that lifecycle, so it must keep the HITL wiring on the rebuilt run.
+        hitlCapable: true,
+        // Replay deferred tools discovered before the pause. With `messages: []` the
+        // discovery scan finds nothing, so a deferred tool the paused call targets
+        // would be absent from the rebuilt toolMap; these names (captured at pause)
+        // force it back in. Undefined/empty for non-deferred turns — a harmless no-op.
+        discoveredToolNames,
+        initialSessions,
+        runId: this.responseMessageId,
+        signal: abortController.signal,
+        customHandlers: this.options.eventHandlers,
+        requestBody: config.configurable.requestBody,
+        user: createSafeUser(this.options.req?.user),
+        tenantId: this.options.req?.user?.tenantId,
+        summarizationConfig: appConfig?.summarization,
+        appConfig,
+        tokenCounter,
+        subagentUsageSink: createSubagentUsageSink(
+          this.collectedUsage,
+          this.buildSubagentUsageEmitter(appConfig),
+        ),
+      });
+
+      if (!run) {
+        throw new Error('Failed to create run for resume');
+      }
+
+      this.run = run;
+      if (this._resolveRun) {
+        this._resolveRun(run);
+        this._resolveRun = null;
+      }
+
+      const streamId = this.options.req?._resumableStreamId;
+      // Do NOT cache the rebuilt graph on resume: it was created with `messages: []`, so
+      // RedisJobStore.getContentParts() (which prefers a cached graph over reconstructing
+      // from the chunk log) would return only the resumed segment and drop the pre-pause
+      // assistant/tool-call content on a same-replica reload/status poll. Skipping it makes
+      // introspection fall back to the durable chunk reconstruction, which is complete.
+      // `setContentParts` still points the in-memory store at the seeded client content.
+      if (streamId && this.contentParts) {
+        GenerationJobManager.setContentParts(streamId, this.contentParts);
+      }
+
+      // Carry the user's MCP auth into the rebuilt run so an approved MCP tool executes
+      // with the same OAuth/user credentials it had before the pause.
+      if (userMCPAuthMap != null) {
+        config.configurable.userMCPAuthMap = userMCPAuthMap;
+      }
+
+      /** @deprecated Agent Chain */
+      config.configurable.last_agent_id = agents[agents.length - 1].id;
+
+      await run.resume(
+        resumeValue,
+        config,
+        { callbacks: { [Callback.TOOL_ERROR]: logToolError } },
+        commandOptions,
+      );
+
+      config.signal = null;
+
+      // The model may pause AGAIN (another tool needs approval, or a follow-up
+      // question). Re-arm the same interrupt gate so the cycle can repeat.
+      await this.handleRunInterrupt(run, streamId);
+
+      // Mirror chatCompletion: strip hidden intermediate sequential-agent content
+      // before resume finalize/re-pause persistence reads `this.contentParts`, so a
+      // resumed sequential chain doesn't persist/emit outputs hide_sequential_outputs
+      // is meant to hide.
+      this.applyHideSequentialOutputsFilter();
+    } catch (err) {
+      if (abortController.signal.aborted) {
+        logger.debug(
+          '[api/server/controllers/agents/client.js #resumeCompletion] Aborted by user',
+          {
+            conversationId: this.conversationId,
+            name: err?.name,
+            code: err?.code,
+          },
+        );
+      } else {
+        logger.error(
+          '[api/server/controllers/agents/client.js #resumeCompletion] Unhandled error',
+          err,
+        );
+        this.contentParts.push({
+          type: ContentTypes.ERROR,
+          [ContentTypes.ERROR]: `An error occurred while resuming the request${err?.message ? `: ${err.message}` : ''}`,
+        });
+      }
+    } finally {
+      const ratio = this.run?.getCalibrationRatio() ?? 0;
+      if (ratio > 0 && ratio !== 1) {
+        this.contextMeta = {
+          calibrationRatio: Math.round(ratio * 1000) / 1000,
+          encoding: this.getEncoding(),
+        };
+      } else {
+        this.contextMeta = undefined;
+      }
+
+      this.finalizeSubagentContent();
+
+      if (this.pendingSubagentEmits.length > 0) {
+        await Promise.allSettled(this.pendingSubagentEmits);
+        this.pendingSubagentEmits = [];
+      }
+
+      try {
+        const wasAborted = abortController?.signal?.aborted;
+        if (!wasAborted) {
+          await this.recordCollectedUsage({
+            context: 'message',
+            balance: balanceConfig,
+            transactions: transactionsConfig,
+          });
+        }
+      } catch (err) {
+        logger.error(
+          '[api/server/controllers/agents/client.js #resumeCompletion] Error in cleanup phase',
+          err,
+        );
+      }
+      if (this._resolveRun) {
+        this._resolveRun(this.run ?? null);
+        this._resolveRun = null;
+      }
+      run = null;
+      config = null;
+    }
+  }
+
   /**
    * Resolves with the agent run once it is initialized, or `null` if
    * initialization fails. Lets immediate-mode title generation await the run
diff --git a/api/server/controllers/agents/client.test.js b/api/server/controllers/agents/client.test.js
index 1320e362c1..1646a448a7 100644
--- a/api/server/controllers/agents/client.test.js
+++ b/api/server/controllers/agents/client.test.js
@@ -48,6 +48,32 @@ jest.mock('~/config', () => ({
   })),
 }));
 
+describe('AgentClient - applyHideSequentialOutputsFilter', () => {
+  const textPart = (text) => ({ type: ContentTypes.TEXT, text });
+  const toolCallPart = (id) => ({ type: ContentTypes.TOOL_CALL, tool_call: { id } });
+
+  it('keeps only the last part + tool_call parts when hide_sequential_outputs is on', () => {
+    const ctx = {
+      options: { agent: { hide_sequential_outputs: true } },
+      contentParts: [
+        textPart('intermediate'),
+        toolCallPart('tc1'),
+        textPart('reasoning'),
+        textPart('final'),
+      ],
+    };
+    AgentClient.prototype.applyHideSequentialOutputsFilter.call(ctx);
+    expect(ctx.contentParts).toEqual([toolCallPart('tc1'), textPart('final')]);
+  });
+
+  it('is a no-op when hide_sequential_outputs is off', () => {
+    const parts = [textPart('a'), textPart('b')];
+    const ctx = { options: { agent: { hide_sequential_outputs: false } }, contentParts: parts };
+    AgentClient.prototype.applyHideSequentialOutputsFilter.call(ctx);
+    expect(ctx.contentParts).toEqual([textPart('a'), textPart('b')]);
+  });
+});
+
 describe('AgentClient - titleConvo', () => {
   let client;
   let mockRun;
diff --git a/api/server/controllers/agents/request.js b/api/server/controllers/agents/request.js
index 0b8c0cee1a..81213a23d9 100644
--- a/api/server/controllers/agents/request.js
+++ b/api/server/controllers/agents/request.js
@@ -94,7 +94,10 @@ function getPreliminaryResponseMessageId({ messageId, responseMessageId }) {
   return `${messageId.replace(/_+$/, '')}_`;
 }
 
-function getPreliminaryUserMessage({ messageId, parentMessageId, text, quotes }, conversationId) {
+function getPreliminaryUserMessage(
+  { messageId, parentMessageId, text, quotes, files, manualSkills, alwaysAppliedSkills },
+  conversationId,
+) {
   if (typeof messageId !== 'string' || messageId.length === 0) {
     return null;
   }
@@ -113,6 +116,17 @@ function getPreliminaryUserMessage({ messageId, parentMessageId, text, quotes },
     conversationId,
     text,
     ...(referencedQuotes != null && { quotes: referencedQuotes }),
+    // Persist the turn's uploaded files on this AWAITED preliminary write so they land on
+    // job.metadata.userMessage BEFORE the run can reach its first interrupt. onStart's
+    // later writes are fire-and-forget, so a fast approval could otherwise read the job
+    // and resume an approved code/read-file tool without the paused turn's uploads.
+    ...(Array.isArray(files) && files.length > 0 && { files }),
+    // Carry skill selections so a HITL-resumed turn's reconstructed `requestMessage`
+    // keeps its skill pills — the client's final handler replaces the user bubble from
+    // this object, and they'd otherwise vanish until a full reload refetches the row.
+    ...(Array.isArray(manualSkills) && manualSkills.length > 0 && { manualSkills }),
+    ...(Array.isArray(alwaysAppliedSkills) &&
+      alwaysAppliedSkills.length > 0 && { alwaysAppliedSkills }),
   };
 }
 
@@ -248,6 +262,12 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
       endpoint: endpointOption.endpoint,
       iconURL: endpointIconURL,
       model: responseModel,
+      // Persist the originating agent so a HITL resume can refuse to rebuild this
+      // paused run on a different agent (see resume.js).
+      agent_id: endpointOption.agent_id ?? req.body?.agent_id,
+      // Persist temporary-chat state so a HITL resume keeps the resumed response
+      // non-persisted instead of trusting the resume request to re-send the flag.
+      isTemporary: req.body?.isTemporary,
       responseMessageId: preliminaryResponseMessageId,
       userMessage: preliminaryUserMessage,
     });
@@ -344,6 +364,10 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
     }
 
     client = result.client;
+    // Tag the client with THIS generation's identity so HITL terminal side-effects
+    // (pause CAS, checkpoint prune) can tell whether a newer request has since replaced
+    // this job on the same conversationId before acting on it.
+    client.jobCreatedAt = jobCreatedAt;
 
     // Resolve title timing from the public agents endpoint first, then fall
     // back to the agent's actual backing provider/custom endpoint.
@@ -450,12 +474,44 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
               conversationId: userMsg.conversationId,
               text: userMsg.text,
               quotes: userMsg.quotes,
+              // Persist the turn's uploaded files here (authoritative job metadata) so a
+              // HITL resume sources them from the job, not the user DB row — which the
+              // approval prompt can race (the row save may still be in flight when a fast
+              // /resume reads it). Without this an approved tool run can rebuild without the
+              // paused turn's files.
+              ...(Array.isArray(req.body?.files) &&
+                req.body.files.length > 0 && { files: req.body.files }),
+              // Skill selections aren't on `userMsg` yet at onStart (BaseClient adds them
+              // later), so source them from the request — otherwise this update overwrites
+              // the preliminary metadata and a HITL-resumed turn loses its skill pills.
+              ...(Array.isArray(req.body?.manualSkills) &&
+                req.body.manualSkills.length > 0 && { manualSkills: req.body.manualSkills }),
+              ...(Array.isArray(req.body?.alwaysAppliedSkills) &&
+                req.body.alwaysAppliedSkills.length > 0 && {
+                  alwaysAppliedSkills: req.body.alwaysAppliedSkills,
+                }),
             },
           });
 
           GenerationJobManager.emitChunk(streamId, {
             created: true,
-            message: userMessage,
+            // Skill selections aren't on `userMessage` yet at onStart (BaseClient adds
+            // them later), so attach them from the request — this is the message
+            // `trackUserMessage` persists as the authoritative job.metadata.userMessage,
+            // and it's what the live client renders the user bubble from.
+            message: {
+              ...userMessage,
+              // Carry files so trackUserMessage (the authoritative writer) persists them on
+              // job.metadata.userMessage for a HITL resume (see the updateMetadata above).
+              ...(Array.isArray(req.body?.files) &&
+                req.body.files.length > 0 && { files: req.body.files }),
+              ...(Array.isArray(req.body?.manualSkills) &&
+                req.body.manualSkills.length > 0 && { manualSkills: req.body.manualSkills }),
+              ...(Array.isArray(req.body?.alwaysAppliedSkills) &&
+                req.body.alwaysAppliedSkills.length > 0 && {
+                  alwaysAppliedSkills: req.body.alwaysAppliedSkills,
+                }),
+            },
             streamId,
           });
         };
@@ -503,6 +559,99 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
 
         const response = await sendPromise;
 
+        // HITL: the turn paused for human review (see AgentClient.handleRunInterrupt).
+        // The job is already `requires_action` with the pending action persisted and
+        // emitted to the client; the resume route owns finishing this turn. Settle the
+        // in-flight user-message / conversation save, then tear down WITHOUT saving a
+        // partial response, emitting a terminal event, or completing the job.
+        if (client?.pendingApproval) {
+          if (response?.databasePromise) {
+            try {
+              await response.databasePromise;
+            } catch (dbErr) {
+              logger.error(
+                '[ResumableAgentController] Error settling databasePromise on HITL pause',
+                dbErr,
+              );
+            }
+            delete response.databasePromise;
+          }
+          // BaseClient saved the response as completed (unfinished:false), but the turn
+          // is paused awaiting a decision. Re-mark it unfinished so an expired / never-
+          // resumed approval doesn't leave a "finished" response in history; the resume
+          // path overwrites it with the full completed message on success.
+          if (response?.messageId) {
+            // Guard against a fast /resume: the user can approve the instant the
+            // pending-action SSE lands, and resume.js can then claim + finalize — saving
+            // the COMPLETED response — while we're still awaiting `response.databasePromise`
+            // above. Marking the row unfinished now would clobber that completed content
+            // with this stale pre-pause response. Only mark unfinished while the job is
+            // STILL paused on THIS generation's action: a claim transitions it out of
+            // `requires_action`, and a replacement bumps `createdAt`. Fail open on a read
+            // error so a genuinely never-resumed approval isn't left looking "finished".
+            let stillPaused = true;
+            try {
+              const liveJob = await GenerationJobManager.getJob(streamId);
+              stillPaused =
+                !!liveJob &&
+                liveJob.status === 'requires_action' &&
+                (client?.jobCreatedAt == null || liveJob.createdAt === client.jobCreatedAt);
+            } catch (readErr) {
+              logger.warn(
+                '[ResumableAgentController] Pause unfinished-save liveness check failed; proceeding',
+                readErr?.message ?? readErr,
+              );
+            }
+            if (!stillPaused) {
+              logger.debug(
+                `[ResumableAgentController] Skipping pause unfinished-save — ${streamId} already resumed/replaced`,
+              );
+            } else {
+              try {
+                await saveMessage(
+                  {
+                    userId,
+                    isTemporary: req?.body?.isTemporary,
+                    interfaceConfig: req?.config?.interfaceConfig,
+                  },
+                  {
+                    ...response,
+                    endpoint: endpointOption.endpoint,
+                    unfinished: true,
+                    user: userId,
+                  },
+                  {
+                    context:
+                      'api/server/controllers/agents/request.js - HITL pause (mark unfinished)',
+                  },
+                );
+              } catch (saveErr) {
+                logger.error(
+                  '[ResumableAgentController] Failed to mark paused response unfinished',
+                  saveErr,
+                );
+              }
+            }
+          }
+          titleAbortController.abort();
+          acceptsTitleEvents = false;
+          resolveConvoReady();
+          // handleRunInterrupt already released the concurrency slot the moment it paused
+          // (so a fast /resume isn't 429'd); only release here if that didn't happen.
+          // Always run the MCP request-context cleanup.
+          await cleanupMCPRequestContextForReq(req);
+          if (!client?.pendingRequestReleased) {
+            await decrementPendingRequest(userId);
+          }
+          if (client) {
+            disposeClient(client);
+          }
+          logger.debug(
+            `[ResumableAgentController] Turn paused for approval; awaiting resume: ${streamId}`,
+          );
+          return;
+        }
+
         const messageId = response.messageId;
         const endpoint = endpointOption.endpoint;
         response.endpoint = endpoint;
diff --git a/api/server/controllers/agents/resume.js b/api/server/controllers/agents/resume.js
new file mode 100644
index 0000000000..cc7512dc6f
--- /dev/null
+++ b/api/server/controllers/agents/resume.js
@@ -0,0 +1,700 @@
+const { logger } = require('@librechat/data-schemas');
+const { Constants, EModelEndpoint } = require('librechat-data-provider');
+const {
+  GenerationJobManager,
+  isPendingActionStale,
+  mapToolApprovalResolutions,
+  mapAskUserAnswer,
+  findUndecidedToolCalls,
+  findDisallowedDecisions,
+  findIncompleteDecisions,
+  computeAgentRequestFingerprint,
+  deleteAgentCheckpoint,
+  buildAbortedResponseMetadata,
+  sanitizeMessageForTransmit,
+  filterMalformedContentParts,
+  decrementPendingRequest,
+  checkAndIncrementPendingRequest,
+} = require('@librechat/api');
+const { disposeClient } = require('~/server/cleanup');
+const {
+  getMCPRequestContext,
+  cleanupMCPRequestContextForReq,
+} = require('~/server/services/MCPRequestContext');
+const { saveMessage, getConvo, getMessages } = require('~/models');
+
+/** De-duplicate a merged attachment list by a stable artifact identity. */
+function mergeAttachments(existing, incoming) {
+  const seen = new Set();
+  const out = [];
+  for (const attachment of [...(existing ?? []), ...(incoming ?? [])]) {
+    if (!attachment) {
+      continue;
+    }
+    const key =
+      attachment.file_id ??
+      attachment.filepath ??
+      attachment.filename ??
+      JSON.stringify(attachment);
+    if (seen.has(key)) {
+      continue;
+    }
+    seen.add(key);
+    out.push(attachment);
+  }
+  return out;
+}
+
+/**
+ * Resolve the current segment's tool artifacts and merge them with any already
+ * persisted on the response row. A resumed turn can span multiple pause segments;
+ * each rebuilt client has its own `artifactPromises`, and the final finalize would
+ * otherwise OVERWRITE the row's attachments with only the last segment's. Reading
+ * the persisted row and merging keeps every segment's artifacts on the saved message.
+ */
+async function resolveAccumulatedAttachments({ client, conversationId, responseMessageId }) {
+  const promises = Array.isArray(client?.artifactPromises) ? client.artifactPromises : [];
+  const resolved = promises.length > 0 ? (await Promise.all(promises)).filter(Boolean) : [];
+  let existing = [];
+  if (responseMessageId) {
+    try {
+      const [row] = await getMessages(
+        { conversationId, messageId: responseMessageId },
+        'attachments',
+      );
+      existing = Array.isArray(row?.attachments) ? row.attachments : [];
+    } catch (err) {
+      logger.warn(
+        '[ResumeAgentController] Failed to read prior attachments for merge',
+        err?.message ?? err,
+      );
+    }
+  }
+  return mergeAttachments(existing, resolved);
+}
+
+/** Resolve the segment's content for an unfinished save (mirrors finalize's source). */
+async function resolveSegmentContent(client, streamId) {
+  const liveContent = Array.isArray(client?.contentParts) ? client.contentParts : [];
+  const rawContent =
+    liveContent.length > 0
+      ? liveContent
+      : ((await GenerationJobManager.getResumeState(streamId))?.aggregatedContent ?? []);
+  return filterMalformedContentParts(rawContent);
+}
+
+/**
+ * A resumed segment that streamed content / produced artifacts and then paused AGAIN
+ * must persist that progress before returning. The next resume rebuilds a fresh client
+ * (empty `contentParts`/`artifactPromises`), so without this an approval that later
+ * expires or is reaped would leave only the EARLIER pause's content on the saved row —
+ * the user loses everything streamed during this segment. Saved as a partial (`$set`,
+ * still `unfinished`) so a subsequent successful resume overwrites it on finalize.
+ */
+async function persistRePauseProgress({ req, client, job, streamId, conversationId }) {
+  const userId = req.user.id;
+  const meta = job.metadata ?? {};
+  const responseMessageId = meta.responseMessageId ?? client.responseMessageId;
+  if (!responseMessageId) {
+    return;
+  }
+  const content = await resolveSegmentContent(client, streamId);
+  const attachments = await resolveAccumulatedAttachments({
+    client,
+    conversationId,
+    responseMessageId,
+  });
+  if (content.length === 0 && attachments.length === 0) {
+    return;
+  }
+  try {
+    await saveMessage(
+      {
+        userId,
+        isTemporary: meta.isTemporary ?? req.body?.isTemporary,
+        interfaceConfig: req?.config?.interfaceConfig,
+      },
+      {
+        messageId: responseMessageId,
+        conversationId,
+        ...(content.length > 0 && { content }),
+        ...(attachments.length > 0 && { attachments }),
+        unfinished: true,
+        user: userId,
+      },
+      { context: 'api/server/controllers/agents/resume.js - re-pause progress persist' },
+    );
+  } catch (err) {
+    logger.error('[ResumeAgentController] Failed to persist re-pause progress', err);
+  }
+}
+
+/** Untenanted jobs (pre-multi-tenancy) remain accessible if the userId check passes. */
+function hasTenantMismatch(job, user) {
+  return job.metadata?.tenantId != null && job.metadata.tenantId !== user.tenantId;
+}
+
+/**
+ * Build the SDK resume value from the wire decision payload, validating against the
+ * pending action. Returns `{ resumeValue }` on success or `{ error }` with an HTTP
+ * status for the route to surface.
+ */
+function resolveResumeValue(pendingAction, body) {
+  const payload = pendingAction.payload;
+  if (payload?.type === 'tool_approval') {
+    const resolutions = Array.isArray(body.decisions) ? body.decisions : [];
+    const undecided = findUndecidedToolCalls(payload, resolutions);
+    if (undecided.length > 0) {
+      return { status: 400, error: 'Every paused tool call must be decided', undecided };
+    }
+    // Enforce the policy's per-tool allowed_decisions — a crafted POST must not
+    // approve a tool the policy restricted to (e.g.) reject/respond.
+    const disallowed = findDisallowedDecisions(payload, resolutions);
+    if (disallowed.length > 0) {
+      return { status: 403, error: 'Decision not permitted for one or more tools', disallowed };
+    }
+    // `edit`/`respond` must carry their payload — otherwise toSdkDecision's defensive
+    // defaults ({} / '') would resume with an empty input/result the user didn't approve.
+    const incomplete = findIncompleteDecisions(resolutions);
+    if (incomplete.length > 0) {
+      return {
+        status: 400,
+        error: 'edit requires editedArguments and respond requires responseText',
+        incomplete,
+      };
+    }
+    return { resumeValue: mapToolApprovalResolutions(resolutions) };
+  }
+  if (payload?.type === 'ask_user_question') {
+    if (typeof body.answer !== 'string' || body.answer.length === 0) {
+      return { status: 400, error: 'An answer is required' };
+    }
+    return { resumeValue: mapAskUserAnswer({ answer: body.answer }) };
+  }
+  return { status: 400, error: 'Unsupported pending action type' };
+}
+
+/**
+ * Finalize a resumed turn that ran to completion: persist the (now complete)
+ * response message, emit the terminal event over the existing SSE, complete the
+ * job, and prune the checkpoint. Mirrors the abort route's save shape but for a
+ * successful finish. Best-effort title generation for a first-turn pause.
+ */
+async function finalizeResumedTurn({ req, client, job, streamId, conversationId, addTitle }) {
+  const userId = req.user.id;
+  const checkpointerCfg = req.config?.endpoints?.[EModelEndpoint.agents]?.checkpointer;
+  const meta = job.metadata ?? {};
+  const userMessage = meta.userMessage;
+  // The response hangs off the user message; the *user* message's own parent decides
+  // whether this is the first turn of the conversation (title eligibility).
+  const parentMessageId = userMessage?.messageId ?? Constants.NO_PARENT;
+  const isFirstTurn = (userMessage?.parentMessageId ?? Constants.NO_PARENT) === Constants.NO_PARENT;
+  const responseMessageId = meta.responseMessageId ?? `${userMessage?.messageId ?? 'resumed'}_`;
+  // Sourced from the paused job (persisted at creation), not the resume body — a
+  // temporary chat must stay temporary on resume so its messages aren't persisted.
+  const isTemporary = meta.isTemporary ?? req.body?.isTemporary;
+
+  // Read the raw job data BEFORE completeJob deletes it — its tracked token/context
+  // usage backs the response message's cost rollup (parity with normal completion).
+  const jobData = await GenerationJobManager.getJobStore().getJob(streamId);
+
+  // Job-replacement guard (mirrors the normal request path): jobs are keyed by streamId
+  // (== conversationId), so a new/concurrent request reusing this conversation overwrites
+  // the record with a fresh createdAt. If that happened while we were resuming, finalizing
+  // now would emit `done` to / complete / delete the NEWER turn's job. Skip all terminal
+  // side effects when the job we paused is no longer the live one; the caller's `finally`
+  // still disposes the client + releases the slot.
+  if (!jobData || jobData.createdAt !== job.createdAt) {
+    logger.warn(
+      `[ResumeAgentController] Skipping resumed finalization — job ${streamId} was replaced`,
+    );
+    return;
+  }
+  // Prefer the resumed run's live content: it's complete (seeded with the pre-pause
+  // content) and avoids a Redis re-read that can race appendChunk writes still in
+  // flight. Fall back to the aggregated store content only when the live array is empty.
+  const liveContent = Array.isArray(client?.contentParts) ? client.contentParts : [];
+  const rawContent =
+    liveContent.length > 0
+      ? liveContent
+      : ((await GenerationJobManager.getResumeState(streamId))?.aggregatedContent ?? []);
+  // Parity with the normal agents path (AgentClient strips these before saving):
+  // drop empty/malformed tool_call parts so a resumed turn can't persist an invalid
+  // part that breaks reload/rendering.
+  const content = filterMalformedContentParts(rawContent);
+
+  const responseMessage = {
+    messageId: responseMessageId,
+    parentMessageId,
+    conversationId,
+    content,
+    sender: meta.sender ?? client?.sender ?? 'AI',
+    endpoint: meta.endpoint,
+    iconURL: meta.iconURL,
+    model: meta.model,
+    unfinished: false,
+    error: false,
+    isCreatedByUser: false,
+    user: userId,
+  };
+  if (meta.agent_id ?? req.body?.agent_id) {
+    responseMessage.agent_id = meta.agent_id ?? req.body.agent_id;
+  }
+  // Persist tool artifacts (code files, images, UI resources) the resumed continuation
+  // produced — BaseClient.sendMessage awaits these before saving, but the lean resume
+  // path bypasses it, so do it here or they vanish on reload / for late subscribers.
+  // MERGE with any already on the row (earlier pause segments) rather than overwrite —
+  // the final segment's client only holds its own segment's artifacts.
+  const attachments = await resolveAccumulatedAttachments({
+    client,
+    conversationId,
+    responseMessageId,
+  });
+  if (attachments.length > 0) {
+    responseMessage.attachments = attachments;
+  }
+
+  // Response metadata: the resume client only sees POST-resume usage, while the job's
+  // tracked tokenUsage is cumulative across the pause. Take the cumulative usage (+
+  // summary marker) from the job, and contextUsage / thoughtSignatures from the client
+  // (which the abort-only helper drops). Cumulative usage wins so cost isn't underreported.
+  const clientMeta = client?.buildResponseMetadata?.() ?? null;
+  const cumulativeMeta = jobData ? buildAbortedResponseMetadata(jobData) : null;
+  const responseMetadata = {
+    ...(clientMeta ?? {}),
+    ...(cumulativeMeta?.usage ? { usage: cumulativeMeta.usage } : {}),
+    ...(cumulativeMeta?.summaryUsedTokens != null
+      ? { summaryUsedTokens: cumulativeMeta.summaryUsedTokens }
+      : {}),
+  };
+  if (Object.keys(responseMetadata).length > 0) {
+    responseMessage.metadata = responseMetadata;
+  }
+  // Carry the resumed run's context-window calibration (BaseClient.sendMessage persists
+  // this on the response). Without it, the NEXT turn can't seed its pruner from this
+  // run and falls back to uncalibrated token accounting.
+  if (client?.contextMeta != null) {
+    responseMessage.contextMeta = client.contextMeta;
+  }
+
+  await saveMessage(
+    { userId, isTemporary, interfaceConfig: req?.config?.interfaceConfig },
+    responseMessage,
+    { context: 'api/server/controllers/agents/resume.js - resumed response end' },
+  );
+
+  const convo = await getConvo(userId, conversationId);
+  const conversation = { ...(convo ?? {}), conversationId };
+
+  // First-turn pause: the title was deferred when the turn paused. Generate it BEFORE
+  // completing the stream so the `title` event still reaches the live client (emitChunk
+  // no-ops once completeJob tears down the runtime) and the final event carries the real
+  // title instead of "New Chat". Best-effort — a failure must not fail the resumed turn.
+  if (
+    addTitle &&
+    isFirstTurn &&
+    !isTemporary &&
+    userMessage?.text &&
+    (!convo || !convo.title || convo.title === 'New Chat')
+  ) {
+    try {
+      await addTitle(req, {
+        text: userMessage.text,
+        conversationId,
+        client,
+        onTitleGenerated: ({ conversationId: titleConvoId, title }) => {
+          conversation.title = title;
+          return GenerationJobManager.emitChunk(streamId, {
+            event: 'title',
+            data: { conversationId: titleConvoId, title },
+          });
+        },
+      });
+    } catch (err) {
+      logger.error('[ResumeAgentController] Title generation failed after resume', err);
+    }
+  }
+  conversation.title = conversation.title || 'New Chat';
+
+  // Re-check ownership immediately before the terminal writes. The start-of-function
+  // guard can go stale across the awaits above: saveMessage and (first-turn) title
+  // generation can take long enough for a new request to replace this job on the same
+  // conversationId (streamId == conversationId). Without this second read, emitDone /
+  // completeJob / prune below would emit `done` to and tear down the REPLACEMENT job —
+  // the same hazard the catch-path guard prevents on the failure path.
+  const liveJobBeforeFinalize = await GenerationJobManager.getJobStore().getJob(streamId);
+  if (!liveJobBeforeFinalize || liveJobBeforeFinalize.createdAt !== job.createdAt) {
+    logger.warn(
+      `[ResumeAgentController] Skipping resumed terminal writes — job ${streamId} was replaced mid-finalize`,
+    );
+    return;
+  }
+
+  const finalEvent = {
+    final: true,
+    conversation,
+    title: conversation.title,
+    requestMessage: userMessage
+      ? sanitizeMessageForTransmit({
+          ...userMessage,
+          conversationId,
+          isCreatedByUser: true,
+          // job.metadata.userMessage is persisted without files; carry the restored
+          // uploads (seeded onto req.body.files before reconstruction) so the final SSE
+          // doesn't blank the user bubble's attachments — matching the normal path.
+          ...(Array.isArray(req.body?.files) && req.body.files.length > 0
+            ? { files: req.body.files }
+            : {}),
+        })
+      : null,
+    responseMessage: { ...responseMessage },
+  };
+
+  await GenerationJobManager.emitDone(streamId, finalEvent);
+  // Awaited (not fire-and-forget) so the job's terminal write lands before the
+  // checkpoint prune, and so a failure here doesn't race the controller's error path.
+  try {
+    await GenerationJobManager.completeJob(streamId);
+  } catch (completeErr) {
+    logger.error('[ResumeAgentController] Failed to complete resumed turn', completeErr);
+  }
+  await deleteAgentCheckpoint(conversationId, checkpointerCfg);
+}
+
+/**
+ * Resume a generation that paused for human-in-the-loop review.
+ *
+ * The original run lives in a detached background task that exits when the run
+ * pauses, so this REBUILDS the run from the durable checkpoint (same `thread_id`)
+ * and continues it with the user's decision. The continuation streams over the
+ * client's existing SSE (events flow through the same `streamId`).
+ *
+ * Flow: authorize → map decisions → atomically claim the resume (single-winner) →
+ * ACK → reconstruct the client → `resumeCompletion` → finalize (or re-pause).
+ *
+ * Shares chat.js's middleware (auth, agent access, `buildEndpointOption`) so the
+ * agent/endpoint are reconstructed from the request exactly like a normal turn.
+ *
+ * @param {express.Request} req
+ * @param {express.Response} res
+ * @param {express.NextFunction} next
+ * @param {Function} initializeClient
+ * @param {Function} addTitle
+ */
+const ResumeAgentController = async (req, res, next, initializeClient, addTitle) => {
+  const userId = req.user.id;
+  const { conversationId, actionId } = req.body;
+  const streamId = conversationId;
+
+  if (!streamId || streamId === 'new') {
+    return res.status(400).json({ error: 'conversationId is required to resume' });
+  }
+
+  const job = await GenerationJobManager.getJob(streamId);
+  if (!job) {
+    return res.status(404).json({ error: 'No paused generation for this conversation' });
+  }
+  if (job.metadata?.userId && job.metadata.userId !== userId) {
+    return res.status(403).json({ error: 'Unauthorized' });
+  }
+  if (hasTenantMismatch(job, req.user)) {
+    return res.status(403).json({ error: 'Unauthorized' });
+  }
+
+  // The resume must rebuild the SAME agent/endpoint that paused. Require an EXACT
+  // agent_id match when the paused job had one — a request that omits agent_id (or
+  // claims an ephemeral / non-agents endpoint) must not rebuild the claimed checkpoint
+  // on a different graph. The conversation's agent is stable, so a correct client always
+  // sends the right one.
+  const originalAgentId = job.metadata?.agent_id;
+  if (originalAgentId && req.body.agent_id !== originalAgentId) {
+    return res.status(403).json({ error: 'Cannot resume with a different agent' });
+  }
+  // Require an EXACT endpoint match (like agent_id): a request that OMITS endpoint must
+  // not fall through — the shared chat middleware treats a missing/non-agents endpoint
+  // as the ephemeral agent, so omitting it could rebuild the claimed checkpoint on a
+  // different graph. A correct client always echoes the paused endpoint.
+  const originalEndpoint = job.metadata?.endpoint;
+  if (originalEndpoint && req.body.endpoint !== originalEndpoint) {
+    return res.status(403).json({ error: 'Cannot resume on a different endpoint' });
+  }
+
+  const pendingAction = job.metadata?.pendingAction;
+  if (job.status !== 'requires_action') {
+    return res.status(409).json({ error: 'No live pending action to resume' });
+  }
+  if (isPendingActionStale({ pendingAction })) {
+    // The action expired between the pending-action SSE and this submit. Drive the expiry
+    // NOW (expire CAS + terminal SSE) instead of waiting for the periodic sweeper —
+    // otherwise the job sits `requires_action` with a dead action and any attached SSE
+    // client never gets a terminal event, so the stream appears to hang even though the
+    // UI already reported the action as expired.
+    try {
+      await GenerationJobManager.expireApproval(streamId, pendingAction?.actionId);
+    } catch (err) {
+      logger.warn(
+        '[ResumeAgentController] Failed to expire stale action on submit',
+        err?.message ?? err,
+      );
+    }
+    return res.status(409).json({ error: 'No live pending action to resume' });
+  }
+  // Require the actionId the UI sends: without it, a stale/malformed client could
+  // resolve whatever action is currently pending (e.g. answer a different question).
+  if (!actionId) {
+    return res.status(400).json({ error: 'actionId is required to resume' });
+  }
+  if (pendingAction.actionId !== actionId) {
+    return res.status(409).json({ error: 'This decision targets a stale action' });
+  }
+
+  // Pin the graph identity: the resume must rebuild the SAME agent/graph + tool set the
+  // run paused on. The agent_id + endpoint guards above cover saved agents; the
+  // fingerprint additionally catches an ephemeral-agent config swap (its agent_id is
+  // undefined, so the id guard can't tell two ephemeral configs apart). Enforced only
+  // when the paused action carries a fingerprint (in-flight pauses from before this
+  // change won't), and recomputed from the resume body's graph-determining fields.
+  const pinnedFingerprint = pendingAction.requestFingerprint;
+  if (pinnedFingerprint && pinnedFingerprint !== computeAgentRequestFingerprint(req.body ?? {})) {
+    return res.status(403).json({ error: 'Cannot resume with a different agent configuration' });
+  }
+
+  const mapped = resolveResumeValue(pendingAction, req.body);
+  if (mapped.error) {
+    return res.status(mapped.status).json({
+      error: mapped.error,
+      ...(mapped.undecided && { undecided: mapped.undecided }),
+      ...(mapped.disallowed && { disallowed: mapped.disallowed }),
+      ...(mapped.incomplete && { incomplete: mapped.incomplete }),
+    });
+  }
+
+  // Count the resume against the concurrency limit. The original turn released its slot
+  // when it paused, so resuming must re-acquire one — otherwise pausing several turns
+  // and resuming them at once would bypass LIMIT_CONCURRENT_MESSAGES.
+  const { allowed } = await checkAndIncrementPendingRequest(userId);
+  if (!allowed) {
+    return res.status(429).json({ error: 'Too many concurrent requests' });
+  }
+
+  // Atomically claim the resume. The single winner drives the run; a racing second
+  // submit (double-click, two tabs) gets false and must not re-drive — that would
+  // re-execute tools and double-bill.
+  //
+  // The claim runs AFTER the slot increment above but BEFORE the run's own try/finally
+  // that releases it, so a store/Redis error here (unlike the clean `!claimed` branch)
+  // would leak the concurrency slot until the counter TTL expires — spuriously 429'ing
+  // the user when they retry the still-paused approval. Release the slot on that path too.
+  let claimed;
+  try {
+    claimed = await GenerationJobManager.approvals.resolve(streamId, pendingAction.actionId);
+  } catch (err) {
+    await decrementPendingRequest(userId);
+    logger.error('[ResumeAgentController] Failed to claim resume', err);
+    return res.status(500).json({ error: 'Failed to resume' });
+  }
+  if (!claimed) {
+    await decrementPendingRequest(userId);
+    return res.status(409).json({ error: 'This action was already resolved or has expired' });
+  }
+
+  // Seed the run-scoped MCP request-context store BEFORE the ACK: once `res.json`
+  // finishes the response, a later `getMCPRequestContext(req, res)` (from tool loading)
+  // sees `res` as ended and returns undefined, leaving the resumed run without its MCP
+  // connection store — approved MCP / OAuth-overlay tools would then run without their
+  // request-scoped connections. Pre-seeding with a null `res` + `cleanupOnResponse:false`
+  // mirrors the normal stream path (request.js); torn down in the `finally` below.
+  req._resumableStreamId = streamId;
+  getMCPRequestContext(req, undefined, { cleanupOnResponse: false });
+
+  // ACK immediately; the continuation streams over the client's existing SSE.
+  res.json({ streamId, conversationId, status: 'resuming' });
+
+  // Seed the original thread parent BEFORE initializeClient: initializeAgent scopes
+  // thread files / code artifacts off `req.body.parentMessageId`, and the resume body
+  // doesn't carry it. This is the user message's parent (the thread position);
+  // `client.parentMessageId` below is a different value — the response's parent, i.e.
+  // the user message id.
+  req.body.parentMessageId = job.metadata.userMessage?.parentMessageId ?? Constants.NO_PARENT;
+
+  // Restore the paused user message's OWN uploaded files. initializeAgent rebuilds
+  // code/file sessions by walking the conversation from `parentMessageId`, but
+  // execute-code files are excluded from that lookup, so files uploaded on the paused
+  // turn would be dropped — an approved code/read-file tool would resume without them.
+  //
+  // SECURITY: ALWAYS source files from the paused job, never from the `/resume` body.
+  // `files` is not pinned by the resume fingerprint or replayed via resumeContext, so
+  // honoring a client-supplied `files` array would let a crafted/buggy client resume an
+  // approved code/read-file tool against a DIFFERENT file set than the one the user
+  // approved. A resume reconstructs the SAME paused turn, so there is no legitimate
+  // reason for the client to supply its own files. Prefer the files persisted on the JOB
+  // at onStart (race-free), fall back to the DB row for older jobs, and CLEAR otherwise
+  // so a client-supplied set can never leak through.
+  const metaFiles = job.metadata.userMessage?.files;
+  if (Array.isArray(metaFiles) && metaFiles.length > 0) {
+    req.body.files = metaFiles;
+  } else {
+    let restoredFiles = false;
+    const pausedUserMessageId = job.metadata.userMessage?.messageId;
+    if (pausedUserMessageId) {
+      try {
+        const [row] = await getMessages(
+          { conversationId, messageId: pausedUserMessageId },
+          'files',
+        );
+        if (Array.isArray(row?.files) && row.files.length > 0) {
+          req.body.files = row.files;
+          restoredFiles = true;
+        }
+      } catch (err) {
+        logger.warn(
+          '[ResumeAgentController] Failed to restore paused user message files',
+          err?.message ?? err,
+        );
+      }
+    }
+    if (!restoredFiles) {
+      // No paused files (or the lookup failed): drop any client-supplied files so a
+      // crafted resume body can't inject a file set the paused turn never had.
+      req.body.files = [];
+    }
+  }
+
+  // Restore the conversation's createdAt so temporal prompt vars ({{current_datetime}},
+  // {{iso_datetime}}, ...) resolve against the SAME anchor the paused graph used rather
+  // than the resume wall-clock. initializeAgent reads `req.conversationCreatedAt`; the
+  // normal path sets it from the convo timestamp (resolveConversationCreatedAt), so mirror
+  // that here. (The original `timezone` is replayed onto req.body via RESUME_CONTEXT_KEYS.)
+  try {
+    const resumedConvo = await getConvo(userId, conversationId);
+    const createdAt = resumedConvo?.createdAt ? new Date(resumedConvo.createdAt) : null;
+    if (createdAt && !Number.isNaN(createdAt.getTime())) {
+      req.conversationCreatedAt = createdAt.toISOString();
+    }
+  } catch (err) {
+    logger.warn(
+      '[ResumeAgentController] Failed to restore conversation timestamp anchor',
+      err?.message ?? err,
+    );
+  }
+
+  let client = null;
+  try {
+    const result = await initializeClient({
+      req,
+      res,
+      endpointOption: req.body.endpointOption,
+      signal: job.abortController.signal,
+    });
+    client = result.client;
+
+    // Bind the rebuilt client to the in-flight turn's identity (no new user message).
+    client.conversationId = streamId;
+    // The resume operates on the SAME job (it moved it running again), so its identity is
+    // the paused job's createdAt — used by the re-pause CAS pre-check + checkpoint prune to
+    // avoid acting on a job a newer request has since replaced.
+    client.jobCreatedAt = job.createdAt;
+    client.responseMessageId = job.metadata.responseMessageId;
+    client.parentMessageId = job.metadata.userMessage?.messageId ?? Constants.NO_PARENT;
+    // Read the pre-pause content BEFORE swapping the store's content reference: the
+    // in-memory store's setContentParts REPLACES the stored array, so reading the
+    // resume state afterward would see the new (empty) client array and lose the seed.
+    const resumeState = await GenerationJobManager.getResumeState(streamId);
+    const seedContent = resumeState?.aggregatedContent ?? [];
+    if (client.contentParts) {
+      GenerationJobManager.setContentParts(streamId, client.contentParts);
+    }
+
+    await client.resumeCompletion({
+      resumeValue: mapped.resumeValue,
+      seedContent,
+      abortController: job.abortController,
+      // Carry the user's MCP auth so approved MCP tools run with their credentials.
+      userMCPAuthMap: result.userMCPAuthMap,
+      // Replay deferred tools discovered before the pause (captured at pause). The rebuilt
+      // graph passes `messages: []`, so without these an approved deferred tool would be
+      // absent from the schema-only toolMap and resume would fail with "unknown tool".
+      discoveredToolNames: job.metadata?.discoveredTools,
+    });
+
+    // The model may pause AGAIN (another tool, or a follow-up question). The pending
+    // action is already persisted + emitted; leave the job `requires_action`.
+    if (client.pendingApproval) {
+      logger.debug(`[ResumeAgentController] Re-paused for approval: ${streamId}`);
+      // Persist this segment's content + artifacts before the fresh client (next
+      // resume) drops them, so an expiring re-pause doesn't lose them; finalize later
+      // overwrites content and merges attachments onto the saved message.
+      await persistRePauseProgress({ req, client, job, streamId, conversationId });
+      return;
+    }
+
+    // If the user aborted mid-resume, the abort route already emitted the terminal
+    // event and finalized the job — don't double-save / double-finalize here.
+    if (job.abortController.signal.aborted) {
+      logger.debug(
+        `[ResumeAgentController] Aborted during resume; abort route finalizes: ${streamId}`,
+      );
+      return;
+    }
+
+    await finalizeResumedTurn({ req, client, job, streamId, conversationId, addTitle });
+  } catch (err) {
+    logger.error('[ResumeAgentController] Resume failed', err);
+    // Job-replacement guard (mirrors finalizeResumedTurn's success-path guard): if a
+    // newer request reused this conversationId while the resume was failing, do NOT emit
+    // the error to / complete / prune the NEWER turn's job. The finally still releases
+    // the slot + disposes. Proceed with finalization if the replacement check itself fails.
+    let stillLive = true;
+    try {
+      const liveJob = await GenerationJobManager.getJobStore().getJob(streamId);
+      stillLive = !!liveJob && liveJob.createdAt === job.createdAt;
+    } catch (readErr) {
+      logger.warn('[ResumeAgentController] Replacement check failed; finalizing anyway', readErr);
+    }
+    if (!stillLive) {
+      logger.warn(
+        `[ResumeAgentController] Skipping failed-resume finalization — job ${streamId} was replaced`,
+      );
+    } else {
+      try {
+        await GenerationJobManager.emitError(streamId, err?.message ?? 'Resume failed');
+      } catch (emitErr) {
+        logger.error('[ResumeAgentController] Failed to emit resume error', emitErr);
+      }
+      try {
+        await GenerationJobManager.completeJob(streamId, err?.message ?? 'Resume failed');
+      } catch (completeErr) {
+        logger.error('[ResumeAgentController] Failed to finalize failed resume', completeErr);
+        // Last resort: force a terminal state so the job isn't orphaned in `running`.
+        await GenerationJobManager.getJobStore()
+          .updateJob(streamId, {
+            status: 'error',
+            completedAt: Date.now(),
+            error: 'Resume failed',
+          })
+          .catch((updErr) =>
+            logger.error('[ResumeAgentController] Fallback job finalize failed', updErr),
+          );
+      }
+      await deleteAgentCheckpoint(
+        conversationId,
+        req.config?.endpoints?.[EModelEndpoint.agents]?.checkpointer,
+      );
+    }
+  } finally {
+    // Tear down the MCP request-context store seeded before the ACK (parity with
+    // request.js's finishResumableRequest). No-op if it was never seeded.
+    await cleanupMCPRequestContextForReq(req);
+    // Release the concurrency slot taken above — UNLESS handleRunInterrupt already
+    // released it on a re-pause (so a fast /resume isn't 429'd). On a normal finish or
+    // error it didn't, so release here. A re-pause re-acquires its own slot next resume.
+    if (!client?.pendingRequestReleased) {
+      await decrementPendingRequest(userId);
+    }
+    if (client) {
+      disposeClient(client);
+    }
+  }
+};
+
+module.exports = ResumeAgentController;
diff --git a/api/server/middleware/moderateText.js b/api/server/middleware/moderateText.js
index 1aed0991ad..3356f75921 100644
--- a/api/server/middleware/moderateText.js
+++ b/api/server/middleware/moderateText.js
@@ -17,8 +17,17 @@ async function moderateText(req, res, next) {
      * `getReferencedQuotes` first (matching `BaseClient`); moderating the merged
      * string also covers content split across a quote and the typed body. The
      * moderation API accepts an array of inputs.
+     *
+     * `answer` covers the HITL resume payload (POST /agents/chat/resume) for an
+     * ask-user question — the user's free-form text — so it's moderated like a typed
+     * message. A tool-approval resume carries no user text and is skipped below.
      */
-    const safeText = typeof text === 'string' ? text : '';
+    let safeText = '';
+    if (typeof text === 'string') {
+      safeText = text;
+    } else if (typeof req.body.answer === 'string') {
+      safeText = req.body.answer;
+    }
     const inputs = [];
     if (safeText.length > 0) {
       inputs.push(safeText);
@@ -28,7 +37,36 @@ async function moderateText(req, res, next) {
       inputs.push(...quotes);
       inputs.push(mergeQuotedText(safeText, quotes));
     }
-    const input = inputs.length > 1 ? inputs : (inputs[0] ?? text);
+    // A tool-approval resume can carry user-authored text in `decisions[]`: the
+    // `respond` substitute result, a `reject` reason, and `edit`ed tool arguments —
+    // moderate all of them like typed text (edited args stringified).
+    if (Array.isArray(req.body.decisions)) {
+      for (const decision of req.body.decisions) {
+        if (typeof decision?.responseText === 'string' && decision.responseText.length > 0) {
+          inputs.push(decision.responseText);
+        }
+        if (typeof decision?.reason === 'string' && decision.reason.length > 0) {
+          inputs.push(decision.reason);
+        }
+        if (decision?.editedArguments != null) {
+          try {
+            const edited = JSON.stringify(decision.editedArguments);
+            if (typeof edited === 'string' && edited.length > 0) {
+              inputs.push(edited);
+            }
+          } catch {
+            /* ignore unstringifiable edited args */
+          }
+        }
+      }
+    }
+    // Nothing to moderate (e.g. a tool-approval resume with no `respond` text) —
+    // don't post an empty/undefined `input`, which the moderation API rejects and which
+    // would otherwise deny the request.
+    if (inputs.length === 0) {
+      return next();
+    }
+    const input = inputs.length > 1 ? inputs : inputs[0];
 
     const response = await axios.post(
       process.env.OPENAI_MODERATION_REVERSE_PROXY || 'https://api.openai.com/v1/moderations',
diff --git a/api/server/routes/agents/__tests__/abort.spec.js b/api/server/routes/agents/__tests__/abort.spec.js
index 9fe007596c..15efb19a39 100644
--- a/api/server/routes/agents/__tests__/abort.spec.js
+++ b/api/server/routes/agents/__tests__/abort.spec.js
@@ -293,6 +293,40 @@ describe('Agent Abort Endpoint', () => {
         );
       });
 
+      it('saves the aborted partial as temporary from job metadata, not the request body', async () => {
+        const jobStreamId = 'test-stream-123';
+
+        mockGenerationJobManager.getJob.mockResolvedValue({
+          metadata: { userId: 'test-user-123' },
+        });
+
+        // The job was a temporary chat; the stop button posts only conversationId.
+        mockGenerationJobManager.abortJob.mockResolvedValue({
+          success: true,
+          jobData: {
+            userMessage: { messageId: 'user-msg-123' },
+            responseMessageId: 'response-msg-456',
+            conversationId: jobStreamId,
+            isTemporary: true,
+          },
+          content: [{ type: 'text', text: 'Partial...' }],
+          text: 'Partial...',
+        });
+
+        mockSaveMessage.mockResolvedValue();
+
+        const response = await request(app)
+          .post('/api/agents/chat/abort')
+          .send({ conversationId: jobStreamId }); // no isTemporary in body
+
+        expect(response.status).toBe(200);
+        expect(mockSaveMessage).toHaveBeenCalledWith(
+          expect.objectContaining({ isTemporary: true }),
+          expect.anything(),
+          expect.anything(),
+        );
+      });
+
       it('should handle saveMessage errors gracefully', async () => {
         const jobStreamId = 'test-stream-123';
 
diff --git a/api/server/routes/agents/chat.js b/api/server/routes/agents/chat.js
index 8ffbde6552..9463406652 100644
--- a/api/server/routes/agents/chat.js
+++ b/api/server/routes/agents/chat.js
@@ -1,5 +1,12 @@
 const express = require('express');
-const { createMessageFilterPii, generateCheckAccess, skipAgentCheck } = require('@librechat/api');
+const { logger } = require('@librechat/data-schemas');
+const {
+  createMessageFilterPii,
+  generateCheckAccess,
+  skipAgentCheck,
+  applyResumeContext,
+  GenerationJobManager,
+} = require('@librechat/api');
 const { PermissionTypes, Permissions, PermissionBits } = require('librechat-data-provider');
 const {
   moderateText,
@@ -10,6 +17,7 @@ const {
 } = require('~/server/middleware');
 const { initializeClient } = require('~/server/services/Endpoints/agents');
 const AgentController = require('~/server/controllers/agents/request');
+const ResumeController = require('~/server/controllers/agents/resume');
 const addTitle = require('~/server/services/Endpoints/agents/title');
 const { getRoleByName } = require('~/models');
 
@@ -25,6 +33,45 @@ const checkAgentResourceAccess = canAccessAgentFromBody({
   requiredPermission: PermissionBits.VIEW,
 });
 
+/**
+ * Replay the paused turn's graph-determining config onto a resume request BEFORE the
+ * rest of the chain (PII filter, agent-access, buildEndpointOption) reads it. The client
+ * can't reliably re-send the ephemeral-agent config after a reload/cross-session, so the
+ * server restores it from the pending action — the resume then rebuilds the SAME
+ * agent/graph the run paused on (and a crafted resume can't swap the tool set). No-op for
+ * every non-resume route.
+ */
+const restoreResumeContext = async (req, res, next) => {
+  if (req.path !== '/resume') {
+    return next();
+  }
+  try {
+    const streamId = req.body?.conversationId;
+    if (streamId) {
+      const job = await GenerationJobManager.getJob(streamId);
+      const resumeContext = job?.metadata?.pendingAction?.resumeContext;
+      applyResumeContext(req.body, resumeContext);
+      // Replay the paused turn's resolved model parameters. Ephemeral agents derive these
+      // (temperature, max tokens, custom endpoint params) from the request body, which the
+      // resume payload omits — without this the continuation runs with defaults. They're
+      // scattered top-level fields (folded into model_parameters by buildOptions' rest
+      // spread), not part of the RESUME_CONTEXT_KEYS allowlist, so merge them back here.
+      // Authoritative: overwrites any client-supplied values with the captured set.
+      // `model` is excluded — it's replayed via RESUME_CONTEXT_KEYS to the exact value the
+      // resume fingerprint was pinned on, so overwriting it here could trip that check.
+      const resumedModelParameters = resumeContext?.model_parameters;
+      if (resumedModelParameters && typeof resumedModelParameters === 'object') {
+        const { model: _replayedModel, ...replayParams } = resumedModelParameters;
+        Object.assign(req.body, replayParams);
+      }
+    }
+  } catch (err) {
+    logger.warn('[agents/chat] Failed to restore resume context', err?.message ?? err);
+  }
+  next();
+};
+
+router.use(restoreResumeContext);
 router.use(createMessageFilterPii({ getConfig: (req) => req.config?.messageFilter?.pii }));
 router.use(moderateText);
 router.use(checkAgentAccess);
@@ -36,6 +83,21 @@ const controller = async (req, res, next) => {
   await AgentController(req, res, next, initializeClient, addTitle);
 };
 
+const resumeController = async (req, res, next) => {
+  await ResumeController(req, res, next, initializeClient, addTitle);
+};
+
+/**
+ * @route POST /resume
+ * @desc Resume a generation paused for human-in-the-loop review (tool approval or
+ *       ask-user answer). Shares this router's middleware so the agent/endpoint are
+ *       reconstructed from the request exactly like a normal turn. Declared before
+ *       `/:endpoint` so it is not captured as an ephemeral endpoint name.
+ * @access Private
+ * @returns {void}
+ */
+router.post('/resume', resumeController);
+
 /**
  * @route POST / (regular endpoint)
  * @desc Chat with an assistant
diff --git a/api/server/routes/agents/index.js b/api/server/routes/agents/index.js
index 7fb23f6c66..fe36869983 100644
--- a/api/server/routes/agents/index.js
+++ b/api/server/routes/agents/index.js
@@ -5,6 +5,8 @@ const {
   hasPersistableAbortContent,
   buildAbortedResponseMetadata,
   isPendingActionStale,
+  isHITLEnabled,
+  deleteAgentCheckpoint,
 } = require('@librechat/api');
 const { createSseStreamTelemetry } = require('@librechat/api/telemetry');
 const { logger } = require('@librechat/data-schemas');
@@ -231,7 +233,7 @@ router.get('/chat/status/:conversationId', async (req, res) => {
  * @access Private
  * @description Mounted before chatRouter to bypass buildEndpointOption middleware
  */
-router.post('/chat/abort', async (req, res) => {
+router.post('/chat/abort', configMiddleware, async (req, res) => {
   logger.debug(`[AgentStream] ========== ABORT ENDPOINT HIT ==========`);
   logger.debug(`[AgentStream] Method: ${req.method}, Path: ${req.path}`);
   logger.debug(`[AgentStream] Body:`, req.body);
@@ -288,6 +290,17 @@ router.post('/chat/abort', async (req, res) => {
       abortResultResponseMessageId: abortResult.jobData?.responseMessageId,
     });
 
+    // HITL: prune the durable checkpoint of a run aborted while paused, so a new turn
+    // in this conversation can't rehydrate the stale interrupt before the Mongo TTL
+    // reclaims it (thread_id is the stable conversationId). Idempotent / no-op when
+    // HITL is off or nothing was written.
+    const agentsCfg = req.config?.endpoints?.agents;
+    if (isHITLEnabled(agentsCfg?.toolApproval)) {
+      await deleteAgentCheckpoint(jobStreamId, agentsCfg?.checkpointer).catch((err) =>
+        logger.error(`[AgentStream] Failed to prune checkpoint on abort: ${jobStreamId}`, err),
+      );
+    }
+
     // CRITICAL: Save partial response BEFORE returning to prevent race condition.
     // If user sends a follow-up immediately after abort, the parentMessageId must exist in DB.
     // Only save if we have a valid responseMessageId (skip early aborts before generation started)
@@ -327,7 +340,10 @@ router.post('/chat/abort', async (req, res) => {
         await saveMessage(
           {
             userId: req?.user?.id,
-            isTemporary: req?.body?.isTemporary,
+            // Source from the job, not the request: the stop button posts only the
+            // conversationId, so trusting req.body.isTemporary would persist an aborted
+            // temporary-chat partial as a normal (orphaned) message.
+            isTemporary: jobData?.isTemporary ?? req?.body?.isTemporary,
             interfaceConfig: req?.config?.interfaceConfig,
           },
           responseMessage,
diff --git a/client/src/components/Chat/Messages/Content/ApprovalContext.tsx b/client/src/components/Chat/Messages/Content/ApprovalContext.tsx
new file mode 100644
index 0000000000..aed523c42d
--- /dev/null
+++ b/client/src/components/Chat/Messages/Content/ApprovalContext.tsx
@@ -0,0 +1,292 @@
+import { createContext, useCallback, useContext, useMemo, useRef, useState } from 'react';
+import { Constants } from 'librechat-data-provider';
+import type { Agents } from 'librechat-data-provider';
+import {
+  useSubmitToolApprovalMutation,
+  useSubmitAskAnswerMutation,
+  type ResumeAgentFields,
+} from '~/data-provider';
+import { ChatContext } from '~/Providers/ChatContext';
+import { useGetEphemeralAgent } from '~/store/agents';
+
+/** Per-action submission lifecycle, surfaced to the cards so they can disable
+ *  controls and explain a terminal outcome. */
+type ActionStatus = 'idle' | 'submitting' | 'submitted' | 'expired' | 'error';
+
+interface ApprovalContextValue {
+  /** Record (or clear) a card's decision for its tool_call within an action. */
+  setDecision: (
+    actionId: string,
+    toolCallId: string,
+    decision: Agents.ToolApprovalResolution | null,
+  ) => void;
+  /** Current decision a card holds, if any (drives selected-state styling). */
+  getDecision: (actionId: string, toolCallId: string) => Agents.ToolApprovalResolution | undefined;
+  /** Every recorded decision for an action, in registration order (the submit batch). */
+  getDecisions: (actionId: string) => Agents.ToolApprovalResolution[];
+  /** Declare that a tool_call belongs to an action so submit can require all. */
+  registerToolCall: (actionId: string, toolCallId: string) => void;
+  /** Drop a tool_call's registration when its card unmounts, so a resolved/removed
+   *  card can't keep `isReady` false and wedge the batch submit. */
+  unregisterToolCall: (actionId: string, toolCallId: string) => void;
+  /** The first-registered tool_call for an action — the single card that owns
+   *  the batch submit button (avoids N buttons across sibling cards). */
+  getLeadToolCallId: (actionId: string) => string | undefined;
+  /** Number of tool calls paused under an action (for a "1 of N" label). */
+  getRegisteredCount: (actionId: string) => number;
+  /** True once every registered tool_call in the action has a decision. */
+  isReady: (actionId: string) => boolean;
+  /** Lifecycle status for an action (so cards can disable / show messages). */
+  getStatus: (actionId: string) => ActionStatus;
+  /** Set an action's submission status (driven by the cards' submit via `useResumeSubmit`). */
+  setStatus: (actionId: string, status: ActionStatus) => void;
+}
+
+const ApprovalContext = createContext<ApprovalContextValue | null>(null);
+
+/** Cards call this; outside a provider it degrades to inert no-ops so a tool
+ *  call without an active approval never crashes. */
+export const useApprovalContext = (): ApprovalContextValue => {
+  const ctx = useContext(ApprovalContext);
+  return ctx ?? FALLBACK;
+};
+
+const FALLBACK: ApprovalContextValue = {
+  setDecision: () => undefined,
+  getDecision: () => undefined,
+  getDecisions: () => [],
+  registerToolCall: () => undefined,
+  unregisterToolCall: () => undefined,
+  getLeadToolCallId: () => undefined,
+  getRegisteredCount: () => 0,
+  isReady: () => false,
+  getStatus: () => 'idle',
+  setStatus: () => undefined,
+};
+
+const isExpiredError = (error: unknown): boolean => {
+  const status = (error as { response?: { status?: number } } | undefined)?.response?.status;
+  return status === 409;
+};
+
+/**
+ * Coordinates human-in-the-loop decisions for a single response message.
+ *
+ * An action may pause multiple tool calls (same `actionId`); each `ToolApproval`
+ * card registers its `tool_call_id` and records a decision here, and the lead card
+ * submits ONCE with the full `decisions[]` covering every paused call (the server
+ * rejects a partial batch).
+ *
+ * Intentionally PURE state — it does NOT read `ChatContext`, the agent store, or
+ * React Query. Message content renders in places without those providers (shared /
+ * exported views, tests), so the provider must be safe to mount anywhere. The
+ * context-dependent submit lives in {@link useResumeSubmit}, which the cards call —
+ * and the cards only render inside a live chat view where those providers exist.
+ */
+export default function ApprovalProvider({ children }: { children: React.ReactNode }) {
+  /** actionId → (tool_call_id → resolution). Mutable ref + a version bump so
+   *  reads are synchronous for `isReady`/submit while renders stay cheap. */
+  const decisionsRef = useRef(new Map<string, Map<string, Agents.ToolApprovalResolution>>());
+  const registeredRef = useRef(new Map<string, Set<string>>());
+  const [, bump] = useState(0);
+  const rerender = useCallback(() => bump((v) => v + 1), []);
+  const [statusByAction, setStatusByAction] = useState<Record<string, ActionStatus>>({});
+
+  const registerToolCall = useCallback(
+    (actionId: string, toolCallId: string) => {
+      const set = registeredRef.current.get(actionId) ?? new Set<string>();
+      if (set.has(toolCallId)) {
+        return;
+      }
+      set.add(toolCallId);
+      registeredRef.current.set(actionId, set);
+      /** A newly-registered call shifts the lead / "of N" count for sibling
+       *  cards — re-render so they reflect it. */
+      rerender();
+    },
+    [rerender],
+  );
+
+  const unregisterToolCall = useCallback(
+    (actionId: string, toolCallId: string) => {
+      const set = registeredRef.current.get(actionId);
+      if (!set || !set.has(toolCallId)) {
+        return;
+      }
+      set.delete(toolCallId);
+      // Also drop any decision it held so a stale entry can't linger.
+      decisionsRef.current.get(actionId)?.delete(toolCallId);
+      if (set.size === 0) {
+        registeredRef.current.delete(actionId);
+        decisionsRef.current.delete(actionId);
+      }
+      rerender();
+    },
+    [rerender],
+  );
+
+  const getLeadToolCallId = useCallback(
+    (actionId: string) => registeredRef.current.get(actionId)?.values().next().value,
+    [],
+  );
+
+  const getRegisteredCount = useCallback(
+    (actionId: string) => registeredRef.current.get(actionId)?.size ?? 0,
+    [],
+  );
+
+  const setDecision = useCallback(
+    (actionId: string, toolCallId: string, decision: Agents.ToolApprovalResolution | null) => {
+      const map = decisionsRef.current.get(actionId) ?? new Map();
+      if (decision == null) {
+        map.delete(toolCallId);
+      } else {
+        map.set(toolCallId, decision);
+      }
+      decisionsRef.current.set(actionId, map);
+      rerender();
+    },
+    [rerender],
+  );
+
+  const getDecision = useCallback(
+    (actionId: string, toolCallId: string) => decisionsRef.current.get(actionId)?.get(toolCallId),
+    [],
+  );
+
+  const getDecisions = useCallback(
+    (actionId: string) => Array.from(decisionsRef.current.get(actionId)?.values() ?? []),
+    [],
+  );
+
+  const isReady = useCallback((actionId: string) => {
+    const registered = registeredRef.current.get(actionId);
+    const decided = decisionsRef.current.get(actionId);
+    if (!registered || registered.size === 0) {
+      return false;
+    }
+    for (const toolCallId of registered) {
+      if (!decided?.has(toolCallId)) {
+        return false;
+      }
+    }
+    return true;
+  }, []);
+
+  const getStatus = useCallback(
+    (actionId: string): ActionStatus => statusByAction[actionId] ?? 'idle',
+    [statusByAction],
+  );
+
+  const setStatus = useCallback((actionId: string, status: ActionStatus) => {
+    setStatusByAction((prev) => ({ ...prev, [actionId]: status }));
+  }, []);
+
+  const value = useMemo<ApprovalContextValue>(
+    () => ({
+      setDecision,
+      getDecision,
+      getDecisions,
+      registerToolCall,
+      unregisterToolCall,
+      getLeadToolCallId,
+      getRegisteredCount,
+      isReady,
+      getStatus,
+      setStatus,
+    }),
+    [
+      setDecision,
+      getDecision,
+      getDecisions,
+      registerToolCall,
+      unregisterToolCall,
+      getLeadToolCallId,
+      getRegisteredCount,
+      isReady,
+      getStatus,
+      setStatus,
+    ],
+  );
+
+  return <ApprovalContext.Provider value={value}>{children}</ApprovalContext.Provider>;
+}
+
+/**
+ * Submit hook for the approval cards. Sources the resume body's agent/endpoint
+ * fields from the active conversation (so the route's shared `buildEndpointOption`
+ * middleware reconstructs the same agent) and fires the resume mutation, threading
+ * the result back into the action's status.
+ *
+ * Reads `ChatContext` / the agent store / React Query. The cards render it from
+ * live chat views but ALSO from contexts without a `ChatContext.Provider` (e.g. a
+ * subagent tool paused inside a portaled dialog, or a search/citation render that
+ * passes chat context as a prop), so it reads the context non-throwingly: with no
+ * conversation, `buildResumeFields` returns null and the controls are inert rather
+ * than crashing.
+ */
+export function useResumeSubmit() {
+  const conversation = useContext(ChatContext)?.conversation;
+  const getEphemeralAgent = useGetEphemeralAgent();
+  const approvalMutation = useSubmitToolApprovalMutation();
+  const askMutation = useSubmitAskAnswerMutation();
+  const { getDecisions, isReady, setStatus } = useApprovalContext();
+
+  const buildResumeFields = useCallback((): ResumeAgentFields | null => {
+    const conversationId = conversation?.conversationId;
+    if (!conversationId || conversationId === Constants.NEW_CONVO) {
+      return null;
+    }
+    return {
+      conversationId,
+      endpoint: conversation?.endpoint,
+      endpointType: conversation?.endpointType,
+      agent_id: conversation?.agent_id,
+      model: conversation?.model,
+      spec: conversation?.spec,
+      // Ephemeral agents derive their instructions from promptPrefix — re-send it so
+      // the resumed run rebuilds the same graph and matches the server fingerprint.
+      promptPrefix: conversation?.promptPrefix,
+      ephemeralAgent: getEphemeralAgent(conversationId),
+    };
+  }, [conversation, getEphemeralAgent]);
+
+  const submitToolApproval = useCallback(
+    (actionId: string) => {
+      const fields = buildResumeFields();
+      const decisions = getDecisions(actionId);
+      if (!fields || decisions.length === 0 || !isReady(actionId)) {
+        return;
+      }
+      setStatus(actionId, 'submitting');
+      approvalMutation.mutate(
+        { ...fields, actionId, decisions },
+        {
+          onSuccess: () => setStatus(actionId, 'submitted'),
+          onError: (error) => setStatus(actionId, isExpiredError(error) ? 'expired' : 'error'),
+        },
+      );
+    },
+    [approvalMutation, buildResumeFields, getDecisions, isReady, setStatus],
+  );
+
+  const submitAskAnswer = useCallback(
+    (actionId: string, answer: string) => {
+      const fields = buildResumeFields();
+      if (!fields || answer.length === 0) {
+        return;
+      }
+      setStatus(actionId, 'submitting');
+      askMutation.mutate(
+        { ...fields, actionId, answer },
+        {
+          onSuccess: () => setStatus(actionId, 'submitted'),
+          onError: (error) => setStatus(actionId, isExpiredError(error) ? 'expired' : 'error'),
+        },
+      );
+    },
+    [askMutation, buildResumeFields, setStatus],
+  );
+
+  return { submitToolApproval, submitAskAnswer };
+}
diff --git a/client/src/components/Chat/Messages/Content/AskUserQuestion.tsx b/client/src/components/Chat/Messages/Content/AskUserQuestion.tsx
new file mode 100644
index 0000000000..55f85b26af
--- /dev/null
+++ b/client/src/components/Chat/Messages/Content/AskUserQuestion.tsx
@@ -0,0 +1,93 @@
+import { useState } from 'react';
+import { TriangleAlert } from 'lucide-react';
+import { Button, TextareaAutosize } from '@librechat/client';
+import type { Agents } from 'librechat-data-provider';
+import { useApprovalContext, useResumeSubmit } from './ApprovalContext';
+import { useLocalize } from '~/hooks';
+
+/**
+ * Renders an `ask_user_question` pause: the prompt, optional description, any
+ * curated option buttons, and a free-form text answer. Each path submits the
+ * answer through {@link useApprovalContext} to resume the paused run; the
+ * continuation arrives on the existing SSE.
+ */
+export default function AskUserQuestion({
+  actionId,
+  question,
+}: {
+  actionId: string;
+  question: Agents.AskUserQuestionRequest;
+}) {
+  const localize = useLocalize();
+  const { getStatus } = useApprovalContext();
+  const { submitAskAnswer } = useResumeSubmit();
+  const [answer, setAnswer] = useState('');
+
+  const status = getStatus(actionId);
+  const locked = status === 'submitting' || status === 'submitted' || status === 'expired';
+
+  if (status === 'submitted') {
+    return null;
+  }
+
+  const trimmed = answer.trim();
+
+  return (
+    <div className="my-2 flex w-full flex-col gap-2 rounded-lg border border-border-light bg-surface-secondary p-3">
+      <p className="text-sm font-medium text-text-primary">{question.question}</p>
+      {question.description != null && question.description.length > 0 && (
+        <p className="text-sm text-text-secondary">{question.description}</p>
+      )}
+
+      {question.options != null && question.options.length > 0 && (
+        <div className="flex flex-wrap gap-2">
+          {question.options.map((option) => (
+            <Button
+              key={option.value}
+              size="sm"
+              variant="outline"
+              disabled={locked}
+              onClick={() => submitAskAnswer(actionId, option.value)}
+            >
+              {option.label}
+            </Button>
+          ))}
+        </div>
+      )}
+
+      <TextareaAutosize
+        value={answer}
+        disabled={locked}
+        onChange={(e) => setAnswer(e.target.value)}
+        minRows={2}
+        maxRows={12}
+        placeholder={localize('com_ui_your_answer')}
+        className="w-full resize-none rounded-md border border-border-light bg-surface-primary p-2 text-sm"
+        aria-label={localize('com_ui_your_answer')}
+      />
+
+      <div className="flex items-center gap-3">
+        <Button
+          size="sm"
+          variant="submit"
+          disabled={trimmed.length === 0 || locked}
+          onClick={() => submitAskAnswer(actionId, trimmed)}
+        >
+          {status === 'submitting' ? localize('com_ui_submitting') : localize('com_ui_submit')}
+        </Button>
+        {status === 'expired' && (
+          <span className="flex items-center text-xs text-text-warning">
+            <TriangleAlert className="mr-1.5 h-4 w-4" aria-hidden="true" />
+            {localize('com_ui_approval_expired')}
+          </span>
+        )}
+        {status === 'error' && (
+          <span className="flex items-center text-xs text-text-warning">
+            <TriangleAlert className="mr-1.5 h-4 w-4" aria-hidden="true" />
+            {localize('com_ui_approval_error')}
+          </span>
+        )}
+      </div>
+    </div>
+  );
+}
diff --git a/client/src/components/Chat/Messages/Content/ContentParts.tsx b/client/src/components/Chat/Messages/Content/ContentParts.tsx
index 00569c11f5..148c8a3e2d 100644
--- a/client/src/components/Chat/Messages/Content/ContentParts.tsx
+++ b/client/src/components/Chat/Messages/Content/ContentParts.tsx
@@ -12,6 +12,7 @@ import { mapAttachments, groupSequentialToolCalls } from '~/utils';
 import { MessageContext, SearchContext } from '~/Providers';
 import PendingSkillCall from './Parts/PendingSkillCall';
 import { EditTextPart, EmptyText } from './Parts';
+import ApprovalProvider from './ApprovalContext';
 import MemoryArtifacts from './MemoryArtifacts';
 import ToolCallGroup from './ToolCallGroup';
 import Container from './Container';
@@ -373,7 +374,7 @@ const ContentParts = memo(function ContentParts({
   const hasParallelContent = safeContent.some((part) => part?.groupId != null);
   if (hasParallelContent) {
     return (
-      <>
+      <ApprovalProvider>
         {renderPendingSkills()}
         <ParallelContentRenderer
           content={content}
@@ -385,41 +386,43 @@ const ContentParts = memo(function ContentParts({
           isSubmitting={effectiveIsSubmitting}
           renderPart={renderPart}
         />
-      </>
+      </ApprovalProvider>
     );
   }
 
   // Sequential content: render parts in order (90% of cases)
   return (
-    <SearchContext.Provider value={{ searchResults }}>
-      <MemoryArtifacts attachments={attachments} />
-      {renderPendingSkills()}
-      {showEmptyCursor && (
-        <Container>
-          <EmptyText />
-        </Container>
-      )}
-      {groupedParts.map((group) => {
-        if (group.type === 'single') {
-          const { part, idx } = group.part;
-          return renderPart(part, idx, idx === lastContentIdx);
-        }
-        const { groupId } = group;
-        return (
-          <ToolCallGroup
-            key={`tool-group-${groupId}`}
-            parts={group.parts}
-            isSubmitting={effectiveIsSubmitting}
-            isLast={group.parts.some((p) => p.idx === lastContentIdx)}
-            renderPart={renderGroupedPart}
-            lastContentIdx={lastContentIdx}
-            groupAttachments={group.groupAttachments}
-            initialExpansionState={toolGroupExpansionRef.current.get(groupId)}
-            onExpansionChange={(state) => handleGroupExpansionChange(groupId, state)}
-          />
-        );
-      })}
-    </SearchContext.Provider>
+    <ApprovalProvider>
+      <SearchContext.Provider value={{ searchResults }}>
+        <MemoryArtifacts attachments={attachments} />
+        {renderPendingSkills()}
+        {showEmptyCursor && (
+          <Container>
+            <EmptyText />
+          </Container>
+        )}
+        {groupedParts.map((group) => {
+          if (group.type === 'single') {
+            const { part, idx } = group.part;
+            return renderPart(part, idx, idx === lastContentIdx);
+          }
+          const { groupId } = group;
+          return (
+            <ToolCallGroup
+              key={`tool-group-${groupId}`}
+              parts={group.parts}
+              isSubmitting={effectiveIsSubmitting}
+              isLast={group.parts.some((p) => p.idx === lastContentIdx)}
+              renderPart={renderGroupedPart}
+              lastContentIdx={lastContentIdx}
+              groupAttachments={group.groupAttachments}
+              initialExpansionState={toolGroupExpansionRef.current.get(groupId)}
+              onExpansionChange={(state) => handleGroupExpansionChange(groupId, state)}
+            />
+          );
+        })}
+      </SearchContext.Provider>
+    </ApprovalProvider>
   );
 });
 
diff --git a/client/src/components/Chat/Messages/Content/Part.tsx b/client/src/components/Chat/Messages/Content/Part.tsx
index 6df833370a..6998cdbe5f 100644
--- a/client/src/components/Chat/Messages/Content/Part.tsx
+++ b/client/src/components/Chat/Messages/Content/Part.tsx
@@ -22,16 +22,19 @@ import {
   BashCall,
   SubagentCall,
 } from './Parts';
+import { getAskUserQuestionPart } from '~/utils/approval';
+import { isBashProgrammaticToolCall } from './routing';
 import { ErrorMessage } from './MessageContent';
+import AskUserQuestion from './AskUserQuestion';
 import RetrievalCall from './RetrievalCall';
 import { getCachedPreview } from '~/utils';
+import ToolApproval from './ToolApproval';
 import AgentHandoff from './AgentHandoff';
 import CodeAnalyze from './CodeAnalyze';
 import Container from './Container';
 import WebSearch from './WebSearch';
 import ToolCall from './ToolCall';
 import Image from './Image';
-import { isBashProgrammaticToolCall } from './routing';
 
 type PartProps = {
   part?: TMessageContentParts;
@@ -58,6 +61,16 @@ const Part = memo(function Part({
     return null;
   }
 
+  const askUserQuestion = getAskUserQuestionPart(part);
+  if (askUserQuestion) {
+    return (
+      <AskUserQuestion
+        actionId={askUserQuestion.ask_user_question.actionId}
+        question={askUserQuestion.ask_user_question.question}
+      />
+    );
+  }
+
   if (part.type === ContentTypes.ERROR) {
     return (
       <ErrorMessage
@@ -136,164 +149,183 @@ const Part = memo(function Part({
 
     const isToolCall =
       'args' in toolCall && (!toolCall.type || toolCall.type === ToolCallTypes.TOOL_CALL);
-    if (isToolCall && isBashProgrammaticToolCall(toolCall.name, toolCall.args)) {
-      return (
-        <BashCall
-          args={toolCall.args}
-          output={toolCall.output ?? ''}
-          initialProgress={toolCall.progress ?? 0.1}
-          isSubmitting={isSubmitting}
-          attachments={attachments}
-          commandField="code"
-          hideAttachments={hideAttachments}
-          onExpand={onToolExpand}
-        />
-      );
-    } else if (
-      isToolCall &&
-      (toolCall.name === Tools.execute_code ||
-        toolCall.name === Constants.PROGRAMMATIC_TOOL_CALLING ||
-        toolCall.name === Constants.BASH_PROGRAMMATIC_TOOL_CALLING)
-    ) {
-      return (
-        <ExecuteCode
-          attachments={attachments}
-          isSubmitting={isSubmitting}
-          output={toolCall.output ?? ''}
-          initialProgress={toolCall.progress ?? 0.1}
-          args={toolCall.args}
-          hideAttachments={hideAttachments}
-          onExpand={onToolExpand}
-        />
-      );
-    } else if (
-      isToolCall &&
-      (toolCall.name === 'image_gen_oai' ||
-        toolCall.name === 'image_edit_oai' ||
-        toolCall.name === 'gemini_image_gen')
-    ) {
-      return (
-        <ImageGen
-          initialProgress={toolCall.progress ?? 0.1}
-          isSubmitting={isSubmitting}
-          toolName={toolCall.name}
-          args={typeof toolCall.args === 'string' ? toolCall.args : ''}
-          output={toolCall.output ?? ''}
-          attachments={attachments}
-          hideAttachments={hideAttachments}
-        />
-      );
-    } else if (isToolCall && toolCall.name === 'skill') {
-      return (
-        <SkillCall
-          args={toolCall.args}
-          output={toolCall.output ?? ''}
-          initialProgress={toolCall.progress ?? 0.1}
-          isSubmitting={isSubmitting}
-          attachments={attachments}
-          hideAttachments={hideAttachments}
-          onExpand={onToolExpand}
-        />
-      );
-    } else if (isToolCall && toolCall.name === Constants.SUBAGENT) {
-      /** `subagent_content` is the aggregated content-parts array the
-       *  backend writes onto the tool_call at message-save time so the
-       *  child's activity survives a page refresh. Not present on older
-       *  runs recorded before the persistence path existed — those fall
-       *  back to the Recoil atom (live session) or the raw tool output
-       *  inside `SubagentCall`. */
-      const persistedContent = (
-        toolCall as unknown as {
-          subagent_content?: TMessageContentParts[];
+    if (isToolCall) {
+      const card = (() => {
+        if (isBashProgrammaticToolCall(toolCall.name, toolCall.args)) {
+          return (
+            <BashCall
+              args={toolCall.args}
+              output={toolCall.output ?? ''}
+              initialProgress={toolCall.progress ?? 0.1}
+              isSubmitting={isSubmitting}
+              attachments={attachments}
+              commandField="code"
+              hideAttachments={hideAttachments}
+              onExpand={onToolExpand}
+            />
+          );
+        } else if (
+          toolCall.name === Tools.execute_code ||
+          toolCall.name === Constants.PROGRAMMATIC_TOOL_CALLING ||
+          toolCall.name === Constants.BASH_PROGRAMMATIC_TOOL_CALLING
+        ) {
+          return (
+            <ExecuteCode
+              attachments={attachments}
+              isSubmitting={isSubmitting}
+              output={toolCall.output ?? ''}
+              initialProgress={toolCall.progress ?? 0.1}
+              args={toolCall.args}
+              hideAttachments={hideAttachments}
+              onExpand={onToolExpand}
+            />
+          );
+        } else if (
+          toolCall.name === 'image_gen_oai' ||
+          toolCall.name === 'image_edit_oai' ||
+          toolCall.name === 'gemini_image_gen'
+        ) {
+          return (
+            <ImageGen
+              initialProgress={toolCall.progress ?? 0.1}
+              isSubmitting={isSubmitting}
+              toolName={toolCall.name}
+              args={typeof toolCall.args === 'string' ? toolCall.args : ''}
+              output={toolCall.output ?? ''}
+              attachments={attachments}
+              hideAttachments={hideAttachments}
+            />
+          );
+        } else if (toolCall.name === 'skill') {
+          return (
+            <SkillCall
+              args={toolCall.args}
+              output={toolCall.output ?? ''}
+              initialProgress={toolCall.progress ?? 0.1}
+              isSubmitting={isSubmitting}
+              attachments={attachments}
+              hideAttachments={hideAttachments}
+              onExpand={onToolExpand}
+            />
+          );
+        } else if (toolCall.name === Constants.SUBAGENT) {
+          /** `subagent_content` is the aggregated content-parts array the
+           *  backend writes onto the tool_call at message-save time so the
+           *  child's activity survives a page refresh. Not present on older
+           *  runs recorded before the persistence path existed — those fall
+           *  back to the Recoil atom (live session) or the raw tool output
+           *  inside `SubagentCall`. */
+          const persistedContent = (
+            toolCall as unknown as {
+              subagent_content?: TMessageContentParts[];
+            }
+          ).subagent_content;
+          return (
+            <SubagentCall
+              toolCallId={toolCall.id ?? ''}
+              args={toolCall.args}
+              output={toolCall.output ?? ''}
+              initialProgress={toolCall.progress ?? 0.1}
+              isSubmitting={isSubmitting}
+              attachments={attachments}
+              persistedContent={persistedContent}
+              hideAttachments={hideAttachments}
+            />
+          );
+        } else if (toolCall.name === 'read_file') {
+          return (
+            <ReadFileCall
+              args={toolCall.args}
+              output={toolCall.output ?? ''}
+              initialProgress={toolCall.progress ?? 0.1}
+              isSubmitting={isSubmitting}
+              attachments={attachments}
+              hideAttachments={hideAttachments}
+              onExpand={onToolExpand}
+            />
+          );
+        } else if (toolCall.name === 'create_file' || toolCall.name === 'edit_file') {
+          return (
+            <FileAuthoringCall
+              toolName={toolCall.name}
+              args={toolCall.args}
+              output={toolCall.output ?? ''}
+              initialProgress={toolCall.progress ?? 0.1}
+              isSubmitting={isSubmitting}
+              attachments={attachments}
+              hideAttachments={hideAttachments}
+              onExpand={onToolExpand}
+            />
+          );
+        } else if (toolCall.name === Tools.bash_tool) {
+          return (
+            <BashCall
+              args={toolCall.args}
+              output={toolCall.output ?? ''}
+              initialProgress={toolCall.progress ?? 0.1}
+              isSubmitting={isSubmitting}
+              attachments={attachments}
+              hideAttachments={hideAttachments}
+              onExpand={onToolExpand}
+            />
+          );
+        } else if (toolCall.name === Tools.web_search) {
+          return (
+            <WebSearch
+              output={toolCall.output ?? ''}
+              initialProgress={toolCall.progress ?? 0.1}
+              isSubmitting={isSubmitting}
+              attachments={attachments}
+              isLast={isLast}
+              onExpand={onToolExpand}
+            />
+          );
+        } else if (toolCall.name === 'file_search' || toolCall.name === 'retrieval') {
+          return (
+            <RetrievalCall
+              initialProgress={toolCall.progress ?? 0.1}
+              isSubmitting={isSubmitting}
+              output={toolCall.output ?? undefined}
+              attachments={attachments}
+              onExpand={onToolExpand}
+            />
+          );
+        } else if (toolCall.name?.startsWith(Constants.LC_TRANSFER_TO_)) {
+          return <AgentHandoff args={toolCall.args ?? ''} name={toolCall.name || ''} />;
         }
-      ).subagent_content;
-      return (
-        <SubagentCall
-          toolCallId={toolCall.id ?? ''}
-          args={toolCall.args}
-          output={toolCall.output ?? ''}
-          initialProgress={toolCall.progress ?? 0.1}
-          isSubmitting={isSubmitting}
-          attachments={attachments}
-          persistedContent={persistedContent}
-          hideAttachments={hideAttachments}
-        />
-      );
-    } else if (isToolCall && toolCall.name === 'read_file') {
-      return (
-        <ReadFileCall
-          args={toolCall.args}
-          output={toolCall.output ?? ''}
-          initialProgress={toolCall.progress ?? 0.1}
-          isSubmitting={isSubmitting}
-          attachments={attachments}
-          hideAttachments={hideAttachments}
-          onExpand={onToolExpand}
-        />
-      );
-    } else if (isToolCall && (toolCall.name === 'create_file' || toolCall.name === 'edit_file')) {
-      return (
-        <FileAuthoringCall
-          toolName={toolCall.name}
-          args={toolCall.args}
-          output={toolCall.output ?? ''}
-          initialProgress={toolCall.progress ?? 0.1}
-          isSubmitting={isSubmitting}
-          attachments={attachments}
-          hideAttachments={hideAttachments}
-          onExpand={onToolExpand}
-        />
-      );
-    } else if (isToolCall && toolCall.name === Tools.bash_tool) {
-      return (
-        <BashCall
-          args={toolCall.args}
-          output={toolCall.output ?? ''}
-          initialProgress={toolCall.progress ?? 0.1}
-          isSubmitting={isSubmitting}
-          attachments={attachments}
-          hideAttachments={hideAttachments}
-          onExpand={onToolExpand}
-        />
-      );
-    } else if (isToolCall && toolCall.name === Tools.web_search) {
-      return (
-        <WebSearch
-          output={toolCall.output ?? ''}
-          initialProgress={toolCall.progress ?? 0.1}
-          isSubmitting={isSubmitting}
-          attachments={attachments}
-          isLast={isLast}
-          onExpand={onToolExpand}
-        />
-      );
-    } else if (isToolCall && (toolCall.name === 'file_search' || toolCall.name === 'retrieval')) {
-      return (
-        <RetrievalCall
-          initialProgress={toolCall.progress ?? 0.1}
-          isSubmitting={isSubmitting}
-          output={toolCall.output ?? undefined}
-          attachments={attachments}
-          onExpand={onToolExpand}
-        />
-      );
-    } else if (isToolCall && toolCall.name?.startsWith(Constants.LC_TRANSFER_TO_)) {
-      return <AgentHandoff args={toolCall.args ?? ''} name={toolCall.name || ''} />;
-    } else if (isToolCall) {
-      return (
-        <ToolCall
-          args={toolCall.args ?? ''}
-          name={toolCall.name || ''}
-          output={toolCall.output ?? ''}
-          initialProgress={toolCall.progress ?? 0.1}
-          isSubmitting={isSubmitting}
-          attachments={attachments}
-          auth={toolCall.auth}
-          isLast={isLast}
-          hideAttachments={hideAttachments}
-          onExpand={onToolExpand}
-        />
-      );
+        return (
+          <ToolCall
+            args={toolCall.args ?? ''}
+            name={toolCall.name || ''}
+            output={toolCall.output ?? ''}
+            initialProgress={toolCall.progress ?? 0.1}
+            isSubmitting={isSubmitting}
+            attachments={attachments}
+            auth={toolCall.auth}
+            isLast={isLast}
+            hideAttachments={hideAttachments}
+            onExpand={onToolExpand}
+          />
+        );
+      })();
+
+      /** Render approval controls for ANY paused agent tool — not just the generic
+       *  card — so a HITL policy that gates a specialized tool (bash, code, file…)
+       *  still surfaces approve/reject/edit/respond. Only while the call is unresolved
+       *  (no output yet). */
+      if (toolCall.approval != null && (toolCall.output?.length ?? 0) === 0) {
+        return (
+          <>
+            {card}
+            <ToolApproval
+              approval={toolCall.approval}
+              toolCallId={toolCall.id ?? ''}
+              args={toolCall.args}
+            />
+          </>
+        );
+      }
+      return card;
     } else if (toolCall.type === ToolCallTypes.CODE_INTERPRETER) {
       const code_interpreter = toolCall[ToolCallTypes.CODE_INTERPRETER];
       return (
diff --git a/client/src/components/Chat/Messages/Content/Parts/SubagentCall.tsx b/client/src/components/Chat/Messages/Content/Parts/SubagentCall.tsx
index 23f5537e24..69060ca939 100644
--- a/client/src/components/Chat/Messages/Content/Parts/SubagentCall.tsx
+++ b/client/src/components/Chat/Messages/Content/Parts/SubagentCall.tsx
@@ -3,19 +3,18 @@ import { useRecoilValue } from 'recoil';
 import { ContentTypes, EModelEndpoint } from 'librechat-data-provider';
 import { ArrowDown, ChevronRight, Maximize2, Minimize2, Users } from 'lucide-react';
 import { OGDialog, OGDialogContent, OGDialogTitle, OGDialogDescription } from '@librechat/client';
-
-import type { TAttachment, TMessage, TMessageContentParts } from 'librechat-data-provider';
+import type { Agents, TAttachment, TMessage, TMessageContentParts } from 'librechat-data-provider';
 import type { PartWithIndex } from '~/components/Chat/Messages/Content/ParallelContent';
 import type { SubagentTickerLine } from '~/utils/subagentContent';
-
 import ToolCallGroup from '~/components/Chat/Messages/Content/ToolCallGroup';
 import MarkdownLite from '~/components/Chat/Messages/Content/MarkdownLite';
+import ToolApproval from '~/components/Chat/Messages/Content/ToolApproval';
 import { cn, groupSequentialToolCalls, parseToolName } from '~/utils';
 import Container from '~/components/Chat/Messages/Content/Container';
 import ToolCall from '~/components/Chat/Messages/Content/ToolCall';
 import { MessageContext } from '~/Providers/MessageContext';
-import { subagentProgressByToolCallId } from '~/store';
 import MessageIcon from '~/components/Share/MessageIcon';
+import { subagentProgressByToolCallId } from '~/store';
 import { useAgentsMapContext } from '~/Providers';
 import { AttachmentGroup } from './Attachment';
 import { useLocalize } from '~/hooks';
@@ -841,15 +840,17 @@ function SubagentDialogPart({
     const tc = (
       part as {
         [ContentTypes.TOOL_CALL]?: {
+          id?: string;
           args?: string | Record<string, unknown>;
           output?: string;
           name?: string;
           progress?: number;
+          approval?: Agents.ToolCall['approval'];
         };
       }
     )[ContentTypes.TOOL_CALL];
     if (!tc) return null;
-    return (
+    const toolCall = (
       <ToolCall
         args={tc.args ?? ''}
         output={tc.output ?? ''}
@@ -860,6 +861,19 @@ function SubagentDialogPart({
         onExpand={onToolExpand}
       />
     );
+    // Surface approve/reject/edit controls for a tool paused INSIDE this subagent —
+    // its tool_call lives in subagent_content, not as a top-level message part, so the
+    // top-level Part.tsx render never sees it. Only while unresolved (no output yet).
+    // The dialog portals but React context still flows, so ToolApproval resolves here.
+    if (tc.approval != null && (tc.output?.length ?? 0) === 0) {
+      return (
+        <>
+          {toolCall}
+          <ToolApproval approval={tc.approval} toolCallId={tc.id ?? ''} args={tc.args} />
+        </>
+      );
+    }
+    return toolCall;
   }
   return null;
 }
diff --git a/client/src/components/Chat/Messages/Content/ToolApproval.tsx b/client/src/components/Chat/Messages/Content/ToolApproval.tsx
new file mode 100644
index 0000000000..1068eadd1e
--- /dev/null
+++ b/client/src/components/Chat/Messages/Content/ToolApproval.tsx
@@ -0,0 +1,273 @@
+import { useEffect, useMemo, useState } from 'react';
+import { Button, TextareaAutosize } from '@librechat/client';
+import { Check, X, Pencil, MessageSquare, TriangleAlert } from 'lucide-react';
+import type { Agents } from 'librechat-data-provider';
+import type { TranslationKeys } from '~/hooks';
+import { useApprovalContext, useResumeSubmit } from './ApprovalContext';
+import { useLocalize } from '~/hooks';
+import { cn, logger } from '~/utils';
+
+/**
+ * The resume route rejects an `edit` whose `editedArguments` isn't a plain object
+ * (`findIncompleteDecisions`), so mirror that here — `JSON.parse` alone also accepts
+ * `null`/arrays/primitives, which would enable Submit for a value that can only 400.
+ */
+function isPlainObject(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null && !Array.isArray(value);
+}
+
+type DecisionType = Agents.ToolApprovalDecisionType;
+
+const DECISION_ICON: Record<DecisionType, React.ComponentType<{ className?: string }>> = {
+  approve: Check,
+  reject: X,
+  edit: Pencil,
+  respond: MessageSquare,
+};
+
+const DECISION_LABEL: Record<DecisionType, TranslationKeys> = {
+  approve: 'com_ui_approve',
+  reject: 'com_ui_reject',
+  edit: 'com_ui_edit',
+  respond: 'com_ui_respond',
+};
+
+/** Pretty-print tool args as JSON for the `edit` textarea seed. */
+function seedArgs(args: string | Record<string, unknown> | undefined): string {
+  if (args == null) {
+    return '{}';
+  }
+  if (typeof args === 'string') {
+    try {
+      return JSON.stringify(JSON.parse(args), null, 2);
+    } catch {
+      return args;
+    }
+  }
+  try {
+    return JSON.stringify(args, null, 2);
+  } catch (e) {
+    logger.error('ToolApproval - failed to stringify args', e);
+    return '{}';
+  }
+}
+
+/**
+ * Renders approve / reject / edit / respond controls for a paused tool call,
+ * scoped to the decisions the server allows. Records its decision in the
+ * batch {@link useApprovalContext}; the lead card additionally renders the
+ * single submit button covering every paused call in the action.
+ */
+export default function ToolApproval({
+  approval,
+  toolCallId,
+  args,
+}: {
+  approval: NonNullable<Agents.ToolCall['approval']>;
+  toolCallId: string;
+  args: string | Record<string, unknown> | undefined;
+}) {
+  const localize = useLocalize();
+  const { actionId, allowed_decisions: allowedDecisions, description } = approval;
+  const {
+    registerToolCall,
+    unregisterToolCall,
+    setDecision,
+    isReady,
+    getStatus,
+    getLeadToolCallId,
+    getRegisteredCount,
+  } = useApprovalContext();
+  const { submitToolApproval } = useResumeSubmit();
+
+  const [active, setActive] = useState<DecisionType | null>(null);
+  const [editText, setEditText] = useState(() => seedArgs(args));
+  const [responseText, setResponseText] = useState('');
+  const [reason, setReason] = useState('');
+
+  useEffect(() => {
+    registerToolCall(actionId, toolCallId);
+    // Drop the registration when this card unmounts (e.g. the tool resolved and the
+    // card was replaced) so a stale entry can't keep the batch's `isReady` false.
+    return () => unregisterToolCall(actionId, toolCallId);
+  }, [registerToolCall, unregisterToolCall, actionId, toolCallId]);
+
+  const status = getStatus(actionId);
+  const locked = status === 'submitting' || status === 'submitted' || status === 'expired';
+
+  /** Recompute and store this card's decision whenever inputs change. A null
+   *  resolution (e.g. invalid edit JSON) clears it so submit stays disabled. */
+  useEffect(() => {
+    if (locked) {
+      return;
+    }
+    if (active == null) {
+      setDecision(actionId, toolCallId, null);
+      return;
+    }
+    if (active === 'approve') {
+      setDecision(actionId, toolCallId, { tool_call_id: toolCallId, decision: 'approve' });
+      return;
+    }
+    if (active === 'reject') {
+      setDecision(actionId, toolCallId, {
+        tool_call_id: toolCallId,
+        decision: 'reject',
+        reason: reason.trim() || undefined,
+      });
+      return;
+    }
+    if (active === 'respond') {
+      const trimmed = responseText.trim();
+      setDecision(
+        actionId,
+        toolCallId,
+        trimmed.length > 0
+          ? { tool_call_id: toolCallId, decision: 'respond', responseText: trimmed }
+          : null,
+      );
+      return;
+    }
+    if (active === 'edit') {
+      try {
+        const parsed = JSON.parse(editText) as unknown;
+        setDecision(
+          actionId,
+          toolCallId,
+          isPlainObject(parsed)
+            ? { tool_call_id: toolCallId, decision: 'edit', editedArguments: parsed }
+            : null,
+        );
+      } catch {
+        setDecision(actionId, toolCallId, null);
+      }
+    }
+  }, [active, editText, responseText, reason, locked, setDecision, actionId, toolCallId]);
+
+  const editIsValid = useMemo(() => {
+    if (active !== 'edit') {
+      return true;
+    }
+    try {
+      return isPlainObject(JSON.parse(editText));
+    } catch {
+      return false;
+    }
+  }, [active, editText]);
+
+  const isLead = getLeadToolCallId(actionId) === toolCallId;
+  const count = getRegisteredCount(actionId);
+  const ready = isReady(actionId);
+
+  const submitLabel = useMemo(() => {
+    if (status === 'submitting') {
+      return localize('com_ui_submitting');
+    }
+    if (count > 1) {
+      return localize('com_ui_submit_decisions', { 0: count });
+    }
+    return localize('com_ui_submit');
+  }, [status, count, localize]);
+
+  if (status === 'submitted') {
+    return null;
+  }
+
+  return (
+    <div className="my-2 flex w-full flex-col gap-2 rounded-lg border border-border-light bg-surface-secondary p-3">
+      {description != null && description.length > 0 && (
+        <p className="text-sm text-text-secondary">{description}</p>
+      )}
+      <div className="flex flex-wrap gap-2">
+        {allowedDecisions.map((decision) => {
+          const Icon = DECISION_ICON[decision];
+          return (
+            <Button
+              key={decision}
+              size="sm"
+              variant={active === decision ? 'default' : 'outline'}
+              disabled={locked}
+              aria-pressed={active === decision}
+              onClick={() => setActive((prev) => (prev === decision ? null : decision))}
+              className="inline-flex items-center gap-1.5"
+            >
+              <Icon className="h-4 w-4" aria-hidden="true" />
+              {localize(DECISION_LABEL[decision])}
+            </Button>
+          );
+        })}
+      </div>
+
+      {active === 'edit' && (
+        <div className="flex flex-col gap-1">
+          <TextareaAutosize
+            value={editText}
+            disabled={locked}
+            onChange={(e) => setEditText(e.target.value)}
+            minRows={3}
+            maxRows={16}
+            className={cn(
+              'w-full resize-none rounded-md border bg-surface-primary p-2 font-mono text-xs',
+              editIsValid ? 'border-border-light' : 'border-red-500',
+            )}
+            aria-label={localize('com_ui_edit')}
+          />
+          {!editIsValid && (
+            <span className="text-xs text-text-warning">{localize('com_ui_invalid_json')}</span>
+          )}
+        </div>
+      )}
+
+      {active === 'respond' && (
+        <TextareaAutosize
+          value={responseText}
+          disabled={locked}
+          onChange={(e) => setResponseText(e.target.value)}
+          minRows={2}
+          maxRows={12}
+          placeholder={localize('com_ui_tool_response_placeholder')}
+          className="w-full resize-none rounded-md border border-border-light bg-surface-primary p-2 text-sm"
+          aria-label={localize('com_ui_respond')}
+        />
+      )}
+
+      {active === 'reject' && (
+        <TextareaAutosize
+          value={reason}
+          disabled={locked}
+          onChange={(e) => setReason(e.target.value)}
+          minRows={1}
+          maxRows={6}
+          placeholder={localize('com_ui_reject_reason_placeholder')}
+          className="w-full resize-none rounded-md border border-border-light bg-surface-primary p-2 text-sm"
+          aria-label={localize('com_ui_reject')}
+        />
+      )}
+
+      {isLead && (
+        <div className="mt-1 flex items-center gap-3">
+          <Button
+            size="sm"
+            variant="submit"
+            disabled={!ready || locked}
+            onClick={() => submitToolApproval(actionId)}
+          >
+            {submitLabel}
+          </Button>
+          {status === 'expired' && (
+            <span className="flex items-center text-xs text-text-warning">
+              <TriangleAlert className="mr-1.5 h-4 w-4" aria-hidden="true" />
+              {localize('com_ui_approval_expired')}
+            </span>
+          )}
+          {status === 'error' && (
+            <span className="flex items-center text-xs text-text-warning">
+              <TriangleAlert className="mr-1.5 h-4 w-4" aria-hidden="true" />
+              {localize('com_ui_approval_error')}
+            </span>
+          )}
+        </div>
+      )}
+    </div>
+  );
+}
diff --git a/client/src/data-provider/SSE/mutations.ts b/client/src/data-provider/SSE/mutations.ts
index 0861babbe9..2d95bb377b 100644
--- a/client/src/data-provider/SSE/mutations.ts
+++ b/client/src/data-provider/SSE/mutations.ts
@@ -1,5 +1,6 @@
 import { useMutation } from '@tanstack/react-query';
-import { apiBaseUrl, request } from 'librechat-data-provider';
+import { apiBaseUrl, request, EModelEndpoint } from 'librechat-data-provider';
+import type { Agents, TEphemeralAgent } from 'librechat-data-provider';
 
 export interface AbortStreamParams {
   /** The stream ID to abort (if known) */
@@ -40,3 +41,102 @@ export function useAbortStreamMutation() {
     mutationFn: abortStream,
   });
 }
+
+/**
+ * Agent/endpoint selection fields the resume route needs so its shared
+ * `buildEndpointOption` middleware can reconstruct the same agent that paused.
+ * Mirrors the fields a normal `POST /api/agents/chat` message carries; sourced
+ * from the active conversation (and the conversation's ephemeral-agent state).
+ */
+export interface ResumeAgentFields {
+  conversationId: string;
+  endpoint?: EModelEndpoint | string | null;
+  endpointType?: EModelEndpoint | string | null;
+  agent_id?: string | null;
+  model?: string | null;
+  spec?: string | null;
+  /** Ephemeral agents derive instructions from this; re-sent so resume rebuilds the
+   *  same graph (and matches the server's request fingerprint). */
+  promptPrefix?: string | null;
+  ephemeralAgent?: TEphemeralAgent | null;
+  isTemporary?: boolean;
+}
+
+/** Successful resume ACK. The continuation streams over the existing SSE. */
+export interface ResumeResponse {
+  streamId: string;
+  conversationId: string;
+  status: 'resuming';
+}
+
+/**
+ * Shared base for both resume payloads: the agent selection fields, plus the
+ * `endpoint` defaulted to `agents` (the resume route lives under the agents
+ * router and `buildEndpointOption` keys off it).
+ */
+const buildResumeBase = (fields: ResumeAgentFields) => ({
+  ...fields,
+  endpoint: fields.endpoint ?? EModelEndpoint.agents,
+});
+
+export interface SubmitToolApprovalParams extends ResumeAgentFields {
+  /** Identifies the paused action; the server 409s a stale/mismatched id. */
+  actionId: string;
+  /** One entry per paused tool_call_id — the server 400s a partial batch. */
+  decisions: Agents.ToolApprovalResolution[];
+}
+
+/**
+ * Submit a batch of tool-approval decisions to resume a paused generation.
+ * POSTs to the shared resume route; the continuation streams over the existing
+ * SSE connection (this only fires the POST — it does not open a new stream).
+ */
+export const submitToolApproval = async (
+  params: SubmitToolApprovalParams,
+): Promise<ResumeResponse> => {
+  const { actionId, decisions, ...fields } = params;
+  return request.post(`${apiBaseUrl()}/api/agents/chat/resume`, {
+    ...buildResumeBase(fields),
+    actionId,
+    decisions,
+  }) as Promise<ResumeResponse>;
+};
+
+/**
+ * React Query mutation hook for submitting tool-approval decisions.
+ * Mirrors {@link useAbortStreamMutation}; the resumed stream arrives on the SSE.
+ */
+export function useSubmitToolApprovalMutation() {
+  return useMutation({
+    mutationFn: submitToolApproval,
+  });
+}
+
+export interface SubmitAskAnswerParams extends ResumeAgentFields {
+  actionId: string;
+  /** Free-form answer to the agent's ask-user question. */
+  answer: string;
+}
+
+/**
+ * Submit an ask-user-question answer to resume a paused generation.
+ * POSTs to the shared resume route; the continuation streams over the existing SSE.
+ */
+export const submitAskAnswer = async (params: SubmitAskAnswerParams): Promise<ResumeResponse> => {
+  const { actionId, answer, ...fields } = params;
+  return request.post(`${apiBaseUrl()}/api/agents/chat/resume`, {
+    ...buildResumeBase(fields),
+    actionId,
+    answer,
+  }) as Promise<ResumeResponse>;
+};
+
+/**
+ * React Query mutation hook for submitting an ask-user answer.
+ * Mirrors {@link useAbortStreamMutation}; the resumed stream arrives on the SSE.
+ */
+export function useSubmitAskAnswerMutation() {
+  return useMutation({
+    mutationFn: submitAskAnswer,
+  });
+}
diff --git a/client/src/data-provider/SSE/queries.ts b/client/src/data-provider/SSE/queries.ts
index 8cd116394b..3f9830712f 100644
--- a/client/src/data-provider/SSE/queries.ts
+++ b/client/src/data-provider/SSE/queries.ts
@@ -1,6 +1,6 @@
 import { useEffect, useMemo, useState } from 'react';
-import { apiBaseUrl, QueryKeys, request, dataService } from 'librechat-data-provider';
 import { useQuery, useQueries, useQueryClient } from '@tanstack/react-query';
+import { apiBaseUrl, QueryKeys, request, dataService } from 'librechat-data-provider';
 import type { Agents, TConversation } from 'librechat-data-provider';
 import { isNotFoundError, updateConvoInAllQueries } from '~/utils';
 import { useGetStartupConfig } from '../Endpoints';
@@ -8,10 +8,13 @@ import { useGetStartupConfig } from '../Endpoints';
 export interface StreamStatusResponse {
   active: boolean;
   streamId?: string;
-  status?: 'running' | 'complete' | 'error' | 'aborted';
+  status?: 'running' | 'complete' | 'error' | 'aborted' | 'requires_action';
   aggregatedContent?: Array<{ type: string; text?: string }>;
   createdAt?: number;
   resumeState?: Agents.ResumeState;
+  /** Live pending approval when `status === 'requires_action'`; mirrors
+   *  `resumeState.pendingAction`, surfaced top-level for the resume-on-load path. */
+  pendingAction?: Agents.PendingAction;
 }
 
 export const streamStatusQueryKey = (conversationId: string) => ['streamStatus', conversationId];
diff --git a/client/src/hooks/SSE/__tests__/useResumeOnLoad.spec.tsx b/client/src/hooks/SSE/__tests__/useResumeOnLoad.spec.tsx
index fdb6d79c9f..03a5a9b3d1 100644
--- a/client/src/hooks/SSE/__tests__/useResumeOnLoad.spec.tsx
+++ b/client/src/hooks/SSE/__tests__/useResumeOnLoad.spec.tsx
@@ -261,6 +261,62 @@ describe('useResumeOnLoad', () => {
     );
   });
 
+  it('strips the paused user/assistant rows from submission.messages (no duplicate on resume)', async () => {
+    const observedSubmissions: Array<TSubmission | null> = [];
+    mockUseStreamStatus.mockReturnValue({
+      isSuccess: true,
+      isFetching: false,
+      data: {
+        active: true,
+        status: 'running',
+        streamId: CONVERSATION_ID,
+        resumeState: {
+          runSteps: [],
+          aggregatedContent: [{ type: 'text', text: 'Streaming...' }],
+          responseMessageId: RESPONSE_MESSAGE_ID,
+          conversationId: CONVERSATION_ID,
+          sender: 'Agent',
+          userMessage: {
+            messageId: USER_MESSAGE_ID,
+            parentMessageId: Constants.NO_PARENT,
+            conversationId: CONVERSATION_ID,
+            text: 'Hello',
+          },
+        },
+      },
+    });
+
+    renderUseResumeOnLoad({
+      // The reloaded DB array already holds the paused user row + the partial
+      // (unfinished) assistant row under the same ids the resume re-supplies.
+      messages: [
+        buildUserMessage(CONVERSATION_ID),
+        {
+          messageId: RESPONSE_MESSAGE_ID,
+          parentMessageId: USER_MESSAGE_ID,
+          conversationId: CONVERSATION_ID,
+          text: '',
+          isCreatedByUser: false,
+          unfinished: true,
+        } as TMessage,
+      ],
+      onSubmission: (currentSubmission) => observedSubmissions.push(currentSubmission),
+    });
+
+    await act(async () => {
+      await Promise.resolve();
+    });
+
+    const submission = observedSubmissions[observedSubmissions.length - 1];
+    const ids = (submission?.messages ?? []).map((m) => m.messageId);
+    // Stripped from the flat array (re-supplied via the placeholders + final event)...
+    expect(ids).not.toContain(USER_MESSAGE_ID);
+    expect(ids).not.toContain(RESPONSE_MESSAGE_ID);
+    // ...but still carried on the placeholders for re-insertion.
+    expect(submission?.userMessage?.messageId).toBe(USER_MESSAGE_ID);
+    expect(submission?.initialResponse?.messageId).toBe(RESPONSE_MESSAGE_ID);
+  });
+
   it('restores the branch that owns a pending OAuth resume user message', async () => {
     const rootUser = buildUserMessage(CONVERSATION_ID, 'root-user');
     const branchOneResponse = {
diff --git a/client/src/hooks/SSE/useResumableSSE.ts b/client/src/hooks/SSE/useResumableSSE.ts
index 501f8a73c5..e898a261f8 100644
--- a/client/src/hooks/SSE/useResumableSSE.ts
+++ b/client/src/hooks/SSE/useResumableSSE.ts
@@ -12,6 +12,7 @@ import {
   apiBaseUrl,
   UsageEvents,
   createPayload,
+  ApprovalEvents,
   ViolationTypes,
   removeNullishValues,
 } from 'librechat-data-provider';
@@ -29,10 +30,13 @@ import type { TResData } from '~/common';
 import {
   logger,
   clearAllDrafts,
+  applyPendingAction,
   removeConvoFromAllQueries,
   upsertConvoInAllQueries,
+  countTaggedApprovalParts,
   countTrailingOutputChars,
   markStreamStartFailedMetadata,
+  findPendingActionMessageIndex,
 } from '~/utils';
 import {
   useGetUserBalance,
@@ -449,6 +453,9 @@ export default function useResumableSSE(
   const submissionRef = useRef<TSubmission | null>(null);
   const optimisticStreamIdsRef = useRef(new Set<string>());
   const createdStreamIdsRef = useRef(new Set<string>());
+  /** Pending action whose tool-call content part hasn't rendered yet — retried
+   *  on the next frame so a fast pause-before-render race still attaches. */
+  const pendingActionRetryRef = useRef<number | null>(null);
 
   const {
     stepHandler,
@@ -509,6 +516,60 @@ export default function useResumableSSE(
         }
       };
 
+      /**
+       * Maps a pending action onto the in-flight response message so the
+       * approval / ask-user UI renders. Syncs the result back into the step
+       * handler's map so subsequent deltas build on the approval-tagged content
+       * rather than clobbering it.
+       *
+       * If the mapping is a no-op (the paused tool-call content part hasn't
+       * rendered yet — a pause-before-render race), retry on subsequent frames
+       * so the approval still attaches. `ask_user_question` always applies (it
+       * appends a synthetic part), so only `tool_approval` ever retries.
+       */
+      // Keep retrying across frames (not just once): Recoil/React message updates from
+      // the preceding created/step events are async and can take several frames under
+      // load, so a single retry would drop a valid pause and leave the run with no
+      // approval controls. Bounded so a genuinely-absent target can't spin forever.
+      const PENDING_ACTION_MAX_RETRY_FRAMES = 120;
+      const applyPendingActionToMessages = (pendingAction: Agents.PendingAction, attempt = 0) => {
+        const retryNextFrame = () => {
+          if (attempt < PENDING_ACTION_MAX_RETRY_FRAMES) {
+            pendingActionRetryRef.current = requestAnimationFrame(() =>
+              applyPendingActionToMessages(pendingAction, attempt + 1),
+            );
+          }
+        };
+        const messages = getMessages() ?? [];
+        const index = findPendingActionMessageIndex(messages, pendingAction);
+        if (index < 0) {
+          retryNextFrame();
+          return;
+        }
+        const updated = applyPendingAction(messages[index], pendingAction);
+        const changed = updated !== messages[index];
+        if (changed) {
+          const nextMessages = [...messages];
+          nextMessages[index] = updated;
+          setMessages(nextMessages);
+          syncStepMessage(updated);
+        }
+        // A `tool_approval` pause can carry several `action_requests` whose tool-call
+        // parts render on different frames; tagging only the first to arrive would leave
+        // late siblings with no approval card, and the resume route then 400s the partial
+        // batch ("every paused tool call must be decided"). Keep retrying (bounded) until
+        // EVERY paused call is tagged. `ask_user_question` applies its single synthetic
+        // part in one shot, so it only needs the original "did anything change" retry.
+        if (pendingAction.payload.type === 'tool_approval') {
+          const expected = pendingAction.payload.action_requests.length;
+          if (countTaggedApprovalParts(updated, pendingAction.actionId) < expected) {
+            retryNextFrame();
+          }
+        } else if (!changed) {
+          retryNextFrame();
+        }
+      };
+
       const baseUrl = `${apiBaseUrl()}/api/agents/chat/stream/${encodeURIComponent(currentStreamId)}`;
       const url = isResume ? `${baseUrl}?resume=true` : baseUrl;
       logger.log('ResumableSSE', 'Subscribing to stream:', url, { isResume });
@@ -614,6 +675,12 @@ export default function useResumableSSE(
             return;
           }
 
+          if (data.event === ApprovalEvents.ON_PENDING_ACTION) {
+            applyPendingActionToMessages(data.data as Agents.PendingAction);
+            setIsSubmitting(true);
+            return;
+          }
+
           if (data.event != null) {
             if (
               data.event === StepEvents.ON_MESSAGE_DELTA ||
@@ -749,6 +816,15 @@ export default function useResumableSSE(
               }
             }
 
+            /**
+             * Re-pause on reconnect: the run is parked on a human-review
+             * interrupt. Re-apply the pending action so the approval / ask-user
+             * controls render after a reload or dropped connection.
+             */
+            if (data.resumeState?.pendingAction) {
+              applyPendingActionToMessages(data.resumeState.pendingAction as Agents.PendingAction);
+            }
+
             if (data.resumeState?.titleEvent) {
               titleHandler(data.resumeState.titleEvent);
             }
@@ -763,6 +839,10 @@ export default function useResumableSSE(
                   contextHandler(replayEvent.data, resumeSubmission);
                 } else if (replayEvent.event === UsageEvents.ON_TOKEN_USAGE) {
                   usageHandler(replayEvent.data, resumeSubmission);
+                } else if (replayEvent.event === ApprovalEvents.ON_PENDING_ACTION) {
+                  // A pause that landed after the resume snapshot must still render its
+                  // controls (mirror the live handler), not fall through to stepHandler.
+                  applyPendingActionToMessages(replayEvent.data as Agents.PendingAction);
                 } else if (replayEvent.event != null) {
                   if (
                     replayEvent.event === StepEvents.ON_MESSAGE_DELTA ||
@@ -784,6 +864,12 @@ export default function useResumableSSE(
                   contextHandler(pendingEvent.data, resumeSubmission);
                 } else if (pendingEvent.event === UsageEvents.ON_TOKEN_USAGE) {
                   usageHandler(pendingEvent.data, resumeSubmission);
+                } else if (pendingEvent.event === ApprovalEvents.ON_PENDING_ACTION) {
+                  // In-memory mode can surface a pause that landed between getResumeState()
+                  // and the subscription here; route it to the same handler as a live event
+                  // so the approval / ask-user controls render (else the stream sits paused
+                  // with no UI until a full status reload).
+                  applyPendingActionToMessages(pendingEvent.data as Agents.PendingAction);
                 } else if (pendingEvent.event != null) {
                   if (
                     pendingEvent.event === StepEvents.ON_MESSAGE_DELTA ||
@@ -1289,6 +1375,10 @@ export default function useResumableSSE(
         clearTimeout(reconnectTimeoutRef.current);
         reconnectTimeoutRef.current = null;
       }
+      if (pendingActionRetryRef.current != null) {
+        cancelAnimationFrame(pendingActionRetryRef.current);
+        pendingActionRetryRef.current = null;
+      }
       // Reset reconnect counter before closing (so abort handler doesn't think we're reconnecting)
       reconnectAttemptRef.current = 0;
       if (sseRef.current) {
diff --git a/client/src/hooks/SSE/useResumeOnLoad.ts b/client/src/hooks/SSE/useResumeOnLoad.ts
index 97c1bc91c0..abd7f748cf 100644
--- a/client/src/hooks/SSE/useResumeOnLoad.ts
+++ b/client/src/hooks/SSE/useResumeOnLoad.ts
@@ -3,7 +3,7 @@ import { useSetRecoilState, useRecoilValue, useRecoilCallback } from 'recoil';
 import { Constants, tMessageSchema, isAssistantsEndpoint } from 'librechat-data-provider';
 import type { TMessage, TConversation, TSubmission, Agents } from 'librechat-data-provider';
 import type { StreamStatusResponse } from '~/data-provider';
-import { getBranchSiblingIndexesForTarget } from '~/utils';
+import { getBranchSiblingIndexesForTarget, applyPendingAction } from '~/utils';
 import { useStreamStatus } from '~/data-provider';
 import store from '~/store';
 
@@ -130,7 +130,7 @@ function buildSubmissionFromResumeState(
 
   // ALWAYS use aggregatedContent from resumeState - it has the latest content from the running job.
   // DB content may be stale (saved at disconnect, but generation continued).
-  const initialResponse: TMessage = {
+  let initialResponse: TMessage = {
     messageId: existingResponseMessage?.messageId ?? responseMessageId,
     parentMessageId: existingResponseMessage?.parentMessageId ?? userMessage.messageId,
     conversationId,
@@ -144,14 +144,33 @@ function buildSubmissionFromResumeState(
     iconURL: preferDefinedString(existingResponseMessage?.iconURL, resumeState.iconURL),
   } as TMessage;
 
+  // Re-paused turn: seed the approval / ask-user controls straight onto the
+  // placeholder so they render on load without waiting for the SSE sync replay.
+  if (resumeState.pendingAction) {
+    initialResponse = applyPendingAction(initialResponse, resumeState.pendingAction);
+  }
+
   const conversation: TConversation = {
     conversationId,
     title: 'Resumed Chat',
     endpoint: null,
   } as TConversation;
 
+  // On reload, `messages` is the full DB array, which already holds the paused user
+  // row and the partial (unfinished) assistant row under the same ids that
+  // `userMessage` / `initialResponse` (and the resume final event's request/response
+  // messages) re-supply. Strip them so createdHandler/finalHandler — which build
+  // `[...messages, requestMessage, responseMessage]` — don't append a duplicate pair.
+  const pausedResponseIdUnpadded = initialResponse.messageId.replace(/_+$/, '');
+  const dedupedMessages = messages.filter(
+    (m) =>
+      m.messageId !== userMessage.messageId &&
+      m.messageId !== initialResponse.messageId &&
+      m.messageId !== pausedResponseIdUnpadded,
+  );
+
   return {
-    messages,
+    messages: dedupedMessages,
     userMessage,
     initialResponse,
     conversation,
diff --git a/client/src/hooks/SSE/useSSE.ts b/client/src/hooks/SSE/useSSE.ts
index 8f18f1d556..f1f17eaee1 100644
--- a/client/src/hooks/SSE/useSSE.ts
+++ b/client/src/hooks/SSE/useSSE.ts
@@ -7,16 +7,23 @@ import {
   UsageEvents,
   StepEvents,
   createPayload,
+  ApprovalEvents,
   removeNullishValues,
 } from 'librechat-data-provider';
-import type { TMessage, TPayload, TSubmission, EventSubmission } from 'librechat-data-provider';
+import type {
+  Agents,
+  TMessage,
+  TPayload,
+  TSubmission,
+  EventSubmission,
+} from 'librechat-data-provider';
 import type { EventHandlerParams } from './useEventHandlers';
 import type { TResData } from '~/common';
+import { clearAllDrafts, applyPendingAction, findPendingActionMessageIndex } from '~/utils';
 import { useGetStartupConfig, useGetUserBalance } from '~/data-provider';
 import { useAuthContext } from '~/hooks/AuthContext';
 import useEventHandlers from './useEventHandlers';
 import useUsageHandler from './useUsageHandler';
-import { clearAllDrafts } from '~/utils';
 import store from '~/store';
 
 type ChatHelpers = Pick<
@@ -137,6 +144,18 @@ export default function useSSE(
         contextHandler(data.data, { ...submission, userMessage });
       } else if (data.event === UsageEvents.ON_TOKEN_USAGE) {
         usageHandler(data.data, { ...submission, userMessage });
+      } else if (data.event === ApprovalEvents.ON_PENDING_ACTION) {
+        const pendingAction = data.data as Agents.PendingAction;
+        const messages = getMessages() ?? [];
+        const index = findPendingActionMessageIndex(messages, pendingAction);
+        if (index >= 0) {
+          const updated = applyPendingAction(messages[index], pendingAction);
+          if (updated !== messages[index]) {
+            const nextMessages = [...messages];
+            nextMessages[index] = updated;
+            setMessages(nextMessages);
+          }
+        }
       } else if (data.event != null) {
         if (
           data.event === StepEvents.ON_MESSAGE_DELTA ||
diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json
index e7af1f886a..1c32ffa291 100644
--- a/client/src/locales/en/translation.json
+++ b/client/src/locales/en/translation.json
@@ -829,6 +829,9 @@
   "com_ui_api_keys_empty_text": "API keys let external applications run your agents through the OpenAI-compatible API. Create a key to get started.",
   "com_ui_api_keys_empty_title": "Connect your agents to external apps",
   "com_ui_api_keys_load_error": "Failed to load API keys",
+  "com_ui_approval_error": "Something went wrong submitting your decision. Please try again.",
+  "com_ui_approval_expired": "This request expired or was already handled.",
+  "com_ui_approve": "Approve",
   "com_ui_archive": "Archive",
   "com_ui_archive_delete_error": "Failed to delete archived conversation",
   "com_ui_archive_error": "Failed to archive conversation",
@@ -1228,6 +1231,7 @@
   "com_ui_initializing": "Initializing...",
   "com_ui_input": "Input",
   "com_ui_instructions": "Instructions",
+  "com_ui_invalid_json": "Invalid JSON",
   "com_ui_invocation_auto": "Auto",
   "com_ui_invocation_auto_info": "The skill is automatically applied by the agent when relevant to the conversation",
   "com_ui_invocation_both": "Both",
@@ -1505,6 +1509,8 @@
   "com_ui_regenerating": "Regenerating...",
   "com_ui_region": "Region",
   "com_ui_reinitialize": "Reinitialize",
+  "com_ui_reject": "Reject",
+  "com_ui_reject_reason_placeholder": "Optional: reason for rejecting (shared with the agent)",
   "com_ui_relevance": "Relevance",
   "com_ui_remote_access": "Remote Access",
   "com_ui_remote_agent_role_editor": "Editor",
@@ -1534,6 +1540,7 @@
   "com_ui_reset_var": "Reset {{0}}",
   "com_ui_reset_zoom": "Reset Zoom",
   "com_ui_resource": "resource",
+  "com_ui_respond": "Respond",
   "com_ui_response": "Response",
   "com_ui_result": "Result",
   "com_ui_result_found": "{{count}} result found",
@@ -1794,6 +1801,8 @@
   "com_ui_subagent_ticker_writing": "Writing",
   "com_ui_subagent_waiting": "Waiting for first update…",
   "com_ui_submit": "Submit",
+  "com_ui_submit_decisions": "Submit {{0}} decisions",
+  "com_ui_submitting": "Submitting…",
   "com_ui_summarized": "Summarized",
   "com_ui_summarizing": "Summarizing...",
   "com_ui_support_contact": "Support Contact",
@@ -1825,6 +1834,7 @@
   "com_ui_tool_name_image_edit": "Image Edit",
   "com_ui_tool_name_image_gen": "Image Generation",
   "com_ui_tool_name_web_search": "Web Search",
+  "com_ui_tool_response_placeholder": "Type a substitute result to return to the agent",
   "com_ui_tools": "Tools",
   "com_ui_tools_and_actions": "Tools and Actions",
   "com_ui_transferred_to": "Transferred to",
@@ -1927,6 +1937,7 @@
   "com_ui_xhigh": "Extra High",
   "com_ui_yes": "Yes",
   "com_ui_you": "You",
+  "com_ui_your_answer": "Type your answer…",
   "com_ui_your_api_key": "Your API Key",
   "com_ui_your_projects": "Your projects",
   "com_ui_zoom": "Zoom",
diff --git a/client/src/utils/approval.spec.ts b/client/src/utils/approval.spec.ts
new file mode 100644
index 0000000000..49265a9995
--- /dev/null
+++ b/client/src/utils/approval.spec.ts
@@ -0,0 +1,299 @@
+import { ContentTypes } from 'librechat-data-provider';
+import type { Agents, TMessage, TMessageContentParts } from 'librechat-data-provider';
+import {
+  ASK_USER_QUESTION,
+  applyPendingAction,
+  countTaggedApprovalParts,
+  getAskUserQuestionPart,
+  findPendingActionMessageIndex,
+} from './approval';
+
+const toolCallPart = (id: string, extra: Record<string, unknown> = {}): TMessageContentParts =>
+  ({
+    type: ContentTypes.TOOL_CALL,
+    [ContentTypes.TOOL_CALL]: { id, name: 'search', args: '{}', ...extra },
+  }) as unknown as TMessageContentParts;
+
+const textPart = (text: string): TMessageContentParts =>
+  ({ type: ContentTypes.TEXT, text }) as unknown as TMessageContentParts;
+
+const msg = (over: Partial<TMessage> = {}): TMessage =>
+  ({ messageId: 'm1', isCreatedByUser: false, content: [], ...over }) as unknown as TMessage;
+
+const toolApprovalAction = (over: Record<string, unknown> = {}): Agents.PendingAction =>
+  ({
+    actionId: 'a1',
+    streamId: 's1',
+    createdAt: 0,
+    payload: {
+      type: 'tool_approval',
+      action_requests: [
+        { name: 'search', arguments: '{}', tool_call_id: 'tc1', description: 'Run search' },
+      ],
+      review_configs: [
+        { action_name: 'search', tool_call_id: 'tc1', allowed_decisions: ['approve', 'reject'] },
+      ],
+    },
+    ...over,
+  }) as unknown as Agents.PendingAction;
+
+const askAction = (over: Record<string, unknown> = {}): Agents.PendingAction =>
+  ({
+    actionId: 'a1',
+    streamId: 's1',
+    createdAt: 0,
+    payload: { type: 'ask_user_question', question: { question: 'What name?' } },
+    ...over,
+  }) as unknown as Agents.PendingAction;
+
+const getToolCall = (part: TMessageContentParts | undefined) =>
+  (part as unknown as { tool_call?: Agents.ToolCall & { approval?: unknown } })?.tool_call;
+
+describe('applyPendingAction — tool_approval', () => {
+  it('joins by tool_call_id (not position) and sets approval from the matching request + review', () => {
+    // tc1 is the SECOND part — a by-position join would mis-target the first.
+    const message = msg({ content: [toolCallPart('tcX'), toolCallPart('tc1')] });
+    const result = applyPendingAction(message, toolApprovalAction());
+
+    expect(result).not.toBe(message); // new reference: something changed
+    expect(getToolCall(result.content?.[0] as TMessageContentParts)?.approval).toBeUndefined();
+    expect(getToolCall(result.content?.[1] as TMessageContentParts)?.approval).toEqual({
+      actionId: 'a1',
+      allowed_decisions: ['approve', 'reject'],
+      description: 'Run search',
+    });
+  });
+
+  it('leaves a completed tool call (with output) untouched and returns the same message reference', () => {
+    const message = msg({ content: [toolCallPart('tc1', { output: 'already ran' })] });
+    const result = applyPendingAction(message, toolApprovalAction());
+    expect(result).toBe(message);
+  });
+
+  it('defaults allowed_decisions to [] when no review config matches the tool call', () => {
+    const action = toolApprovalAction({
+      payload: {
+        type: 'tool_approval',
+        action_requests: [{ name: 'search', arguments: '{}', tool_call_id: 'tc1' }],
+        review_configs: [], // no config for tc1
+      },
+    });
+    const message = msg({ content: [toolCallPart('tc1')] });
+    const result = applyPendingAction(message, action);
+    expect(getToolCall(result.content?.[0] as TMessageContentParts)?.approval).toMatchObject({
+      allowed_decisions: [],
+    });
+  });
+
+  it('returns the same message when content is empty or not an array', () => {
+    const empty = msg({ content: [] });
+    expect(applyPendingAction(empty, toolApprovalAction())).toBe(empty);
+    const nonArray = msg({ content: undefined as unknown as TMessageContentParts[] });
+    expect(applyPendingAction(nonArray, toolApprovalAction())).toBe(nonArray);
+  });
+
+  it('returns the same message when no tool call matches the pending request', () => {
+    const message = msg({ content: [toolCallPart('other')] });
+    expect(applyPendingAction(message, toolApprovalAction())).toBe(message);
+  });
+});
+
+describe('countTaggedApprovalParts', () => {
+  const twoToolAction = () =>
+    toolApprovalAction({
+      payload: {
+        type: 'tool_approval',
+        action_requests: [
+          { name: 'search', arguments: '{}', tool_call_id: 'tc1' },
+          { name: 'search', arguments: '{}', tool_call_id: 'tc2' },
+        ],
+        review_configs: [],
+      },
+    });
+
+  it('counts tagged parts so a partial multi-tool apply is detectable (1 of 2)', () => {
+    const action = twoToolAction();
+    // Only tc1 has rendered when the action is first applied → 1 < 2, retry should continue.
+    const partial = applyPendingAction(msg({ content: [toolCallPart('tc1')] }), action);
+    expect(countTaggedApprovalParts(partial, 'a1')).toBe(1);
+    // Both siblings present → both tagged → retry can stop.
+    const full = applyPendingAction(
+      msg({ content: [toolCallPart('tc1'), toolCallPart('tc2')] }),
+      action,
+    );
+    expect(countTaggedApprovalParts(full, 'a1')).toBe(2);
+  });
+
+  it('returns 0 when nothing is tagged or content is not an array', () => {
+    expect(countTaggedApprovalParts(msg({ content: [toolCallPart('tc1')] }), 'a1')).toBe(0);
+    expect(countTaggedApprovalParts(msg({ content: [textPart('hi')] }), 'a1')).toBe(0);
+    expect(
+      countTaggedApprovalParts(
+        msg({ content: undefined as unknown as TMessageContentParts[] }),
+        'a1',
+      ),
+    ).toBe(0);
+  });
+
+  it('ignores parts tagged with a different actionId', () => {
+    const tagged = applyPendingAction(
+      msg({ content: [toolCallPart('tc1')] }),
+      toolApprovalAction(),
+    );
+    expect(countTaggedApprovalParts(tagged, 'a1')).toBe(1);
+    expect(countTaggedApprovalParts(tagged, 'other-action')).toBe(0);
+  });
+});
+
+describe('applyPendingAction — subagent-nested tool calls', () => {
+  const subagentMsg = (childId: string): TMessage =>
+    msg({
+      content: [
+        {
+          type: ContentTypes.TOOL_CALL,
+          [ContentTypes.TOOL_CALL]: {
+            id: 'sub1',
+            name: 'subagent',
+            args: '{}',
+            subagent_content: [toolCallPart(childId)],
+          },
+        } as unknown as TMessageContentParts,
+      ],
+    });
+
+  const childAction = () =>
+    toolApprovalAction({
+      payload: {
+        type: 'tool_approval',
+        action_requests: [{ name: 'search', arguments: '{}', tool_call_id: 'child-tc1' }],
+        review_configs: [
+          {
+            action_name: 'search',
+            tool_call_id: 'child-tc1',
+            allowed_decisions: ['approve', 'reject'],
+          },
+        ],
+      },
+    });
+
+  it('tags a tool paused inside a subagent and makes it countable', () => {
+    const message = subagentMsg('child-tc1');
+    const result = applyPendingAction(message, childAction());
+    expect(result).not.toBe(message); // new reference
+
+    const parentToolCall = getToolCall(result.content?.[0] as TMessageContentParts) as
+      | { subagent_content?: TMessageContentParts[] }
+      | undefined;
+    const nested = parentToolCall?.subagent_content?.[0];
+    expect(getToolCall(nested)?.approval).toMatchObject({ actionId: 'a1' });
+    // The retry loop's "all tagged" check is now reachable for the nested call.
+    expect(countTaggedApprovalParts(result, 'a1')).toBe(1);
+  });
+
+  it('returns the same message when no nested tool call matches', () => {
+    const message = subagentMsg('child-other');
+    expect(applyPendingAction(message, childAction())).toBe(message);
+    expect(countTaggedApprovalParts(message, 'a1')).toBe(0);
+  });
+});
+
+describe('applyPendingAction — ask_user_question', () => {
+  it('appends a synthetic ask-user-question part carrying the actionId and question', () => {
+    const message = msg({ content: [textPart('hello')] });
+    const result = applyPendingAction(message, askAction());
+    expect(result.content).toHaveLength(2);
+    const part = getAskUserQuestionPart(result.content?.[1] as TMessageContentParts);
+    expect(part?.[ASK_USER_QUESTION]).toMatchObject({
+      actionId: 'a1',
+      question: { question: 'What name?' },
+    });
+  });
+
+  it('is idempotent on replay: the same actionId replaces in place rather than stacking', () => {
+    const once = applyPendingAction(msg({ content: [] }), askAction());
+    const twice = applyPendingAction(once, askAction());
+    expect(twice.content).toHaveLength(1); // not duplicated
+
+    const other = applyPendingAction(twice, askAction({ actionId: 'a2' }));
+    expect(other.content).toHaveLength(2); // a different action does append
+  });
+
+  it('coerces non-array content to a single-part array', () => {
+    const message = msg({ content: undefined as unknown as TMessageContentParts[] });
+    const result = applyPendingAction(message, askAction());
+    expect(result.content).toHaveLength(1);
+  });
+});
+
+describe('applyPendingAction — unsupported type', () => {
+  it('returns the original message unchanged', () => {
+    const message = msg({ content: [textPart('hi')] });
+    const action = {
+      actionId: 'a1',
+      payload: { type: 'mystery' },
+    } as unknown as Agents.PendingAction;
+    expect(applyPendingAction(message, action)).toBe(message);
+  });
+});
+
+describe('getAskUserQuestionPart', () => {
+  it('returns the typed part for an ask-user-question synthetic part', () => {
+    const appended = applyPendingAction(msg({ content: [] }), askAction());
+    const part = getAskUserQuestionPart(appended.content?.[0] as TMessageContentParts);
+    expect(part?.type).toBe(ASK_USER_QUESTION);
+    expect(part?.[ASK_USER_QUESTION].actionId).toBe('a1');
+  });
+
+  it('returns undefined for a non-ask part, undefined, or a wrong type', () => {
+    expect(getAskUserQuestionPart(toolCallPart('tc1'))).toBeUndefined();
+    expect(getAskUserQuestionPart(undefined)).toBeUndefined();
+    expect(getAskUserQuestionPart(textPart('x'))).toBeUndefined();
+  });
+});
+
+describe('findPendingActionMessageIndex', () => {
+  const assistant = (messageId: string) => msg({ messageId, isCreatedByUser: false });
+  const user = (messageId: string) => msg({ messageId, isCreatedByUser: true });
+
+  it('returns the index of the assistant message matching responseMessageId exactly', () => {
+    const messages = [user('u1'), assistant('r1'), assistant('r2')];
+    const idx = findPendingActionMessageIndex(
+      messages,
+      toolApprovalAction({ responseMessageId: 'r1' }),
+    );
+    expect(idx).toBe(1);
+  });
+
+  it('returns -1 (retry) when a provided responseMessageId matches only a user message', () => {
+    // `<userMsg>_` style ids could collide with the user bubble — never resolve to it;
+    // -1 makes the caller retry on the next frame once the assistant placeholder renders.
+    const messages = [user('shared'), assistant('r-last')];
+    const idx = findPendingActionMessageIndex(
+      messages,
+      toolApprovalAction({ responseMessageId: 'shared' }),
+    );
+    expect(idx).toBe(-1);
+  });
+
+  it('returns -1 (retry) when a provided responseMessageId is not found at all', () => {
+    // Provided-but-absent means the in-flight assistant placeholder is not in the list
+    // yet — defer rather than attach the prompt/approval to a prior reply.
+    const messages = [assistant('r1'), user('u1'), assistant('r2')];
+    const idx = findPendingActionMessageIndex(
+      messages,
+      toolApprovalAction({ responseMessageId: 'missing' }),
+    );
+    expect(idx).toBe(-1);
+  });
+
+  it('falls back to the last assistant message only when no responseMessageId is provided', () => {
+    const messages = [assistant('r1'), user('u1'), assistant('r2')];
+    const idx = findPendingActionMessageIndex(messages, toolApprovalAction());
+    expect(idx).toBe(2);
+  });
+
+  it('returns -1 for an empty list or when no assistant message exists', () => {
+    expect(findPendingActionMessageIndex([], toolApprovalAction())).toBe(-1);
+    expect(findPendingActionMessageIndex([user('u1')], toolApprovalAction())).toBe(-1);
+  });
+});
diff --git a/client/src/utils/approval.ts b/client/src/utils/approval.ts
new file mode 100644
index 0000000000..6fdf29b8d9
--- /dev/null
+++ b/client/src/utils/approval.ts
@@ -0,0 +1,273 @@
+import { ContentTypes } from 'librechat-data-provider';
+import type { Agents, TMessage, TMessageContentParts } from 'librechat-data-provider';
+
+/**
+ * UI-only content-part type used to render an `ask_user_question` pause inline
+ * with the assistant's other content parts. It rides on the standard `content`
+ * array (which `Agents.MessageContentComplex` allows for arbitrary `type`
+ * strings), so it survives SSE, sync, and DB rehydration without a new wire
+ * type or an extra field on `TMessage`.
+ */
+export const ASK_USER_QUESTION = 'ask_user_question' as const;
+
+/** Shape of the synthetic content part carrying an ask-user pending action. */
+export interface AskUserQuestionPart {
+  type: typeof ASK_USER_QUESTION;
+  [ASK_USER_QUESTION]: {
+    actionId: string;
+    question: Agents.AskUserQuestionRequest;
+  };
+}
+
+/**
+ * The synthetic type isn't in the `ContentTypes` union, so this reads the
+ * `type` field through a cast rather than a type-predicate (which TS rejects
+ * because `AskUserQuestionPart` isn't assignable to the strict
+ * `TMessageContentParts` union).
+ */
+const isAskUserQuestionPart = (part: TMessageContentParts | undefined): boolean =>
+  (part as { type?: string } | undefined)?.type === ASK_USER_QUESTION &&
+  part != null &&
+  ASK_USER_QUESTION in part;
+
+const getToolCallId = (part: TMessageContentParts | undefined): string =>
+  (part?.[ContentTypes.TOOL_CALL] as Agents.ToolCall | undefined)?.id ?? '';
+
+/** A tool-call value that may carry approval state and/or nested subagent content. */
+type ToolCallWithApproval = Agents.ToolCall & {
+  approval?: unknown;
+  subagent_content?: TMessageContentParts[];
+};
+
+/**
+ * Tags one tool-call part with the pending action's approval when it matches an
+ * `action_request` (joined by `tool_call_id`, NOT position) and is still unresolved.
+ *
+ * Recurses into a subagent's `subagent_content`: a tool paused INSIDE a subagent
+ * lives there, not as a top-level part, so without this the approval never attaches
+ * and the user gets no controls. Returns a NEW part only when something changed.
+ */
+function tagApprovalOnPart(
+  part: TMessageContentParts,
+  actionId: string,
+  requestByToolCallId: Map<string, Agents.ToolApprovalRequest>,
+  reviewByToolCallId: Map<string, Agents.ToolReviewConfig>,
+): { part: TMessageContentParts; changed: boolean } {
+  if (part?.type !== ContentTypes.TOOL_CALL) {
+    return { part, changed: false };
+  }
+  const toolCall = part[ContentTypes.TOOL_CALL] as ToolCallWithApproval | undefined;
+  if (!toolCall) {
+    return { part, changed: false };
+  }
+
+  let nextToolCall = toolCall;
+  let changed = false;
+
+  // Descend into nested subagent tool calls first.
+  if (Array.isArray(toolCall.subagent_content) && toolCall.subagent_content.length > 0) {
+    let nestedChanged = false;
+    const nextNested = toolCall.subagent_content.map((nestedPart) => {
+      const res = tagApprovalOnPart(nestedPart, actionId, requestByToolCallId, reviewByToolCallId);
+      if (res.changed) {
+        nestedChanged = true;
+      }
+      return res.part;
+    });
+    if (nestedChanged) {
+      nextToolCall = { ...nextToolCall, subagent_content: nextNested };
+      changed = true;
+    }
+  }
+
+  // Tag this call itself when it's one of the paused requests and still unresolved
+  // (an `output` means the pause already resolved — leave it alone).
+  const toolCallId = getToolCallId(part);
+  const request = toolCallId ? requestByToolCallId.get(toolCallId) : undefined;
+  if (request && (nextToolCall.output?.length ?? 0) === 0) {
+    const reviewConfig = reviewByToolCallId.get(toolCallId);
+    nextToolCall = {
+      ...nextToolCall,
+      approval: {
+        actionId,
+        allowed_decisions: reviewConfig?.allowed_decisions ?? [],
+        description: request.description,
+      },
+    };
+    changed = true;
+  }
+
+  if (!changed) {
+    return { part, changed: false };
+  }
+  return {
+    part: { ...part, [ContentTypes.TOOL_CALL]: nextToolCall } as TMessageContentParts,
+    changed: true,
+  };
+}
+
+/**
+ * Maps a tool-approval pending action onto a message's tool-call content parts,
+ * including tool calls nested inside subagents (see {@link tagApprovalOnPart}).
+ *
+ * Returns a NEW message only when something changed (referential stability lets
+ * React bail out of needless re-renders); otherwise the original is returned.
+ */
+function applyToolApproval(
+  message: TMessage,
+  actionId: string,
+  payload: Agents.ToolApprovalInterruptPayload,
+): TMessage {
+  const content = message.content;
+  if (!Array.isArray(content) || content.length === 0) {
+    return message;
+  }
+
+  const reviewByToolCallId = new Map<string, Agents.ToolReviewConfig>();
+  for (const config of payload.review_configs) {
+    reviewByToolCallId.set(config.tool_call_id, config);
+  }
+  const requestByToolCallId = new Map<string, Agents.ToolApprovalRequest>();
+  for (const requestItem of payload.action_requests) {
+    requestByToolCallId.set(requestItem.tool_call_id, requestItem);
+  }
+
+  let changed = false;
+  const nextContent = content.map((part) => {
+    const res = tagApprovalOnPart(part, actionId, requestByToolCallId, reviewByToolCallId);
+    if (res.changed) {
+      changed = true;
+    }
+    return res.part;
+  });
+
+  if (!changed) {
+    return message;
+  }
+  return { ...message, content: nextContent };
+}
+
+/**
+ * Appends (or refreshes) an ask-user-question content part for the pending
+ * action. Idempotent: replaces an existing part with the same `actionId` rather
+ * than stacking duplicates on reconnect/replay.
+ */
+function applyAskUserQuestion(
+  message: TMessage,
+  actionId: string,
+  payload: Agents.AskUserQuestionInterruptPayload,
+): TMessage {
+  const content = Array.isArray(message.content) ? message.content : [];
+  const askPart = {
+    type: ASK_USER_QUESTION,
+    [ASK_USER_QUESTION]: { actionId, question: payload.question },
+  } as unknown as TMessageContentParts;
+
+  const existingIdx = content.findIndex(
+    (part) =>
+      isAskUserQuestionPart(part) &&
+      (part as unknown as AskUserQuestionPart)[ASK_USER_QUESTION].actionId === actionId,
+  );
+  if (existingIdx >= 0) {
+    const nextContent = [...content];
+    nextContent[existingIdx] = askPart;
+    return { ...message, content: nextContent };
+  }
+  return { ...message, content: [...content, askPart] };
+}
+
+/**
+ * Applies a {@link Agents.PendingAction} onto the target response message,
+ * dispatching on the interrupt type. Pure — returns a new message only when the
+ * mapping actually changed something.
+ */
+export function applyPendingAction(
+  message: TMessage,
+  pendingAction: Agents.PendingAction,
+): TMessage {
+  const { payload, actionId } = pendingAction;
+  if (payload.type === 'tool_approval') {
+    return applyToolApproval(message, actionId, payload);
+  }
+  if (payload.type === 'ask_user_question') {
+    return applyAskUserQuestion(message, actionId, payload);
+  }
+  return message;
+}
+
+/**
+ * Counts the tool-call content parts already tagged with this action's approval —
+ * i.e. how many of a tool-approval pending action's `action_requests` have rendered
+ * and been mapped. A multi-tool pause can render its sibling cards across several
+ * frames, so the SSE retry compares this against `action_requests.length` to know
+ * whether EVERY paused call is tagged yet.
+ */
+export function countTaggedApprovalParts(message: TMessage, actionId: string): number {
+  const content = message.content;
+  if (!Array.isArray(content)) {
+    return 0;
+  }
+  const countIn = (parts: TMessageContentParts[]): number => {
+    let count = 0;
+    for (const part of parts) {
+      if (part?.type !== ContentTypes.TOOL_CALL) {
+        continue;
+      }
+      const toolCall = part[ContentTypes.TOOL_CALL] as ToolCallWithApproval | undefined;
+      if ((toolCall?.approval as { actionId?: string } | undefined)?.actionId === actionId) {
+        count += 1;
+      }
+      // Nested subagent tool calls count too, so the retry loop's "all tagged" check
+      // is reachable for a tool paused inside a subagent.
+      if (Array.isArray(toolCall?.subagent_content)) {
+        count += countIn(toolCall.subagent_content);
+      }
+    }
+    return count;
+  };
+  return countIn(content);
+}
+
+/** Returns the ask-user-question synthetic part when `part` is one, else undefined. */
+export function getAskUserQuestionPart(
+  part: TMessageContentParts | undefined,
+): AskUserQuestionPart | undefined {
+  return isAskUserQuestionPart(part) ? (part as unknown as AskUserQuestionPart) : undefined;
+}
+
+/**
+ * Resolves the assistant response message a pending action targets within
+ * `messages`. Returns the index, or -1 when the assistant placeholder isn't present
+ * yet (the caller retries on the next frame).
+ *
+ * Only ever matches an ASSISTANT message. The `responseMessageId` for a fresh turn
+ * is the user message id with a trailing underscore (`<userMsg>_`), so a naive
+ * underscore-strip would resolve to the just-created USER message before the
+ * assistant placeholder exists — appending the prompt to the wrong bubble and never
+ * triggering the retry. Matching strictly on assistant messages avoids that.
+ */
+export function findPendingActionMessageIndex(
+  messages: TMessage[],
+  pendingAction: Agents.PendingAction,
+): number {
+  const isAssistant = (message: TMessage | undefined) => message?.isCreatedByUser === false;
+  const { responseMessageId } = pendingAction;
+  if (responseMessageId) {
+    // When the id is provided, ONLY an exact assistant match counts. A miss means the
+    // assistant placeholder for this turn hasn't been inserted yet — return -1 so the
+    // caller retries on the next frame. Falling back to the last assistant here would
+    // attach the prompt/approval to a PRIOR reply (applyAskUserQuestion always appends),
+    // and the retry would never run. The id is the in-flight response, so once it renders
+    // the retry resolves it.
+    return messages.findIndex(
+      (message) => message.messageId === responseMessageId && isAssistant(message),
+    );
+  }
+  /** No responseMessageId: best-effort to the last assistant (the in-flight placeholder). */
+  for (let i = messages.length - 1; i >= 0; i--) {
+    if (isAssistant(messages[i])) {
+      return i;
+    }
+  }
+  return -1;
+}
diff --git a/client/src/utils/index.ts b/client/src/utils/index.ts
index ea322b047f..43dc5baeea 100644
--- a/client/src/utils/index.ts
+++ b/client/src/utils/index.ts
@@ -38,6 +38,7 @@ export * from './previewCache';
 export * from './groupToolCalls';
 export * from './toolLabels';
 export * from './favoritesError';
+export * from './approval';
 export { default as cn } from './cn';
 export { default as logger } from './logger';
 export { default as getLoginError } from './getLoginError';
diff --git a/package-lock.json b/package-lock.json
index e61012372b..e7ac78345b 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -61,7 +61,7 @@
         "@azure/storage-blob": "^12.30.0",
         "@google/genai": "^2.8.0",
         "@keyv/redis": "^4.3.3",
-        "@librechat/agents": "^3.2.52",
+        "@librechat/agents": "^3.2.53",
         "@librechat/api": "*",
         "@librechat/data-schemas": "*",
         "@microsoft/microsoft-graph-client": "^3.0.7",
@@ -9511,13 +9511,13 @@
       }
     },
     "node_modules/@langchain/langgraph": {
-      "version": "1.4.5",
-      "resolved": "https://registry.npmjs.org/@langchain/langgraph/-/langgraph-1.4.5.tgz",
-      "integrity": "sha512-V+o29JPBaMoK/e+8R/m81XaC8h5iNuwWymvgLFhXfJbf7E2xt2mQUkcVXTi4cudGRHbRd14kidCpfaQbfPoYCw==",
+      "version": "1.4.7",
+      "resolved": "https://registry.npmjs.org/@langchain/langgraph/-/langgraph-1.4.7.tgz",
+      "integrity": "sha512-2tcyf3QGC7v89kqSxMCtRvzg/3L/4yHtOaWC49A8KieCciWJs7LGaxHoPB6QRxXyUgyR+Zg9Q1ss/XJIE+JuSQ==",
       "license": "MIT",
       "dependencies": {
-        "@langchain/langgraph-checkpoint": "^1.1.2",
-        "@langchain/langgraph-sdk": "~1.9.24",
+        "@langchain/langgraph-checkpoint": "^1.1.3",
+        "@langchain/langgraph-sdk": "~1.9.25",
         "@langchain/protocol": "^0.0.18",
         "@standard-schema/spec": "1.1.0"
       },
@@ -9526,19 +9526,13 @@
       },
       "peerDependencies": {
         "@langchain/core": "^1.1.48",
-        "zod": "^3.25.32 || ^4.2.0",
-        "zod-to-json-schema": "^3.x"
-      },
-      "peerDependenciesMeta": {
-        "zod-to-json-schema": {
-          "optional": true
-        }
+        "zod": "^3.25.32 || ^4.2.0"
       }
     },
     "node_modules/@langchain/langgraph-checkpoint": {
-      "version": "1.1.2",
-      "resolved": "https://registry.npmjs.org/@langchain/langgraph-checkpoint/-/langgraph-checkpoint-1.1.2.tgz",
-      "integrity": "sha512-m5Xd7W3G9JrlEhFZ5WAcqZPgE46R9gr1gFDFaVqEKeuwin3tgEp0jlPbru+iFXCug338DcQjFS/Kuuci21ydvw==",
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/@langchain/langgraph-checkpoint/-/langgraph-checkpoint-1.1.3.tgz",
+      "integrity": "sha512-wgzdQNeEsdw1e+4lvlj0tdq/RYR/k1vPin10g0ymGoehZDDgd9nvIllGXSXN4TFgF9sf5qQP/KTkOcLfeseIhA==",
       "license": "MIT",
       "engines": {
         "node": ">=18"
@@ -9547,10 +9541,72 @@
         "@langchain/core": "^1.1.48"
       }
     },
+    "node_modules/@langchain/langgraph-checkpoint-mongodb": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/@langchain/langgraph-checkpoint-mongodb/-/langgraph-checkpoint-mongodb-1.4.0.tgz",
+      "integrity": "sha512-CFTrK7LrhyjotGn2YEyqYQIT1YDnw8rGC3ZTApkPvLHAW1BpVwC5N2WhA/StGOCqbCNx1HZkskLtBFhjkcKGwA==",
+      "license": "MIT",
+      "dependencies": {
+        "mongodb": "^6.21.0"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "peerDependencies": {
+        "@langchain/core": "^1.1.44",
+        "@langchain/langgraph-checkpoint": "^1.0.0"
+      }
+    },
+    "node_modules/@langchain/langgraph-checkpoint-mongodb/node_modules/mongodb": {
+      "version": "6.21.0",
+      "resolved": "https://registry.npmjs.org/mongodb/-/mongodb-6.21.0.tgz",
+      "integrity": "sha512-URyb/VXMjJ4da46OeSXg+puO39XH9DeQpWCslifrRn9JWugy0D+DvvBvkm2WxmHe61O/H19JM66p1z7RHVkZ6A==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "@mongodb-js/saslprep": "^1.3.0",
+        "bson": "^6.10.4",
+        "mongodb-connection-string-url": "^3.0.2"
+      },
+      "engines": {
+        "node": ">=16.20.1"
+      },
+      "peerDependencies": {
+        "@aws-sdk/credential-providers": "^3.188.0",
+        "@mongodb-js/zstd": "^1.1.0 || ^2.0.0",
+        "gcp-metadata": "^5.2.0",
+        "kerberos": "^2.0.1",
+        "mongodb-client-encryption": ">=6.0.0 <7",
+        "snappy": "^7.3.2",
+        "socks": "^2.7.1"
+      },
+      "peerDependenciesMeta": {
+        "@aws-sdk/credential-providers": {
+          "optional": true
+        },
+        "@mongodb-js/zstd": {
+          "optional": true
+        },
+        "gcp-metadata": {
+          "optional": true
+        },
+        "kerberos": {
+          "optional": true
+        },
+        "mongodb-client-encryption": {
+          "optional": true
+        },
+        "snappy": {
+          "optional": true
+        },
+        "socks": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@langchain/langgraph-sdk": {
-      "version": "1.9.24",
-      "resolved": "https://registry.npmjs.org/@langchain/langgraph-sdk/-/langgraph-sdk-1.9.24.tgz",
-      "integrity": "sha512-WhM6QdxNipndQjl5nkvqnBt9Wl16oO2p0KiVhndAFLJMwO3bZLEx++lwtbqUFQu1sHyNxiWixgRGm8qZsuHCeA==",
+      "version": "1.9.25",
+      "resolved": "https://registry.npmjs.org/@langchain/langgraph-sdk/-/langgraph-sdk-1.9.25.tgz",
+      "integrity": "sha512-mRKW8zyQUaHox+HirRFMRrPqOvNbQI3xeXDt6kkk4PbBg77V92bsO1WzUVNrmJ81zCkvxyOrWSK8D6ioCj0a8A==",
       "license": "MIT",
       "dependencies": {
         "@langchain/protocol": "^0.0.18",
@@ -9823,9 +9879,9 @@
       }
     },
     "node_modules/@librechat/agents": {
-      "version": "3.2.52",
-      "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.2.52.tgz",
-      "integrity": "sha512-NS/yXn412h1/JZ5LQpDGxIpW0Pb3r6zDKpAyH/U88CLcSAXwStivU7JouAphtdemJOGQpmGSv7IXpBM7wY0tOw==",
+      "version": "3.2.53",
+      "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.2.53.tgz",
+      "integrity": "sha512-GQ4n14AUUzISgcQMCCBkDfiUEWWujWOoeaIx1D9BgaYo6k1IW/mV1epthpGYbARQNaM7FiKVJFINs3CMJXfBXA==",
       "license": "MIT",
       "dependencies": {
         "@anthropic-ai/sdk": "^0.103.0",
@@ -9838,7 +9894,7 @@
         "@langchain/google-gauth": "2.2.0",
         "@langchain/google-genai": "2.2.0",
         "@langchain/google-vertexai": "2.2.0",
-        "@langchain/langgraph": "^1.4.5",
+        "@langchain/langgraph": "^1.4.6",
         "@langchain/mistralai": "^1.2.0",
         "@langchain/openai": "1.5.3",
         "@langchain/textsplitters": "^1.0.1",
@@ -42288,6 +42344,9 @@
       "name": "@librechat/api",
       "version": "1.7.34",
       "license": "ISC",
+      "dependencies": {
+        "@langchain/langgraph-checkpoint-mongodb": "^1.4.0"
+      },
       "devDependencies": {
         "@babel/preset-env": "^7.29.5",
         "@babel/preset-react": "^7.18.6",
@@ -42346,7 +42405,7 @@
         "@azure/storage-blob": "^12.30.0",
         "@google/genai": "^2.8.0",
         "@keyv/redis": "^4.3.3",
-        "@librechat/agents": "^3.2.52",
+        "@librechat/agents": "^3.2.53",
         "@librechat/data-schemas": "*",
         "@modelcontextprotocol/sdk": "^1.29.0",
         "@opentelemetry/api": "^1.9.0",
diff --git a/packages/api/package.json b/packages/api/package.json
index 35f0d42ef2..0e7c0f098f 100644
--- a/packages/api/package.json
+++ b/packages/api/package.json
@@ -113,7 +113,7 @@
     "@azure/storage-blob": "^12.30.0",
     "@google/genai": "^2.8.0",
     "@keyv/redis": "^4.3.3",
-    "@librechat/agents": "^3.2.52",
+    "@librechat/agents": "^3.2.53",
     "@librechat/data-schemas": "*",
     "@modelcontextprotocol/sdk": "^1.29.0",
     "@opentelemetry/api": "^1.9.0",
@@ -163,5 +163,8 @@
     "undici": "^7.24.1",
     "yauzl": "^3.2.1",
     "zod": "^3.22.4"
+  },
+  "dependencies": {
+    "@langchain/langgraph-checkpoint-mongodb": "^1.4.0"
   }
 }
diff --git a/packages/api/src/agents/__tests__/run-summarization.test.ts b/packages/api/src/agents/__tests__/run-summarization.test.ts
index e4b3a85dbb..d67d904b58 100644
--- a/packages/api/src/agents/__tests__/run-summarization.test.ts
+++ b/packages/api/src/agents/__tests__/run-summarization.test.ts
@@ -69,6 +69,11 @@ jest.mock('@librechat/agents', () => {
   };
 });
 
+// Stub the durable checkpointer so the HITL-enabled path doesn't need a live Mongo.
+jest.mock('~/agents/checkpointer', () => ({
+  getAgentCheckpointer: jest.fn().mockResolvedValue({}),
+}));
+
 import { Run } from '@librechat/agents';
 
 /** Minimal RunAgent factory */
@@ -1853,3 +1858,149 @@ describe('toolOutputReferences gating', () => {
     expect(callArgs).not.toHaveProperty('toolOutputReferences');
   });
 });
+
+// ---------------------------------------------------------------------------
+// Suite: deferred-tool replay on HITL resume (Codex G3)
+//
+// The resume path rebuilds the graph with `messages: []` (state comes from the
+// durable checkpoint), so the in-turn `tool_search` results that mark a deferred
+// tool discovered aren't on the critical path. createRun's `discoveredToolNames`
+// input replays those names — captured at pause — so the paused deferred tool is
+// promoted back into `toolDefinitions` (and `defer_loading` flipped) and is present
+// in the rebuilt schema-only toolMap. Without it, the approved tool would be missing
+// and resume would fail with "unknown tool".
+// ---------------------------------------------------------------------------
+describe('createRun deferred-tool replay (HITL resume)', () => {
+  /** Agent whose discoverable `deep_tool` lives ONLY in the registry (deferred). */
+  const makeDeferredAgent = (registryExtra: Array<[string, Record<string, unknown>]> = []) => {
+    const toolRegistry = new Map<string, Record<string, unknown>>([
+      ['deep_tool', { name: 'deep_tool', defer_loading: true }],
+      ...registryExtra,
+    ]);
+    return makeAgent({
+      hasDeferredTools: true,
+      // tool_search is in definitions; the discoverable deep_tool is NOT (deferred).
+      toolDefinitions: [{ name: 'tool_search' }],
+      toolRegistry,
+    });
+  };
+
+  const captureAgents = async (
+    agent: ReturnType<typeof makeAgent>,
+    extra: Record<string, unknown>,
+  ) => {
+    const signal = new AbortController().signal;
+    await createRun({
+      agents: [agent] as never,
+      signal,
+      streaming: true,
+      streamUsage: true,
+      ...extra,
+    });
+    const createMock = Run.create as jest.Mock;
+    const callArgs = createMock.mock.calls[0][0];
+    return callArgs.graphConfig.agents as Array<Record<string, unknown>>;
+  };
+
+  const defNames = (agents: Array<Record<string, unknown>>): string[] =>
+    (agents[0].toolDefinitions as Array<{ name: string }>).map((d) => d.name);
+
+  it('promotes a replayed discovered tool into toolDefinitions when messages is empty (resume)', async () => {
+    const agents = await captureAgents(makeDeferredAgent(), {
+      messages: [],
+      discoveredToolNames: ['deep_tool'],
+    });
+    expect(defNames(agents)).toContain('deep_tool');
+  });
+
+  it('does NOT include the deferred tool without replayed names (the bug being fixed)', async () => {
+    const agents = await captureAgents(makeDeferredAgent(), { messages: [] });
+    expect(defNames(agents)).not.toContain('deep_tool');
+  });
+
+  it('flips defer_loading=false on the replayed tool so the model binds it', async () => {
+    const agents = await captureAgents(makeDeferredAgent(), {
+      messages: [],
+      discoveredToolNames: ['deep_tool'],
+    });
+    const registry = agents[0].toolRegistry as Map<string, { defer_loading?: boolean }>;
+    expect(registry.get('deep_tool')?.defer_loading).toBe(false);
+  });
+
+  it('unions replayed names with names extracted from message history', async () => {
+    const toolSearchResult = {
+      _getType: () => 'tool',
+      name: 'tool_search',
+      content: JSON.stringify({ tools: [{ name: 'from_history' }] }),
+    };
+    const agents = await captureAgents(
+      makeDeferredAgent([['from_history', { name: 'from_history', defer_loading: true }]]),
+      { messages: [toolSearchResult], discoveredToolNames: ['deep_tool'] },
+    );
+    const names = defNames(agents);
+    expect(names).toContain('deep_tool'); // replayed
+    expect(names).toContain('from_history'); // extracted from messages
+  });
+
+  it('ignores replayed names when the agent has no deferred tools (inert)', async () => {
+    const agents = await captureAgents(
+      makeAgent({ hasDeferredTools: false, toolDefinitions: [], toolRegistry: new Map() }),
+      { messages: [], discoveredToolNames: ['deep_tool'] },
+    );
+    expect(defNames(agents)).not.toContain('deep_tool');
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Suite: HITL wiring gated to resumable callers (Codex J3)
+//
+// The tool-approval wiring (humanInTheLoop switch + PreToolUse hook) must engage ONLY for
+// callers that implement the pause/resume lifecycle. AgentClient passes hitlCapable: true;
+// the OpenAI-compatible + Responses controllers don't, so an approval-gated tool can't
+// pause on a route with no approval surface or resume endpoint.
+// ---------------------------------------------------------------------------
+describe('HITL wiring is gated on hitlCapable', () => {
+  const hitlAppConfig = {
+    config: {},
+    fileStrategy: FileSources.local,
+    imageOutputType: 'png',
+    endpoints: {
+      [EModelEndpoint.agents]: { toolApproval: { enabled: true } },
+    },
+  } as unknown as AppConfig;
+
+  const runAndGetConfig = async (extra: Record<string, unknown>) => {
+    await createRun({
+      agents: [makeAgent()] as never,
+      signal: new AbortController().signal,
+      appConfig: hitlAppConfig,
+      streaming: true,
+      streamUsage: true,
+      ...extra,
+    });
+    const createMock = Run.create as jest.Mock;
+    return createMock.mock.calls[0][0] as Record<string, unknown>;
+  };
+
+  it('attaches humanInTheLoop when the caller is hitlCapable and approval is enabled', async () => {
+    const config = await runAndGetConfig({ hitlCapable: true });
+    expect(config.humanInTheLoop).toBeDefined();
+    expect(config.hooks).toBeDefined();
+  });
+
+  it('does NOT attach HITL for a non-resumable caller even when approval is enabled', async () => {
+    const config = await runAndGetConfig({ hitlCapable: false });
+    expect(config).not.toHaveProperty('humanInTheLoop');
+    expect(config.graphConfig).toBeDefined();
+    // No checkpointer either — the run is identical to the no-HITL path.
+    expect(
+      (config.graphConfig as { compileOptions?: { checkpointer?: unknown } }).compileOptions
+        ?.checkpointer,
+    ).toBeUndefined();
+  });
+
+  it('defaults to non-HITL when hitlCapable is omitted', async () => {
+    const config = await runAndGetConfig({});
+    expect(config).not.toHaveProperty('humanInTheLoop');
+  });
+});
diff --git a/packages/api/src/agents/checkpointer.integration.spec.ts b/packages/api/src/agents/checkpointer.integration.spec.ts
new file mode 100644
index 0000000000..ae10502608
--- /dev/null
+++ b/packages/api/src/agents/checkpointer.integration.spec.ts
@@ -0,0 +1,109 @@
+import mongoose from 'mongoose';
+import { MongoMemoryServer } from 'mongodb-memory-server';
+import { emptyCheckpoint } from '@langchain/langgraph-checkpoint';
+import {
+  getAgentCheckpointer,
+  deleteAgentCheckpoint,
+  __resetCheckpointerForTests,
+} from './checkpointer';
+
+/**
+ * Integration tests for the durable Mongo checkpointer seam, against a real
+ * (in-memory) MongoDB. The unit spec covers config/selection with no connection;
+ * this proves the part that actually matters for correctness — that a checkpoint
+ * written for a thread can be read back and that `deleteAgentCheckpoint` truly
+ * prunes it (the cross-turn isolation guarantee), scoped to a single thread.
+ */
+
+const MONGO_CFG = { type: 'mongo' as const, ttl: 3600 };
+
+/** Minimal LangGraph put() args for an empty checkpoint under a thread. */
+function putArgs(threadId: string) {
+  const config = { configurable: { thread_id: threadId, checkpoint_ns: '' } };
+  const metadata = { source: 'input' as const, step: -1, writes: null, parents: {} };
+  return { config, checkpoint: emptyCheckpoint(), metadata };
+}
+
+const readConfig = (threadId: string) => ({
+  configurable: { thread_id: threadId, checkpoint_ns: '' },
+});
+
+let mongoServer: MongoMemoryServer;
+
+beforeAll(async () => {
+  mongoServer = await MongoMemoryServer.create();
+  await mongoose.connect(mongoServer.getUri());
+}, 60000);
+
+afterAll(async () => {
+  await mongoose.disconnect();
+  await mongoServer.stop();
+});
+
+beforeEach(() => {
+  // Force a fresh saver build (+ setup) against the live connection each test.
+  __resetCheckpointerForTests();
+});
+
+afterEach(async () => {
+  await mongoose.connection.dropDatabase();
+});
+
+describe('checkpointer (mongodb-memory-server integration)', () => {
+  it('builds a real MongoDBSaver when Mongo is connected', async () => {
+    const saver = await getAgentCheckpointer(MONGO_CFG);
+    expect(saver).toBeDefined();
+    // setup() created the checkpoint collection with a TTL index on `upserted_at`.
+    const indexes = await mongoose.connection.db!.collection('agent_checkpoints').indexes();
+    const ttlIndex = indexes.find((idx) => idx.expireAfterSeconds != null);
+    expect(ttlIndex).toBeDefined();
+    expect(ttlIndex?.expireAfterSeconds).toBe(3600);
+  });
+
+  it('returns undefined for the memory type (SDK MemorySaver fallback) even when connected', async () => {
+    expect(await getAgentCheckpointer({ type: 'memory' })).toBeUndefined();
+  });
+
+  it('memoizes one saver per resolved config', async () => {
+    const a = await getAgentCheckpointer(MONGO_CFG);
+    const b = await getAgentCheckpointer(MONGO_CFG);
+    expect(a).toBe(b);
+  });
+
+  it('deleteAgentCheckpoint prunes a thread’s persisted checkpoint', async () => {
+    const saver = await getAgentCheckpointer(MONGO_CFG);
+    expect(saver).toBeDefined();
+
+    const threadId = `convo-${new mongoose.Types.ObjectId().toString()}`;
+    const { config, checkpoint, metadata } = putArgs(threadId);
+    await saver!.put(config, checkpoint, metadata);
+
+    // The checkpoint is durably readable before pruning…
+    expect(await saver!.getTuple(readConfig(threadId))).toBeDefined();
+
+    await deleteAgentCheckpoint(threadId, MONGO_CFG);
+
+    // …and gone after (so turn N+1 on the same conversationId can't rehydrate it).
+    expect(await saver!.getTuple(readConfig(threadId))).toBeUndefined();
+  });
+
+  it('prunes only the targeted thread, leaving other conversations intact', async () => {
+    const saver = await getAgentCheckpointer(MONGO_CFG);
+    const threadA = `convo-${new mongoose.Types.ObjectId().toString()}`;
+    const threadB = `convo-${new mongoose.Types.ObjectId().toString()}`;
+
+    const a = putArgs(threadA);
+    const b = putArgs(threadB);
+    await saver!.put(a.config, a.checkpoint, a.metadata);
+    await saver!.put(b.config, b.checkpoint, b.metadata);
+
+    await deleteAgentCheckpoint(threadA, MONGO_CFG);
+
+    expect(await saver!.getTuple(readConfig(threadA))).toBeUndefined();
+    expect(await saver!.getTuple(readConfig(threadB))).toBeDefined();
+  });
+
+  it('deleteAgentCheckpoint is a no-op for an undefined threadId', async () => {
+    await expect(deleteAgentCheckpoint(undefined, MONGO_CFG)).resolves.toBeUndefined();
+  });
+});
diff --git a/packages/api/src/agents/checkpointer.spec.ts b/packages/api/src/agents/checkpointer.spec.ts
new file mode 100644
index 0000000000..cfadc2df73
--- /dev/null
+++ b/packages/api/src/agents/checkpointer.spec.ts
@@ -0,0 +1,72 @@
+import {
+  resolveCheckpointerConfig,
+  getApprovalTtlMs,
+  getAgentCheckpointer,
+  deleteAgentCheckpoint,
+  DEFAULT_CHECKPOINT_TTL_SECONDS,
+  __resetCheckpointerForTests,
+} from './checkpointer';
+
+beforeEach(() => {
+  __resetCheckpointerForTests();
+});
+
+describe('resolveCheckpointerConfig', () => {
+  test('applies defaults when nothing is configured', () => {
+    expect(resolveCheckpointerConfig(undefined)).toEqual({
+      type: 'mongo',
+      ttlSeconds: DEFAULT_CHECKPOINT_TTL_SECONDS,
+      checkpointCollectionName: 'agent_checkpoints',
+      checkpointWritesCollectionName: 'agent_checkpoint_writes',
+    });
+  });
+
+  test('honors explicit type, ttl, and collection overrides', () => {
+    expect(
+      resolveCheckpointerConfig({
+        type: 'memory',
+        ttl: 60,
+        checkpointCollectionName: 'cp',
+        checkpointWritesCollectionName: 'cpw',
+      }),
+    ).toEqual({
+      type: 'memory',
+      ttlSeconds: 60,
+      checkpointCollectionName: 'cp',
+      checkpointWritesCollectionName: 'cpw',
+    });
+  });
+
+  test('falls back to the default ttl for non-positive values', () => {
+    expect(resolveCheckpointerConfig({ ttl: 0 }).ttlSeconds).toBe(DEFAULT_CHECKPOINT_TTL_SECONDS);
+    expect(resolveCheckpointerConfig({ ttl: -5 }).ttlSeconds).toBe(DEFAULT_CHECKPOINT_TTL_SECONDS);
+  });
+});
+
+describe('getApprovalTtlMs', () => {
+  test('converts the resolved ttl to milliseconds', () => {
+    expect(getApprovalTtlMs(undefined)).toBe(DEFAULT_CHECKPOINT_TTL_SECONDS * 1000);
+    expect(getApprovalTtlMs({ ttl: 60 })).toBe(60_000);
+  });
+});
+
+describe('getAgentCheckpointer', () => {
+  test('returns undefined for the in-memory type (SDK MemorySaver fallback)', async () => {
+    await expect(getAgentCheckpointer({ type: 'memory' })).resolves.toBeUndefined();
+  });
+
+  test('returns undefined when Mongo is not connected', async () => {
+    // No mongoose connection is established in the unit test env (readyState 0).
+    await expect(getAgentCheckpointer(undefined)).resolves.toBeUndefined();
+  });
+});
+
+describe('deleteAgentCheckpoint', () => {
+  test('is a no-op (no throw) for a missing threadId', async () => {
+    await expect(deleteAgentCheckpoint(undefined)).resolves.toBeUndefined();
+  });
+
+  test('is a no-op (no throw) when no durable saver is available', async () => {
+    await expect(deleteAgentCheckpoint('conversation-1')).resolves.toBeUndefined();
+  });
+});
diff --git a/packages/api/src/agents/checkpointer.ts b/packages/api/src/agents/checkpointer.ts
new file mode 100644
index 0000000000..6a99a616aa
--- /dev/null
+++ b/packages/api/src/agents/checkpointer.ts
@@ -0,0 +1,171 @@
+import mongoose from 'mongoose';
+import { logger } from '@librechat/data-schemas';
+import { MongoDBSaver } from '@langchain/langgraph-checkpoint-mongodb';
+import type { TCheckpointerConfig } from 'librechat-data-provider';
+
+/**
+ * Durable checkpointing for human-in-the-loop (HITL) resume.
+ *
+ * This is the seam between LibreChat and LangGraph's checkpoint machinery. A run
+ * that pauses for tool approval suspends its graph state to a checkpoint; resuming
+ * rebuilds that state on a *fresh* `Run` (see `agents/run.ts`), which only works if
+ * the checkpoint outlives the original request — across a restart, or on another
+ * replica. So HITL needs a durable saver, not the SDK's process-local `MemorySaver`.
+ *
+ * Two adapters sit behind the one interface ({@link getAgentCheckpointer}):
+ *   - `MongoDBSaver` over the app's existing Mongo connection (the default), and
+ *   - `undefined`, which lets the SDK install its own in-process `MemorySaver`
+ *     (single-process / dev, or whenever Mongo isn't ready yet).
+ *
+ * Storage is bounded two ways: a Mongo TTL index reclaims runs that are never
+ * resolved ({@link DEFAULT_CHECKPOINT_TTL_SECONDS}), and {@link deleteAgentCheckpoint}
+ * prunes a thread's checkpoints eagerly on every terminal transition.
+ */
+
+/** Default approval window and checkpoint TTL: 24h. */
+export const DEFAULT_CHECKPOINT_TTL_SECONDS = 86400;
+
+const DEFAULT_CHECKPOINT_COLLECTION = 'agent_checkpoints';
+const DEFAULT_CHECKPOINT_WRITES_COLLECTION = 'agent_checkpoint_writes';
+
+/** Checkpointer settings with all defaults applied. */
+export interface ResolvedCheckpointerConfig {
+  type: 'mongo' | 'memory';
+  /** Approval window / TTL in seconds. */
+  ttlSeconds: number;
+  checkpointCollectionName: string;
+  checkpointWritesCollectionName: string;
+}
+
+/**
+ * Apply defaults to the YAML `endpoints.agents.checkpointer` block. Mirrors
+ * {@link resolveRecursionLimit} — the schema stays descriptive, defaults live here.
+ */
+export function resolveCheckpointerConfig(
+  cfg: TCheckpointerConfig | undefined,
+): ResolvedCheckpointerConfig {
+  return {
+    type: cfg?.type ?? 'mongo',
+    ttlSeconds:
+      typeof cfg?.ttl === 'number' && cfg.ttl > 0 ? cfg.ttl : DEFAULT_CHECKPOINT_TTL_SECONDS,
+    checkpointCollectionName: cfg?.checkpointCollectionName ?? DEFAULT_CHECKPOINT_COLLECTION,
+    checkpointWritesCollectionName:
+      cfg?.checkpointWritesCollectionName ?? DEFAULT_CHECKPOINT_WRITES_COLLECTION,
+  };
+}
+
+/** Approval-window milliseconds from the resolved config; drives pending-action expiry. */
+export function getApprovalTtlMs(cfg: TCheckpointerConfig | undefined): number {
+  return resolveCheckpointerConfig(cfg).ttlSeconds * 1000;
+}
+
+/**
+ * One saver per process, built lazily on first use so `setup()` (index creation)
+ * runs exactly once. Keyed by the resolved settings so a config change rebuilds.
+ */
+let saverPromise: Promise<MongoDBSaver | undefined> | undefined;
+let cachedKey: string | undefined;
+
+function settingsKey(resolved: ResolvedCheckpointerConfig): string {
+  return `${resolved.checkpointCollectionName}|${resolved.checkpointWritesCollectionName}|${resolved.ttlSeconds}`;
+}
+
+/**
+ * The durable saver to hand to `graphConfig.compileOptions.checkpointer`, or
+ * `undefined` to let the SDK fall back to its in-process `MemorySaver`.
+ *
+ * Returns `undefined` (without caching) when the config selects `memory` or when
+ * Mongo isn't connected yet, so a later run retries once the connection is up.
+ * The SDK types the checkpointer as `unknown`, so a `MongoDBSaver` passes directly.
+ */
+export async function getAgentCheckpointer(
+  cfg: TCheckpointerConfig | undefined,
+): Promise<MongoDBSaver | undefined> {
+  const resolved = resolveCheckpointerConfig(cfg);
+  if (resolved.type === 'memory') {
+    return undefined;
+  }
+  if (mongoose.connection.readyState !== 1) {
+    logger.warn(
+      '[checkpointer] Mongoose not connected; HITL runs will use an in-process checkpointer this turn (paused runs will not survive a restart or resolve on another replica).',
+    );
+    return undefined;
+  }
+
+  const key = settingsKey(resolved);
+  if (!saverPromise || cachedKey !== key) {
+    cachedKey = key;
+    saverPromise = buildMongoSaver(resolved);
+  }
+  return saverPromise;
+}
+
+async function buildMongoSaver(
+  resolved: ResolvedCheckpointerConfig,
+): Promise<MongoDBSaver | undefined> {
+  try {
+    const saver = new MongoDBSaver({
+      // mongoose vends the live MongoClient; reuse it instead of opening a second
+      // connection. The driver type is structurally identical but resolves to a
+      // different `mongodb` copy than checkpoint-mongodb's, hence the cast.
+      client: mongoose.connection.getClient() as unknown as ConstructorParameters<
+        typeof MongoDBSaver
+      >[0]['client'],
+      checkpointCollectionName: resolved.checkpointCollectionName,
+      checkpointWritesCollectionName: resolved.checkpointWritesCollectionName,
+      // TTL index on `upserted_at`: an unresolved paused run is reclaimed after the
+      // approval window, so a forgotten approval can never leak checkpoints forever.
+      ttl: resolved.ttlSeconds,
+    });
+    const errors = await saver.setup();
+    if (errors.length > 0) {
+      logger.warn(
+        '[checkpointer] MongoDBSaver.setup() reported errors (checkpoint indexes may be incomplete):',
+        errors,
+      );
+    }
+    logger.info('[checkpointer] Durable Mongo checkpointer ready for HITL resume');
+    return saver;
+  } catch (err) {
+    // Reset so a later run can retry rather than being stuck on a failed build.
+    saverPromise = undefined;
+    cachedKey = undefined;
+    logger.error(
+      '[checkpointer] Failed to initialize Mongo checkpointer; falling back to in-process checkpointer:',
+      err,
+    );
+    return undefined;
+  }
+}
+
+/**
+ * Prune a thread's checkpoints on a terminal transition — natural completion,
+ * abort, or expiry — so the durable store stays bounded. The TTL index is the
+ * safety net; this is the eager cleanup. No-op in memory mode or before any run
+ * has built the saver (nothing to delete).
+ *
+ * @param threadId - the LangGraph `thread_id` (LibreChat's conversationId).
+ */
+export async function deleteAgentCheckpoint(
+  threadId: string | undefined,
+  cfg?: TCheckpointerConfig,
+): Promise<void> {
+  if (!threadId) {
+    return;
+  }
+  const saver = await getAgentCheckpointer(cfg);
+  if (!saver) {
+    return;
+  }
+  try {
+    await saver.deleteThread(threadId);
+  } catch (err) {
+    logger.warn(`[checkpointer] Failed to delete checkpoints for thread ${threadId}:`, err);
+  }
+}
+
+/** Test-only: drop the memoized saver so a fresh build is forced. */
+export function __resetCheckpointerForTests(): void {
+  saverPromise = undefined;
+  cachedKey = undefined;
+}
diff --git a/packages/api/src/agents/hitl/index.ts b/packages/api/src/agents/hitl/index.ts
index 62d5b849e4..690ccb1828 100644
--- a/packages/api/src/agents/hitl/index.ts
+++ b/packages/api/src/agents/hitl/index.ts
@@ -1 +1,3 @@
 export * from './policy';
+export * from './runtime';
+export * from './resume';
diff --git a/packages/api/src/agents/hitl/policy.spec.ts b/packages/api/src/agents/hitl/policy.spec.ts
index 369922a5ff..579b0503ef 100644
--- a/packages/api/src/agents/hitl/policy.spec.ts
+++ b/packages/api/src/agents/hitl/policy.spec.ts
@@ -1,12 +1,41 @@
 import type { Agents, TToolApprovalPolicy } from 'librechat-data-provider';
 import {
+  resolveToolApprovalPolicy,
   isHITLEnabled,
   mapToolApprovalPolicy,
   buildToolApprovalPayload,
   buildAskUserQuestionPayload,
   buildPendingAction,
+  computeAgentRequestFingerprint,
+  pickResumeContext,
+  applyResumeContext,
 } from './policy';
 
+describe('resolveToolApprovalPolicy', () => {
+  test('returns the endpoint policy unchanged (single layer wired today)', () => {
+    const endpoint: TToolApprovalPolicy = { enabled: true, mode: 'default', deny: ['rm'] };
+    // Identity, not a copy — the resolver is a passthrough until more layers ship.
+    expect(resolveToolApprovalPolicy({ endpoint })).toBe(endpoint);
+  });
+
+  test('returns undefined when there is no endpoint policy', () => {
+    expect(resolveToolApprovalPolicy({})).toBeUndefined();
+    expect(resolveToolApprovalPolicy({ endpoint: undefined })).toBeUndefined();
+  });
+
+  test('ignores the reserved agent/skills layers for now (behaviour-preserving)', () => {
+    const endpoint: TToolApprovalPolicy = { enabled: true, mode: 'bypass' };
+    const resolved = resolveToolApprovalPolicy({
+      endpoint,
+      agent: { enabled: true, mode: 'default', ask: ['shell'] },
+      skills: [{ deny: ['delete_*'] }],
+    });
+    // Until merge lands, the result must still be exactly the endpoint policy so
+    // enabling the seam can't change runtime behaviour.
+    expect(resolved).toBe(endpoint);
+  });
+});
+
 describe('isHITLEnabled', () => {
   test('default-off when no policy configured', () => {
     expect(isHITLEnabled(undefined)).toBe(false);
@@ -227,3 +256,178 @@ describe('buildPendingAction', () => {
     expect(action.expiresAt).toBeLessThanOrEqual(after);
   });
 });
+
+describe('computeAgentRequestFingerprint', () => {
+  it('is stable for the same graph-determining fields (ignoring other body keys)', () => {
+    const a = computeAgentRequestFingerprint({
+      endpoint: 'agents',
+      agent_id: 'agent-1',
+      model: 'gpt',
+    });
+    // Extra/unknown fields on the body must not change the fingerprint.
+    const b = computeAgentRequestFingerprint({
+      endpoint: 'agents',
+      agent_id: 'agent-1',
+      model: 'gpt',
+      ...({ conversationId: 'c', decisions: [] } as Record<string, unknown>),
+    });
+    expect(a).toBe(b);
+  });
+
+  it('differs when a graph-determining field changes', () => {
+    const base = { endpoint: 'agents', agent_id: 'agent-1', model: 'gpt' };
+    expect(computeAgentRequestFingerprint(base)).not.toBe(
+      computeAgentRequestFingerprint({ ...base, model: 'other' }),
+    );
+    expect(computeAgentRequestFingerprint(base)).not.toBe(
+      computeAgentRequestFingerprint({ ...base, agent_id: 'agent-2' }),
+    );
+  });
+
+  it('differs when promptPrefix changes (ephemeral instructions)', () => {
+    const base = { endpoint: 'agents', promptPrefix: 'be terse' };
+    expect(computeAgentRequestFingerprint(base)).not.toBe(
+      computeAgentRequestFingerprint({ ...base, promptPrefix: 'be verbose' }),
+    );
+    // null vs absent are treated the same
+    expect(computeAgentRequestFingerprint({ endpoint: 'agents' })).toBe(
+      computeAgentRequestFingerprint({ endpoint: 'agents', promptPrefix: null }),
+    );
+  });
+
+  it('normalizes ephemeralAgent so key/array order does not matter', () => {
+    const x = computeAgentRequestFingerprint({
+      endpoint: 'agents',
+      ephemeralAgent: { mcp: ['b', 'a'], execute_code: true },
+    });
+    const y = computeAgentRequestFingerprint({
+      endpoint: 'agents',
+      ephemeralAgent: { execute_code: true, mcp: ['a', 'b'] },
+    });
+    expect(x).toBe(y);
+  });
+
+  it('distinguishes a different ephemeral capability set (the swap it guards against)', () => {
+    const a = computeAgentRequestFingerprint({
+      endpoint: 'agents',
+      ephemeralAgent: { execute_code: true },
+    });
+    const b = computeAgentRequestFingerprint({
+      endpoint: 'agents',
+      ephemeralAgent: { execute_code: false, mcp: ['evil'] },
+    });
+    expect(a).not.toBe(b);
+  });
+});
+
+describe('pickResumeContext / applyResumeContext', () => {
+  it('picks only the graph-determining fields (incl. addedConvo + timezone), dropping unrelated keys', () => {
+    const ctx = pickResumeContext({
+      endpoint: 'agents',
+      agent_id: 'a1',
+      model: 'gpt',
+      promptPrefix: 'be terse',
+      ephemeralAgent: { execute_code: true },
+      addedConvo: { agent_id: 'secondary' },
+      // Feeds temporal prompt vars; must round-trip so resume compiles the same prompt.
+      timezone: 'America/New_York',
+      // Graph-determining: skill allowed-tools union into the tool set.
+      manualSkills: ['code-reviewer'],
+      conversationId: 'c',
+      decisions: [],
+      actionId: 'x',
+    });
+    expect(ctx).toEqual({
+      endpoint: 'agents',
+      agent_id: 'a1',
+      model: 'gpt',
+      promptPrefix: 'be terse',
+      ephemeralAgent: { execute_code: true },
+      addedConvo: { agent_id: 'secondary' },
+      timezone: 'America/New_York',
+      manualSkills: ['code-reviewer'],
+    });
+  });
+
+  it('replays a dropped manualSkills and drops a client-injected one', () => {
+    // Reload case: the resume client lost manualSkills; the server restores it.
+    const restored: Record<string, unknown> = { conversationId: 'c', actionId: 'x' };
+    applyResumeContext(restored, { endpoint: 'agents', manualSkills: ['code-reviewer'] });
+    expect(restored.manualSkills).toEqual(['code-reviewer']);
+    // Security: a paused turn with no manual skill can't be made to inject one.
+    const injected: Record<string, unknown> = { conversationId: 'c', manualSkills: ['evil-skill'] };
+    applyResumeContext(injected, { endpoint: 'agents', agent_id: 'a1' });
+    expect('manualSkills' in injected).toBe(false);
+  });
+
+  it('omits absent (undefined) fields but keeps explicit null', () => {
+    const ctx = pickResumeContext({ endpoint: 'agents', ephemeralAgent: null });
+    expect(ctx).toEqual({ endpoint: 'agents', ephemeralAgent: null });
+    expect('agent_id' in ctx).toBe(false);
+  });
+
+  it('replays the persisted context onto a body, overwriting what the client sent', () => {
+    // The reload case: the client lost ephemeralAgent (null); the server restores it.
+    const body: Record<string, unknown> = {
+      conversationId: 'c',
+      actionId: 'x',
+      ephemeralAgent: null,
+      promptPrefix: 'tampered',
+    };
+    applyResumeContext(body, {
+      endpoint: 'agents',
+      ephemeralAgent: { execute_code: true, mcp: ['srv'] },
+      promptPrefix: 'original',
+    });
+    expect(body.ephemeralAgent).toEqual({ execute_code: true, mcp: ['srv'] });
+    expect(body.promptPrefix).toBe('original');
+    expect(body.endpoint).toBe('agents');
+    // Non-context fields are untouched.
+    expect(body.conversationId).toBe('c');
+    expect(body.actionId).toBe('x');
+  });
+
+  it('drops graph-determining fields the client sent that the persisted context lacks', () => {
+    // Security: the paused turn carried no addedConvo/spec, so a crafted resume must not
+    // be able to inject them (addedConvo isn't covered by the fingerprint). Any
+    // RESUME_CONTEXT_KEY absent from the persisted context is cleared from the body.
+    const body: Record<string, unknown> = {
+      conversationId: 'c',
+      actionId: 'x',
+      addedConvo: { agent_id: 'injected-secondary' },
+      spec: 'injected-spec',
+    };
+    applyResumeContext(body, { endpoint: 'agents', agent_id: 'a1' });
+    // Persisted keys are restored...
+    expect(body.endpoint).toBe('agents');
+    expect(body.agent_id).toBe('a1');
+    // ...and client-injected graph-determining fields absent from the context are gone.
+    expect('addedConvo' in body).toBe(false);
+    expect('spec' in body).toBe(false);
+    // Non-context fields are untouched.
+    expect(body.conversationId).toBe('c');
+    expect(body.actionId).toBe('x');
+  });
+
+  it('is a no-op for a null/undefined context', () => {
+    const body: Record<string, unknown> = { ephemeralAgent: null };
+    applyResumeContext(body, undefined);
+    expect(body.ephemeralAgent).toBeNull();
+  });
+
+  it('round-trips through buildPendingAction so replay restores the original body', () => {
+    const original = {
+      endpoint: 'agents',
+      ephemeralAgent: { execute_code: true },
+      promptPrefix: 'p',
+    };
+    const action = buildPendingAction(
+      { type: 'ask_user_question', question: { question: 'q' } } as Agents.HumanInterruptPayload,
+      { streamId: 's', resumeContext: pickResumeContext(original) },
+    );
+    const reloadedBody: Record<string, unknown> = { ephemeralAgent: null };
+    applyResumeContext(reloadedBody, action.resumeContext);
+    expect(reloadedBody.ephemeralAgent).toEqual({ execute_code: true });
+    expect(reloadedBody.promptPrefix).toBe('p');
+  });
+});
diff --git a/packages/api/src/agents/hitl/policy.ts b/packages/api/src/agents/hitl/policy.ts
index bfe0543c7c..795b435aff 100644
--- a/packages/api/src/agents/hitl/policy.ts
+++ b/packages/api/src/agents/hitl/policy.ts
@@ -1,4 +1,4 @@
-import { randomUUID } from 'crypto';
+import { randomUUID, createHash } from 'crypto';
 import type { Agents, TToolApprovalPolicy } from 'librechat-data-provider';
 import type { ToolPolicyConfig } from '@librechat/agents';
 
@@ -11,6 +11,54 @@ import type { ToolPolicyConfig } from '@librechat/agents';
  */
 const DEFAULT_REVIEW_DECISIONS: Agents.ToolApprovalDecisionType[] = ['approve', 'reject', 'edit'];
 
+/**
+ * Layered sources that combine into the effective tool-approval policy for a turn.
+ *
+ * Only {@link ToolApprovalPolicyLayers.endpoint} is consumed today; `agent` and
+ * `skills` are reserved seams so future per-agent / per-skill plumbing lands in
+ * {@link resolveToolApprovalPolicy} rather than being threaded through the run
+ * call site.
+ */
+export interface ToolApprovalPolicyLayers {
+  /**
+   * App/endpoint policy — `endpoints.agents.toolApproval` from librechat.yaml.
+   * The baseline, and the sole owner of the `enabled` kill switch.
+   */
+  endpoint?: TToolApprovalPolicy;
+  /**
+   * Per-agent override (not yet wired). Layered over `endpoint` to refine
+   * `mode`/`allow`/`deny`/`ask`/`reason` for a specific agent. Must NOT flip
+   * `enabled` — enablement stays endpoint-level by design.
+   */
+  agent?: TToolApprovalPolicy;
+  /**
+   * Skill-contributed policy (not yet wired). May only TIGHTEN — contribute
+   * `ask`/`deny` entries — never grant `bypass` or widen `allow`, so a selected
+   * skill can never silently auto-approve a tool.
+   */
+  skills?: TToolApprovalPolicy[];
+}
+
+/**
+ * Resolve the effective tool-approval policy for a turn from its layered sources.
+ *
+ * This is the single seam where policy sources combine, kept out of the run call
+ * site so adding per-agent or per-skill policy later is a change to ONE function
+ * rather than to `createRun`. Intended precedence once those layers are wired:
+ *   - `endpoint` is the baseline and owns the `enabled` kill switch;
+ *   - `agent` overrides `mode`/`allow`/`deny`/`ask`/`reason`;
+ *   - `skills` may only tighten (add `ask`/`deny`), never loosen.
+ *
+ * Today only `endpoint` is consumed, so the result is identical to reading
+ * `endpoints.agents.toolApproval` directly — `agent`/`skills` are accepted but
+ * not yet merged. Behaviour-preserving until those layers ship.
+ */
+export function resolveToolApprovalPolicy(
+  layers: ToolApprovalPolicyLayers,
+): TToolApprovalPolicy | undefined {
+  return layers.endpoint;
+}
+
 /**
  * Whether the HITL machinery should run for this policy.
  *
@@ -121,6 +169,127 @@ export interface PendingActionContext {
   interruptId?: string;
   /** LangGraph `thread_id` (`RunInterruptResult.threadId`) for cross-process resume. */
   threadId?: string;
+  /** Fingerprint of the graph-determining request fields; see {@link computeAgentRequestFingerprint}. */
+  requestFingerprint?: string;
+  /** Graph-determining fields to replay on resume; see {@link RESUME_CONTEXT_KEYS}. */
+  resumeContext?: Record<string, unknown>;
+}
+
+/** Request fields that decide which agent/graph + tool set a turn runs. */
+export interface AgentRequestFingerprintFields {
+  endpoint?: string | null;
+  endpointType?: string | null;
+  agent_id?: string | null;
+  model?: string | null;
+  spec?: string | null;
+  /** Ephemeral agents derive their system instructions from this; pin it too. */
+  promptPrefix?: string | null;
+  ephemeralAgent?: Record<string, unknown> | null;
+}
+
+/** Stable, order-independent serialization of the ephemeral capability config. */
+function normalizeEphemeralAgent(ephemeral: Record<string, unknown> | null | undefined): unknown {
+  if (ephemeral == null || typeof ephemeral !== 'object') {
+    return null;
+  }
+  const out: Record<string, unknown> = {};
+  for (const key of Object.keys(ephemeral).sort()) {
+    const value = ephemeral[key];
+    out[key] = Array.isArray(value) ? [...value].sort() : value;
+  }
+  return out;
+}
+
+/**
+ * Fingerprint the request fields that determine which agent/graph + tool set a turn
+ * runs. Persisted on the pending action at pause time and recomputed on resume; a
+ * mismatch means the resume would rebuild a DIFFERENT graph. This is the guard that
+ * catches an ephemeral-agent config swap — those have an undefined `agent_id`, so the
+ * id check alone can't tell two ephemeral configs apart.
+ */
+/**
+ * Request fields that determine the agent/graph + tool set, persisted with the pending
+ * action so the resume can REPLAY them server-side. The client can't reliably re-send
+ * these after a reload (e.g. the ephemeralAgent state resets), so replaying them from
+ * the job guarantees the rebuilt run is the SAME graph the pause used — durable resume
+ * works across reloads/replicas, and a crafted resume can't swap the tool set.
+ */
+export const RESUME_CONTEXT_KEYS = [
+  'endpoint',
+  'endpointType',
+  'agent_id',
+  'spec',
+  'model',
+  'promptPrefix',
+  'ephemeralAgent',
+  // The agents build reads addedConvo into endpointOption to add parallel/secondary
+  // agents; the resume POST can't reconstruct it, so replay it from the paused request.
+  'addedConvo',
+  // Feeds temporal prompt vars ({{current_datetime}} etc.) via initializeAgent. The
+  // resume POST omits it, so without replay a different-tz client (or none) compiles a
+  // different system prompt than the paused graph. Replay-only — not in the fingerprint.
+  'timezone',
+  // Manually-selected skills union their allowed-tools into the tool set before tools
+  // load (initializeAgent → resolveManualSkills), so they're graph-determining. The
+  // resume POST can't reliably re-send them after a reload; replay them, and the
+  // delete-absent half of applyResumeContext stops a crafted resume from injecting a
+  // different skill's tools (manualSkills isn't covered by the fingerprint). Replay-only.
+  // (alwaysAppliedSkills is NOT here — it's resolved server-side from the DB, not req.body.)
+  'manualSkills',
+] as const;
+
+export type ResumeContext = Partial<Record<(typeof RESUME_CONTEXT_KEYS)[number], unknown>>;
+
+/** Extract the graph-determining fields from a request body for durable replay. */
+export function pickResumeContext(body: Record<string, unknown> | undefined | null): ResumeContext {
+  const ctx: ResumeContext = {};
+  if (body == null) {
+    return ctx;
+  }
+  for (const key of RESUME_CONTEXT_KEYS) {
+    if (body[key] !== undefined) {
+      ctx[key] = body[key];
+    }
+  }
+  return ctx;
+}
+
+/**
+ * Replay a persisted resume context onto a request body so the rebuilt run matches the
+ * paused one. Every graph-determining field is forced to the persisted value: a key the
+ * context HAS overwrites whatever the client sent; a key it LACKS is deleted from the
+ * body. The delete is the security half — without it, a field the paused turn never
+ * carried (e.g. `addedConvo`, which {@link computeAgentRequestFingerprint} does NOT
+ * cover) could be injected by a crafted resume to rebuild the paused single-agent
+ * checkpoint as a different multi-agent graph/tool set.
+ */
+export function applyResumeContext(
+  body: Record<string, unknown> | undefined | null,
+  ctx: ResumeContext | undefined | null,
+): void {
+  if (body == null || ctx == null) {
+    return;
+  }
+  for (const key of RESUME_CONTEXT_KEYS) {
+    if (ctx[key] !== undefined) {
+      body[key] = ctx[key];
+    } else {
+      delete body[key];
+    }
+  }
+}
+
+export function computeAgentRequestFingerprint(fields: AgentRequestFingerprintFields): string {
+  const canonical = JSON.stringify({
+    endpoint: fields.endpoint ?? null,
+    endpointType: fields.endpointType ?? null,
+    agent_id: fields.agent_id ?? null,
+    model: fields.model ?? null,
+    spec: fields.spec ?? null,
+    promptPrefix: fields.promptPrefix ?? null,
+    ephemeralAgent: normalizeEphemeralAgent(fields.ephemeralAgent),
+  });
+  return createHash('sha256').update(canonical).digest('hex');
 }
 
 /**
@@ -146,5 +315,7 @@ export function buildPendingAction(
     expiresAt: typeof ctx.ttlMs === 'number' ? createdAt + ctx.ttlMs : undefined,
     interruptId: ctx.interruptId,
     threadId: ctx.threadId,
+    requestFingerprint: ctx.requestFingerprint,
+    resumeContext: ctx.resumeContext,
   };
 }
diff --git a/packages/api/src/agents/hitl/resume.spec.ts b/packages/api/src/agents/hitl/resume.spec.ts
new file mode 100644
index 0000000000..f164366cf7
--- /dev/null
+++ b/packages/api/src/agents/hitl/resume.spec.ts
@@ -0,0 +1,171 @@
+import type { Agents } from 'librechat-data-provider';
+import {
+  mapToolApprovalResolutions,
+  mapAskUserAnswer,
+  findUndecidedToolCalls,
+  findDisallowedDecisions,
+  findIncompleteDecisions,
+} from './resume';
+
+describe('mapToolApprovalResolutions', () => {
+  test('maps each decision type to the SDK discriminated shape, keyed by tool_call_id', () => {
+    const resolutions: Agents.ToolApprovalResolution[] = [
+      { tool_call_id: 'a', decision: 'approve' },
+      { tool_call_id: 'b', decision: 'reject', reason: 'no' },
+      { tool_call_id: 'c', decision: 'edit', editedArguments: { x: 1 } },
+      { tool_call_id: 'd', decision: 'respond', responseText: 'done' },
+    ];
+
+    expect(mapToolApprovalResolutions(resolutions)).toEqual({
+      a: { type: 'approve' },
+      b: { type: 'reject', reason: 'no' },
+      c: { type: 'edit', updatedInput: { x: 1 } },
+      d: { type: 'respond', responseText: 'done' },
+    });
+  });
+
+  test('defaults missing edit args to {} and missing respond text to "" rather than throwing', () => {
+    const resolutions: Agents.ToolApprovalResolution[] = [
+      { tool_call_id: 'a', decision: 'edit' },
+      { tool_call_id: 'b', decision: 'respond' },
+    ];
+    expect(mapToolApprovalResolutions(resolutions)).toEqual({
+      a: { type: 'edit', updatedInput: {} },
+      b: { type: 'respond', responseText: '' },
+    });
+  });
+
+  test('fails closed (reject) on an unrecognized decision', () => {
+    const resolutions = [
+      { tool_call_id: 'a', decision: 'nonsense' as Agents.ToolApprovalDecisionType },
+    ];
+    expect(mapToolApprovalResolutions(resolutions)).toEqual({
+      a: { type: 'reject', reason: 'Unrecognized approval decision' },
+    });
+  });
+
+  test('last write wins when the same tool_call_id appears twice', () => {
+    const resolutions: Agents.ToolApprovalResolution[] = [
+      { tool_call_id: 'a', decision: 'approve' },
+      { tool_call_id: 'a', decision: 'reject' },
+    ];
+    expect(mapToolApprovalResolutions(resolutions)).toEqual({ a: { type: 'reject' } });
+  });
+});
+
+describe('mapAskUserAnswer', () => {
+  test('passes the answer through unchanged', () => {
+    expect(mapAskUserAnswer({ answer: 'staging' })).toEqual({ answer: 'staging' });
+  });
+});
+
+describe('findUndecidedToolCalls', () => {
+  const payload: Agents.ToolApprovalInterruptPayload = {
+    type: 'tool_approval',
+    action_requests: [
+      { tool_call_id: 'a', name: 'read', arguments: {} },
+      { tool_call_id: 'b', name: 'write', arguments: {} },
+    ],
+    review_configs: [],
+  };
+
+  test('returns the tool_call_ids with no decision', () => {
+    expect(findUndecidedToolCalls(payload, [{ tool_call_id: 'a', decision: 'approve' }])).toEqual([
+      'b',
+    ]);
+  });
+
+  test('returns [] when every requested tool call is decided', () => {
+    expect(
+      findUndecidedToolCalls(payload, [
+        { tool_call_id: 'a', decision: 'approve' },
+        { tool_call_id: 'b', decision: 'reject' },
+      ]),
+    ).toEqual([]);
+  });
+
+  test('ignores resolutions for tool calls not in the action', () => {
+    expect(
+      findUndecidedToolCalls(payload, [
+        { tool_call_id: 'a', decision: 'approve' },
+        { tool_call_id: 'b', decision: 'approve' },
+        { tool_call_id: 'z', decision: 'approve' },
+      ]),
+    ).toEqual([]);
+  });
+});
+
+describe('findDisallowedDecisions', () => {
+  const payload: Agents.ToolApprovalInterruptPayload = {
+    type: 'tool_approval',
+    action_requests: [
+      { tool_call_id: 'a', name: 'read', arguments: {} },
+      { tool_call_id: 'b', name: 'write', arguments: {} },
+    ],
+    review_configs: [
+      { action_name: 'read', tool_call_id: 'a', allowed_decisions: ['approve', 'reject'] },
+      { action_name: 'write', tool_call_id: 'b', allowed_decisions: ['reject', 'respond'] },
+    ],
+  };
+
+  test('returns [] when every decision is permitted by its review config', () => {
+    expect(
+      findDisallowedDecisions(payload, [
+        { tool_call_id: 'a', decision: 'approve' },
+        { tool_call_id: 'b', decision: 'respond', responseText: 'x' },
+      ]),
+    ).toEqual([]);
+  });
+
+  test('flags a decision the policy does not allow for that tool', () => {
+    // `b` is restricted to reject/respond — approving it must be rejected.
+    expect(
+      findDisallowedDecisions(payload, [
+        { tool_call_id: 'a', decision: 'approve' },
+        { tool_call_id: 'b', decision: 'approve' },
+      ]),
+    ).toEqual(['b']);
+  });
+
+  test('fails closed for a tool_call_id with no matching review config', () => {
+    expect(findDisallowedDecisions(payload, [{ tool_call_id: 'z', decision: 'approve' }])).toEqual([
+      'z',
+    ]);
+  });
+});
+
+describe('findIncompleteDecisions', () => {
+  it('flags an edit decision without editedArguments', () => {
+    expect(findIncompleteDecisions([{ tool_call_id: 'a', decision: 'edit' }])).toEqual(['a']);
+  });
+
+  it('flags an edit decision whose editedArguments is not a plain object', () => {
+    expect(
+      findIncompleteDecisions([
+        {
+          tool_call_id: 'a',
+          decision: 'edit',
+          editedArguments: [] as unknown as Record<string, unknown>,
+        },
+      ]),
+    ).toEqual(['a']);
+  });
+
+  it('flags a respond decision without responseText (or empty)', () => {
+    expect(findIncompleteDecisions([{ tool_call_id: 'a', decision: 'respond' }])).toEqual(['a']);
+    expect(
+      findIncompleteDecisions([{ tool_call_id: 'b', decision: 'respond', responseText: '' }]),
+    ).toEqual(['b']);
+  });
+
+  it('accepts complete edit/respond and ignores approve/reject', () => {
+    expect(
+      findIncompleteDecisions([
+        { tool_call_id: 'a', decision: 'edit', editedArguments: { q: 1 } },
+        { tool_call_id: 'b', decision: 'respond', responseText: 'done' },
+        { tool_call_id: 'c', decision: 'approve' },
+        { tool_call_id: 'd', decision: 'reject', reason: 'no' },
+      ]),
+    ).toEqual([]);
+  });
+});
diff --git a/packages/api/src/agents/hitl/resume.ts b/packages/api/src/agents/hitl/resume.ts
new file mode 100644
index 0000000000..3f34331002
--- /dev/null
+++ b/packages/api/src/agents/hitl/resume.ts
@@ -0,0 +1,119 @@
+import type {
+  ToolApprovalDecision,
+  ToolApprovalDecisionMap,
+  AskUserQuestionResolution,
+} from '@librechat/agents';
+import type { Agents } from 'librechat-data-provider';
+
+/**
+ * Translate the host-facing approval wire format into the SDK's resume value.
+ *
+ * The wire format ({@link Agents.ToolApprovalResolution}) is shaped for the UI —
+ * a flat `decision` string plus optional `editedArguments` / `responseText`. The
+ * SDK consumes a discriminated {@link ToolApprovalDecision} per tool call. This is
+ * the single adapter between the two; the resume route maps once, here, instead of
+ * branching on `decision` at the call site.
+ *
+ * Returns the map form (keyed by `tool_call_id`) so a batch that calls the same
+ * tool twice resolves unambiguously — by-position ordering breaks with duplicates.
+ */
+export function mapToolApprovalResolutions(
+  resolutions: readonly Agents.ToolApprovalResolution[],
+): ToolApprovalDecisionMap {
+  const decisions: ToolApprovalDecisionMap = {};
+  for (const resolution of resolutions) {
+    decisions[resolution.tool_call_id] = toSdkDecision(resolution);
+  }
+  return decisions;
+}
+
+function toSdkDecision(resolution: Agents.ToolApprovalResolution): ToolApprovalDecision {
+  switch (resolution.decision) {
+    case 'approve':
+      return { type: 'approve' };
+    case 'reject':
+      return { type: 'reject', reason: resolution.reason };
+    case 'edit':
+      // `editedArguments` is required for edit on the wire; default to {} so a
+      // malformed payload re-runs the tool with empty args rather than throwing.
+      return { type: 'edit', updatedInput: resolution.editedArguments ?? {} };
+    case 'respond':
+      return { type: 'respond', responseText: resolution.responseText ?? '' };
+    default:
+      // Unknown decision (forward-compat / malformed): fail closed by rejecting,
+      // never by silently approving a tool the user didn't sanction.
+      return { type: 'reject', reason: 'Unrecognized approval decision' };
+  }
+}
+
+/** Translate the ask-user wire answer into the SDK's resume value. */
+export function mapAskUserAnswer(
+  resolution: Agents.AskUserQuestionResolution,
+): AskUserQuestionResolution {
+  return { answer: resolution.answer };
+}
+
+/**
+ * Validate that a set of resolutions covers exactly the tool calls a pending
+ * `tool_approval` action is waiting on. Returns the list of `tool_call_id`s that
+ * were requested but not decided (empty when the batch is fully resolved), so the
+ * resume route can 400 a partial submission instead of driving a half-decided run.
+ */
+export function findUndecidedToolCalls(
+  payload: Agents.ToolApprovalInterruptPayload,
+  resolutions: readonly Agents.ToolApprovalResolution[],
+): string[] {
+  const decided = new Set(resolutions.map((r) => r.tool_call_id));
+  return payload.action_requests.map((a) => a.tool_call_id).filter((id) => !decided.has(id));
+}
+
+/**
+ * Enforce the policy's per-tool `allowed_decisions`. Returns the `tool_call_id`s
+ * whose submitted decision is NOT one the interrupt's `review_configs` permits for
+ * that tool — so the resume route can reject a crafted request that, e.g., approves
+ * a tool the policy restricted to `reject`/`respond`. A resolution for a tool with
+ * no matching review_config (shouldn't happen) is treated as disallowed (fail closed).
+ */
+export function findDisallowedDecisions(
+  payload: Agents.ToolApprovalInterruptPayload,
+  resolutions: readonly Agents.ToolApprovalResolution[],
+): string[] {
+  const allowedByToolCallId = new Map<string, Set<Agents.ToolApprovalDecisionType>>();
+  for (const config of payload.review_configs) {
+    allowedByToolCallId.set(config.tool_call_id, new Set(config.allowed_decisions));
+  }
+  return resolutions
+    .filter((r) => !allowedByToolCallId.get(r.tool_call_id)?.has(r.decision))
+    .map((r) => r.tool_call_id);
+}
+
+/**
+ * Enforce that `edit` and `respond` decisions carry their required payload. Returns
+ * the `tool_call_id`s whose decision is structurally incomplete:
+ *   - `edit` without an object `editedArguments`, or
+ *   - `respond` without a non-empty `responseText`.
+ *
+ * Without this, {@link toSdkDecision}'s defensive defaults (`{}` / `''`) would turn a
+ * crafted or buggy submission into an empty tool input or an empty synthetic result —
+ * resuming the run with behavior the user never actually approved. The route rejects
+ * these (400) rather than mapping them.
+ */
+export function findIncompleteDecisions(
+  resolutions: readonly Agents.ToolApprovalResolution[],
+): string[] {
+  return resolutions
+    .filter((r) => {
+      if (r.decision === 'edit') {
+        return (
+          r.editedArguments == null ||
+          typeof r.editedArguments !== 'object' ||
+          Array.isArray(r.editedArguments)
+        );
+      }
+      if (r.decision === 'respond') {
+        return typeof r.responseText !== 'string' || r.responseText.length === 0;
+      }
+      return false;
+    })
+    .map((r) => r.tool_call_id);
+}
diff --git a/packages/api/src/agents/hitl/runtime.spec.ts b/packages/api/src/agents/hitl/runtime.spec.ts
new file mode 100644
index 0000000000..79d4ef05f6
--- /dev/null
+++ b/packages/api/src/agents/hitl/runtime.spec.ts
@@ -0,0 +1,29 @@
+import { HookRegistry } from '@librechat/agents';
+import { buildHITLRunWiring } from './runtime';
+
+describe('buildHITLRunWiring', () => {
+  test('returns undefined when HITL is disabled (the default)', () => {
+    expect(buildHITLRunWiring(undefined)).toBeUndefined();
+    expect(buildHITLRunWiring({})).toBeUndefined();
+    expect(buildHITLRunWiring({ enabled: false })).toBeUndefined();
+    expect(buildHITLRunWiring({ mode: 'default', allow: ['read_*'] })).toBeUndefined();
+  });
+
+  test('returns the run wiring when enabled', () => {
+    const wiring = buildHITLRunWiring({ enabled: true });
+    expect(wiring).toBeDefined();
+    expect(wiring?.humanInTheLoop).toEqual({ enabled: true });
+    expect(wiring?.hooks).toBeInstanceOf(HookRegistry);
+  });
+
+  test('registers exactly one PreToolUse policy hook', () => {
+    const wiring = buildHITLRunWiring({ enabled: true, mode: 'bypass', allow: ['x'] });
+    const matchers = wiring?.hooks.getMatchers('PreToolUse') ?? [];
+    expect(matchers).toHaveLength(1);
+  });
+
+  test('an enabled policy with no lists still wires (every tool falls through to ask)', () => {
+    const wiring = buildHITLRunWiring({ enabled: true });
+    expect(wiring?.hooks.getMatchers('PreToolUse')).toHaveLength(1);
+  });
+});
diff --git a/packages/api/src/agents/hitl/runtime.ts b/packages/api/src/agents/hitl/runtime.ts
new file mode 100644
index 0000000000..bbc350fc3d
--- /dev/null
+++ b/packages/api/src/agents/hitl/runtime.ts
@@ -0,0 +1,41 @@
+import { HookRegistry, createToolPolicyHook } from '@librechat/agents';
+import type { TToolApprovalPolicy } from 'librechat-data-provider';
+import { isHITLEnabled, mapToolApprovalPolicy } from './policy';
+
+/**
+ * The HITL fragment spread onto a `RunConfig` when tool approval is enabled.
+ *
+ * Kept as one object so the run seam attaches the opt-in switch and the policy
+ * hook together — they're meaningless apart. The checkpointer is resolved
+ * separately (it's an async, process-wide singleton) and merged into
+ * `graphConfig.compileOptions` at the call site.
+ */
+export interface HITLRunWiring {
+  humanInTheLoop: { enabled: true };
+  hooks: HookRegistry;
+}
+
+/**
+ * Assemble the run-level HITL wiring for a tool-approval policy, or `undefined`
+ * when HITL is disabled (the default) — in which case the run attaches nothing
+ * and behaves exactly as it did before this feature.
+ *
+ * The returned `hooks` registry carries a single `PreToolUse` policy hook built
+ * from {@link mapToolApprovalPolicy}. An enabled policy with no allow/deny/ask
+ * lists falls through to `mode: 'default'`, i.e. every tool prompts — the safe
+ * default for "HITL on, nothing else specified".
+ */
+export function buildHITLRunWiring(
+  policy: TToolApprovalPolicy | undefined,
+): HITLRunWiring | undefined {
+  if (!isHITLEnabled(policy)) {
+    return undefined;
+  }
+
+  const registry = new HookRegistry();
+  registry.register('PreToolUse', {
+    hooks: [createToolPolicyHook(mapToolApprovalPolicy(policy) ?? {})],
+  });
+
+  return { humanInTheLoop: { enabled: true }, hooks: registry };
+}
diff --git a/packages/api/src/agents/index.ts b/packages/api/src/agents/index.ts
index a94eb39d9d..28c70bb538 100644
--- a/packages/api/src/agents/index.ts
+++ b/packages/api/src/agents/index.ts
@@ -3,6 +3,7 @@ export * from './attachments';
 export * from './chain';
 export * from './client';
 export * from './config';
+export * from './checkpointer';
 export * from './contact';
 export * from './context';
 export * from './discovery';
diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts
index e23f8270e9..810b56e1e3 100644
--- a/packages/api/src/agents/run.ts
+++ b/packages/api/src/agents/run.ts
@@ -36,9 +36,12 @@ import type { SubagentUsageEvent } from '~/agents/usage';
 import type * as t from '~/types';
 import { getLLMConfig as getAnthropicLLMConfig } from '~/endpoints/anthropic/llm';
 import { getProviderConfig } from '~/endpoints/config/providers';
+import { resolveToolApprovalPolicy } from '~/agents/hitl/policy';
 import { extractDefaultParams } from '~/endpoints/openai/llm';
 import { resolveHeaders, createSafeUser } from '~/utils/env';
+import { getAgentCheckpointer } from '~/agents/checkpointer';
 import { getOpenAIConfig } from '~/endpoints/openai/config';
+import { buildHITLRunWiring } from '~/agents/hitl/runtime';
 import { buildLangfuseConfig } from '~/langfuse/config';
 import { resolveConfigHeaders } from '~/utils/headers';
 import { applyTestRunHook } from '~/agents/testHook';
@@ -870,6 +873,7 @@ export async function createRun({
   signal,
   agents,
   messages,
+  discoveredToolNames,
   requestBody,
   user,
   tenantId,
@@ -882,6 +886,7 @@ export async function createRun({
   calibrationRatio,
   appConfig,
   subagentUsageSink,
+  hitlCapable = false,
   streaming = true,
   streamUsage = true,
 }: {
@@ -895,6 +900,16 @@ export async function createRun({
   tenantId?: string;
   /** Message history for extracting previously discovered tools */
   messages?: BaseMessage[];
+  /**
+   * Pre-discovered deferred-tool names to force-load directly, bypassing message
+   * extraction. The HITL resume path rebuilds the graph with `messages: []` (state
+   * comes from the durable checkpoint), so the in-turn `tool_search` results that
+   * would normally mark a deferred tool discovered aren't present — without this the
+   * paused tool would be absent from the rebuilt schema-only toolMap and resume would
+   * fail with "unknown tool". Captured at pause via `extractDiscoveredToolsFromHistory`
+   * and replayed here. Merged with (not replacing) any names extracted from `messages`.
+   */
+  discoveredToolNames?: string[];
   summarizationConfig?: SummarizationConfig;
   /** Cross-run summary from formatAgentMessages, forwarded to AgentContext */
   initialSummary?: { text: string; tokenCount: number };
@@ -914,6 +929,15 @@ export async function createRun({
    * Switch to the `RunConfig` pick once the dependency is bumped.
    */
   subagentUsageSink?: (event: SubagentUsageEvent) => void;
+  /**
+   * Whether the caller implements the HITL pause/resume lifecycle (inspects
+   * `run.getInterrupt()`, persists a pending action, exposes a resume route). Gates the
+   * tool-approval wiring: only AgentClient (chat + resume) sets this. The OpenAI-compatible
+   * and Responses controllers leave it false, so an approval-gated tool can't pause on a
+   * route that has no approval surface or resume endpoint (it would otherwise emit a normal
+   * final response / `[DONE]` with the tool call left unresolved).
+   */
+  hitlCapable?: boolean;
 } & Pick<
   RunConfig,
   'tokenCounter' | 'customHandlers' | 'indexTokenCountMap' | 'initialSessions'
@@ -928,10 +952,22 @@ export async function createRun({
    */
   const hasAnyDeferredTools = agents.some((agent) => agent.hasDeferredTools === true);
 
-  const discoveredTools =
-    hasAnyDeferredTools && messages?.length
-      ? extractDiscoveredToolsFromHistory(messages)
-      : new Set<string>();
+  const discoveredTools = new Set<string>();
+  if (hasAnyDeferredTools) {
+    // Normal path: extract from this run's message history (tool_search results).
+    if (messages?.length) {
+      for (const name of extractDiscoveredToolsFromHistory(messages)) {
+        discoveredTools.add(name);
+      }
+    }
+    // Resume path: replay names captured at pause, since `messages` is empty (the
+    // paused run's tool_search results live only in the checkpoint, not here).
+    if (discoveredToolNames?.length) {
+      for (const name of discoveredToolNames) {
+        discoveredTools.add(name);
+      }
+    }
+  }
 
   const buildAgentInput = (agent: RunAgent, opts: { isSubagent?: boolean } = {}): AgentInputs => {
     const isSubagent = opts.isSubagent === true;
@@ -1134,6 +1170,34 @@ export async function createRun({
    */
   const enableToolOutputReferences = anyAgentHasCodeEnv(agents);
 
+  /**
+   * Human-in-the-loop tool approval — OFF by default. When the agents endpoint
+   * opts in (`toolApproval.enabled`), attach the `PreToolUse` policy hook + the
+   * `humanInTheLoop` switch, and bind a durable checkpointer so a run that pauses
+   * for review can be rebuilt and resumed on any worker (see `agents/checkpointer.ts`
+   * and the resume route). When disabled, nothing attaches and the run is identical
+   * to before this feature shipped.
+   */
+  const agentsEndpointConfig = appConfig?.endpoints?.[EModelEndpoint.agents];
+  // Resolve the effective policy through the single seam so per-agent / per-skill
+  // sources can layer in later without touching this call site (see
+  // `resolveToolApprovalPolicy`). Only the endpoint layer is wired today, so this
+  // is identical to reading `toolApproval` directly.
+  const toolApprovalPolicy = resolveToolApprovalPolicy({
+    endpoint: agentsEndpointConfig?.toolApproval,
+  });
+  // Gate HITL to callers that actually implement the pause/resume lifecycle. The
+  // OpenAI-compatible + Responses controllers also call createRun/processStream but never
+  // inspect `run.getInterrupt()` or persist a pending action — so an approval-gated tool
+  // would pause with no approval surface or resume endpoint, and the route would emit a
+  // normal final response / `[DONE]` with the tool call dangling. Only AgentClient (chat +
+  // resume) passes `hitlCapable`; without it the run is identical to the no-HITL path.
+  const hitl = hitlCapable ? buildHITLRunWiring(toolApprovalPolicy) : undefined;
+  if (hitl) {
+    const checkpointer = await getAgentCheckpointer(agentsEndpointConfig?.checkpointer);
+    graphConfig.compileOptions = { ...graphConfig.compileOptions, checkpointer };
+  }
+
   /**
    * Built as a variable (not an inline literal) so the extra
    * `subagentUsageSink` field passes assignability against SDK versions
@@ -1159,6 +1223,10 @@ export async function createRun({
     ...(enableToolOutputReferences && {
       toolOutputReferences: { enabled: true },
     }),
+    // HITL opt-in: the `humanInTheLoop` switch + the PreToolUse policy hook. Spread
+    // here (not just `compileOptions.checkpointer` above) so an `ask` decision raises
+    // a real interrupt — without these the run would never pause. Absent when disabled.
+    ...(hitl && { humanInTheLoop: hitl.humanInTheLoop, hooks: hitl.hooks }),
   };
   const run = await Run.create(runConfig);
 
diff --git a/packages/api/src/middleware/messageFilterPii.spec.ts b/packages/api/src/middleware/messageFilterPii.spec.ts
index c67227ff2d..fc5c4f3ff6 100644
--- a/packages/api/src/middleware/messageFilterPii.spec.ts
+++ b/packages/api/src/middleware/messageFilterPii.spec.ts
@@ -89,6 +89,47 @@ describe('messageFilterPii middleware', () => {
     expect(capturedRes.status).toBe(400);
   });
 
+  const SK = 'sk-proj-FAKE1234567890ABCDEF';
+
+  it('rejects a resume ask-user answer containing a blocked token', () => {
+    const { capturedRes, nextCalls } = runMiddleware({}, { answer: `the key is ${SK}` });
+    expect(nextCalls).toBe(0);
+    expect(capturedRes.status).toBe(400);
+  });
+
+  it('rejects a tool-approval decision responseText containing a blocked token', () => {
+    const { capturedRes, nextCalls } = runMiddleware(
+      {},
+      { decisions: [{ tool_call_id: 'a', decision: 'respond', responseText: `use ${SK}` }] },
+    );
+    expect(nextCalls).toBe(0);
+    expect(capturedRes.status).toBe(400);
+  });
+
+  it('rejects a reject-reason containing a blocked token', () => {
+    const { capturedRes, nextCalls } = runMiddleware(
+      {},
+      { decisions: [{ tool_call_id: 'a', decision: 'reject', reason: `leaked ${SK}` }] },
+    );
+    expect(nextCalls).toBe(0);
+    expect(capturedRes.status).toBe(400);
+  });
+
+  it('rejects edited tool arguments containing a blocked token (stringified)', () => {
+    const { capturedRes, nextCalls } = runMiddleware(
+      {},
+      { decisions: [{ tool_call_id: 'a', decision: 'edit', editedArguments: { token: SK } }] },
+    );
+    expect(nextCalls).toBe(0);
+    expect(capturedRes.status).toBe(400);
+  });
+
+  it('passes a clean resume answer through', () => {
+    const { capturedRes, nextCalls } = runMiddleware({}, { answer: 'name it report.pdf' });
+    expect(nextCalls).toBe(1);
+    expect(capturedRes.status).toBeUndefined();
+  });
+
   it('honors a starterPatterns subset (sk passes when only bearer is enabled)', () => {
     const { capturedRes, nextCalls } = runMiddleware(
       { starterPatterns: ['bearer_header'] },
diff --git a/packages/api/src/middleware/messageFilterPii.ts b/packages/api/src/middleware/messageFilterPii.ts
index ff322cba8f..b2140950c9 100644
--- a/packages/api/src/middleware/messageFilterPii.ts
+++ b/packages/api/src/middleware/messageFilterPii.ts
@@ -141,6 +141,36 @@ export function createMessageFilterPii(options: CreateMessageFilterPiiOptions):
       candidates.push(...quotes);
       candidates.push(mergeQuotedText(text, quotes));
     }
+    /**
+     * The shared `/agents/chat/resume` route carries user-authored text in different
+     * fields than a typed message: an ask-user `answer`, and a tool-approval decision's
+     * `respond` text, `reject` reason, and edited tool arguments. Scan them too — else a
+     * blocked token could ride a resume payload straight back into the model/tool,
+     * bypassing the filter the typed path enforces.
+     */
+    if (typeof req.body?.answer === 'string' && req.body.answer.length > 0) {
+      candidates.push(req.body.answer);
+    }
+    if (Array.isArray(req.body?.decisions)) {
+      for (const decision of req.body.decisions) {
+        if (typeof decision?.responseText === 'string' && decision.responseText.length > 0) {
+          candidates.push(decision.responseText);
+        }
+        if (typeof decision?.reason === 'string' && decision.reason.length > 0) {
+          candidates.push(decision.reason);
+        }
+        if (decision?.editedArguments != null) {
+          try {
+            const edited = JSON.stringify(decision.editedArguments);
+            if (edited.length > 0) {
+              candidates.push(edited);
+            }
+          } catch {
+            /* ignore unstringifiable edited args */
+          }
+        }
+      }
+    }
     if (candidates.length === 0) {
       next();
       return;
diff --git a/packages/api/src/stream/GenerationJobManager.ts b/packages/api/src/stream/GenerationJobManager.ts
index 1986641189..1509335276 100644
--- a/packages/api/src/stream/GenerationJobManager.ts
+++ b/packages/api/src/stream/GenerationJobManager.ts
@@ -2,6 +2,7 @@ import { logger, getTenantId, SYSTEM_TENANT_ID } from '@librechat/data-schemas';
 import {
   Constants,
   UsageEvents,
+  ApprovalEvents,
   parseTextParts,
   reconcileContextUsage,
   promptTokensFromUsage,
@@ -28,12 +29,15 @@ import {
   setGenerationJobsInFlight,
   recordGenerationJob,
 } from '~/app/metrics';
+import { isPendingActionStale, isPendingActionExpired } from './interfaces/IJobStore';
 import { InMemoryEventTransport } from './implementations/InMemoryEventTransport';
 import { InMemoryJobStore } from './implementations/InMemoryJobStore';
 import { filterPersistableAbortContent } from './abortContent';
-import { isPendingActionStale } from './interfaces/IJobStore';
 import { ApprovalLifecycle } from './ApprovalLifecycle';
 
+/** Terminal error surfaced to a client still attached when its approval window lapses. */
+const APPROVAL_EXPIRED_ERROR = 'Approval expired before a decision was made';
+
 /** Error surfaced to any client still attached when a stale/hung job is reaped. */
 const REAPED_JOB_ERROR = 'Generation timed out';
 const OAUTH_TOOL_CALL_PREFIX = `oauth${Constants.mcp_delimiter}`;
@@ -492,6 +496,14 @@ class GenerationJobManagerClass {
         iconURL: jobData.iconURL,
         model: jobData.model,
         promptTokens: jobData.promptTokens,
+        // Surface the originating agent so the resume route can refuse to rebuild a
+        // paused run on a different agent.
+        agent_id: jobData.agent_id,
+        // Surface whether the turn was temporary so a resume keeps it non-persisted.
+        isTemporary: jobData.isTemporary,
+        // Surface deferred tools discovered before the pause so the resume route can
+        // replay them into createRun (the rebuilt graph passes `messages: []`).
+        discoveredTools: jobData.discoveredTools,
         // Surface the pending review so status/resume routes built on the
         // facade can render the prompt for a `requires_action` job.
         pendingAction: jobData.pendingAction,
@@ -1052,6 +1064,31 @@ class GenerationJobManagerClass {
       skipBufferReplay: true,
     });
 
+    // Close the snapshot→subscribe race: getResumeState() snapshots BEFORE we attach the
+    // subscription, so a pause that becomes durable in that window is in neither
+    // resumeState.pendingAction nor (Redis mode) pendingEvents — and trackReplayEvent does
+    // not persist approval events — leaving the client attached to a paused job with no
+    // approval UI. Re-read the live job AFTER subscribing; if it is now requires_action and
+    // the snapshot didn't already carry the action, surface it as a pending event so the
+    // approval prompt renders. Idempotent: a pause landing AFTER attach is delivered live
+    // too, and the client's handler just sets the current action, so a duplicate is benign.
+    if (!resumeState?.pendingAction) {
+      const liveJob = await this.jobStore.getJob(streamId);
+      if (
+        liveJob?.status === 'requires_action' &&
+        liveJob.pendingAction != null &&
+        !isPendingActionStale(liveJob)
+      ) {
+        pendingEvents = [
+          ...pendingEvents,
+          {
+            event: ApprovalEvents.ON_PENDING_ACTION,
+            data: liveJob.pendingAction as unknown as Record<string, unknown>,
+          },
+        ];
+      }
+    }
+
     return { subscription, resumeState, pendingEvents };
   }
 
@@ -1351,6 +1388,11 @@ class GenerationJobManagerClass {
     }
 
     const { message } = event;
+    const extra = message as {
+      manualSkills?: string[];
+      alwaysAppliedSkills?: string[];
+      files?: unknown[];
+    };
     const updates: Partial<SerializableJobData> = {
       createdEventEmitted: true,
       userMessage: {
@@ -1359,6 +1401,19 @@ class GenerationJobManagerClass {
         conversationId: message.conversationId,
         text: message.text,
         quotes: message.quotes,
+        // Persist the turn's uploaded files so a HITL resume sources them from the job
+        // (this authoritative writer), not a user DB row whose save can still be racing
+        // the approval prompt.
+        ...(Array.isArray(extra.files) && extra.files.length > 0 && { files: extra.files }),
+        // Carry skill selections so a HITL resume's reconstructed requestMessage keeps
+        // its pills — this is the authoritative writer of job.metadata.userMessage and
+        // would otherwise drop them (the emitted created message includes them).
+        ...(Array.isArray(extra.manualSkills) &&
+          extra.manualSkills.length > 0 && { manualSkills: extra.manualSkills }),
+        ...(Array.isArray(extra.alwaysAppliedSkills) &&
+          extra.alwaysAppliedSkills.length > 0 && {
+            alwaysAppliedSkills: extra.alwaysAppliedSkills,
+          }),
       },
     };
 
@@ -1398,9 +1453,18 @@ class GenerationJobManagerClass {
     if (metadata.model) {
       updates.model = metadata.model;
     }
+    if (metadata.agent_id) {
+      updates.agent_id = metadata.agent_id;
+    }
+    if (metadata.isTemporary !== undefined) {
+      updates.isTemporary = metadata.isTemporary;
+    }
     if (metadata.promptTokens !== undefined) {
       updates.promptTokens = metadata.promptTokens;
     }
+    if (metadata.discoveredTools) {
+      updates.discoveredTools = metadata.discoveredTools;
+    }
     await this.jobStore.updateJob(streamId, updates);
   }
 
@@ -1598,7 +1662,97 @@ class GenerationJobManagerClass {
    * Cleanup expired jobs.
    * Also cleans up any orphaned runtime state, buffers, and event transport entries.
    */
+  /**
+   * Expire any locally-tracked approval whose window has lapsed: drive the atomic
+   * `requires_action → aborted` transition and, if this caller won it, emit a
+   * terminal error so a connected SSE client closes. Only streams this replica has
+   * runtime for are scanned — those are exactly the ones with a client subscribed
+   * here; a paused job on another replica is finalized by that replica's sweep (and
+   * the store's own cleanup). The durable checkpoint is reclaimed by its Mongo TTL
+   * index, which shares the approval window, so no cross-layer delete is needed here.
+   */
+  /**
+   * Expire a single observed-stale pending approval NOW (immediate, not via the periodic
+   * sweep): run the `requires_action → aborted` CAS — pinned to `actionId` so a concurrent
+   * resolve + re-pause on a fresh action isn't aborted — and, on success, emit the terminal
+   * `APPROVAL_EXPIRED_ERROR` so any attached SSE client gets a terminal event instead of a
+   * hung stream. Used by the periodic sweeper and by the resume route, which observes a
+   * just-expired action when the user submits a decision after the TTL lapsed. Returns true
+   * if this call expired the action.
+   */
+  async expireApproval(streamId: string, actionId?: string): Promise<boolean> {
+    const expired = await this._approvals.expire(streamId, actionId);
+    if (!expired) {
+      return false;
+    }
+    try {
+      await this.emitError(streamId, APPROVAL_EXPIRED_ERROR);
+    } catch (err) {
+      logger.error(`[GenerationJobManager] Failed to notify expired approval ${streamId}`, err);
+    }
+    this.runningJobs.delete(streamId);
+    return true;
+  }
+
+  private async expireStaleApprovals(): Promise<void> {
+    let changed = false;
+    for (const streamId of this.runtimeState.keys()) {
+      let job: SerializableJobData | null;
+      try {
+        job = await this.jobStore.getJob(streamId);
+      } catch (err) {
+        logger.error(
+          `[GenerationJobManager] Failed to read job during approval expiry sweep: ${streamId}`,
+          err,
+        );
+        continue;
+      }
+      // Loser-replica relay: in a multi-replica deployment another replica's store
+      // cleanup (`cleanupRequiresActionIndex`) can win the requires_action → aborted
+      // approval-expiry CAS — it sets the hash error but cannot emit (the store has no
+      // event transport). A client subscribed on THIS replica would then never get a
+      // terminal event until the reap path. If the job is already aborted *for approval
+      // expiry* and we haven't emitted here, relay the terminal error to our subscriber.
+      // The `errorEvent` flag (set by emitError) keeps this idempotent vs the win path.
+      const runtime = this.runtimeState.get(streamId);
+      if (
+        job?.status === 'aborted' &&
+        job.error === APPROVAL_EXPIRED_ERROR &&
+        !runtime?.errorEvent
+      ) {
+        try {
+          await this.emitError(streamId, APPROVAL_EXPIRED_ERROR);
+        } catch (err) {
+          logger.error(`[GenerationJobManager] Failed to relay expired approval ${streamId}`, err);
+        }
+        changed = this.runningJobs.delete(streamId) || changed;
+        continue;
+      }
+      if (!job || job.status !== 'requires_action' || !isPendingActionExpired(job)) {
+        continue;
+      }
+      // Pass the OBSERVED action id so the expire CAS only fires for the action we read
+      // as stale. Between this read and the CAS, the user could resolve it and the run
+      // re-pause on a fresh action; without the id, the CAS (status-only) would abort
+      // that valid new pause and leave it terminal.
+      const didExpire = await this.expireApproval(streamId, job.pendingAction?.actionId);
+      if (!didExpire) {
+        continue;
+      }
+      changed = true;
+      logger.debug(`[GenerationJobManager] Expired pending approval: ${streamId}`);
+    }
+    if (changed) {
+      this.syncRunningJobMetrics();
+    }
+  }
+
   private async cleanup(): Promise<void> {
+    // Finalize approvals whose window lapsed before the store's own cleanup, so a
+    // client still attached to a paused stream gets a terminal event instead of a
+    // connection that hangs open until it gives up.
+    await this.expireStaleApprovals();
+
     const count = await this.jobStore.cleanup();
     let runningJobsChanged = false;
 
diff --git a/packages/api/src/stream/__tests__/RedisJobStore.stream_integration.spec.ts b/packages/api/src/stream/__tests__/RedisJobStore.stream_integration.spec.ts
index 55229ebc5e..5dd3cfb164 100644
--- a/packages/api/src/stream/__tests__/RedisJobStore.stream_integration.spec.ts
+++ b/packages/api/src/stream/__tests__/RedisJobStore.stream_integration.spec.ts
@@ -287,6 +287,184 @@ describe('RedisJobStore Integration Tests', () => {
       await store.destroy();
     });
 
+    test('appendChunk preserves a paused job’s extended TTL (does not reset to running)', async () => {
+      if (!ioredisClient) {
+        return;
+      }
+
+      const { RedisJobStore } = await import('../implementations/RedisJobStore');
+      const store = new RedisJobStore(ioredisClient, { runningTtl: 60 });
+      await store.initialize();
+
+      const streamId = `paused-chunk-ttl-${Date.now()}`;
+      const chunkKey = `stream:{${streamId}}:chunks`;
+
+      await store.createJob(streamId, 'user-1', streamId);
+      await store.appendChunk(streamId, { event: 'on_message_delta', data: { text: 'hello' } });
+
+      // Pause: transitionStatus extends the chunk-key TTL to the long approval window.
+      await store.transitionStatus(streamId, {
+        from: 'running',
+        to: 'requires_action',
+        patch: { pendingAction: buildPendingAction(streamId) },
+      });
+      expect(await ioredisClient.ttl(chunkKey)).toBeGreaterThan(60);
+
+      // The on_pending_action chunk is appended AFTER the pause. The bug Codex flagged was
+      // that appendChunk unconditionally reset the TTL back to the (short) running TTL,
+      // evicting the pre-pause content before resume. It must now leave the long TTL intact.
+      await store.appendChunk(streamId, {
+        event: 'on_pending_action',
+        data: buildPendingAction(streamId),
+      });
+      expect(await ioredisClient.ttl(chunkKey)).toBeGreaterThan(60);
+      // The chunk was still appended (XADD ran), so resume can read the full stream.
+      expect(await ioredisClient.xlen(chunkKey)).toBeGreaterThanOrEqual(2);
+
+      await store.destroy();
+    });
+
+    test('saveRunSteps preserves a paused job’s extended TTL (does not reset to running)', async () => {
+      if (!ioredisClient) {
+        return;
+      }
+
+      const { RedisJobStore } = await import('../implementations/RedisJobStore');
+      const store = new RedisJobStore(ioredisClient, { runningTtl: 60 });
+      await store.initialize();
+
+      const streamId = `paused-runsteps-ttl-${Date.now()}`;
+      const runStepsKey = `stream:{${streamId}}:runsteps`;
+
+      await store.createJob(streamId, 'user-1', streamId);
+      await store.saveRunSteps!(streamId, [{ id: 'step-1', type: 'tool_call' }] as never);
+
+      // Pause: transitionStatus extends the run-steps key TTL to the long approval window.
+      await store.transitionStatus(streamId, {
+        from: 'running',
+        to: 'requires_action',
+        patch: { pendingAction: buildPendingAction(streamId) },
+      });
+      expect(await ioredisClient.ttl(runStepsKey)).toBeGreaterThan(60);
+
+      // A run-step save landing AFTER the pause must NOT reset the key to the running TTL,
+      // or a reload of the still-live approval after that window loses the tool timeline.
+      await store.saveRunSteps!(streamId, [
+        { id: 'step-1', type: 'tool_call' },
+        { id: 'step-2', type: 'tool_call' },
+      ] as never);
+      expect(await ioredisClient.ttl(runStepsKey)).toBeGreaterThan(60);
+      // The save still landed, so resume can read the full timeline.
+      const steps = await store.getRunSteps(streamId);
+      expect(steps.length).toBe(2);
+
+      await store.destroy();
+    });
+
+    test('appendChunk gives the approval TTL when the chunk key did not exist at pause time', async () => {
+      if (!ioredisClient) {
+        return;
+      }
+
+      const { RedisJobStore } = await import('../implementations/RedisJobStore');
+      const store = new RedisJobStore(ioredisClient, { runningTtl: 60 });
+      await store.initialize();
+
+      const streamId = `paused-no-chunk-${Date.now()}`;
+      const chunkKey = `stream:{${streamId}}:chunks`;
+
+      // The job pauses BEFORE any chunk was persisted (ask-user pause with no prior
+      // delta, or the first appendChunk still in flight because emitChunk is
+      // fire-and-forget). The pause's `EXPIRE chunks` is a no-op because the key
+      // does not exist yet, so the chunk stream carries no extended TTL.
+      await store.createJob(streamId, 'user-1', streamId);
+      await store.transitionStatus(streamId, {
+        from: 'running',
+        to: 'requires_action',
+        patch: { pendingAction: buildPendingAction(streamId) },
+      });
+      expect(await ioredisClient.exists(chunkKey)).toBe(0);
+
+      // The first chunk lands AFTER the pause. The bug Codex re-raised: appendChunk
+      // would create the stream with only the short running TTL (60s), so the
+      // aggregated tool-call content is evicted before the 24h approval window ends.
+      // The fix reads the paused window from the job key and bumps the chunk TTL to it.
+      await store.appendChunk(streamId, {
+        event: 'on_pending_action',
+        data: buildPendingAction(streamId),
+      });
+
+      expect(await ioredisClient.ttl(chunkKey)).toBeGreaterThan(60);
+      expect(await ioredisClient.xlen(chunkKey)).toBeGreaterThanOrEqual(1);
+
+      await store.destroy();
+    });
+
+    test('appendChunk keeps a normally-running job on the short running TTL (no inflation)', async () => {
+      if (!ioredisClient) {
+        return;
+      }
+
+      const { RedisJobStore } = await import('../implementations/RedisJobStore');
+      const store = new RedisJobStore(ioredisClient, { runningTtl: 60 });
+      await store.initialize();
+
+      const streamId = `running-no-inflate-${Date.now()}`;
+      const chunkKey = `stream:{${streamId}}:chunks`;
+
+      // A normal running job: the job key carries the running TTL (set by createJob),
+      // NOT the long approval window. appendChunk must settle the chunk TTL on the
+      // short running TTL — never max it against the job key — so a live stream is
+      // not inflated to the 24h approval window.
+      await store.createJob(streamId, 'user-1', streamId);
+      await store.appendChunk(streamId, {
+        event: 'on_message_delta',
+        data: { text: 'hello' },
+      });
+
+      const ttl = await ioredisClient.ttl(chunkKey);
+      expect(ttl).toBeGreaterThan(0);
+      expect(ttl).toBeLessThanOrEqual(60);
+
+      await store.destroy();
+    });
+
+    test('createJob clears stale per-turn identity (agent_id, isTemporary) from a reused hash', async () => {
+      if (!ioredisClient) {
+        return;
+      }
+
+      const { RedisJobStore } = await import('../implementations/RedisJobStore');
+      const store = new RedisJobStore(ioredisClient);
+      await store.initialize();
+
+      const streamId = `stale-agent-${Date.now()}`;
+      // Turn 1: a saved agent in a temporary chat that discovered a deferred tool.
+      await store.createJob(streamId, 'user-1', streamId);
+      await store.updateJob(streamId, {
+        agent_id: 'saved-agent-1',
+        isTemporary: true,
+        discoveredTools: ['deep_tool'],
+      });
+      const turn1 = await store.getJob(streamId);
+      expect(turn1?.agent_id).toBe('saved-agent-1');
+      expect(turn1?.isTemporary).toBe(true);
+      expect(turn1?.discoveredTools).toEqual(['deep_tool']);
+
+      // Turn 2 on the SAME conversation switches to an ephemeral / non-temporary turn.
+      // The hash is keyed by conversationId, so without clearing, the old agent_id and
+      // isTemporary would survive — the resume guard would reject the valid pause as a
+      // different agent, and the resumed response would be saved as temporary. The stale
+      // discoveredTools would also force-load deferred tools this turn never discovered.
+      await store.createJob(streamId, 'user-1', streamId);
+      const turn2 = await store.getJob(streamId);
+      expect(turn2?.agent_id).toBeUndefined();
+      expect(turn2?.isTemporary).toBeUndefined();
+      expect(turn2?.discoveredTools).toBeUndefined();
+
+      await store.destroy();
+    });
+
     test('should not drop paused jobs from user tracking when cleanup sees a stale running index', async () => {
       if (!ioredisClient) {
         return;
diff --git a/packages/api/src/stream/__tests__/pendingAction.spec.ts b/packages/api/src/stream/__tests__/pendingAction.spec.ts
index de4136a513..4edfed9619 100644
--- a/packages/api/src/stream/__tests__/pendingAction.spec.ts
+++ b/packages/api/src/stream/__tests__/pendingAction.spec.ts
@@ -172,6 +172,21 @@ describe('ApprovalLifecycle via GenerationJobManager.approvals (in-memory)', ()
       expect(await manager.approvals.expire(streamId)).toBe(false);
     });
 
+    test('a mismatched expectedActionId no-ops (protects a re-paused action from a stale sweep)', async () => {
+      const streamId = 'stream-expire-mismatch';
+      await manager.createJob(streamId, 'user-1');
+      await manager.approvals.pause(streamId, buildAction(streamId, { actionId: 'action-A' }));
+
+      // A sweep that observed an OLDER (now-resolved) action must not abort the current
+      // pause — its CAS only fires when the live pendingActionId still matches.
+      expect(await manager.approvals.expire(streamId, 'stale-other-action')).toBe(false);
+      expect(await manager.getJobStatus(streamId)).toBe('requires_action');
+
+      // The matching id still expires it.
+      expect(await manager.approvals.expire(streamId, 'action-A')).toBe(true);
+      expect(await manager.getJobStatus(streamId)).toBe('aborted');
+    });
+
     test('sets completedAt so terminal cleanup can reclaim the job', async () => {
       const streamId = 'stream-expire-completed';
       await manager.createJob(streamId, 'user-1');
@@ -249,3 +264,88 @@ describe('InMemoryJobStore — approval expiry cleanup', () => {
     expect(await store.getJob('s1')).toBeNull();
   });
 });
+
+describe('GenerationJobManager HITL resume metadata (round 19)', () => {
+  let manager: GenerationJobManagerClass;
+
+  beforeEach(() => {
+    manager = new GenerationJobManagerClass();
+    manager.configure({
+      jobStore: new InMemoryJobStore({ ttlAfterComplete: 60000 }),
+      eventTransport: new InMemoryEventTransport(),
+      isRedis: false,
+      cleanupOnComplete: false,
+    });
+    manager.initialize();
+  });
+
+  afterEach(async () => {
+    await manager.destroy();
+  });
+
+  function buildAction(streamId: string) {
+    const payload = buildToolApprovalPayload([
+      { name: 'shell', arguments: { command: 'ls' }, tool_call_id: 'call_abc' },
+    ]);
+    return buildPendingAction(payload, {
+      streamId,
+      conversationId: streamId,
+      runId: 'run-1',
+      responseMessageId: 'msg-1',
+    });
+  }
+
+  // H1: round-18 captured discoveredTools but the metadata allowlists (updateMetadata,
+  // Redis deserialize, buildJobFacade) dropped it, so resume replayed `undefined`.
+  test('updateMetadata persists discoveredTools and the job facade exposes them', async () => {
+    const streamId = 'stream-discovered';
+    await manager.createJob(streamId, 'user-1');
+    await manager.updateMetadata(streamId, { discoveredTools: ['deep_tool', 'other_tool'] });
+    const job = await manager.getJob(streamId);
+    expect(job?.metadata.discoveredTools).toEqual(['deep_tool', 'other_tool']);
+  });
+
+  // H4: a pause that lands AFTER the resume snapshot but before the subscription must
+  // still reach the client. subscribeWithResume re-reads the live job and surfaces it.
+  test('subscribeWithResume surfaces a pause that the resume snapshot missed', async () => {
+    const streamId = 'stream-race';
+    await manager.createJob(streamId, 'user-1');
+    const action = buildAction(streamId);
+    await manager.approvals.pause(streamId, action);
+
+    // Simulate the snapshot being taken BEFORE the pause: drop pendingAction from the
+    // resume state even though the live job is now requires_action.
+    const realGetResumeState = manager.getResumeState.bind(manager);
+    jest.spyOn(manager, 'getResumeState').mockImplementation(async (id: string) => {
+      const state = await realGetResumeState(id);
+      return state ? { ...state, pendingAction: undefined } : state;
+    });
+
+    const result = await manager.subscribeWithResume(streamId, () => {});
+    const pending = result.pendingEvents.find(
+      (e) => 'event' in e && e.event === 'on_pending_action',
+    );
+    expect(pending).toBeDefined();
+    expect((pending as unknown as { data: { actionId: string } }).data.actionId).toBe(
+      action.actionId,
+    );
+    result.subscription?.unsubscribe();
+  });
+
+  // H4 negative: when the snapshot already carried the action, the re-read is skipped
+  // (the client gets it via resumeState.pendingAction) — no duplicate pending event.
+  test('does not re-surface the pending action when the snapshot already carried it', async () => {
+    const streamId = 'stream-norace';
+    await manager.createJob(streamId, 'user-1');
+    const action = buildAction(streamId);
+    await manager.approvals.pause(streamId, action);
+
+    const result = await manager.subscribeWithResume(streamId, () => {});
+    const pendingCount = result.pendingEvents.filter(
+      (e) => 'event' in e && e.event === 'on_pending_action',
+    ).length;
+    expect(pendingCount).toBe(0);
+    expect(result.resumeState?.pendingAction?.actionId).toBe(action.actionId);
+    result.subscription?.unsubscribe();
+  });
+});
diff --git a/packages/api/src/stream/implementations/RedisJobStore.ts b/packages/api/src/stream/implementations/RedisJobStore.ts
index 32d78ee487..6453ab4dcb 100644
--- a/packages/api/src/stream/implementations/RedisJobStore.ts
+++ b/packages/api/src/stream/implementations/RedisJobStore.ts
@@ -39,6 +39,71 @@ const JOB_CAS_LUA =
   'redis.call("EXPIRE", KEYS[1], ttl) ' +
   'return 1';
 
+/**
+ * XADD a chunk + set the chunk-stream TTL to the right window WITHOUT ever shrinking it.
+ *
+ * During a live stream the running TTL is refreshed on every chunk. But a job paused
+ * for HITL review must keep its chunk stream alive for the whole approval window, not
+ * the ~20m running TTL — otherwise the pre-pause aggregated content (tool call + earlier
+ * text) is evicted before the user resolves and `getResumeState()` loses it.
+ *
+ * `transitionStatus` extends the chunk-key TTL to the approval window at pause time, but
+ * that alone is not enough:
+ *   1. The pause's `EXPIRE chunks` is a no-op if the chunk key does not exist yet — and
+ *      `appendChunk` is fire-and-forget, so the first chunk's XADD can land AFTER the
+ *      pause, or an ask-user pause can occur before any chunk was ever persisted.
+ *   2. The `on_pending_action` chunk (and any chunk that races in after the pause) would
+ *      otherwise reset an already-extended TTL back to the short running TTL.
+ * So this script derives the target window itself: the running TTL normally, but when the
+ * job hash is paused (`status == "requires_action"`) it takes the larger of the running
+ * TTL and the job key's own remaining TTL (which `transitionStatus` set to the approval
+ * window). It only ever EXTENDS — `cur < target` — so a normally-running stream keeps the
+ * round-10 extend-only behavior and is never inflated to the approval window.
+ *
+ * Reading the paused window from the job key (rather than always max-ing against it) is
+ * what keeps a normal running run on the short TTL: TTL(jobKey) is only the long approval
+ * window while paused; for a running job the job key carries the running TTL, so target
+ * stays `run`.
+ *
+ *   KEYS: [chunks, job]
+ *   ARGV: [eventJson, runningTtl]
+ */
+const CHUNK_APPEND_LUA =
+  'redis.call("XADD", KEYS[1], "*", "event", ARGV[1]) ' +
+  'local run = tonumber(ARGV[2]) ' +
+  'local target = run ' +
+  'if redis.call("HGET", KEYS[2], "status") == "requires_action" then ' +
+  'local jt = redis.call("TTL", KEYS[2]) ' +
+  'if jt > target then target = jt end ' +
+  'end ' +
+  'local cur = redis.call("TTL", KEYS[1]) ' +
+  'if cur < target then redis.call("EXPIRE", KEYS[1], target) end ' +
+  'return 1';
+
+/**
+ * Persist the run-step timeline with the same paused-window TTL as the chunk stream.
+ * `saveRunSteps` SETs (overwrites) the whole array, so unlike the chunk append there's no
+ * prior key TTL worth preserving — but the write must still extend to the APPROVAL window
+ * when the job is paused (`status == "requires_action"`). Otherwise a run-step save that
+ * lands at/after a fast pause resets the key to the short running TTL, and a reload of a
+ * still-live approval after that window loses the tool/run-step timeline even though the
+ * approval remains resumable. Reads the paused window from the job key (which
+ * `transitionStatus` set); a normally-running job keeps the short running TTL.
+ *
+ *   KEYS: [runSteps, job]
+ *   ARGV: [runStepsJson, runningTtl]
+ */
+const RUNSTEPS_SAVE_LUA =
+  'redis.call("SET", KEYS[1], ARGV[1]) ' +
+  'local run = tonumber(ARGV[2]) ' +
+  'local target = run ' +
+  'if redis.call("HGET", KEYS[2], "status") == "requires_action" then ' +
+  'local jt = redis.call("TTL", KEYS[2]) ' +
+  'if jt > target then target = jt end ' +
+  'end ' +
+  'redis.call("EXPIRE", KEYS[1], target) ' +
+  'return 1';
+
 /** Decision kinds the SDK can emit, used to sanity-check persisted records. */
 const KNOWN_INTERRUPT_TYPES = new Set(['tool_approval', 'ask_user_question']);
 
@@ -206,14 +271,27 @@ export class RedisJobStore implements IJobStore {
     const key = KEYS.job(streamId);
     const userJobsKey = KEYS.userJobs(userId, tenantId);
 
-    // A reused streamId overlays onto any existing hash, so paused-run fields
-    // from a prior generation could survive. Drop the HITL fields so the fresh
-    // running job never exposes stale approval metadata and cleanup keys off the
-    // new createdAt rather than a leftover lastActiveAt.
+    // A reused streamId overlays onto any existing hash, so per-turn fields from a
+    // prior generation could survive. Drop the HITL fields so the fresh running job
+    // never exposes stale approval metadata and cleanup keys off the new createdAt
+    // rather than a leftover lastActiveAt. `agent_id` is included because
+    // updateMetadata only writes it when truthy — without clearing it here, a
+    // conversation that switches from a saved agent to an ephemeral/no-agent turn
+    // would keep the old agent_id and the resume guard would reject the valid pause.
     const staleHitlFields: Array<keyof SerializableJobData> = [
       'pendingAction',
       'pendingActionId',
       'lastActiveAt',
+      'agent_id',
+      // Same reasoning as agent_id: updateMetadata only writes isTemporary when the new
+      // metadata carries it, so a prior temporary turn's isTemporary=1 would otherwise
+      // survive and a later non-temporary resume would save its response as temporary.
+      'isTemporary',
+      // Same reasoning again: handleRunInterrupt only writes discoveredTools when THIS
+      // turn discovered ≥1 deferred tool, so a replacement turn that later pauses without
+      // its own discovery would otherwise inherit the prior run's tool names and force-load
+      // deferred tools it never discovered on resume.
+      'discoveredTools',
     ];
 
     // For cluster mode, we can't pipeline keys on different slots
@@ -630,6 +708,12 @@ export class RedisJobStore implements IJobStore {
                 error: 'Approval expired before a decision was made',
                 completedAt: Date.now(),
               },
+              // Scope the CAS to the action we observed as stale: if the user resolved it
+              // and the run re-paused on a fresh action between the read and here, the
+              // pendingActionId no longer matches and this no-ops instead of aborting the
+              // valid new pause. (Undefined for a missing/malformed pendingAction — nothing
+              // to protect — so it falls back to the status-only check.)
+              expectActionId: job.pendingAction?.actionId,
             });
             return 1;
           }
@@ -937,14 +1021,24 @@ export class RedisJobStore implements IJobStore {
    */
   async appendChunk(streamId: string, event: unknown): Promise<void> {
     const key = KEYS.chunks(streamId);
-    // Pipeline XADD + EXPIRE in a single round-trip.
-    // EXPIRE is O(1) and idempotent — refreshing TTL on every chunk is better than
-    // only setting it once, since the original approach could let the TTL expire
-    // during long-running streams.
-    const pipeline = this.redis.pipeline();
-    pipeline.xadd(key, '*', 'event', JSON.stringify(event));
-    pipeline.expire(key, this.ttl.running);
-    await pipeline.exec();
+    const jobKey = KEYS.job(streamId);
+    // XADD + derive-and-extend-only EXPIRE in a single atomic eval. Refreshing the TTL on
+    // every chunk (vs only once) keeps the key alive through long streams, but it must
+    // NEVER shrink an already-longer TTL — a paused (requires_action) job needs this key
+    // to live for the whole approval window, and the on_pending_action append (or any
+    // chunk that lands after the pause) would otherwise reset it to the short running TTL.
+    // The script reads the paused window from the job key, so it bumps to the approval TTL
+    // even when the pause's own EXPIRE no-op'd because this key didn't exist yet, while a
+    // normally-running run still settles on the short running TTL. Both keys share the
+    // {streamId} hash tag, so the 2-key eval stays on one slot under Redis Cluster.
+    await this.redis.eval(
+      CHUNK_APPEND_LUA,
+      2,
+      key,
+      jobKey,
+      JSON.stringify(event),
+      String(this.ttl.running),
+    );
   }
 
   /**
@@ -970,11 +1064,20 @@ export class RedisJobStore implements IJobStore {
   }
 
   /**
-   * Save run steps for resume state.
+   * Save run steps for resume state. Uses the paused-window TTL script so a run-step save
+   * landing at/after a HITL pause extends to the approval window instead of resetting the
+   * key to the short running TTL (which would drop the tool timeline on a reload of a
+   * still-live approval — mirrors the chunk-stream no-shrink behavior).
    */
   async saveRunSteps(streamId: string, runSteps: Agents.RunStep[]): Promise<void> {
-    const key = KEYS.runSteps(streamId);
-    await this.redis.set(key, JSON.stringify(runSteps), 'EX', this.ttl.running);
+    await this.redis.eval(
+      RUNSTEPS_SAVE_LUA,
+      2,
+      KEYS.runSteps(streamId),
+      KEYS.job(streamId),
+      JSON.stringify(runSteps),
+      String(this.ttl.running),
+    );
   }
 
   // ===== Consumer Group Methods =====
@@ -1215,6 +1318,10 @@ export class RedisJobStore implements IJobStore {
       iconURL: data.iconURL || undefined,
       model: data.model || undefined,
       promptTokens: data.promptTokens ? parseInt(data.promptTokens, 10) : undefined,
+      agent_id: data.agent_id || undefined,
+      isTemporary: data.isTemporary != null ? data.isTemporary === '1' : undefined,
+      // Deferred tools discovered before a HITL pause; replayed into createRun on resume.
+      discoveredTools: data.discoveredTools ? JSON.parse(data.discoveredTools) : undefined,
       titleEvent: data.titleEvent || undefined,
       replayEvents: data.replayEvents || undefined,
       contextUsage: data.contextUsage || undefined,
diff --git a/packages/api/src/stream/interfaces/IJobStore.ts b/packages/api/src/stream/interfaces/IJobStore.ts
index f3dcb752bf..9e55d543e1 100644
--- a/packages/api/src/stream/interfaces/IJobStore.ts
+++ b/packages/api/src/stream/interfaces/IJobStore.ts
@@ -32,11 +32,25 @@ export interface SerializableJobData {
     /** Quoted excerpts referenced on this turn, carried so resumable/aborted
      *  reconstructions of the user message keep their `MessageQuotes`. */
     quotes?: string[];
+    /** Skill selections, carried so a HITL-resumed turn's requestMessage keeps its pills. */
+    manualSkills?: string[];
+    alwaysAppliedSkills?: string[];
+    /** Uploaded files for the turn, carried so a HITL resume sources them from the job
+     *  rather than a user DB row whose save can still be racing the approval prompt. */
+    files?: unknown[];
   };
 
   /** Response message ID for reconnection */
   responseMessageId?: string;
 
+  /**
+   * Deferred-tool names discovered (via `tool_search`) before a HITL pause, captured
+   * so a resume can replay them into `createRun` — the rebuilt graph uses `messages: []`
+   * (state comes from the checkpoint), so without these the paused deferred tool would be
+   * absent from the schema-only toolMap and resume would fail with "unknown tool".
+   */
+  discoveredTools?: string[];
+
   /** Whether the user-message created event has been emitted */
   createdEventEmitted?: boolean;
 
@@ -67,6 +81,20 @@ export interface SerializableJobData {
   model?: string;
   promptTokens?: number;
 
+  /**
+   * Agent that initiated the run. Persisted so a HITL resume can verify it rebuilds
+   * the SAME agent that paused — resuming Agent A's checkpoint on Agent B's graph
+   * would mis-execute the paused tool calls.
+   */
+  agent_id?: string;
+
+  /**
+   * Whether the originating turn was a temporary (non-persisted) chat. Persisted so
+   * a HITL resume keeps the resumed response temporary instead of saving it — the
+   * resume request can't be trusted to re-send the flag.
+   */
+  isTemporary?: boolean;
+
   /**
    * Set when status is `requires_action`. Describes the human review the
    * run is waiting on. Cleared by the resume path before the job returns to `running`.
diff --git a/packages/api/src/types/stream.ts b/packages/api/src/types/stream.ts
index d43a1112f1..47a94d6ead 100644
--- a/packages/api/src/types/stream.ts
+++ b/packages/api/src/types/stream.ts
@@ -20,6 +20,16 @@ export interface GenerationJobMetadata {
   model?: string;
   /** Prompt token count for abort token spending */
   promptTokens?: number;
+  /** Agent that initiated the run; a HITL resume verifies it rebuilds the same agent. */
+  agent_id?: string;
+  /** Whether the originating turn was a temporary chat; a HITL resume keeps it so. */
+  isTemporary?: boolean;
+  /**
+   * Deferred-tool names discovered (via `tool_search`) before a HITL pause. A resume
+   * replays these into `createRun` because the rebuilt graph uses `messages: []`, so
+   * without them the paused deferred tool would be missing from the schema-only toolMap.
+   */
+  discoveredTools?: string[];
   /** Set when the job is paused for human review (status === 'requires_action') */
   pendingAction?: Agents.PendingAction;
 }
diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts
index 01db2feeed..fefa3305a5 100644
--- a/packages/data-provider/src/config.ts
+++ b/packages/data-provider/src/config.ts
@@ -801,6 +801,45 @@ export const toolApprovalPolicySchema = z
 
 export type TToolApprovalPolicy = z.infer<typeof toolApprovalPolicySchema>;
 
+/**
+ * Durable checkpointer backing human-in-the-loop resume.
+ *
+ * When `toolApproval.enabled` is true, a run that pauses for review suspends its
+ * LangGraph state to a checkpoint; resuming rebuilds that state on a *fresh* `Run`
+ * — possibly on a different replica, or the same worker after a restart. That only
+ * works if the checkpoint outlives the original request, so HITL needs a durable
+ * saver, not the SDK's process-local `MemorySaver` fallback.
+ *
+ * Defaults are zero-config: with `toolApproval.enabled` on and no `checkpointer`
+ * block, LibreChat persists checkpoints to its primary MongoDB, so resume works
+ * across replicas out of the box.
+ *
+ * - `type: 'mongo'` (default) — persist to the app database; survives restarts and
+ *   resolves on any replica. A TTL index reclaims runs that are never resolved.
+ * - `type: 'memory'` — process-local only. Paused runs do NOT survive a restart and
+ *   can only be resolved on the originating worker. Single-process / dev only.
+ */
+export const checkpointerTypeSchema = z.enum(['mongo', 'memory']);
+export type TCheckpointerType = z.infer<typeof checkpointerTypeSchema>;
+
+export const checkpointerSchema = z
+  .object({
+    type: checkpointerTypeSchema.optional(),
+    /**
+     * Approval window, in seconds: how long a paused run waits for a decision
+     * before it is reclaimed. Drives both the Mongo TTL index on checkpoints and
+     * the pending-action expiry, keeping the two layers in lockstep. Defaults to
+     * 86400 (24h). Raise it for longer review windows.
+     */
+    ttl: z.number().int().positive().optional(),
+    /** Advanced: override the Mongo collection names used for checkpoints. */
+    checkpointCollectionName: z.string().optional(),
+    checkpointWritesCollectionName: z.string().optional(),
+  })
+  .optional();
+
+export type TCheckpointerConfig = z.infer<typeof checkpointerSchema>;
+
 export const agentsEndpointSchema = baseEndpointSchema
   .omit({ baseURL: true })
   .merge(
@@ -825,6 +864,9 @@ export const agentsEndpointSchema = baseEndpointSchema
       remoteApi: remoteApiSchema.optional(),
       /** Human-in-the-loop tool approval policy. Off by default. */
       toolApproval: toolApprovalPolicySchema,
+      /** Durable checkpointer backing HITL resume. Defaults to the app's MongoDB
+       *  when `toolApproval.enabled` is set; ignored otherwise. */
+      checkpointer: checkpointerSchema,
     }),
   )
   .default({
diff --git a/packages/data-provider/src/types/agents.ts b/packages/data-provider/src/types/agents.ts
index 6318dd651f..0e7463f924 100644
--- a/packages/data-provider/src/types/agents.ts
+++ b/packages/data-provider/src/types/agents.ts
@@ -217,6 +217,11 @@ export namespace Agents {
     parentMessageId?: string;
     conversationId?: string;
     text?: string;
+    /** Skill selections on the turn, carried so a HITL-resumed turn's reconstructed
+     *  requestMessage keeps its skill pills (they aren't on the DB row the client refetches
+     *  until reload). */
+    manualSkills?: string[];
+    alwaysAppliedSkills?: string[];
   }
 
   /** State data sent to reconnecting clients */
@@ -403,6 +408,21 @@ export namespace Agents {
      * worker that didn't originate the run.
      */
     threadId?: string;
+    /**
+     * Fingerprint of the request fields that determine the agent/graph + tool set
+     * (endpoint, agent_id, model, spec, ephemeralAgent), captured at pause time. The
+     * resume route recomputes it from the resume request and rejects a mismatch — the
+     * guard that catches an ephemeral-agent config swap, where `agent_id` is undefined
+     * so the id check can't.
+     */
+    requestFingerprint?: string;
+    /**
+     * Graph-determining request fields (endpoint, agent_id, model, spec, promptPrefix,
+     * ephemeralAgent) captured at pause. The resume route REPLAYS these onto the request
+     * before rebuilding the run, so a reload/cross-replica resume — where the client can
+     * no longer reconstruct the ephemeral config — still rebuilds the same agent/graph.
+     */
+    resumeContext?: Record<string, unknown>;
   }
 
   /**
diff --git a/packages/data-provider/src/types/runs.ts b/packages/data-provider/src/types/runs.ts
index 1fdc07cd81..da386ac5f4 100644
--- a/packages/data-provider/src/types/runs.ts
+++ b/packages/data-provider/src/types/runs.ts
@@ -48,6 +48,15 @@ export enum UsageEvents {
   ON_TOKEN_USAGE = 'on_token_usage',
 }
 
+/**
+ * Human-in-the-loop event names. Streamed to live clients when a run pauses for
+ * tool approval (or an ask-user question). Reconnecting clients instead read the
+ * same record from `resumeState.pendingAction` on the sync event / status route.
+ */
+export enum ApprovalEvents {
+  ON_PENDING_ACTION = 'on_pending_action',
+}
+
 /** Mirrors TokenBudgetBreakdown from @librechat/agents (data-provider cannot import it). */
 export type TTokenBudgetBreakdown = {
   maxContextTokens: number;