From bb7d99d56cfd6f6975ca46615940cbd6dab057d5 Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Wed, 1 Jul 2026 11:07:30 -0400
Subject: [PATCH] =?UTF-8?q?=F0=9F=AB=B7=20feat:=20Exclude=20File=20Authori?=
 =?UTF-8?q?ng=20Tools=20From=20Eager=20Execution=20(#14051)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: exclude create_file/edit_file from eager execution

Side-effecting host file-authoring tools should not be eager-executed
speculatively, for two reasons. First, a write can land before the turn
commits. Second, the eager path's incrementally streamed args can diverge from
the final tool call and trip the SDK's 'changed after eager execution' guard;
the model is then told the write failed and loops (observed with create_file
writing a large file to /mnt/data).

Pass excludeToolNames so these tools run on the normal ToolNode path with the
final args. Requires @librechat/agents with eager-exclusion support; older
versions ignore the field.

* chore: Bump `@librechat/agents` to v3.2.56

* refactor: reorder imports in run.ts for clarity

* fix: also exclude execute_code/bash_tool from eager execution

The 'changed after eager execution' corruption isn't specific to file
authoring — any tool with a large free-form streamed arg is exposed. Observed
live: a bash_tool heredoc (a full Python script in `command`) tripped the guard
and the write never landed. execute_code (`code`) and bash_tool (`command`)
carry large args and run code (side effects), so exclude them from eager
execution alongside create_file/edit_file.

* feat: wire codeSessionToolNames so create_file/edit_file share the code sandbox

Activates the agents#283 capability: pass create_file/edit_file as
codeSessionToolNames so their exec session/files fold into the shared code
session and a file they write is visible to later execute_code/bash_tool calls
(and the existing session is injected into their requests). No-op until
@librechat/agents ships codeSessionToolNames (agents#283).

* test: guard code-tool eager/session wiring in createRun

Asserts createRun passes excludeToolNames (create_file/edit_file/execute_code/
bash_tool) and codeSessionToolNames (create_file/edit_file) to Run.create — the
wiring the create_file->bash_tool sandbox-sharing chain depends on, which was
silently missing before. Guards against a future edit dropping it. Mirrors the
run-summarization test harness (mocks Run.create).

The full create_file->bash_tool chain runs through the real code sandbox and
can't run in the mock CI harness; the SDK mechanism is covered by
@librechat/agents unit tests, and this guards the LibreChat wiring.

* style: fix prettier formatting in run-codeTools test

* chore: Bump `@librechat/agents` to v3.2.57
---
 api/package.json                              |   2 +-
 package-lock.json                             |  10 +-
 packages/api/package.json                     |   2 +-
 .../agents/__tests__/run-codeTools.test.ts    | 113 ++++++++++++++++++
 packages/api/src/agents/run.ts                |  27 ++++-
 5 files changed, 146 insertions(+), 8 deletions(-)
 create mode 100644 packages/api/src/agents/__tests__/run-codeTools.test.ts

diff --git a/api/package.json b/api/package.json
index 36b045d103..b9de475faa 100644
--- a/api/package.json
+++ b/api/package.json
@@ -46,7 +46,7 @@
     "@azure/storage-blob": "^12.30.0",
     "@google/genai": "^2.8.0",
     "@keyv/redis": "^4.3.3",
-    "@librechat/agents": "^3.2.55",
+    "@librechat/agents": "^3.2.57",
     "@librechat/api": "*",
     "@librechat/data-schemas": "*",
     "@microsoft/microsoft-graph-client": "^3.0.7",
diff --git a/package-lock.json b/package-lock.json
index 2b2aeaa0fe..09f9455322 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -61,7 +61,7 @@
         "@azure/storage-blob": "^12.30.0",
         "@google/genai": "^2.8.0",
         "@keyv/redis": "^4.3.3",
-        "@librechat/agents": "^3.2.55",
+        "@librechat/agents": "^3.2.57",
         "@librechat/api": "*",
         "@librechat/data-schemas": "*",
         "@microsoft/microsoft-graph-client": "^3.0.7",
@@ -9879,9 +9879,9 @@
       }
     },
     "node_modules/@librechat/agents": {
-      "version": "3.2.55",
-      "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.2.55.tgz",
-      "integrity": "sha512-eOysFxc9o70ORArMiy42iMGUyPEWZ0ylLYEr+UlPl3Wgvklw5cK5QvJoAQaSVKiN3vL8V4No9YiNhDfcvHH83A==",
+      "version": "3.2.57",
+      "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.2.57.tgz",
+      "integrity": "sha512-utC5z5GpNP4U8HAFPtxmFHt1V72H9cP7rNN3hQx//CH18z//hP623qzgnjo6ZkbLgmu4Gc/GfgykT/svndF+Dg==",
       "license": "MIT",
       "dependencies": {
         "@anthropic-ai/sdk": "^0.103.0",
@@ -42405,7 +42405,7 @@
         "@azure/storage-blob": "^12.30.0",
         "@google/genai": "^2.8.0",
         "@keyv/redis": "^4.3.3",
-        "@librechat/agents": "^3.2.55",
+        "@librechat/agents": "^3.2.57",
         "@librechat/data-schemas": "*",
         "@modelcontextprotocol/sdk": "^1.29.0",
         "@opentelemetry/api": "^1.9.0",
diff --git a/packages/api/package.json b/packages/api/package.json
index 48c4c7fd8a..0d663ccd05 100644
--- a/packages/api/package.json
+++ b/packages/api/package.json
@@ -113,7 +113,7 @@
     "@azure/storage-blob": "^12.30.0",
     "@google/genai": "^2.8.0",
     "@keyv/redis": "^4.3.3",
-    "@librechat/agents": "^3.2.55",
+    "@librechat/agents": "^3.2.57",
     "@librechat/data-schemas": "*",
     "@modelcontextprotocol/sdk": "^1.29.0",
     "@opentelemetry/api": "^1.9.0",
diff --git a/packages/api/src/agents/__tests__/run-codeTools.test.ts b/packages/api/src/agents/__tests__/run-codeTools.test.ts
new file mode 100644
index 0000000000..8fa6783aa4
--- /dev/null
+++ b/packages/api/src/agents/__tests__/run-codeTools.test.ts
@@ -0,0 +1,113 @@
+import { createRun } from '~/agents/run';
+
+/**
+ * Guards the code-tool eager/session wiring in `createRun`. The whole
+ * create_file -> bash_tool sandbox-sharing chain depends on run.ts passing
+ * `codeSessionToolNames` (so file-authoring tools share the code session) and
+ * `excludeToolNames` (so side-effecting/large-arg tools aren't eager-executed).
+ * These were silently missing before and only surfaced with both the
+ * file-authoring and code-execution capabilities enabled — assert they're wired
+ * so a future edit can't drop them without failing CI.
+ */
+
+jest.mock('winston', () => ({
+  createLogger: jest.fn(() => ({
+    debug: jest.fn(),
+    warn: jest.fn(),
+    error: jest.fn(),
+    info: jest.fn(),
+  })),
+  format: Object.assign(
+    jest.fn((fn) => () => ({ transform: fn })), // format(fn) stub: yields a factory returning { transform: fn }
+    {
+      combine: jest.fn(),
+      colorize: jest.fn(),
+      simple: jest.fn(),
+      label: jest.fn(),
+      timestamp: jest.fn(),
+      printf: jest.fn(),
+      errors: jest.fn(),
+      splat: jest.fn(),
+      json: jest.fn(),
+    },
+  ),
+  addColors: jest.fn(),
+  transports: { Console: jest.fn(), DailyRotateFile: jest.fn(), File: jest.fn() },
+})); // winston is replaced wholesale by jest.fn stubs; nothing from the real module is kept
+
+jest.mock('~/utils/env', () => ({
+  resolveHeaders: jest.fn((opts: { headers: unknown }) => opts?.headers ?? {}), // returns opts.headers, or {} when nullish
+  createSafeUser: jest.fn(() => ({})), // always returns a fresh empty object
+}));
+
+jest.mock('@librechat/data-schemas', () => ({
+  ...jest.requireActual('@librechat/data-schemas'), // keep every real export of the package
+  logger: { debug: jest.fn(), warn: jest.fn(), error: jest.fn(), info: jest.fn() }, // override only `logger` with jest.fn stubs
+}));
+
+jest.mock('@librechat/agents', () => {
+  const actual = jest.requireActual('@librechat/agents'); // real module: every export other than Run stays intact
+  return {
+    ...actual,
+    Run: {
+      create: jest.fn().mockResolvedValue({
+        processStream: jest.fn().mockResolvedValue(undefined),
+      }), // Run.create resolves to a fake run; as a jest.fn it records the arguments of each call
+    },
+  };
+});
+
+jest.mock('~/agents/checkpointer', () => ({
+  getAgentCheckpointer: jest.fn().mockResolvedValue({}), // resolves to an empty object
+}));
+
+import { Run } from '@librechat/agents';
+
+function makeAgent(overrides?: Record<string, unknown>) {
+  return {
+    id: 'agent_1',
+    provider: 'openAI',
+    endpoint: 'openAI',
+    model: 'gpt-4o',
+    tools: [],
+    model_parameters: { model: 'gpt-4o' },
+    maxContextTokens: 100_000,
+    toolContextMap: {},
+    ...overrides, // spread last so caller-supplied fields replace the fixture defaults above
+  };
+}
+
+async function captureRunConfig(): Promise<Record<string, unknown>> {
+  await createRun({
+    agents: [makeAgent()] as never, // `as never` bypasses the parameter type for this plain-object fixture
+    signal: new AbortController().signal,
+    streaming: true,
+    streamUsage: true,
+  });
+  const createMock = Run.create as jest.Mock; // stubbed by the '@librechat/agents' mock factory in this file
+  expect(createMock).toHaveBeenCalledTimes(1); // holds only if mock call history was cleared before this call
+  return createMock.mock.calls[0][0] as Record<string, unknown>; // first argument of that single Run.create call
+}
+
+describe('createRun code-tool eager/session wiring', () => {
+  beforeEach(() => jest.clearAllMocks()); // clears recorded calls (mock implementations are kept) so each test sees one Run.create call
+
+  it('excludes side-effecting/large-arg tools from eager execution', async () => {
+    const runConfig = await captureRunConfig();
+    const eager = runConfig.eagerEventToolExecution as {
+      enabled?: boolean;
+      excludeToolNames?: string[];
+    }; // local structural view of the option; runConfig itself is Record<string, unknown>
+    expect(eager.enabled).toBe(true);
+    expect(eager.excludeToolNames).toEqual(
+      expect.arrayContaining(['create_file', 'edit_file', 'execute_code', 'bash_tool']),
+    ); // arrayContaining: order-insensitive, and additional excluded names still pass
+  });
+
+  it('declares create_file/edit_file as code-session participants', async () => {
+    const runConfig = await captureRunConfig();
+    expect(runConfig.codeSessionToolNames).toEqual(
+      expect.arrayContaining(['create_file', 'edit_file']),
+    ); // subset check only: extra session participants would still pass
+  });
+});
diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts
index 810b56e1e3..82606f5e51 100644
--- a/packages/api/src/agents/run.ts
+++ b/packages/api/src/agents/run.ts
@@ -35,6 +35,7 @@ import type { AppConfig, IUser } from '@librechat/data-schemas';
 import type { SubagentUsageEvent } from '~/agents/usage';
 import type * as t from '~/types';
 import { getLLMConfig as getAnthropicLLMConfig } from '~/endpoints/anthropic/llm';
+import { CREATE_FILE_TOOL_NAME, EDIT_FILE_TOOL_NAME } from '~/agents/tools';
 import { getProviderConfig } from '~/endpoints/config/providers';
 import { resolveToolApprovalPolicy } from '~/agents/hitl/policy';
 import { extractDefaultParams } from '~/endpoints/openai/llm';
@@ -1214,7 +1215,31 @@ export async function createRun({
     calibrationRatio,
     indexTokenCountMap,
     subagentUsageSink,
-    eagerEventToolExecution: { enabled: true },
+    // Exclude side-effecting / large-free-form-arg tools from eager execution.
+    // Eager speculatively runs a tool mid-stream; for a big streamed arg (a
+    // file body, a bash heredoc, a code block) the accumulated args can diverge
+    // from the final tool call and trip the SDK's "changed after eager
+    // execution" guard, and a speculative write/exec can land before the turn
+    // commits. create_file/edit_file write files; execute_code/bash_tool run
+    // code with large `code`/`command` args. `excludeToolNames` requires
+    // @librechat/agents with the eager-exclusion support (agents#281); older
+    // versions ignore the field.
+    eagerEventToolExecution: {
+      enabled: true,
+      excludeToolNames: [
+        CREATE_FILE_TOOL_NAME,
+        EDIT_FILE_TOOL_NAME,
+        Constants.EXECUTE_CODE,
+        Constants.BASH_TOOL,
+      ],
+    },
+    // Let host file-authoring tools share the code-execution sandbox session so
+    // a file created with create_file/edit_file is visible to later
+    // execute_code/bash_tool calls (and vice versa). The SDK folds these tools'
+    // returned exec session/files into the shared code session and injects the
+    // existing session into their requests. Requires @librechat/agents with
+    // codeSessionToolNames support (agents#283); older versions ignore it.
+    codeSessionToolNames: [CREATE_FILE_TOOL_NAME, EDIT_FILE_TOOL_NAME],
     // Derive the Langfuse trace id deterministically from runId so message
     // feedback can be scored against the trace without a lookup (see the
     // feedback route in api/server/routes/messages.js). No-op unless Langfuse