From bb7d99d56cfd6f6975ca46615940cbd6dab057d5 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Wed, 1 Jul 2026 11:07:30 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=AB=B7=20feat:=20Exclude=20File=20Authori?= =?UTF-8?q?ng=20Tools=20From=20Eager=20Execution=20(#14051)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: exclude create_file/edit_file from eager execution Side-effecting host file-authoring tools should not be speculatively eager-executed: a write can land before the turn commits, and the eager path's incrementally-streamed args can diverge from the final tool call, tripping the SDK's 'changed after eager execution' guard so the model is told the write failed and loops (observed with create_file writing a large file to /mnt/data). Pass excludeToolNames so these tools run on the normal ToolNode path with the final args. Requires @librechat/agents with eager-exclusion support; older versions ignore the field. * chore: Bump `@librechat/agents` to v3.2.56 * refactor: reorder imports in run.ts for clarity * fix: also exclude execute_code/bash_tool from eager execution The eager 'changed after eager execution' corruption isn't specific to file authoring — any tool with a large free-form streamed arg is exposed. Observed live: a bash_tool heredoc (a full Python script in `command`) tripped the guard and the write never landed. execute_code (`code`) and bash_tool (`command`) carry large args and run code (side effects), so exclude them from eager alongside create_file/edit_file. * feat: wire codeSessionToolNames so create_file/edit_file share the code sandbox Activates the agents#283 capability: pass create_file/edit_file as codeSessionToolNames so their exec session/files fold into the shared code session and a file they write is visible to later execute_code/bash_tool calls (and the existing session is injected into their requests). No-op until @librechat/agents ships codeSessionToolNames (agents#283). 
* test: guard code-tool eager/session wiring in createRun Asserts createRun passes excludeToolNames (create_file/edit_file/execute_code/ bash_tool) and codeSessionToolNames (create_file/edit_file) to Run.create — the wiring the create_file->bash_tool sandbox-sharing chain depends on, which was silently missing before. Guards against a future edit dropping it. Mirrors the run-summarization test harness (mocks Run.create). The full create_file->bash_tool chain runs through the real code sandbox and can't run in the mock CI harness; the SDK mechanism is covered by @librechat/agents unit tests, and this guards the LibreChat wiring. * style: fix prettier formatting in run-codeTools test * chore: Bump `@librechat/agents` to v3.2.57 --- api/package.json | 2 +- package-lock.json | 10 +- packages/api/package.json | 2 +- .../agents/__tests__/run-codeTools.test.ts | 113 ++++++++++++++++++ packages/api/src/agents/run.ts | 27 ++++- 5 files changed, 146 insertions(+), 8 deletions(-) create mode 100644 packages/api/src/agents/__tests__/run-codeTools.test.ts diff --git a/api/package.json b/api/package.json index 36b045d103..b9de475faa 100644 --- a/api/package.json +++ b/api/package.json @@ -46,7 +46,7 @@ "@azure/storage-blob": "^12.30.0", "@google/genai": "^2.8.0", "@keyv/redis": "^4.3.3", - "@librechat/agents": "^3.2.55", + "@librechat/agents": "^3.2.57", "@librechat/api": "*", "@librechat/data-schemas": "*", "@microsoft/microsoft-graph-client": "^3.0.7", diff --git a/package-lock.json b/package-lock.json index 2b2aeaa0fe..09f9455322 100644 --- a/package-lock.json +++ b/package-lock.json @@ -61,7 +61,7 @@ "@azure/storage-blob": "^12.30.0", "@google/genai": "^2.8.0", "@keyv/redis": "^4.3.3", - "@librechat/agents": "^3.2.55", + "@librechat/agents": "^3.2.57", "@librechat/api": "*", "@librechat/data-schemas": "*", "@microsoft/microsoft-graph-client": "^3.0.7", @@ -9879,9 +9879,9 @@ } }, "node_modules/@librechat/agents": { - "version": "3.2.55", - "resolved": 
"https://registry.npmjs.org/@librechat/agents/-/agents-3.2.55.tgz", - "integrity": "sha512-eOysFxc9o70ORArMiy42iMGUyPEWZ0ylLYEr+UlPl3Wgvklw5cK5QvJoAQaSVKiN3vL8V4No9YiNhDfcvHH83A==", + "version": "3.2.57", + "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.2.57.tgz", + "integrity": "sha512-utC5z5GpNP4U8HAFPtxmFHt1V72H9cP7rNN3hQx//CH18z//hP623qzgnjo6ZkbLgmu4Gc/GfgykT/svndF+Dg==", "license": "MIT", "dependencies": { "@anthropic-ai/sdk": "^0.103.0", @@ -42405,7 +42405,7 @@ "@azure/storage-blob": "^12.30.0", "@google/genai": "^2.8.0", "@keyv/redis": "^4.3.3", - "@librechat/agents": "^3.2.55", + "@librechat/agents": "^3.2.57", "@librechat/data-schemas": "*", "@modelcontextprotocol/sdk": "^1.29.0", "@opentelemetry/api": "^1.9.0", diff --git a/packages/api/package.json b/packages/api/package.json index 48c4c7fd8a..0d663ccd05 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -113,7 +113,7 @@ "@azure/storage-blob": "^12.30.0", "@google/genai": "^2.8.0", "@keyv/redis": "^4.3.3", - "@librechat/agents": "^3.2.55", + "@librechat/agents": "^3.2.57", "@librechat/data-schemas": "*", "@modelcontextprotocol/sdk": "^1.29.0", "@opentelemetry/api": "^1.9.0", diff --git a/packages/api/src/agents/__tests__/run-codeTools.test.ts b/packages/api/src/agents/__tests__/run-codeTools.test.ts new file mode 100644 index 0000000000..8fa6783aa4 --- /dev/null +++ b/packages/api/src/agents/__tests__/run-codeTools.test.ts @@ -0,0 +1,113 @@ +import { createRun } from '~/agents/run'; + +/** + * Guards the code-tool eager/session wiring in `createRun`. The whole + * create_file -> bash_tool sandbox-sharing chain depends on run.ts passing + * `codeSessionToolNames` (so file-authoring tools share the code session) and + * `excludeToolNames` (so side-effecting/large-arg tools aren't eager-executed). 
+ * These were silently missing before and only surfaced with both the + * file-authoring and code-execution capabilities enabled — assert they're wired + * so a future edit can't drop them without failing CI. + */ + +jest.mock('winston', () => ({ + createLogger: jest.fn(() => ({ + debug: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + info: jest.fn(), + })), + format: Object.assign( + jest.fn((fn) => () => ({ transform: fn })), + { + combine: jest.fn(), + colorize: jest.fn(), + simple: jest.fn(), + label: jest.fn(), + timestamp: jest.fn(), + printf: jest.fn(), + errors: jest.fn(), + splat: jest.fn(), + json: jest.fn(), + }, + ), + addColors: jest.fn(), + transports: { Console: jest.fn(), DailyRotateFile: jest.fn(), File: jest.fn() }, +})); + +jest.mock('~/utils/env', () => ({ + resolveHeaders: jest.fn((opts: { headers: unknown }) => opts?.headers ?? {}), + createSafeUser: jest.fn(() => ({})), +})); + +jest.mock('@librechat/data-schemas', () => ({ + ...jest.requireActual('@librechat/data-schemas'), + logger: { debug: jest.fn(), warn: jest.fn(), error: jest.fn(), info: jest.fn() }, +})); + +jest.mock('@librechat/agents', () => { + const actual = jest.requireActual('@librechat/agents'); + return { + ...actual, + Run: { + create: jest.fn().mockResolvedValue({ + processStream: jest.fn().mockResolvedValue(undefined), + }), + }, + }; +}); + +jest.mock('~/agents/checkpointer', () => ({ + getAgentCheckpointer: jest.fn().mockResolvedValue({}), +})); + +import { Run } from '@librechat/agents'; + +function makeAgent(overrides?: Record<string, unknown>) { + return { + id: 'agent_1', + provider: 'openAI', + endpoint: 'openAI', + model: 'gpt-4o', + tools: [], + model_parameters: { model: 'gpt-4o' }, + maxContextTokens: 100_000, + toolContextMap: {}, + ...overrides, + }; +} + +async function captureRunConfig(): Promise<Record<string, unknown>> { + await createRun({ + agents: [makeAgent()] as never, + signal: new AbortController().signal, + streaming: true, + streamUsage: true, + }); + const createMock = Run.create
as jest.Mock; + expect(createMock).toHaveBeenCalledTimes(1); + return createMock.mock.calls[0][0] as Record<string, unknown>; +} + +describe('createRun code-tool eager/session wiring', () => { + beforeEach(() => jest.clearAllMocks()); + + it('excludes side-effecting/large-arg tools from eager execution', async () => { + const runConfig = await captureRunConfig(); + const eager = runConfig.eagerEventToolExecution as { + enabled?: boolean; + excludeToolNames?: string[]; + }; + expect(eager.enabled).toBe(true); + expect(eager.excludeToolNames).toEqual( + expect.arrayContaining(['create_file', 'edit_file', 'execute_code', 'bash_tool']), + ); + }); + + it('declares create_file/edit_file as code-session participants', async () => { + const runConfig = await captureRunConfig(); + expect(runConfig.codeSessionToolNames).toEqual( + expect.arrayContaining(['create_file', 'edit_file']), + ); + }); +}); diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index 810b56e1e3..82606f5e51 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -35,6 +35,7 @@ import type { AppConfig, IUser } from '@librechat/data-schemas'; import type { SubagentUsageEvent } from '~/agents/usage'; import type * as t from '~/types'; import { getLLMConfig as getAnthropicLLMConfig } from '~/endpoints/anthropic/llm'; +import { CREATE_FILE_TOOL_NAME, EDIT_FILE_TOOL_NAME } from '~/agents/tools'; import { getProviderConfig } from '~/endpoints/config/providers'; import { resolveToolApprovalPolicy } from '~/agents/hitl/policy'; import { extractDefaultParams } from '~/endpoints/openai/llm'; @@ -1214,7 +1215,31 @@ export async function createRun({ calibrationRatio, indexTokenCountMap, subagentUsageSink, - eagerEventToolExecution: { enabled: true }, + // Exclude side-effecting / large-free-form-arg tools from eager execution.
+ // Eager speculatively runs a tool mid-stream; for a big streamed arg (a + // file body, a bash heredoc, a code block) the accumulated args can diverge + // from the final tool call and trip the SDK's "changed after eager + // execution" guard, and a speculative write/exec can land before the turn + // commits. create_file/edit_file write files; execute_code/bash_tool run + // code with large `code`/`command` args. `excludeToolNames` requires + // @librechat/agents with the eager-exclusion support (agents#281); older + // versions ignore the field. + eagerEventToolExecution: { + enabled: true, + excludeToolNames: [ + CREATE_FILE_TOOL_NAME, + EDIT_FILE_TOOL_NAME, + Constants.EXECUTE_CODE, + Constants.BASH_TOOL, + ], + }, + // Let host file-authoring tools share the code-execution sandbox session so + // a file created with create_file/edit_file is visible to later + // execute_code/bash_tool calls (and vice versa). The SDK folds these tools' + // returned exec session/files into the shared code session and injects the + // existing session into their requests. Requires @librechat/agents with + // codeSessionToolNames support (agents#283); older versions ignore it. + codeSessionToolNames: [CREATE_FILE_TOOL_NAME, EDIT_FILE_TOOL_NAME], // Derive the Langfuse trace id deterministically from runId so message // feedback can be scored against the trace without a lookup (see the // feedback route in api/server/routes/messages.js). No-op unless Langfuse