mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-07-02 04:12:36 +00:00
🫷 feat: Exclude File Authoring Tools From Eager Execution (#14051)
* feat: exclude create_file/edit_file from eager execution

  Side-effecting host file-authoring tools should not be speculatively eager-executed: a write can land before the turn commits, and the eager path's incrementally-streamed args can diverge from the final tool call, tripping the SDK's 'changed after eager execution' guard so the model is told the write failed and loops (observed with create_file writing a large file to /mnt/data). Pass excludeToolNames so these tools run on the normal ToolNode path with the final args. Requires @librechat/agents with eager-exclusion support; older versions ignore the field.

* chore: Bump `@librechat/agents` to v3.2.56

* refactor: reorder imports in run.ts for clarity

* fix: also exclude execute_code/bash_tool from eager execution

  The eager 'changed after eager execution' corruption isn't specific to file authoring — any tool with a large free-form streamed arg is exposed. Observed live: a bash_tool heredoc (a full Python script in `command`) tripped the guard and the write never landed. execute_code (`code`) and bash_tool (`command`) carry large args and run code (side effects), so exclude them from eager alongside create_file/edit_file.

* feat: wire codeSessionToolNames so create_file/edit_file share the code sandbox

  Activates the agents#283 capability: pass create_file/edit_file as codeSessionToolNames so their exec session/files fold into the shared code session and a file they write is visible to later execute_code/bash_tool calls (and the existing session is injected into their requests). No-op until @librechat/agents ships codeSessionToolNames (agents#283).

* test: guard code-tool eager/session wiring in createRun

  Asserts createRun passes excludeToolNames (create_file/edit_file/execute_code/bash_tool) and codeSessionToolNames (create_file/edit_file) to Run.create — the wiring the create_file->bash_tool sandbox-sharing chain depends on, which was silently missing before. Guards against a future edit dropping it.

  Mirrors the run-summarization test harness (mocks Run.create). The full create_file->bash_tool chain runs through the real code sandbox and can't run in the mock CI harness; the SDK mechanism is covered by @librechat/agents unit tests, and this guards the LibreChat wiring.

* style: fix prettier formatting in run-codeTools test

* chore: Bump `@librechat/agents` to v3.2.57
This commit is contained in:
parent
f5c64a4d6d
commit
bb7d99d56c
5 changed files with 146 additions and 8 deletions
|
|
@ -46,7 +46,7 @@
|
|||
"@azure/storage-blob": "^12.30.0",
|
||||
"@google/genai": "^2.8.0",
|
||||
"@keyv/redis": "^4.3.3",
|
||||
"@librechat/agents": "^3.2.55",
|
||||
"@librechat/agents": "^3.2.57",
|
||||
"@librechat/api": "*",
|
||||
"@librechat/data-schemas": "*",
|
||||
"@microsoft/microsoft-graph-client": "^3.0.7",
|
||||
|
|
|
|||
10
package-lock.json
generated
10
package-lock.json
generated
|
|
@ -61,7 +61,7 @@
|
|||
"@azure/storage-blob": "^12.30.0",
|
||||
"@google/genai": "^2.8.0",
|
||||
"@keyv/redis": "^4.3.3",
|
||||
"@librechat/agents": "^3.2.55",
|
||||
"@librechat/agents": "^3.2.57",
|
||||
"@librechat/api": "*",
|
||||
"@librechat/data-schemas": "*",
|
||||
"@microsoft/microsoft-graph-client": "^3.0.7",
|
||||
|
|
@ -9879,9 +9879,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@librechat/agents": {
|
||||
"version": "3.2.55",
|
||||
"resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.2.55.tgz",
|
||||
"integrity": "sha512-eOysFxc9o70ORArMiy42iMGUyPEWZ0ylLYEr+UlPl3Wgvklw5cK5QvJoAQaSVKiN3vL8V4No9YiNhDfcvHH83A==",
|
||||
"version": "3.2.57",
|
||||
"resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.2.57.tgz",
|
||||
"integrity": "sha512-utC5z5GpNP4U8HAFPtxmFHt1V72H9cP7rNN3hQx//CH18z//hP623qzgnjo6ZkbLgmu4Gc/GfgykT/svndF+Dg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.103.0",
|
||||
|
|
@ -42405,7 +42405,7 @@
|
|||
"@azure/storage-blob": "^12.30.0",
|
||||
"@google/genai": "^2.8.0",
|
||||
"@keyv/redis": "^4.3.3",
|
||||
"@librechat/agents": "^3.2.55",
|
||||
"@librechat/agents": "^3.2.57",
|
||||
"@librechat/data-schemas": "*",
|
||||
"@modelcontextprotocol/sdk": "^1.29.0",
|
||||
"@opentelemetry/api": "^1.9.0",
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@
|
|||
"@azure/storage-blob": "^12.30.0",
|
||||
"@google/genai": "^2.8.0",
|
||||
"@keyv/redis": "^4.3.3",
|
||||
"@librechat/agents": "^3.2.55",
|
||||
"@librechat/agents": "^3.2.57",
|
||||
"@librechat/data-schemas": "*",
|
||||
"@modelcontextprotocol/sdk": "^1.29.0",
|
||||
"@opentelemetry/api": "^1.9.0",
|
||||
|
|
|
|||
113
packages/api/src/agents/__tests__/run-codeTools.test.ts
Normal file
113
packages/api/src/agents/__tests__/run-codeTools.test.ts
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
import { createRun } from '~/agents/run';
|
||||
|
||||
/**
|
||||
* Guards the code-tool eager/session wiring in `createRun`. The whole
|
||||
* create_file -> bash_tool sandbox-sharing chain depends on run.ts passing
|
||||
* `codeSessionToolNames` (so file-authoring tools share the code session) and
|
||||
* `excludeToolNames` (so side-effecting/large-arg tools aren't eager-executed).
|
||||
* These were silently missing before and only surfaced with both the
|
||||
* file-authoring and code-execution capabilities enabled — assert they're wired
|
||||
* so a future edit can't drop them without failing CI.
|
||||
*/
|
||||
|
||||
// Replace `winston` with an inert stub for this test file: no real loggers,
// formatters, or transports are constructed.
// NOTE(review): presumably needed because a transitively imported module
// configures winston at load time — confirm against the import graph.
jest.mock('winston', () => ({
|
||||
// Every created logger exposes only these four level methods, all jest mocks.
createLogger: jest.fn(() => ({
|
||||
debug: jest.fn(),
|
||||
warn: jest.fn(),
|
||||
error: jest.fn(),
|
||||
info: jest.fn(),
|
||||
})),
|
||||
// `format` is a callable mock with helper mocks attached as properties.
// Calling `format(fn)` yields a factory whose result is `{ transform: fn }`.
format: Object.assign(
|
||||
jest.fn((fn) => () => ({ transform: fn })),
|
||||
{
|
||||
combine: jest.fn(),
|
||||
colorize: jest.fn(),
|
||||
simple: jest.fn(),
|
||||
label: jest.fn(),
|
||||
timestamp: jest.fn(),
|
||||
printf: jest.fn(),
|
||||
errors: jest.fn(),
|
||||
splat: jest.fn(),
|
||||
json: jest.fn(),
|
||||
},
|
||||
),
|
||||
addColors: jest.fn(),
|
||||
// Transport constructors are plain mocks; nothing is written anywhere.
transports: { Console: jest.fn(), DailyRotateFile: jest.fn(), File: jest.fn() },
|
||||
}));
|
||||
|
||||
// Stub the env helpers so no real header resolution or user sanitising runs.
jest.mock('~/utils/env', () => ({
|
||||
// Passes through `opts.headers` unchanged, or `{}` when opts/headers is nullish.
resolveHeaders: jest.fn((opts: { headers: unknown }) => opts?.headers ?? {}),
|
||||
// Always yields an empty object regardless of arguments.
createSafeUser: jest.fn(() => ({})),
|
||||
}));
|
||||
|
||||
// Keep the real `@librechat/data-schemas` exports but swap `logger` for
// silent jest mocks. `logger` is listed after the spread so it overrides the
// real export.
jest.mock('@librechat/data-schemas', () => ({
|
||||
...jest.requireActual('@librechat/data-schemas'),
|
||||
logger: { debug: jest.fn(), warn: jest.fn(), error: jest.fn(), info: jest.fn() },
|
||||
}));
|
||||
|
||||
// Keep the real `@librechat/agents` exports but replace `Run` with an object
// whose `create` is a jest mock. The tests in this file read the argument
// passed to `Run.create` via the mock's recorded calls.
jest.mock('@librechat/agents', () => {
|
||||
const actual = jest.requireActual('@librechat/agents');
|
||||
return {
|
||||
...actual,
|
||||
Run: {
|
||||
// Resolves to a minimal run stand-in exposing only `processStream`,
// which itself resolves to `undefined`.
create: jest.fn().mockResolvedValue({
|
||||
processStream: jest.fn().mockResolvedValue(undefined),
|
||||
}),
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
// Stub the checkpointer module: `getAgentCheckpointer` resolves to an empty
// object instead of building a real checkpointer.
jest.mock('~/agents/checkpointer', () => ({
|
||||
getAgentCheckpointer: jest.fn().mockResolvedValue({}),
|
||||
}));
|
||||
|
||||
import { Run } from '@librechat/agents';
|
||||
|
||||
/**
 * Builds a minimal agent-shaped fixture for `createRun`.
 *
 * @param overrides - Optional fields merged over the defaults; because the
 *   spread comes last, any key given here replaces the default of the same name.
 * @returns A plain object with the default fields below plus `overrides`.
 */
function makeAgent(overrides?: Record<string, unknown>) {
|
||||
return {
|
||||
id: 'agent_1',
|
||||
provider: 'openAI',
|
||||
endpoint: 'openAI',
|
||||
model: 'gpt-4o',
|
||||
tools: [],
|
||||
model_parameters: { model: 'gpt-4o' },
|
||||
maxContextTokens: 100_000,
|
||||
toolContextMap: {},
|
||||
// Spread last so caller-supplied values win over the defaults above.
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Invokes `createRun` with a single default agent and returns the config
 * object that `createRun` handed to the mocked `Run.create`.
 *
 * Also asserts that `Run.create` was called exactly once, so it relies on the
 * mock's call history being cleared between tests.
 *
 * @returns The first argument of the first `Run.create` call.
 */
async function captureRunConfig(): Promise<Record<string, unknown>> {
|
||||
await createRun({
|
||||
// `as never` bypasses the parameter type: the fixture is a plain object,
// not a fully typed agent.
agents: [makeAgent()] as never,
|
||||
signal: new AbortController().signal,
|
||||
streaming: true,
|
||||
streamUsage: true,
|
||||
});
|
||||
const createMock = Run.create as jest.Mock;
|
||||
expect(createMock).toHaveBeenCalledTimes(1);
|
||||
// calls[0][0] = first call, first positional argument (the run config).
return createMock.mock.calls[0][0] as Record<string, unknown>;
|
||||
}
|
||||
|
||||
// Verifies the two config fields `createRun` must pass to `Run.create`:
// `eagerEventToolExecution.excludeToolNames` and `codeSessionToolNames`.
describe('createRun code-tool eager/session wiring', () => {
|
||||
// Reset mock call history so the "called exactly once" assertion inside
// captureRunConfig holds independently for each test.
beforeEach(() => jest.clearAllMocks());
|
||||
|
||||
it('excludes side-effecting/large-arg tools from eager execution', async () => {
|
||||
const runConfig = await captureRunConfig();
|
||||
const eager = runConfig.eagerEventToolExecution as {
|
||||
enabled?: boolean;
|
||||
excludeToolNames?: string[];
|
||||
};
|
||||
// Eager execution itself stays on; only the listed tools are opted out.
expect(eager.enabled).toBe(true);
|
||||
// arrayContaining is a subset match: the four names must be present, while
// extra entries and ordering are not checked.
expect(eager.excludeToolNames).toEqual(
|
||||
expect.arrayContaining(['create_file', 'edit_file', 'execute_code', 'bash_tool']),
|
||||
);
|
||||
});
|
||||
|
||||
it('declares create_file/edit_file as code-session participants', async () => {
|
||||
const runConfig = await captureRunConfig();
|
||||
// Subset match again: both file-authoring tool names must be listed.
expect(runConfig.codeSessionToolNames).toEqual(
|
||||
expect.arrayContaining(['create_file', 'edit_file']),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
|
@ -35,6 +35,7 @@ import type { AppConfig, IUser } from '@librechat/data-schemas';
|
|||
import type { SubagentUsageEvent } from '~/agents/usage';
|
||||
import type * as t from '~/types';
|
||||
import { getLLMConfig as getAnthropicLLMConfig } from '~/endpoints/anthropic/llm';
|
||||
import { CREATE_FILE_TOOL_NAME, EDIT_FILE_TOOL_NAME } from '~/agents/tools';
|
||||
import { getProviderConfig } from '~/endpoints/config/providers';
|
||||
import { resolveToolApprovalPolicy } from '~/agents/hitl/policy';
|
||||
import { extractDefaultParams } from '~/endpoints/openai/llm';
|
||||
|
|
@ -1214,7 +1215,31 @@ export async function createRun({
|
|||
calibrationRatio,
|
||||
indexTokenCountMap,
|
||||
subagentUsageSink,
|
||||
eagerEventToolExecution: { enabled: true },
|
||||
// Exclude side-effecting / large-free-form-arg tools from eager execution.
|
||||
// Eager speculatively runs a tool mid-stream; for a big streamed arg (a
|
||||
// file body, a bash heredoc, a code block) the accumulated args can diverge
|
||||
// from the final tool call and trip the SDK's "changed after eager
|
||||
// execution" guard, and a speculative write/exec can land before the turn
|
||||
// commits. create_file/edit_file write files; execute_code/bash_tool run
|
||||
// code with large `code`/`command` args. `excludeToolNames` requires
|
||||
// @librechat/agents with the eager-exclusion support (agents#281); older
|
||||
// versions ignore the field.
|
||||
eagerEventToolExecution: {
|
||||
enabled: true,
|
||||
excludeToolNames: [
|
||||
CREATE_FILE_TOOL_NAME,
|
||||
EDIT_FILE_TOOL_NAME,
|
||||
Constants.EXECUTE_CODE,
|
||||
Constants.BASH_TOOL,
|
||||
],
|
||||
},
|
||||
// Let host file-authoring tools share the code-execution sandbox session so
|
||||
// a file created with create_file/edit_file is visible to later
|
||||
// execute_code/bash_tool calls (and vice versa). The SDK folds these tools'
|
||||
// returned exec session/files into the shared code session and injects the
|
||||
// existing session into their requests. Requires @librechat/agents with
|
||||
// codeSessionToolNames support (agents#283); older versions ignore it.
|
||||
codeSessionToolNames: [CREATE_FILE_TOOL_NAME, EDIT_FILE_TOOL_NAME],
|
||||
// Derive the Langfuse trace id deterministically from runId so message
|
||||
// feedback can be scored against the trace without a lookup (see the
|
||||
// feedback route in api/server/routes/messages.js). No-op unless Langfuse
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue