From edd7d4cea41650b45373dd1552d42e97d3738792 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Sun, 22 Mar 2026 13:50:59 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=A7=20feat:=20Lazy=20file=20provisioni?= =?UTF-8?q?ng=20=E2=80=94=20defer=20uploads=20to=20tool=20invocation=20tim?= =?UTF-8?q?e?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move file provisioning from eager (at chat-request start) to lazy (at tool invocation time via ON_TOOL_EXECUTE). Files are now only uploaded to code env / vector DB when the LLM actually calls the respective tool. - resources.ts: primeResources no longer provisions; computes provisionState (which files need code env / vector DB uploads) with staleness check and single credential load - handlers.ts: add provisionFiles callback to ToolExecuteOptions, called once per tool-call batch before execution - initialize.ts: pass provisionState through InitializedAgent - initialize.js: implement provisionFiles closure that provisions files in parallel, batches DB updates, clears state after use; store provisionState in agentToolContexts for all agent types --- .../services/Endpoints/agents/initialize.js | 71 ++++++- .../services/Endpoints/agents/skillDeps.js | 1 + packages/api/src/agents/handlers.ts | 9 +- packages/api/src/agents/initialize.ts | 8 +- packages/api/src/agents/resources.ts | 179 ++++++------------ 5 files changed, 147 insertions(+), 121 deletions(-) diff --git a/api/server/services/Endpoints/agents/initialize.js b/api/server/services/Endpoints/agents/initialize.js index 4c8ef707b1..020719302b 100644 --- a/api/server/services/Endpoints/agents/initialize.js +++ b/api/server/services/Endpoints/agents/initialize.js @@ -1,5 +1,5 @@ const { logger } = require('@librechat/data-schemas'); -const { createContentAggregator } = require('@librechat/agents'); +const { Constants, createContentAggregator } = require('@librechat/agents'); const { loadSkillStates, initializeAgent, @@ -232,6 +232,70 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => { }, toolEndCallback, ...getSkillToolDeps(), + provisionFiles: async (toolNames, agentId) => { + const ctx = agentToolContexts.get(agentId); + if (!ctx?.provisionState) { + return; + } + + const { provisionState } = ctx; + const needsCode = + toolNames.includes(Constants.EXECUTE_CODE) || + toolNames.includes(Constants.PROGRAMMATIC_TOOL_CALLING); + const needsSearch = toolNames.includes('file_search'); + + if (!needsCode && !needsSearch) { + return; + } + + /** @type {import('@librechat/api').TFileUpdate[]} */ + const pendingUpdates = []; + + if (needsCode && provisionState.codeEnvFiles.length > 0 && provisionState.codeApiKey) { + const results = await Promise.allSettled( + provisionState.codeEnvFiles.map(async (file) => { + const { fileIdentifier, fileUpdate } = await provisionToCodeEnv({ + req, + file, + entity_id: agentId, + apiKey: provisionState.codeApiKey, + }); + file.metadata = { ...file.metadata, fileIdentifier }; + pendingUpdates.push(fileUpdate); + }), + ); + for (const result of results) { + if (result.status === 'rejected') { + logger.error('[provisionFiles] Code env provisioning failed', result.reason); + } + } + provisionState.codeEnvFiles = []; + } + + if (needsSearch && provisionState.vectorDBFiles.length > 0) { + const results = await Promise.allSettled( + provisionState.vectorDBFiles.map(async (file) => { + const result = await provisionToVectorDB({ req, file, entity_id: agentId }); + if (result.embedded) { + file.embedded = true; + if (result.fileUpdate) { + pendingUpdates.push(result.fileUpdate); + } + } + }), + ); + for (const result of results) { + if (result.status === 'rejected') { + logger.error('[provisionFiles] Vector DB provisioning failed', result.reason); + } + } + provisionState.vectorDBFiles = []; + } + + if (pendingUpdates.length > 0) { + await Promise.allSettled(pendingUpdates.map((update) => db.updateFile(update))); + } + }, }; const summarizationOptions = @@ -700,6 +764,11 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => { listSkillsByAccess: skillDbMethods.listSkillsByAccess, listAlwaysApplySkills: skillDbMethods.listAlwaysApplySkills, getSkillByName: skillDbMethods.getSkillByName, + provisionToCodeEnv, + provisionToVectorDB, + checkSessionsAlive, + loadCodeApiKey, + updateFile: db.updateFile, }, ); agentConfigs.set(agentId, config); diff --git a/api/server/services/Endpoints/agents/skillDeps.js b/api/server/services/Endpoints/agents/skillDeps.js index 7154c1d52b..be06a423bf 100644 --- a/api/server/services/Endpoints/agents/skillDeps.js +++ b/api/server/services/Endpoints/agents/skillDeps.js @@ -290,6 +290,7 @@ function buildAgentToolContext({ agent, config }) { fileAuthoringToolNames: config.fileAuthoringToolNames, skillPrimedIdsByName: buildSkillPrimedIdsByName(config.manualSkillPrimes, config.alwaysApplySkillPrimes) ?? {}, + provisionState: config.provisionState, }; } diff --git a/packages/api/src/agents/handlers.ts b/packages/api/src/agents/handlers.ts index 8605ef498e..552895abec 100644 --- a/packages/api/src/agents/handlers.ts +++ b/packages/api/src/agents/handlers.ts @@ -59,6 +59,8 @@ export interface ToolExecuteOptions { }>; /** Callback to process tool artifacts (code output files, file citations, etc.) */ toolEndCallback?: ToolEndCallback; + /** Called once per batch before tool execution to lazily provision files to tool environments */ + provisionFiles?: (toolNames: string[], agentId?: string) => Promise; /** * Loads a skill by name with ACL constraint (returns full body for injection). * @@ -3146,7 +3148,7 @@ function getFileAuthoringQueueKey( * executes them in parallel, and resolves with the results. */ export function createToolExecuteHandler(options: ToolExecuteOptions): EventHandler { - const { loadTools, toolEndCallback } = options; + const { loadTools, toolEndCallback, provisionFiles } = options; return { handle: async (_event: string, data: ToolExecuteBatchRequest) => { @@ -3174,6 +3176,11 @@ export function createToolExecuteHandler(options: ToolExecuteOptions): EventHand await runOutsideTracing(async () => { try { const toolNames = [...new Set(toolCalls.map((tc: ToolCallRequest) => tc.name))]; + + if (provisionFiles) { + await provisionFiles(toolNames, agentId); + } + const { loadedTools, configurable: toolConfigurable } = await loadTools( toolNames, agentId, diff --git a/packages/api/src/agents/initialize.ts b/packages/api/src/agents/initialize.ts index 91d1f1b1ce..f2bb84b5be 100644 --- a/packages/api/src/agents/initialize.ts +++ b/packages/api/src/agents/initialize.ts @@ -33,6 +33,7 @@ import type { LCAvailableTools, RequestScopedMCPConnectionStore } from '../mcp/t import type { ResolvedManualSkill, ResolvedAlwaysApplySkill } from './skills'; import type { TFileUpdate, + ProvisionState, TFilterFilesByAgentAccess, TProvisionToCodeEnv, TProvisionToVectorDB, @@ -334,6 +335,8 @@ export type InitializedAgent = Agent & { endpointTokenConfig?: EndpointTokenConfig; /** Warnings from lazy file provisioning (e.g., failed uploads) */ provisionWarnings?: string[]; + /** State for deferred file provisioning — actual uploads happen at tool invocation time */ + provisionState?: ProvisionState; }; export const DEFAULT_MAX_CONTEXT_TOKENS = 32000; @@ -762,6 +765,7 @@ export async function initializeAgent( requestAttachments: primedRequestAttachments, agentContextAttachments: primedAgentContextAttachments, tool_resources, + provisionState, warnings: provisionWarnings, } = await primeResources({ req: req as never, @@ -775,11 +779,8 @@ export async function initializeAgent( tool_resources: agent.tool_resources, requestFileSet: new Set(requestFiles?.map((file) => file.file_id)), enabledToolResources: toolResourceSet, - provisionToCodeEnv: db.provisionToCodeEnv, - provisionToVectorDB: db.provisionToVectorDB, checkSessionsAlive: db.checkSessionsAlive, loadCodeApiKey: db.loadCodeApiKey, - updateFile: db.updateFile as ((data: TFileUpdate) => Promise) | undefined, }); /** @@ -1303,6 +1304,7 @@ export async function initializeAgent( useLegacyContent: !!options.useLegacyContent, tools: (tools ?? []) as GenericTool[] & string[], maxToolResultChars: maxToolResultCharsResolved, + provisionState, provisionWarnings: provisionWarnings != null && provisionWarnings.length > 0 ? provisionWarnings : undefined, maxContextTokens: diff --git a/packages/api/src/agents/resources.ts b/packages/api/src/agents/resources.ts index 5634200819..758e1bd964 100644 --- a/packages/api/src/agents/resources.ts +++ b/packages/api/src/agents/resources.ts @@ -48,6 +48,18 @@ export type TCheckSessionsAlive = (params: { /** Loads CODE_API_KEY for a user. Call once per request. */ export type TLoadCodeApiKey = (userId: string) => Promise; +/** State computed during primeResources for lazy provisioning at tool invocation time */ +export type ProvisionState = { + /** Files that need uploading to the code execution environment */ + codeEnvFiles: TFile[]; + /** Files that need embedding into the vector DB for file_search */ + vectorDBFiles: TFile[]; + /** Pre-loaded CODE_API_KEY to avoid redundant credential fetches */ + codeApiKey?: string; + /** Set of file_ids confirmed alive in code env (from staleness check) */ + aliveFileIds: Set; +}; + /** * Function type for retrieving files from the database * @param filter - MongoDB filter query for files @@ -82,7 +94,7 @@ export type TFilterFilesByAgentAccess = (params: { * @param params.tool_resources - The agent's tool resources object to update * @param params.processedResourceFiles - Set tracking processed files per resource type */ -const addFileToResource = ({ +export const addFileToResource = ({ file, resourceType, tool_resources, @@ -205,11 +217,8 @@ export const primeResources = async ({ tool_resources: _tool_resources, agentId, enabledToolResources, - provisionToCodeEnv, - provisionToVectorDB, checkSessionsAlive, loadCodeApiKey, - updateFile, }: { req: ServerRequest & { user?: IUser }; appConfig?: AppConfig; @@ -221,21 +230,16 @@ export const primeResources = async ({ agentId?: string; /** Set of tool resource types the agent has enabled (e.g., execute_code, file_search) */ enabledToolResources?: Set; - /** Optional callback to provision a file to the code execution environment */ - provisionToCodeEnv?: TProvisionToCodeEnv; - /** Optional callback to provision a file to the vector DB for file_search */ - provisionToVectorDB?: TProvisionToVectorDB; /** Optional callback to batch-check code env file liveness by session */ checkSessionsAlive?: TCheckSessionsAlive; /** Optional callback to load CODE_API_KEY once per request */ loadCodeApiKey?: TLoadCodeApiKey; - /** Optional callback to persist file metadata updates after provisioning */ - updateFile?: (data: TFileUpdate) => Promise; }): Promise<{ attachments: Array | undefined; requestAttachments: Array | undefined; agentContextAttachments: Array | undefined; tool_resources: AgentToolResources | undefined; + provisionState?: ProvisionState; warnings: string[]; }> => { const requestAttachments: Array = []; @@ -391,20 +395,18 @@ export const primeResources = async ({ } /** - * Lazy provisioning: for deferred files that haven't been provisioned to the - * agent's enabled tool resources, provision them now (at chat-request start). - * This handles files uploaded via the unified upload flow (no tool_resource chosen at upload time). + * Lazy provisioning: instead of provisioning files now, compute which files + * need provisioning and return that state. Actual provisioning happens at + * tool invocation time via the ON_TOOL_EXECUTE handler. */ const warnings: string[] = []; + let provisionState: ProvisionState | undefined; if (enabledToolResources && enabledToolResources.size > 0 && attachments.length > 0) { - const needsCodeEnv = - enabledToolResources.has(EToolResources.execute_code) && provisionToCodeEnv != null; - const needsVectorDB = - enabledToolResources.has(EToolResources.file_search) && provisionToVectorDB != null; + const needsCodeEnv = enabledToolResources.has(EToolResources.execute_code); + const needsVectorDB = enabledToolResources.has(EToolResources.file_search); if (needsCodeEnv || needsVectorDB) { - // Load CODE_API_KEY once for all code env operations let codeApiKey: string | undefined; if (needsCodeEnv && loadCodeApiKey && req.user?.id) { try { @@ -415,7 +417,7 @@ export const primeResources = async ({ } } - // Batch staleness check: verify code env files are still alive + // Batch staleness check: identify which code env files are still alive let aliveFileIds: Set = new Set(); if (needsCodeEnv && codeApiKey && checkSessionsAlive) { const filesWithIdentifiers = attachments.filter( @@ -429,112 +431,55 @@ export const primeResources = async ({ } } - // Collect deferred DB updates from provisioning - const pendingUpdates: TFileUpdate[] = []; + // Compute which files need provisioning (don't actually provision yet) + const codeEnvFiles: TFile[] = []; + const vectorDBFiles: TFile[] = []; - // Provision files in parallel - const provisionResults = await Promise.allSettled( - attachments.map(async (file) => { - if (!file?.file_id) { - return; - } + for (const file of attachments) { + if (!file?.file_id) { + continue; + } - const isImage = file.type?.startsWith('image') ?? false; - const typedReq = req as ServerRequest & { user?: IUser }; + if ( + needsCodeEnv && + codeApiKey && + !processedResourceFiles.has(`${EToolResources.execute_code}:${file.file_id}`) + ) { + const hasFileIdentifier = !!file.metadata?.fileIdentifier; + const isStale = hasFileIdentifier && !aliveFileIds.has(file.file_id); - // Code env provisioning (with staleness check) - if ( - needsCodeEnv && - codeApiKey && - !processedResourceFiles.has(`${EToolResources.execute_code}:${file.file_id}`) - ) { - const hasFileIdentifier = !!file.metadata?.fileIdentifier; - const isStale = hasFileIdentifier && !aliveFileIds.has(file.file_id); - const needsProvision = !hasFileIdentifier || isStale; - - if (needsProvision) { - if (isStale) { - logger.info( - `[primeResources] Code env file expired for "${file.filename}" (${file.file_id}), re-provisioning`, - ); - file.metadata = { ...file.metadata, fileIdentifier: undefined }; - } - - try { - const { fileIdentifier, fileUpdate } = await provisionToCodeEnv({ - req: typedReq, - file, - entity_id: agentId, - apiKey: codeApiKey, - }); - file.metadata = { ...file.metadata, fileIdentifier }; - pendingUpdates.push(fileUpdate); - addFileToResource({ - file, - resourceType: EToolResources.execute_code, - tool_resources, - processedResourceFiles, - }); - } catch (error) { - const msg = `Failed to provision "${file.filename}" to code env`; - logger.error(`[primeResources] ${msg}`, error); - warnings.push(msg); - } - } else { - // File is alive, ensure it's categorized - addFileToResource({ - file, - resourceType: EToolResources.execute_code, - tool_resources, - processedResourceFiles, - }); + if (!hasFileIdentifier || isStale) { + if (isStale) { + logger.info( + `[primeResources] Code env file expired for "${file.filename}" (${file.file_id}), will re-provision on tool use`, + ); + file.metadata = { ...file.metadata, fileIdentifier: undefined }; } + codeEnvFiles.push(file); + } else { + // File is alive, categorize it now + addFileToResource({ + file, + resourceType: EToolResources.execute_code, + tool_resources, + processedResourceFiles, + }); } + } - // Vector DB provisioning - if ( - needsVectorDB && - !isImage && - file.embedded !== true && - !processedResourceFiles.has(`${EToolResources.file_search}:${file.file_id}`) - ) { - try { - const result = await provisionToVectorDB({ - req: typedReq, - file, - entity_id: agentId, - }); - if (result.embedded) { - file.embedded = true; - if (result.fileUpdate) { - pendingUpdates.push(result.fileUpdate); - } - addFileToResource({ - file, - resourceType: EToolResources.file_search, - tool_resources, - processedResourceFiles, - }); - } - } catch (error) { - const msg = `Failed to provision "${file.filename}" to vector DB`; - logger.error(`[primeResources] ${msg}`, error); - warnings.push(msg); - } - } - }), - ); - - // Log any unexpected rejections from Promise.allSettled - for (const result of provisionResults) { - if (result.status === 'rejected') { - logger.error('[primeResources] Unexpected provisioning rejection', result.reason); + const isImage = file.type?.startsWith('image') ?? false; + if ( + needsVectorDB && + !isImage && + file.embedded !== true && + !processedResourceFiles.has(`${EToolResources.file_search}:${file.file_id}`) + ) { + vectorDBFiles.push(file); } } - // Batch DB updates after all provisioning completes - if (pendingUpdates.length > 0 && updateFile) { - await Promise.allSettled(pendingUpdates.map((update) => updateFile(update))); + if (codeEnvFiles.length > 0 || vectorDBFiles.length > 0) { + provisionState = { codeEnvFiles, vectorDBFiles, codeApiKey, aliveFileIds }; } } } @@ -545,6 +490,7 @@ export const primeResources = async ({ agentContextAttachments: agentContextAttachments.length > 0 ? agentContextAttachments : undefined, tool_resources, + provisionState, warnings, }; } catch (error) { @@ -569,6 +515,7 @@ export const primeResources = async ({ agentContextAttachments: agentContextAttachments.length > 0 ? agentContextAttachments : undefined, tool_resources: _tool_resources, + provisionState: undefined, warnings: [], }; }