From 2e0187108df332025675cff76d9310998f506ba5 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Wed, 13 May 2026 04:03:32 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=80=20chore:=20align=20lazy=20provisio?= =?UTF-8?q?ning=20with=20codeEnvRef=20schema?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rebase onto current dev brought in the metadata.fileIdentifier → metadata.codeEnvRef migration (HEAD uploadCodeEnvFile now returns { storage_session_id, file_id } and requires kind/id). Update the unified-upload code paths to match: - provision.js: provisionToCodeEnv now derives kind/id from entity_id, calls uploadCodeEnvFile with the new signature, and returns codeEnvRef - checkSessionsAlive/checkCodeEnvFileAlive: read storage_session_id and remote file_id from metadata.codeEnvRef instead of parsing the legacy fileIdentifier string - resources.ts: primeResources gates on metadata.codeEnvRef and clears it on staleness; TProvisionToCodeEnv reflects the new return shape - initialize.js: provisionFiles closure destructures codeEnvRef - process.spec.js: align two legacyFileUploadUX tests with the endpoint-level check landed in 7384947 and update the execute_code expectation to the codeEnvRef metadata shape - resources.test.ts: import FileSources for the typed source field and guard the optional attachments map --- .../services/Endpoints/agents/initialize.js | 5 +- api/server/services/Files/process.spec.js | 23 +++++- api/server/services/Files/provision.js | 82 ++++++++++--------- packages/api/src/agents/resources.test.ts | 21 +++-- packages/api/src/agents/resources.ts | 15 ++-- 5 files changed, 84 insertions(+), 62 deletions(-) diff --git a/api/server/services/Endpoints/agents/initialize.js b/api/server/services/Endpoints/agents/initialize.js index d8f132714a..d29c348651 100644 --- a/api/server/services/Endpoints/agents/initialize.js +++ b/api/server/services/Endpoints/agents/initialize.js @@ -276,13 +276,12 @@ const initializeClient = 
async ({ req, res, signal, endpointOption }) => { if (needsCode && provisionState.codeEnvFiles.length > 0 && provisionState.codeApiKey) { const results = await Promise.allSettled( provisionState.codeEnvFiles.map(async (file) => { - const { fileIdentifier, fileUpdate } = await provisionToCodeEnv({ + const { codeEnvRef, fileUpdate } = await provisionToCodeEnv({ req, file, entity_id: agentId, - apiKey: provisionState.codeApiKey, }); - file.metadata = { ...file.metadata, fileIdentifier }; + file.metadata = { ...file.metadata, codeEnvRef }; pendingUpdates.push(fileUpdate); }), ); diff --git a/api/server/services/Files/process.spec.js b/api/server/services/Files/process.spec.js index 0a2f257be8..d070f281d9 100644 --- a/api/server/services/Files/process.spec.js +++ b/api/server/services/Files/process.spec.js @@ -954,12 +954,15 @@ describe('processAgentFileUpload', () => { getStrategyFunctions.mockReturnValue({ handleFileUpload: storageUpload }); mergeFileConfig.mockReturnValue({ ...makeFileConfig(), - legacyFileUploadUX: true, + endpoints: { + [EModelEndpoint.agents]: { legacyFileUploadUX: true }, + }, defaultLLMDeliveryPath: { fallback: 'none', }, }); const req = makeReq({ mimetype: 'text/markdown', ocrConfig: null }); + req.body.endpoint = EModelEndpoint.agents; await processAgentFileUpload({ req, @@ -984,7 +987,9 @@ describe('processAgentFileUpload', () => { test('persists llmDeliveryPath none for explicit execute_code uploads', async () => { const { createFile } = require('~/models'); - const codeUpload = jest.fn().mockResolvedValue('session-1/file.csv'); + const codeUpload = jest + .fn() + .mockResolvedValue({ storage_session_id: 'sess-csv', file_id: 'fid-csv' }); const storageUpload = jest.fn().mockResolvedValue({ filepath: '/uploads/user-123/file-uuid-123__upload.bin', bytes: 128, @@ -1024,7 +1029,14 @@ describe('processAgentFileUpload', () => { filepath: '/uploads/user-123/file-uuid-123__upload.bin', source: FileSources.local, type: 'text/csv', - metadata: { 
fileIdentifier: 'session-1/file.csv' }, + metadata: { + codeEnvRef: { + kind: 'agent', + id: 'agent-abc', + storage_session_id: 'sess-csv', + file_id: 'fid-csv', + }, + }, llmDeliveryPath: 'none', }), true, @@ -1171,13 +1183,16 @@ describe('processImageFile', () => { }); mergeFileConfig.mockReturnValue({ ...makeFileConfig(), - legacyFileUploadUX: true, + endpoints: { + [EModelEndpoint.agents]: { legacyFileUploadUX: true }, + }, defaultLLMDeliveryPath: { overrides: { 'image/*': 'none' }, }, }); getStrategyFunctions.mockReturnValue({ handleImageUpload }); const req = makeReq({ mimetype: 'image/png', ocrConfig: null }); + req.body.endpoint = EModelEndpoint.agents; await processImageFile({ req, diff --git a/api/server/services/Files/provision.js b/api/server/services/Files/provision.js index 61cde00266..5b018bf5c5 100644 --- a/api/server/services/Files/provision.js +++ b/api/server/services/Files/provision.js @@ -14,7 +14,6 @@ const { loadAuthValues } = require('~/server/services/Tools/credentials'); const { getStrategyFunctions } = require('./strategies'); // TODO: check and potentially fix — concurrent temp file collision (deterministic path based on file_id) -// TODO: check and potentially fix — query params not forwarded in checkSessionsAlive batch liveness check // TODO: check and potentially fix — direct mutation of shared file objects in provisionFiles callback // TODO: check and potentially fix — this file should be TypeScript in packages/api per CLAUDE.md rules @@ -22,7 +21,7 @@ const axios = createAxiosInstance(); /** * Loads the CODE_API_KEY for a user. Call once per request and pass the result - * to provisionToCodeEnv / checkSessionsAlive to avoid redundant lookups. + * to checkSessionsAlive to avoid redundant lookups. * * @param {string} userId * @returns {Promise} The CODE_API_KEY @@ -34,16 +33,18 @@ async function loadCodeApiKey(userId) { /** * Provisions a file to the code execution environment. 
- * Gets a read stream from our storage and uploads to the code env. + * Gets a read stream from our storage and uploads to the code env, persisting + * the resulting `codeEnvRef` so downstream readers (primeFiles, code env + * categorization) can locate the sandbox copy on subsequent turns. * * @param {object} params * @param {object} params.req - Express request object (needs req.user.id) * @param {import('librechat-data-provider').TFile} params.file - The file record from DB - * @param {string} [params.entity_id] - Optional entity ID (agent_id) - * @param {string} [params.apiKey] - Pre-loaded CODE_API_KEY (avoids redundant loadAuthValues) - * @returns {Promise<{ fileIdentifier: string, fileUpdate: object }>} Result with deferred DB update + * @param {string} [params.entity_id] - Optional entity ID (agent_id); when non-empty the ref + * is scoped to `kind: 'agent'`; when missing or empty it falls back to `kind: 'user'` keyed by `req.user.id`. + * @returns {Promise<{ codeEnvRef: object, fileUpdate: object }>} Result with deferred DB update */ -async function provisionToCodeEnv({ req, file, entity_id = '', apiKey }) { +async function provisionToCodeEnv({ req, file, entity_id }) { const { getDownloadStream } = getStrategyFunctions(file.source); if (!getDownloadStream) { throw new Error( @@ -51,25 +52,34 @@ async function provisionToCodeEnv({ req, file, entity_id = '', apiKey }) { ); } - const resolvedApiKey = apiKey ?? (await loadCodeApiKey(req.user.id)); const { handleFileUpload: uploadCodeEnvFile } = getStrategyFunctions(FileSources.execute_code); const stream = await getDownloadStream(req, file.filepath); - const fileIdentifier = await uploadCodeEnvFile({ + const kind = entity_id ? 'agent' : 'user'; + const id = entity_id || 
req.user.id; + + const uploaded = await uploadCodeEnvFile({ req, stream, filename: file.filename, - apiKey: resolvedApiKey, - entity_id, + kind, + id, }); + const codeEnvRef = { + kind, + id, + storage_session_id: uploaded.storage_session_id, + file_id: uploaded.file_id, + }; + logger.debug( `[provisionToCodeEnv] Provisioned file "${file.filename}" (${file.file_id}) to code env`, ); return { - fileIdentifier, - fileUpdate: { file_id: file.file_id, metadata: { ...file.metadata, fileIdentifier } }, + codeEnvRef, + fileUpdate: { file_id: file.file_id, metadata: { ...file.metadata, codeEnvRef } }, }; } @@ -152,29 +162,22 @@ async function provisionToVectorDB({ req, file, entity_id, existingStream }) { * Check if a single code env file is still alive by querying its session. * * @param {object} params - * @param {import('librechat-data-provider').TFile} params.file - File with metadata.fileIdentifier + * @param {import('librechat-data-provider').TFile} params.file - File with metadata.codeEnvRef * @param {string} params.apiKey - CODE_API_KEY * @returns {Promise} true if the file is still accessible in the code env */ async function checkCodeEnvFileAlive({ file, apiKey }) { - if (!file.metadata?.fileIdentifier) { + const ref = file.metadata?.codeEnvRef; + if (!ref?.storage_session_id || !ref?.file_id) { return false; } try { const baseURL = getCodeBaseURL(); - const [filePath, queryString] = file.metadata.fileIdentifier.split('?'); - const session_id = filePath.split('/')[0]; - - let queryParams = {}; - if (queryString) { - queryParams = Object.fromEntries(new URLSearchParams(queryString).entries()); - } - const response = await axios({ method: 'get', - url: `${baseURL}/files/${session_id}`, - params: { detail: 'summary', ...queryParams }, + url: `${baseURL}/files/${ref.storage_session_id}`, + params: { detail: 'summary' }, headers: { 'User-Agent': 'LibreChat/1.0', 'X-API-Key': apiKey, @@ -184,7 +187,7 @@ async function checkCodeEnvFileAlive({ file, apiKey }) { timeout: 
5000, }); - const found = response.data?.some((f) => f.name?.startsWith(filePath)); + const found = response.data?.some((f) => f.fileId === ref.file_id); return !!found; } catch (error) { logAxiosError({ @@ -196,11 +199,11 @@ async function checkCodeEnvFileAlive({ file, apiKey }) { } /** - * Batch-check code env file liveness by session_id. + * Batch-check code env file liveness by `storage_session_id`. * Groups files by session, makes one API call per session. * * @param {object} params - * @param {import('librechat-data-provider').TFile[]} params.files - Files with metadata.fileIdentifier + * @param {import('librechat-data-provider').TFile[]} params.files - Files with metadata.codeEnvRef * @param {string} params.apiKey - Pre-loaded CODE_API_KEY * @param {number} [params.staleSafeWindowMs=21600000] - Skip check if file updated within this window (default 6h) * @returns {Promise>} Set of file_ids that are confirmed alive @@ -209,12 +212,13 @@ async function checkSessionsAlive({ files, apiKey, staleSafeWindowMs = 6 * 60 * const aliveFileIds = new Set(); const now = Date.now(); - // Group files by session_id, skip recently-updated files (fast pre-filter) - /** @type {Map>} */ + // Group files by storage_session_id, skip recently-updated files (fast pre-filter) + /** @type {Map>} */ const sessionGroups = new Map(); for (const file of files) { - if (!file.metadata?.fileIdentifier) { + const ref = file.metadata?.codeEnvRef; + if (!ref?.storage_session_id || !ref?.file_id) { continue; } @@ -224,13 +228,13 @@ async function checkSessionsAlive({ files, apiKey, staleSafeWindowMs = 6 * 60 * continue; } - const [filePath] = file.metadata.fileIdentifier.split('?'); - const session_id = filePath.split('/')[0]; - - if (!sessionGroups.has(session_id)) { - sessionGroups.set(session_id, []); + if (!sessionGroups.has(ref.storage_session_id)) { + sessionGroups.set(ref.storage_session_id, []); } - sessionGroups.get(session_id).push({ file_id: file.file_id, filePath }); + 
sessionGroups.get(ref.storage_session_id).push({ + file_id: file.file_id, + remoteFileId: ref.file_id, + }); } // One API call per session (in parallel) @@ -252,8 +256,8 @@ async function checkSessionsAlive({ files, apiKey, staleSafeWindowMs = 6 * 60 * }); const remoteFiles = response.data ?? []; - for (const { file_id, filePath } of fileEntries) { - if (remoteFiles.some((f) => f.name?.startsWith(filePath))) { + for (const { file_id, remoteFileId } of fileEntries) { + if (remoteFiles.some((f) => f.fileId === remoteFileId)) { aliveFileIds.add(file_id); } } diff --git a/packages/api/src/agents/resources.test.ts b/packages/api/src/agents/resources.test.ts index 127fdfdf14..34d97501b7 100644 --- a/packages/api/src/agents/resources.test.ts +++ b/packages/api/src/agents/resources.test.ts @@ -1,6 +1,11 @@ import { primeResources } from './resources'; import { logger } from '@librechat/data-schemas'; -import { EModelEndpoint, EToolResources, AgentCapabilities } from 'librechat-data-provider'; +import { + FileSources, + EModelEndpoint, + EToolResources, + AgentCapabilities, +} from 'librechat-data-provider'; import type { TAgentsEndpoint, TFile } from 'librechat-data-provider'; import type { IUser, AppConfig } from '@librechat/data-schemas'; import type { Request as ServerRequest } from 'express'; @@ -1574,7 +1579,7 @@ describe('primeResources', () => { object: 'file' as const, usage: 0, embedded: false, - source: 'local', + source: FileSources.local, llmDeliveryPath: 'provider', width: 100, height: 100, @@ -1589,7 +1594,7 @@ describe('primeResources', () => { object: 'file' as const, usage: 0, embedded: false, - source: 'local', + source: FileSources.local, llmDeliveryPath: 'none', }; @@ -1604,7 +1609,7 @@ describe('primeResources', () => { agentId: 'agent1', }); - const attachmentIds = result.attachments.map((f) => f.file_id); + const attachmentIds = result.attachments.map((f) => f?.file_id); expect(attachmentIds).toContain('provider-file'); 
expect(attachmentIds).toContain('none-file'); }); @@ -1620,7 +1625,7 @@ describe('primeResources', () => { object: 'file' as const, usage: 0, embedded: false, - source: 'local', + source: FileSources.local, llmDeliveryPath: 'none', }; @@ -1637,7 +1642,7 @@ describe('primeResources', () => { loadCodeApiKey: jest.fn().mockResolvedValue('code-key'), }); - expect(result.attachments.map((f) => f.file_id)).toContain('none-file'); + expect(result.attachments.map((f) => f?.file_id)).toContain('none-file'); expect(result.provisionState?.codeEnvFiles.map((f) => f.file_id)).toContain('none-file'); expect(result.provisionState?.vectorDBFiles.map((f) => f.file_id)).toContain('none-file'); }); @@ -1653,7 +1658,7 @@ describe('primeResources', () => { object: 'file' as const, usage: 0, embedded: false, - source: 'local', + source: FileSources.local, }; const result = await primeResources({ @@ -1667,7 +1672,7 @@ describe('primeResources', () => { agentId: 'agent1', }); - const attachmentIds = result.attachments.map((f) => f.file_id); + const attachmentIds = result.attachments.map((f) => f?.file_id); expect(attachmentIds).toContain('legacy-file'); }); }); diff --git a/packages/api/src/agents/resources.ts b/packages/api/src/agents/resources.ts index a0c14c5791..6210decd95 100644 --- a/packages/api/src/agents/resources.ts +++ b/packages/api/src/agents/resources.ts @@ -14,14 +14,13 @@ export type TFileUpdate = { /** * Function type for provisioning a file to the code execution environment. - * @returns The fileIdentifier and a deferred DB update object + * @returns The codeEnvRef and a deferred DB update object */ export type TProvisionToCodeEnv = (params: { req: ServerRequest & { user?: IUser }; file: TFile; entity_id?: string; - apiKey?: string; -}) => Promise<{ fileIdentifier: string; fileUpdate: TFileUpdate }>; +}) => Promise<{ codeEnvRef: Record; fileUpdate: TFileUpdate }>; /** * Function type for provisioning a file to the vector DB for file_search. 
@@ -418,7 +417,7 @@ export const primeResources = async ({ let aliveFileIds: Set = new Set(); if (needsCodeEnv && codeApiKey && checkSessionsAlive) { const filesWithIdentifiers = attachments.filter( - (f) => f?.metadata?.fileIdentifier && f.file_id, + (f) => f?.metadata?.codeEnvRef && f.file_id, ); if (filesWithIdentifiers.length > 0) { aliveFileIds = await checkSessionsAlive({ @@ -442,15 +441,15 @@ export const primeResources = async ({ codeApiKey && !processedResourceFiles.has(`${EToolResources.execute_code}:${file.file_id}`) ) { - const hasFileIdentifier = !!file.metadata?.fileIdentifier; - const isStale = hasFileIdentifier && !aliveFileIds.has(file.file_id); + const hasCodeEnvRef = !!file.metadata?.codeEnvRef; + const isStale = hasCodeEnvRef && !aliveFileIds.has(file.file_id); - if (!hasFileIdentifier || isStale) { + if (!hasCodeEnvRef || isStale) { if (isStale) { logger.info( `[primeResources] Code env file expired for "${file.filename}" (${file.file_id}), will re-provision on tool use`, ); - file.metadata = { ...file.metadata, fileIdentifier: undefined }; + file.metadata = { ...file.metadata, codeEnvRef: undefined }; } codeEnvFiles.push(file); } else {