🔀 chore: align lazy provisioning with codeEnvRef schema

Rebase onto current dev brought in the metadata.fileIdentifier →
metadata.codeEnvRef migration (HEAD uploadCodeEnvFile now returns
{ storage_session_id, file_id } and requires kind/id). Update the
unified-upload code paths to match:

- provision.js: provisionToCodeEnv now derives kind/id from entity_id,
  calls uploadCodeEnvFile with the new signature, and returns codeEnvRef
- checkSessionsAlive/checkCodeEnvFileAlive: read storage_session_id and
  remote file_id from metadata.codeEnvRef instead of parsing the legacy
  fileIdentifier string
- resources.ts: primeResources gates on metadata.codeEnvRef and clears
  it on staleness; TProvisionToCodeEnv reflects the new return shape
- initialize.js: provisionFiles closure destructures codeEnvRef
- process.spec.js: align two legacyFileUploadUX tests with the
  endpoint-level check landed in 7384947 and update the execute_code
  expectation to the codeEnvRef metadata shape
- resources.test.ts: import FileSources for the typed source field and
  guard the optional attachments map
This commit is contained in:
Danny Avila 2026-05-13 04:03:32 -04:00
parent bf04cd7f83
commit 2e0187108d
5 changed files with 84 additions and 62 deletions

View file

@ -276,13 +276,12 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => {
if (needsCode && provisionState.codeEnvFiles.length > 0 && provisionState.codeApiKey) {
const results = await Promise.allSettled(
provisionState.codeEnvFiles.map(async (file) => {
const { fileIdentifier, fileUpdate } = await provisionToCodeEnv({
const { codeEnvRef, fileUpdate } = await provisionToCodeEnv({
req,
file,
entity_id: agentId,
apiKey: provisionState.codeApiKey,
});
file.metadata = { ...file.metadata, fileIdentifier };
file.metadata = { ...file.metadata, codeEnvRef };
pendingUpdates.push(fileUpdate);
}),
);

View file

@ -954,12 +954,15 @@ describe('processAgentFileUpload', () => {
getStrategyFunctions.mockReturnValue({ handleFileUpload: storageUpload });
mergeFileConfig.mockReturnValue({
...makeFileConfig(),
legacyFileUploadUX: true,
endpoints: {
[EModelEndpoint.agents]: { legacyFileUploadUX: true },
},
defaultLLMDeliveryPath: {
fallback: 'none',
},
});
const req = makeReq({ mimetype: 'text/markdown', ocrConfig: null });
req.body.endpoint = EModelEndpoint.agents;
await processAgentFileUpload({
req,
@ -984,7 +987,9 @@ describe('processAgentFileUpload', () => {
test('persists llmDeliveryPath none for explicit execute_code uploads', async () => {
const { createFile } = require('~/models');
const codeUpload = jest.fn().mockResolvedValue('session-1/file.csv');
const codeUpload = jest
.fn()
.mockResolvedValue({ storage_session_id: 'sess-csv', file_id: 'fid-csv' });
const storageUpload = jest.fn().mockResolvedValue({
filepath: '/uploads/user-123/file-uuid-123__upload.bin',
bytes: 128,
@ -1024,7 +1029,14 @@ describe('processAgentFileUpload', () => {
filepath: '/uploads/user-123/file-uuid-123__upload.bin',
source: FileSources.local,
type: 'text/csv',
metadata: { fileIdentifier: 'session-1/file.csv' },
metadata: {
codeEnvRef: {
kind: 'agent',
id: 'agent-abc',
storage_session_id: 'sess-csv',
file_id: 'fid-csv',
},
},
llmDeliveryPath: 'none',
}),
true,
@ -1171,13 +1183,16 @@ describe('processImageFile', () => {
});
mergeFileConfig.mockReturnValue({
...makeFileConfig(),
legacyFileUploadUX: true,
endpoints: {
[EModelEndpoint.agents]: { legacyFileUploadUX: true },
},
defaultLLMDeliveryPath: {
overrides: { 'image/*': 'none' },
},
});
getStrategyFunctions.mockReturnValue({ handleImageUpload });
const req = makeReq({ mimetype: 'image/png', ocrConfig: null });
req.body.endpoint = EModelEndpoint.agents;
await processImageFile({
req,

View file

@ -14,7 +14,6 @@ const { loadAuthValues } = require('~/server/services/Tools/credentials');
const { getStrategyFunctions } = require('./strategies');
// TODO: check and potentially fix — concurrent temp file collision (deterministic path based on file_id)
// TODO: check and potentially fix — query params not forwarded in checkSessionsAlive batch liveness check
// TODO: check and potentially fix — direct mutation of shared file objects in provisionFiles callback
// TODO: check and potentially fix — this file should be TypeScript in packages/api per CLAUDE.md rules
@ -22,7 +21,7 @@ const axios = createAxiosInstance();
/**
* Loads the CODE_API_KEY for a user. Call once per request and pass the result
* to provisionToCodeEnv / checkSessionsAlive to avoid redundant lookups.
* to checkSessionsAlive to avoid redundant lookups.
*
* @param {string} userId
* @returns {Promise<string>} The CODE_API_KEY
@ -34,16 +33,18 @@ async function loadCodeApiKey(userId) {
/**
* Provisions a file to the code execution environment.
* Gets a read stream from our storage and uploads to the code env.
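* Gets a read stream from our storage and uploads it to the code env, then
* returns the resulting `codeEnvRef` together with a deferred DB update
* (`fileUpdate`). This function does not write to the DB itself; once the
* caller persists `fileUpdate`, downstream readers (primeFiles, code env
* categorization) can locate the sandbox copy on subsequent turns.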
*
* @param {object} params
* @param {object} params.req - Express request object (needs req.user.id)
* @param {import('librechat-data-provider').TFile} params.file - The file record from DB
* @param {string} [params.entity_id] - Optional entity ID (agent_id)
* @param {string} [params.apiKey] - Pre-loaded CODE_API_KEY (avoids redundant loadAuthValues)
* @returns {Promise<{ fileIdentifier: string, fileUpdate: object }>} Result with deferred DB update
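* @param {string} [params.entity_id] - Optional entity ID (agent_id). When truthy the ref
* is scoped to `kind: 'agent'` with `id` set to this value; when omitted it falls back to
* `kind: 'user'` with `id` set to `req.user.id`.
* NOTE(review): an empty-string entity_id selects `kind: 'user'` but is still used as `id`,
* because the id fallback uses `??` rather than a truthiness check — confirm callers never pass ''.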
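* @returns {Promise<{ codeEnvRef: { kind: string, id: string, storage_session_id: string, file_id: string }, fileUpdate: object }>}
* The new code env reference and a deferred DB update carrying it in `metadata.codeEnvRef`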
*/
async function provisionToCodeEnv({ req, file, entity_id = '', apiKey }) {
async function provisionToCodeEnv({ req, file, entity_id }) {
const { getDownloadStream } = getStrategyFunctions(file.source);
if (!getDownloadStream) {
throw new Error(
@ -51,25 +52,34 @@ async function provisionToCodeEnv({ req, file, entity_id = '', apiKey }) {
);
}
const resolvedApiKey = apiKey ?? (await loadCodeApiKey(req.user.id));
const { handleFileUpload: uploadCodeEnvFile } = getStrategyFunctions(FileSources.execute_code);
const stream = await getDownloadStream(req, file.filepath);
const fileIdentifier = await uploadCodeEnvFile({
const kind = entity_id ? 'agent' : 'user';
const id = entity_id ?? req.user.id;
const uploaded = await uploadCodeEnvFile({
req,
stream,
filename: file.filename,
apiKey: resolvedApiKey,
entity_id,
kind,
id,
});
const codeEnvRef = {
kind,
id,
storage_session_id: uploaded.storage_session_id,
file_id: uploaded.file_id,
};
logger.debug(
`[provisionToCodeEnv] Provisioned file "${file.filename}" (${file.file_id}) to code env`,
);
return {
fileIdentifier,
fileUpdate: { file_id: file.file_id, metadata: { ...file.metadata, fileIdentifier } },
codeEnvRef,
fileUpdate: { file_id: file.file_id, metadata: { ...file.metadata, codeEnvRef } },
};
}
@ -152,29 +162,22 @@ async function provisionToVectorDB({ req, file, entity_id, existingStream }) {
* Check if a single code env file is still alive by querying its session.
*
* @param {object} params
* @param {import('librechat-data-provider').TFile} params.file - File with metadata.fileIdentifier
* @param {import('librechat-data-provider').TFile} params.file - File with metadata.codeEnvRef
* @param {string} params.apiKey - CODE_API_KEY
* @returns {Promise<boolean>} true if the file is still accessible in the code env
*/
async function checkCodeEnvFileAlive({ file, apiKey }) {
if (!file.metadata?.fileIdentifier) {
const ref = file.metadata?.codeEnvRef;
if (!ref?.storage_session_id || !ref?.file_id) {
return false;
}
try {
const baseURL = getCodeBaseURL();
const [filePath, queryString] = file.metadata.fileIdentifier.split('?');
const session_id = filePath.split('/')[0];
let queryParams = {};
if (queryString) {
queryParams = Object.fromEntries(new URLSearchParams(queryString).entries());
}
const response = await axios({
method: 'get',
url: `${baseURL}/files/${session_id}`,
params: { detail: 'summary', ...queryParams },
url: `${baseURL}/files/${ref.storage_session_id}`,
params: { detail: 'summary' },
headers: {
'User-Agent': 'LibreChat/1.0',
'X-API-Key': apiKey,
@ -184,7 +187,7 @@ async function checkCodeEnvFileAlive({ file, apiKey }) {
timeout: 5000,
});
const found = response.data?.some((f) => f.name?.startsWith(filePath));
const found = response.data?.some((f) => f.fileId === ref.file_id);
return !!found;
} catch (error) {
logAxiosError({
@ -196,11 +199,11 @@ async function checkCodeEnvFileAlive({ file, apiKey }) {
}
/**
* Batch-check code env file liveness by session_id.
* Batch-check code env file liveness by `storage_session_id`.
* Groups files by session, makes one API call per session.
*
* @param {object} params
* @param {import('librechat-data-provider').TFile[]} params.files - Files with metadata.fileIdentifier
* @param {import('librechat-data-provider').TFile[]} params.files - Files with metadata.codeEnvRef
* @param {string} params.apiKey - Pre-loaded CODE_API_KEY
* @param {number} [params.staleSafeWindowMs=21600000] - Skip check if file updated within this window (default 6h)
* @returns {Promise<Set<string>>} Set of file_ids that are confirmed alive
@ -209,12 +212,13 @@ async function checkSessionsAlive({ files, apiKey, staleSafeWindowMs = 6 * 60 *
const aliveFileIds = new Set();
const now = Date.now();
// Group files by session_id, skip recently-updated files (fast pre-filter)
/** @type {Map<string, Array<{ file_id: string; filePath: string }>>} */
// Group files by storage_session_id, skip recently-updated files (fast pre-filter)
/** @type {Map<string, Array<{ file_id: string; remoteFileId: string }>>} */
const sessionGroups = new Map();
for (const file of files) {
if (!file.metadata?.fileIdentifier) {
const ref = file.metadata?.codeEnvRef;
if (!ref?.storage_session_id || !ref?.file_id) {
continue;
}
@ -224,13 +228,13 @@ async function checkSessionsAlive({ files, apiKey, staleSafeWindowMs = 6 * 60 *
continue;
}
const [filePath] = file.metadata.fileIdentifier.split('?');
const session_id = filePath.split('/')[0];
if (!sessionGroups.has(session_id)) {
sessionGroups.set(session_id, []);
if (!sessionGroups.has(ref.storage_session_id)) {
sessionGroups.set(ref.storage_session_id, []);
}
sessionGroups.get(session_id).push({ file_id: file.file_id, filePath });
sessionGroups.get(ref.storage_session_id).push({
file_id: file.file_id,
remoteFileId: ref.file_id,
});
}
// One API call per session (in parallel)
@ -252,8 +256,8 @@ async function checkSessionsAlive({ files, apiKey, staleSafeWindowMs = 6 * 60 *
});
const remoteFiles = response.data ?? [];
for (const { file_id, filePath } of fileEntries) {
if (remoteFiles.some((f) => f.name?.startsWith(filePath))) {
for (const { file_id, remoteFileId } of fileEntries) {
if (remoteFiles.some((f) => f.fileId === remoteFileId)) {
aliveFileIds.add(file_id);
}
}

View file

@ -1,6 +1,11 @@
import { primeResources } from './resources';
import { logger } from '@librechat/data-schemas';
import { EModelEndpoint, EToolResources, AgentCapabilities } from 'librechat-data-provider';
import {
FileSources,
EModelEndpoint,
EToolResources,
AgentCapabilities,
} from 'librechat-data-provider';
import type { TAgentsEndpoint, TFile } from 'librechat-data-provider';
import type { IUser, AppConfig } from '@librechat/data-schemas';
import type { Request as ServerRequest } from 'express';
@ -1574,7 +1579,7 @@ describe('primeResources', () => {
object: 'file' as const,
usage: 0,
embedded: false,
source: 'local',
source: FileSources.local,
llmDeliveryPath: 'provider',
width: 100,
height: 100,
@ -1589,7 +1594,7 @@ describe('primeResources', () => {
object: 'file' as const,
usage: 0,
embedded: false,
source: 'local',
source: FileSources.local,
llmDeliveryPath: 'none',
};
@ -1604,7 +1609,7 @@ describe('primeResources', () => {
agentId: 'agent1',
});
const attachmentIds = result.attachments.map((f) => f.file_id);
const attachmentIds = result.attachments.map((f) => f?.file_id);
expect(attachmentIds).toContain('provider-file');
expect(attachmentIds).toContain('none-file');
});
@ -1620,7 +1625,7 @@ describe('primeResources', () => {
object: 'file' as const,
usage: 0,
embedded: false,
source: 'local',
source: FileSources.local,
llmDeliveryPath: 'none',
};
@ -1637,7 +1642,7 @@ describe('primeResources', () => {
loadCodeApiKey: jest.fn().mockResolvedValue('code-key'),
});
expect(result.attachments.map((f) => f.file_id)).toContain('none-file');
expect(result.attachments.map((f) => f?.file_id)).toContain('none-file');
expect(result.provisionState?.codeEnvFiles.map((f) => f.file_id)).toContain('none-file');
expect(result.provisionState?.vectorDBFiles.map((f) => f.file_id)).toContain('none-file');
});
@ -1653,7 +1658,7 @@ describe('primeResources', () => {
object: 'file' as const,
usage: 0,
embedded: false,
source: 'local',
source: FileSources.local,
};
const result = await primeResources({
@ -1667,7 +1672,7 @@ describe('primeResources', () => {
agentId: 'agent1',
});
const attachmentIds = result.attachments.map((f) => f.file_id);
const attachmentIds = result.attachments.map((f) => f?.file_id);
expect(attachmentIds).toContain('legacy-file');
});
});

View file

@ -14,14 +14,13 @@ export type TFileUpdate = {
/**
* Function type for provisioning a file to the code execution environment.
* @returns The fileIdentifier and a deferred DB update object
* @returns The codeEnvRef and a deferred DB update object
*/
export type TProvisionToCodeEnv = (params: {
req: ServerRequest & { user?: IUser };
file: TFile;
entity_id?: string;
apiKey?: string;
}) => Promise<{ fileIdentifier: string; fileUpdate: TFileUpdate }>;
}) => Promise<{ codeEnvRef: Record<string, unknown>; fileUpdate: TFileUpdate }>;
/**
* Function type for provisioning a file to the vector DB for file_search.
@ -418,7 +417,7 @@ export const primeResources = async ({
let aliveFileIds: Set<string> = new Set();
if (needsCodeEnv && codeApiKey && checkSessionsAlive) {
const filesWithIdentifiers = attachments.filter(
(f) => f?.metadata?.fileIdentifier && f.file_id,
(f) => f?.metadata?.codeEnvRef && f.file_id,
);
if (filesWithIdentifiers.length > 0) {
aliveFileIds = await checkSessionsAlive({
@ -442,15 +441,15 @@ export const primeResources = async ({
codeApiKey &&
!processedResourceFiles.has(`${EToolResources.execute_code}:${file.file_id}`)
) {
const hasFileIdentifier = !!file.metadata?.fileIdentifier;
const isStale = hasFileIdentifier && !aliveFileIds.has(file.file_id);
const hasCodeEnvRef = !!file.metadata?.codeEnvRef;
const isStale = hasCodeEnvRef && !aliveFileIds.has(file.file_id);
if (!hasFileIdentifier || isStale) {
if (!hasCodeEnvRef || isStale) {
if (isStale) {
logger.info(
`[primeResources] Code env file expired for "${file.filename}" (${file.file_id}), will re-provision on tool use`,
);
file.metadata = { ...file.metadata, fileIdentifier: undefined };
file.metadata = { ...file.metadata, codeEnvRef: undefined };
}
codeEnvFiles.push(file);
} else {