mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-07-02 04:12:36 +00:00
🔀 chore: align lazy provisioning with codeEnvRef schema
Rebasing onto current dev brought in the metadata.fileIdentifier →
metadata.codeEnvRef migration (on HEAD, uploadCodeEnvFile now returns
{ storage_session_id, file_id } and requires kind/id). Update the
unified-upload code paths to match:
- provision.js: provisionToCodeEnv now derives kind/id from entity_id,
calls uploadCodeEnvFile with the new signature, and returns codeEnvRef
- checkSessionsAlive/checkCodeEnvFileAlive: read storage_session_id and
remote file_id from metadata.codeEnvRef instead of parsing the legacy
fileIdentifier string
- resources.ts: primeResources gates on metadata.codeEnvRef and clears
it on staleness; TProvisionToCodeEnv reflects the new return shape
- initialize.js: provisionFiles closure destructures codeEnvRef
- process.spec.js: align two legacyFileUploadUX tests with the
endpoint-level check that landed in 7384947, and update the execute_code
expectation to the codeEnvRef metadata shape
- resources.test.ts: import FileSources for the typed source field and
guard possibly-undefined entries when mapping over attachments
This commit is contained in:
parent
bf04cd7f83
commit
2e0187108d
5 changed files with 84 additions and 62 deletions
|
|
@ -276,13 +276,12 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => {
|
|||
if (needsCode && provisionState.codeEnvFiles.length > 0 && provisionState.codeApiKey) {
|
||||
const results = await Promise.allSettled(
|
||||
provisionState.codeEnvFiles.map(async (file) => {
|
||||
const { fileIdentifier, fileUpdate } = await provisionToCodeEnv({
|
||||
const { codeEnvRef, fileUpdate } = await provisionToCodeEnv({
|
||||
req,
|
||||
file,
|
||||
entity_id: agentId,
|
||||
apiKey: provisionState.codeApiKey,
|
||||
});
|
||||
file.metadata = { ...file.metadata, fileIdentifier };
|
||||
file.metadata = { ...file.metadata, codeEnvRef };
|
||||
pendingUpdates.push(fileUpdate);
|
||||
}),
|
||||
);
|
||||
|
|
|
|||
|
|
@ -954,12 +954,15 @@ describe('processAgentFileUpload', () => {
|
|||
getStrategyFunctions.mockReturnValue({ handleFileUpload: storageUpload });
|
||||
mergeFileConfig.mockReturnValue({
|
||||
...makeFileConfig(),
|
||||
legacyFileUploadUX: true,
|
||||
endpoints: {
|
||||
[EModelEndpoint.agents]: { legacyFileUploadUX: true },
|
||||
},
|
||||
defaultLLMDeliveryPath: {
|
||||
fallback: 'none',
|
||||
},
|
||||
});
|
||||
const req = makeReq({ mimetype: 'text/markdown', ocrConfig: null });
|
||||
req.body.endpoint = EModelEndpoint.agents;
|
||||
|
||||
await processAgentFileUpload({
|
||||
req,
|
||||
|
|
@ -984,7 +987,9 @@ describe('processAgentFileUpload', () => {
|
|||
|
||||
test('persists llmDeliveryPath none for explicit execute_code uploads', async () => {
|
||||
const { createFile } = require('~/models');
|
||||
const codeUpload = jest.fn().mockResolvedValue('session-1/file.csv');
|
||||
const codeUpload = jest
|
||||
.fn()
|
||||
.mockResolvedValue({ storage_session_id: 'sess-csv', file_id: 'fid-csv' });
|
||||
const storageUpload = jest.fn().mockResolvedValue({
|
||||
filepath: '/uploads/user-123/file-uuid-123__upload.bin',
|
||||
bytes: 128,
|
||||
|
|
@ -1024,7 +1029,14 @@ describe('processAgentFileUpload', () => {
|
|||
filepath: '/uploads/user-123/file-uuid-123__upload.bin',
|
||||
source: FileSources.local,
|
||||
type: 'text/csv',
|
||||
metadata: { fileIdentifier: 'session-1/file.csv' },
|
||||
metadata: {
|
||||
codeEnvRef: {
|
||||
kind: 'agent',
|
||||
id: 'agent-abc',
|
||||
storage_session_id: 'sess-csv',
|
||||
file_id: 'fid-csv',
|
||||
},
|
||||
},
|
||||
llmDeliveryPath: 'none',
|
||||
}),
|
||||
true,
|
||||
|
|
@ -1171,13 +1183,16 @@ describe('processImageFile', () => {
|
|||
});
|
||||
mergeFileConfig.mockReturnValue({
|
||||
...makeFileConfig(),
|
||||
legacyFileUploadUX: true,
|
||||
endpoints: {
|
||||
[EModelEndpoint.agents]: { legacyFileUploadUX: true },
|
||||
},
|
||||
defaultLLMDeliveryPath: {
|
||||
overrides: { 'image/*': 'none' },
|
||||
},
|
||||
});
|
||||
getStrategyFunctions.mockReturnValue({ handleImageUpload });
|
||||
const req = makeReq({ mimetype: 'image/png', ocrConfig: null });
|
||||
req.body.endpoint = EModelEndpoint.agents;
|
||||
|
||||
await processImageFile({
|
||||
req,
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
|||
const { getStrategyFunctions } = require('./strategies');
|
||||
|
||||
// TODO: check and potentially fix — concurrent temp file collision (deterministic path based on file_id)
|
||||
// TODO: check and potentially fix — query params not forwarded in checkSessionsAlive batch liveness check
|
||||
// TODO: check and potentially fix — direct mutation of shared file objects in provisionFiles callback
|
||||
// TODO: check and potentially fix — this file should be TypeScript in packages/api per CLAUDE.md rules
|
||||
|
||||
|
|
@ -22,7 +21,7 @@ const axios = createAxiosInstance();
|
|||
|
||||
/**
|
||||
* Loads the CODE_API_KEY for a user. Call once per request and pass the result
|
||||
* to provisionToCodeEnv / checkSessionsAlive to avoid redundant lookups.
|
||||
* to checkSessionsAlive to avoid redundant lookups.
|
||||
*
|
||||
* @param {string} userId
|
||||
* @returns {Promise<string>} The CODE_API_KEY
|
||||
|
|
@ -34,16 +33,18 @@ async function loadCodeApiKey(userId) {
|
|||
|
||||
/**
|
||||
* Provisions a file to the code execution environment.
|
||||
* Gets a read stream from our storage and uploads to the code env.
|
||||
* Gets a read stream from our storage and uploads to the code env, persisting
|
||||
* the resulting `codeEnvRef` so downstream readers (primeFiles, code env
|
||||
* categorization) can locate the sandbox copy on subsequent turns.
|
||||
*
|
||||
* @param {object} params
|
||||
* @param {object} params.req - Express request object (needs req.user.id)
|
||||
* @param {import('librechat-data-provider').TFile} params.file - The file record from DB
|
||||
* @param {string} [params.entity_id] - Optional entity ID (agent_id)
|
||||
* @param {string} [params.apiKey] - Pre-loaded CODE_API_KEY (avoids redundant loadAuthValues)
|
||||
* @returns {Promise<{ fileIdentifier: string, fileUpdate: object }>} Result with deferred DB update
|
||||
* @param {string} [params.entity_id] - Optional entity ID (agent_id); when present the ref
|
||||
* is scoped to `kind: 'agent'`, otherwise it falls back to `kind: 'user'`.
|
||||
* @returns {Promise<{ codeEnvRef: object, fileUpdate: object }>} Result with deferred DB update
|
||||
*/
|
||||
async function provisionToCodeEnv({ req, file, entity_id = '', apiKey }) {
|
||||
async function provisionToCodeEnv({ req, file, entity_id }) {
|
||||
const { getDownloadStream } = getStrategyFunctions(file.source);
|
||||
if (!getDownloadStream) {
|
||||
throw new Error(
|
||||
|
|
@ -51,25 +52,34 @@ async function provisionToCodeEnv({ req, file, entity_id = '', apiKey }) {
|
|||
);
|
||||
}
|
||||
|
||||
const resolvedApiKey = apiKey ?? (await loadCodeApiKey(req.user.id));
|
||||
const { handleFileUpload: uploadCodeEnvFile } = getStrategyFunctions(FileSources.execute_code);
|
||||
const stream = await getDownloadStream(req, file.filepath);
|
||||
|
||||
const fileIdentifier = await uploadCodeEnvFile({
|
||||
const kind = entity_id ? 'agent' : 'user';
|
||||
const id = entity_id ?? req.user.id;
|
||||
|
||||
const uploaded = await uploadCodeEnvFile({
|
||||
req,
|
||||
stream,
|
||||
filename: file.filename,
|
||||
apiKey: resolvedApiKey,
|
||||
entity_id,
|
||||
kind,
|
||||
id,
|
||||
});
|
||||
|
||||
const codeEnvRef = {
|
||||
kind,
|
||||
id,
|
||||
storage_session_id: uploaded.storage_session_id,
|
||||
file_id: uploaded.file_id,
|
||||
};
|
||||
|
||||
logger.debug(
|
||||
`[provisionToCodeEnv] Provisioned file "${file.filename}" (${file.file_id}) to code env`,
|
||||
);
|
||||
|
||||
return {
|
||||
fileIdentifier,
|
||||
fileUpdate: { file_id: file.file_id, metadata: { ...file.metadata, fileIdentifier } },
|
||||
codeEnvRef,
|
||||
fileUpdate: { file_id: file.file_id, metadata: { ...file.metadata, codeEnvRef } },
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -152,29 +162,22 @@ async function provisionToVectorDB({ req, file, entity_id, existingStream }) {
|
|||
* Check if a single code env file is still alive by querying its session.
|
||||
*
|
||||
* @param {object} params
|
||||
* @param {import('librechat-data-provider').TFile} params.file - File with metadata.fileIdentifier
|
||||
* @param {import('librechat-data-provider').TFile} params.file - File with metadata.codeEnvRef
|
||||
* @param {string} params.apiKey - CODE_API_KEY
|
||||
* @returns {Promise<boolean>} true if the file is still accessible in the code env
|
||||
*/
|
||||
async function checkCodeEnvFileAlive({ file, apiKey }) {
|
||||
if (!file.metadata?.fileIdentifier) {
|
||||
const ref = file.metadata?.codeEnvRef;
|
||||
if (!ref?.storage_session_id || !ref?.file_id) {
|
||||
return false;
|
||||
}
|
||||
|
||||
try {
|
||||
const baseURL = getCodeBaseURL();
|
||||
const [filePath, queryString] = file.metadata.fileIdentifier.split('?');
|
||||
const session_id = filePath.split('/')[0];
|
||||
|
||||
let queryParams = {};
|
||||
if (queryString) {
|
||||
queryParams = Object.fromEntries(new URLSearchParams(queryString).entries());
|
||||
}
|
||||
|
||||
const response = await axios({
|
||||
method: 'get',
|
||||
url: `${baseURL}/files/${session_id}`,
|
||||
params: { detail: 'summary', ...queryParams },
|
||||
url: `${baseURL}/files/${ref.storage_session_id}`,
|
||||
params: { detail: 'summary' },
|
||||
headers: {
|
||||
'User-Agent': 'LibreChat/1.0',
|
||||
'X-API-Key': apiKey,
|
||||
|
|
@ -184,7 +187,7 @@ async function checkCodeEnvFileAlive({ file, apiKey }) {
|
|||
timeout: 5000,
|
||||
});
|
||||
|
||||
const found = response.data?.some((f) => f.name?.startsWith(filePath));
|
||||
const found = response.data?.some((f) => f.fileId === ref.file_id);
|
||||
return !!found;
|
||||
} catch (error) {
|
||||
logAxiosError({
|
||||
|
|
@ -196,11 +199,11 @@ async function checkCodeEnvFileAlive({ file, apiKey }) {
|
|||
}
|
||||
|
||||
/**
|
||||
* Batch-check code env file liveness by session_id.
|
||||
* Batch-check code env file liveness by `storage_session_id`.
|
||||
* Groups files by session, makes one API call per session.
|
||||
*
|
||||
* @param {object} params
|
||||
* @param {import('librechat-data-provider').TFile[]} params.files - Files with metadata.fileIdentifier
|
||||
* @param {import('librechat-data-provider').TFile[]} params.files - Files with metadata.codeEnvRef
|
||||
* @param {string} params.apiKey - Pre-loaded CODE_API_KEY
|
||||
* @param {number} [params.staleSafeWindowMs=21600000] - Skip check if file updated within this window (default 6h)
|
||||
* @returns {Promise<Set<string>>} Set of file_ids that are confirmed alive
|
||||
|
|
@ -209,12 +212,13 @@ async function checkSessionsAlive({ files, apiKey, staleSafeWindowMs = 6 * 60 *
|
|||
const aliveFileIds = new Set();
|
||||
const now = Date.now();
|
||||
|
||||
// Group files by session_id, skip recently-updated files (fast pre-filter)
|
||||
/** @type {Map<string, Array<{ file_id: string; filePath: string }>>} */
|
||||
// Group files by storage_session_id, skip recently-updated files (fast pre-filter)
|
||||
/** @type {Map<string, Array<{ file_id: string; remoteFileId: string }>>} */
|
||||
const sessionGroups = new Map();
|
||||
|
||||
for (const file of files) {
|
||||
if (!file.metadata?.fileIdentifier) {
|
||||
const ref = file.metadata?.codeEnvRef;
|
||||
if (!ref?.storage_session_id || !ref?.file_id) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -224,13 +228,13 @@ async function checkSessionsAlive({ files, apiKey, staleSafeWindowMs = 6 * 60 *
|
|||
continue;
|
||||
}
|
||||
|
||||
const [filePath] = file.metadata.fileIdentifier.split('?');
|
||||
const session_id = filePath.split('/')[0];
|
||||
|
||||
if (!sessionGroups.has(session_id)) {
|
||||
sessionGroups.set(session_id, []);
|
||||
if (!sessionGroups.has(ref.storage_session_id)) {
|
||||
sessionGroups.set(ref.storage_session_id, []);
|
||||
}
|
||||
sessionGroups.get(session_id).push({ file_id: file.file_id, filePath });
|
||||
sessionGroups.get(ref.storage_session_id).push({
|
||||
file_id: file.file_id,
|
||||
remoteFileId: ref.file_id,
|
||||
});
|
||||
}
|
||||
|
||||
// One API call per session (in parallel)
|
||||
|
|
@ -252,8 +256,8 @@ async function checkSessionsAlive({ files, apiKey, staleSafeWindowMs = 6 * 60 *
|
|||
});
|
||||
|
||||
const remoteFiles = response.data ?? [];
|
||||
for (const { file_id, filePath } of fileEntries) {
|
||||
if (remoteFiles.some((f) => f.name?.startsWith(filePath))) {
|
||||
for (const { file_id, remoteFileId } of fileEntries) {
|
||||
if (remoteFiles.some((f) => f.fileId === remoteFileId)) {
|
||||
aliveFileIds.add(file_id);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,11 @@
|
|||
import { primeResources } from './resources';
|
||||
import { logger } from '@librechat/data-schemas';
|
||||
import { EModelEndpoint, EToolResources, AgentCapabilities } from 'librechat-data-provider';
|
||||
import {
|
||||
FileSources,
|
||||
EModelEndpoint,
|
||||
EToolResources,
|
||||
AgentCapabilities,
|
||||
} from 'librechat-data-provider';
|
||||
import type { TAgentsEndpoint, TFile } from 'librechat-data-provider';
|
||||
import type { IUser, AppConfig } from '@librechat/data-schemas';
|
||||
import type { Request as ServerRequest } from 'express';
|
||||
|
|
@ -1574,7 +1579,7 @@ describe('primeResources', () => {
|
|||
object: 'file' as const,
|
||||
usage: 0,
|
||||
embedded: false,
|
||||
source: 'local',
|
||||
source: FileSources.local,
|
||||
llmDeliveryPath: 'provider',
|
||||
width: 100,
|
||||
height: 100,
|
||||
|
|
@ -1589,7 +1594,7 @@ describe('primeResources', () => {
|
|||
object: 'file' as const,
|
||||
usage: 0,
|
||||
embedded: false,
|
||||
source: 'local',
|
||||
source: FileSources.local,
|
||||
llmDeliveryPath: 'none',
|
||||
};
|
||||
|
||||
|
|
@ -1604,7 +1609,7 @@ describe('primeResources', () => {
|
|||
agentId: 'agent1',
|
||||
});
|
||||
|
||||
const attachmentIds = result.attachments.map((f) => f.file_id);
|
||||
const attachmentIds = result.attachments.map((f) => f?.file_id);
|
||||
expect(attachmentIds).toContain('provider-file');
|
||||
expect(attachmentIds).toContain('none-file');
|
||||
});
|
||||
|
|
@ -1620,7 +1625,7 @@ describe('primeResources', () => {
|
|||
object: 'file' as const,
|
||||
usage: 0,
|
||||
embedded: false,
|
||||
source: 'local',
|
||||
source: FileSources.local,
|
||||
llmDeliveryPath: 'none',
|
||||
};
|
||||
|
||||
|
|
@ -1637,7 +1642,7 @@ describe('primeResources', () => {
|
|||
loadCodeApiKey: jest.fn().mockResolvedValue('code-key'),
|
||||
});
|
||||
|
||||
expect(result.attachments.map((f) => f.file_id)).toContain('none-file');
|
||||
expect(result.attachments.map((f) => f?.file_id)).toContain('none-file');
|
||||
expect(result.provisionState?.codeEnvFiles.map((f) => f.file_id)).toContain('none-file');
|
||||
expect(result.provisionState?.vectorDBFiles.map((f) => f.file_id)).toContain('none-file');
|
||||
});
|
||||
|
|
@ -1653,7 +1658,7 @@ describe('primeResources', () => {
|
|||
object: 'file' as const,
|
||||
usage: 0,
|
||||
embedded: false,
|
||||
source: 'local',
|
||||
source: FileSources.local,
|
||||
};
|
||||
|
||||
const result = await primeResources({
|
||||
|
|
@ -1667,7 +1672,7 @@ describe('primeResources', () => {
|
|||
agentId: 'agent1',
|
||||
});
|
||||
|
||||
const attachmentIds = result.attachments.map((f) => f.file_id);
|
||||
const attachmentIds = result.attachments.map((f) => f?.file_id);
|
||||
expect(attachmentIds).toContain('legacy-file');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -14,14 +14,13 @@ export type TFileUpdate = {
|
|||
|
||||
/**
|
||||
* Function type for provisioning a file to the code execution environment.
|
||||
* @returns The fileIdentifier and a deferred DB update object
|
||||
* @returns The codeEnvRef and a deferred DB update object
|
||||
*/
|
||||
export type TProvisionToCodeEnv = (params: {
|
||||
req: ServerRequest & { user?: IUser };
|
||||
file: TFile;
|
||||
entity_id?: string;
|
||||
apiKey?: string;
|
||||
}) => Promise<{ fileIdentifier: string; fileUpdate: TFileUpdate }>;
|
||||
}) => Promise<{ codeEnvRef: Record<string, unknown>; fileUpdate: TFileUpdate }>;
|
||||
|
||||
/**
|
||||
* Function type for provisioning a file to the vector DB for file_search.
|
||||
|
|
@ -418,7 +417,7 @@ export const primeResources = async ({
|
|||
let aliveFileIds: Set<string> = new Set();
|
||||
if (needsCodeEnv && codeApiKey && checkSessionsAlive) {
|
||||
const filesWithIdentifiers = attachments.filter(
|
||||
(f) => f?.metadata?.fileIdentifier && f.file_id,
|
||||
(f) => f?.metadata?.codeEnvRef && f.file_id,
|
||||
);
|
||||
if (filesWithIdentifiers.length > 0) {
|
||||
aliveFileIds = await checkSessionsAlive({
|
||||
|
|
@ -442,15 +441,15 @@ export const primeResources = async ({
|
|||
codeApiKey &&
|
||||
!processedResourceFiles.has(`${EToolResources.execute_code}:${file.file_id}`)
|
||||
) {
|
||||
const hasFileIdentifier = !!file.metadata?.fileIdentifier;
|
||||
const isStale = hasFileIdentifier && !aliveFileIds.has(file.file_id);
|
||||
const hasCodeEnvRef = !!file.metadata?.codeEnvRef;
|
||||
const isStale = hasCodeEnvRef && !aliveFileIds.has(file.file_id);
|
||||
|
||||
if (!hasFileIdentifier || isStale) {
|
||||
if (!hasCodeEnvRef || isStale) {
|
||||
if (isStale) {
|
||||
logger.info(
|
||||
`[primeResources] Code env file expired for "${file.filename}" (${file.file_id}), will re-provision on tool use`,
|
||||
);
|
||||
file.metadata = { ...file.metadata, fileIdentifier: undefined };
|
||||
file.metadata = { ...file.metadata, codeEnvRef: undefined };
|
||||
}
|
||||
codeEnvFiles.push(file);
|
||||
} else {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue