const path = require('path'); const { v4 } = require('uuid'); const { logger } = require('@librechat/data-schemas'); const { getCodeBaseURL } = require('@librechat/agents'); const { getBasePath, logAxiosError, sanitizeArtifactPath, flattenArtifactPath, createAxiosInstance, classifyCodeArtifact, codeServerHttpAgent, codeServerHttpsAgent, extractCodeArtifactText, } = require('@librechat/api'); const { Tools, megabyte, fileConfig, FileContext, FileSources, imageExtRegex, inferMimeType, EToolResources, EModelEndpoint, mergeFileConfig, getEndpointFileConfig, } = require('librechat-data-provider'); const { filterFilesByAgentAccess } = require('~/server/services/Files/permissions'); const { createFile, getFiles, updateFile, claimCodeFile } = require('~/models'); const { getStrategyFunctions } = require('~/server/services/Files/strategies'); const { convertImage } = require('~/server/services/Files/images/convert'); const { determineFileType } = require('~/server/utils'); const axios = createAxiosInstance(); /** * Creates a fallback download URL response when file cannot be processed locally. * Used when: file exceeds size limit, storage strategy unavailable, or download error occurs. * @param {Object} params - The parameters. * @param {string} params.name - The filename. * @param {string} params.session_id - The code execution session ID. * @param {string} params.id - The file ID from the code environment. * @param {string} params.conversationId - The current conversation ID. * @param {string} params.toolCallId - The tool call ID that generated the file. * @param {string} params.messageId - The current message ID. * @param {number} params.expiresAt - Expiration timestamp (24 hours from creation). * @returns {Object} Fallback response with download URL. */ const createDownloadFallback = ({ id, name, messageId, expiresAt, session_id, toolCallId, conversationId, }) => { const basePath = getBasePath(); return { filename: name, filepath: `${basePath}/api/files/code/download/${session_id}/${id}`, expiresAt, conversationId, toolCallId, messageId, }; }; /** * Process code execution output files - downloads and saves both images and non-image files. * All files are saved to local storage with fileIdentifier metadata for code env re-upload. * @param {ServerRequest} params.req - The Express request object. * @param {string} params.id - The file ID from the code environment. * @param {string} params.name - The filename. * @param {string} params.toolCallId - The tool call ID that generated the file. * @param {string} params.session_id - The code execution session ID. * @param {string} params.conversationId - The current conversation ID. * @param {string} params.messageId - The current message ID. * @returns {Promise} The file metadata or undefined if an error occurs. */ const processCodeOutput = async ({ req, id, name, toolCallId, conversationId, messageId, session_id, }) => { const appConfig = req.config; const currentDate = new Date(); const baseURL = getCodeBaseURL(); const fileExt = path.extname(name).toLowerCase(); const isImage = fileExt && imageExtRegex.test(name); const mergedFileConfig = mergeFileConfig(appConfig.fileConfig); const endpointFileConfig = getEndpointFileConfig({ fileConfig: mergedFileConfig, endpoint: EModelEndpoint.agents, }); const fileSizeLimit = endpointFileConfig.fileSizeLimit ?? mergedFileConfig.serverFileSizeLimit; try { const formattedDate = currentDate.toISOString(); const response = await axios({ method: 'get', url: `${baseURL}/download/${session_id}/${id}`, responseType: 'arraybuffer', headers: { 'User-Agent': 'LibreChat/1.0', }, httpAgent: codeServerHttpAgent, httpsAgent: codeServerHttpsAgent, timeout: 15000, }); const buffer = Buffer.from(response.data, 'binary'); // Enforce file size limit if (buffer.length > fileSizeLimit) { logger.warn( `[processCodeOutput] File "${name}" (${(buffer.length / megabyte).toFixed(2)} MB) exceeds size limit of ${(fileSizeLimit / megabyte).toFixed(2)} MB, falling back to download URL`, ); return createDownloadFallback({ id, name, messageId, toolCallId, session_id, conversationId, expiresAt: currentDate.getTime() + 86400000, }); } const fileIdentifier = `${session_id}/${id}`; /* `safeName` keeps the directory structure (`a/b/file.txt` -> `a/b/file.txt`) * so the next prime() can place the file at the same nested path in the * sandbox; flattening would re-create the bug where every nested artifact * collapsed into the root and read_file calls 404'd. The flat-form * storage key is composed below once `file_id` is known so we can cap * the total length at filesystem NAME_MAX. */ const safeName = sanitizeArtifactPath(name); if (safeName !== name) { logger.warn( `[processCodeOutput] Filename sanitized: "${name}" -> "${safeName}" | conv=${conversationId}`, ); } /** * Atomically claim a file_id for this (filename, conversationId, context) tuple. * Uses $setOnInsert so concurrent calls for the same filename converge on * a single record instead of creating duplicates (TOCTOU race fix). * * Claim by `safeName` (not raw `name`) so the claim and the eventual * `createFile` agree on the filename column — otherwise weird inputs * (e.g. `"proj name/file@v1.txt"`) would claim under the raw name and * then write under the sanitized one, leaving the claim row orphaned. */ const newFileId = v4(); const claimed = await claimCodeFile({ filename: safeName, conversationId, file_id: newFileId, user: req.user.id, }); const file_id = claimed.file_id; const isUpdate = file_id !== newFileId; if (isUpdate) { logger.debug( `[processCodeOutput] Updating existing file "${safeName}" (${file_id}) instead of creating duplicate`, ); } /** * Preserve the original `messageId` on update. Each `processCodeOutput` * call would otherwise overwrite it with the current run's run id, which * decouples the file from the assistant message that originally created * it. `getCodeGeneratedFiles` filters by `messageId IN `, so a * stale id (e.g. from a later regeneration / failed re-read attempt) * silently excludes the file from priming on subsequent turns. */ const persistedMessageId = isUpdate ? (claimed.messageId ?? messageId) : messageId; if (isImage) { const usage = isUpdate ? (claimed.usage ?? 0) + 1 : 1; const _file = await convertImage(req, buffer, 'high', `${file_id}${fileExt}`); const filepath = usage > 1 ? `${_file.filepath}?v=${Date.now()}` : _file.filepath; const file = { ..._file, filepath, file_id, messageId: persistedMessageId, usage, filename: safeName, conversationId, user: req.user.id, type: `image/${appConfig.imageOutputType}`, createdAt: isUpdate ? claimed.createdAt : formattedDate, updatedAt: formattedDate, source: appConfig.fileStrategy, context: FileContext.execute_code, metadata: { fileIdentifier }, }; await createFile(file, true); return Object.assign(file, { messageId, toolCallId }); } const { saveBuffer } = getStrategyFunctions(appConfig.fileStrategy); if (!saveBuffer) { logger.warn( `[processCodeOutput] saveBuffer not available for strategy ${appConfig.fileStrategy}, falling back to download URL`, ); return createDownloadFallback({ id, name, messageId, toolCallId, session_id, conversationId, expiresAt: currentDate.getTime() + 86400000, }); } const detectedType = await determineFileType(buffer, true); const mimeType = detectedType?.mime || inferMimeType(name, '') || 'application/octet-stream'; /** Check MIME type support - for code-generated files, we're lenient but log unsupported types */ const isSupportedMimeType = fileConfig.checkType( mimeType, endpointFileConfig.supportedMimeTypes, ); if (!isSupportedMimeType) { logger.warn( `[processCodeOutput] File "${name}" has unsupported MIME type "${mimeType}", proceeding with storage but may not be usable as tool resource`, ); } /* Compose the storage key here, after `file_id` is known, so the * `flattenArtifactPath` cap budget can be calculated against the * actual prefix length. The full key has to fit in one filesystem * path component (NAME_MAX = 255 on most filesystems); without this * cap, deeply-nested artifact paths whose individual segments were * within bounds can still produce a flat form that overflows once * `${file_id}__` is prepended, causing `ENAMETOOLONG` inside * saveBuffer and falling back to a download URL. The 255 figure is * the conservative cross-platform NAME_MAX (Linux ext4, NTFS, APFS). */ const NAME_MAX = 255; const flatName = flattenArtifactPath(safeName, NAME_MAX - file_id.length - 2); const fileName = `${file_id}__${flatName}`; const filepath = await saveBuffer({ userId: req.user.id, buffer, fileName, basePath: 'uploads', }); /* `classifyCodeArtifact` and `extractCodeArtifactText` make * extension/bare-name decisions on the input string. With the * path-preserving sanitizer they can now receive a nested path like * `reports.v1/Makefile`, which the classifier's `extensionOf` reads * as `v1/Makefile` (the slice after the dot in the directory name) * and the bare-name branch rejects because it sees a `.` anywhere in * the string. Result: extensionless artifacts under dotted folders * (Makefile, Dockerfile, etc.) get misclassified as `other` and * skip text extraction. Pass the basename so classification matches * what it would have gotten with the old flat-name flow. */ const leafName = path.basename(safeName); const category = classifyCodeArtifact(leafName, mimeType); const text = await extractCodeArtifactText(buffer, leafName, mimeType, category); const file = { file_id, filepath, messageId: persistedMessageId, object: 'file', filename: safeName, type: mimeType, conversationId, user: req.user.id, bytes: buffer.length, updatedAt: formattedDate, metadata: { fileIdentifier }, source: appConfig.fileStrategy, context: FileContext.execute_code, usage: isUpdate ? (claimed.usage ?? 0) + 1 : 1, createdAt: isUpdate ? claimed.createdAt : formattedDate, // Always set `text` explicitly (string or null) so that an update which // produces a binary or oversized artifact clears any previously cached // text — `createFile` uses findOneAndUpdate with $set semantics, which // would otherwise leave a stale value behind. text: text ?? null, }; await createFile(file, true); return Object.assign(file, { messageId, toolCallId }); } catch (error) { if (error?.message === 'Path traversal detected in filename') { logger.warn( `[processCodeOutput] Path traversal blocked for file "${name}" | conv=${conversationId}`, ); } logAxiosError({ message: 'Error downloading/processing code environment file', error, }); // Fallback for download errors - return download URL so user can still manually download return createDownloadFallback({ id, name, messageId, toolCallId, session_id, conversationId, expiresAt: currentDate.getTime() + 86400000, }); } }; function checkIfActive(dateString) { const givenDate = new Date(dateString); const currentDate = new Date(); const timeDifference = currentDate - givenDate; const hoursPassed = timeDifference / (1000 * 60 * 60); return hoursPassed < 23; } /** * Retrieves the `lastModified` time string for a specified file from Code Execution Server. * * @param {string} fileIdentifier - The identifier for the file (e.g., "session_id/fileId"). * * @returns {Promise} * A promise that resolves to the `lastModified` time string of the file if successful, or null if there is an * error in initialization or fetching the info. */ async function getSessionInfo(fileIdentifier) { try { const baseURL = getCodeBaseURL(); const [path, queryString] = fileIdentifier.split('?'); const [session_id, fileId] = path.split('/'); let queryParams = {}; if (queryString) { queryParams = Object.fromEntries(new URLSearchParams(queryString).entries()); } const response = await axios({ method: 'get', url: `${baseURL}/sessions/${session_id}/objects/${fileId}`, params: queryParams, headers: { 'User-Agent': 'LibreChat/1.0', }, httpAgent: codeServerHttpAgent, httpsAgent: codeServerHttpsAgent, timeout: 5000, }); return response.data?.lastModified; } catch (error) { logAxiosError({ message: `Error fetching session info: ${error.message}`, error, }); return null; } } /** * * @param {Object} options * @param {ServerRequest} options.req * @param {Agent['tool_resources']} options.tool_resources * @param {string} [options.agentId] - The agent ID for file access control * @returns {Promise<{ * files: Array<{ id: string; session_id: string; name: string }>, * toolContext: string, * }>} */ const primeFiles = async (options) => { const { tool_resources, req, agentId } = options; const file_ids = tool_resources?.[EToolResources.execute_code]?.file_ids ?? []; const agentResourceIds = new Set(file_ids); const resourceFiles = tool_resources?.[EToolResources.execute_code]?.files ?? []; // Get all files first const allFiles = (await getFiles({ file_id: { $in: file_ids } }, null, { text: 0 })) ?? []; // Filter by access if user and agent are provided let dbFiles; if (req?.user?.id && agentId) { dbFiles = await filterFilesByAgentAccess({ files: allFiles, userId: req.user.id, role: req.user.role, agentId, }); } else { dbFiles = allFiles; } dbFiles = dbFiles.concat(resourceFiles); const files = []; const sessions = new Map(); let toolContext = ''; for (let i = 0; i < dbFiles.length; i++) { const file = dbFiles[i]; if (!file) { continue; } if (file.metadata.fileIdentifier) { const [path, queryString] = file.metadata.fileIdentifier.split('?'); const [session_id, id] = path.split('/'); /** * `pushFile` accepts optional overrides so the reupload path can * push the FRESH `(session_id, id)` parsed off the new * `fileIdentifier`. Without these overrides, the closure would * capture the stale pre-reupload refs from the outer loop and * the in-memory `files` array (now consumed by * `buildInitialToolSessions` to seed `Graph.sessions`) would * point at a sandbox object that no longer exists. The DB record * gets the new identifier via `updateFile`, but the seed would * still inject the old one — bash_tool / read_file would 404 * trying to mount the file until the next turn re-reads metadata. */ const pushFile = (overrideSessionId, overrideId) => { if (!toolContext) { toolContext = `- Note: The following files are available in the "${Tools.execute_code}" tool environment:`; } let fileSuffix = ''; if (!agentResourceIds.has(file.file_id)) { fileSuffix = file.context === FileContext.execute_code ? ' (from previous code execution)' : ' (attached by user)'; } toolContext += `\n\t- /mnt/data/${file.filename}${fileSuffix}`; files.push({ id: overrideId ?? id, session_id: overrideSessionId ?? session_id, name: file.filename, }); }; if (sessions.has(session_id)) { pushFile(); continue; } let queryParams = {}; if (queryString) { queryParams = Object.fromEntries(new URLSearchParams(queryString).entries()); } const reuploadFile = async () => { try { const { getDownloadStream } = getStrategyFunctions(file.source); const { handleFileUpload: uploadCodeEnvFile } = getStrategyFunctions( FileSources.execute_code, ); const stream = await getDownloadStream(options.req, file.filepath); const fileIdentifier = await uploadCodeEnvFile({ req: options.req, stream, filename: file.filename, entity_id: queryParams.entity_id, }); // Preserve existing metadata when adding fileIdentifier const updatedMetadata = { ...file.metadata, // Preserve existing metadata (like S3 storage info) fileIdentifier, // Add fileIdentifier }; await updateFile({ file_id: file.file_id, metadata: updatedMetadata, }); /** * Parse the FRESH fileIdentifier returned by the reupload and * route it through both the dedupe Map and the in-memory * `files` list. The original `(session_id, id)` parsed at the * top of this iteration refer to the old, expired/missing * sandbox object — using them here would silently re-introduce * the bug `Graph.sessions` seeding is supposed to fix. */ const [newPath] = fileIdentifier.split('?'); const [newSessionId, newId] = newPath.split('/'); sessions.set(newSessionId, true); pushFile(newSessionId, newId); } catch (error) { logger.error( `Error re-uploading file ${id} in session ${session_id}: ${error.message}`, error, ); } }; const uploadTime = await getSessionInfo(file.metadata.fileIdentifier); if (!uploadTime) { logger.warn(`Failed to get upload time for file ${id} in session ${session_id}`); await reuploadFile(); continue; } if (!checkIfActive(uploadTime)) { await reuploadFile(); continue; } sessions.set(session_id, true); pushFile(); } } return { files, toolContext }; }; /** * Reads a single file from the code-execution sandbox by shelling `cat` * through the sandbox `/exec` endpoint. Used by the `read_file` host * handler when the requested path is a code-env path (`/mnt/data/...`) * or otherwise not resolvable as a skill file. Resolves to * `{ content }` from stdout on success, or `null` when the codeapi base * URL isn't configured / the read returns no content (caller turns that * into a model-visible error). Throws axios-style errors on transport * failure so the caller can surface a meaningful error message. * * `session_id` and `files` come from the seeded `tc.codeSessionContext` * (emitted by the agents-side `ToolNode` for `read_file` calls in * v3.1.72+) so the read lands in the same sandbox session that holds * the agent's prior-turn artifacts. * * @param {Object} params * @param {string} params.file_path - Absolute path inside the sandbox (e.g. `/mnt/data/foo.txt`). * @param {string} [params.session_id] - Sandbox session id from the seeded context. * @param {Array<{id: string, name: string, session_id?: string}>} [params.files] - File refs to mount. * @returns {Promise<{content: string} | null>} */ async function readSandboxFile({ file_path, session_id, files }) { const baseURL = getCodeBaseURL(); if (!baseURL) { return null; } /** Single-quote `file_path` with embedded-quote escaping so a malicious * filename can't break out of the `cat` command. The handler upstream * has already established this is a code-env path the model * legitimately asked to read; this just keeps the shell quoting safe. */ const safePath = `'${file_path.replace(/'/g, `'\\''`)}'`; /** @type {Record} */ const postData = { lang: 'bash', code: `cat ${safePath}` }; if (session_id) { postData.session_id = session_id; } if (files && files.length > 0) { postData.files = files; } try { const response = await axios({ method: 'post', url: `${baseURL}/exec`, data: postData, headers: { 'Content-Type': 'application/json', 'User-Agent': 'LibreChat/1.0', }, httpAgent: codeServerHttpAgent, httpsAgent: codeServerHttpsAgent, timeout: 15000, }); const result = response?.data ?? {}; if (result.stderr && (result.stdout == null || result.stdout === '')) { throw new Error(String(result.stderr).trim()); } if (result.stdout == null) { return null; } return { content: String(result.stdout) }; } catch (error) { logAxiosError({ message: `Error reading sandbox file "${file_path}"`, error, }); throw error; } } module.exports = { primeFiles, checkIfActive, getSessionInfo, processCodeOutput, readSandboxFile, };