const path = require('path');
const { v4 } = require('uuid');
const { logger } = require('@librechat/data-schemas');
const { getCodeBaseURL } = require('@librechat/agents');
const {
  withTimeout,
  getBasePath,
  logAxiosError,
  hasOfficeHtmlPath,
  sanitizeArtifactPath,
  flattenArtifactPath,
  createAxiosInstance,
  getCodeApiAuthHeaders,
  classifyCodeArtifact,
  codeServerHttpAgent,
  codeServerHttpsAgent,
  extractCodeArtifactText,
  getExtractedTextFormat,
  getStorageMetadata,
  buildCodeEnvDownloadQuery,
} = require('@librechat/api');
const {
  Tools,
  megabyte,
  fileConfig,
  FileContext,
  FileSources,
  imageExtRegex,
  inferMimeType,
  EToolResources,
  EModelEndpoint,
  mergeFileConfig,
  getEndpointFileConfig,
} = require('librechat-data-provider');
const { filterFilesByAgentAccess } = require('~/server/services/Files/permissions');
const { createFile, getFiles, updateFile, claimCodeFile } = require('~/models');
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
const { convertImage } = require('~/server/services/Files/images/convert');
const { getRetentionExpiry } = require('~/server/services/Files/process');
const { determineFileType } = require('~/server/utils');

const axios = createAxiosInstance();

/**
 * Creates a fallback download URL response when the file cannot be processed locally.
 * Used when: file exceeds size limit, storage strategy unavailable, or download error occurs.
 * @param {Object} params - The parameters.
 * @param {string} params.name - The filename.
 * @param {string} params.session_id - The code execution session ID.
 * @param {string} params.id - The file ID from the code environment.
 * @param {string} params.conversationId - The current conversation ID.
 * @param {string} params.toolCallId - The tool call ID that generated the file.
 * @param {string} params.messageId - The current message ID.
 * @param {number} params.expiresAt - Expiration timestamp (24 hours from creation).
 * @returns {Object} Fallback response with download URL.
 */
const createDownloadFallback = ({
  id,
  name,
  messageId,
  expiresAt,
  session_id,
  toolCallId,
  conversationId,
}) => {
  const basePath = getBasePath();
  return {
    filename: name,
    filepath: `${basePath}/api/files/code/download/${session_id}/${id}`,
    expiresAt,
    conversationId,
    toolCallId,
    messageId,
  };
};
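/*
 * Illustrative shape of the fallback object (values are made up, not from
 * a real session): with an empty base path it resolves to roughly
 *   {
 *     filename: 'report.xlsx',
 *     filepath: '/api/files/code/download/sess-abc123/file-42',
 *     expiresAt: 1735689600000, // ~24h after creation
 *     conversationId, toolCallId, messageId,
 *   }
 * so the client can still offer a manual download when local processing
 * was skipped.
 */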
/**
 * Hard ceiling on the deferred preview rendering (HTML extraction + DB
 * update). The inner office-render path already has its own 12s timeout
 * and a concurrency-limited queue; this is the outer guard that catches
 * pathological cases where queue wait + render + DB write would
 * otherwise hang the file in `status: 'pending'` indefinitely.
 *
 * If the timeout fires the record is updated to `status: 'failed'`
 * with `previewError: 'timeout'` and the UI shows download-only.
 */
const PREVIEW_FINALIZE_TIMEOUT_MS = 60_000;

/**
 * Render the inline HTML preview for a code-execution file (or plain
 * text for non-office buckets that still benefit from caching), then
 * atomically transition the DB record to `status: 'ready'` (with
 * `text`/`textFormat`) or `status: 'failed'` (with `previewError`).
 *
 * Decoupled from `processCodeOutput` so the agent's final response is
 * not blocked on potentially slow office rendering. The caller fires
 * this without awaiting; promises continue running after the HTTP
 * response closes (Node doesn't kill them) and the frontend learns of
 * completion via the `attachment` update SSE event (if the stream is
 * still open) or via React Query polling otherwise. Process restart
 * is the only thing that can lose progress — covered by the boot-time
 * orphan sweep.
 *
 * @param {object} params
 * @param {Buffer} params.buffer - The full downloaded file contents,
 *   bounded by the server's `fileSizeLimit` config (defaults far above
 *   the 1MB extractor cap). The buffer is captured by the closure
 *   returned in `{ finalize }`, so when many office files queue behind
 *   the inner concurrency limiter (cap 2), all queued buffers stay
 *   resident until each one's slot frees. For a tool result emitting
 *   N office files, peak heap usage from this path is up to
 *   `N * fileSizeLimit`. Acceptable for typical agent runs (a handful
 *   of files at a few hundred KB each); pathological cases are bounded
 *   by the inner per-file 12s timeout and the outer 60s render cap.
 * @param {string} params.leafName - Basename for classification.
 * @param {string} params.mimeType - Detected/inferred MIME.
 * @param {string} params.category - Classifier output.
 * @param {string} params.file_id - The DB record key for the update.
 * @param {string} [params.previewRevision] - Generation marker stamped
 *   by the immediate persist step. The DB commit is conditional on
 *   this — if a newer emit (cross-turn filename reuse) has rotated
 *   the revision before this render finishes, `updateFile` returns
 *   null and the stale render is silently discarded rather than
 *   overwriting the newer record.
 * @returns {Promise} The post-update record on
 *   success; `null` if the DB update itself failed (extraction failure
 *   is reflected as `status: 'failed'`, not a thrown error) or if the
 *   `previewRevision` guard rejected the write.
 */
const finalizePreview = async ({
  buffer,
  leafName,
  mimeType,
  category,
  file_id,
  previewRevision,
}) => {
  let text = null;
  let previewError;
  try {
    text = await withTimeout(
      extractCodeArtifactText(buffer, leafName, mimeType, category),
      PREVIEW_FINALIZE_TIMEOUT_MS,
      `Preview extraction exceeded ${PREVIEW_FINALIZE_TIMEOUT_MS}ms`,
    );
  } catch (_error) {
    /* `extractCodeArtifactText` swallows its own errors and returns null,
     * so the only way to reach here is a `withTimeout` rejection — i.e.
     * the queue + render combined exceeded the outer 60s ceiling. */
    previewError = 'timeout';
    logger.warn(
      `[finalizePreview] ${file_id}: extraction timed out after ${PREVIEW_FINALIZE_TIMEOUT_MS}ms`,
    );
  }

  /* HTML-or-null contract (PR #12934): null result on an office file
   * must NOT fall back to plain text — surface as failed. Caller gates
   * on `hasOfficeHtmlPath`, so reaching here always means office. */
  const textFormat = getExtractedTextFormat(leafName, mimeType, text);
  const failed = text == null;
  const status = failed ? 'failed' : 'ready';
  if (failed && !previewError) {
    previewError = 'parser-error';
  }

  try {
    /* Conditional update: commit only if `previewRevision` still
     * matches what the immediate persist step stamped. If a newer
     * emit has rotated the revision (cross-turn filename reuse),
     * `updateFile` returns null and the stale render is silently
     * discarded. (Codex P1 review on PR #12957.) */
    const updated = await updateFile(
      {
        file_id,
        text,
        textFormat,
        status,
        previewError: failed ? previewError : null,
      },
      previewRevision ? { previewRevision } : undefined,
    );
    if (!updated && previewRevision) {
      logger.debug(
        `[finalizePreview] ${file_id}: stale render skipped — newer emit has superseded revision ${previewRevision}`,
      );
    }
    return updated;
  } catch (error) {
    logger.error(
      `[finalizePreview] ${file_id}: failed to persist preview result: ${error?.message ?? error}`,
    );
    return null;
  }
};
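/*
 * Lifecycle sketch (summarizes the states used above and below; field
 * values are illustrative): a deferred-preview record is first persisted
 * as
 *   { status: 'pending', text: null, textFormat: null, previewRevision: '<uuid>' }
 * and then settles on exactly one of
 *   { status: 'ready', text: '<rendered html>', textFormat: 'html' }
 *   { status: 'failed', previewError: 'timeout' | 'parser-error' | 'unexpected' }
 * where 'timeout' and 'parser-error' are written by finalizePreview and
 * 'unexpected' by runPreviewFinalize's defensive catch.
 */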
/**
 * Run the background `finalize` thunk returned by `processCodeOutput`
 * and route the resolved record to the caller's emit logic. Shared
 * between `callbacks.js` (chat-completions + Open Responses) and
 * `tools.js` (direct tool endpoint) so the fire-and-forget pattern
 * doesn't drift across callsites.
 *
 * `onResolved` receives the post-update DB record and is the only piece
 * that varies — chat-completions writes the legacy `attachment` SSE
 * event, Open Responses writes the spec-shaped `librechat:attachment`
 * event with a sequence number, and the direct tool endpoint has no
 * stream to write to (caller passes a no-op).
 *
 * The catch path is the safety net for unexpected programming errors
 * inside `finalizePreview` ONLY. The function is designed to never
 * throw (extraction and DB failures are translated to `status: 'failed'`
 * inside it), but a ref error or future regression would otherwise
 * leave the DB record stuck at `'pending'` until the boot-time orphan
 * sweep — potentially hours away on a stable server. We attempt a
 * best-effort `updateFile` to mark the record `'failed'` with
 * `previewError: 'unexpected'` so the UI stops polling and the
 * next-turn LLM context surfaces the failure.
 *
 * `onResolved` errors are deliberately isolated in their own try/catch.
 * Without that isolation, a transient transport-side failure (SSE write
 * race after the stream closed, an emitter listener throwing) would
 * propagate into the finalize catch and downgrade an *already-resolved*
 * record to `failed` with `previewError: 'unexpected'` — surfacing
 * "preview unavailable" in the UI even though extraction succeeded
 * and the file is on disk. The emit failure is logged but the DB
 * record stays at whatever `finalizePreview` wrote (typically
 * `'ready'`), so the polling layer / next page load still sees the
 * resolved preview.
 *
 * @param {object} params
 * @param {(() => Promise) | undefined} params.finalize - The
 *   thunk returned by `processCodeOutput`. No-op when undefined.
 * @param {string | undefined} params.fileId - DB key for the failure
 *   marker; if absent the catch only logs.
 * @param {string | undefined} [params.previewRevision] - Generation
 *   marker stamped by the immediate persist step. The defensive
 *   `updateFile` in the catch is conditional on this — if a newer
 *   emit has rotated the revision, the stale failure marker is
 *   silently discarded so a programming error from an older render
 *   doesn't override a newer turn's record.
 * @param {(updated: object) => void} [params.onResolved] - Called once
 *   on success with the post-update record.
 */
const runPreviewFinalize = ({ finalize, fileId, previewRevision, onResolved }) => {
  if (typeof finalize !== 'function') {
    return;
  }
  finalize()
    .then((updated) => {
      if (!updated || !onResolved) {
        return;
      }
      /* Isolated try/catch — a throw inside `onResolved` (transport-side
       * SSE write race, emitter listener error) MUST NOT propagate to
       * the outer `.catch`, which would downgrade an already-resolved
       * record to `failed` with `previewError: 'unexpected'`.
       * Extraction succeeded at this point and `finalizePreview` has
       * already persisted the terminal status; the polling layer / next
       * page load will surface the resolved preview even if this turn's
       * SSE emit didn't land. */
      try {
        onResolved(updated);
      } catch (emitError) {
        logger.error(
          `[runPreviewFinalize] onResolved threw for ${fileId}; record stays at the finalized status:`,
          emitError,
        );
      }
    })
    .catch((error) => {
      logger.error('Error rendering deferred preview:', error);
      if (!fileId) {
        return;
      }
      updateFile(
        {
          file_id: fileId,
          status: 'failed',
          previewError: 'unexpected',
        },
        previewRevision ? { previewRevision } : undefined,
      ).catch((updateErr) => {
        logger.error(
          `[runPreviewFinalize] also failed to mark ${fileId} as failed after error:`,
          updateErr,
        );
      });
    });
};

/**
 * Process code execution output files — downloads and saves both images
 * and non-image files. All files are saved to local storage with
 * `codeEnvRef` metadata for code env re-upload.
 *
 * Returns a two-part shape so callers can ship the attachment to the
 * client immediately and run preview extraction in the background:
 * - `file`: persisted metadata (file is on disk, downloadable, and
 *   has `status: 'pending'` if a preview is still being rendered).
 * - `finalize` (optional): a thunk returning the deferred preview
 *   result promise. Present only when an inline HTML preview is
 *   expected (office buckets — DOCX/XLSX/XLS/ODS/CSV/PPTX). Caller
 *   decides whether to await or fire-and-forget.
 *
 * Existing fallback paths (size limit, missing storage strategy, error
 * catch) return `{ file }` with no `finalize` — there's nothing to
 * extract.
 *
 * @param {ServerRequest} params.req - The Express request object.
 * @param {string} params.id - The file ID from the code environment.
 * @param {string} params.name - The filename.
 * @param {string} params.toolCallId - The tool call ID that generated the file.
 * @param {string} params.session_id - The code execution session ID.
 * @param {string} params.conversationId - The current conversation ID.
 * @param {string} params.messageId - The current message ID.
 * @returns {Promise<{ file: MongoFile & { messageId: string, toolCallId: string }, finalize?: () => Promise }>}
 */
const processCodeOutput = async ({
  req,
  id,
  name,
  toolCallId,
  conversationId,
  messageId,
  session_id,
}) => {
  const appConfig = req.config;
  const currentDate = new Date();
  const baseURL = getCodeBaseURL();
  const fileExt = path.extname(name).toLowerCase();
  const isImage = fileExt && imageExtRegex.test(name);
  const mergedFileConfig = mergeFileConfig(appConfig.fileConfig);
  const endpointFileConfig = getEndpointFileConfig({
    fileConfig: mergedFileConfig,
    endpoint: EModelEndpoint.agents,
  });
  const fileSizeLimit = endpointFileConfig.fileSizeLimit ?? mergedFileConfig.serverFileSizeLimit;

  try {
    const formattedDate = currentDate.toISOString();
    const authHeaders = await getCodeApiAuthHeaders(req);
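    /*
     * Illustrative request shape for the download below (IDs are made up;
     * the exact string buildCodeEnvDownloadQuery produces is an assumption,
     * shown only to make the flow concrete):
     *   GET {baseURL}/download/sess-abc123/file-42?kind=user&id=<req.user.id>
     */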
    /* Code-output files are always user-private — no skill execution
     * produces a skill-scoped output bucket. The download URL must
     * carry `?kind=user&id=` so codeapi's `sessionAuth`
     * resolves the matching `:user:` sessionKey. See
     * codeapi #1455 / Phase C. */
    const downloadQuery = buildCodeEnvDownloadQuery({ kind: 'user', id: req.user.id });
    const response = await axios({
      method: 'get',
      url: `${baseURL}/download/${session_id}/${id}${downloadQuery}`,
      responseType: 'arraybuffer',
      headers: {
        'User-Agent': 'LibreChat/1.0',
        ...authHeaders,
      },
      httpAgent: codeServerHttpAgent,
      httpsAgent: codeServerHttpsAgent,
      timeout: 15000,
    });
    const buffer = Buffer.from(response.data, 'binary');

    // Enforce file size limit
    if (buffer.length > fileSizeLimit) {
      logger.warn(
        `[processCodeOutput] File "${name}" (${(buffer.length / megabyte).toFixed(2)} MB) exceeds size limit of ${(fileSizeLimit / megabyte).toFixed(2)} MB, falling back to download URL`,
      );
      return {
        file: createDownloadFallback({
          id,
          name,
          messageId,
          toolCallId,
          session_id,
          conversationId,
          expiresAt: currentDate.getTime() + 86400000,
        }),
      };
    }

    /* Code-output files belong to the user who ran the execution.
     * SessionKey on codeapi will be `:user:` for these,
     * so cache and access stay user-private. */
    const codeEnvRef = {
      kind: 'user',
      id: req.user.id,
      storage_session_id: session_id,
      file_id: id,
    };

    /* `safeName` keeps the directory structure (`a/b/file.txt` -> `a/b/file.txt`)
     * so the next prime() can place the file at the same nested path in the
     * sandbox; flattening would re-create the bug where every nested artifact
     * collapsed into the root and read_file calls 404'd. The flat-form
     * storage key is composed below once `file_id` is known so we can cap
     * the total length at filesystem NAME_MAX. */
    const safeName = sanitizeArtifactPath(name);
    if (safeName !== name) {
      logger.warn(
        `[processCodeOutput] Filename sanitized: "${name}" -> "${safeName}" | conv=${conversationId}`,
      );
    }

    /**
     * Atomically claim a file_id for this (filename, conversationId, context) tuple.
     * Uses $setOnInsert so concurrent calls for the same filename converge on
     * a single record instead of creating duplicates (TOCTOU race fix).
     *
     * Claim by `safeName` (not raw `name`) so the claim and the eventual
     * `createFile` agree on the filename column — otherwise weird inputs
     * (e.g. `"proj name/file@v1.txt"`) would claim under the raw name and
     * then write under the sanitized one, leaving the claim row orphaned.
     */
    const newFileId = v4();
    const claimed = await claimCodeFile({
      filename: safeName,
      conversationId,
      file_id: newFileId,
      user: req.user.id,
      tenantId: req.user.tenantId,
    });
    const file_id = claimed.file_id;
    const isUpdate = file_id !== newFileId;
    if (isUpdate) {
      logger.debug(
        `[processCodeOutput] Updating existing file "${safeName}" (${file_id}) instead of creating duplicate`,
      );
    }
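    /*
     * Convergence example (illustrative): two concurrent emits of
     * "plots/chart.png" in the same conversation each mint a fresh v4 id
     * and call claimCodeFile; the first insert wins, the second call gets
     * the winner's file_id back, so its `isUpdate` is true and both writes
     * land on the same record instead of creating a duplicate.
     */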
    /**
     * Preserve the original `messageId` on update. Each `processCodeOutput`
     * call would otherwise overwrite it with the current run's run id, which
     * decouples the file from the assistant message that originally created
     * it. `getCodeGeneratedFiles` filters by `messageId IN `, so a
     * stale id (e.g. from a later regeneration / failed re-read attempt)
     * silently excludes the file from priming on subsequent turns.
     */
    const persistedMessageId = isUpdate ? (claimed.messageId ?? messageId) : messageId;

    if (isImage) {
      const usage = isUpdate ? (claimed.usage ?? 0) + 1 : 1;
      const _file = await convertImage(req, buffer, 'high', `${file_id}${fileExt}`);
      const filepath = usage > 1 ? `${_file.filepath}?v=${Date.now()}` : _file.filepath;
      const storageMetadata = getStorageMetadata({
        filepath: _file.filepath,
        source: appConfig.fileStrategy,
        storageKey: _file.storageKey,
        storageRegion: _file.storageRegion,
      });
      const file = {
        ..._file,
        filepath,
        ...storageMetadata,
        file_id,
        messageId: persistedMessageId,
        usage,
        filename: safeName,
        conversationId,
        user: req.user.id,
        tenantId: req.user.tenantId,
        type: `image/${appConfig.imageOutputType}`,
        createdAt: isUpdate ? claimed.createdAt : formattedDate,
        updatedAt: formattedDate,
        source: appConfig.fileStrategy,
        context: FileContext.execute_code,
        metadata: { codeEnvRef },
        ...getRetentionExpiry(req),
      };
      await createFile(file, true);
      return { file: Object.assign(file, { messageId, toolCallId }) };
    }

    const { saveBuffer } = getStrategyFunctions(appConfig.fileStrategy);
    if (!saveBuffer) {
      logger.warn(
        `[processCodeOutput] saveBuffer not available for strategy ${appConfig.fileStrategy}, falling back to download URL`,
      );
      return {
        file: createDownloadFallback({
          id,
          name,
          messageId,
          toolCallId,
          session_id,
          conversationId,
          expiresAt: currentDate.getTime() + 86400000,
        }),
      };
    }

    const detectedType = await determineFileType(buffer, true);
    const mimeType = detectedType?.mime || inferMimeType(name, '') || 'application/octet-stream';

    /** Check MIME type support - for code-generated files, we're lenient but log unsupported types */
    const isSupportedMimeType = fileConfig.checkType(
      mimeType,
      endpointFileConfig.supportedMimeTypes,
    );
    if (!isSupportedMimeType) {
      logger.warn(
        `[processCodeOutput] File "${name}" has unsupported MIME type "${mimeType}", proceeding with storage but may not be usable as tool resource`,
      );
    }

    /* Compose the storage key here, after `file_id` is known, so the
     * `flattenArtifactPath` cap budget can be calculated against the
     * actual prefix length. The full key has to fit in one filesystem
     * path component (NAME_MAX = 255 on most filesystems); without this
     * cap, deeply-nested artifact paths whose individual segments were
     * within bounds can still produce a flat form that overflows once
     * `${file_id}__` is prepended, causing `ENAMETOOLONG` inside
     * saveBuffer and falling back to a download URL. The 255 figure is
     * the conservative cross-platform NAME_MAX (Linux ext4, NTFS, APFS). */
    const NAME_MAX = 255;
    const flatName = flattenArtifactPath(safeName, NAME_MAX - file_id.length - 2);
    const fileName = `${file_id}__${flatName}`;
    const filepath = await saveBuffer({
      userId: req.user.id,
      buffer,
      fileName,
      basePath: 'uploads',
      tenantId: req.user.tenantId,
    });
    const storageMetadata = getStorageMetadata({
      filepath,
      source: appConfig.fileStrategy,
    });

    /* `classifyCodeArtifact` and `extractCodeArtifactText` make
     * extension/bare-name decisions on the input string. With the
     * path-preserving sanitizer they can now receive a nested path like
     * `reports.v1/Makefile`, which the classifier's `extensionOf` reads
     * as `v1/Makefile` (the slice after the dot in the directory name)
     * and the bare-name branch rejects it because it sees a `.` anywhere
     * in the string. Result: extensionless artifacts under dotted folders
     * (Makefile, Dockerfile, etc.) get misclassified as `other` and
     * skip text extraction. Pass the basename so classification matches
     * what it would have gotten with the old flat-name flow. */
    const leafName = path.basename(safeName);
    const category = classifyCodeArtifact(leafName, mimeType);
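    /*
     * Example (path is illustrative): for safeName 'reports.v1/Makefile',
     * leafName is 'Makefile', so the classifier sees a bare extensionless
     * name instead of misreading 'v1/Makefile' as an extension and
     * bucketing the artifact as `other`.
     */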
    /* Office-bucket files (DOCX/XLSX/XLS/ODS/CSV/PPTX) route through
     * `bufferToOfficeHtml` which is CPU-heavy. Persist the record now
     * with `status: 'pending'` and `text: null` so the agent's response
     * isn't blocked, then return a `finalize` thunk the caller can run
     * in the background. Non-office files have cheap or no extraction
     * — run it inline so the caller gets a fully-resolved record
     * without juggling a finalize step. */
    const expectsPreview = hasOfficeHtmlPath(leafName, mimeType);
    const baseFile = {
      file_id,
      filepath,
      ...storageMetadata,
      messageId: persistedMessageId,
      object: 'file',
      filename: safeName,
      type: mimeType,
      conversationId,
      user: req.user.id,
      tenantId: req.user.tenantId,
      bytes: buffer.length,
      updatedAt: formattedDate,
      metadata: { codeEnvRef },
      source: appConfig.fileStrategy,
      context: FileContext.execute_code,
      usage: isUpdate ? (claimed.usage ?? 0) + 1 : 1,
      createdAt: isUpdate ? claimed.createdAt : formattedDate,
      ...getRetentionExpiry(req),
    };

    if (expectsPreview) {
      /* Persist with `status: 'pending'` and explicit
       * `text: null` / `textFormat: null` so an update that previously
       * had cached text gets cleared. The deferred finalize transitions
       * to 'ready' (with text/textFormat) or 'failed' (with
       * previewError).
       *
       * `previewRevision` is a fresh UUID stamped on every emit. The
       * deferred finalize's `updateFile` is conditional on this — if
       * a newer turn (cross-turn filename reuse) has rotated the
       * revision before this render finishes, the stale render is
       * silently discarded rather than overwriting the newer record.
       * (Codex P1 review on PR #12957.) */
      const previewRevision = v4();
      const file = {
        ...baseFile,
        text: null,
        textFormat: null,
        status: 'pending',
        previewError: null,
        previewRevision,
      };
      await createFile(file, true);
      return {
        file: Object.assign(file, { messageId, toolCallId }),
        finalize: () =>
          finalizePreview({ buffer, leafName, mimeType, category, file_id, previewRevision }),
        previewRevision,
      };
    }
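    /*
     * Return-shape contrast (field subsets are illustrative): the office
     * branch above resolves to
     *   { file: { ..., status: 'pending', text: null }, finalize, previewRevision }
     * while the non-office path below resolves to
     *   { file: { ..., text, textFormat } }
     * with no finalize thunk to run.
     */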
    /* Non-office path: extraction is cheap (utf8 decode, parseDocument
     * for PDF/ODT, or null for binaries). Run inline and return a
     * fully-resolved record — no `finalize` needed. */
    const text = await extractCodeArtifactText(buffer, leafName, mimeType, category);

    /* `textFormat` accompanies `text` so the client can gate
     * office-HTML-bucket routing on a trusted signal — clients MUST
     * NOT inject `text` into the iframe as HTML unless `textFormat ===
     * 'html'`. RAG-uploaded `.docx` etc. arrive with plain text from
     * mammoth.extractRawText and would otherwise be hijacked by the
     * extension-based office routing into the HTML-injection path
     * (Codex P1 review on PR #12934). null on extract failure — the
     * client treats absence as 'text' for safety. */
    const textFormat = getExtractedTextFormat(leafName, mimeType, text);
    const file = {
      ...baseFile,
      // Always set explicitly so an update which produces a binary or
      // oversized artifact clears any previously cached text — createFile
      // uses findOneAndUpdate with $set semantics.
      text: text ?? null,
      textFormat: textFormat ?? null,
      // Clear deferred-preview lifecycle fields in case the prior emit
      // at this (filename, conversationId) was an office file —
      // otherwise stale `pending`/`failed` would persist and the client
      // would render the wrong state for the now non-office artifact.
      status: null,
      previewError: null,
      previewRevision: null,
    };
    await createFile(file, true);
    return { file: Object.assign(file, { messageId, toolCallId }) };
  } catch (error) {
    if (error?.message === 'Path traversal detected in filename') {
      logger.warn(
        `[processCodeOutput] Path traversal blocked for file "${name}" | conv=${conversationId}`,
      );
    }
    logAxiosError({
      message: 'Error downloading/processing code environment file',
      error,
    });
    logger.warn(
      `[processCodeOutput] Falling back to Code API download URL for strategy ${appConfig.fileStrategy}`,
    );
    // Fallback for download errors - return download URL so user can still manually download
    return {
      file: createDownloadFallback({
        id,
        name,
        messageId,
        toolCallId,
        session_id,
        conversationId,
        expiresAt: currentDate.getTime() + 86400000,
      }),
    };
  }
};

function checkIfActive(dateString) {
  const givenDate = new Date(dateString);
  const currentDate = new Date();
  const timeDifference = currentDate - givenDate;
  const hoursPassed = timeDifference / (1000 * 60 * 60);
  return hoursPassed < 23;
}

/**
 * Retrieves the `lastModified` time string for a specified file from Code Execution Server.
 *
 * @param {import('librechat-data-provider').CodeEnvRef} ref - Typed pointer
 *   into codeapi storage. Carries kind/id/storage_session_id/file_id;
 *   codeapi resolves the sessionKey from the request's auth context.
 * @param {ServerRequest} [req] - Current authenticated request, used to mint Code API auth.
 *
 * @returns {Promise}
 *   A promise that resolves to the `lastModified` time string of the file if successful, or null if there is an
 *   error in initialization or fetching the info.
 */
async function getSessionInfo(ref, req) {
  try {
    const baseURL = getCodeBaseURL();
    const authHeaders = await getCodeApiAuthHeaders(req);
    /* `/sessions/.../objects/...` is gated by codeapi's `sessionAuth`
     * middleware (post-Phase C). The middleware reconstructs the
     * sessionKey from the URL query (`kind`/`id`/`version?`) plus the
     * requester's auth context, then matches it against the cached
     * sessionKey on the storage bucket. We have the full `codeEnvRef`
     * here, so pass kind+id (+version when skill) directly. */
    const query = buildCodeEnvDownloadQuery({
      kind: ref.kind,
      id: ref.id,
      ...(ref.kind === 'skill' ? { version: ref.version } : {}),
    });
    const response = await axios({
      method: 'get',
      url: `${baseURL}/sessions/${ref.storage_session_id}/objects/${ref.file_id}${query}`,
      headers: {
        'User-Agent': 'LibreChat/1.0',
        ...authHeaders,
      },
      httpAgent: codeServerHttpAgent,
      httpsAgent: codeServerHttpsAgent,
      timeout: 5000,
    });
    return response.data?.lastModified;
  } catch (error) {
    logAxiosError({
      message: `Error fetching session info: ${error.message}`,
      error,
    });
    return null;
  }
}

const getPreviewContextSuffix = (file) => {
  if (file.status === 'pending') {
    return ' (preview not yet generated)';
  }
  if (file.status !== 'failed') {
    return '';
  }
  return file.previewError
    ? ` (preview unavailable: ${file.previewError})`
    : ' (preview unavailable)';
};
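/*
 * Example of the assembled tool context built by the helpers below
 * (filenames are illustrative; assumes Tools.execute_code renders as
 * "execute_code"):
 *   - Note: The following files are available in the "execute_code" tool environment:
 *       - /mnt/data/sales_q3.xlsx
 *       - /mnt/data/notes.txt (attached by user)
 * Code-output files (context === execute_code) contribute no line at all.
 */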
const getVisibleCodeFileContextLine = (file, agentResourceIds) => {
  if (file.context === FileContext.execute_code) {
    return '';
  }
  const fileSuffix = agentResourceIds.has(file.file_id) ? '' : ' (attached by user)';
  return `\n\t- /mnt/data/${file.filename}${fileSuffix}${getPreviewContextSuffix(file)}`;
};

const appendVisibleCodeFileContext = (toolContext, contextLine) => {
  if (!contextLine) {
    return toolContext;
  }
  if (toolContext) {
    return `${toolContext}${contextLine}`;
  }
  return `- Note: The following files are available in the "${Tools.execute_code}" tool environment:${contextLine}`;
};

/**
 *
 * @param {Object} options
 * @param {ServerRequest} options.req
 * @param {Agent['tool_resources']} options.tool_resources
 * @param {string} [options.agentId] - The agent ID for file access control
 * @returns {Promise<{
 *   files: Array<{ id: string; session_id: string; name: string }>,
 *   toolContext: string,
 * }>}
 */
const primeFiles = async (options) => {
  const { tool_resources, req, agentId } = options;
  const file_ids = tool_resources?.[EToolResources.execute_code]?.file_ids ?? [];
  const agentResourceIds = new Set(file_ids);
  const resourceFiles = tool_resources?.[EToolResources.execute_code]?.files ?? [];

  /* Step 1 of the priming trace: input volume. Pair with the
   * per-file `[primeCodeFiles] file=...` lines and the final
   * `[primeCodeFiles] returned=...` line below to locate which
   * layer drops a file the sandbox doesn't end up seeing. */
  logger.debug(
    `[primeCodeFiles] in: file_ids=${file_ids.length} resourceFiles=${resourceFiles.length}`,
    { agentId, file_ids, resourceFileIds: resourceFiles.map((f) => f?.file_id) },
  );

  // Get all files first
  const allFiles = (await getFiles({ file_id: { $in: file_ids } }, null, { text: 0 })) ?? [];

  // Filter by access if user and agent are provided
  let dbFiles;
  if (req?.user?.id && agentId) {
    dbFiles = await filterFilesByAgentAccess({
      files: allFiles,
      userId: req.user.id,
      role: req.user.role,
      agentId,
    });
  } else {
    dbFiles = allFiles;
  }
  dbFiles = dbFiles.concat(resourceFiles);

  const files = [];
  const sessions = new Map();
  let toolContext = '';

  /* Per-file path counters — emitted at the bottom so a single
   * grep on `[primeCodeFiles]` shows the input volume, the per-file
   * paths taken, and the final dispatch summary in one trace. */
  let skippedNoRef = 0;
  let reuploadFailures = 0;

  for (let i = 0; i < dbFiles.length; i++) {
    const file = dbFiles[i];
    if (!file) {
      continue;
    }
    const ref = file.metadata?.codeEnvRef;
    if (!ref) {
      skippedNoRef += 1;
      logger.debug(
        `[primeCodeFiles] file=${file.file_id} path=skip reason=no-codeenvref filename=${file.filename}`,
      );
      continue;
    }
    const session_id = ref.storage_session_id;
    const id = ref.file_id;
    /**
     * `pushFile` accepts optional overrides so the reupload path can
     * push the FRESH `(storage_session_id, file_id)` from the new
     * `codeEnvRef`. Without these overrides, the closure would
     * capture the stale pre-reupload refs from the outer loop and
     * the in-memory `files` array (now consumed by
     * `buildInitialToolSessions` to seed `Graph.sessions`) would
     * point at a sandbox object that no longer exists. The DB record
     * gets the new ref via `updateFile`, but the seed would still
     * inject the old one — bash_tool / read_file would 404 trying to
     * mount the file until the next turn re-reads metadata.
     *
     * `kind`, `id`, `version` are preserved on the in-memory ref so
     * codeapi can resolve sessionKey per-file (kind switch +
     * tenant prefix from auth context).
     */
    const pushFile = (overrideSessionId, overrideId) => {
      toolContext = appendVisibleCodeFileContext(
        toolContext,
        getVisibleCodeFileContextLine(file, agentResourceIds),
      );
      /* `id` is the storage file_id (drives codeapi's upload-key
       * existence check), `resource_id` is the entity that owns
       * the storage session (drives sessionKey re-derivation). For
       * code-output files this is `kind: 'user'` and `resource_id`
       * is informational (codeapi ignores it for user kind), but
       * we still send it for shape uniformity with shared kinds. */
      files.push({
        id: overrideId ?? id,
        resource_id: ref.id,
        storage_session_id: overrideSessionId ?? session_id,
        name: file.filename,
        kind: ref.kind,
        ...(ref.kind === 'skill' ? { version: ref.version } : {}),
      });
    };

    if (sessions.has(session_id)) {
      logger.debug(
        `[primeCodeFiles] file=${file.file_id} path=cache-hit-by-session storage_session_id=${session_id}`,
      );
      pushFile();
      continue;
    }

    const reuploadFile = async () => {
      try {
        const { getDownloadStream } = getStrategyFunctions(file.source);
        const { handleFileUpload: uploadCodeEnvFile } = getStrategyFunctions(
          FileSources.execute_code,
        );
        const stream = await getDownloadStream(options.req, file.filepath);
        /* Reupload preserves the resource identity from the existing
         * ref so codeapi re-buckets under the same sessionKey shape
         * (skill stays skill, user stays user). Without this, a
         * skill-cache-miss reupload would land in the user bucket
         * and never be re-shareable cross-user. */
        const uploaded = await uploadCodeEnvFile({
          req: options.req,
          stream,
          filename: file.filename,
          kind: ref.kind,
          id: ref.id,
          ...(ref.kind === 'skill' ? { version: ref.version } : {}),
        });
        /**
         * Use the FRESH `(storage_session_id, file_id)` from the
         * reupload response and route it through the dedupe Map, the
         * persisted record, and the in-memory `files` list. The
         * original ref captured at the top of this iteration refers
         * to the old, expired/missing sandbox object — using it here
         * would silently re-introduce the bug `Graph.sessions`
         * seeding is supposed to fix.
         *
         * `kind`, `id`, `version` survive the round-trip: the
         * upload preserves the resource identity, only the storage
         * pointer changes.
         */
        const newRef = {
          kind: ref.kind,
          id: ref.id,
          storage_session_id: uploaded.storage_session_id,
          file_id: uploaded.file_id,
          ...(ref.kind === 'skill' ? { version: ref.version } : {}),
        };
        const updatedMetadata = {
          ...file.metadata,
          codeEnvRef: newRef,
        };
        await updateFile({
          file_id: file.file_id,
          metadata: updatedMetadata,
        });
        sessions.set(newRef.storage_session_id, true);
        pushFile(newRef.storage_session_id, newRef.file_id);
        logger.debug(
          `[primeCodeFiles] file=${file.file_id} path=reupload-success ` +
            `oldSession=${session_id} newSession=${newRef.storage_session_id} newFileId=${newRef.file_id}`,
        );
      } catch (error) {
        reuploadFailures += 1;
        logger.error(
          `[primeCodeFiles] file=${file.file_id} path=reupload-failed session=${session_id}: ${error.message}`,
          error,
        );
      }
    };

    const uploadTime = await getSessionInfo(ref, req);
    if (!uploadTime) {
      logger.debug(
        `[primeCodeFiles] file=${file.file_id} path=reupload reason=no-uploadtime ` +
          `storage_session_id=${session_id}`,
      );
      await reuploadFile();
      continue;
    }

    if (!checkIfActive(uploadTime)) {
      logger.debug(
        `[primeCodeFiles] file=${file.file_id} path=reupload reason=stale ` +
          `uploadTime=${uploadTime} storage_session_id=${session_id}`,
      );
      await reuploadFile();
      continue;
    }

    sessions.set(session_id, true);
    logger.debug(
      `[primeCodeFiles] file=${file.file_id} path=fresh-active storage_session_id=${session_id}`,
    );
    pushFile();
  }

  /* Dispatch summary — emitted unconditionally so a single grep on
   * `[primeCodeFiles] out` always shows the final state, not only
   * the per-path trail leading up to it. */
  logger.debug(
    `[primeCodeFiles] out: returned=${files.length} ` +
      `skippedNoRef=${skippedNoRef} reuploadFailures=${reuploadFailures}`,
  );

  return { files, toolContext };
};

/**
 * Reads a single file from the code-execution sandbox by shelling `cat`
 * through the sandbox `/exec` endpoint. Used by the `read_file` host
 * handler when the requested path is a code-env path (`/mnt/data/...`)
 * or otherwise not resolvable as a skill file. Resolves to
 * `{ content }` from stdout on success, or `null` when the codeapi base
 * URL isn't configured / the read returns no content (caller turns that
 * into a model-visible error). Throws axios-style errors on transport
 * failure so the caller can surface a meaningful error message.
 *
 * `session_id` and `files` come from the seeded `tc.codeSessionContext`
 * (emitted by the agents-side `ToolNode` for `read_file` calls in
 * v3.1.72+) so the read lands in the same sandbox session that holds
 * the agent's prior-turn artifacts.
 *
 * @param {Object} params
 * @param {string} params.file_path - Absolute path inside the sandbox (e.g. `/mnt/data/foo.txt`).
 * @param {string} [params.session_id] - Sandbox session id from the seeded context.
 * @param {Array<{id: string, name: string, session_id?: string}>} [params.files] - File refs to mount.
 * @param {ServerRequest} [params.req] - Current authenticated request, used to mint Code API auth.
 * @returns {Promise<{content: string} | null>}
 */
async function readSandboxFile({ file_path, session_id, files, req }) {
  const baseURL = getCodeBaseURL();
  if (!baseURL) {
    return null;
  }
  /** Single-quote `file_path` with embedded-quote escaping so a malicious
   * filename can't break out of the `cat` command. The handler upstream
   * has already established this is a code-env path the model
   * legitimately asked to read; this just keeps the shell quoting safe. */
  const safePath = `'${file_path.replace(/'/g, `'\\''`)}'`;
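  /*
   * Escaping example (path is illustrative): "/mnt/data/it's noted.txt"
   * becomes '/mnt/data/it'\''s noted.txt', so an embedded single quote
   * cannot terminate the quoted argument handed to `cat`.
   */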
  /** @type {Record} */
  const postData = { lang: 'bash', code: `cat ${safePath}` };
  if (session_id) {
    postData.session_id = session_id;
  }
  if (files && files.length > 0) {
    postData.files = files;
  }

  try {
    const authHeaders = await getCodeApiAuthHeaders(req);
    const response = await axios({
      method: 'post',
      url: `${baseURL}/exec`,
      data: postData,
      headers: {
        'Content-Type': 'application/json',
        'User-Agent': 'LibreChat/1.0',
        ...authHeaders,
      },
      httpAgent: codeServerHttpAgent,
      httpsAgent: codeServerHttpsAgent,
      timeout: 15000,
    });
    const result = response?.data ?? {};
    if (result.stderr && (result.stdout == null || result.stdout === '')) {
      throw new Error(String(result.stderr).trim());
    }
    if (result.stdout == null) {
      return null;
    }
    return { content: String(result.stdout) };
  } catch (error) {
    logAxiosError({
      message: `Error reading sandbox file "${file_path}"`,
      error,
    });
    throw error;
  }
}

module.exports = {
  primeFiles,
  checkIfActive,
  getSessionInfo,
  processCodeOutput,
  readSandboxFile,
  runPreviewFinalize,
};
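/*
 * Usage sketch for the deferred-preview flow (hedged: `sendAttachment`
 * and the call site are illustrative, not part of this module; the
 * pattern itself is the one documented on runPreviewFinalize above):
 *
 *   const { file, finalize, previewRevision } = await processCodeOutput({
 *     req, id, name, toolCallId, conversationId, messageId, session_id,
 *   });
 *   sendAttachment(file); // ship immediately; office previews are still 'pending'
 *   runPreviewFinalize({
 *     finalize,
 *     fileId: file.file_id,
 *     previewRevision,
 *     onResolved: (updated) => sendAttachment(updated),
 *   });
 */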