mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-05-13 16:07:30 +00:00
* 📄 feat: Auto-render Text-Based Code Execution Artifacts Inline Eagerly extract text content from non-image artifacts produced by code execution tools and render it inline in the message instead of behind a click-to-download file card. Reuses the SkillFiles binary-detection helper and the existing parseDocument dispatcher so docx, xlsx, csv, html, code, and other text-renderable formats land directly under the tool call. PPTX is intentionally classified but not yet extracted — follow-up. * 🌐 chore: Remove unused com_download_expires locale key Removed in en/translation.json so the detect-unused-i18n-keys CI check passes. The only reference was a commented-out localize() call in LogContent.tsx that was deleted in the previous commit. * 🩹 fix: Address PR review on code artifact text extraction - extract.ts: build the temp document path from a randomUUID and pass path.basename(name) as originalname so a malicious artifact name cannot escape os.tmpdir() (P1 traversal flagged by codex/Copilot). - process.js: classify and extract using safeName, not the raw name — defense in depth alongside the temp-path fix. - classify.ts: add a bare-name lookup so extensionless text artifacts (Makefile, Dockerfile, …) classify as utf8-text instead of falling through to other. - Attachment.tsx: wire aria-expanded / aria-controls on the show-all toggle for screen reader support. - LogContent.tsx: restore a download chip (LogLink) on inline-text attachments so users can still pull down the underlying file. - Tests: cover extensionless filenames and the temp-path traversal invariant. * 🩹 fix: Address comprehensive PR review on code artifact extraction - extract.ts: walk back to a UTF-8 code-point boundary before truncating so cuts cannot land mid-multibyte and emit U+FFFD (CJK/emoji concern). truncate() now accepts the original buffer to skip a redundant encode. 
- extract.ts: add an 8s timeout around parseDocument via Promise.race so a pathological docx/xlsx cannot stall the response path. - process.js: always set `text` (string or null) on the file payload — createFile uses findOneAndUpdate with $set semantics, so omitting the field leaves a stale value behind when an artifact's content changes. - Attachment.tsx: switch the show-all toggle from char-count threshold to a useLayoutEffect ref measurement on scrollHeight, and use overflow-hidden when collapsed (overflow-auto when expanded) so the collapsed box has a single clear interaction model. - Attachment.tsx + LogContent.tsx: lift `isImageAttachment` / `isTextAttachment` into a shared attachmentTypes module. LogContent keeps its looser image check (no width/height required) because the legacy log surface receives attachments without dimensions. - Tests: cover multi-byte boundary, the always-set-text contract on updates, and the new shared predicates. * 🧪 test: Component test for TextAttachment + direct withTimeout coverage - Attachment.tsx: re-order local imports longest-to-shortest per AGENTS.md (attachmentTypes ahead of FileContainer/Image). - extract.ts: export withTimeout so it can be unit-tested directly (it's also used internally — exporting carries no runtime cost). - extract.spec.ts: three small unit tests on withTimeout that cover resolve, propagated rejection, and timeout rejection paths with real timers. - TextAttachment.test.tsx: ten cases for the new React component — text rendering in <pre>, download chip presence/absence, ref-based collapse measurement (with scrollHeight stubbed via prototype), aria-expanded toggle, fall-through to FileAttachment for missing and empty text, and AttachmentGroup routing. 
* 🩹 fix: Canonicalize document MIME by extension before parseDocument When the classifier puts a file on the document path via its extension (.docx, .xlsx, …) but the buffer sniffer returned a generic value like application/zip or application/octet-stream, we previously forwarded that generic MIME to parseDocument, which dispatches strictly by MIME and silently rejected it — exactly defeating the extension-first classification this PR added. extractDocument now remaps the MIME from the extension (falling back to the original sniffed MIME if the extension is unrecognized, so files that reached the document branch via MIME detection still work). Adds a parameterized test across docx/xlsx/xls/ods/odt against zip/octet sniffs to guard the regression. * 🩹 fix: Reuse existing withTimeout from utils/promise The previous commit's local withTimeout export collided with the already-exported `withTimeout` from `~/utils/promise`, breaking the @librechat/api tsc job (TS2308 ambiguous re-export). Drops the duplicate, imports from `~/utils/promise`, and removes the now-redundant unit tests (the helper has its own coverage in utils/promise.spec.ts). The third argument shifts from a label to the fully-formed timeout error message that the existing helper expects. * 🧹 chore: TextAttachment test polish (NITs) - Use the conventional `import Attachment, { AttachmentGroup }` form rather than `default as Attachment`. - Save the original `scrollHeight` property descriptor and restore it in afterAll, so the prototype patch never leaks past this suite.
464 lines
14 KiB
JavaScript
464 lines
14 KiB
JavaScript
const path = require('path');
|
|
const { v4 } = require('uuid');
|
|
const { logger } = require('@librechat/data-schemas');
|
|
const { getCodeBaseURL } = require('@librechat/agents');
|
|
const {
|
|
getBasePath,
|
|
logAxiosError,
|
|
sanitizeFilename,
|
|
createAxiosInstance,
|
|
classifyCodeArtifact,
|
|
codeServerHttpAgent,
|
|
codeServerHttpsAgent,
|
|
extractCodeArtifactText,
|
|
} = require('@librechat/api');
|
|
const {
|
|
Tools,
|
|
megabyte,
|
|
fileConfig,
|
|
FileContext,
|
|
FileSources,
|
|
imageExtRegex,
|
|
inferMimeType,
|
|
EToolResources,
|
|
EModelEndpoint,
|
|
mergeFileConfig,
|
|
getEndpointFileConfig,
|
|
} = require('librechat-data-provider');
|
|
const { filterFilesByAgentAccess } = require('~/server/services/Files/permissions');
|
|
const { createFile, getFiles, updateFile, claimCodeFile } = require('~/models');
|
|
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
|
|
const { convertImage } = require('~/server/services/Files/images/convert');
|
|
const { determineFileType } = require('~/server/utils');
|
|
|
|
const axios = createAxiosInstance();
|
|
|
|
/**
 * Builds a fallback payload pointing at the code server's download route.
 * Used whenever a file cannot be persisted locally: size limit exceeded,
 * storage strategy missing `saveBuffer`, or a download/processing error.
 * @param {Object} params - The parameters.
 * @param {string} params.id - The file ID from the code environment.
 * @param {string} params.name - The filename.
 * @param {string} params.messageId - The current message ID.
 * @param {number} params.expiresAt - Expiration timestamp (24 hours from creation).
 * @param {string} params.session_id - The code execution session ID.
 * @param {string} params.toolCallId - The tool call ID that generated the file.
 * @param {string} params.conversationId - The current conversation ID.
 * @returns {Object} Minimal file descriptor whose `filepath` is a downloadable URL.
 */
const createDownloadFallback = ({
  id,
  name,
  messageId,
  expiresAt,
  session_id,
  toolCallId,
  conversationId,
}) => ({
  filename: name,
  filepath: `${getBasePath()}/api/files/code/download/${session_id}/${id}`,
  expiresAt,
  conversationId,
  toolCallId,
  messageId,
});
|
|
|
|
/**
 * Process code execution output files - downloads and saves both images and non-image files.
 * All files are saved to local storage with fileIdentifier metadata for code env re-upload.
 * @param {ServerRequest} params.req - The Express request object.
 * @param {string} params.id - The file ID from the code environment.
 * @param {string} params.name - The filename.
 * @param {string} params.toolCallId - The tool call ID that generated the file.
 * @param {string} params.session_id - The code execution session ID.
 * @param {string} params.conversationId - The current conversation ID.
 * @param {string} params.messageId - The current message ID.
 * @returns {Promise<MongoFile & { messageId: string, toolCallId: string } | undefined>} The file metadata or undefined if an error occurs.
 */
const processCodeOutput = async ({
  req,
  id,
  name,
  toolCallId,
  conversationId,
  messageId,
  session_id,
}) => {
  const appConfig = req.config;
  const currentDate = new Date();
  const baseURL = getCodeBaseURL();
  const fileExt = path.extname(name).toLowerCase();
  // Image detection is extension-based; requires a non-empty extension AND a regex match.
  const isImage = fileExt && imageExtRegex.test(name);

  const mergedFileConfig = mergeFileConfig(appConfig.fileConfig);
  const endpointFileConfig = getEndpointFileConfig({
    fileConfig: mergedFileConfig,
    endpoint: EModelEndpoint.agents,
  });
  // Endpoint-specific limit takes precedence; fall back to the server-wide limit.
  const fileSizeLimit = endpointFileConfig.fileSizeLimit ?? mergedFileConfig.serverFileSizeLimit;

  try {
    const formattedDate = currentDate.toISOString();
    // Fetch the raw artifact bytes from the code execution server.
    const response = await axios({
      method: 'get',
      url: `${baseURL}/download/${session_id}/${id}`,
      responseType: 'arraybuffer',
      headers: {
        'User-Agent': 'LibreChat/1.0',
      },
      httpAgent: codeServerHttpAgent,
      httpsAgent: codeServerHttpsAgent,
      timeout: 15000,
    });

    const buffer = Buffer.from(response.data, 'binary');

    // Enforce file size limit
    if (buffer.length > fileSizeLimit) {
      logger.warn(
        `[processCodeOutput] File "${name}" (${(buffer.length / megabyte).toFixed(2)} MB) exceeds size limit of ${(fileSizeLimit / megabyte).toFixed(2)} MB, falling back to download URL`,
      );
      // Oversized artifacts are never stored locally; the user gets a 24h download URL.
      return createDownloadFallback({
        id,
        name,
        messageId,
        toolCallId,
        session_id,
        conversationId,
        expiresAt: currentDate.getTime() + 86400000,
      });
    }

    const fileIdentifier = `${session_id}/${id}`;

    /**
     * Atomically claim a file_id for this (filename, conversationId, context) tuple.
     * Uses $setOnInsert so concurrent calls for the same filename converge on
     * a single record instead of creating duplicates (TOCTOU race fix).
     */
    const newFileId = v4();
    const claimed = await claimCodeFile({
      filename: name,
      conversationId,
      file_id: newFileId,
      user: req.user.id,
    });
    const file_id = claimed.file_id;
    // If the claim returned a pre-existing id, this artifact updates an existing record.
    const isUpdate = file_id !== newFileId;

    if (isUpdate) {
      logger.debug(
        `[processCodeOutput] Updating existing file "${name}" (${file_id}) instead of creating duplicate`,
      );
    }

    const safeName = sanitizeFilename(name);
    if (safeName !== name) {
      logger.warn(
        `[processCodeOutput] Filename sanitized: "${name}" -> "${safeName}" | conv=${conversationId}`,
      );
    }

    if (isImage) {
      const usage = isUpdate ? (claimed.usage ?? 0) + 1 : 1;
      const _file = await convertImage(req, buffer, 'high', `${file_id}${fileExt}`);
      // Append a cache-busting query param when the same file_id is reused across executions.
      const filepath = usage > 1 ? `${_file.filepath}?v=${Date.now()}` : _file.filepath;
      const file = {
        ..._file,
        filepath,
        file_id,
        messageId,
        usage,
        filename: safeName,
        conversationId,
        user: req.user.id,
        type: `image/${appConfig.imageOutputType}`,
        createdAt: isUpdate ? claimed.createdAt : formattedDate,
        updatedAt: formattedDate,
        source: appConfig.fileStrategy,
        context: FileContext.execute_code,
        metadata: { fileIdentifier },
      };
      await createFile(file, true);
      return Object.assign(file, { messageId, toolCallId });
    }

    // Non-image path: persist via the configured storage strategy.
    const { saveBuffer } = getStrategyFunctions(appConfig.fileStrategy);
    if (!saveBuffer) {
      logger.warn(
        `[processCodeOutput] saveBuffer not available for strategy ${appConfig.fileStrategy}, falling back to download URL`,
      );
      return createDownloadFallback({
        id,
        name,
        messageId,
        toolCallId,
        session_id,
        conversationId,
        expiresAt: currentDate.getTime() + 86400000,
      });
    }

    // MIME resolution order: buffer sniff -> filename inference -> generic binary.
    const detectedType = await determineFileType(buffer, true);
    const mimeType = detectedType?.mime || inferMimeType(name, '') || 'application/octet-stream';

    /** Check MIME type support - for code-generated files, we're lenient but log unsupported types */
    const isSupportedMimeType = fileConfig.checkType(
      mimeType,
      endpointFileConfig.supportedMimeTypes,
    );
    if (!isSupportedMimeType) {
      logger.warn(
        `[processCodeOutput] File "${name}" has unsupported MIME type "${mimeType}", proceeding with storage but may not be usable as tool resource`,
      );
    }

    const fileName = `${file_id}__${safeName}`;
    const filepath = await saveBuffer({
      userId: req.user.id,
      buffer,
      fileName,
      basePath: 'uploads',
    });

    // Classify/extract using the sanitized name, not the raw one (defense in depth
    // against path traversal via crafted artifact names).
    const category = classifyCodeArtifact(safeName, mimeType);
    const text = await extractCodeArtifactText(buffer, safeName, mimeType, category);

    const file = {
      file_id,
      filepath,
      messageId,
      object: 'file',
      filename: safeName,
      type: mimeType,
      conversationId,
      user: req.user.id,
      bytes: buffer.length,
      updatedAt: formattedDate,
      metadata: { fileIdentifier },
      source: appConfig.fileStrategy,
      context: FileContext.execute_code,
      usage: isUpdate ? (claimed.usage ?? 0) + 1 : 1,
      createdAt: isUpdate ? claimed.createdAt : formattedDate,
      // Always set `text` explicitly (string or null) so that an update which
      // produces a binary or oversized artifact clears any previously cached
      // text — `createFile` uses findOneAndUpdate with $set semantics, which
      // would otherwise leave a stale value behind.
      text: text ?? null,
    };

    await createFile(file, true);
    return Object.assign(file, { messageId, toolCallId });
  } catch (error) {
    // NOTE(review): presumably thrown by a filename-handling helper on crafted
    // artifact names — confirm which helper emits this exact message.
    if (error?.message === 'Path traversal detected in filename') {
      logger.warn(
        `[processCodeOutput] Path traversal blocked for file "${name}" | conv=${conversationId}`,
      );
    }
    logAxiosError({
      message: 'Error downloading/processing code environment file',
      error,
    });

    // Fallback for download errors - return download URL so user can still manually download
    return createDownloadFallback({
      id,
      name,
      messageId,
      toolCallId,
      session_id,
      conversationId,
      expiresAt: currentDate.getTime() + 86400000,
    });
  }
};
|
|
|
|
/**
 * Determines whether a code-environment upload is recent enough that its
 * session is still considered active.
 *
 * The 23-hour cutoff (rather than 24) presumably leaves a safety margin
 * before the remote session expires — confirm against the code server's
 * actual session TTL.
 *
 * @param {string} dateString - Upload timestamp, parseable by `new Date()`.
 * @returns {boolean} `true` when fewer than 23 hours have elapsed;
 *   `false` otherwise (including unparseable dates, since `NaN` comparisons
 *   are always false).
 */
function checkIfActive(dateString) {
  const HOUR_MS = 1000 * 60 * 60;
  const elapsedMs = Date.now() - new Date(dateString).getTime();
  return elapsedMs < 23 * HOUR_MS;
}
|
|
|
|
/**
 * Retrieves the `lastModified` time string for a specified file from Code Execution Server.
 *
 * @param {string} fileIdentifier - The identifier for the file (e.g., "session_id/fileId",
 *   optionally followed by `?key=value` query parameters such as `entity_id`).
 *
 * @returns {Promise<string|null>}
 * A promise that resolves to the `lastModified` time string of the file if successful, or null if there is an
 * error in initialization or fetching the info.
 */
async function getSessionInfo(fileIdentifier) {
  try {
    const baseURL = getCodeBaseURL();
    // Named `identifierPath` (not `path`) so it does not shadow the `path`
    // module required at the top of this file.
    const [identifierPath, queryString] = fileIdentifier.split('?');
    const [session_id, fileId] = identifierPath.split('/');
    let queryParams = {};
    if (queryString) {
      queryParams = Object.fromEntries(new URLSearchParams(queryString).entries());
    }

    const response = await axios({
      method: 'get',
      url: `${baseURL}/sessions/${session_id}/objects/${fileId}`,
      params: queryParams,
      headers: {
        'User-Agent': 'LibreChat/1.0',
      },
      httpAgent: codeServerHttpAgent,
      httpsAgent: codeServerHttpsAgent,
      timeout: 5000,
    });

    return response.data?.lastModified;
  } catch (error) {
    logAxiosError({
      message: `Error fetching session info: ${error.message}`,
      error,
    });
    // Callers treat null as "unknown/stale" and trigger a re-upload.
    return null;
  }
}
|
|
|
|
/**
 * Primes code-environment files for an agent run: resolves the agent's
 * execute_code tool resources, verifies each file's remote session is still
 * active (re-uploading when it is not), and builds the tool-context string
 * listing the files available under /mnt/data.
 *
 * @param {Object} options
 * @param {ServerRequest} options.req
 * @param {Agent['tool_resources']} options.tool_resources
 * @param {string} [options.agentId] - The agent ID for file access control
 * @returns {Promise<{
 *  files: Array<{ id: string; session_id: string; name: string }>,
 *  toolContext: string,
 * }>}
 */
const primeFiles = async (options) => {
  const { tool_resources, req, agentId } = options;
  const file_ids = tool_resources?.[EToolResources.execute_code]?.file_ids ?? [];
  const agentResourceIds = new Set(file_ids);
  const resourceFiles = tool_resources?.[EToolResources.execute_code]?.files ?? [];

  // Get all files first; `{ text: 0 }` projection excludes cached extracted text.
  const allFiles = (await getFiles({ file_id: { $in: file_ids } }, null, { text: 0 })) ?? [];

  // Filter by access if user and agent are provided
  let dbFiles;
  if (req?.user?.id && agentId) {
    dbFiles = await filterFilesByAgentAccess({
      files: allFiles,
      userId: req.user.id,
      role: req.user.role,
      agentId,
    });
  } else {
    dbFiles = allFiles;
  }

  dbFiles = dbFiles.concat(resourceFiles);

  const files = [];
  const sessions = new Map();
  let toolContext = '';

  for (let i = 0; i < dbFiles.length; i++) {
    const file = dbFiles[i];
    if (!file) {
      continue;
    }

    // Optional chaining guards records that carry no `metadata` at all —
    // entries merged in from `resourceFiles` are not guaranteed to have it,
    // and a plain property access here previously threw a TypeError.
    if (file.metadata?.fileIdentifier) {
      // Named `identifierPath` (not `path`) so it does not shadow the `path`
      // module required at the top of this file.
      const [identifierPath, queryString] = file.metadata.fileIdentifier.split('?');
      const [session_id, id] = identifierPath.split('/');

      const pushFile = () => {
        if (!toolContext) {
          toolContext = `- Note: The following files are available in the "${Tools.execute_code}" tool environment:`;
        }

        // Annotate provenance for files the agent did not explicitly attach.
        let fileSuffix = '';
        if (!agentResourceIds.has(file.file_id)) {
          fileSuffix =
            file.context === FileContext.execute_code
              ? ' (from previous code execution)'
              : ' (attached by user)';
        }

        toolContext += `\n\t- /mnt/data/${file.filename}${fileSuffix}`;
        files.push({
          id,
          session_id,
          name: file.filename,
        });
      };

      // A session already verified in this loop needs no further checks.
      if (sessions.has(session_id)) {
        pushFile();
        continue;
      }

      let queryParams = {};
      if (queryString) {
        queryParams = Object.fromEntries(new URLSearchParams(queryString).entries());
      }

      const reuploadFile = async () => {
        try {
          const { getDownloadStream } = getStrategyFunctions(file.source);
          const { handleFileUpload: uploadCodeEnvFile } = getStrategyFunctions(
            FileSources.execute_code,
          );
          const stream = await getDownloadStream(options.req, file.filepath);
          const fileIdentifier = await uploadCodeEnvFile({
            req: options.req,
            stream,
            filename: file.filename,
            entity_id: queryParams.entity_id,
          });

          // Preserve existing metadata when adding fileIdentifier
          const updatedMetadata = {
            ...file.metadata, // Preserve existing metadata (like S3 storage info)
            fileIdentifier, // Add fileIdentifier
          };

          await updateFile({
            file_id: file.file_id,
            metadata: updatedMetadata,
          });
          sessions.set(session_id, true);
          pushFile();
        } catch (error) {
          // Best-effort: a failed re-upload skips this file without aborting the run.
          logger.error(
            `Error re-uploading file ${id} in session ${session_id}: ${error.message}`,
            error,
          );
        }
      };
      const uploadTime = await getSessionInfo(file.metadata.fileIdentifier);
      if (!uploadTime) {
        logger.warn(`Failed to get upload time for file ${id} in session ${session_id}`);
        await reuploadFile();
        continue;
      }
      if (!checkIfActive(uploadTime)) {
        await reuploadFile();
        continue;
      }
      sessions.set(session_id, true);
      pushFile();
    }
  }

  return { files, toolContext };
};
|
|
|
|
// Public API: agent file priming, code-output persistence, and the
// session-activity helpers they share.
module.exports = {
  primeFiles,
  checkIfActive,
  getSessionInfo,
  processCodeOutput,
};
|