From 1eb460eb034e55ddaa624faa6e4c7b8747387eb4 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Tue, 23 Jun 2026 15:49:57 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=BE=20fix:=20Harden=20Historical=20Fil?= =?UTF-8?q?e=20Authorization=20(#13918)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: Harden historical file authorization * chore: Sort file authorization imports * fix: Preserve authorized historical artifact refs * chore: Format historical artifact hardening --- api/app/clients/BaseClient.js | 186 ++++++++++++-- api/app/clients/specs/BaseClient.test.js | 238 +++++++++++++++++- api/server/controllers/agents/v1.js | 12 +- api/server/middleware/buildEndpointOption.js | 1 + .../middleware/buildEndpointOption.spec.js | 1 + api/server/services/Files/process.js | 6 +- .../src/agents/__tests__/initialize.test.ts | 30 ++- packages/api/src/agents/initialize.ts | 64 +++-- .../data-schemas/src/methods/file.spec.ts | 195 ++++++++++++++ packages/data-schemas/src/methods/file.ts | 93 +++++-- packages/data-schemas/src/methods/index.ts | 3 +- 11 files changed, 755 insertions(+), 74 deletions(-) diff --git a/api/app/clients/BaseClient.js b/api/app/clients/BaseClient.js index 871a398e23..b7c5a8ae59 100644 --- a/api/app/clients/BaseClient.js +++ b/api/app/clients/BaseClient.js @@ -6,6 +6,7 @@ const { checkBalance, getBalanceConfig, buildMessageFiles, + sanitizeFileForTransmit, extractFileContext, getReferencedQuotes, encodeAndFormatAudios, @@ -15,6 +16,7 @@ const { const { Constants, FileSources, + Tools, ContentTypes, excludedKeys, EModelEndpoint, @@ -31,6 +33,115 @@ const { logViolation } = require('~/cache'); const TextStream = require('./TextStream'); const db = require('~/models'); +const collectHistoricalFileRefs = (message) => { + const refs = []; + if (Array.isArray(message.files)) { + refs.push(...message.files); + } + if (Array.isArray(message.attachments)) { + refs.push(...message.attachments); + } + return refs; +}; + +const collectHistoricalFileIds = (messages) => { + const fileIds = new Set(); + for (const message of messages) { + for (const ref of collectHistoricalFileRefs(message)) { + if (ref?.file_id) { + fileIds.add(ref.file_id); + } + } + } + return Array.from(fileIds); +}; + +const buildOwnerFileFilter = (fileIds, user) => { + if (!user?.id || fileIds.length === 0) { + return null; + } + + const filter = { + file_id: { $in: fileIds }, + user: user.id, + }; + if (user.tenantId) { + filter.tenantId = user.tenantId; + } + return filter; +}; + +const TOOL_ATTACHMENT_KEYS = [ + Tools.file_search, + Tools.web_search, + Tools.ui_resources, + Tools.memory, +]; +const DISPLAY_ATTACHMENT_FIELDS = [ + 'filename', + 'filepath', + 'expiresAt', + 'conversationId', + 'messageId', + 'toolCallId', + 'name', +]; +const PER_MESSAGE_FILE_ATTACHMENT_FIELDS = ['messageId', 'toolCallId']; + +const pickFields = (source, fields) => { + const picked = {}; + for (const field of fields) { + if (source?.[field] !== undefined) { + picked[field] = source[field]; + } + } + return picked; +}; + +const sanitizeDisplayOnlyAttachment = (ref) => { + if (!ref || ref.file_id) { + return undefined; + } + + const attachment = pickFields(ref, DISPLAY_ATTACHMENT_FIELDS); + if (TOOL_ATTACHMENT_KEYS.includes(ref.type)) { + attachment.type = ref.type; + } + for (const key of TOOL_ATTACHMENT_KEYS) { + if (ref[key] !== undefined) { + attachment[key] = ref[key]; + } + } + + return Object.keys(attachment).length > 0 ? attachment : undefined; +}; + +const rehydrateMessageFileRefs = (refs, filesById, { preserveDisplayOnly = false } = {}) => { + if (!Array.isArray(refs)) { + return undefined; + } + + const files = []; + for (const ref of refs) { + const file = filesById.get(ref?.file_id); + if (file) { + files.push({ + ...sanitizeFileForTransmit(file), + ...pickFields(ref, PER_MESSAGE_FILE_ATTACHMENT_FIELDS), + }); + continue; + } + + if (preserveDisplayOnly) { + const displayOnlyAttachment = sanitizeDisplayOnlyAttachment(ref); + if (displayOnlyAttachment) { + files.push(displayOnlyAttachment); + } + } + } + return files.length > 0 ? files : undefined; +}; + class BaseClient { constructor(apiKey, options = {}) { this.apiKey = apiKey; @@ -1324,12 +1435,26 @@ class BaseClient { return _messages; } - const seen = new Set(); + const contextSeen = new Set(); const attachmentsProcessed = this.options.attachments && !(this.options.attachments instanceof Promise); if (attachmentsProcessed) { for (const attachment of this.options.attachments) { - seen.add(attachment.file_id); + if (attachment?.file_id) { + contextSeen.add(attachment.file_id); + } + } + } + + const historicalFileIds = collectHistoricalFileIds(_messages); + const fileFilter = buildOwnerFileFilter(historicalFileIds, this.options.req?.user); + const authorizedFilesById = new Map(); + if (fileFilter) { + const files = (await db.getFiles(fileFilter, {}, {})) ?? []; + for (const file of files) { + if (file?.file_id) { + authorizedFilesById.set(file.file_id, file); + } } } @@ -1343,38 +1468,57 @@ class BaseClient { this.message_file_map = {}; } - const fileIds = []; - for (const file of message.files) { - if (seen.has(file.file_id)) { - continue; + delete message.fileContext; + + const contextFiles = []; + if (Array.isArray(message.files)) { + for (const file of message.files) { + if (!file?.file_id || contextSeen.has(file.file_id)) { + continue; + } + const authorizedFile = authorizedFilesById.get(file.file_id); + if (authorizedFile) { + contextFiles.push(authorizedFile); + contextSeen.add(file.file_id); + } } - fileIds.push(file.file_id); - seen.add(file.file_id); } - if (fileIds.length === 0) { + const rehydratedFiles = rehydrateMessageFileRefs(message.files, authorizedFilesById); + if (rehydratedFiles) { + message.files = rehydratedFiles; + } else { + delete message.files; + } + + const rehydratedAttachments = rehydrateMessageFileRefs( + message.attachments, + authorizedFilesById, + { + preserveDisplayOnly: true, + }, + ); + if (rehydratedAttachments) { + message.attachments = rehydratedAttachments; + } else { + delete message.attachments; + } + + if (contextFiles.length === 0) { return message; } - const files = await db.getFiles( - { - file_id: { $in: fileIds }, - }, - {}, - {}, - ); + await this.addFileContextToMessage(message, contextFiles); + await this.processAttachments(message, contextFiles); - await this.addFileContextToMessage(message, files); - await this.processAttachments(message, files); - - this.message_file_map[message.messageId] = files; + this.message_file_map[message.messageId] = contextFiles; return message; }; const promises = []; for (const message of _messages) { - if (!message.files) { + if (!message.files && !message.attachments) { promises.push(message); continue; } diff --git a/api/app/clients/specs/BaseClient.test.js b/api/app/clients/specs/BaseClient.test.js index 40c4a1e05e..d565f87012 100644 --- a/api/app/clients/specs/BaseClient.test.js +++ b/api/app/clients/specs/BaseClient.test.js @@ -38,7 +38,7 @@ jest.mock('~/models', () => ({ updateFileUsage: jest.fn(), })); -const { getConvo, getMessages, saveConvo, saveMessage } = require('~/models'); +const { getConvo, getFiles, getMessages, saveConvo, saveMessage } = require('~/models'); jest.mock('@librechat/agents', () => { const actual = jest.requireActual('@librechat/agents'); @@ -1317,6 +1317,242 @@ describe('BaseClient', () => { }); }); + describe('addPreviousAttachments authorization', () => { + const ownerFile = { + file_id: 'owner-file', + filename: 'owner.txt', + filepath: '/uploads/owner.txt', + source: 'local', + type: 'text/plain', + bytes: 100, + object: 'file', + user: 'user-1', + embedded: false, + usage: 0, + text: 'authorized owner text', + _id: 'owner-mongo-id', + metadata: { + codeEnvRef: { + kind: 'user', + id: 'user-1', + storage_session_id: 'owner-session', + file_id: 'owner-code-file', + }, + }, + }; + + beforeEach(() => { + getFiles.mockReset(); + TestClient.options.resendFiles = true; + TestClient.options.attachments = undefined; + TestClient.options.req = { + user: { + id: 'user-1', + tenantId: 'tenant-a', + }, + }; + TestClient.addFileContextToMessage = jest.fn(async (message, files) => { + const text = files + .map((file) => file.text) + .filter(Boolean) + .join('\n'); + if (text) { + message.fileContext = text; + } + }); + TestClient.processAttachments = jest.fn(async (_message, files) => files); + TestClient.checkVisionRequest = jest.fn(); + }); + + test('rehydrates historical file refs from owner-scoped DB rows only', async () => { + getFiles.mockResolvedValueOnce([ownerFile]); + + const [message] = await TestClient.addPreviousAttachments([ + { + messageId: 'msg-1', + text: 'Use the attachment', + files: [ + { + file_id: 'owner-file', + filename: 'attacker-controlled-owner-name.txt', + filepath: '/forged/owner.txt', + text: 'forged owner text', + }, + { + file_id: 'victim-file', + filename: 'victim.txt', + filepath: '/victim/private.txt', + text: 'victim private text', + }, + ], + attachments: [ + { + file_id: 'victim-file', + filename: 'victim-output.csv', + text: 'victim output text', + }, + ], + fileContext: 'stale victim private text', + }, + ]); + + expect(getFiles).toHaveBeenCalledWith( + { + file_id: { $in: ['owner-file', 'victim-file'] }, + user: 'user-1', + tenantId: 'tenant-a', + }, + {}, + {}, + ); + expect(TestClient.addFileContextToMessage).toHaveBeenCalledWith(message, [ownerFile]); + expect(TestClient.processAttachments).toHaveBeenCalledWith(message, [ownerFile]); + expect(message.fileContext).toBe('authorized owner text'); + expect(message.files).toEqual([ + expect.objectContaining({ + file_id: 'owner-file', + filename: 'owner.txt', + filepath: '/uploads/owner.txt', + source: 'local', + metadata: ownerFile.metadata, + }), + ]); + expect(message.files[0].text).toBeUndefined(); + expect(message.files[0]._id).toBeUndefined(); + expect(message.attachments).toBeUndefined(); + expect(JSON.stringify(message)).not.toContain('victim'); + expect(JSON.stringify(message)).not.toContain('forged owner text'); + }); + + test('strips historical file context when no authenticated owner scope is available', async () => { + TestClient.options.req = {}; + + const [message] = await TestClient.addPreviousAttachments([ + { + messageId: 'msg-2', + files: [{ file_id: 'victim-file', filename: 'victim.txt' }], + fileContext: 'stale victim private text', + }, + ]); + + expect(getFiles).not.toHaveBeenCalled(); + expect(message.files).toBeUndefined(); + expect(message.fileContext).toBeUndefined(); + }); + + test('preserves repeated owner-authorized historical file refs after the first context use', async () => { + getFiles.mockResolvedValueOnce([ownerFile]); + + const [firstMessage, secondMessage] = await TestClient.addPreviousAttachments([ + { + messageId: 'msg-repeat-1', + files: [{ file_id: 'owner-file', filename: 'first-forged.txt' }], + }, + { + messageId: 'msg-repeat-2', + files: [{ file_id: 'owner-file', filename: 'second-forged.txt' }], + }, + ]); + + expect(getFiles).toHaveBeenCalledTimes(1); + expect(getFiles).toHaveBeenCalledWith( + { + file_id: { $in: ['owner-file'] }, + user: 'user-1', + tenantId: 'tenant-a', + }, + {}, + {}, + ); + expect(TestClient.addFileContextToMessage).toHaveBeenCalledTimes(1); + expect(TestClient.addFileContextToMessage).toHaveBeenCalledWith(firstMessage, [ownerFile]); + expect(secondMessage.fileContext).toBeUndefined(); + expect(firstMessage.files).toEqual([ + expect.objectContaining({ file_id: 'owner-file', filename: 'owner.txt' }), + ]); + expect(secondMessage.files).toEqual([ + expect.objectContaining({ file_id: 'owner-file', filename: 'owner.txt' }), + ]); + expect(JSON.stringify(secondMessage)).not.toContain('second-forged'); + }); + + test('preserves download-only historical attachments without trusting file fields', async () => { + const [message] = await TestClient.addPreviousAttachments([ + { + messageId: 'msg-download-only', + attachments: [ + { + filename: 'report.csv', + filepath: '/api/files/code/download/session/file', + expiresAt: 123456, + conversationId: 'conversation-1', + messageId: 'assistant-message', + toolCallId: 'tool-call-1', + text: 'untrusted text should not survive', + source: 'forged-source', + metadata: { codeEnvRef: { id: 'victim' } }, + }, + ], + fileContext: 'stale context', + }, + ]); + + expect(getFiles).not.toHaveBeenCalled(); + expect(message.fileContext).toBeUndefined(); + expect(message.attachments).toEqual([ + { + filename: 'report.csv', + filepath: '/api/files/code/download/session/file', + expiresAt: 123456, + conversationId: 'conversation-1', + messageId: 'assistant-message', + toolCallId: 'tool-call-1', + }, + ]); + expect(JSON.stringify(message)).not.toContain('untrusted text'); + expect(JSON.stringify(message)).not.toContain('forged-source'); + expect(JSON.stringify(message)).not.toContain('victim'); + }); + + test('merges safe per-message metadata onto authorized DB-backed attachments', async () => { + getFiles.mockResolvedValueOnce([ownerFile]); + + const [message] = await TestClient.addPreviousAttachments([ + { + messageId: 'msg-artifact', + attachments: [ + { + file_id: 'owner-file', + filename: 'forged-artifact.csv', + filepath: '/forged/artifact.csv', + source: 'forged-source', + metadata: { codeEnvRef: { id: 'victim' } }, + text: 'forged artifact text', + messageId: 'assistant-message', + toolCallId: 'tool-call-2', + }, + ], + }, + ]); + + expect(message.attachments).toEqual([ + expect.objectContaining({ + file_id: 'owner-file', + filename: 'owner.txt', + filepath: '/uploads/owner.txt', + source: 'local', + metadata: ownerFile.metadata, + messageId: 'assistant-message', + toolCallId: 'tool-call-2', + }), + ]); + expect(message.attachments[0].text).toBeUndefined(); + expect(message.attachments[0]._id).toBeUndefined(); + expect(JSON.stringify(message)).not.toContain('forged-artifact'); + expect(JSON.stringify(message)).not.toContain('forged artifact text'); + }); + }); + describe('sendMessage quote references', () => { // The blockquote merge itself lives in AgentClient.buildMessages / prependQuotes // (covered by packages/api specs). BaseClient's job is to attach the normalized diff --git a/api/server/controllers/agents/v1.js b/api/server/controllers/agents/v1.js index b7f7a9bd29..ec1c6e62f2 100644 --- a/api/server/controllers/agents/v1.js +++ b/api/server/controllers/agents/v1.js @@ -305,10 +305,14 @@ const pruneToolResourceFileIdsForOwner = async ({ tool_resources, ownerId, logPr const ownerIdStr = ownerId.toString(); try { - const ownerFiles = await db.getFiles({ file_id: { $in: referencedFileIds } }, null, { - file_id: 1, - user: 1, - }); + const ownerFiles = await db.getFiles( + { file_id: { $in: referencedFileIds }, user: ownerIdStr }, + null, + { + file_id: 1, + user: 1, + }, + ); const allowedIds = new Set( (ownerFiles ?? []) .filter((file) => file.user && file.user.toString() === ownerIdStr) diff --git a/api/server/middleware/buildEndpointOption.js b/api/server/middleware/buildEndpointOption.js index a762c153ea..84fb85dba8 100644 --- a/api/server/middleware/buildEndpointOption.js +++ b/api/server/middleware/buildEndpointOption.js @@ -141,6 +141,7 @@ async function buildEndpointOption(req, res, next) { if (req.body.files && !isAgents) { req.body.endpointOption.attachments = updateFilesUsage(req.body.files, undefined, { user: req.user.id, + tenantId: req.user.tenantId, }); } diff --git a/api/server/middleware/buildEndpointOption.spec.js b/api/server/middleware/buildEndpointOption.spec.js index c4da3a726e..abf49c1456 100644 --- a/api/server/middleware/buildEndpointOption.spec.js +++ b/api/server/middleware/buildEndpointOption.spec.js @@ -484,6 +484,7 @@ describe('buildEndpointOption - defaultParamsEndpoint parsing', () => { expect(updateFilesUsage).toHaveBeenCalledWith(req.body.files, undefined, { user: 'user-1', + tenantId: undefined, }); expect(req.body.endpointOption.attachments).toBe(attachments); }); diff --git a/api/server/services/Files/process.js b/api/server/services/Files/process.js index ca8980e8fc..f6d92ac5af 100644 --- a/api/server/services/Files/process.js +++ b/api/server/services/Files/process.js @@ -1034,7 +1034,11 @@ const processOpenAIFile = async ({ await db.createFile(file, true); } else if (updateUsage) { try { - await db.updateFileUsage({ file_id }); + await db.updateFileUsage({ + file_id, + user: userId, + tenantId: openai.req?.user?.tenantId, + }); } catch (error) { logger.error('Error updating file usage', error); } diff --git a/packages/api/src/agents/__tests__/initialize.test.ts b/packages/api/src/agents/__tests__/initialize.test.ts index 601d4706fd..09917ffef3 100644 --- a/packages/api/src/agents/__tests__/initialize.test.ts +++ b/packages/api/src/agents/__tests__/initialize.test.ts @@ -809,10 +809,19 @@ describe('initializeAgent — attachment scoping', () => { db, ); + expect(db.getToolFilesByIds).toHaveBeenCalledWith( + [toolFile.file_id], + new Set([EToolResources.file_search]), + { userId: 'user-1', tenantId: undefined }, + ); expect(db.updateFilesUsage).toHaveBeenNthCalledWith(1, [requestFile], undefined, { user: 'user-1', + tenantId: undefined, + }); + expect(db.updateFilesUsage).toHaveBeenNthCalledWith(2, [toolFile], undefined, { + user: 'user-1', + tenantId: undefined, }); - expect(db.updateFilesUsage).toHaveBeenNthCalledWith(2, [toolFile]); }); }); @@ -1992,13 +2001,17 @@ describe('initializeAgent — code-generated file thread filter (regression)', ( ); expect(getCodeGeneratedFiles).toHaveBeenCalledTimes(1); - expect(getCodeGeneratedFiles).toHaveBeenCalledWith('conv-1', [ - 'file-pptx-skill', - 'file-output-csv', - ]); + expect(getCodeGeneratedFiles).toHaveBeenCalledWith( + 'conv-1', + ['file-pptx-skill', 'file-output-csv'], + { userId: 'user-1', tenantId: undefined }, + ); /* Both functions now share the same primary anchor — symmetric * design that closes the sibling-branch hole. */ - expect(getUserCodeFiles).toHaveBeenCalledWith(['file-pptx-skill', 'file-output-csv']); + expect(getUserCodeFiles).toHaveBeenCalledWith(['file-pptx-skill', 'file-output-csv'], { + userId: 'user-1', + tenantId: undefined, + }); }); it('selects messages.attachments alongside messages.files (regression)', async () => { @@ -2086,7 +2099,10 @@ describe('initializeAgent — code-generated file thread filter (regression)', ( { ...db, getMessages, getCodeGeneratedFiles, getUserCodeFiles }, ); - expect(getCodeGeneratedFiles).toHaveBeenCalledWith('conv-1', []); + expect(getCodeGeneratedFiles).toHaveBeenCalledWith('conv-1', [], { + userId: 'user-1', + tenantId: undefined, + }); /* `getUserCodeFiles` is gated on a non-empty array at the call site, * so it shouldn't be invoked at all. `getCodeGeneratedFiles`'s own * empty-guard is exercised by data-schemas tests. */ diff --git a/packages/api/src/agents/initialize.ts b/packages/api/src/agents/initialize.ts index ebbc6be73c..b5ec1df2da 100644 --- a/packages/api/src/agents/initialize.ts +++ b/packages/api/src/agents/initialize.ts @@ -21,8 +21,8 @@ import type { TUser, } from 'librechat-data-provider'; import type { GenericTool, LCToolRegistry, ToolMap, LCTool } from '@librechat/agents'; +import type { IMongoFile, FileOwnerScope } from '@librechat/data-schemas'; import type { Response as ServerResponse } from 'express'; -import type { IMongoFile } from '@librechat/data-schemas'; import type { ServerRequest, EndpointDbMethods, @@ -411,22 +411,30 @@ export interface InitializeAgentDbMethods extends EndpointDbMethods { updateFilesUsage: ( files: Array<{ file_id: string }>, fileIds?: string[], - options?: { user?: string }, + options?: { user?: string; tenantId?: string | null }, ) => Promise; /** Get files from database */ getFiles: (filter: unknown, sort: unknown, select: unknown) => Promise; /** Filter files by agent access permissions (ownership or agent attachment) */ filterFilesByAgentAccess?: TFilterFilesByAgentAccess; /** Get tool files by IDs (user-uploaded files only, code files handled separately) */ - getToolFilesByIds: (fileIds: string[], toolSet: Set) => Promise; + getToolFilesByIds: ( + fileIds: string[], + toolSet: Set, + ownerScope?: FileOwnerScope, + ) => Promise; /** Get conversation file IDs */ getConvoFiles: (conversationId: string) => Promise; /** Get code-generated files by conversation ID and the file_ids * referenced from messages in the current thread (collected via * `messages.files[].file_id` during thread walk). */ - getCodeGeneratedFiles?: (conversationId: string, threadFileIds?: string[]) => Promise; + getCodeGeneratedFiles?: ( + conversationId: string, + threadFileIds?: string[], + ownerScope?: FileOwnerScope, + ) => Promise; /** Get user-uploaded execute_code files by file IDs (from message.files in thread) */ - getUserCodeFiles?: (fileIds: string[]) => Promise; + getUserCodeFiles?: (fileIds: string[], ownerScope: FileOwnerScope) => Promise; /** Get messages for a conversation (supports select for field projection) */ getMessages?: ( filter: { conversationId: string }, @@ -563,6 +571,9 @@ export async function initializeAgent( isInitialAgent = false, } = params; const requestFileOwnerId = req.user?.id; + const requestFileOwnerScope: FileOwnerScope | undefined = requestFileOwnerId + ? { userId: requestFileOwnerId, tenantId: req.user?.tenantId } + : undefined; if (!db) { throw new Error('initializeAgent requires db methods to be passed'); @@ -610,7 +621,13 @@ export async function initializeAgent( } } - const toolFiles = (await db.getToolFilesByIds(fileIds, toolResourceSet)) as IMongoFile[]; + const toolFiles = requestFileOwnerScope + ? ((await db.getToolFilesByIds( + fileIds, + toolResourceSet, + requestFileOwnerScope, + )) as IMongoFile[]) + : []; /** * Retrieve execute_code files filtered to the current thread. @@ -651,14 +668,25 @@ export async function initializeAgent( * which sibling first generated them — see `getCodeGeneratedFiles` * for the branched-conversation rationale. */ if (db.getCodeGeneratedFiles) { - codeGeneratedFiles = (await db.getCodeGeneratedFiles( - conversationId, - threadFileIds, - )) as IMongoFile[]; + codeGeneratedFiles = requestFileOwnerScope + ? ((await db.getCodeGeneratedFiles( + conversationId, + threadFileIds, + requestFileOwnerScope, + )) as IMongoFile[]) + : []; } - if (db.getUserCodeFiles && threadFileIds && threadFileIds.length > 0) { - userCodeFiles = (await db.getUserCodeFiles(threadFileIds)) as IMongoFile[]; + if ( + db.getUserCodeFiles && + requestFileOwnerScope && + threadFileIds && + threadFileIds.length > 0 + ) { + userCodeFiles = (await db.getUserCodeFiles( + threadFileIds, + requestFileOwnerScope, + )) as IMongoFile[]; } } @@ -668,21 +696,27 @@ export async function initializeAgent( requestFiles.length && requestFileOwnerId ? ((await db.updateFilesUsage(requestFiles, undefined, { user: requestFileOwnerId, + tenantId: req.user?.tenantId, })) as IMongoFile[]) : []; const requestUsageFileIds = new Set(requestUsageFiles.map((file) => file.file_id)); const trustedToolFiles = allToolFiles.filter( (file) => !requestUsageFileIds.has(file.file_id), ); - const toolUsageFiles = trustedToolFiles.length - ? ((await db.updateFilesUsage(trustedToolFiles)) as IMongoFile[]) - : []; + let toolUsageFiles: IMongoFile[] = []; + if (trustedToolFiles.length > 0 && requestFileOwnerId) { + toolUsageFiles = (await db.updateFilesUsage(trustedToolFiles, undefined, { + user: requestFileOwnerId, + tenantId: req.user?.tenantId, + })) as IMongoFile[]; + } currentFiles = requestUsageFiles.concat(toolUsageFiles); } } else if (requestFiles.length) { currentFiles = requestFileOwnerId ? ((await db.updateFilesUsage(requestFiles, undefined, { user: requestFileOwnerId, + tenantId: req.user?.tenantId, })) as IMongoFile[]) : []; } diff --git a/packages/data-schemas/src/methods/file.spec.ts b/packages/data-schemas/src/methods/file.spec.ts index 090783e263..0abbdb4e8c 100644 --- a/packages/data-schemas/src/methods/file.spec.ts +++ b/packages/data-schemas/src/methods/file.spec.ts @@ -379,6 +379,47 @@ describe('File Methods', () => { expect(files).toHaveLength(0); }); + + it('owner-scopes historical tool file lookups', async () => { + const ownerId = new mongoose.Types.ObjectId(); + const victimId = new mongoose.Types.ObjectId(); + const ownerFileId = uuidv4(); + const victimFileId = uuidv4(); + + await runAsSystem(() => + fileMethods.createFile({ + file_id: ownerFileId, + user: ownerId, + tenantId: 'tenant-a', + filename: 'owner-embedded.txt', + filepath: '/uploads/owner-embedded.txt', + type: 'text/plain', + bytes: 100, + embedded: true, + }), + ); + await runAsSystem(() => + fileMethods.createFile({ + file_id: victimFileId, + user: victimId, + tenantId: 'tenant-a', + filename: 'victim-embedded.txt', + filepath: '/uploads/victim-embedded.txt', + type: 'text/plain', + bytes: 100, + embedded: true, + }), + ); + + const toolSet = new Set([EToolResources.file_search]); + const files = await fileMethods.getToolFilesByIds([ownerFileId, victimFileId], toolSet, { + userId: ownerId.toString(), + tenantId: 'tenant-a', + }); + + expect(files).toHaveLength(1); + expect(files[0].file_id).toBe(ownerFileId); + }); }); describe('getCodeGeneratedFiles', () => { @@ -527,6 +568,160 @@ describe('File Methods', () => { const files = await fileMethods.getCodeGeneratedFiles(conversationId, [fileId]); expect(files).toEqual([]); }); + + it('owner-scopes code-generated file lookups', async () => { + const ownerId = new mongoose.Types.ObjectId(); + const victimId = new mongoose.Types.ObjectId(); + const conversationId = uuidv4(); + const ownerFileId = uuidv4(); + const victimFileId = uuidv4(); + + for (const [fileId, userId, filename] of [ + [ownerFileId, ownerId, 'owner-output.csv'], + [victimFileId, victimId, 'victim-output.csv'], + ] as const) { + await runAsSystem(() => + fileMethods.createFile({ + file_id: fileId, + user: userId, + tenantId: 'tenant-a', + conversationId, + messageId: `msg-${fileId}`, + filename, + filepath: `/uploads/${filename}`, + type: 'text/csv', + bytes: 100, + context: FileContext.execute_code, + metadata: { + codeEnvRef: { + kind: 'user', + id: userId.toString(), + storage_session_id: `sess-${fileId}`, + file_id: fileId, + }, + }, + }), + ); + } + + const files = await fileMethods.getCodeGeneratedFiles( + conversationId, + [ownerFileId, victimFileId], + { userId: ownerId.toString(), tenantId: 'tenant-a' }, + ); + + expect(files).toHaveLength(1); + expect(files[0].file_id).toBe(ownerFileId); + }); + }); + + describe('getUserCodeFiles', () => { + it('returns only authenticated owner code-env uploads', async () => { + const ownerId = new mongoose.Types.ObjectId(); + const victimId = new mongoose.Types.ObjectId(); + const ownerFileId = uuidv4(); + const victimFileId = uuidv4(); + + await runAsSystem(() => + fileMethods.createFile({ + file_id: ownerFileId, + user: ownerId, + tenantId: 'tenant-a', + conversationId: 'conversation-owner', + filename: 'owner.csv', + filepath: '/uploads/owner.csv', + source: 'local', + type: 'text/csv', + bytes: 100, + context: FileContext.message_attachment, + metadata: { + codeEnvRef: { + kind: 'user', + id: ownerId.toString(), + storage_session_id: 'owner-session', + file_id: ownerFileId, + }, + }, + }), + ); + await runAsSystem(() => + fileMethods.createFile({ + file_id: victimFileId, + user: victimId, + tenantId: 'tenant-a', + conversationId: 'conversation-victim', + filename: 'victim.csv', + filepath: '/uploads/victim.csv', + source: 'local', + type: 'text/csv', + bytes: 100, + context: FileContext.message_attachment, + metadata: { + codeEnvRef: { + kind: 'user', + id: victimId.toString(), + storage_session_id: 'victim-session', + file_id: victimFileId, + }, + }, + }), + ); + + const files = await fileMethods.getUserCodeFiles([ownerFileId, victimFileId], { + userId: ownerId.toString(), + tenantId: 'tenant-a', + }); + + expect(files).toHaveLength(1); + expect(files[0]).toMatchObject({ + file_id: ownerFileId, + filename: 'owner.csv', + filepath: '/uploads/owner.csv', + source: 'local', + metadata: { + codeEnvRef: { + kind: 'user', + id: ownerId.toString(), + storage_session_id: 'owner-session', + file_id: ownerFileId, + }, + }, + }); + }); + + it('excludes files outside the authenticated tenant scope', async () => { + const ownerId = new mongoose.Types.ObjectId(); + const fileId = uuidv4(); + + await runAsSystem(() => + fileMethods.createFile({ + file_id: fileId, + user: ownerId, + tenantId: 'tenant-b', + filename: 'tenant-b.csv', + filepath: '/uploads/tenant-b.csv', + source: 'local', + type: 'text/csv', + bytes: 100, + context: FileContext.message_attachment, + metadata: { + codeEnvRef: { + kind: 'user', + id: ownerId.toString(), + storage_session_id: 'tenant-b-session', + file_id: fileId, + }, + }, + }), + ); + + const files = await fileMethods.getUserCodeFiles([fileId], { + userId: ownerId.toString(), + tenantId: 'tenant-a', + }); + + expect(files).toEqual([]); + }); }); describe('updateFile', () => { diff --git a/packages/data-schemas/src/methods/file.ts b/packages/data-schemas/src/methods/file.ts index c8d7620580..9d384bf7bb 100644 --- a/packages/data-schemas/src/methods/file.ts +++ b/packages/data-schemas/src/methods/file.ts @@ -4,6 +4,29 @@ import type { IMongoFile } from '~/types/file'; import { tenantSafeBulkWrite } from '~/utils/tenantBulkWrite'; import logger from '../config/winston'; +export type FileOwnerScope = { + userId: string; + tenantId?: string | null; +}; + +function withOwnerScope>( + filter: T, + ownerScope?: FileOwnerScope, +): T & FilterQuery { + if (!ownerScope) { + return filter; + } + + const scopedFilter: T & FilterQuery = { + ...filter, + user: ownerScope.userId, + }; + if (ownerScope.tenantId) { + scopedFilter.tenantId = ownerScope.tenantId; + } + return scopedFilter; +} + /** Factory function that takes mongoose instance and returns the file methods */ export function createFileMethods(mongoose: typeof import('mongoose')): { findFileById: (file_id: string, options?: Record) => Promise; @@ -16,12 +39,14 @@ export function createFileMethods(mongoose: typeof import('mongoose')): { getToolFilesByIds: ( fileIds: string[], toolResourceSet?: Set, + ownerScope?: FileOwnerScope, ) => Promise; getCodeGeneratedFiles: ( conversationId: string, threadFileIds?: string[], + ownerScope?: FileOwnerScope, ) => Promise; - getUserCodeFiles: (fileIds?: string[]) => Promise; + getUserCodeFiles: (fileIds: string[], ownerScope: FileOwnerScope) => Promise; claimCodeFile: (data: { filename: string; conversationId: string; @@ -38,6 +63,7 @@ export function createFileMethods(mongoose: typeof import('mongoose')): { file_id: string; inc?: number; user?: string; + tenantId?: string | null; }) => Promise; deleteFile: (file_id: string) => Promise; deleteFiles: (file_ids: string[], user?: string) => Promise<{ deletedCount?: number }>; @@ -53,7 +79,7 @@ export function createFileMethods(mongoose: typeof import('mongoose')): { updateFilesUsage: ( files: Array<{ file_id: string }>, fileIds?: string[], - options?: { user?: string }, + options?: { user?: string; tenantId?: string | null }, ) => Promise; sweepOrphanedPreviews: (maxAgeMs?: number) => Promise; } { @@ -116,6 +142,7 @@ export function createFileMethods(mongoose: typeof import('mongoose')): { async function getToolFilesByIds( fileIds: string[], toolResourceSet?: Set, + ownerScope?: FileOwnerScope, ): Promise { if (!fileIds || !fileIds.length || !toolResourceSet?.size) { return []; @@ -136,11 +163,14 @@ export function createFileMethods(mongoose: typeof import('mongoose')): { return []; } - const filter: FilterQuery = { - file_id: { $in: fileIds }, - context: { $ne: FileContext.execute_code }, - $or: orConditions, - }; + const filter = withOwnerScope( + { + file_id: { $in: fileIds }, + context: { $ne: FileContext.execute_code }, + $or: orConditions, + }, + ownerScope, + ); const selectFields: SelectProjection = { text: 0 }; const sortOptions = { updatedAt: -1 as SortOrder }; @@ -189,6 +219,7 @@ export function createFileMethods(mongoose: typeof import('mongoose')): { async function getCodeGeneratedFiles( conversationId: string, threadFileIds?: string[], + ownerScope?: FileOwnerScope, ): Promise { if (!conversationId) { return []; @@ -206,12 +237,15 @@ export function createFileMethods(mongoose: typeof import('mongoose')): { } try { - const filter: FilterQuery = { - conversationId, - context: FileContext.execute_code, - file_id: { $in: threadFileIds }, - 'metadata.codeEnvRef': { $exists: true }, - }; + const filter = withOwnerScope( + { + conversationId, + context: FileContext.execute_code, + file_id: { $in: threadFileIds }, + 'metadata.codeEnvRef': { $exists: true }, + }, + ownerScope, + ); const selectFields: SelectProjection = { text: 0 }; const sortOptions = { createdAt: 1 as SortOrder }; @@ -229,19 +263,26 @@ export function createFileMethods(mongoose: typeof import('mongoose')): { * These are files with fileIdentifier metadata but context is NOT execute_code (e.g., agents or message_attachment). * File IDs should be collected from message.files arrays in the current thread. * @param fileIds - Array of file IDs to fetch (from message.files in the thread) + * @param ownerScope - Authenticated owner scope used to constrain historical refs * @returns User-uploaded execute_code files */ - async function getUserCodeFiles(fileIds?: string[]): Promise { + async function getUserCodeFiles( + fileIds: string[], + ownerScope: FileOwnerScope, + ): Promise { if (!fileIds || fileIds.length === 0) { return []; } try { - const filter: FilterQuery = { - file_id: { $in: fileIds }, - context: { $ne: FileContext.execute_code }, - 'metadata.codeEnvRef': { $exists: true }, - }; + const filter = withOwnerScope( + { + file_id: { $in: fileIds }, + context: { $ne: FileContext.execute_code }, + 'metadata.codeEnvRef': { $exists: true }, + }, + ownerScope, + ); const selectFields: SelectProjection = { text: 0 }; const sortOptions = { createdAt: 1 as SortOrder }; @@ -357,15 +398,18 @@ export function createFileMethods(mongoose: typeof import('mongoose')): { file_id: string; inc?: number; user?: string; + tenantId?: string | null; }): Promise { const File = mongoose.models.File as Model; - const { file_id, inc = 1, user } = data; + const { file_id, inc = 1, user, tenantId } = data; const updateOperation = { $inc: { usage: inc }, $unset: { expiresAt: '', temp_file_id: '' }, }; // Owner scoping is fail-closed: mismatches leave usage and TTL metadata unchanged. - const query: FilterQuery = user ? { file_id, user } : { file_id }; + const query: FilterQuery = user + ? withOwnerScope({ file_id }, { userId: user, tenantId }) + : { file_id }; return File.findOneAndUpdate(query, updateOperation, { new: true, }).lean(); @@ -454,12 +498,13 @@ export function createFileMethods(mongoose: typeof import('mongoose')): { async function updateFilesUsage( files: Array<{ file_id: string }>, fileIds?: string[], - options?: { user?: string }, + options?: { user?: string; tenantId?: string | null }, ): Promise { const promises: Promise[] = []; const seen = new Set(); // Preserve the same owner scope for every deduped ID in this batch. const user = options?.user; + const tenantId = options?.tenantId; for (const file of files) { const { file_id } = file; @@ -467,7 +512,7 @@ export function createFileMethods(mongoose: typeof import('mongoose')): { continue; } seen.add(file_id); - promises.push(updateFileUsage({ file_id, user })); + promises.push(updateFileUsage({ file_id, user, tenantId })); } if (!fileIds) { @@ -480,7 +525,7 @@ export function createFileMethods(mongoose: typeof import('mongoose')): { continue; } seen.add(file_id); - promises.push(updateFileUsage({ file_id, user })); + promises.push(updateFileUsage({ file_id, user, tenantId })); } const results = await Promise.all(promises); diff --git a/packages/data-schemas/src/methods/index.ts b/packages/data-schemas/src/methods/index.ts index efde53c3e9..5eee6c4db5 100644 --- a/packages/data-schemas/src/methods/index.ts +++ b/packages/data-schemas/src/methods/index.ts @@ -1,9 +1,9 @@ import type { RoleMethods, RoleDeps } from './role'; import { createSessionMethods, DEFAULT_REFRESH_TOKEN_EXPIRY, type SessionMethods } from './session'; import { createUserMethods, DEFAULT_SESSION_EXPIRY, type UserMethods } from './user'; +import { createFileMethods, type FileMethods, type FileOwnerScope } from './file'; import { createTokenMethods, type TokenMethods } from './token'; import { createRoleMethods, RoleConflictError } from './role'; -import { createFileMethods, type FileMethods } from './file'; import { createKeyMethods, type KeyMethods } from './key'; /* Memories */ import { createMemoryMethods, type MemoryMethods } from './memory'; @@ -288,6 +288,7 @@ export type { RoleMethods, KeyMethods, FileMethods, + FileOwnerScope, MemoryMethods, AgentCategoryMethods, AgentApiKeyMethods,