🧵 fix: Include Code Outputs in Thread File Lookup (#13023)

Code-execution outputs land on `messages.attachments` (set by
`processCodeOutput`), while user uploads land on `messages.files`.
The threadFileIds switch (#13004) walked only `files`, so on a
single linear thread:

  Turn 1: assistant produces sample.xlsx → attachment with codeEnvRef
  Turn 2: user says "add 2 rows"
          → primeCodeFiles: file_ids=0 resourceFiles=0
          → /exec sent files=[]
          → sandbox: FileNotFoundError: 'sample.xlsx'

The `getThreadData` walk found zero file_ids because the assistant's
codeEnvRef was on `attachments`, not `files`. Compounded by the
DB select string `'messageId parentMessageId files'` which didn't
pull `attachments` into memory in the first place — so even fixing
the walk in isolation wouldn't have surfaced them.

Both layers fixed:
  - `ThreadMessage` type adds `attachments?: Array<{ file_id?: string }>`
  - `getThreadData` walks both arrays, dedups via the same Set
  - `initialize.ts` selects `'messageId parentMessageId files attachments'`

## Test plan

`packages/api/src/utils/message.spec.ts` (+6 cases):
- collects file_ids from `attachments`
- walks both `files` and `attachments` on the same message
- regression: linear thread with code-output attachments across
  user→assistant→user→assistant produces the right file_ids
- dedupes shared ids that appear in both arrays
- skips attachments without file_id (mirrors `files` behavior)
- empty `attachments` array

`packages/api/src/agents/__tests__/initialize.test.ts` (+1 case):
- locks the DB select string includes `attachments` alongside
  `files` / `messageId` / `parentMessageId`

- [x] `npx jest src/utils/message.spec.ts` — 39/39 pass
- [x] `npx jest src/agents/__tests__/initialize.test.ts` — 33/33 pass
- [x] lint clean on all four touched files
This commit is contained in:
Danny Avila 2026-05-08 12:23:03 -04:00
parent 26a6312917
commit 3d5e5348a4
No known key found for this signature in database
GPG key ID: BF31EEB2C5CA0956
4 changed files with 207 additions and 8 deletions

View file

@ -1318,6 +1318,58 @@ describe('initializeAgent — code-generated file thread filter (regression)', (
expect(getUserCodeFiles).toHaveBeenCalledWith(['file-pptx-skill', 'file-output-csv']);
});
it('selects messages.attachments alongside messages.files (regression)', async () => {
/* Code-execution outputs land on `messages.attachments` via
* `processCodeOutput`; user uploads land on `messages.files`.
* Selecting only `files` silently dropped every code-output
* file_id from the thread walk, so the next turn's
* `tool_resources.execute_code.file_ids` came up empty and the
* sandbox saw `_injected_files: []`. The visible symptom: "the
* previous file isn't persisted between executions" on a single
* linear thread. Lock the select string so a future field
* trim doesn't silently re-introduce the bug. */
const { agent, req, res, loadTools, db } = setupExecuteCodeAgent();
mockGetThreadData.mockReturnValue({ messageIds: [], fileIds: [] });
const getCodeGeneratedFiles = jest.fn().mockResolvedValue([]);
const getUserCodeFiles = jest.fn().mockResolvedValue([]);
const getMessages = jest.fn().mockResolvedValue([]);
const dbWithThreadCalls: InitializeAgentDbMethods = {
...db,
getMessages,
getCodeGeneratedFiles,
getUserCodeFiles,
};
await initializeAgent(
{
req,
res,
agent,
loadTools,
endpointOption: { endpoint: EModelEndpoint.agents },
conversationId: 'conv-1',
parentMessageId: 'msgN',
allowedProviders: new Set([Providers.OPENAI]),
isInitialAgent: true,
codeEnvAvailable: true,
},
dbWithThreadCalls,
);
expect(getMessages).toHaveBeenCalledTimes(1);
const [, selectFields] = getMessages.mock.calls[0];
/* Asserting on a substring instead of exact equality keeps the
* test resilient to future ordering / new fields, while still
* catching a regression where `attachments` is dropped. */
expect(selectFields).toMatch(/\battachments\b/);
expect(selectFields).toMatch(/\bfiles\b/);
expect(selectFields).toMatch(/\bmessageId\b/);
expect(selectFields).toMatch(/\bparentMessageId\b/);
});
it('skips the code-generated fetch entirely when threadFileIds is empty', async () => {
/* Empty `messages.files[]` across the thread nothing to look up.
* The function returns early without hitting Mongo, mirroring the

View file

@ -428,17 +428,22 @@ export async function initializeAgent(
let threadFileIds: string[] | undefined;
if (parentMessageId && parentMessageId !== Constants.NO_PARENT && db.getMessages) {
/** Only select fields needed for thread traversal */
/** Only select fields needed for thread traversal. Both
* `files` (user uploads) and `attachments` (code-execution
* outputs from `processCodeOutput`) carry the `file_id`
* refs the next turn must prime selecting only `files`
* silently drops every code-output ref. */
const messages = await db.getMessages(
{ conversationId },
'messageId parentMessageId files',
'messageId parentMessageId files attachments',
);
if (messages && messages.length > 0) {
/** Walk the parent chain and collect file_ids referenced by
* any message in the thread (`messages.files[].file_id`).
* Used as the primary anchor for both
* `getCodeGeneratedFiles` and `getUserCodeFiles`
* message ids no longer needed at this layer. */
* any message in the thread (`messages.files[].file_id` +
* `messages.attachments[].file_id`). Used as the primary
* anchor for both `getCodeGeneratedFiles` and
* `getUserCodeFiles` message ids no longer needed at
* this layer. */
threadFileIds = getThreadData(messages, parentMessageId).fileIds;
}
}

View file

@ -435,6 +435,128 @@ describe('getThreadData', () => {
});
});
describe('attachment ID collection (code-execution outputs)', () => {
/* Code-execution outputs land on `messages.attachments`, not
* `messages.files` `processCodeOutput` writes the artifact
* there. Walking only `files` silently dropped every code-output
* file_id, so the next turn's `tool_resources.execute_code.file_ids`
* came up empty and the sandbox saw `_injected_files: []`. The
* user-visible symptom: "the previous file isn't persisted
* between executions" on a single linear thread (no branching
* needed). Lock the contract that both fields are walked. */
it('collects file_ids from message.attachments (code-execution outputs)', () => {
const messages = [
{ messageId: 'msg-user', parentMessageId: NO_PARENT },
{
messageId: 'msg-assistant',
parentMessageId: 'msg-user',
attachments: [{ file_id: 'attach-1' }, { file_id: 'attach-2' }],
},
];
const result = getThreadData(messages, 'msg-assistant');
expect(result.fileIds).toContain('attach-1');
expect(result.fileIds).toContain('attach-2');
expect(result.fileIds).toHaveLength(2);
});
it('walks both files (user uploads) and attachments (code outputs) on the same message', () => {
/* In practice they land on different roles by convention, but
* the walk shouldn't depend on that it should be honest about
* collecting from both arrays regardless of where they appear. */
const messages = [
{
messageId: 'msg-1',
parentMessageId: NO_PARENT,
files: [{ file_id: 'user-uploaded' }],
attachments: [{ file_id: 'code-generated' }],
},
];
const result = getThreadData(messages, 'msg-1');
expect(result.fileIds).toContain('user-uploaded');
expect(result.fileIds).toContain('code-generated');
expect(result.fileIds).toHaveLength(2);
});
it('regression: collects code-output across user→assistant→user→assistant chain', () => {
/* Mirrors the exact failure mode from the field report two
* sequential turns producing the same xlsx, both attachments
* on assistant messages. With only `files` walked, turn 2's
* priming ran with `file_ids=0` and the sandbox FileNotFound'd. */
const messages = [
{ messageId: 'user-1', parentMessageId: NO_PARENT },
{
messageId: 'asst-1',
parentMessageId: 'user-1',
attachments: [{ file_id: 'sample-xlsx-v1' }],
},
{ messageId: 'user-2', parentMessageId: 'asst-1' },
{
messageId: 'asst-2',
parentMessageId: 'user-2',
attachments: [{ file_id: 'sample-xlsx-v2' }],
},
];
/* From parentMessageId = 'asst-2', walking back through the
* full chain. Both attachment file_ids must surface so
* primeFiles can resolve them via getCodeGeneratedFiles. */
const result = getThreadData(messages, 'asst-2');
expect(result.fileIds).toContain('sample-xlsx-v1');
expect(result.fileIds).toContain('sample-xlsx-v2');
expect(result.fileIds).toHaveLength(2);
});
it('dedupes file_ids that appear in both files and attachments arrays', () => {
const messages = [
{
messageId: 'msg-1',
parentMessageId: NO_PARENT,
files: [{ file_id: 'shared-id' }],
attachments: [{ file_id: 'shared-id' }, { file_id: 'attach-only' }],
},
];
const result = getThreadData(messages, 'msg-1');
expect(result.fileIds).toContain('shared-id');
expect(result.fileIds).toContain('attach-only');
expect(result.fileIds).toHaveLength(2);
});
it('skips attachments without file_id (mirrors files behavior)', () => {
const messages = [
{
messageId: 'msg-1',
parentMessageId: NO_PARENT,
attachments: [{ file_id: 'attach-1' }, { file_id: undefined }, { file_id: '' }],
},
];
const result = getThreadData(messages, 'msg-1');
expect(result.fileIds).toEqual(['attach-1']);
});
it('handles messages with empty attachments array', () => {
const messages = [
{
messageId: 'msg-1',
parentMessageId: NO_PARENT,
attachments: [],
},
];
const result = getThreadData(messages, 'msg-1');
expect(result.fileIds).toEqual([]);
});
});
describe('performance - O(1) lookups', () => {
it('should handle large message arrays efficiently', () => {
// Create a linear thread of 1000 messages

View file

@ -92,11 +92,21 @@ export function sanitizeMessageForTransmit<T extends Partial<TMessage>>(
return sanitized;
}
/** Minimal message shape for thread traversal */
/** Minimal message shape for thread traversal.
*
* Both `files` and `attachments` carry `file_id` references, but they
* land on different roles by convention: user-uploaded files live on
* `messages.files` (set when the user attaches a file in chat), while
* code-execution outputs live on `messages.attachments` (set by
* `processCodeOutput` when a tool call produces a file). The thread
* walk must visit both so the next turn's `tool_resources.execute_code.file_ids`
* picks up files the assistant generated, not just files the user
* uploaded. */
type ThreadMessage = {
messageId: string;
parentMessageId?: string | null;
files?: Array<{ file_id?: string }>;
attachments?: Array<{ file_id?: string }>;
};
/** Result of thread data extraction */
@ -147,7 +157,10 @@ export function getThreadData(
result.messageIds.push(message.messageId);
/** Collect file IDs from this message */
/** Collect file IDs from BOTH `files` (user uploads) and
* `attachments` (code-execution outputs from `processCodeOutput`).
* Walking only one half drops half the relevant refs see
* the type doc on `ThreadMessage` for the role split. */
if (message.files) {
for (const file of message.files) {
if (file.file_id) {
@ -155,6 +168,13 @@ export function getThreadData(
}
}
}
if (message.attachments) {
for (const attachment of message.attachments) {
if (attachment.file_id) {
fileIdSet.add(attachment.file_id);
}
}
}
currentId = message.parentMessageId === Constants.NO_PARENT ? null : message.parentMessageId;
}