From 48c3d31db3f6b491e0e868790b92a09cd6acaaa6 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Sun, 19 Apr 2026 21:49:41 -0700 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=8A=20fix:=20Preserve=20Log=20Metadata?= =?UTF-8?q?=20on=20Console=20for=20Warn/Error=20Levels=20(#12737)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ๐Ÿ”Š fix: Preserve Log Metadata on Console for Warn/Error Levels The default console formatter discarded every structured metadata key on the winston info object โ€” only `CONSOLE_JSON=true` preserved it. That meant failures emitted by the agents SDK (e.g. "Summarization LLM call failed") reached stdout without the provider, model, or underlying error attached, leaving users unable to diagnose the root cause. - Add `formatConsoleMeta` helper to serialize non-reserved metadata as a compact JSON trailer, with per-value string truncation and safe handling of circular references. - Append the metadata trailer to warn/error console lines; info/debug behavior is unchanged. - Relax `debugTraverse`'s debug-only gate so warn/error messages routed through the debug formatter also surface their metadata. - Add a `formatConsoleMeta` stub to the shared logger mock so existing tests keep working. * ๐Ÿ” fix: Also Redact Sensitive Patterns on Warn Console Lines The warn-level console output now includes a metadata trailer that may contain provider-returned error strings with embedded tokens or keys (e.g. `Bearer ...`, `sk-...`). Apply `redactMessage` to warn lines in addition to error, matching the new surface area. * ๐Ÿ” fix: Redact Sensitive Tokens Embedded in JSON Metadata Two gaps in the existing console redaction that became user-visible once warn/error lines started emitting structured metadata: 1. The OpenAI-key regex (`/^(sk-)[^\s]+/`) was anchored to start-of-line, so keys embedded inside JSON payloads (e.g. `{"apiKey":"sk-..."}`) were never redacted. Every console line begins with a timestamp, so the anchor effectively made this pattern dead code. 2. `formatConsoleMeta` stringified metadata values verbatim; a sensitive string value was only redacted by the whole-line regex pass, which missed the anchored `sk-` case above. Fix: - Drop the `^` anchor; add `/g` so every occurrence is redacted, not just the first. - Also exclude `"` and `'` from the token body so JSON-embedded values terminate at the closing quote rather than chewing into the next field. - Simplify `redactMessage` to apply patterns directly (dropping the `getMatchingSensitivePatterns` filter) โ€” the filter used `.test()` which has stateful behavior on `/g` regexes and is no longer needed. - `formatConsoleMeta` now runs `redactMessage` over every string value before JSON serialization, so the metadata trailer is safe even on the warn path. - Add regression tests covering both fixes. Reviewed-by: Codex (P1 finding on PR #12737, commit 68c31b6). * ๐Ÿ” fix: Redact Metadata in debugTraverse for Warn and Error Relaxing the debug-only gate in debugTraverse (in commit 59371be0) routed warn/error records through the traversal path, which emits leaf string values verbatim (via truncateLongStrings only). Because DEBUG_LOGGING defaults to true, those records are also written to the rotating debug log file โ€” which means payloads like `{ auth: 'Bearer ...' }` or `{ openaiKey: 'sk-...' }` were persisted unredacted once my earlier change took effect. Apply redactMessage to the final formatted string when the level is warn or error. Debug-level behavior is unchanged (matching prior art). Includes regression tests covering error/warn redaction and debug-level preservation. Reviewed-by: Codex (P1 finding on PR #12737, commit e288f7fd). * ๐Ÿ” fix: Anchor Secret Regexes at Word Boundaries to Prevent Over-Redaction Removing the `^` anchor in commit e288f7fd let the OpenAI-key regex match anywhere in the line โ€” including inside ordinary words like `task-runner` or `mask-value`, where `sk-` appears mid-word. Non-secret text was being rewritten to `task-[REDACTED]`, hiding real log content from operators. - Anchor every sensitive-key pattern with `\b` so matches only fire at word boundaries. - Constrain the OpenAI-key body to the documented charset (`[a-zA-Z0-9_-]+`) instead of the broader "not whitespace or quote" character class. - Add `&` to the `key=` exclusion so a query-string value stops at the next parameter separator. - Regression tests covering both the over-redaction cases (`task-runner`, `monkey=10`) and the intended redactions still firing. Reviewed-by: Codex (P2 finding on PR #12737, commit c09d293d). * ๐Ÿ” fix: Redact Before Colorize To Survive ANSI Word-Boundary Interference The console pipeline runs `redactFormat โ†’ colorize({ all: true }) โ†’ printf`. With `all: true`, winston wraps `info.message` in ANSI escapes whose trailing `m` is a word character. That means `\b(Bearer )โ€ฆ` placed at the start of a colorized segment can fall on a (word,word) boundary and miss โ€” the earlier line-wise `redactMessage(line)` pass in printf suffers the same issue because it runs after colorize. Extend `redactFormat` to run for `warn` in addition to `error`, operating on the raw pre-colorize `info.message` + `Symbol.for('message')` strings. The later in-printf `redactMessage(line)` stays as a backstop, but the primary redaction now happens where the regex can actually see the text. Metadata redaction already operates on the raw info object via `formatConsoleMeta`, so it was never affected by ANSI โ€” no change there. Includes regression tests for the new warn-level behavior and for the info/debug no-op path. Reviewed-by: Codex (P2 finding on PR #12737, commit fdb6b361). * ๐Ÿงน fix: Prefer Structured Metadata Over Consumed Splat Args in Traversal `debugTraverse` previously read `metadata[Symbol.for('splat')][0]` first and only fell back to the structured metadata object. When a caller uses printf interpolation alongside a metadata object โ€” for example `logger.warn('failed for %s', tenant, { provider })` โ€” winston leaves the *consumed* positional arg (`tenant`) in `SPLAT[0]` after interpolation. The formatter would then append the tenant a second time and skip the real metadata, regressing debug-file and `DEBUG_CONSOLE` output quality now that warn/error share this path. Prefer the structured metadata object (via `extractMetaObject`) and only fall back to `SPLAT[0]` when there's nothing else, so the surviving log line surfaces the actual key/value pairs regardless of call shape. Reviewed-by: Codex (P2 finding on PR #12737, commit 1e43d636). * ๐Ÿงน fix: Skip Consumed Splat Primitives in Warn/Error Debug Traversal When no structured metadata is attached, winston still leaves consumed `%s` / `%d` arguments in `Symbol.for('splat')`. Previous fix preferred the structured object but still fell back to whatever sat at `SPLAT[0]` โ€” so `logger.warn('failed for %s', tenantId)` emitted `failed for tenant-7 tenant-7` in the traversal path (debug file and `DEBUG_CONSOLE`), now regressed outside of `debug` level because warn/error share the path. Only accept the splat fallback when the value is a plain object or an array (structural data worth surfacing). Primitives there are almost certainly consumed printf args and get skipped. Regression tests cover the single-%s case and the array-as-metadata case (which still surfaces through splat). Reviewed-by: Codex (P2 finding on PR #12737, commit bccbf117). * ๐Ÿงน fix: Skip Numeric Splat Keys When Extracting Log Metadata When a caller passes a primitive as the second argument โ€” e.g. `logger.warn('Unhandled step creation type:', step.type)` โ€” winston / `format.splat()` can leave character-index keys (`"0"`, `"1"`, โ€ฆ) on the `info` object. With the warn/error metadata trailer in play, those synthetic artifacts were being surfaced as bogus metadata, producing noisy console and debug-file output. Filter out numeric-string keys in `extractMetaObject` so only real metadata fields reach the trailer. Added a regression test. Reviewed-by: Codex (P2 finding on PR #12737, commit b34628de). * ๐Ÿงน fix: Preserve Unconsumed Primitive Splat Args in Debug Traversal The previous round dropped every primitive SPLAT[0] value to avoid duplicating consumed %s args, but that removed useful context from calls like \`logger.debug('prefix:', detail)\` where the primitive was never interpolated โ€” users lost the \`detail\` value. Refine the heuristic: skip a primitive splat value only when it already appears inside the (post-interpolation) \`info.message\`; otherwise surface it. Arrays and objects continue to surface unconditionally. Regression test covers the 'prefix:', detail case. Reviewed-by: Codex (P2 finding on PR #12737, commit 6bf9548f). * ๐Ÿงน fix: Traverse Filtered Metadata, Not Raw Metadata, In debugTraverse `debugTraverse` computes `extracted = extractMetaObject(metadata)` to strip reserved keys, underscore-prefixed internals, and numeric splat artifacts โ€” but the later \`klona(metadata)\` + \`traverse\` path still read the raw object, putting all the filtered junk back into the rendered multi-line output. Clone and traverse \`debugValue\` (the already-filtered object) instead. Regression test exercises the case where numeric splat artifacts sit alongside a real metadata field. Reviewed-by: Codex (P2 finding on PR #12737, commit c29c18e8). * ๐Ÿงน refactor: Split Warn/Error From Debug-Level Traversal in Parsers Retrofitting `debugTraverse`'s multi-line object walker to cover warn/error created a minefield of splat-interaction edge cases (numeric artifacts, consumed %s args, bogus `_`-prefix filtering, over-eager suppression of unconsumed primitives). Each fix kept introducing new corner cases. Split the two concerns instead: - Warn/error now emit a compact single-line JSON metadata trailer via `formatConsoleMeta`, then pass the full line through `redactMessage`. This mirrors what the console formatter already does, so behavior between the console and debug-file outputs stays consistent for warn/error โ€” and none of the splat/traversal edge cases apply. - Debug level keeps its original code path verbatim (including the raw `metadata` traversal and SPLAT\[0\] fallback). No regressions from my earlier iterations. - `extractMetaObject` no longer filters underscore-prefixed keys, so legitimate fields like MongoDB `_id` still appear. Reserved winston keys and numeric splat artifacts remain filtered. Updated tests reflect the simpler contract (underscore preservation, single-line trailer expectations already covered). Reviewed-by: Codex (two P2 findings on PR #12737, commit 9ea11529: `_id` regression and over-eager primitive suppression). * ๐Ÿงน fix: Preserve Scalar Metadata When One Value Is Circular `formatConsoleMeta` previously wrapped a single `JSON.stringify` in try/catch โ€” any circular reference inside any field (e.g. an attached request/response object) caused the entire trailer to be dropped. That defeats the goal of making failures diagnosable: one malformed field would mask the provider/model/status we wanted to surface. Use a `WeakSet`-based replacer that emits `[Circular]` for repeated object visits. On the whole-object serialization failing, fall back to per-field serialization so scalar keys always land and only the offending field is replaced with \`"[Unserializable]"\`. Reviewed-by: Codex (P2 finding on PR #12737, commit d63742a5). * ๐Ÿงน fix: Address Audit Findings (JSDoc, Case-Insensitive Api-Key, Tests) Audit review identified several MINOR/NIT items on top of the codex rounds. This commit closes the actionable ones: - **JSDoc (#1, #2)**: `extractMetaObject` no longer claims to filter underscore-prefixed keys (that filter was removed intentionally for MongoDB `_id`). `debugTraverse`'s docblock now describes the three code paths (warn/error compact trailer, debug multi-line traversal, other levels). - **Case-insensitive api-key regex (#6)**: `/gi` so the Azure style `Api-Key:` / `API-KEY:` also gets redacted. Pre-existing behavior was lowercase-only. - **Consolidated redundant branch (#5)**: `consoleFormat` printf was checking `isError || isWarn` twice; merged into one block. - **Pre-compiled regex (#9)**: `NUMERIC_KEY_RE` moved to module scope. - **Test coverage (#3, #4)**: Added regression tests for - per-field serialization fallback when a value's `toJSON` throws, - sensitive strings nested inside metadata objects, - the Azure-style `Api-Key:` header. --- api/config/__tests__/parsers.spec.js | 358 +++++++++++++++++++++++++++ api/config/parsers.js | 192 +++++++++++--- api/config/winston.js | 20 +- api/test/__mocks__/logger.js | 1 + 4 files changed, 532 insertions(+), 39 deletions(-) create mode 100644 api/config/__tests__/parsers.spec.js diff --git a/api/config/__tests__/parsers.spec.js b/api/config/__tests__/parsers.spec.js new file mode 100644 index 0000000000..f54675ce3a --- /dev/null +++ b/api/config/__tests__/parsers.spec.js @@ -0,0 +1,358 @@ +jest.unmock('winston'); + +const { formatConsoleMeta, redactMessage, redactFormat, debugTraverse } = + jest.requireActual('../parsers'); +const SPLAT_SYMBOL = Symbol.for('splat'); + +describe('formatConsoleMeta', () => { + it('returns empty string when there is no user metadata', () => { + expect( + formatConsoleMeta({ + level: 'error', + message: 'oops', + timestamp: '2026-04-18 02:25:22', + }), + ).toBe(''); + }); + + it('serializes user-supplied metadata keys', () => { + const meta = formatConsoleMeta({ + level: 'error', + message: '[agents:summarize] Summarization LLM call failed', + timestamp: '2026-04-18 02:25:22', + provider: 'azureOpenAI', + model: 'gpt-5.4-mini', + messagesToRefineCount: 42, + }); + + expect(meta).toContain('"provider":"azureOpenAI"'); + expect(meta).toContain('"model":"gpt-5.4-mini"'); + expect(meta).toContain('"messagesToRefineCount":42'); + }); + + it('ignores reserved winston keys but preserves legitimate fields like _id', () => { + const meta = formatConsoleMeta({ + level: 'error', + message: 'boom', + timestamp: 'ts', + splat: [1, 2], + _id: '507f191e810c19729de860ea', + userField: 'keep', + }); + + expect(meta).toContain('"_id":"507f191e810c19729de860ea"'); + expect(meta).toContain('"userField":"keep"'); + expect(meta).not.toContain('"splat"'); + }); + + it('drops numeric-index-like keys (splat artifacts from primitive args)', () => { + const meta = formatConsoleMeta({ + level: 'warn', + message: 'Unhandled step:', + timestamp: 'ts', + 0: 'f', + 1: 'o', + 2: 'o', + realField: 'real', + }); + + expect(meta).toBe('{"realField":"real"}'); + }); + + it('drops empty, null, undefined, function, and symbol values', () => { + const meta = formatConsoleMeta({ + level: 'warn', + message: 'noise', + timestamp: 'ts', + empty: '', + nullish: null, + undef: undefined, + fn: () => 1, + sym: Symbol('x'), + kept: 'yes', + }); + + expect(meta).toBe('{"kept":"yes"}'); + }); + + it('truncates very long string values to avoid console spam', () => { + const longString = 'x'.repeat(5000); + const meta = formatConsoleMeta({ + level: 'error', + message: 'long', + timestamp: 'ts', + errorStack: longString, + }); + + expect(meta.length).toBeLessThan(longString.length); + expect(meta).toContain('...'); + }); + + it('preserves non-circular fields when one value is circular', () => { + const circular = {}; + circular.self = circular; + const meta = formatConsoleMeta({ + level: 'error', + message: 'circular', + timestamp: 'ts', + provider: 'openai', + model: 'gpt-5.4-mini', + circular, + }); + + expect(meta).toContain('"provider":"openai"'); + expect(meta).toContain('"model":"gpt-5.4-mini"'); + expect(meta).toContain('[Circular]'); + }); + + it('falls back to per-field serialization when a value toJSON throws', () => { + const meta = formatConsoleMeta({ + level: 'error', + message: 'crash', + timestamp: 'ts', + provider: 'azure', + model: 'gpt-5.4-mini', + broken: { + toJSON() { + throw new Error('nope'); + }, + }, + }); + + expect(meta).toContain('"provider":"azure"'); + expect(meta).toContain('"model":"gpt-5.4-mini"'); + expect(meta).toContain('[Unserializable]'); + }); + + it('redacts sensitive strings nested inside metadata objects', () => { + const meta = formatConsoleMeta({ + level: 'error', + message: 'nested leak', + timestamp: 'ts', + config: { + headers: { + authorization: 'Bearer eyJhbGciOi.nestedTokenValue', + }, + query: 'https://example.com/?key=AIzaNested', + }, + openaiKey: 'sk-outerKey123', + }); + + expect(meta).not.toContain('eyJhbGciOi.nestedTokenValue'); + expect(meta).not.toContain('AIzaNested'); + expect(meta).not.toContain('sk-outerKey123'); + expect(meta).toContain('Bearer [REDACTED]'); + expect(meta).toContain('key=[REDACTED]'); + expect(meta).toContain('sk-[REDACTED]'); + }); + + it('redacts the Azure-style mixed-case Api-Key header', () => { + const meta = formatConsoleMeta({ + level: 'error', + message: 'azure call', + timestamp: 'ts', + headers: 'Api-Key: 0123456789abcdef', + }); + + expect(meta).not.toContain('0123456789abcdef'); + expect(meta).toContain('Api-Key: [REDACTED]'); + }); + + it('redacts sensitive patterns inside string metadata values', () => { + const meta = formatConsoleMeta({ + level: 'error', + message: 'leak test', + timestamp: 'ts', + openaiKey: 'sk-abc123def456', + auth: 'Bearer eyJhbGciOi...tokenvalue', + google: 'https://example.com/?key=AIzaSyXX', + }); + + expect(meta).not.toContain('sk-abc123def456'); + expect(meta).not.toContain('eyJhbGciOi...tokenvalue'); + expect(meta).not.toContain('AIzaSyXX'); + expect(meta).toContain('sk-[REDACTED]'); + expect(meta).toContain('Bearer [REDACTED]'); + expect(meta).toContain('key=[REDACTED]'); + }); + + it('redacts multiple occurrences of the same pattern in one value', () => { + const meta = formatConsoleMeta({ + level: 'error', + message: 'two keys', + timestamp: 'ts', + combined: 'first sk-aaa and then sk-bbb', + }); + + expect(meta).not.toContain('sk-aaa'); + expect(meta).not.toContain('sk-bbb'); + expect(meta.match(/sk-\[REDACTED\]/g)?.length).toBe(2); + }); +}); + +describe('redactMessage', () => { + it('redacts sk- keys that are not at line start (inside JSON-like text)', () => { + const input = '{"apiKey":"sk-abc123"}'; + expect(redactMessage(input)).toBe('{"apiKey":"sk-[REDACTED]"}'); + }); + + it('redacts all sk- occurrences in a single pass', () => { + const input = 'sk-one sk-two sk-three'; + expect(redactMessage(input)).toBe('sk-[REDACTED] sk-[REDACTED] sk-[REDACTED]'); + }); + + it('trims redacted output when trimLength is provided', () => { + const input = 'Bearer supersecretvalue'; + expect(redactMessage(input, 10)).toBe('Bearer [RE...'); + }); + + it('returns empty string for falsy input', () => { + expect(redactMessage('')).toBe(''); + expect(redactMessage(undefined)).toBe(''); + }); + + it('does not redact ordinary words that contain "sk-" inside them', () => { + expect(redactMessage('task-runner failed')).toBe('task-runner failed'); + expect(redactMessage('mask-value computed')).toBe('mask-value computed'); + expect(redactMessage('desk-lamp is on')).toBe('desk-lamp is on'); + }); + + it('does not redact words that contain "key=" inside them', () => { + expect(redactMessage('monkey=10 bananas')).toBe('monkey=10 bananas'); + }); + + it('still redacts standalone sk- keys at word boundaries', () => { + expect(redactMessage('token: sk-abc123def')).toBe('token: sk-[REDACTED]'); + expect(redactMessage('"sk-abc123def"')).toBe('"sk-[REDACTED]"'); + }); +}); + +describe('redactFormat', () => { + const runFormat = (info) => redactFormat().transform(info) || info; + + it('redacts info.message for error level before any colorize step runs', () => { + const info = runFormat({ level: 'error', message: 'Bearer secretvalue' }); + expect(info.message).toBe('Bearer [REDACTED]'); + }); + + it('redacts info.message for warn level too (avoids ANSI boundary issues later)', () => { + const info = runFormat({ level: 'warn', message: 'apiKey=sk-abc123def' }); + expect(info.message).toContain('sk-[REDACTED]'); + }); + + it('leaves info.message untouched for info and debug levels', () => { + const infoInfo = runFormat({ level: 'info', message: 'Bearer looksSensitive' }); + expect(infoInfo.message).toBe('Bearer looksSensitive'); + + const infoDebug = runFormat({ level: 'debug', message: 'Bearer looksSensitive' }); + expect(infoDebug.message).toBe('Bearer looksSensitive'); + }); +}); + +describe('debugTraverse', () => { + const runFormatter = (info) => { + const transformed = debugTraverse.transform(info); + const MESSAGE = Symbol.for('message'); + if (transformed && typeof transformed === 'object') { + return transformed[MESSAGE] ?? String(transformed); + } + return String(transformed); + }; + + const buildInfo = (level, meta) => { + const info = { + level, + message: 'test', + timestamp: 'ts', + ...meta, + }; + info[SPLAT_SYMBOL] = [meta]; + return info; + }; + + it('redacts sensitive strings in metadata for error level', () => { + const out = runFormatter(buildInfo('error', { auth: 'Bearer eyJabc123', openai: 'sk-abc123' })); + expect(out).not.toContain('eyJabc123'); + expect(out).not.toContain('sk-abc123'); + expect(out).toContain('Bearer [REDACTED]'); + expect(out).toContain('sk-[REDACTED]'); + }); + + it('redacts sensitive strings in metadata for warn level', () => { + const out = runFormatter(buildInfo('warn', { header: 'Bearer supersecrettoken' })); + expect(out).not.toContain('supersecrettoken'); + expect(out).toContain('Bearer [REDACTED]'); + }); + + it('preserves debug-level metadata unmodified (existing behavior)', () => { + const out = runFormatter(buildInfo('debug', { someField: 'not-sensitive' })); + expect(out).toContain('not-sensitive'); + }); + + it('prefers structured metadata over a consumed printf arg in SPLAT[0]', () => { + const info = { + level: 'warn', + message: 'failed for tenant-7', + timestamp: 'ts', + provider: 'openai', + [SPLAT_SYMBOL]: ['tenant-7', { provider: 'openai' }], + }; + const out = runFormatter(info); + expect(out).toContain('openai'); + const tenantMatches = out.match(/tenant-7/g) ?? []; + expect(tenantMatches.length).toBeLessThanOrEqual(1); + }); + + it('does not duplicate a consumed %s arg when there is no structured metadata', () => { + const info = { + level: 'warn', + message: 'failed for tenant-7', + timestamp: 'ts', + [SPLAT_SYMBOL]: ['tenant-7'], + }; + const out = runFormatter(info); + const tenantMatches = out.match(/tenant-7/g) ?? []; + expect(tenantMatches.length).toBe(1); + }); + + it('omits numeric splat-artifact keys from the traversed output', () => { + const info = { + level: 'error', + message: 'boom', + timestamp: 'ts', + 0: 'x', + 1: 'y', + realField: 'keep', + [SPLAT_SYMBOL]: [{ realField: 'keep' }], + }; + const out = runFormatter(info); + expect(out).toContain('realField'); + expect(out).toContain('keep'); + expect(out).not.toMatch(/^\s*0:/m); + expect(out).not.toMatch(/^\s*1:/m); + }); + + it('surfaces unconsumed primitive SPLAT[0] (no %s in message) for debug level', () => { + const info = { + level: 'debug', + message: 'prefix:', + timestamp: 'ts', + [SPLAT_SYMBOL]: ['detailValueXYZ'], + }; + const out = runFormatter(info); + expect(out).toContain('detailValueXYZ'); + }); + + it('still surfaces array metadata in SPLAT[0] when no object is extracted', () => { + const info = { + level: 'debug', + message: 'list', + timestamp: 'ts', + [SPLAT_SYMBOL]: [['alpha', 'beta', 'gamma']], + }; + const out = runFormatter(info); + expect(out).toContain('alpha'); + expect(out).toContain('beta'); + expect(out).toContain('gamma'); + }); +}); diff --git a/api/config/parsers.js b/api/config/parsers.js index 46394a9ff8..111f3f6a7d 100644 --- a/api/config/parsers.js +++ b/api/config/parsers.js @@ -8,25 +8,16 @@ const CONSOLE_JSON_STRING_LENGTH = parseInt(process.env.CONSOLE_JSON_STRING_LENG const DEBUG_MESSAGE_LENGTH = parseInt(process.env.DEBUG_MESSAGE_LENGTH) || 150; const sensitiveKeys = [ - /^(sk-)[^\s]+/, // OpenAI API key pattern - /(Bearer )[^\s]+/, // Header: Bearer token pattern - /(api-key:? )[^\s]+/, // Header: API key pattern - /(key=)[^\s]+/, // URL query param: sensitive key pattern (Google) + // OpenAI API key: `sk-` at a word boundary, followed by the documented + // charset for keys. `\b` keeps `task-runner`, `mask-value`, etc. from + // being mis-redacted. + /\b(sk-)[a-zA-Z0-9_-]+/g, + /\b(Bearer )[^\s"']+/g, // Header: Bearer token pattern + /\b(api-key:? )[^\s"']+/gi, // Header: API key pattern (case-insensitive; covers `Api-Key:`, `API-KEY:`) + /\b(key=)[^\s"'&]+/g, // URL query param: sensitive key pattern (Google) ]; -/** - * Determines if a given value string is sensitive and returns matching regex patterns. - * - * @param {string} valueStr - The value string to check. - * @returns {Array} An array of regex patterns that match the value string. - */ -function getMatchingSensitivePatterns(valueStr) { - if (valueStr) { - // Filter and return all regex patterns that match the value string - return sensitiveKeys.filter((regex) => regex.test(valueStr)); - } - return []; -} +const NUMERIC_KEY_RE = /^\d+$/; /** * Redacts sensitive information from a console message and trims it to a specified length if provided. @@ -39,28 +30,35 @@ function redactMessage(str, trimLength) { return ''; } - const patterns = getMatchingSensitivePatterns(str); - patterns.forEach((pattern) => { - str = str.replace(pattern, '$1[REDACTED]'); - }); - - if (trimLength !== undefined && str.length > trimLength) { - return `${str.substring(0, trimLength)}...`; + let redacted = str; + for (const pattern of sensitiveKeys) { + redacted = redacted.replace(pattern, '$1[REDACTED]'); } - return str; + if (trimLength !== undefined && redacted.length > trimLength) { + return `${redacted.substring(0, trimLength)}...`; + } + + return redacted; } /** - * Redacts sensitive information from log messages if the log level is 'error'. + * Redacts sensitive information from log messages when the log level is + * `error` or `warn`. Runs on the raw `info.message` before any colorize / + * splat transforms so the sensitive-token regexes don't have to contend + * with ANSI escape sequences (whose trailing `m` would otherwise defeat + * `\b` anchors). + * * Note: Intentionally mutates the object. * @param {Object} info - The log information object. * @returns {Object} - The modified log information object. */ const redactFormat = winston.format((info) => { - if (info.level === 'error') { - info.message = redactMessage(info.message); - if (info[MESSAGE_SYMBOL]) { + if (info.level === 'error' || info.level === 'warn') { + if (typeof info.message === 'string') { + info.message = redactMessage(info.message); + } + if (typeof info[MESSAGE_SYMBOL] === 'string') { info[MESSAGE_SYMBOL] = redactMessage(info[MESSAGE_SYMBOL]); } } @@ -96,12 +94,120 @@ const condenseArray = (item) => { return item; }; +const RESERVED_LOG_KEYS = new Set(['level', 'message', 'timestamp', 'splat']); + /** - * Formats log messages for debugging purposes. - * - Truncates long strings within log messages. - * - Condenses arrays by truncating long strings and objects as strings within array items. - * - Redacts sensitive information from log messages if the log level is 'error'. - * - Converts log information object to a formatted string. + * Extracts user-supplied metadata from a winston info object. Filters out: + * - Reserved winston keys (`level`, `message`, `timestamp`, `splat`). + * - Numeric-string keys (`"0"`, `"1"`, ...) that `format.splat()` can + * synthesize when a primitive is passed as an extra log argument. + * - Values that are undefined, null, empty strings, functions, or symbols. + * + * Underscore-prefixed keys are intentionally preserved so legitimate + * fields like MongoDB `_id` survive. + * + * @param {Record} source - The object to extract metadata from. + * @returns {Record | undefined} - The extracted metadata, or undefined if empty. + */ +function extractMetaObject(source) { + if (source == null || typeof source !== 'object') { + return undefined; + } + const meta = {}; + for (const key of Object.keys(source)) { + if (RESERVED_LOG_KEYS.has(key)) { + continue; + } + if (NUMERIC_KEY_RE.test(key)) { + continue; + } + const value = source[key]; + if (value === undefined || value === null || value === '') { + continue; + } + if (typeof value === 'function' || typeof value === 'symbol') { + continue; + } + meta[key] = value; + } + return Object.keys(meta).length > 0 ? meta : undefined; +} + +/** + * Formats the metadata portion of a winston info object as a compact + * single-line JSON trailer, suitable for appending to the console message. + * Returns an empty string when there is no meaningful metadata. + * + * @param {Record} info - The winston info object. + * @returns {string} - The serialized metadata, or an empty string. + */ +function formatConsoleMeta(info) { + const meta = extractMetaObject(info); + if (!meta) { + return ''; + } + const seen = new WeakSet(); + const replacer = (_key, value) => { + if (typeof value === 'string') { + const safe = redactMessage(value); + return safe.length > CONSOLE_JSON_STRING_LENGTH + ? `${safe.substring(0, CONSOLE_JSON_STRING_LENGTH)}...` + : safe; + } + if (value !== null && typeof value === 'object') { + if (seen.has(value)) { + return '[Circular]'; + } + seen.add(value); + } + return value; + }; + + try { + return JSON.stringify(meta, replacer); + } catch { + /* + * Fall back to per-field serialization: a single unserializable field + * shouldn't drop every other scalar in the trailer. Scalars are emitted + * as-is; values that still fail serialization are replaced with a + * placeholder so `provider`, `model`, etc. continue to surface. + */ + const parts = []; + for (const key of Object.keys(meta)) { + const perFieldSeen = new WeakSet(); + const perFieldReplacer = (k, value) => { + if (typeof value === 'string') { + return replacer(k, value); + } + if (value !== null && typeof value === 'object') { + if (perFieldSeen.has(value)) { + return '[Circular]'; + } + perFieldSeen.add(value); + } + return value; + }; + try { + parts.push(`${JSON.stringify(key)}:${JSON.stringify(meta[key], perFieldReplacer)}`); + } catch { + parts.push(`${JSON.stringify(key)}:"[Unserializable]"`); + } + } + return parts.length > 0 ? `{${parts.join(',')}}` : ''; + } +} + +/** + * Formats log messages for file and debug-console transports. Three paths: + * - `warn` / `error`: append a compact single-line JSON metadata trailer + * (via `formatConsoleMeta`) and pass the full line through `redactMessage` + * so sensitive patterns are scrubbed. + * - `debug`: perform the detailed multi-line object traversal of + * `SPLAT_SYMBOL[0]`, with long-string truncation and array condensation. + * Redaction on this path is not applied here (debug-file consumers + * historically accept raw detail). + * - Other levels: return the truncated `" : "` + * line with no metadata. * * @param {Object} options - The options for formatting log messages. * @param {string} options.level - The log level. @@ -120,6 +226,23 @@ const debugTraverse = winston.format.printf(({ level, message, timestamp, ...met } let msg = `${timestamp} ${level}: ${truncateLongStrings(message?.trim(), DEBUG_MESSAGE_LENGTH)}`; + const levelStr = typeof level === 'string' ? level : String(level); + const isErrorOrWarn = levelStr.includes('error') || levelStr.includes('warn'); + + /* + * Warn/error follow a simpler code path: append a single-line JSON + * metadata trailer (same shape as the console formatter) and pass the + * result through `redactMessage`. The complex object-traversal below is + * kept for debug level only, where detailed multi-line output is the + * intended behavior and its splat/interpolation interactions were + * already tolerated. + */ + if (isErrorOrWarn) { + const trailer = formatConsoleMeta(metadata); + const line = trailer ? `${msg} ${trailer}` : msg; + return redactMessage(line); + } + try { if (level !== 'debug') { return msg; @@ -229,4 +352,5 @@ module.exports = { redactMessage, debugTraverse, jsonTruncateFormat, + formatConsoleMeta, }; diff --git a/api/config/winston.js b/api/config/winston.js index 93b84f7c46..c077cf5bf8 100644 --- a/api/config/winston.js +++ b/api/config/winston.js @@ -2,7 +2,13 @@ const path = require('path'); const fs = require('fs'); const winston = require('winston'); require('winston-daily-rotate-file'); -const { redactFormat, redactMessage, debugTraverse, jsonTruncateFormat } = require('./parsers'); +const { + redactFormat, + redactMessage, + debugTraverse, + jsonTruncateFormat, + formatConsoleMeta, +} = require('./parsers'); /** * Determine the log directory. @@ -110,12 +116,16 @@ const consoleFormat = winston.format.combine( winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), // redactErrors(), winston.format.printf((info) => { - const message = `${info.timestamp} ${info.level}: ${info.message}`; - if (info.level.includes('error')) { - return redactMessage(message); + const base = `${info.timestamp} ${info.level}: ${info.message}`; + const isErrorOrWarn = info.level.includes('error') || info.level.includes('warn'); + + if (isErrorOrWarn) { + const metaTrailer = formatConsoleMeta(info); + const line = metaTrailer ? `${base} ${metaTrailer}` : base; + return redactMessage(line); } - return message; + return base; }), ); diff --git a/api/test/__mocks__/logger.js b/api/test/__mocks__/logger.js index 56fb28cbab..62f9bee93a 100644 --- a/api/test/__mocks__/logger.js +++ b/api/test/__mocks__/logger.js @@ -56,5 +56,6 @@ jest.mock('~/config/parsers', () => { redactMessage: jest.fn(), redactFormat: jest.fn(), debugTraverse: jest.fn(), + formatConsoleMeta: jest.fn(() => ''), }; });