From c00fb2d73d334bb48424193465e8a92e0776e3c8 Mon Sep 17 00:00:00 2001
From: matt burnett
Date: Tue, 30 Jun 2026 17:35:51 -0700
Subject: [PATCH] fix: `stripHeavyErrorFields` Winston format (defense-in-depth) (#14018)

---
Reviewer note: line breaks and generic type arguments were reconstructed, helper
docs were added, and `stripHeavy` no longer flattens non-plain objects. Hunk
counts are updated accordingly; the blob ids on the `index` lines are stale.

 packages/data-schemas/src/config/parsers.ts   | 210 +++++++++++++++-
 .../data-schemas/src/config/winston.spec.ts   | 247 ++++++++++++++++++
 packages/data-schemas/src/config/winston.ts   |   9 +-
 3 files changed, 464 insertions(+), 2 deletions(-)
 create mode 100644 packages/data-schemas/src/config/winston.spec.ts

diff --git a/packages/data-schemas/src/config/parsers.ts b/packages/data-schemas/src/config/parsers.ts
index ad12f2cfe5..2b2db4a692 100644
--- a/packages/data-schemas/src/config/parsers.ts
+++ b/packages/data-schemas/src/config/parsers.ts
@@ -21,6 +21,27 @@ const MAX_REDACTION_STRING_LENGTH = Math.max(
 );
 const MAX_REDACTION_BUFFER_BYTES = MAX_REDACTION_STRING_LENGTH;
 
+// Keys whose values are connection/TLS internals; dropped from every error-like node.
+const HEAVY_ERROR_KEYS = new Set([
+  'httpsAgent',
+  'httpAgent',
+  'agent',
+  'socket',
+  'sockets',
+  '_httpMessage',
+  '_httpAgent',
+  'parser',
+  '_tlsOptions',
+  '_handle',
+  'ssl',
+]);
+// Keys dropped only from nodes flagged `isAxiosError`; other errors keep them.
+const AXIOS_ONLY_HEAVY_KEYS = new Set(['config', 'request']);
+// Values nested deeper than this are passed through without being copied or stripped.
+const MAX_STRIP_DEPTH = 6;
+// Copied explicitly: `Object.keys` misses e.g. `message`/`stack` on real `Error` instances.
+const PRESERVED_ERROR_PROPS = ['message', 'stack', 'name', 'code'] as const;
+
 const sensitiveKeys: RegExp[] = [
   /\b(sk-)[a-zA-Z0-9_-]+/g, // OpenAI API key pattern
   /\b(Bearer )[^\s"']+/g, // Header: Bearer token pattern
@@ -505,6 +526,193 @@ const debugTraverse: winston.Logform.Format = winston.format.printf(
   },
 );
 
+/**
+ * Duck-types an object as an error: a real `Error`, an object flagged
+ * `isAxiosError`, an object with a string `stack`, or one whose string `name`
+ * ends in `Error`.
+ */
+const isErrorLike = (value: object): boolean => {
+  if (value instanceof Error) {
+    return true;
+  }
+  const record = value as Record<string, unknown>;
+  if (record.isAxiosError === true) {
+    return true;
+  }
+  if (typeof record.stack === 'string') {
+    return true;
+  }
+  return typeof record.name === 'string' && record.name.endsWith('Error');
+};
+
+/**
+ * Reduces an Axios request config to its `method` and `url`. Returns `undefined`
+ * when the config is not an object or carries neither field.
+ */
+const compactRequestInfo = (config: unknown): { method?: unknown; url?: unknown } | undefined => {
+  if (config == null || typeof config !== 'object') {
+    return undefined;
+  }
+  const { method, url } = config as Record<string, unknown>;
+  if (method === undefined && url === undefined) {
+    return undefined;
+  }
+  return { method, url };
+};
+
+/**
+ * Keeps only `status`, `statusText`, `headers` and `data` from a response object,
+ * dropping every other property. Returns `undefined` for non-object input.
+ */
+const compactResponse = (response: unknown): Record<string, unknown> | undefined => {
+  if (response == null || typeof response !== 'object') {
+    return undefined;
+  }
+  const { status, statusText, headers, data } = response as Record<string, unknown>;
+  return { status, statusText, headers, data };
+};
+
+/**
+ * Builds a shallow, sanitized copy of an error-like node. Heavy keys are dropped;
+ * nodes flagged `isAxiosError` additionally lose `config`/`request` and gain a
+ * compact `requestInfo` plus a compact `response`. The preserved error props are
+ * copied last so they survive even when `Object.keys` does not list them.
+ */
+const sanitizeErrorNode = (node: Record<string, unknown>): Record<string, unknown> => {
+  const sanitized: Record<string, unknown> = {};
+  const nodeIsAxios = node.isAxiosError === true;
+
+  for (const key of Object.keys(node)) {
+    if (HEAVY_ERROR_KEYS.has(key) || (nodeIsAxios && AXIOS_ONLY_HEAVY_KEYS.has(key))) {
+      continue;
+    }
+    if (nodeIsAxios && key === 'response') {
+      const response = compactResponse(node.response);
+      if (response !== undefined) {
+        sanitized.response = response;
+      }
+      continue;
+    }
+    sanitized[key] = node[key];
+  }
+
+  if (nodeIsAxios) {
+    const requestInfo = compactRequestInfo(node.config);
+    if (requestInfo !== undefined) {
+      sanitized.requestInfo = requestInfo;
+    }
+  }
+
+  for (const key of PRESERVED_ERROR_PROPS) {
+    const value = node[key];
+    if (value !== undefined) {
+      sanitized[key] = value;
+    }
+  }
+
+  return sanitized;
+};
+
+/**
+ * True for object literals and null-prototype records. Anything with a custom
+ * prototype (Date, Buffer, Map, ...) is treated as opaque so it keeps its own
+ * serialization instead of being flattened by an object spread.
+ */
+const isPlainObject = (value: object): boolean => {
+  const proto: unknown = Object.getPrototypeOf(value);
+  return proto === null || proto === Object.prototype;
+};
+
+/**
+ * Recursively copies `value`, sanitizing every error-like node on the way down.
+ * Primitives, values nested deeper than `MAX_STRIP_DEPTH`, frozen objects and
+ * non-plain objects are returned as-is; an object that is already being visited
+ * higher up the current path is replaced by the string `[Circular]`.
+ */
+const stripHeavy = (value: unknown, depth: number, seen: WeakSet<object>): unknown => {
+  if (value == null || typeof value !== 'object') {
+    return value;
+  }
+  if (depth > MAX_STRIP_DEPTH) {
+    return value;
+  }
+  if (seen.has(value)) {
+    return '[Circular]';
+  }
+  // Ancestor-path tracking (added on entry, removed on exit) so genuinely cyclic
+  // references are caught without collapsing benign objects shared between siblings.
+  seen.add(value);
+
+  let result: unknown;
+  if (Array.isArray(value)) {
+    result = value.map((item) => stripHeavy(item, depth + 1, seen));
+  } else if (isErrorLike(value)) {
+    const working = sanitizeErrorNode(value as Record<string, unknown>);
+    for (const key of Object.keys(working)) {
+      working[key] = stripHeavy(working[key], depth + 1, seen);
+    }
+    result = working;
+  } else if (Object.isFrozen(value) || !isPlainObject(value)) {
+    // Spreading a Date/Map yields `{}` and spreading a Buffer yields one key per
+    // byte, so non-plain objects pass through untouched.
+    result = value;
+  } else {
+    const working = { ...(value as Record<string, unknown>) };
+    for (const key of Object.keys(working)) {
+      working[key] = stripHeavy(working[key], depth + 1, seen);
+    }
+    result = working;
+  }
+
+  seen.delete(value);
+  return result;
+};
+
+/**
+ * Strips heavy, non-serializable fields (e.g. AxiosError `config`/`httpsAgent`,
+ * sockets, TLS internals) from error-like log nodes before serialization, while
+ * preserving a compact `requestInfo`, a compact `response`, and the error's
+ * message/stack/name/code. Operates on copies and never mutates caller-owned objects.
+ * Only `error` and `warn` entries are processed; if sanitizing throws, the
+ * original `info` is returned unchanged so the log entry is still emitted.
+ */
+const stripHeavyErrorFields: winston.Logform.FormatWrap = winston.format(
+  (info: winston.Logform.TransformableInfo) => {
+    if (info.level !== 'error' && info.level !== 'warn') {
+      return info;
+    }
+    try {
+      const seen = new WeakSet<object>();
+      // Winston merges a logged error's enumerable props (config/httpsAgent/...) onto
+      // the top-level info object when the message has no format token, so the info
+      // node itself must be sanitized as an error-like node, not just its values.
+      const base = isErrorLike(info)
+        ? sanitizeErrorNode(info as unknown as Record<string, unknown>)
+        : { ...(info as Record<string, unknown>) };
+      const result = base as Record<string | symbol, unknown>;
+
+      for (const key of Object.keys(result)) {
+        result[key] = stripHeavy(result[key], 0, seen);
+      }
+
+      // sanitizeErrorNode rebuilds from enumerable string keys only; re-attach the
+      // reserved winston symbols (LEVEL/MESSAGE) that downstream transports read.
+      for (const sym of Object.getOwnPropertySymbols(info)) {
+        result[sym] = (info as Record<string | symbol, unknown>)[sym];
+      }
+
+      const splat = (info as Record<string | symbol, unknown>)[SPLAT_SYMBOL];
+      if (Array.isArray(splat)) {
+        result[SPLAT_SYMBOL] = splat.map((item) => stripHeavy(item, 0, seen));
+      }
+
+      return result as winston.Logform.TransformableInfo;
+    } catch {
+      return info;
+    }
+  },
+);
+
 /**
  * Truncates long string values in JSON log objects.
  * Prevents outputting extremely long values (e.g., base64, blobs).
@@ -548,4 +756,4 @@ const jsonTruncateFormat: winston.Logform.FormatWrap = winston.format(
   },
 );
 
-export { redactFormat, redactMessage, debugTraverse, jsonTruncateFormat };
+export { redactFormat, redactMessage, debugTraverse, jsonTruncateFormat, stripHeavyErrorFields };
diff --git a/packages/data-schemas/src/config/winston.spec.ts b/packages/data-schemas/src/config/winston.spec.ts
new file mode 100644
index 0000000000..e0d4e0f5f4
--- /dev/null
+++ b/packages/data-schemas/src/config/winston.spec.ts
@@ -0,0 +1,247 @@
+import winston from 'winston';
+import { stripHeavyErrorFields } from './parsers';
+
+const SPLAT = Symbol.for('splat');
+
+/**
+ * Builds an Error shaped like an AxiosError whose config/request/response all
+ * reference one agent object holding 500 fake sockets with TLS session blobs.
+ */
+const buildAxiosErrorWithHeavyAgent = (): Error => {
+  const sockets: Record<string, unknown> = {};
+  for (let i = 0; i < 500; i++) {
+    sockets[`socket-${i}`] = {
+      _tlsOptions: { session: 'A'.repeat(1024) },
+      bytesRead: i,
+    };
+  }
+  const httpsAgent = {
+    sockets,
+    freeSockets: sockets,
+    _tlsOptions: { session: 'B'.repeat(2048) },
+    maxSockets: Infinity,
+  };
+  const config = {
+    method: 'post',
+    url: 'http://rag-api/query',
+    httpsAgent,
+    headers: { Authorization: 'Bearer secret' },
+  };
+
+  const error = new Error('Request failed with status code 404') as Error & {
+    name: string;
+    code: string;
+    isAxiosError: boolean;
+    config: unknown;
+    request: unknown;
+    response: unknown;
+    httpsAgent: unknown;
+  };
+  error.name = 'AxiosError';
+  error.code = 'ERR_BAD_REQUEST';
+  error.isAxiosError = true;
+  error.config = config;
+  error.request = { socket: { _handle: {} }, agent: httpsAgent };
+  error.response = {
+    status: 404,
+    statusText: 'Not Found',
+    headers: { 'content-type': 'application/json' },
+    data: { detail: 'not found' },
+    config,
+    request: { agent: httpsAgent },
+  };
+  error.httpsAgent = httpsAgent;
+  return error;
+};
+
+describe('stripHeavyErrorFields', () => {
+  const runTransform = (info: Record<string | symbol, unknown>) => {
+    const format = stripHeavyErrorFields();
+    return format.transform(
+      info as unknown as import('winston').Logform.TransformableInfo,
+      format.options,
+    );
+  };
+
+  it('drops config/httpsAgent from a logged AxiosError while keeping message/stack/status', () => {
+    const error = buildAxiosErrorWithHeavyAgent();
+    const info = {
+      level: 'error',
+      message: 'Error encountered in `file_search` while querying file:',
+      [SPLAT]: [error],
+    } as Record<string | symbol, unknown>;
+
+    const out = runTransform(info) as Record<string | symbol, unknown>;
+    const sanitizedError = (out[SPLAT] as unknown[])[0];
+    const serialized = JSON.stringify(sanitizedError);
+
+    expect(serialized).not.toContain('httpsAgent');
+    expect(serialized).not.toContain('_tlsOptions');
+    expect(serialized).not.toContain('sockets');
+    expect(serialized).not.toContain('freeSockets');
+    expect(serialized).not.toMatch(/"config"/);
+
+    const parsed = JSON.parse(serialized);
+    expect(parsed.message).toBe('Request failed with status code 404');
+    expect(typeof parsed.stack).toBe('string');
+    expect(parsed.name).toBe('AxiosError');
+    expect(parsed.code).toBe('ERR_BAD_REQUEST');
+    expect(parsed.requestInfo).toEqual({ method: 'post', url: 'http://rag-api/query' });
+    expect(parsed.response.status).toBe(404);
+    expect(parsed.response.statusText).toBe('Not Found');
+    expect(parsed.response.data).toEqual({ detail: 'not found' });
+
+    // Everything except the (intentionally kept) stack trace is tiny: the heavy
+    // object graph is gone.
+    const { stack: _stack, ...withoutStack } = parsed;
+    expect(JSON.stringify(withoutStack).length).toBeLessThan(1024);
+
+    // The sanitized error is a tiny fraction of the raw object graph
+    // (the raw AxiosError serializes its socket/TLS pool into hundreds of KB).
+    const rawSize = JSON.stringify(error).length;
+    expect(serialized.length).toBeLessThan(rawSize / 50);
+  });
+
+  it('does not mutate the caller-owned error object', () => {
+    const error = buildAxiosErrorWithHeavyAgent();
+    const info = {
+      level: 'error',
+      message: 'boom',
+      [SPLAT]: [error],
+    } as Record<string | symbol, unknown>;
+
+    runTransform(info);
+
+    // Original error is untouched (downstream code may still rethrow/read it).
+    expect((error as unknown as { config: unknown }).config).toBeDefined();
+    expect((error as unknown as { httpsAgent: unknown }).httpsAgent).toBeDefined();
+  });
+
+  it('leaves non-error levels untouched', () => {
+    const info = { level: 'info', message: 'hello' } as Record<string | symbol, unknown>;
+    const out = runTransform(info);
+    expect(out).toBe(info);
+  });
+
+  it('strips heavy fields hoisted to top-level info by winston for `logger.error(msg, err)`', () => {
+    // A token-less message makes winston merge the error's enumerable props
+    // (config/httpsAgent/...) onto the top-level info object, not just into [splat].
+    const captured: unknown[] = [];
+    const capture = winston.format((info) => {
+      captured.push(JSON.parse(JSON.stringify(info)));
+      return info;
+    });
+    const logger = winston.createLogger({
+      level: 'error',
+      format: winston.format.combine(
+        winston.format.errors({ stack: true }),
+        stripHeavyErrorFields(),
+        capture(),
+        winston.format.splat(),
+      ),
+      transports: [new winston.transports.Console({ silent: true })],
+    });
+
+    logger.error(
+      'Error encountered in `file_search` while querying file:',
+      buildAxiosErrorWithHeavyAgent(),
+    );
+
+    const serialized = JSON.stringify(captured[0]);
+    expect(serialized).not.toContain('httpsAgent');
+    expect(serialized).not.toContain('_tlsOptions');
+    expect(serialized).not.toContain('sockets');
+    expect(serialized).not.toMatch(/"config"/);
+    expect(serialized.length).toBeLessThan(4096);
+
+    const record = captured[0] as Record<string, unknown>;
+    expect(record.requestInfo).toEqual({ method: 'post', url: 'http://rag-api/query' });
+    expect((record.response as Record<string, unknown>).status).toBe(404);
+    expect(typeof record.message).toBe('string');
+  });
+
+  it('preserves config/request/response on a non-Axios error while still stripping agent internals', () => {
+    const sockets: Record<string, unknown> = { s0: { _tlsOptions: { session: 'X'.repeat(2048) } } };
+    const httpsAgent = {
+      sockets,
+      freeSockets: sockets,
+      _tlsOptions: { session: 'Y'.repeat(2048) },
+    };
+    const error = new Error('ENOENT: no such file or directory') as Error & {
+      httpsAgent: unknown;
+      config: unknown;
+      request: unknown;
+      response: unknown;
+    };
+    error.httpsAgent = httpsAgent;
+    error.config = { method: 'get', url: 'http://internal/resource' };
+    error.request = { id: 'req-123' };
+    error.response = { status: 503, statusText: 'Service Unavailable', traceId: 'trace-abc' };
+
+    const info = {
+      level: 'error',
+      message: 'non-axios failure',
+      [SPLAT]: [error],
+    } as Record<string | symbol, unknown>;
+
+    const out = runTransform(info) as Record<string | symbol, unknown>;
+    const sanitizedError = (out[SPLAT] as unknown[])[0];
+    const serialized = JSON.stringify(sanitizedError);
+
+    expect(serialized).not.toContain('httpsAgent');
+    expect(serialized).not.toContain('_tlsOptions');
+    expect(serialized).not.toContain('freeSockets');
+
+    const parsed = JSON.parse(serialized);
+    expect(parsed.config).toEqual({ method: 'get', url: 'http://internal/resource' });
+    expect(parsed.request).toEqual({ id: 'req-123' });
+    expect(parsed.response).toEqual({
+      status: 503,
+      statusText: 'Service Unavailable',
+      traceId: 'trace-abc',
+    });
+    expect(parsed.requestInfo).toBeUndefined();
+    expect(parsed.message).toBe('ENOENT: no such file or directory');
+  });
+
+  it('still reduces a genuine AxiosError (config dropped, requestInfo derived, response compacted)', () => {
+    const error = buildAxiosErrorWithHeavyAgent();
+    const info = {
+      level: 'error',
+      message: 'axios failure',
+      [SPLAT]: [error],
+    } as Record<string | symbol, unknown>;
+
+    const out = runTransform(info) as Record<string | symbol, unknown>;
+    const sanitizedError = (out[SPLAT] as unknown[])[0];
+    const serialized = JSON.stringify(sanitizedError);
+
+    expect(serialized).not.toContain('httpsAgent');
+    expect(serialized).not.toMatch(/"config"/);
+
+    const parsed = JSON.parse(serialized);
+    expect(parsed.config).toBeUndefined();
+    expect(parsed.requestInfo).toEqual({ method: 'post', url: 'http://rag-api/query' });
+    expect(parsed.response.status).toBe(404);
+  });
+
+  it('passes non-plain values (Date, Buffer, Map) through instead of flattening them', () => {
+    const when = new Date('2026-01-02T03:04:05.000Z');
+    const payload = Buffer.from('abc');
+    const lookup = new Map([['k', 'v']]);
+    const info = {
+      level: 'error',
+      message: 'with metadata',
+      when,
+      payload,
+      lookup,
+    } as Record<string | symbol, unknown>;
+
+    const out = runTransform(info) as Record<string | symbol, unknown>;
+
+    expect(out.when).toBe(when);
+    expect(out.payload).toBe(payload);
+    expect(out.lookup).toBe(lookup);
+    expect(JSON.parse(JSON.stringify(out)).when).toBe('2026-01-02T03:04:05.000Z');
+  });
+});
diff --git a/packages/data-schemas/src/config/winston.ts b/packages/data-schemas/src/config/winston.ts
index 7b856b41fd..3b927f7a84 100644
--- a/packages/data-schemas/src/config/winston.ts
+++ b/packages/data-schemas/src/config/winston.ts
@@ -1,6 +1,12 @@
 import winston from 'winston';
 import 'winston-daily-rotate-file';
-import { redactFormat, redactMessage, debugTraverse, jsonTruncateFormat } from './parsers';
+import {
+  redactFormat,
+  redactMessage,
+  debugTraverse,
+  jsonTruncateFormat,
+  stripHeavyErrorFields,
+} from './parsers';
 import { getTenantId, getUserId, getRequestId, SYSTEM_TENANT_ID } from './tenantContext';
 import { getLogDirectory } from './utils';
 
@@ -84,6 +90,7 @@ const fileFormat = winston.format.combine(
   redactFormat(),
   winston.format.timestamp({ format: () => new Date().toISOString() }),
   winston.format.errors({ stack: true }),
+  stripHeavyErrorFields(),
   winston.format.splat(),
   requestContextFormat(),
 );