diff --git a/api/server/index.spec.js b/api/server/index.spec.js index 5c4310cea7..8eb8956c3f 100644 --- a/api/server/index.spec.js +++ b/api/server/index.spec.js @@ -60,12 +60,17 @@ describe('Telemetry wiring', () => { }); it('mounts telemetry middleware before routes and telemetry errors before ErrorController', () => { - expect(source.indexOf('app.use(telemetryMiddleware);')).toBeLessThan( - source.indexOf("app.use('/api/auth'"), - ); - expect(source.indexOf('app.use(telemetryErrorMiddleware);')).toBeLessThan( - source.indexOf('app.use(ErrorController);'), - ); + const telemetryMiddlewareIndex = source.indexOf('app.use(telemetryMiddleware);'); + const apiRoutesIndex = source.indexOf("app.use('/api/auth'"); + const telemetryErrorMiddlewareIndex = source.indexOf('app.use(telemetryErrorMiddleware);'); + const errorControllerIndex = source.indexOf('app.use(ErrorController);'); + + expect(telemetryMiddlewareIndex).toBeGreaterThan(-1); + expect(apiRoutesIndex).toBeGreaterThan(-1); + expect(telemetryErrorMiddlewareIndex).toBeGreaterThan(-1); + expect(errorControllerIndex).toBeGreaterThan(-1); + expect(telemetryMiddlewareIndex).toBeLessThan(apiRoutesIndex); + expect(telemetryErrorMiddlewareIndex).toBeLessThan(errorControllerIndex); }); }); diff --git a/packages/api/src/telemetry/config.ts b/packages/api/src/telemetry/config.ts index af4302eea0..cf8a26882a 100644 --- a/packages/api/src/telemetry/config.ts +++ b/packages/api/src/telemetry/config.ts @@ -1,7 +1,7 @@ const DEFAULT_SERVICE_NAME = 'librechat'; const DEFAULT_HEALTH_PATH = '/health'; -export type TelemetryStatus = 'disabled' | 'failed' | 'started' | 'stopped'; +export type TelemetryStatus = 'disabled' | 'failed' | 'started' | 'starting' | 'stopped'; export interface TelemetryConfig { enabled: boolean; diff --git a/packages/api/src/telemetry/middleware.spec.ts b/packages/api/src/telemetry/middleware.spec.ts index 5613c9a506..080ce5b459 100644 --- a/packages/api/src/telemetry/middleware.spec.ts +++ b/packages/api/src/telemetry/middleware.spec.ts @@ -5,18 +5,30 @@ import type { Span } from '@opentelemetry/api'; import type { ServerRequest } from '~/types'; import { telemetryErrorMiddleware, telemetryMiddleware } from './middleware'; -type MockSpan = Pick; - interface MockResponse extends EventEmitter { statusCode: number; } -function createSpan(): jest.Mocked { - return { - setStatus: jest.fn(), - setAttributes: jest.fn(), - recordException: jest.fn(), - }; +function createSpan(): jest.Mocked { + const span = {} as jest.Mocked; + span.addEvent = jest.fn, Parameters>(() => span); + span.addLink = jest.fn, Parameters>(() => span); + span.addLinks = jest.fn, Parameters>(() => span); + span.end = jest.fn>(); + span.isRecording = jest.fn>(() => true); + span.recordException = jest.fn>(); + span.setAttribute = jest.fn, Parameters>(() => span); + span.setAttributes = jest.fn, Parameters>(() => span); + span.setStatus = jest.fn, Parameters>(() => span); + span.spanContext = jest.fn, Parameters>( + () => ({ + spanId: '0000000000000000', + traceFlags: 0, + traceId: '00000000000000000000000000000000', + }), + ); + span.updateName = jest.fn, Parameters>(() => span); + return span; } function createResponse(statusCode = 200): MockResponse { @@ -68,7 +80,7 @@ describe('telemetryMiddleware', () => { const req = createRequest(); const res = createResponse(201); const next: NextFunction = jest.fn(); - jest.spyOn(trace, 'getActiveSpan').mockReturnValue(span as Span); + jest.spyOn(trace, 'getActiveSpan').mockReturnValue(span); telemetryMiddleware(req, res as Response, next); res.emit('finish'); @@ -102,7 +114,7 @@ describe('telemetryMiddleware', () => { it('ignores health checks', () => { const span = createSpan(); const next = jest.fn(); - jest.spyOn(trace, 'getActiveSpan').mockReturnValue(span as Span); + jest.spyOn(trace, 'getActiveSpan').mockReturnValue(span); telemetryMiddleware( createRequest({ @@ -126,7 +138,7 @@ describe('telemetryMiddleware', () => { route: undefined, }); const res = createResponse(404); - jest.spyOn(trace, 'getActiveSpan').mockReturnValue(span as Span); + jest.spyOn(trace, 'getActiveSpan').mockReturnValue(span); telemetryMiddleware(req, res as Response, jest.fn()); res.emit('finish'); @@ -144,13 +156,25 @@ describe('telemetryMiddleware', () => { const span = createSpan(); const req = createRequest(); const res = createResponse(500); - jest.spyOn(trace, 'getActiveSpan').mockReturnValue(span as Span); + jest.spyOn(trace, 'getActiveSpan').mockReturnValue(span); telemetryMiddleware(req, res as Response, jest.fn()); res.emit('finish'); expect(span.setStatus).toHaveBeenCalledWith({ code: SpanStatusCode.ERROR }); }); + + it('records completion attributes only once when finish and close both fire', () => { + const span = createSpan(); + const res = createResponse(200); + jest.spyOn(trace, 'getActiveSpan').mockReturnValue(span); + + telemetryMiddleware(createRequest(), res as Response, jest.fn()); + res.emit('finish'); + res.emit('close'); + + expect(span.setAttributes).toHaveBeenCalledTimes(3); + }); }); describe('telemetryErrorMiddleware', () => { @@ -162,7 +186,7 @@ describe('telemetryErrorMiddleware', () => { const span = createSpan(); const error = new TypeError('boom'); const next = jest.fn(); - jest.spyOn(trace, 'getActiveSpan').mockReturnValue(span as Span); + jest.spyOn(trace, 'getActiveSpan').mockReturnValue(span); telemetryErrorMiddleware(error, createRequest(), createResponse() as Response, next); diff --git a/packages/api/src/telemetry/middleware.ts b/packages/api/src/telemetry/middleware.ts index f7df2cc683..fd30753dcd 100644 --- a/packages/api/src/telemetry/middleware.ts +++ b/packages/api/src/telemetry/middleware.ts @@ -96,8 +96,17 @@ export function telemetryMiddleware(req: ServerRequest, res: Response, next: Nex }); setIdentityAttributes(span, req); - res.once('finish', () => setCompletionAttributes(span, req, res, start)); - res.once('close', () => setCompletionAttributes(span, req, res, start)); + let completed = false; + const complete = () => { + if (completed) { + return; + } + completed = true; + setCompletionAttributes(span, req, res, start); + }; + + res.once('finish', complete); + res.once('close', complete); next(); } diff --git a/packages/api/src/telemetry/sdk.spec.ts b/packages/api/src/telemetry/sdk.spec.ts index 36292d7c91..335137f000 100644 --- a/packages/api/src/telemetry/sdk.spec.ts +++ b/packages/api/src/telemetry/sdk.spec.ts @@ -19,12 +19,6 @@ jest.mock('@opentelemetry/resources', () => ({ resourceFromAttributes: mockResourceFromAttributes, })); -interface BunGlobal { - Bun?: object; -} - -const runtime = globalThis as typeof globalThis & BunGlobal; - describe('telemetry SDK lifecycle', () => { let emitWarningSpy: jest.SpyInstance; let initializeTelemetry: (typeof import('./sdk'))['initializeTelemetry']; @@ -35,14 +29,14 @@ describe('telemetry SDK lifecycle', () => { jest.clearAllMocks(); ({ initializeTelemetry, resetTelemetryForTests, shutdownTelemetry } = await import('./sdk')); resetTelemetryForTests(); - delete runtime.Bun; + Reflect.deleteProperty(globalThis, 'Bun'); emitWarningSpy = jest.spyOn(process, 'emitWarning').mockImplementation(() => true); }); afterEach(() => { resetTelemetryForTests(); emitWarningSpy.mockRestore(); - delete runtime.Bun; + Reflect.deleteProperty(globalThis, 'Bun'); }); it('does not initialize when tracing is disabled by default', () => { @@ -65,7 +59,10 @@ describe('telemetry SDK lifecycle', () => { }); it('does not initialize under Bun runtime', () => { - runtime.Bun = {}; + Object.defineProperty(globalThis, 'Bun', { + configurable: true, + value: {}, + }); const controller = initializeTelemetry({ OTEL_TRACING_ENABLED: 'true' }); @@ -108,6 +105,29 @@ describe('telemetry SDK lifecycle', () => { ); }); + it('reflects lifecycle status from the controller getter', async () => { + const controller = initializeTelemetry({ OTEL_TRACING_ENABLED: 'true' }); + + expect(controller.status).toBe('started'); + await controller.shutdown(); + expect(controller.status).toBe('stopped'); + }); + + it('handles async SDK start failures without throwing', async () => { + mockStart.mockRejectedValueOnce(new Error('async start failed')); + + const controller = initializeTelemetry({ OTEL_TRACING_ENABLED: 'true' }); + + expect(controller.enabled).toBe(true); + expect(controller.status).toBe('starting'); + await controller.shutdown(); + expect(controller.status).toBe('failed'); + expect(emitWarningSpy).toHaveBeenCalledWith( + 'OpenTelemetry initialization failed: async start failed', + { code: 'LIBRECHAT_OTEL' }, + ); + }); + it('returns failed status without throwing when SDK start fails', () => { mockStart.mockImplementationOnce(() => { throw new Error('start failed'); @@ -130,4 +150,16 @@ describe('telemetry SDK lifecycle', () => { expect(mockShutdown).toHaveBeenCalledTimes(1); }); + + it('keeps the active SDK available when shutdown fails', async () => { + mockShutdown.mockRejectedValueOnce(new Error('shutdown failed')); + const controller = initializeTelemetry({ OTEL_TRACING_ENABLED: 'true' }); + + await expect(shutdownTelemetry()).rejects.toThrow('shutdown failed'); + expect(controller.status).toBe('started'); + + await shutdownTelemetry(); + expect(mockShutdown).toHaveBeenCalledTimes(2); + expect(controller.status).toBe('stopped'); + }); }); diff --git a/packages/api/src/telemetry/sdk.ts b/packages/api/src/telemetry/sdk.ts index e4844665b7..e78022ca18 100644 --- a/packages/api/src/telemetry/sdk.ts +++ b/packages/api/src/telemetry/sdk.ts @@ -8,13 +8,9 @@ import type { Attributes } from '@opentelemetry/api'; import type { TelemetryConfig, TelemetryStatus } from './config'; import { getTelemetryConfig } from './config'; -interface BunGlobal { - Bun?: object; -} - export interface TelemetryController { enabled: boolean; - status: TelemetryStatus; + readonly status: TelemetryStatus; shutdown: () => Promise; } @@ -24,12 +20,13 @@ interface RegisteredSignal { } let activeSdk: NodeSDK | undefined; +let pendingSdk: NodeSDK | undefined; +let startPromise: Promise | undefined; let status: TelemetryStatus = 'stopped'; let registeredSignals: RegisteredSignal[] = []; function isBunRuntime(): boolean { - const runtime = globalThis as typeof globalThis & BunGlobal; - return runtime.Bun != null; + return Reflect.get(globalThis, 'Bun') != null; } function shouldIgnoreIncomingRequest(request: IncomingMessage, healthPath: string): boolean { @@ -89,6 +86,10 @@ function createSdk(config: TelemetryConfig): NodeSDK { return new NodeSDK(sdkConfig); } +function startSdk(sdk: NodeSDK): void | Promise { + return (sdk as NodeSDK & { start: () => void | Promise }).start(); +} + function emitWarning(message: string): void { process.emitWarning(message, { code: 'LIBRECHAT_OTEL' }); } @@ -96,11 +97,20 @@ function emitWarning(message: string): void { function makeController(enabled: boolean): TelemetryController { return { enabled, - status, + get status() { + return status; + }, shutdown: shutdownTelemetry, }; } +function unregisterShutdownHandlers(): void { + for (const { signal, listener } of registeredSignals) { + process.removeListener(signal, listener); + } + registeredSignals = []; +} + function registerShutdownHandlers(): void { if (registeredSignals.length > 0) { return; @@ -122,7 +132,7 @@ function registerShutdownHandlers(): void { } export function initializeTelemetry(env: NodeJS.ProcessEnv = process.env): TelemetryController { - if (activeSdk) { + if (activeSdk || pendingSdk) { return makeController(true); } @@ -134,7 +144,36 @@ export function initializeTelemetry(env: NodeJS.ProcessEnv = process.env): Telem try { const sdk = createSdk(config); - sdk.start(); + const result = startSdk(sdk); + if (result) { + pendingSdk = sdk; + status = 'starting'; + const pendingStart = result + .then(() => { + if (pendingSdk === sdk) { + pendingSdk = undefined; + activeSdk = sdk; + status = 'started'; + registerShutdownHandlers(); + } + }) + .catch((error) => { + if (pendingSdk === sdk) { + pendingSdk = undefined; + status = 'failed'; + const message = error instanceof Error ? error.message : String(error); + emitWarning(`OpenTelemetry initialization failed: ${message}`); + } + }); + startPromise = pendingStart; + void pendingStart.finally(() => { + if (startPromise === pendingStart) { + startPromise = undefined; + } + }); + return makeController(true); + } + activeSdk = sdk; status = 'started'; registerShutdownHandlers(); @@ -148,22 +187,31 @@ export function initializeTelemetry(env: NodeJS.ProcessEnv = process.env): Telem } export async function shutdownTelemetry(): Promise { + if (startPromise) { + await startPromise; + } + if (!activeSdk) { status = status === 'started' ? 'stopped' : status; return; } const sdk = activeSdk; - activeSdk = undefined; - await sdk.shutdown(); - status = 'stopped'; + try { + await sdk.shutdown(); + activeSdk = undefined; + status = 'stopped'; + unregisterShutdownHandlers(); + } catch (error) { + status = 'started'; + throw error; + } } export function resetTelemetryForTests(): void { activeSdk = undefined; + pendingSdk = undefined; + startPromise = undefined; status = 'stopped'; - for (const { signal, listener } of registeredSignals) { - process.removeListener(signal, listener); - } - registeredSignals = []; + unregisterShutdownHandlers(); }