From 90ebecb25489b60e67d0f3a0cb12ae052a62f482 Mon Sep 17 00:00:00 2001 From: Gil Raphaelli Date: Sun, 7 Jun 2026 11:23:41 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=8A=20feat:=20Surface=20Message=20Feed?= =?UTF-8?q?back=20as=20Langfuse=20Scores=20(#13544)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: surface message feedback (thumbs up/down) as Langfuse scores When Langfuse tracing is enabled, the message feedback endpoint now posts a boolean `user-feedback` score (1/0 + tag/comment) to Langfuse for the assistant message's trace; clearing feedback deletes the score. Fire-and-forget, so the feedback UX never blocks on Langfuse. Linking is lookup-free: the run opts into deterministic Langfuse trace ids (`langfuse.deterministicTraceId`, passed to the agents Run), so the trace id is sha256(messageId)[:32]. The feedback route recomputes the same id and scores by it. - api/server/services/Langfuse.js: POST/DELETE /api/public/scores (env-gated) - api/server/utils/langfuseTrace.js: traceIdForMessage(messageId) - api/server/routes/messages.js: fire feedback score after the Mongo write - packages/api: pass langfuse.deterministicTraceId to the run - bump @librechat/agents to ^3.2.21 (adds LangfuseConfig.deterministicTraceId) Closes #13537 Co-Authored-By: Claude Opus 4.8 (1M context) * fix: match Langfuse trace environment for feedback scores @librechat/agents passes no environment to its Langfuse tracer, so @langfuse/otel falls back to LANGFUSE_TRACING_ENVIRONMENT and otherwise to Langfuse's "default". The score helper instead fell back to NODE_ENV, so a deployment with only NODE_ENV=production filed scores under "production" while the trace stayed on "default" — the score never landed on the trace. Use LANGFUSE_TRACING_ENVIRONMENT only, and omit `environment` when unset so Langfuse defaults both score and trace to "default". Addresses Codex review on #13544. 
Co-Authored-By: Claude Opus 4.8 (1M context) * fix: don't require LANGFUSE_BASE_URL to post feedback scores The agent tracer emits traces with just the public/secret keys (defaulting to Langfuse Cloud, or via the legacy LANGFUSE_BASEURL alias), but the score helper disabled itself unless LANGFUSE_BASE_URL was set — so an otherwise-traced deployment silently posted no scores. Resolve the base URL the same way the tracer does (LANGFUSE_BASE_URL -> LANGFUSE_BASEURL -> Cloud) and gate enablement on the credentials only. Addresses Codex review on #13544. Co-Authored-By: Claude Opus 4.8 (1M context) * fix: only post feedback scores for agent-endpoint messages The feedback route is shared by all message types, but deterministic Langfuse trace IDs are only enabled for agent runs. Rating a message from a non-agent endpoint (with Langfuse configured) posted a user-feedback score for sha256(messageId) that no trace will ever match, leaving orphan scores. Gate scoring on isAgentsEndpoint(message.endpoint); `updateMessage` now returns `endpoint` so the route can check it. Addresses Codex review on #13544. Co-Authored-By: Claude Opus 4.8 (1M context) * fix: gate feedback scoring by !isAssistantsEndpoint, not isAgentsEndpoint The previous gate used isAgentsEndpoint, which only matches the literal `agents` endpoint. But provider endpoints (anthropic, openai, custom, …) run through the agents runtime as ephemeral agents and DO emit deterministic AgentRun traces, so isAgentsEndpoint('anthropic') === false suppressed scoring for the common case. Only the OpenAI/Azure Assistants endpoints use a separate runtime with no agent trace, so gate on !isAssistantsEndpoint instead. 
Co-Authored-By: Claude Opus 4.8 (1M context) * style: sort message method imports * fix: honor Langfuse tracing gates for feedback scores * refactor: move Langfuse feedback logic to api package * fix: support Langfuse host for feedback scores * test: type Langfuse feedback fetch mock * chore: compact Langfuse feedback comment --------- Co-authored-by: Claude Opus 4.8 (1M context) Co-authored-by: Danny Avila --- api/server/routes/messages.js | 17 ++- packages/api/src/agents/run.ts | 5 + packages/api/src/index.ts | 2 + packages/api/src/langfuse/feedback.spec.ts | 125 +++++++++++++++++++ packages/api/src/langfuse/feedback.ts | 86 +++++++++++++ packages/api/src/langfuse/index.ts | 2 + packages/api/src/langfuse/trace.ts | 5 + packages/data-schemas/src/methods/message.ts | 1 + 8 files changed, 241 insertions(+), 2 deletions(-) create mode 100644 packages/api/src/langfuse/feedback.spec.ts create mode 100644 packages/api/src/langfuse/feedback.ts create mode 100644 packages/api/src/langfuse/index.ts create mode 100644 packages/api/src/langfuse/trace.ts diff --git a/api/server/routes/messages.js b/api/server/routes/messages.js index a07293c0e2..0133fe8541 100644 --- a/api/server/routes/messages.js +++ b/api/server/routes/messages.js @@ -1,8 +1,13 @@ const express = require('express'); const { v4: uuidv4 } = require('uuid'); const { logger } = require('@librechat/data-schemas'); -const { ContentTypes } = require('librechat-data-provider'); -const { unescapeLaTeX, countTokens } = require('@librechat/api'); +const { ContentTypes, isAssistantsEndpoint } = require('librechat-data-provider'); +const { + unescapeLaTeX, + countTokens, + sendFeedbackScore, + traceIdForMessage, +} = require('@librechat/api'); const { findAllArtifacts, replaceArtifactContent } = require('~/server/services/Artifacts/update'); const { requireJwtAuth, validateMessageReq } = require('~/server/middleware'); const db = require('~/models'); @@ -391,6 +396,14 @@ 
router.put('/:conversationId/:messageId/feedback', validateMessageReq, async (re { context: 'updateFeedback' }, ); + // Best-effort: Assistants messages do not have deterministic AgentRun traces. + if (!isAssistantsEndpoint(updatedMessage.endpoint)) { + sendFeedbackScore({ + traceId: traceIdForMessage(messageId), + feedback: updatedMessage.feedback, + }).catch((err) => logger.error('[langfuse] feedback score failed:', err)); + } + res.json({ messageId, conversationId, diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index 2549ec7984..07aa726c01 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -1040,6 +1040,11 @@ export async function createRun({ calibrationRatio, indexTokenCountMap, eagerEventToolExecution: { enabled: true }, + // Derive the Langfuse trace id deterministically from runId so message + // feedback can be scored against the trace without a lookup (see the + // feedback route in api/server/routes/messages.js). No-op unless Langfuse + // tracing is enabled. Requires @librechat/agents >= 3.2.21. 
+ langfuse: { deterministicTraceId: true }, ...(enableToolOutputReferences && { toolOutputReferences: { enabled: true }, }), diff --git a/packages/api/src/index.ts b/packages/api/src/index.ts index 49e4ff50d3..d923a4e75d 100644 --- a/packages/api/src/index.ts +++ b/packages/api/src/index.ts @@ -57,6 +57,8 @@ export * from './storage'; export * from './tools'; /* web search */ export * from './web'; +/* Langfuse */ +export * from './langfuse'; /* Cache */ export * from './cache'; /* Shared Links */ diff --git a/packages/api/src/langfuse/feedback.spec.ts b/packages/api/src/langfuse/feedback.spec.ts new file mode 100644 index 0000000000..daf64cf09e --- /dev/null +++ b/packages/api/src/langfuse/feedback.spec.ts @@ -0,0 +1,125 @@ +jest.mock( + '@librechat/data-schemas', + () => ({ + logger: { + debug: jest.fn(), + }, + }), + { virtual: true }, +); + +const langfuseEnvKeys = [ + 'LANGFUSE_PUBLIC_KEY', + 'LANGFUSE_SECRET_KEY', + 'LANGFUSE_BASE_URL', + 'LANGFUSE_HOST', + 'LANGFUSE_BASEURL', + 'LANGFUSE_TRACING_ENABLED', + 'LANGFUSE_SAMPLE_RATE', + 'LANGFUSE_TRACING_ENVIRONMENT', +]; +let fetchMock: jest.SpiedFunction; + +function clearLangfuseEnv() { + for (const key of langfuseEnvKeys) { + delete process.env[key]; + } +} + +function setLangfuseCredentials() { + process.env.LANGFUSE_PUBLIC_KEY = 'public-key'; + process.env.LANGFUSE_SECRET_KEY = 'secret-key'; +} + +async function loadFeedback(): Promise { + jest.resetModules(); + return import('./feedback'); +} + +function getFetchMock(): jest.SpiedFunction { + return fetchMock; +} + +describe('Langfuse feedback scores', () => { + beforeEach(() => { + clearLangfuseEnv(); + setLangfuseCredentials(); + fetchMock = jest.spyOn(global, 'fetch').mockResolvedValue(new Response(null, { status: 200 })); + }); + + afterEach(() => { + clearLangfuseEnv(); + fetchMock.mockRestore(); + jest.clearAllMocks(); + }); + + it('posts feedback scores when Langfuse tracing is enabled by default', async () => { + const { sendFeedbackScore } = 
await loadFeedback(); + + await sendFeedbackScore({ + traceId: 'trace-id', + feedback: { rating: 'thumbsUp', tag: 'helpful', text: 'nice' }, + }); + + expect(getFetchMock()).toHaveBeenCalledWith( + 'https://cloud.langfuse.com/api/public/scores', + expect.objectContaining({ + method: 'POST', + headers: expect.objectContaining({ + Authorization: `Basic ${Buffer.from('public-key:secret-key').toString('base64')}`, + 'Content-Type': 'application/json', + }), + body: expect.any(String), + }), + ); + const [, init] = getFetchMock().mock.calls[0]; + expect(JSON.parse(init?.body as string)).toMatchObject({ + id: 'feedback-trace-id', + traceId: 'trace-id', + name: 'user-feedback', + value: 1, + dataType: 'BOOLEAN', + comment: 'helpful — nice', + metadata: { rating: 'thumbsUp', tag: 'helpful' }, + }); + }); + + it('posts feedback scores to the configured Langfuse host', async () => { + process.env.LANGFUSE_HOST = 'http://langfuse-server:3000'; + const { sendFeedbackScore } = await loadFeedback(); + + await sendFeedbackScore({ + traceId: 'trace-id', + feedback: { rating: 'thumbsUp' }, + }); + + expect(getFetchMock()).toHaveBeenCalledWith( + 'http://langfuse-server:3000/api/public/scores', + expect.objectContaining({ method: 'POST' }), + ); + }); + + it('skips scores when Langfuse tracing is disabled', async () => { + process.env.LANGFUSE_TRACING_ENABLED = 'false'; + const { sendFeedbackScore } = await loadFeedback(); + + await sendFeedbackScore({ + traceId: 'trace-id', + feedback: { rating: 'thumbsDown' }, + }); + + expect(getFetchMock()).not.toHaveBeenCalled(); + }); + + it('skips scores when Langfuse sampling is set to zero', async () => { + process.env.LANGFUSE_SAMPLE_RATE = '0'; + const { sendFeedbackScore } = await loadFeedback(); + + await sendFeedbackScore({ + traceId: 'trace-id', + feedback: { rating: 'thumbsUp' }, + }); + + expect(getFetchMock()).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/api/src/langfuse/feedback.ts 
b/packages/api/src/langfuse/feedback.ts new file mode 100644 index 0000000000..db6e2abdc0 --- /dev/null +++ b/packages/api/src/langfuse/feedback.ts @@ -0,0 +1,86 @@ +import { logger } from '@librechat/data-schemas'; + +export type LangfuseFeedback = { + rating?: 'thumbsUp' | 'thumbsDown'; + tag?: string; + text?: string; +}; + +export type SendFeedbackScoreParams = { + traceId: string; + feedback?: LangfuseFeedback | null; +}; + +const DEFAULT_BASE_URL = 'https://cloud.langfuse.com'; +const BASE = + process.env.LANGFUSE_BASE_URL ?? + process.env.LANGFUSE_HOST ?? + process.env.LANGFUSE_BASEURL ?? + DEFAULT_BASE_URL; + +function isFalseEnv(value?: string): boolean { + return value != null && ['0', 'false', 'no', 'off'].includes(value.trim().toLowerCase()); +} + +function isSampleRateEnabled(value?: string): boolean { + if (value == null || value.trim() === '') { + return true; + } + const parsed = Number(value); + return !Number.isFinite(parsed) || parsed !== 0; +} + +const ENABLED = + Boolean(process.env.LANGFUSE_PUBLIC_KEY && process.env.LANGFUSE_SECRET_KEY) && + !isFalseEnv(process.env.LANGFUSE_TRACING_ENABLED) && + isSampleRateEnabled(process.env.LANGFUSE_SAMPLE_RATE); +const AUTHORIZATION = ENABLED + ? 
'Basic ' + + Buffer.from(`${process.env.LANGFUSE_PUBLIC_KEY}:${process.env.LANGFUSE_SECRET_KEY}`).toString( + 'base64', + ) + : undefined; +const ENVIRONMENT = process.env.LANGFUSE_TRACING_ENVIRONMENT; + +export async function sendFeedbackScore({ + traceId, + feedback, +}: SendFeedbackScoreParams): Promise { + if (!ENABLED || !AUTHORIZATION || !traceId) { + return; + } + + const scoreId = `feedback-${traceId}`; + + if (!feedback?.rating) { + const res = await fetch(`${BASE}/api/public/scores/${encodeURIComponent(scoreId)}`, { + method: 'DELETE', + headers: { Authorization: AUTHORIZATION }, + }); + if (!res.ok && res.status !== 404) { + throw new Error(`langfuse score delete ${res.status}: ${await res.text()}`); + } + return; + } + + const body = { + id: scoreId, + traceId, + name: 'user-feedback', + value: feedback.rating === 'thumbsUp' ? 1 : 0, + dataType: 'BOOLEAN', + comment: [feedback.tag, feedback.text].filter(Boolean).join(' — ') || undefined, + metadata: { rating: feedback.rating, tag: feedback.tag }, + ...(ENVIRONMENT ? 
{ environment: ENVIRONMENT } : {}), + }; + + const res = await fetch(`${BASE}/api/public/scores`, { + method: 'POST', + headers: { Authorization: AUTHORIZATION, 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + }); + if (!res.ok) { + throw new Error(`langfuse score create ${res.status}: ${await res.text()}`); + } + logger.debug(`[langfuse] feedback score sent for trace ${traceId} (${feedback.rating})`); +} diff --git a/packages/api/src/langfuse/index.ts b/packages/api/src/langfuse/index.ts new file mode 100644 index 0000000000..b22dba4698 --- /dev/null +++ b/packages/api/src/langfuse/index.ts @@ -0,0 +1,2 @@ +export * from './feedback'; +export * from './trace'; diff --git a/packages/api/src/langfuse/trace.ts b/packages/api/src/langfuse/trace.ts new file mode 100644 index 0000000000..0bc80633c8 --- /dev/null +++ b/packages/api/src/langfuse/trace.ts @@ -0,0 +1,5 @@ +import { createHash } from 'crypto'; + +export function traceIdForMessage(messageId: string): string { + return createHash('sha256').update(messageId, 'utf8').digest('hex').slice(0, 32); +} diff --git a/packages/data-schemas/src/methods/message.ts b/packages/data-schemas/src/methods/message.ts index dc3ab6d5b2..9ee15c7978 100644 --- a/packages/data-schemas/src/methods/message.ts +++ b/packages/data-schemas/src/methods/message.ts @@ -285,6 +285,7 @@ export function createMessageMethods(mongoose: typeof import('mongoose')): Messa isCreatedByUser: updatedMessage.isCreatedByUser, tokenCount: updatedMessage.tokenCount, feedback: updatedMessage.feedback, + endpoint: updatedMessage.endpoint, }; } catch (err) { logger.error('Error updating message:', err);