📊 feat: Surface Message Feedback as Langfuse Scores (#13544)

* feat: surface message feedback (thumbs up/down) as Langfuse scores When Langfuse tracing is enabled, the message feedback endpoint now posts a boolean `user-feedback` score (1/0 + tag/comment) to Langfuse for the assistant message's trace; clearing feedback deletes the score. Fire-and-forget, so the feedback UX never blocks on Langfuse. Linking is lookup-free: the run opts into deterministic Langfuse trace ids (`langfuse.deterministicTraceId`, passed to the agents Run), so the trace id is sha256(messageId)[:32]. The feedback route recomputes the same id and scores by it. - api/server/services/Langfuse.js: POST/DELETE /api/public/scores (env-gated) - api/server/utils/langfuseTrace.js: traceIdForMessage(messageId) - api/server/routes/messages.js: fire feedback score after the Mongo write - packages/api: pass langfuse.deterministicTraceId to the run - bump @librechat/agents to ^3.2.21 (adds LangfuseConfig.deterministicTraceId) Closes #13537 Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix: match Langfuse trace environment for feedback scores @librechat/agents passes no environment to its Langfuse tracer, so @langfuse/otel falls back to LANGFUSE_TRACING_ENVIRONMENT and otherwise to Langfuse's "default". The score helper instead fell back to NODE_ENV, so a deployment with only NODE_ENV=production filed scores under "production" while the trace stayed on "default" — the score never landed on the trace. Use LANGFUSE_TRACING_ENVIRONMENT only, and omit `environment` when unset so Langfuse defaults both score and trace to "default". Addresses Codex review on #13544. 
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix: don't require LANGFUSE_BASE_URL to post feedback scores The agent tracer emits traces with just the public/secret keys (defaulting to Langfuse Cloud, or via the legacy LANGFUSE_BASEURL alias), but the score helper disabled itself unless LANGFUSE_BASE_URL was set — so an otherwise-traced deployment silently posted no scores. Resolve the base URL the same way the tracer does (LANGFUSE_BASE_URL -> LANGFUSE_BASEURL -> Cloud) and gate enablement on the credentials only. Addresses Codex review on #13544. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix: only post feedback scores for agent-endpoint messages The feedback route is shared by all message types, but deterministic Langfuse trace IDs are only enabled for agent runs. Rating a message from a non-agent endpoint (with Langfuse configured) posted a user-feedback score for sha256(messageId) that no trace will ever match, leaving orphan scores. Gate scoring on isAgentsEndpoint(message.endpoint); `updateMessage` now returns `endpoint` so the route can check it. Addresses Codex review on #13544. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix: gate feedback scoring by !isAssistantsEndpoint, not isAgentsEndpoint The previous gate used isAgentsEndpoint, which only matches the literal `agents` endpoint. But provider endpoints (anthropic, openai, custom, …) run through the agents runtime as ephemeral agents and DO emit deterministic AgentRun traces, so isAgentsEndpoint('anthropic') === false suppressed scoring for the common case. Only the OpenAI/Azure Assistants endpoints use a separate runtime with no agent trace, so gate on !isAssistantsEndpoint instead. 
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * style: sort message method imports * fix: honor Langfuse tracing gates for feedback scores * refactor: move Langfuse feedback logic to api package * fix: support Langfuse host for feedback scores * test: type Langfuse feedback fetch mock * chore: compact Langfuse feedback comment --------- Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2026-06-09 17:31:19 +00:00 · 2026-06-07 11:23:41 -04:00 · 2026-06-07 11:23:41 -04:00 · 90ebecb254
commit 90ebecb254
parent cb1d536874
8 changed files with 241 additions and 2 deletions
--- a/api/server/routes/messages.js
+++ b/api/server/routes/messages.js
@ -1,8 +1,13 @@
 const express = require('express');
 const { v4: uuidv4 } = require('uuid');
 const { logger } = require('@librechat/data-schemas');
-const { ContentTypes } = require('librechat-data-provider');
-const { unescapeLaTeX, countTokens } = require('@librechat/api');
+const { ContentTypes, isAssistantsEndpoint } = require('librechat-data-provider');
+const {
+  unescapeLaTeX,
+  countTokens,
+  sendFeedbackScore,
+  traceIdForMessage,
+} = require('@librechat/api');
 const { findAllArtifacts, replaceArtifactContent } = require('~/server/services/Artifacts/update');
 const { requireJwtAuth, validateMessageReq } = require('~/server/middleware');
 const db = require('~/models');
@ -391,6 +396,14 @@ router.put('/:conversationId/:messageId/feedback', validateMessageReq, async (re
      { context: 'updateFeedback' },
    );

+    // Best-effort: Assistants messages do not have deterministic AgentRun traces.
+    if (!isAssistantsEndpoint(updatedMessage.endpoint)) {
+      sendFeedbackScore({
+        traceId: traceIdForMessage(messageId),
+        feedback: updatedMessage.feedback,
+      }).catch((err) => logger.error('[langfuse] feedback score failed:', err));
+    }
+
    res.json({
      messageId,
      conversationId,
--- a/packages/api/src/agents/run.ts
+++ b/packages/api/src/agents/run.ts
@ -1040,6 +1040,11 @@ export async function createRun({
    calibrationRatio,
    indexTokenCountMap,
    eagerEventToolExecution: { enabled: true },
+    // Derive the Langfuse trace id deterministically from runId so message
+    // feedback can be scored against the trace without a lookup (see the
+    // feedback route in api/server/routes/messages.js). No-op unless Langfuse
+    // tracing is enabled. Requires @librechat/agents >= 3.2.21.
+    langfuse: { deterministicTraceId: true },
    ...(enableToolOutputReferences && {
      toolOutputReferences: { enabled: true },
    }),
--- a/packages/api/src/index.ts
+++ b/packages/api/src/index.ts
@ -57,6 +57,8 @@ export * from './storage';
 export * from './tools';
 /* web search */
 export * from './web';
+/* Langfuse */
+export * from './langfuse';
 /* Cache */
 export * from './cache';
 /* Shared Links */
--- a/packages/api/src/langfuse/feedback.spec.ts
+++ b/packages/api/src/langfuse/feedback.spec.ts
@ -0,0 +1,125 @@
+// Replace the shared logger with a stub that only exposes `debug`.
+// `virtual: true` lets the mock be registered even when the package cannot be resolved.
+jest.mock(
+  '@librechat/data-schemas',
+  () => ({
+    logger: {
+      debug: jest.fn(),
+    },
+  }),
+  { virtual: true },
+);
+
+// Environment variables removed by `clearLangfuseEnv` before and after every test.
+const langfuseEnvKeys = [
+  'LANGFUSE_PUBLIC_KEY',
+  'LANGFUSE_SECRET_KEY',
+  'LANGFUSE_BASE_URL',
+  'LANGFUSE_HOST',
+  'LANGFUSE_BASEURL',
+  'LANGFUSE_TRACING_ENABLED',
+  'LANGFUSE_SAMPLE_RATE',
+  'LANGFUSE_TRACING_ENVIRONMENT',
+];
+// Spy on the global `fetch`; assigned in `beforeEach` and restored in `afterEach`.
+let fetchMock: jest.SpiedFunction<typeof fetch>;
+
+/** Removes every variable listed in `langfuseEnvKeys` from `process.env`. */
+function clearLangfuseEnv() {
+  langfuseEnvKeys.forEach((name) => {
+    delete process.env[name];
+  });
+}
+
+/** Sets the public/secret key pair that the tests expect in the Authorization header. */
+function setLangfuseCredentials() {
+  Object.assign(process.env, {
+    LANGFUSE_PUBLIC_KEY: 'public-key',
+    LANGFUSE_SECRET_KEY: 'secret-key',
+  });
+}
+
+/**
+ * Imports `./feedback` afresh. The module registry is reset first so the
+ * module's top-level code runs again against the current `process.env`.
+ */
+async function loadFeedback(): Promise<typeof import('./feedback')> {
+  jest.resetModules();
+  const freshModule = await import('./feedback');
+  return freshModule;
+}
+
+/** Returns the `fetch` spy currently installed by `beforeEach`. */
+function getFetchMock(): jest.SpiedFunction<typeof fetch> {
+  return fetchMock;
+}
+
+// Each test sets its environment first and only then loads `./feedback` via
+// `loadFeedback()`, because the module reads `process.env` at import time.
+describe('Langfuse feedback scores', () => {
+  // Start every test from a clean environment with credentials and a 200 `fetch` stub.
+  beforeEach(() => {
+    clearLangfuseEnv();
+    setLangfuseCredentials();
+    fetchMock = jest.spyOn(global, 'fetch').mockResolvedValue(new Response(null, { status: 200 }));
+  });
+
+  // Undo environment changes and restore the real `fetch`.
+  afterEach(() => {
+    clearLangfuseEnv();
+    fetchMock.mockRestore();
+    jest.clearAllMocks();
+  });
+
+  // No base URL or tracing flags set: expect a POST to Langfuse Cloud with Basic auth.
+  it('posts feedback scores when Langfuse tracing is enabled by default', async () => {
+    const { sendFeedbackScore } = await loadFeedback();
+
+    await sendFeedbackScore({
+      traceId: 'trace-id',
+      feedback: { rating: 'thumbsUp', tag: 'helpful', text: 'nice' },
+    });
+
+    expect(getFetchMock()).toHaveBeenCalledWith(
+      'https://cloud.langfuse.com/api/public/scores',
+      expect.objectContaining({
+        method: 'POST',
+        headers: expect.objectContaining({
+          Authorization: `Basic ${Buffer.from('public-key:secret-key').toString('base64')}`,
+          'Content-Type': 'application/json',
+        }),
+        body: expect.any(String),
+      }),
+    );
+    // Inspect the serialized payload of the first (and only) call.
+    const [, init] = getFetchMock().mock.calls[0];
+    expect(JSON.parse(init?.body as string)).toMatchObject({
+      id: 'feedback-trace-id',
+      traceId: 'trace-id',
+      name: 'user-feedback',
+      value: 1,
+      dataType: 'BOOLEAN',
+      comment: 'helpful — nice',
+      metadata: { rating: 'thumbsUp', tag: 'helpful' },
+    });
+  });
+
+  // `LANGFUSE_HOST` is used as the base URL when `LANGFUSE_BASE_URL` is unset.
+  it('posts feedback scores to the configured Langfuse host', async () => {
+    process.env.LANGFUSE_HOST = 'http://langfuse-server:3000';
+    const { sendFeedbackScore } = await loadFeedback();
+
+    await sendFeedbackScore({
+      traceId: 'trace-id',
+      feedback: { rating: 'thumbsUp' },
+    });
+
+    expect(getFetchMock()).toHaveBeenCalledWith(
+      'http://langfuse-server:3000/api/public/scores',
+      expect.objectContaining({ method: 'POST' }),
+    );
+  });
+
+  // An explicit "false" tracing flag must suppress every request.
+  it('skips scores when Langfuse tracing is disabled', async () => {
+    process.env.LANGFUSE_TRACING_ENABLED = 'false';
+    const { sendFeedbackScore } = await loadFeedback();
+
+    await sendFeedbackScore({
+      traceId: 'trace-id',
+      feedback: { rating: 'thumbsDown' },
+    });
+
+    expect(getFetchMock()).not.toHaveBeenCalled();
+  });
+
+  // A sample rate of zero must also suppress every request.
+  it('skips scores when Langfuse sampling is set to zero', async () => {
+    process.env.LANGFUSE_SAMPLE_RATE = '0';
+    const { sendFeedbackScore } = await loadFeedback();
+
+    await sendFeedbackScore({
+      traceId: 'trace-id',
+      feedback: { rating: 'thumbsUp' },
+    });
+
+    expect(getFetchMock()).not.toHaveBeenCalled();
+  });
+});
--- a/packages/api/src/langfuse/feedback.ts
+++ b/packages/api/src/langfuse/feedback.ts
@ -0,0 +1,86 @@
+import { logger } from '@librechat/data-schemas';
+
+/** User feedback attached to a message; every field is optional. */
+export type LangfuseFeedback = {
+  /** Thumbs rating; when absent, `sendFeedbackScore` deletes the score instead of posting one. */
+  rating?: 'thumbsUp' | 'thumbsDown';
+  /** Optional tag; sent in the score's comment and metadata. */
+  tag?: string;
+  /** Optional free text; joined with the tag to form the score's comment. */
+  text?: string;
+};
+
+/** Arguments accepted by `sendFeedbackScore`. */
+export type SendFeedbackScoreParams = {
+  /** Langfuse trace id the score belongs to; an empty value makes the call a no-op. */
+  traceId: string;
+  /** Current feedback; `null`, `undefined`, or a missing rating clears the score. */
+  feedback?: LangfuseFeedback | null;
+};
+
+/** Langfuse Cloud endpoint, used when no base URL variable is configured. */
+const DEFAULT_BASE_URL = 'https://cloud.langfuse.com';
+/**
+ * Base URL of the Langfuse public API, without a trailing slash.
+ *
+ * Candidates are checked in order: `LANGFUSE_BASE_URL`, `LANGFUSE_HOST`,
+ * `LANGFUSE_BASEURL`, then the Cloud default. Blank values are skipped so an
+ * empty entry (e.g. `LANGFUSE_BASE_URL=`) does not yield a relative URL that
+ * `fetch` rejects, and trailing slashes are removed so the request paths
+ * appended to this value never contain `//`.
+ */
+const BASE =
+  [process.env.LANGFUSE_BASE_URL, process.env.LANGFUSE_HOST, process.env.LANGFUSE_BASEURL]
+    .map((candidate) => candidate?.trim().replace(/\/+$/, ''))
+    .find(Boolean) ?? DEFAULT_BASE_URL;
+
+/**
+ * Returns true when `value` is an explicit "off" setting.
+ *
+ * Matching ignores surrounding whitespace and letter case. A missing value
+ * (`undefined`) is not treated as false.
+ */
+function isFalseEnv(value?: string): boolean {
+  if (value == null) {
+    return false;
+  }
+  switch (value.trim().toLowerCase()) {
+    case '0':
+    case 'false':
+    case 'no':
+    case 'off':
+      return true;
+    default:
+      return false;
+  }
+}
+
+/**
+ * Returns false only when `value` parses to the number zero.
+ *
+ * A missing or blank value, and any value that is not a finite number,
+ * leaves sampling enabled.
+ */
+function isSampleRateEnabled(value?: string): boolean {
+  const isUnset = value == null || value.trim().length === 0;
+  if (isUnset) {
+    return true;
+  }
+  const rate = Number(value);
+  // NaN and ±Infinity also compare unequal to 0, so unparseable input stays enabled.
+  return rate !== 0;
+}
+
+/**
+ * Whether scores are sent at all. Every condition must hold: both API keys
+ * are set, `LANGFUSE_TRACING_ENABLED` is not a false-like value, and
+ * `LANGFUSE_SAMPLE_RATE` is not zero.
+ */
+const ENABLED = [
+  Boolean(process.env.LANGFUSE_PUBLIC_KEY && process.env.LANGFUSE_SECRET_KEY),
+  !isFalseEnv(process.env.LANGFUSE_TRACING_ENABLED),
+  isSampleRateEnabled(process.env.LANGFUSE_SAMPLE_RATE),
+].every(Boolean);
+/** HTTP Basic credentials (`publicKey:secretKey`, base64-encoded); `undefined` when disabled. */
+const AUTHORIZATION = ENABLED
+  ? `Basic ${Buffer.from(
+      [process.env.LANGFUSE_PUBLIC_KEY, process.env.LANGFUSE_SECRET_KEY].join(':'),
+    ).toString('base64')}`
+  : undefined;
+/** Optional environment label; added to posted scores only when non-empty. */
+const ENVIRONMENT = process.env.LANGFUSE_TRACING_ENVIRONMENT;
+
+/**
+ * Cancels a response body that will not be read, so the underlying connection
+ * can be released promptly instead of waiting for garbage collection.
+ */
+async function discardBody(res: Response): Promise<void> {
+  // Best-effort: a failed cancellation must not turn a successful request into an error.
+  await res.body?.cancel().catch(() => undefined);
+}
+
+/**
+ * Mirrors a message's feedback onto a Langfuse trace as a `user-feedback` score.
+ *
+ * - With a rating: POSTs a boolean score (`thumbsUp` -> 1, anything else -> 0)
+ *   whose comment joins the tag and text, and whose metadata carries the
+ *   rating and tag.
+ * - Without a rating: DELETEs the score; a 404 response is treated as success.
+ *
+ * The score id is derived from the trace id (`feedback-<traceId>`), so every
+ * call for the same trace targets the same score.
+ *
+ * Resolves without sending anything when scoring is disabled, credentials are
+ * missing, or `traceId` is empty.
+ *
+ * @param params.traceId - Langfuse trace id to score.
+ * @param params.feedback - Current feedback, or a nullish value to clear the score.
+ * @throws Error when Langfuse answers with a non-OK status (other than 404 on
+ *   delete). Rejections from `fetch` itself propagate unchanged.
+ */
+export async function sendFeedbackScore({
+  traceId,
+  feedback,
+}: SendFeedbackScoreParams): Promise<void> {
+  if (!ENABLED || !AUTHORIZATION || !traceId) {
+    return;
+  }
+
+  const scoreId = `feedback-${traceId}`;
+
+  if (!feedback?.rating) {
+    const res = await fetch(`${BASE}/api/public/scores/${encodeURIComponent(scoreId)}`, {
+      method: 'DELETE',
+      headers: { Authorization: AUTHORIZATION },
+    });
+    // A missing score is fine: there is nothing left to clear.
+    if (!res.ok && res.status !== 404) {
+      throw new Error(`langfuse score delete ${res.status}: ${await res.text()}`);
+    }
+    await discardBody(res);
+    return;
+  }
+
+  const body = {
+    id: scoreId,
+    traceId,
+    name: 'user-feedback',
+    value: feedback.rating === 'thumbsUp' ? 1 : 0,
+    dataType: 'BOOLEAN',
+    // Omit the comment entirely when neither a tag nor text was given.
+    comment: [feedback.tag, feedback.text].filter(Boolean).join(' — ') || undefined,
+    metadata: { rating: feedback.rating, tag: feedback.tag },
+    // Only send `environment` when configured.
+    ...(ENVIRONMENT ? { environment: ENVIRONMENT } : {}),
+  };
+
+  const res = await fetch(`${BASE}/api/public/scores`, {
+    method: 'POST',
+    headers: { Authorization: AUTHORIZATION, 'Content-Type': 'application/json' },
+    body: JSON.stringify(body),
+  });
+  if (!res.ok) {
+    throw new Error(`langfuse score create ${res.status}: ${await res.text()}`);
+  }
+  await discardBody(res);
+  logger.debug(`[langfuse] feedback score sent for trace ${traceId} (${feedback.rating})`);
+}
--- a/packages/api/src/langfuse/index.ts
+++ b/packages/api/src/langfuse/index.ts
@ -0,0 +1,2 @@
+export * from './feedback';
+export * from './trace';
--- a/packages/api/src/langfuse/trace.ts
+++ b/packages/api/src/langfuse/trace.ts
@ -0,0 +1,5 @@
+import { createHash } from 'crypto';
+
+/**
+ * Computes the trace id for a message: the first 32 hex characters of the
+ * SHA-256 digest of `messageId` (UTF-8 encoded).
+ */
+export function traceIdForMessage(messageId: string): string {
+  const hash = createHash('sha256');
+  hash.update(messageId, 'utf8');
+  const hexDigest = hash.digest('hex');
+  return hexDigest.substring(0, 32);
+}
--- a/packages/data-schemas/src/methods/message.ts
+++ b/packages/data-schemas/src/methods/message.ts
@ -285,6 +285,7 @@ export function createMessageMethods(mongoose: typeof import('mongoose')): Messa
        isCreatedByUser: updatedMessage.isCreatedByUser,
        tokenCount: updatedMessage.tokenCount,
        feedback: updatedMessage.feedback,
+        endpoint: updatedMessage.endpoint,
      };
    } catch (err) {
      logger.error('Error updating message:', err);