🧠 fix: Apply Bedrock thinking config to bare inference-profile model IDs (#14054)

* 🧠 fix: Apply Bedrock thinking config to bare inference-profile model IDs

  The Bedrock request parser gated thinking config, sampling handling, and the anthropic_beta headers on the model ID literally containing `anthropic.`. When a deployment uses an application inference profile, the LibreChat model ID is a bare `claude-*` (e.g. `claude-sonnet-5`) that maps to the profile ARN — so the gate never matched, no `thinking` config was sent, and reasoning models returned empty thinking blocks (most visibly: Claude Sonnet 5 never streamed reasoning, while `us.anthropic.claude-opus-4-8` did).

  Match on the `claude` family token instead of the `anthropic.` prefix so prefixed (`anthropic.`, `us.`, `global.`) and bare inference-profile IDs are handled identically.

  Verified e2e against live Bedrock via the agents SDK: a bare `claude-sonnet-5` now sends `{type:'adaptive', display:'summarized'}` and streams reasoning. Non-Claude Bedrock models (llama/cohere) and pre-thinking Claude (3.5 sonnet) are unaffected.

* 🧹 fix: Strip stale thinking fields for non-thinking Claude Bedrock IDs

  Follow-up to the bare-ID matching change: broadening the anthropic guard to match bare `claude-*` meant a non-thinking Claude profile (e.g. a bare `claude-3-5-sonnet` inference profile) took the Claude cleanup branch, which kept persisted `thinking`/`anthropic_beta`/`output_config` from a previously-selected thinking model — leaking unsupported fields after a model switch.

  Extract `isThinkingModel` and, in the Claude cleanup branch, strip the thinking fields when the model isn't thinking-capable. Also fixes the pre-existing prefixed `anthropic.claude-3-5-sonnet` case (which already kept stale thinking). Thinking-capable models (sonnet-5, 3.7-sonnet) still keep their config.

* 🩹 fix: Preserve user anthropic_beta on non-thinking Claude cleanup

  The non-thinking stale-cleanup deleted amrf.anthropic_beta, but that is the generic Bedrock Anthropic beta field and may carry a user opt-in (e.g. max-tokens-3-5-sonnet-2024-07-15 for extended output on Claude 3.5). Strip only the thinking-specific fields (thinking/thinkingBudget/effort/output_config) and leave anthropic_beta intact.

* fix: clear persisted AMRF (output_config, thinking, generated betas) on bare Bedrock profiles

* fix: preserve persisted effort on resume + strip stale thinking/betas across bare profiles

* fix: normalize string/comma-delimited anthropic_beta before stripping generated betas
2026-07-03 12:54:01 +00:00 · 2026-07-01 14:19:34 -04:00 · 2026-07-01 14:19:34 -04:00 · 8683eccbbc
commit 8683eccbbc
parent 53ee82fe5d
2 changed files with 319 additions and 30 deletions
--- a/packages/data-provider/specs/bedrock.spec.ts
+++ b/packages/data-provider/specs/bedrock.spec.ts
@ -461,6 +461,214 @@ describe('bedrockInputParser', () => {
      expect(additionalFields.anthropic_beta).toEqual(BEDROCK_CLAUDE_4_BETAS);
    });

+    // Bedrock application inference profiles surface a bare `claude-*` model ID
+    // (no `anthropic.` prefix). The thinking/beta config must still apply.
+    test('should configure adaptive thinking for a bare claude-sonnet-5 (inference profile) ID', () => {
+      const input = { model: 'claude-sonnet-5' };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
+      expect(additionalFields.thinking).toEqual({ type: 'adaptive', display: 'summarized' });
+      expect(additionalFields.thinkingBudget).toBeUndefined();
+      expect(additionalFields.anthropic_beta).toEqual(BEDROCK_CLAUDE_4_BETAS);
+    });
+
+    test('bare claude-* IDs match their anthropic.-prefixed equivalents', () => {
+      const thinkingFor = (model: string) => {
+        const result = bedrockInputParser.parse({ model }) as Record<string, unknown>;
+        return (result.additionalModelRequestFields as Record<string, unknown>).thinking;
+      };
+      expect(thinkingFor('claude-sonnet-5')).toEqual(thinkingFor('anthropic.claude-sonnet-5'));
+      expect(thinkingFor('claude-opus-4-8')).toEqual(thinkingFor('us.anthropic.claude-opus-4-8'));
+      expect(thinkingFor('claude-sonnet-4-6')).toEqual(thinkingFor('anthropic.claude-sonnet-4-6'));
+    });
+
+    test('should configure extended thinking for a bare claude-3-7-sonnet ID', () => {
+      const input = { model: 'claude-3-7-sonnet' };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
+      expect(additionalFields.thinking).toBe(true);
+      expect(additionalFields.thinkingBudget).toBe(2000);
+      expect(additionalFields.anthropic_beta).toEqual([BEDROCK_OUTPUT_128K_BETA]);
+    });
+
+    test('should not configure thinking for non-Claude Bedrock models', () => {
+      const input = { model: 'meta.llama3-1-8b-instruct-v1:0' };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const additionalFields = result.additionalModelRequestFields as
+        | Record<string, unknown>
+        | undefined;
+      expect(additionalFields?.thinking).toBeUndefined();
+      expect(additionalFields?.anthropic_beta).toBeUndefined();
+    });
+
+    // Switching a persisted conversation to a non-thinking Claude model (bare or
+    // prefixed) must strip stale thinking fields carried over in AMRF, so they
+    // aren't sent to a profile that can't accept them — but a user-configured
+    // `anthropic_beta` opt-in must be preserved.
+    test.each(['claude-3-5-sonnet', 'anthropic.claude-3-5-sonnet'])(
+      'strips stale thinking fields but keeps user anthropic_beta for non-thinking Claude %s',
+      (model) => {
+        const input = {
+          model,
+          additionalModelRequestFields: {
+            thinking: { type: 'adaptive', display: 'summarized' },
+            anthropic_beta: ['max-tokens-3-5-sonnet-2024-07-15'],
+            output_config: { effort: 'high' },
+          },
+        };
+        const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+        const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+        expect(amrf?.thinking).toBeUndefined();
+        expect(amrf?.output_config).toBeUndefined();
+        expect(amrf?.anthropic_beta).toEqual(['max-tokens-3-5-sonnet-2024-07-15']);
+      },
+    );
+
+    test('keeps thinking config for a bare thinking Claude model with persisted AMRF', () => {
+      const input = {
+        model: 'claude-sonnet-5',
+        additionalModelRequestFields: { thinking: { type: 'adaptive', display: 'summarized' } },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const amrf = result.additionalModelRequestFields as Record<string, unknown>;
+      expect(amrf.thinking).toEqual({ type: 'adaptive', display: 'summarized' });
+    });
+
+    // The persisted AMRF is spread back into the final request, so anything left
+    // in it is sent as-is. An agent resume round-trips its llmConfig back into
+    // model_parameters, so a persisted output_config with NO top-level effort
+    // must be preserved as the user's saved choice; only an explicit unset
+    // ('' / null) clears it.
+    test('preserves persisted output_config when an adaptive model is re-parsed without top-level effort', () => {
+      const input = {
+        model: 'claude-opus-4-8',
+        additionalModelRequestFields: {
+          thinking: { type: 'adaptive', display: 'summarized' },
+          output_config: { effort: 'high' },
+        },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+      expect(amrf?.output_config).toEqual({ effort: 'high' });
+      expect(amrf?.thinking).toEqual({ type: 'adaptive', display: 'summarized' });
+    });
+
+    test.each(['', null])(
+      'clears persisted output_config when effort is explicitly unset (%p)',
+      (effort) => {
+        const input = {
+          model: 'claude-opus-4-8',
+          effort,
+          additionalModelRequestFields: {
+            thinking: { type: 'adaptive', display: 'summarized' },
+            output_config: { effort: 'high' },
+          },
+        };
+        const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+        const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+        expect(amrf?.output_config).toBeUndefined();
+      },
+    );
+
+    // Switching a persisted adaptive/disabled conversation to a bare non-adaptive
+    // thinking profile (3.7) must not leak the prior thinking object or output_config.
+    test('clears persisted thinking + output_config when switching to a bare non-adaptive thinking model', () => {
+      const input = {
+        model: 'claude-3-7-sonnet',
+        additionalModelRequestFields: {
+          thinking: { type: 'disabled' },
+          output_config: { effort: 'high' },
+        },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+      expect(amrf?.output_config).toBeUndefined();
+      expect(amrf?.thinking).not.toEqual({ type: 'disabled' });
+    });
+
+    // Switching a bare Claude 4+/5 profile (both generated betas persisted) to a
+    // bare 3.7 profile must drop the fine-grained beta 3.7 does not generate.
+    test('drops a stale generated beta not applicable to the target thinking model', () => {
+      const input = {
+        model: 'claude-3-7-sonnet',
+        additionalModelRequestFields: {
+          anthropic_beta: [
+            BEDROCK_OUTPUT_128K_BETA,
+            BEDROCK_FINE_GRAINED_TOOL_STREAMING_BETA,
+            'context-1m-2025-08-07',
+          ],
+        },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
+      expect(additionalFields.anthropic_beta).toEqual([
+        BEDROCK_OUTPUT_128K_BETA,
+        'context-1m-2025-08-07',
+      ]);
+    });
+
+    test('disabling thinking on a bare adaptive model clears the persisted adaptive config', () => {
+      const input = {
+        model: 'claude-opus-4-8',
+        thinking: false,
+        additionalModelRequestFields: {
+          thinking: { type: 'adaptive', display: 'summarized' },
+        },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+      expect(amrf?.thinking).toBeUndefined();
+    });
+
+    test('strips only LibreChat-generated betas from persisted AMRF, keeping user betas', () => {
+      const input = {
+        model: 'claude-3-5-sonnet',
+        additionalModelRequestFields: {
+          anthropic_beta: [BEDROCK_OUTPUT_128K_BETA, 'context-1m-2025-08-07'],
+        },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+      expect(amrf?.anthropic_beta).toEqual(['context-1m-2025-08-07']);
+    });
+
+    test('drops persisted anthropic_beta entirely when it holds only generated betas', () => {
+      const input = {
+        model: 'claude-3-5-sonnet',
+        additionalModelRequestFields: {
+          anthropic_beta: [BEDROCK_OUTPUT_128K_BETA, BEDROCK_FINE_GRAINED_TOOL_STREAMING_BETA],
+        },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+      expect(amrf?.anthropic_beta).toBeUndefined();
+    });
+
+    // Persisted anthropic_beta may be a bare string or a comma-delimited string,
+    // which the merge helper accepts; the non-thinking cleanup must normalize
+    // that shape before filtering out generated betas.
+    test('strips a string-form generated beta for non-thinking Claude', () => {
+      const input = {
+        model: 'claude-3-5-sonnet',
+        additionalModelRequestFields: { anthropic_beta: BEDROCK_OUTPUT_128K_BETA },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+      expect(amrf?.anthropic_beta).toBeUndefined();
+    });
+
+    test('strips generated betas from a comma-delimited string, keeping user betas', () => {
+      const input = {
+        model: 'claude-3-5-sonnet',
+        additionalModelRequestFields: {
+          anthropic_beta: `${BEDROCK_OUTPUT_128K_BETA}, context-1m-2025-08-07`,
+        },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+      expect(amrf?.anthropic_beta).toEqual(['context-1m-2025-08-07']);
+    });
+
    test('should match anthropic.claude-haiku-6 model without context beta header', () => {
      const input = {
        model: 'anthropic.claude-haiku-6',
--- a/packages/data-provider/src/bedrock.ts
+++ b/packages/data-provider/src/bedrock.ts
@ -6,6 +6,13 @@ const DEFAULT_THINKING_BUDGET = 2000;
 export const BEDROCK_OUTPUT_128K_BETA = 'output-128k-2025-02-19';
 export const BEDROCK_FINE_GRAINED_TOOL_STREAMING_BETA = 'fine-grained-tool-streaming-2025-05-14';

+/** Betas LibreChat injects itself, safe to strip from persisted AMRF when a
+ * model no longer supports them; anything else in `anthropic_beta` is a user opt-in. */
+const GENERATED_BEDROCK_BETAS = new Set<string>([
+  BEDROCK_OUTPUT_128K_BETA,
+  BEDROCK_FINE_GRAINED_TOOL_STREAMING_BETA,
+]);
+
 const bedrockReasoningConfigValues = new Set<string>(Object.values(s.BedrockReasoningConfig));

 type ThinkingConfig =
@ -201,6 +208,22 @@ export function supportsContext1m(model: string): boolean {
  return false;
 }

+/**
+ * Matches Claude 4+ model IDs in either naming order: version-first
+ * (`claude-4-sonnet`) or family-first (`claude-sonnet-5`, `claude-opus-4-8`).
+ * The pattern is deliberately unanchored and omits the `anthropic.` prefix so
+ * prefixed IDs (`anthropic.claude-*`, `us.anthropic.claude-*`,
+ * `global.anthropic.claude-*`) and bare `claude-*` IDs (application inference
+ * profiles) match identically — requiring the literal prefix silently dropped
+ * thinking config and beta headers for inference-profile deployments.
+ */
+const BEDROCK_CLAUDE_4PLUS_THINKING =
+  /claude-(?:[4-9](?:\.\d+)?(?:-\d+)?-(?:sonnet|opus|haiku)|(?:sonnet|opus|haiku)-[4-9])/;
+
+/** Whether a Bedrock model ID is an Anthropic Claude model (prefixed or bare). */
+function isBedrockClaudeModel(model: string): boolean {
+  return model.includes('claude');
+}
+
 /**
 * Gets the appropriate anthropic_beta headers for Bedrock Anthropic models.
 * Bedrock uses `anthropic_beta` (with underscore) in additionalModelRequestFields.
@ -213,11 +236,8 @@ function getBedrockAnthropicBetaHeaders(model: string): string[] {

  /** Mythos-class (Fable/Mythos) is intentionally not matched: these betas are built-in/no-op for the
   * 4.7+ generation (Fable has native 128K output), so omitting them on Bedrock is lossless. */
-  const isClaude4PlusModel =
-    /anthropic\.claude-(?:[4-9](?:\.\d+)?(?:-\d+)?-(?:sonnet|opus|haiku)|(?:sonnet|opus|haiku)-[4-9])/.test(
-      model,
-    );
-  const isClaudeThinkingModel = model.includes('anthropic.claude-3-7-sonnet') || isClaude4PlusModel;
+  const isClaude4PlusModel = BEDROCK_CLAUDE_4PLUS_THINKING.test(model);
+  const isClaudeThinkingModel = model.includes('claude-3-7-sonnet') || isClaude4PlusModel;

  if (isClaudeThinkingModel) {
    betaHeaders.push(BEDROCK_OUTPUT_128K_BETA);
@ -230,26 +250,41 @@ function getBedrockAnthropicBetaHeaders(model: string): string[] {
  return betaHeaders;
 }

-function mergeBedrockAnthropicBetaHeaders(existing: unknown, generated: string[]): string[] {
-  let existingValues: unknown[] = [];
-  if (Array.isArray(existing)) {
-    existingValues = existing;
-  } else if (typeof existing === 'string') {
-    existingValues = [existing];
+/** Flatten an anthropic_beta value (array, single string, or comma-delimited
+ * string) into trimmed, non-empty header tokens. */
+function normalizeBetaHeaders(value: unknown): string[] {
+  let values: unknown[] = [];
+  if (Array.isArray(value)) {
+    values = value;
+  } else if (typeof value === 'string') {
+    values = [value];
  }
-
-  const betaHeaders = new Set<string>();
-
-  [...existingValues, ...generated].forEach((value) => {
-    if (typeof value !== 'string') {
+  const headers: string[] = [];
+  values.forEach((entry) => {
+    if (typeof entry !== 'string') {
      return;
    }
-
-    value
+    entry
      .split(',')
      .map((header) => header.trim())
      .filter(Boolean)
-      .forEach((header) => betaHeaders.add(header));
+      .forEach((header) => headers.push(header));
+  });
+  return headers;
+}
+
+function mergeBedrockAnthropicBetaHeaders(existing: unknown, generated: string[]): string[] {
+  const generatedSet = new Set(generated);
+  const betaHeaders = new Set<string>();
+
+  [...normalizeBetaHeaders(existing), ...generated].forEach((header) => {
+    /** Drop a generated beta carried over from a prior model that the current
+     * model does not generate (e.g. fine-grained-tool-streaming on a 3.7
+     * profile); user opt-ins are always preserved. */
+    if (GENERATED_BEDROCK_BETAS.has(header) && !generatedSet.has(header)) {
+      return;
+    }
+    betaHeaders.add(header);
  });

  return Array.from(betaHeaders);
@ -407,21 +442,30 @@ export const bedrockInputParser = s.tConversationSchema
      additionalFields.thinking = false;
    }

-    /** Configure thinking for Bedrock Anthropic models: 3.7 Sonnet, Claude 4+ (opus/sonnet/haiku), and Mythos-class (Fable/Mythos). */
-    if (
+    /** Bedrock thinking-capable Claude models: 3.7 Sonnet, Claude 4+ (opus/sonnet/haiku), and Mythos-class (Fable/Mythos). */
+    const isThinkingModel =
      typeof typedData.model === 'string' &&
-      (typedData.model.includes('anthropic.claude-3-7-sonnet') ||
-        /anthropic\.claude-(?:[4-9](?:\.\d+)?(?:-\d+)?-(?:sonnet|opus|haiku)|(?:sonnet|opus|haiku)-[4-9])/.test(
-          typedData.model,
-        ) ||
-        s.isMythosClassModel(typedData.model))
-    ) {
+      (typedData.model.includes('claude-3-7-sonnet') ||
+        BEDROCK_CLAUDE_4PLUS_THINKING.test(typedData.model) ||
+        s.isMythosClassModel(typedData.model));
+
+    if (isThinkingModel) {
      const isAdaptive = supportsAdaptiveThinking(typedData.model as string);

      if (isAdaptive) {
+        /** Persisted AMRF is spread into the final request, so clearing only
+         * `additionalFields` leaves a stale value from a prior selection. */
+        const persistedAmrf = typedData.additionalModelRequestFields as
+          | Record<string, unknown>
+          | undefined;
        const effort = additionalFields.effort;
-        if (effort && typeof effort === 'string' && effort !== '') {
+        if (typeof effort === 'string' && effort !== '') {
          additionalFields.output_config = { effort };
+        } else if (effort !== undefined && persistedAmrf) {
+          /** Explicit unset ('' or null) clears the persisted effort. An absent
+           * effort (agent resume, where the prior llmConfig persisted
+           * `output_config` but no top-level `effort`) preserves it. */
+          delete persistedAmrf.output_config;
        }
        delete additionalFields.effort;

@ -432,6 +476,11 @@ export const bedrockInputParser = s.tConversationSchema
            additionalFields.thinking = { type: 'disabled' };
          } else {
            delete additionalFields.thinking;
+            /** Disable-by-omission models (Opus 4.7+): drop the persisted
+             * adaptive config so turning thinking off actually disables it. */
+            if (persistedAmrf) {
+              delete persistedAmrf.thinking;
+            }
          }
        } else {
          /**
@ -473,12 +522,23 @@ export const bedrockInputParser = s.tConversationSchema
        }
        delete additionalFields.effort;
        delete additionalFields.thinkingDisplay;
+
+        /** A bare non-adaptive thinking profile (e.g. `claude-3-7-sonnet`) must
+         * not inherit an adaptive/disabled thinking object or `output_config`
+         * persisted from another model; this branch's own fields are authoritative. */
+        const persistedAmrf = typedData.additionalModelRequestFields as
+          | Record<string, unknown>
+          | undefined;
+        if (persistedAmrf) {
+          delete persistedAmrf.thinking;
+          delete persistedAmrf.output_config;
+        }
      }

      /** Anthropic uses 'effort' via output_config, not reasoning_config */
      delete additionalFields.reasoning_effort;

-      if ((typedData.model as string).includes('anthropic.')) {
+      if (isBedrockClaudeModel(typedData.model as string)) {
        const betaHeaders = getBedrockAnthropicBetaHeaders(typedData.model as string);
        if (betaHeaders.length > 0) {
          const existingBetaHeaders = (
@ -509,7 +569,7 @@ export const bedrockInputParser = s.tConversationSchema
    }

    const isAnthropicModel =
-      typeof typedData.model === 'string' && typedData.model.includes('anthropic.');
+      typeof typedData.model === 'string' && isBedrockClaudeModel(typedData.model);

    /** Strip stale fields from previously-persisted additionalModelRequestFields */
    if (
@ -527,6 +587,27 @@ export const bedrockInputParser = s.tConversationSchema
      } else {
        delete amrf.reasoning_config;
        delete amrf.reasoning_effort;
+        /** A Claude model that does not support Bedrock thinking (e.g. a bare
+         * `claude-3-5-sonnet` inference profile) must not carry stale thinking
+         * fields from a previously-selected thinking model. Drop only the
+         * LibreChat-generated betas (output-128k, fine-grained tool streaming);
+         * user opt-ins in `anthropic_beta` are preserved. */
+        if (!isThinkingModel) {
+          delete amrf.thinking;
+          delete amrf.thinkingBudget;
+          delete amrf.effort;
+          delete amrf.output_config;
+          if (amrf.anthropic_beta !== undefined) {
+            const kept = normalizeBetaHeaders(amrf.anthropic_beta).filter(
+              (header) => !GENERATED_BEDROCK_BETAS.has(header),
+            );
+            if (kept.length > 0) {
+              amrf.anthropic_beta = kept;
+            } else {
+              delete amrf.anthropic_beta;
+            }
+          }
+        }
      }

      if (shouldOmitSamplingParameters) {