diff --git a/packages/data-provider/specs/bedrock.spec.ts b/packages/data-provider/specs/bedrock.spec.ts
index 36f94803df..70145ea29e 100644
--- a/packages/data-provider/specs/bedrock.spec.ts
+++ b/packages/data-provider/specs/bedrock.spec.ts
@@ -461,6 +461,214 @@ describe('bedrockInputParser', () => {
       expect(additionalFields.anthropic_beta).toEqual(BEDROCK_CLAUDE_4_BETAS);
     });
 
+    // Bedrock application inference profiles surface a bare `claude-*` model ID
+    // (no `anthropic.` prefix). The thinking/beta config must still apply.
+    test('should configure adaptive thinking for a bare claude-sonnet-5 (inference profile) ID', () => {
+      const input = { model: 'claude-sonnet-5' };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
+      expect(additionalFields.thinking).toEqual({ type: 'adaptive', display: 'summarized' });
+      expect(additionalFields.thinkingBudget).toBeUndefined();
+      expect(additionalFields.anthropic_beta).toEqual(BEDROCK_CLAUDE_4_BETAS);
+    });
+
+    test('bare claude-* IDs match their anthropic.-prefixed equivalents', () => {
+      const thinkingFor = (model: string) => {
+        const result = bedrockInputParser.parse({ model }) as Record<string, unknown>;
+        return (result.additionalModelRequestFields as Record<string, unknown>).thinking;
+      };
+      expect(thinkingFor('claude-sonnet-5')).toEqual(thinkingFor('anthropic.claude-sonnet-5'));
+      expect(thinkingFor('claude-opus-4-8')).toEqual(thinkingFor('us.anthropic.claude-opus-4-8'));
+      expect(thinkingFor('claude-sonnet-4-6')).toEqual(thinkingFor('anthropic.claude-sonnet-4-6'));
+    });
+
+    test('should configure extended thinking for a bare claude-3-7-sonnet ID', () => {
+      const input = { model: 'claude-3-7-sonnet' };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
+      expect(additionalFields.thinking).toBe(true);
+      expect(additionalFields.thinkingBudget).toBe(2000);
+      expect(additionalFields.anthropic_beta).toEqual([BEDROCK_OUTPUT_128K_BETA]);
+    });
+
+    test('should not configure thinking for non-Claude Bedrock models', () => {
+      const input = { model: 'meta.llama3-1-8b-instruct-v1:0' };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const additionalFields = result.additionalModelRequestFields as
+        | Record<string, unknown>
+        | undefined;
+      expect(additionalFields?.thinking).toBeUndefined();
+      expect(additionalFields?.anthropic_beta).toBeUndefined();
+    });
+
+    // Switching a persisted conversation to a non-thinking Claude model (bare or
+    // prefixed) must strip stale thinking fields carried over in AMRF, so they
+    // aren't sent to a profile that can't accept them — but a user-configured
+    // `anthropic_beta` opt-in must be preserved.
+    test.each(['claude-3-5-sonnet', 'anthropic.claude-3-5-sonnet'])(
+      'strips stale thinking fields but keeps user anthropic_beta for non-thinking Claude %s',
+      (model) => {
+        const input = {
+          model,
+          additionalModelRequestFields: {
+            thinking: { type: 'adaptive', display: 'summarized' },
+            anthropic_beta: ['max-tokens-3-5-sonnet-2024-07-15'],
+            output_config: { effort: 'high' },
+          },
+        };
+        const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+        const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+        expect(amrf?.thinking).toBeUndefined();
+        expect(amrf?.output_config).toBeUndefined();
+        expect(amrf?.anthropic_beta).toEqual(['max-tokens-3-5-sonnet-2024-07-15']);
+      },
+    );
+
+    test('keeps thinking config for a bare thinking Claude model with persisted AMRF', () => {
+      const input = {
+        model: 'claude-sonnet-5',
+        additionalModelRequestFields: { thinking: { type: 'adaptive', display: 'summarized' } },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const amrf = result.additionalModelRequestFields as Record<string, unknown>;
+      expect(amrf.thinking).toEqual({ type: 'adaptive', display: 'summarized' });
+    });
+
+    // The persisted AMRF is spread back into the final request, so clearing only
+    // the freshly-built fields leaves a stale value from a prior selection.
+    // An agent resume round-trips its llmConfig back into model_parameters, so a
+    // persisted output_config with NO top-level effort must be preserved as the
+    // user's saved choice; only an explicit unset ('' / null) clears it.
+    test('preserves persisted output_config when an adaptive model is re-parsed without top-level effort', () => {
+      const input = {
+        model: 'claude-opus-4-8',
+        additionalModelRequestFields: {
+          thinking: { type: 'adaptive', display: 'summarized' },
+          output_config: { effort: 'high' },
+        },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+      expect(amrf?.output_config).toEqual({ effort: 'high' });
+      expect(amrf?.thinking).toEqual({ type: 'adaptive', display: 'summarized' });
+    });
+
+    test.each(['', null])(
+      'clears persisted output_config when effort is explicitly unset (%p)',
+      (effort) => {
+        const input = {
+          model: 'claude-opus-4-8',
+          effort,
+          additionalModelRequestFields: {
+            thinking: { type: 'adaptive', display: 'summarized' },
+            output_config: { effort: 'high' },
+          },
+        };
+        const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+        const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+        expect(amrf?.output_config).toBeUndefined();
+      },
+    );
+
+    // Switching a persisted adaptive/disabled conversation to a bare non-adaptive
+    // thinking profile (3.7) must not leak the prior thinking object or output_config.
+    test('clears persisted thinking + output_config when switching to a bare non-adaptive thinking model', () => {
+      const input = {
+        model: 'claude-3-7-sonnet',
+        additionalModelRequestFields: {
+          thinking: { type: 'disabled' },
+          output_config: { effort: 'high' },
+        },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+      expect(amrf?.output_config).toBeUndefined();
+      expect(amrf?.thinking).not.toEqual({ type: 'disabled' });
+    });
+
+    // Switching a bare Claude 4+/5 profile (both generated betas persisted) to a
+    // bare 3.7 profile must drop the fine-grained beta 3.7 does not generate.
+    test('drops a stale generated beta not applicable to the target thinking model', () => {
+      const input = {
+        model: 'claude-3-7-sonnet',
+        additionalModelRequestFields: {
+          anthropic_beta: [
+            BEDROCK_OUTPUT_128K_BETA,
+            BEDROCK_FINE_GRAINED_TOOL_STREAMING_BETA,
+            'context-1m-2025-08-07',
+          ],
+        },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
+      expect(additionalFields.anthropic_beta).toEqual([
+        BEDROCK_OUTPUT_128K_BETA,
+        'context-1m-2025-08-07',
+      ]);
+    });
+
+    test('disabling thinking on a bare adaptive model clears the persisted adaptive config', () => {
+      const input = {
+        model: 'claude-opus-4-8',
+        thinking: false,
+        additionalModelRequestFields: {
+          thinking: { type: 'adaptive', display: 'summarized' },
+        },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+      expect(amrf?.thinking).toBeUndefined();
+    });
+
+    test('strips only LibreChat-generated betas from persisted AMRF, keeping user betas', () => {
+      const input = {
+        model: 'claude-3-5-sonnet',
+        additionalModelRequestFields: {
+          anthropic_beta: [BEDROCK_OUTPUT_128K_BETA, 'context-1m-2025-08-07'],
+        },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+      expect(amrf?.anthropic_beta).toEqual(['context-1m-2025-08-07']);
+    });
+
+    test('drops persisted anthropic_beta entirely when it holds only generated betas', () => {
+      const input = {
+        model: 'claude-3-5-sonnet',
+        additionalModelRequestFields: {
+          anthropic_beta: [BEDROCK_OUTPUT_128K_BETA, BEDROCK_FINE_GRAINED_TOOL_STREAMING_BETA],
+        },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+      expect(amrf?.anthropic_beta).toBeUndefined();
+    });
+
+    // Persisted anthropic_beta may be a bare string or a comma-delimited string,
+    // which the merge helper accepts; the non-thinking cleanup must normalize
+    // that shape before filtering out generated betas.
+    test('strips a string-form generated beta for non-thinking Claude', () => {
+      const input = {
+        model: 'claude-3-5-sonnet',
+        additionalModelRequestFields: { anthropic_beta: BEDROCK_OUTPUT_128K_BETA },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+      expect(amrf?.anthropic_beta).toBeUndefined();
+    });
+
+    test('strips generated betas from a comma-delimited string, keeping user betas', () => {
+      const input = {
+        model: 'claude-3-5-sonnet',
+        additionalModelRequestFields: {
+          anthropic_beta: `${BEDROCK_OUTPUT_128K_BETA}, context-1m-2025-08-07`,
+        },
+      };
+      const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+      const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
+      expect(amrf?.anthropic_beta).toEqual(['context-1m-2025-08-07']);
+    });
+
     test('should match anthropic.claude-haiku-6 model without context beta header', () => {
       const input = {
         model: 'anthropic.claude-haiku-6',
diff --git a/packages/data-provider/src/bedrock.ts b/packages/data-provider/src/bedrock.ts
index 7cb233b46f..3a32dd5cfd 100644
--- 
a/packages/data-provider/src/bedrock.ts
+++ b/packages/data-provider/src/bedrock.ts
@@ -6,6 +6,13 @@ const DEFAULT_THINKING_BUDGET = 2000;
 export const BEDROCK_OUTPUT_128K_BETA = 'output-128k-2025-02-19';
 export const BEDROCK_FINE_GRAINED_TOOL_STREAMING_BETA = 'fine-grained-tool-streaming-2025-05-14';
 
+/** Betas LibreChat injects itself, safe to strip from persisted AMRF when a
+ * model no longer supports them; anything else in `anthropic_beta` is a user opt-in. */
+const GENERATED_BEDROCK_BETAS = new Set([
+  BEDROCK_OUTPUT_128K_BETA,
+  BEDROCK_FINE_GRAINED_TOOL_STREAMING_BETA,
+]);
+
 const bedrockReasoningConfigValues = new Set(Object.values(s.BedrockReasoningConfig));
 
 type ThinkingConfig =
@@ -201,6 +208,22 @@ export function supportsContext1m(model: string): boolean {
   return false;
 }
 
+/**
+ * A Bedrock Claude model ID may be prefixed (`anthropic.claude-*`,
+ * `us.anthropic.claude-*`, `global.anthropic.claude-*`) or bare (`claude-*`,
+ * used when the LibreChat model ID maps to an application inference profile).
+ * Match on the `claude` family token so every form is recognized — requiring
+ * the literal `anthropic.` prefix silently dropped thinking config, beta
+ * headers, and sampling handling for inference-profile deployments.
+ */
+const BEDROCK_CLAUDE_4PLUS_THINKING =
+  /claude-(?:[4-9](?:\.\d+)?(?:-\d+)?-(?:sonnet|opus|haiku)|(?:sonnet|opus|haiku)-[4-9])/;
+
+/** Anthropic check: any `anthropic.`-prefixed ID (the gate used before this change) or a bare `claude` ID. */
+function isBedrockClaudeModel(model: string): boolean {
+  return model.includes('anthropic.') || model.includes('claude');
+}
+
 /**
  * Gets the appropriate anthropic_beta headers for Bedrock Anthropic models.
  * Bedrock uses `anthropic_beta` (with underscore) in additionalModelRequestFields.
@@ -213,11 +236,8 @@ function getBedrockAnthropicBetaHeaders(model: string): string[] {
 
   /** Mythos-class (Fable/Mythos) is intentionally not matched: these betas are built-in/no-op for the
    * 4.7+ generation (Fable has native 128K output), so omitting them on Bedrock is lossless. */
-  const isClaude4PlusModel =
-    /anthropic\.claude-(?:[4-9](?:\.\d+)?(?:-\d+)?-(?:sonnet|opus|haiku)|(?:sonnet|opus|haiku)-[4-9])/.test(
-      model,
-    );
-  const isClaudeThinkingModel = model.includes('anthropic.claude-3-7-sonnet') || isClaude4PlusModel;
+  const isClaude4PlusModel = BEDROCK_CLAUDE_4PLUS_THINKING.test(model);
+  const isClaudeThinkingModel = model.includes('claude-3-7-sonnet') || isClaude4PlusModel;
 
   if (isClaudeThinkingModel) {
     betaHeaders.push(BEDROCK_OUTPUT_128K_BETA);
@@ -230,26 +250,41 @@ function getBedrockAnthropicBetaHeaders(model: string): string[] {
   return betaHeaders;
 }
 
-function mergeBedrockAnthropicBetaHeaders(existing: unknown, generated: string[]): string[] {
-  let existingValues: unknown[] = [];
-  if (Array.isArray(existing)) {
-    existingValues = existing;
-  } else if (typeof existing === 'string') {
-    existingValues = [existing];
+/** Flatten an anthropic_beta value (array, single string, or comma-delimited
+ * string) into trimmed, non-empty header tokens. */
+function normalizeBetaHeaders(value: unknown): string[] {
+  let values: unknown[] = [];
+  if (Array.isArray(value)) {
+    values = value;
+  } else if (typeof value === 'string') {
+    values = [value];
   }
-
-  const betaHeaders = new Set<string>();
-
-  [...existingValues, ...generated].forEach((value) => {
-    if (typeof value !== 'string') {
+  const headers: string[] = [];
+  values.forEach((entry) => {
+    if (typeof entry !== 'string') {
       return;
     }
-
-    value
+    entry
       .split(',')
       .map((header) => header.trim())
       .filter(Boolean)
-      .forEach((header) => betaHeaders.add(header));
+      .forEach((header) => headers.push(header));
+  });
+  return headers;
+}
+
+function mergeBedrockAnthropicBetaHeaders(existing: unknown, generated: string[]): string[] {
+  const generatedSet = new Set(generated);
+  const betaHeaders = new Set<string>();
+
+  [...normalizeBetaHeaders(existing), ...generated].forEach((header) => {
+    /** Drop a generated beta carried over from a prior model that the current
+     * model does not generate (e.g. fine-grained-tool-streaming on a 3.7
+     * profile); user opt-ins are always preserved. */
+    if (GENERATED_BEDROCK_BETAS.has(header) && !generatedSet.has(header)) {
+      return;
+    }
+    betaHeaders.add(header);
   });
 
   return Array.from(betaHeaders);
@@ -407,21 +442,30 @@ export const bedrockInputParser = s.tConversationSchema
       additionalFields.thinking = false;
     }
 
-    /** Configure thinking for Bedrock Anthropic models: 3.7 Sonnet, Claude 4+ (opus/sonnet/haiku), and Mythos-class (Fable/Mythos). */
-    if (
+    /** Bedrock thinking-capable Claude models: 3.7 Sonnet, Claude 4+ (opus/sonnet/haiku), and Mythos-class (Fable/Mythos). */
+    const isThinkingModel =
       typeof typedData.model === 'string' &&
-      (typedData.model.includes('anthropic.claude-3-7-sonnet') ||
-        /anthropic\.claude-(?:[4-9](?:\.\d+)?(?:-\d+)?-(?:sonnet|opus|haiku)|(?:sonnet|opus|haiku)-[4-9])/.test(
-          typedData.model,
-        ) ||
-        s.isMythosClassModel(typedData.model))
-    ) {
+      (typedData.model.includes('claude-3-7-sonnet') ||
+        BEDROCK_CLAUDE_4PLUS_THINKING.test(typedData.model) ||
+        s.isMythosClassModel(typedData.model));
+
+    if (isThinkingModel) {
       const isAdaptive = supportsAdaptiveThinking(typedData.model as string);
 
       if (isAdaptive) {
+        /** Persisted AMRF is spread into the final request, so clearing only
+         * `additionalFields` leaves a stale value from a prior selection. */
+        const persistedAmrf = typedData.additionalModelRequestFields as
+          | Record<string, unknown>
+          | undefined;
         const effort = additionalFields.effort;
-        if (effort && typeof effort === 'string' && effort !== '') {
+        if (typeof effort === 'string' && effort !== '') {
           additionalFields.output_config = { effort };
+        } else if (effort !== undefined && persistedAmrf) {
+          /** Explicit unset ('' or null) clears the persisted effort. An absent
+           * effort (agent resume, where the prior llmConfig persisted
+           * `output_config` but no top-level `effort`) preserves it. */
+          delete persistedAmrf.output_config;
         }
         delete additionalFields.effort;
 
@@ -432,6 +476,11 @@ export const bedrockInputParser = s.tConversationSchema
             additionalFields.thinking = { type: 'disabled' };
           } else {
             delete additionalFields.thinking;
+            /** Disable-by-omission models (Opus 4.7+): drop the persisted
+             * adaptive config so turning thinking off actually disables it. */
+            if (persistedAmrf) {
+              delete persistedAmrf.thinking;
+            }
           }
         } else {
           /**
           *
@@ -473,12 +522,23 @@ export const bedrockInputParser = s.tConversationSchema
         }
         delete additionalFields.effort;
         delete additionalFields.thinkingDisplay;
+
+        /** A bare non-adaptive thinking profile (e.g. `claude-3-7-sonnet`) must
+         * not inherit an adaptive/disabled thinking object or `output_config`
+         * persisted from another model; this branch's own fields are authoritative. */
+        const persistedAmrf = typedData.additionalModelRequestFields as
+          | Record<string, unknown>
+          | undefined;
+        if (persistedAmrf) {
+          delete persistedAmrf.thinking;
+          delete persistedAmrf.output_config;
+        }
       }
 
       /** Anthropic uses 'effort' via output_config, not reasoning_config */
       delete additionalFields.reasoning_effort;
 
-      if ((typedData.model as string).includes('anthropic.')) {
+      if (isBedrockClaudeModel(typedData.model as string)) {
         const betaHeaders = getBedrockAnthropicBetaHeaders(typedData.model as string);
         if (betaHeaders.length > 0) {
           const existingBetaHeaders = (
@@ -509,7 +569,7 @@ export const bedrockInputParser = s.tConversationSchema
     }
 
     const isAnthropicModel =
-      typeof typedData.model === 'string' && typedData.model.includes('anthropic.');
+      typeof typedData.model === 'string' && isBedrockClaudeModel(typedData.model);
 
     /** Strip stale fields from previously-persisted additionalModelRequestFields */
     if (
@@ -527,6 +587,27 @@ export const bedrockInputParser = s.tConversationSchema
       } else {
         delete amrf.reasoning_config;
         delete amrf.reasoning_effort;
+        /** A Claude model that does not support Bedrock thinking (e.g. a bare
+         * `claude-3-5-sonnet` inference profile) must not carry stale thinking
+         * fields from a previously-selected thinking model. Drop only the
+         * LibreChat-generated betas (output-128k, fine-grained tool streaming);
+         * user opt-ins in `anthropic_beta` are preserved. */
+        if (!isThinkingModel) {
+          delete amrf.thinking;
+          delete amrf.thinkingBudget;
+          delete amrf.effort;
+          delete amrf.output_config;
+          if (amrf.anthropic_beta !== undefined) {
+            const kept = normalizeBetaHeaders(amrf.anthropic_beta).filter(
+              (header) => !GENERATED_BEDROCK_BETAS.has(header),
+            );
+            if (kept.length > 0) {
+              amrf.anthropic_beta = kept;
+            } else {
+              delete amrf.anthropic_beta;
+            }
+          }
+        }
       }
 
       if (shouldOmitSamplingParameters) {