🧠 fix: Apply Bedrock thinking config to bare inference-profile model IDs (#14054)
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
GitNexus Index / index (push) Waiting to run
GitNexus Index / post-index (push) Blocked by required conditions
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions
Sync Helm Chart Tags / Ignore non-main push (push) Waiting to run
Sync Helm Chart Tags / Sync chart tags (push) Waiting to run

* 🧠 fix: Apply Bedrock thinking config to bare inference-profile model IDs

The Bedrock request parser gated thinking config, sampling handling, and the
anthropic_beta headers on the model ID literally containing `anthropic.`. When
a deployment uses an application inference profile, the LibreChat model ID is a
bare `claude-*` (e.g. `claude-sonnet-5`) that maps to the profile ARN — so the
gate never matched, no `thinking` config was sent, and reasoning models
returned empty thinking blocks (most visibly: Claude Sonnet 5 never streamed
reasoning, while `us.anthropic.claude-opus-4-8` did).

Match on the `claude` family token instead of the `anthropic.` prefix so
prefixed (`anthropic.`, `us.`, `global.`) and bare inference-profile IDs are
handled identically. Verified e2e against live Bedrock via the agents SDK: a
bare `claude-sonnet-5` now sends `{type:'adaptive', display:'summarized'}` and
streams reasoning. Non-Claude Bedrock models (llama/cohere) and pre-thinking
Claude (3.5 sonnet) are unaffected.

* 🧹 fix: Strip stale thinking fields for non-thinking Claude Bedrock IDs

Follow-up to the bare-ID matching change: broadening the anthropic guard to
match bare `claude-*` meant a non-thinking Claude profile (e.g. a bare
`claude-3-5-sonnet` inference profile) took the Claude cleanup branch, which
kept persisted `thinking`/`anthropic_beta`/`output_config` from a
previously-selected thinking model — leaking unsupported fields after a model
switch. Extract `isThinkingModel` and, in the Claude cleanup branch, strip the
thinking fields when the model isn't thinking-capable. Also fixes the
pre-existing prefixed `anthropic.claude-3-5-sonnet` case (which already kept
stale thinking). Thinking-capable models (sonnet-5, 3.7-sonnet) still keep
their config.

* 🩹 fix: Preserve user anthropic_beta on non-thinking Claude cleanup

The non-thinking stale-cleanup deleted amrf.anthropic_beta, but that is the
generic Bedrock Anthropic beta field and may carry a user opt-in (e.g.
max-tokens-3-5-sonnet-2024-07-15 for extended output on Claude 3.5). Strip only
the thinking-specific fields (thinking/thinkingBudget/effort/output_config) and
leave anthropic_beta intact.

* fix: clear persisted AMRF (output_config, thinking, generated betas) on bare Bedrock profiles

* fix: preserve persisted effort on resume + strip stale thinking/betas across bare profiles

* fix: normalize string/comma-delimited anthropic_beta before stripping generated betas
This commit is contained in:
Danny Avila 2026-07-01 14:19:34 -04:00 committed by GitHub
parent 53ee82fe5d
commit 8683eccbbc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 319 additions and 30 deletions

View file

@ -461,6 +461,214 @@ describe('bedrockInputParser', () => {
expect(additionalFields.anthropic_beta).toEqual(BEDROCK_CLAUDE_4_BETAS);
});
// An application inference profile is addressed by a bare `claude-*` model ID
// with no `anthropic.` prefix; thinking and beta config must still be emitted.
test('should configure adaptive thinking for a bare claude-sonnet-5 (inference profile) ID', () => {
  const parsed = bedrockInputParser.parse({ model: 'claude-sonnet-5' }) as Record<string, unknown>;
  const fields = parsed.additionalModelRequestFields as Record<string, unknown>;

  expect(fields.thinking).toEqual({ type: 'adaptive', display: 'summarized' });
  expect(fields.thinkingBudget).toBeUndefined();
  expect(fields.anthropic_beta).toEqual(BEDROCK_CLAUDE_4_BETAS);
});
test('bare claude-* IDs match their anthropic.-prefixed equivalents', () => {
  /** Parses a model-only input and returns the resulting AMRF `thinking` value. */
  const parseThinking = (model: string): unknown => {
    const parsed = bedrockInputParser.parse({ model }) as Record<string, unknown>;
    const fields = parsed.additionalModelRequestFields as Record<string, unknown>;
    return fields.thinking;
  };

  // Each pair is [bare inference-profile ID, prefixed ID] for the same model.
  const pairs: Array<[string, string]> = [
    ['claude-sonnet-5', 'anthropic.claude-sonnet-5'],
    ['claude-opus-4-8', 'us.anthropic.claude-opus-4-8'],
    ['claude-sonnet-4-6', 'anthropic.claude-sonnet-4-6'],
  ];

  for (const [bareId, prefixedId] of pairs) {
    expect(parseThinking(bareId)).toEqual(parseThinking(prefixedId));
  }
});
test('should configure extended thinking for a bare claude-3-7-sonnet ID', () => {
  const parsed = bedrockInputParser.parse({ model: 'claude-3-7-sonnet' }) as Record<
    string,
    unknown
  >;
  const fields = parsed.additionalModelRequestFields as Record<string, unknown>;

  expect(fields.thinking).toBe(true);
  expect(fields.thinkingBudget).toBe(2000);
  expect(fields.anthropic_beta).toEqual([BEDROCK_OUTPUT_128K_BETA]);
});
test('should not configure thinking for non-Claude Bedrock models', () => {
  const parsed = bedrockInputParser.parse({
    model: 'meta.llama3-1-8b-instruct-v1:0',
  }) as Record<string, unknown>;
  // AMRF may be absent for this model; an empty object reads the same as "missing".
  const fields = (parsed.additionalModelRequestFields ?? {}) as Record<string, unknown>;

  expect(fields.thinking).toBeUndefined();
  expect(fields.anthropic_beta).toBeUndefined();
});
// When a persisted conversation moves to a Claude model without thinking
// support (bare or prefixed), thinking fields left over in AMRF must be
// removed so they never reach a profile that rejects them. A user-supplied
// `anthropic_beta` opt-in, however, has to survive the cleanup.
test.each(['claude-3-5-sonnet', 'anthropic.claude-3-5-sonnet'])(
  'strips stale thinking fields but keeps user anthropic_beta for non-thinking Claude %s',
  (model) => {
    const userBeta = 'max-tokens-3-5-sonnet-2024-07-15';
    const parsed = bedrockInputParser.parse({
      model,
      additionalModelRequestFields: {
        thinking: { type: 'adaptive', display: 'summarized' },
        anthropic_beta: [userBeta],
        output_config: { effort: 'high' },
      },
    }) as Record<string, unknown>;
    const fields = (parsed.additionalModelRequestFields ?? {}) as Record<string, unknown>;

    expect(fields.thinking).toBeUndefined();
    expect(fields.output_config).toBeUndefined();
    expect(fields.anthropic_beta).toEqual([userBeta]);
  },
);
test('keeps thinking config for a bare thinking Claude model with persisted AMRF', () => {
  const adaptiveThinking = { type: 'adaptive', display: 'summarized' };
  const parsed = bedrockInputParser.parse({
    model: 'claude-sonnet-5',
    additionalModelRequestFields: { thinking: { ...adaptiveThinking } },
  }) as Record<string, unknown>;
  const fields = parsed.additionalModelRequestFields as Record<string, unknown>;

  expect(fields.thinking).toEqual(adaptiveThinking);
});
// Persisted AMRF is merged back into the final request, so clearing only the
// freshly-built fields would leave a stale value from an earlier selection.
// On agent resume the llmConfig is round-tripped into model_parameters: a
// persisted output_config with NO top-level effort is the user's saved choice
// and must be kept. Only an explicit unset ('' / null) removes it.
test('preserves persisted output_config when an adaptive model is re-parsed without top-level effort', () => {
  const parsed = bedrockInputParser.parse({
    model: 'claude-opus-4-8',
    additionalModelRequestFields: {
      thinking: { type: 'adaptive', display: 'summarized' },
      output_config: { effort: 'high' },
    },
  }) as Record<string, unknown>;
  const fields = (parsed.additionalModelRequestFields ?? {}) as Record<string, unknown>;

  expect(fields.output_config).toEqual({ effort: 'high' });
  expect(fields.thinking).toEqual({ type: 'adaptive', display: 'summarized' });
});
test.each(['', null])(
  'clears persisted output_config when effort is explicitly unset (%p)',
  (effort) => {
    // Top-level `effort` is present but empty/null, alongside a persisted effort.
    const parsed = bedrockInputParser.parse({
      model: 'claude-opus-4-8',
      effort,
      additionalModelRequestFields: {
        thinking: { type: 'adaptive', display: 'summarized' },
        output_config: { effort: 'high' },
      },
    }) as Record<string, unknown>;
    const fields = (parsed.additionalModelRequestFields ?? {}) as Record<string, unknown>;

    expect(fields.output_config).toBeUndefined();
  },
);
// Moving a persisted adaptive/disabled conversation onto a bare non-adaptive
// thinking profile (3.7) must not carry over the earlier thinking object or
// its output_config.
test('clears persisted thinking + output_config when switching to a bare non-adaptive thinking model', () => {
  const staleThinking = { type: 'disabled' };
  const parsed = bedrockInputParser.parse({
    model: 'claude-3-7-sonnet',
    additionalModelRequestFields: {
      thinking: { ...staleThinking },
      output_config: { effort: 'high' },
    },
  }) as Record<string, unknown>;
  const fields = (parsed.additionalModelRequestFields ?? {}) as Record<string, unknown>;

  expect(fields.output_config).toBeUndefined();
  expect(fields.thinking).not.toEqual(staleThinking);
});
// A bare Claude 4+/5 profile persists both generated betas. After switching to
// a bare 3.7 profile, the fine-grained beta (which 3.7 does not generate) must
// be removed while the remaining entries are kept.
test('drops a stale generated beta not applicable to the target thinking model', () => {
  const userBeta = 'context-1m-2025-08-07';
  const parsed = bedrockInputParser.parse({
    model: 'claude-3-7-sonnet',
    additionalModelRequestFields: {
      anthropic_beta: [
        BEDROCK_OUTPUT_128K_BETA,
        BEDROCK_FINE_GRAINED_TOOL_STREAMING_BETA,
        userBeta,
      ],
    },
  }) as Record<string, unknown>;
  const fields = parsed.additionalModelRequestFields as Record<string, unknown>;

  expect(fields.anthropic_beta).toEqual([BEDROCK_OUTPUT_128K_BETA, userBeta]);
});
test('disabling thinking on a bare adaptive model clears the persisted adaptive config', () => {
  // Top-level `thinking: false` competes with an adaptive config saved in AMRF.
  const parsed = bedrockInputParser.parse({
    model: 'claude-opus-4-8',
    thinking: false,
    additionalModelRequestFields: {
      thinking: { type: 'adaptive', display: 'summarized' },
    },
  }) as Record<string, unknown>;
  const fields = (parsed.additionalModelRequestFields ?? {}) as Record<string, unknown>;

  expect(fields.thinking).toBeUndefined();
});
test('strips only LibreChat-generated betas from persisted AMRF, keeping user betas', () => {
  const userBeta = 'context-1m-2025-08-07';
  const parsed = bedrockInputParser.parse({
    model: 'claude-3-5-sonnet',
    additionalModelRequestFields: {
      anthropic_beta: [BEDROCK_OUTPUT_128K_BETA, userBeta],
    },
  }) as Record<string, unknown>;
  const fields = (parsed.additionalModelRequestFields ?? {}) as Record<string, unknown>;

  expect(fields.anthropic_beta).toEqual([userBeta]);
});
test('drops persisted anthropic_beta entirely when it holds only generated betas', () => {
  const generatedOnly = [BEDROCK_OUTPUT_128K_BETA, BEDROCK_FINE_GRAINED_TOOL_STREAMING_BETA];
  const parsed = bedrockInputParser.parse({
    model: 'claude-3-5-sonnet',
    additionalModelRequestFields: { anthropic_beta: generatedOnly },
  }) as Record<string, unknown>;
  const fields = (parsed.additionalModelRequestFields ?? {}) as Record<string, unknown>;

  expect(fields.anthropic_beta).toBeUndefined();
});
// A persisted anthropic_beta is not always an array: the merge helper also
// accepts a single string or a comma-delimited string. The non-thinking
// cleanup has to normalize that shape before it filters out generated betas.
test('strips a string-form generated beta for non-thinking Claude', () => {
  const parsed = bedrockInputParser.parse({
    model: 'claude-3-5-sonnet',
    additionalModelRequestFields: { anthropic_beta: BEDROCK_OUTPUT_128K_BETA },
  }) as Record<string, unknown>;
  const fields = (parsed.additionalModelRequestFields ?? {}) as Record<string, unknown>;

  expect(fields.anthropic_beta).toBeUndefined();
});
test('strips generated betas from a comma-delimited string, keeping user betas', () => {
  // One generated beta and one user beta packed into a single delimited string.
  const delimited = `${BEDROCK_OUTPUT_128K_BETA}, context-1m-2025-08-07`;
  const parsed = bedrockInputParser.parse({
    model: 'claude-3-5-sonnet',
    additionalModelRequestFields: { anthropic_beta: delimited },
  }) as Record<string, unknown>;
  const fields = (parsed.additionalModelRequestFields ?? {}) as Record<string, unknown>;

  expect(fields.anthropic_beta).toEqual(['context-1m-2025-08-07']);
});
test('should match anthropic.claude-haiku-6 model without context beta header', () => {
const input = {
model: 'anthropic.claude-haiku-6',

View file

@ -6,6 +6,13 @@ const DEFAULT_THINKING_BUDGET = 2000;
export const BEDROCK_OUTPUT_128K_BETA = 'output-128k-2025-02-19';
export const BEDROCK_FINE_GRAINED_TOOL_STREAMING_BETA = 'fine-grained-tool-streaming-2025-05-14';

/**
 * Beta headers that LibreChat adds on its own. They may be removed from
 * persisted AMRF once the selected model no longer supports them; every other
 * `anthropic_beta` entry is treated as a user opt-in and left alone.
 */
const GENERATED_BEDROCK_BETAS: Set<string> = new Set([
  BEDROCK_OUTPUT_128K_BETA,
  BEDROCK_FINE_GRAINED_TOOL_STREAMING_BETA,
]);
/** The values of `s.BedrockReasoningConfig`, collected into a Set of strings. */
const bedrockReasoningConfigValues = new Set<string>(Object.values(s.BedrockReasoningConfig));
type ThinkingConfig =
@ -201,6 +208,22 @@ export function supportsContext1m(model: string): boolean {
return false;
}
/**
 * A Bedrock Claude model ID may be prefixed (`anthropic.claude-*`,
 * `us.anthropic.claude-*`, `global.anthropic.claude-*`) or bare (`claude-*`,
 * used when the LibreChat model ID maps to an application inference profile).
 * Match on the `claude` family token so every form is recognized; requiring
 * the literal `anthropic.` prefix silently dropped thinking config, beta
 * headers, and sampling handling for inference-profile deployments.
 *
 * The pattern is unanchored, so any prefix before `claude-` is accepted. After
 * `claude-` it matches either a version-first name (major version 4-9, an
 * optional `.N` and an optional `-N` part, then `-sonnet`/`-opus`/`-haiku`) or
 * a family-first name (`sonnet`/`opus`/`haiku` followed by `-4` through `-9`).
 */
const BEDROCK_CLAUDE_4PLUS_THINKING =
/claude-(?:[4-9](?:\.\d+)?(?:-\d+)?-(?:sonnet|opus|haiku)|(?:sonnet|opus|haiku)-[4-9])/;
/**
 * Reports whether a Bedrock model ID names an Anthropic Claude model.
 *
 * The check is a case-sensitive substring search for `claude`, so both
 * prefixed IDs and bare `claude-*` IDs are accepted.
 *
 * @param model - The Bedrock model ID to inspect.
 * @returns `true` when the ID contains `claude`, otherwise `false`.
 */
function isBedrockClaudeModel(model: string): boolean {
  const familyToken = 'claude';
  return model.indexOf(familyToken) !== -1;
}
/**
* Gets the appropriate anthropic_beta headers for Bedrock Anthropic models.
* Bedrock uses `anthropic_beta` (with underscore) in additionalModelRequestFields.
@ -213,11 +236,8 @@ function getBedrockAnthropicBetaHeaders(model: string): string[] {
/** Mythos-class (Fable/Mythos) is intentionally not matched: these betas are built-in/no-op for the
* 4.7+ generation (Fable has native 128K output), so omitting them on Bedrock is lossless. */
const isClaude4PlusModel =
/anthropic\.claude-(?:[4-9](?:\.\d+)?(?:-\d+)?-(?:sonnet|opus|haiku)|(?:sonnet|opus|haiku)-[4-9])/.test(
model,
);
const isClaudeThinkingModel = model.includes('anthropic.claude-3-7-sonnet') || isClaude4PlusModel;
const isClaude4PlusModel = BEDROCK_CLAUDE_4PLUS_THINKING.test(model);
const isClaudeThinkingModel = model.includes('claude-3-7-sonnet') || isClaude4PlusModel;
if (isClaudeThinkingModel) {
betaHeaders.push(BEDROCK_OUTPUT_128K_BETA);
@ -230,26 +250,41 @@ function getBedrockAnthropicBetaHeaders(model: string): string[] {
return betaHeaders;
}
function mergeBedrockAnthropicBetaHeaders(existing: unknown, generated: string[]): string[] {
let existingValues: unknown[] = [];
if (Array.isArray(existing)) {
existingValues = existing;
} else if (typeof existing === 'string') {
existingValues = [existing];
/** Flatten an anthropic_beta value (array, single string, or comma-delimited
* string) into trimmed, non-empty header tokens. */
function normalizeBetaHeaders(value: unknown): string[] {
let values: unknown[] = [];
if (Array.isArray(value)) {
values = value;
} else if (typeof value === 'string') {
values = [value];
}
const betaHeaders = new Set<string>();
[...existingValues, ...generated].forEach((value) => {
if (typeof value !== 'string') {
const headers: string[] = [];
values.forEach((entry) => {
if (typeof entry !== 'string') {
return;
}
value
entry
.split(',')
.map((header) => header.trim())
.filter(Boolean)
.forEach((header) => betaHeaders.add(header));
.forEach((header) => headers.push(header));
});
return headers;
}
function mergeBedrockAnthropicBetaHeaders(existing: unknown, generated: string[]): string[] {
const generatedSet = new Set(generated);
const betaHeaders = new Set<string>();
[...normalizeBetaHeaders(existing), ...generated].forEach((header) => {
/** Drop a generated beta carried over from a prior model that the current
* model does not generate (e.g. fine-grained-tool-streaming on a 3.7
* profile); user opt-ins are always preserved. */
if (GENERATED_BEDROCK_BETAS.has(header) && !generatedSet.has(header)) {
return;
}
betaHeaders.add(header);
});
return Array.from(betaHeaders);
@ -407,21 +442,30 @@ export const bedrockInputParser = s.tConversationSchema
additionalFields.thinking = false;
}
/** Configure thinking for Bedrock Anthropic models: 3.7 Sonnet, Claude 4+ (opus/sonnet/haiku), and Mythos-class (Fable/Mythos). */
if (
/** Bedrock thinking-capable Claude models: 3.7 Sonnet, Claude 4+ (opus/sonnet/haiku), and Mythos-class (Fable/Mythos). */
const isThinkingModel =
typeof typedData.model === 'string' &&
(typedData.model.includes('anthropic.claude-3-7-sonnet') ||
/anthropic\.claude-(?:[4-9](?:\.\d+)?(?:-\d+)?-(?:sonnet|opus|haiku)|(?:sonnet|opus|haiku)-[4-9])/.test(
typedData.model,
) ||
s.isMythosClassModel(typedData.model))
) {
(typedData.model.includes('claude-3-7-sonnet') ||
BEDROCK_CLAUDE_4PLUS_THINKING.test(typedData.model) ||
s.isMythosClassModel(typedData.model));
if (isThinkingModel) {
const isAdaptive = supportsAdaptiveThinking(typedData.model as string);
if (isAdaptive) {
/** Persisted AMRF is spread into the final request, so clearing only
* `additionalFields` leaves a stale value from a prior selection. */
const persistedAmrf = typedData.additionalModelRequestFields as
| Record<string, unknown>
| undefined;
const effort = additionalFields.effort;
if (effort && typeof effort === 'string' && effort !== '') {
if (typeof effort === 'string' && effort !== '') {
additionalFields.output_config = { effort };
} else if (effort !== undefined && persistedAmrf) {
/** Explicit unset ('' or null) clears the persisted effort. An absent
* effort (agent resume, where the prior llmConfig persisted
* `output_config` but no top-level `effort`) preserves it. */
delete persistedAmrf.output_config;
}
delete additionalFields.effort;
@ -432,6 +476,11 @@ export const bedrockInputParser = s.tConversationSchema
additionalFields.thinking = { type: 'disabled' };
} else {
delete additionalFields.thinking;
/** Disable-by-omission models (Opus 4.7+): drop the persisted
* adaptive config so turning thinking off actually disables it. */
if (persistedAmrf) {
delete persistedAmrf.thinking;
}
}
} else {
/**
@ -473,12 +522,23 @@ export const bedrockInputParser = s.tConversationSchema
}
delete additionalFields.effort;
delete additionalFields.thinkingDisplay;
/** A bare non-adaptive thinking profile (e.g. `claude-3-7-sonnet`) must
* not inherit an adaptive/disabled thinking object or `output_config`
* persisted from another model; this branch's own fields are authoritative. */
const persistedAmrf = typedData.additionalModelRequestFields as
| Record<string, unknown>
| undefined;
if (persistedAmrf) {
delete persistedAmrf.thinking;
delete persistedAmrf.output_config;
}
}
/** Anthropic uses 'effort' via output_config, not reasoning_config */
delete additionalFields.reasoning_effort;
if ((typedData.model as string).includes('anthropic.')) {
if (isBedrockClaudeModel(typedData.model as string)) {
const betaHeaders = getBedrockAnthropicBetaHeaders(typedData.model as string);
if (betaHeaders.length > 0) {
const existingBetaHeaders = (
@ -509,7 +569,7 @@ export const bedrockInputParser = s.tConversationSchema
}
const isAnthropicModel =
typeof typedData.model === 'string' && typedData.model.includes('anthropic.');
typeof typedData.model === 'string' && isBedrockClaudeModel(typedData.model);
/** Strip stale fields from previously-persisted additionalModelRequestFields */
if (
@ -527,6 +587,27 @@ export const bedrockInputParser = s.tConversationSchema
} else {
delete amrf.reasoning_config;
delete amrf.reasoning_effort;
/** A Claude model that does not support Bedrock thinking (e.g. a bare
* `claude-3-5-sonnet` inference profile) must not carry stale thinking
* fields from a previously-selected thinking model. Drop only the
* LibreChat-generated betas (output-128k, fine-grained tool streaming);
* user opt-ins in `anthropic_beta` are preserved. */
if (!isThinkingModel) {
delete amrf.thinking;
delete amrf.thinkingBudget;
delete amrf.effort;
delete amrf.output_config;
if (amrf.anthropic_beta !== undefined) {
const kept = normalizeBetaHeaders(amrf.anthropic_beta).filter(
(header) => !GENERATED_BEDROCK_BETAS.has(header),
);
if (kept.length > 0) {
amrf.anthropic_beta = kept;
} else {
delete amrf.anthropic_beta;
}
}
}
}
if (shouldOmitSamplingParameters) {