mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-07-03 12:54:01 +00:00
🧠 fix: Apply Bedrock thinking config to bare inference-profile model IDs (#14054)
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
GitNexus Index / index (push) Waiting to run
GitNexus Index / post-index (push) Blocked by required conditions
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions
Sync Helm Chart Tags / Ignore non-main push (push) Waiting to run
Sync Helm Chart Tags / Sync chart tags (push) Waiting to run
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
GitNexus Index / index (push) Waiting to run
GitNexus Index / post-index (push) Blocked by required conditions
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions
Sync Helm Chart Tags / Ignore non-main push (push) Waiting to run
Sync Helm Chart Tags / Sync chart tags (push) Waiting to run
* 🧠 fix: Apply Bedrock thinking config to bare inference-profile model IDs

  The Bedrock request parser gated thinking config, sampling handling, and the anthropic_beta headers on the model ID literally containing `anthropic.`. When a deployment uses an application inference profile, the LibreChat model ID is a bare `claude-*` (e.g. `claude-sonnet-5`) that maps to the profile ARN — so the gate never matched, no `thinking` config was sent, and reasoning models returned empty thinking blocks (most visibly: Claude Sonnet 5 never streamed reasoning, while `us.anthropic.claude-opus-4-8` did).

  Match on the `claude` family token instead of the `anthropic.` prefix so prefixed (`anthropic.`, `us.`, `global.`) and bare inference-profile IDs are handled identically.

  Verified e2e against live Bedrock via the agents SDK: a bare `claude-sonnet-5` now sends `{type:'adaptive', display:'summarized'}` and streams reasoning. Non-Claude Bedrock models (llama/cohere) and pre-thinking Claude (3.5 sonnet) are unaffected.

* 🧹 fix: Strip stale thinking fields for non-thinking Claude Bedrock IDs

  Follow-up to the bare-ID matching change: broadening the anthropic guard to match bare `claude-*` meant a non-thinking Claude profile (e.g. a bare `claude-3-5-sonnet` inference profile) took the Claude cleanup branch, which kept persisted `thinking`/`anthropic_beta`/`output_config` from a previously-selected thinking model — leaking unsupported fields after a model switch.

  Extract `isThinkingModel` and, in the Claude cleanup branch, strip the thinking fields when the model isn't thinking-capable. Also fixes the pre-existing prefixed `anthropic.claude-3-5-sonnet` case (which already kept stale thinking). Thinking-capable models (sonnet-5, 3.7-sonnet) still keep their config.

* 🩹 fix: Preserve user anthropic_beta on non-thinking Claude cleanup

  The non-thinking stale-cleanup deleted amrf.anthropic_beta, but that is the generic Bedrock Anthropic beta field and may carry a user opt-in (e.g. max-tokens-3-5-sonnet-2024-07-15 for extended output on Claude 3.5). Strip only the thinking-specific fields (thinking/thinkingBudget/effort/output_config) and leave anthropic_beta intact.

* fix: clear persisted AMRF (output_config, thinking, generated betas) on bare Bedrock profiles

* fix: preserve persisted effort on resume + strip stale thinking/betas across bare profiles

* fix: normalize string/comma-delimited anthropic_beta before stripping generated betas
This commit is contained in:
parent
53ee82fe5d
commit
8683eccbbc
2 changed files with 319 additions and 30 deletions
|
|
@ -461,6 +461,214 @@ describe('bedrockInputParser', () => {
|
|||
expect(additionalFields.anthropic_beta).toEqual(BEDROCK_CLAUDE_4_BETAS);
|
||||
});
|
||||
|
||||
// Bedrock application inference profiles surface a bare `claude-*` model ID
|
||||
// (no `anthropic.` prefix). The thinking/beta config must still apply.
|
||||
test('should configure adaptive thinking for a bare claude-sonnet-5 (inference profile) ID', () => {
|
||||
const input = { model: 'claude-sonnet-5' };
|
||||
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
|
||||
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
|
||||
expect(additionalFields.thinking).toEqual({ type: 'adaptive', display: 'summarized' });
|
||||
expect(additionalFields.thinkingBudget).toBeUndefined();
|
||||
expect(additionalFields.anthropic_beta).toEqual(BEDROCK_CLAUDE_4_BETAS);
|
||||
});
|
||||
|
||||
test('bare claude-* IDs match their anthropic.-prefixed equivalents', () => {
|
||||
const thinkingFor = (model: string) => {
|
||||
const result = bedrockInputParser.parse({ model }) as Record<string, unknown>;
|
||||
return (result.additionalModelRequestFields as Record<string, unknown>).thinking;
|
||||
};
|
||||
expect(thinkingFor('claude-sonnet-5')).toEqual(thinkingFor('anthropic.claude-sonnet-5'));
|
||||
expect(thinkingFor('claude-opus-4-8')).toEqual(thinkingFor('us.anthropic.claude-opus-4-8'));
|
||||
expect(thinkingFor('claude-sonnet-4-6')).toEqual(thinkingFor('anthropic.claude-sonnet-4-6'));
|
||||
});
|
||||
|
||||
test('should configure extended thinking for a bare claude-3-7-sonnet ID', () => {
|
||||
const input = { model: 'claude-3-7-sonnet' };
|
||||
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
|
||||
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
|
||||
expect(additionalFields.thinking).toBe(true);
|
||||
expect(additionalFields.thinkingBudget).toBe(2000);
|
||||
expect(additionalFields.anthropic_beta).toEqual([BEDROCK_OUTPUT_128K_BETA]);
|
||||
});
|
||||
|
||||
test('should not configure thinking for non-Claude Bedrock models', () => {
|
||||
const input = { model: 'meta.llama3-1-8b-instruct-v1:0' };
|
||||
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
|
||||
const additionalFields = result.additionalModelRequestFields as
|
||||
| Record<string, unknown>
|
||||
| undefined;
|
||||
expect(additionalFields?.thinking).toBeUndefined();
|
||||
expect(additionalFields?.anthropic_beta).toBeUndefined();
|
||||
});
|
||||
|
||||
// Switching a persisted conversation to a non-thinking Claude model (bare or
|
||||
// prefixed) must strip stale thinking fields carried over in AMRF, so they
|
||||
// aren't sent to a profile that can't accept them — but a user-configured
|
||||
// `anthropic_beta` opt-in must be preserved.
|
||||
test.each(['claude-3-5-sonnet', 'anthropic.claude-3-5-sonnet'])(
|
||||
'strips stale thinking fields but keeps user anthropic_beta for non-thinking Claude %s',
|
||||
(model) => {
|
||||
const input = {
|
||||
model,
|
||||
additionalModelRequestFields: {
|
||||
thinking: { type: 'adaptive', display: 'summarized' },
|
||||
anthropic_beta: ['max-tokens-3-5-sonnet-2024-07-15'],
|
||||
output_config: { effort: 'high' },
|
||||
},
|
||||
};
|
||||
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
|
||||
const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
|
||||
expect(amrf?.thinking).toBeUndefined();
|
||||
expect(amrf?.output_config).toBeUndefined();
|
||||
expect(amrf?.anthropic_beta).toEqual(['max-tokens-3-5-sonnet-2024-07-15']);
|
||||
},
|
||||
);
|
||||
|
||||
test('keeps thinking config for a bare thinking Claude model with persisted AMRF', () => {
|
||||
const input = {
|
||||
model: 'claude-sonnet-5',
|
||||
additionalModelRequestFields: { thinking: { type: 'adaptive', display: 'summarized' } },
|
||||
};
|
||||
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
|
||||
const amrf = result.additionalModelRequestFields as Record<string, unknown>;
|
||||
expect(amrf.thinking).toEqual({ type: 'adaptive', display: 'summarized' });
|
||||
});
|
||||
|
||||
// The persisted AMRF is spread back into the final request, so clearing only
|
||||
// the freshly-built fields leaves a stale value from a prior selection.
|
||||
// An agent resume round-trips its llmConfig back into model_parameters, so a
|
||||
// persisted output_config with NO top-level effort must be preserved as the
|
||||
// user's saved choice; only an explicit unset ('' / null) clears it.
|
||||
test('preserves persisted output_config when an adaptive model is re-parsed without top-level effort', () => {
|
||||
const input = {
|
||||
model: 'claude-opus-4-8',
|
||||
additionalModelRequestFields: {
|
||||
thinking: { type: 'adaptive', display: 'summarized' },
|
||||
output_config: { effort: 'high' },
|
||||
},
|
||||
};
|
||||
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
|
||||
const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
|
||||
expect(amrf?.output_config).toEqual({ effort: 'high' });
|
||||
expect(amrf?.thinking).toEqual({ type: 'adaptive', display: 'summarized' });
|
||||
});
|
||||
|
||||
test.each(['', null])(
|
||||
'clears persisted output_config when effort is explicitly unset (%p)',
|
||||
(effort) => {
|
||||
const input = {
|
||||
model: 'claude-opus-4-8',
|
||||
effort,
|
||||
additionalModelRequestFields: {
|
||||
thinking: { type: 'adaptive', display: 'summarized' },
|
||||
output_config: { effort: 'high' },
|
||||
},
|
||||
};
|
||||
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
|
||||
const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
|
||||
expect(amrf?.output_config).toBeUndefined();
|
||||
},
|
||||
);
|
||||
|
||||
// Switching a persisted adaptive/disabled conversation to a bare non-adaptive
|
||||
// thinking profile (3.7) must not leak the prior thinking object or output_config.
|
||||
test('clears persisted thinking + output_config when switching to a bare non-adaptive thinking model', () => {
|
||||
const input = {
|
||||
model: 'claude-3-7-sonnet',
|
||||
additionalModelRequestFields: {
|
||||
thinking: { type: 'disabled' },
|
||||
output_config: { effort: 'high' },
|
||||
},
|
||||
};
|
||||
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
|
||||
const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
|
||||
expect(amrf?.output_config).toBeUndefined();
|
||||
expect(amrf?.thinking).not.toEqual({ type: 'disabled' });
|
||||
});
|
||||
|
||||
// Switching a bare Claude 4+/5 profile (both generated betas persisted) to a
|
||||
// bare 3.7 profile must drop the fine-grained beta 3.7 does not generate.
|
||||
test('drops a stale generated beta not applicable to the target thinking model', () => {
|
||||
const input = {
|
||||
model: 'claude-3-7-sonnet',
|
||||
additionalModelRequestFields: {
|
||||
anthropic_beta: [
|
||||
BEDROCK_OUTPUT_128K_BETA,
|
||||
BEDROCK_FINE_GRAINED_TOOL_STREAMING_BETA,
|
||||
'context-1m-2025-08-07',
|
||||
],
|
||||
},
|
||||
};
|
||||
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
|
||||
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
|
||||
expect(additionalFields.anthropic_beta).toEqual([
|
||||
BEDROCK_OUTPUT_128K_BETA,
|
||||
'context-1m-2025-08-07',
|
||||
]);
|
||||
});
|
||||
|
||||
test('disabling thinking on a bare adaptive model clears the persisted adaptive config', () => {
|
||||
const input = {
|
||||
model: 'claude-opus-4-8',
|
||||
thinking: false,
|
||||
additionalModelRequestFields: {
|
||||
thinking: { type: 'adaptive', display: 'summarized' },
|
||||
},
|
||||
};
|
||||
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
|
||||
const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
|
||||
expect(amrf?.thinking).toBeUndefined();
|
||||
});
|
||||
|
||||
test('strips only LibreChat-generated betas from persisted AMRF, keeping user betas', () => {
|
||||
const input = {
|
||||
model: 'claude-3-5-sonnet',
|
||||
additionalModelRequestFields: {
|
||||
anthropic_beta: [BEDROCK_OUTPUT_128K_BETA, 'context-1m-2025-08-07'],
|
||||
},
|
||||
};
|
||||
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
|
||||
const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
|
||||
expect(amrf?.anthropic_beta).toEqual(['context-1m-2025-08-07']);
|
||||
});
|
||||
|
||||
test('drops persisted anthropic_beta entirely when it holds only generated betas', () => {
|
||||
const input = {
|
||||
model: 'claude-3-5-sonnet',
|
||||
additionalModelRequestFields: {
|
||||
anthropic_beta: [BEDROCK_OUTPUT_128K_BETA, BEDROCK_FINE_GRAINED_TOOL_STREAMING_BETA],
|
||||
},
|
||||
};
|
||||
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
|
||||
const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
|
||||
expect(amrf?.anthropic_beta).toBeUndefined();
|
||||
});
|
||||
|
||||
// Persisted anthropic_beta may be a bare string or a comma-delimited string,
|
||||
// which the merge helper accepts; the non-thinking cleanup must normalize
|
||||
// that shape before filtering out generated betas.
|
||||
test('strips a string-form generated beta for non-thinking Claude', () => {
|
||||
const input = {
|
||||
model: 'claude-3-5-sonnet',
|
||||
additionalModelRequestFields: { anthropic_beta: BEDROCK_OUTPUT_128K_BETA },
|
||||
};
|
||||
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
|
||||
const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
|
||||
expect(amrf?.anthropic_beta).toBeUndefined();
|
||||
});
|
||||
|
||||
test('strips generated betas from a comma-delimited string, keeping user betas', () => {
|
||||
const input = {
|
||||
model: 'claude-3-5-sonnet',
|
||||
additionalModelRequestFields: {
|
||||
anthropic_beta: `${BEDROCK_OUTPUT_128K_BETA}, context-1m-2025-08-07`,
|
||||
},
|
||||
};
|
||||
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
|
||||
const amrf = result.additionalModelRequestFields as Record<string, unknown> | undefined;
|
||||
expect(amrf?.anthropic_beta).toEqual(['context-1m-2025-08-07']);
|
||||
});
|
||||
|
||||
test('should match anthropic.claude-haiku-6 model without context beta header', () => {
|
||||
const input = {
|
||||
model: 'anthropic.claude-haiku-6',
|
||||
|
|
|
|||
|
|
@ -6,6 +6,13 @@ const DEFAULT_THINKING_BUDGET = 2000;
|
|||
export const BEDROCK_OUTPUT_128K_BETA = 'output-128k-2025-02-19';
|
||||
export const BEDROCK_FINE_GRAINED_TOOL_STREAMING_BETA = 'fine-grained-tool-streaming-2025-05-14';
|
||||
|
||||
/** Betas LibreChat injects itself, safe to strip from persisted AMRF when a
|
||||
* model no longer supports them; anything else in `anthropic_beta` is a user opt-in. */
|
||||
const GENERATED_BEDROCK_BETAS = new Set<string>([
|
||||
BEDROCK_OUTPUT_128K_BETA,
|
||||
BEDROCK_FINE_GRAINED_TOOL_STREAMING_BETA,
|
||||
]);
|
||||
|
||||
const bedrockReasoningConfigValues = new Set<string>(Object.values(s.BedrockReasoningConfig));
|
||||
|
||||
type ThinkingConfig =
|
||||
|
|
@ -201,6 +208,22 @@ export function supportsContext1m(model: string): boolean {
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* A Bedrock Claude model ID may be prefixed (`anthropic.claude-*`,
|
||||
* `us.anthropic.claude-*`, `global.anthropic.claude-*`) or bare (`claude-*`,
|
||||
* used when the LibreChat model ID maps to an application inference profile).
|
||||
* Match on the `claude` family token so every form is recognized — requiring
|
||||
* the literal `anthropic.` prefix silently dropped thinking config, beta
|
||||
* headers, and sampling handling for inference-profile deployments.
|
||||
*/
|
||||
const BEDROCK_CLAUDE_4PLUS_THINKING =
|
||||
/claude-(?:[4-9](?:\.\d+)?(?:-\d+)?-(?:sonnet|opus|haiku)|(?:sonnet|opus|haiku)-[4-9])/;
|
||||
|
||||
/** Whether a Bedrock model ID is an Anthropic Claude model (prefixed or bare). */
|
||||
function isBedrockClaudeModel(model: string): boolean {
|
||||
return model.includes('claude');
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the appropriate anthropic_beta headers for Bedrock Anthropic models.
|
||||
* Bedrock uses `anthropic_beta` (with underscore) in additionalModelRequestFields.
|
||||
|
|
@ -213,11 +236,8 @@ function getBedrockAnthropicBetaHeaders(model: string): string[] {
|
|||
|
||||
/** Mythos-class (Fable/Mythos) is intentionally not matched: these betas are built-in/no-op for the
|
||||
* 4.7+ generation (Fable has native 128K output), so omitting them on Bedrock is lossless. */
|
||||
const isClaude4PlusModel =
|
||||
/anthropic\.claude-(?:[4-9](?:\.\d+)?(?:-\d+)?-(?:sonnet|opus|haiku)|(?:sonnet|opus|haiku)-[4-9])/.test(
|
||||
model,
|
||||
);
|
||||
const isClaudeThinkingModel = model.includes('anthropic.claude-3-7-sonnet') || isClaude4PlusModel;
|
||||
const isClaude4PlusModel = BEDROCK_CLAUDE_4PLUS_THINKING.test(model);
|
||||
const isClaudeThinkingModel = model.includes('claude-3-7-sonnet') || isClaude4PlusModel;
|
||||
|
||||
if (isClaudeThinkingModel) {
|
||||
betaHeaders.push(BEDROCK_OUTPUT_128K_BETA);
|
||||
|
|
@ -230,26 +250,41 @@ function getBedrockAnthropicBetaHeaders(model: string): string[] {
|
|||
return betaHeaders;
|
||||
}
|
||||
|
||||
function mergeBedrockAnthropicBetaHeaders(existing: unknown, generated: string[]): string[] {
|
||||
let existingValues: unknown[] = [];
|
||||
if (Array.isArray(existing)) {
|
||||
existingValues = existing;
|
||||
} else if (typeof existing === 'string') {
|
||||
existingValues = [existing];
|
||||
/** Flatten an anthropic_beta value (array, single string, or comma-delimited
|
||||
* string) into trimmed, non-empty header tokens. */
|
||||
function normalizeBetaHeaders(value: unknown): string[] {
|
||||
let values: unknown[] = [];
|
||||
if (Array.isArray(value)) {
|
||||
values = value;
|
||||
} else if (typeof value === 'string') {
|
||||
values = [value];
|
||||
}
|
||||
|
||||
const betaHeaders = new Set<string>();
|
||||
|
||||
[...existingValues, ...generated].forEach((value) => {
|
||||
if (typeof value !== 'string') {
|
||||
const headers: string[] = [];
|
||||
values.forEach((entry) => {
|
||||
if (typeof entry !== 'string') {
|
||||
return;
|
||||
}
|
||||
|
||||
value
|
||||
entry
|
||||
.split(',')
|
||||
.map((header) => header.trim())
|
||||
.filter(Boolean)
|
||||
.forEach((header) => betaHeaders.add(header));
|
||||
.forEach((header) => headers.push(header));
|
||||
});
|
||||
return headers;
|
||||
}
|
||||
|
||||
function mergeBedrockAnthropicBetaHeaders(existing: unknown, generated: string[]): string[] {
|
||||
const generatedSet = new Set(generated);
|
||||
const betaHeaders = new Set<string>();
|
||||
|
||||
[...normalizeBetaHeaders(existing), ...generated].forEach((header) => {
|
||||
/** Drop a generated beta carried over from a prior model that the current
|
||||
* model does not generate (e.g. fine-grained-tool-streaming on a 3.7
|
||||
* profile); user opt-ins are always preserved. */
|
||||
if (GENERATED_BEDROCK_BETAS.has(header) && !generatedSet.has(header)) {
|
||||
return;
|
||||
}
|
||||
betaHeaders.add(header);
|
||||
});
|
||||
|
||||
return Array.from(betaHeaders);
|
||||
|
|
@ -407,21 +442,30 @@ export const bedrockInputParser = s.tConversationSchema
|
|||
additionalFields.thinking = false;
|
||||
}
|
||||
|
||||
/** Configure thinking for Bedrock Anthropic models: 3.7 Sonnet, Claude 4+ (opus/sonnet/haiku), and Mythos-class (Fable/Mythos). */
|
||||
if (
|
||||
/** Bedrock thinking-capable Claude models: 3.7 Sonnet, Claude 4+ (opus/sonnet/haiku), and Mythos-class (Fable/Mythos). */
|
||||
const isThinkingModel =
|
||||
typeof typedData.model === 'string' &&
|
||||
(typedData.model.includes('anthropic.claude-3-7-sonnet') ||
|
||||
/anthropic\.claude-(?:[4-9](?:\.\d+)?(?:-\d+)?-(?:sonnet|opus|haiku)|(?:sonnet|opus|haiku)-[4-9])/.test(
|
||||
typedData.model,
|
||||
) ||
|
||||
s.isMythosClassModel(typedData.model))
|
||||
) {
|
||||
(typedData.model.includes('claude-3-7-sonnet') ||
|
||||
BEDROCK_CLAUDE_4PLUS_THINKING.test(typedData.model) ||
|
||||
s.isMythosClassModel(typedData.model));
|
||||
|
||||
if (isThinkingModel) {
|
||||
const isAdaptive = supportsAdaptiveThinking(typedData.model as string);
|
||||
|
||||
if (isAdaptive) {
|
||||
/** Persisted AMRF is spread into the final request, so clearing only
|
||||
* `additionalFields` leaves a stale value from a prior selection. */
|
||||
const persistedAmrf = typedData.additionalModelRequestFields as
|
||||
| Record<string, unknown>
|
||||
| undefined;
|
||||
const effort = additionalFields.effort;
|
||||
if (effort && typeof effort === 'string' && effort !== '') {
|
||||
if (typeof effort === 'string' && effort !== '') {
|
||||
additionalFields.output_config = { effort };
|
||||
} else if (effort !== undefined && persistedAmrf) {
|
||||
/** Explicit unset ('' or null) clears the persisted effort. An absent
|
||||
* effort (agent resume, where the prior llmConfig persisted
|
||||
* `output_config` but no top-level `effort`) preserves it. */
|
||||
delete persistedAmrf.output_config;
|
||||
}
|
||||
delete additionalFields.effort;
|
||||
|
||||
|
|
@ -432,6 +476,11 @@ export const bedrockInputParser = s.tConversationSchema
|
|||
additionalFields.thinking = { type: 'disabled' };
|
||||
} else {
|
||||
delete additionalFields.thinking;
|
||||
/** Disable-by-omission models (Opus 4.7+): drop the persisted
|
||||
* adaptive config so turning thinking off actually disables it. */
|
||||
if (persistedAmrf) {
|
||||
delete persistedAmrf.thinking;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/**
|
||||
|
|
@ -473,12 +522,23 @@ export const bedrockInputParser = s.tConversationSchema
|
|||
}
|
||||
delete additionalFields.effort;
|
||||
delete additionalFields.thinkingDisplay;
|
||||
|
||||
/** A bare non-adaptive thinking profile (e.g. `claude-3-7-sonnet`) must
|
||||
* not inherit an adaptive/disabled thinking object or `output_config`
|
||||
* persisted from another model; this branch's own fields are authoritative. */
|
||||
const persistedAmrf = typedData.additionalModelRequestFields as
|
||||
| Record<string, unknown>
|
||||
| undefined;
|
||||
if (persistedAmrf) {
|
||||
delete persistedAmrf.thinking;
|
||||
delete persistedAmrf.output_config;
|
||||
}
|
||||
}
|
||||
|
||||
/** Anthropic uses 'effort' via output_config, not reasoning_config */
|
||||
delete additionalFields.reasoning_effort;
|
||||
|
||||
if ((typedData.model as string).includes('anthropic.')) {
|
||||
if (isBedrockClaudeModel(typedData.model as string)) {
|
||||
const betaHeaders = getBedrockAnthropicBetaHeaders(typedData.model as string);
|
||||
if (betaHeaders.length > 0) {
|
||||
const existingBetaHeaders = (
|
||||
|
|
@ -509,7 +569,7 @@ export const bedrockInputParser = s.tConversationSchema
|
|||
}
|
||||
|
||||
const isAnthropicModel =
|
||||
typeof typedData.model === 'string' && typedData.model.includes('anthropic.');
|
||||
typeof typedData.model === 'string' && isBedrockClaudeModel(typedData.model);
|
||||
|
||||
/** Strip stale fields from previously-persisted additionalModelRequestFields */
|
||||
if (
|
||||
|
|
@ -527,6 +587,27 @@ export const bedrockInputParser = s.tConversationSchema
|
|||
} else {
|
||||
delete amrf.reasoning_config;
|
||||
delete amrf.reasoning_effort;
|
||||
/** A Claude model that does not support Bedrock thinking (e.g. a bare
|
||||
* `claude-3-5-sonnet` inference profile) must not carry stale thinking
|
||||
* fields from a previously-selected thinking model. Drop only the
|
||||
* LibreChat-generated betas (output-128k, fine-grained tool streaming);
|
||||
* user opt-ins in `anthropic_beta` are preserved. */
|
||||
if (!isThinkingModel) {
|
||||
delete amrf.thinking;
|
||||
delete amrf.thinkingBudget;
|
||||
delete amrf.effort;
|
||||
delete amrf.output_config;
|
||||
if (amrf.anthropic_beta !== undefined) {
|
||||
const kept = normalizeBetaHeaders(amrf.anthropic_beta).filter(
|
||||
(header) => !GENERATED_BEDROCK_BETAS.has(header),
|
||||
);
|
||||
if (kept.length > 0) {
|
||||
amrf.anthropic_beta = kept;
|
||||
} else {
|
||||
delete amrf.anthropic_beta;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (shouldOmitSamplingParameters) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue