🧭 fix: Migrate Anthropic Long Context (#12911)

This commit is contained in:
Danny Avila 2026-05-02 22:14:19 +09:00 committed by GitHub
parent f3e1201ae7
commit eb22bb6969
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 121 additions and 155 deletions

View file

@ -140,7 +140,7 @@ PROXY=
#============#
ANTHROPIC_API_KEY=user_provided
# ANTHROPIC_MODELS=claude-opus-4-7,claude-sonnet-4-6,claude-opus-4-6,claude-opus-4-20250514,claude-sonnet-4-20250514,claude-3-7-sonnet-20250219,claude-3-5-sonnet-20241022,claude-3-5-haiku-20241022,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307
# ANTHROPIC_MODELS=claude-opus-4-7,claude-sonnet-4-6,claude-opus-4-6,claude-opus-4-20250514,claude-3-7-sonnet-20250219,claude-3-5-sonnet-20241022,claude-3-5-haiku-20241022,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307
# ANTHROPIC_REVERSE_PROXY=
# Set to true to use Anthropic models through Google Vertex AI instead of direct API

View file

@ -1185,13 +1185,22 @@ describe('Grok Model Tests - Tokens', () => {
describe('Claude Model Tests', () => {
it('should return correct context length for Claude 4 models', () => {
expect(getModelMaxTokens('claude-sonnet-4')).toBe(
maxTokensMap[EModelEndpoint.anthropic]['claude-sonnet-4'],
maxTokensMap[EModelEndpoint.anthropic]['claude-4'],
);
expect(getModelMaxTokens('claude-opus-4')).toBe(
maxTokensMap[EModelEndpoint.anthropic]['claude-opus-4'],
);
});
it('should return 200K for Claude Sonnet 4.5', () => {
expect(getModelMaxTokens('claude-sonnet-4-5', EModelEndpoint.anthropic)).toBe(
maxTokensMap[EModelEndpoint.anthropic]['claude-sonnet-4-5'],
);
expect(getModelMaxTokens('claude-sonnet-4-5-20250929')).toBe(
maxTokensMap[EModelEndpoint.anthropic]['claude-sonnet-4-5'],
);
});
it('should return correct context length for Claude Haiku 4.5', () => {
expect(getModelMaxTokens('claude-haiku-4-5', EModelEndpoint.anthropic)).toBe(
maxTokensMap[EModelEndpoint.anthropic]['claude-haiku-4-5'],
@ -1415,6 +1424,9 @@ describe('Claude Model Tests', () => {
expect(getModelMaxTokens('claude-sonnet-4-6')).toBe(
maxTokensMap[EModelEndpoint.anthropic]['claude-sonnet-4-6'],
);
expect(getModelMaxTokens('claude-sonnet-4-6')).toBeGreaterThan(
getModelMaxTokens('claude-sonnet-4-5'),
);
});
it('should return correct max output tokens for Claude Sonnet 4.6 (64K)', () => {

View file

@ -304,7 +304,7 @@ endpoints:
# # Option 1: Simple array (legacy format - model name = deployment name)
# # Use this if you want the technical model IDs to show in the UI
# # models:
# # - "claude-sonnet-4-20250514"
# # - "claude-sonnet-4-6"
# # - "claude-3-7-sonnet-20250219"
# # - "claude-3-5-sonnet-v2@20241022"
# # - "claude-3-5-haiku@20241022"
@ -317,7 +317,7 @@ endpoints:
# claude-opus-4.5:
# deploymentName: claude-opus-4-5@20251101
# claude-sonnet-4:
# deploymentName: claude-sonnet-4-20250514
# deploymentName: claude-sonnet-4-6
# claude-3.7-sonnet:
# deploymentName: claude-3-7-sonnet-20250219
# claude-3.5-sonnet:
@ -327,7 +327,7 @@ endpoints:
#
# # Option 3: Mixed format with default deploymentName
# # Set a default deploymentName and use boolean values for models
# # deploymentName: claude-sonnet-4-20250514
# # deploymentName: claude-sonnet-4-6
# # models:
# # claude-sonnet-4: true # Will use the default deploymentName
# # claude-3.5-haiku:
@ -458,7 +458,7 @@ endpoints:
# # The ARN (value) is the inference profile you wish to map to for that model
# # Both the model ID and ARN are sent to AWS - the model ID for validation/metadata, the ARN for routing
# inferenceProfiles:
# "us.anthropic.claude-sonnet-4-20250514-v1:0": "${BEDROCK_INFERENCE_PROFILE_CLAUDE_SONNET}"
# "us.anthropic.claude-sonnet-4-6": "${BEDROCK_INFERENCE_PROFILE_CLAUDE_SONNET}"
# "anthropic.claude-3-7-sonnet-20250219-v1:0": "arn:aws:bedrock:us-west-2:123456789012:application-inference-profile/abc123"
#
# # Guardrail Configuration

View file

@ -418,7 +418,7 @@ describe('Premium pricing parity', () => {
const initialBalance = 100000000;
await Balance.create({ user: userId, tokenCredits: initialBalance });
const model = 'claude-opus-4-6';
const model = 'gemini-3.1';
const promptTokens = 100000;
const completionTokens = 500;
@ -445,7 +445,7 @@ describe('Premium pricing parity', () => {
const initialBalance = 100000000;
await Balance.create({ user: userId, tokenCredits: initialBalance });
const model = 'claude-opus-4-6';
const model = 'gemini-3.1';
const promptTokens = 250000;
const completionTokens = 500;
@ -473,7 +473,7 @@ describe('Premium pricing parity', () => {
const initialBalance = 100000000;
await Balance.create({ user: userId, tokenCredits: initialBalance });
const model = 'claude-opus-4-6';
const model = 'gemini-3.1';
const promptTokens = (premiumTokenValues as Record<string, Record<string, number>>)[model]
.threshold;
const completionTokens = 500;
@ -540,7 +540,7 @@ describe('Multi-entry batch parity', () => {
const initialBalance = 100000000;
await Balance.create({ user: userId, tokenCredits: initialBalance });
const model = 'claude-opus-4-6';
const model = 'gemini-3.1';
const tokenUsage = {
promptTokens: { input: 200000, write: 10000, read: 5000 },
completionTokens: 1000,

View file

@ -5,7 +5,6 @@ import {
ThinkingDisplay,
AnthropicEffort,
anthropicSettings,
supportsContext1m,
resolveThinkingDisplay,
supportsAdaptiveThinking,
} from 'librechat-data-provider';
@ -56,10 +55,6 @@ function getClaudeHeaders(
return {
'anthropic-beta': 'token-efficient-tools-2025-02-19,output-128k-2025-02-19',
};
} else if (supportsContext1m(model)) {
return {
'anthropic-beta': 'context-1m-2025-08-07',
};
}
return undefined;

View file

@ -88,21 +88,18 @@ describe('getLLMConfig', () => {
expect(result.llmConfig.thinking).toHaveProperty('budget_tokens', 2000);
});
it('should add "context-1m" beta header and promptCache boolean for claude-sonnet-4 model', () => {
it('should pass promptCache without long-context beta header for claude-sonnet-4 model', () => {
const modelOptions = {
model: 'claude-sonnet-4-20250514',
promptCache: true,
};
const result = getLLMConfig('test-key', { modelOptions });
const clientOptions = result.llmConfig.clientOptions;
expect(clientOptions?.defaultHeaders).toBeDefined();
expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta');
const defaultHeaders = clientOptions?.defaultHeaders as Record<string, string>;
expect(defaultHeaders['anthropic-beta']).toBe('context-1m-2025-08-07');
expect(clientOptions?.defaultHeaders).toBeUndefined();
expect(result.llmConfig.promptCache).toBe(true);
});
it('should add "context-1m" beta header and promptCache boolean for claude-sonnet-4 model formats', () => {
it('should pass promptCache without long-context beta header for claude-sonnet-4 model formats', () => {
const modelVariations = [
'claude-sonnet-4-20250514',
'claude-sonnet-4-latest',
@ -113,29 +110,23 @@ describe('getLLMConfig', () => {
const modelOptions = { model, promptCache: true };
const result = getLLMConfig('test-key', { modelOptions });
const clientOptions = result.llmConfig.clientOptions;
expect(clientOptions?.defaultHeaders).toBeDefined();
expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta');
const defaultHeaders = clientOptions?.defaultHeaders as Record<string, string>;
expect(defaultHeaders['anthropic-beta']).toBe('context-1m-2025-08-07');
expect(clientOptions?.defaultHeaders).toBeUndefined();
expect(result.llmConfig.promptCache).toBe(true);
});
});
it('should add "context-1m" beta header for claude-sonnet-4-6 model', () => {
it('should pass promptCache without long-context beta header for claude-sonnet-4-6 model', () => {
const modelOptions = {
model: 'claude-sonnet-4-6',
promptCache: true,
};
const result = getLLMConfig('test-key', { modelOptions });
const clientOptions = result.llmConfig.clientOptions;
expect(clientOptions?.defaultHeaders).toBeDefined();
expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta');
const defaultHeaders = clientOptions?.defaultHeaders as Record<string, string>;
expect(defaultHeaders['anthropic-beta']).toBe('context-1m-2025-08-07');
expect(clientOptions?.defaultHeaders).toBeUndefined();
expect(result.llmConfig.promptCache).toBe(true);
});
it('should add "context-1m" beta header for claude-sonnet-4-6 model formats', () => {
it('should pass promptCache without long-context beta header for claude-sonnet-4-6 model formats', () => {
const modelVariations = [
'claude-sonnet-4-6',
'claude-sonnet-4-6-20260101',
@ -146,10 +137,7 @@ describe('getLLMConfig', () => {
const modelOptions = { model, promptCache: true };
const result = getLLMConfig('test-key', { modelOptions });
const clientOptions = result.llmConfig.clientOptions;
expect(clientOptions?.defaultHeaders).toBeDefined();
expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta');
const defaultHeaders = clientOptions?.defaultHeaders as Record<string, string>;
expect(defaultHeaders['anthropic-beta']).toBe('context-1m-2025-08-07');
expect(clientOptions?.defaultHeaders).toBeUndefined();
expect(result.llmConfig.promptCache).toBe(true);
});
});
@ -1485,7 +1473,7 @@ describe('getLLMConfig', () => {
it('should handle prompt cache support logic for different models', () => {
const testCases = [
// Models that support prompt cache (and have other beta headers)
// Models that support prompt cache and have other beta headers
{
model: 'claude-3-5-sonnet',
promptCache: true,
@ -1513,7 +1501,13 @@ describe('getLLMConfig', () => {
{
model: 'claude-sonnet-4-20250514',
promptCache: true,
shouldHaveHeaders: true,
shouldHaveHeaders: false,
shouldHavePromptCache: true,
},
{
model: 'claude-sonnet-4-6',
promptCache: true,
shouldHaveHeaders: false,
shouldHavePromptCache: true,
},
// Models that support prompt cache but have no additional beta headers needed

View file

@ -627,9 +627,7 @@ describe('initializeBedrock', () => {
expect(amrf.thinking).toEqual({ type: 'adaptive' });
expect(result.llmConfig.maxTokens).toBeUndefined();
expect(amrf.anthropic_beta).toEqual(
expect.arrayContaining(['output-128k-2025-02-19', 'context-1m-2025-08-07']),
);
expect(amrf.anthropic_beta).toEqual(expect.arrayContaining(['output-128k-2025-02-19']));
});
it('should pass effort via output_config for Opus 4.6', async () => {

View file

@ -44,9 +44,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
},
configOptions: {
baseURL: 'http://host.docker.internal:4000/v1',
defaultHeaders: {
'anthropic-beta': 'context-1m-2025-08-07',
},
defaultHeaders: {},
},
tools: [],
});

View file

@ -132,7 +132,7 @@ async function validateAnthropicPdf(
/**
* Matches Bedrock Claude 4+ model identifiers, including cross-region inference profile IDs.
* Pattern: [region.]anthropic.claude-{family}-{major}[-{minor}][-{date}-v{n}:{rev}] (date/revision suffixes optional)
* e.g. "anthropic.claude-sonnet-4-20250514-v1:0" or "us.anthropic.claude-sonnet-4-20250514-v1:0"
* e.g. "anthropic.claude-sonnet-4-6" or "us.anthropic.claude-sonnet-4-6"
*/
const BEDROCK_CLAUDE_4_PLUS_RE = /(?:^|\.)anthropic\.claude-(?:sonnet|opus|haiku)-[4-9]\d*-/;
const isBedrockClaude4Plus = (model?: string): boolean =>

View file

@ -138,7 +138,8 @@ const anthropicModels = {
'claude-haiku-4-5': 200000,
'claude-opus-4': 200000,
'claude-opus-4-5': 200000,
'claude-sonnet-4': 1000000,
'claude-sonnet-4': 200000,
'claude-sonnet-4-5': 200000,
'claude-sonnet-4-6': 1000000,
'claude-opus-4-6': 1000000,
'claude-opus-4-7': 1000000,

View file

@ -124,12 +124,12 @@ describe('supportsAdaptiveThinking', () => {
});
describe('supportsContext1m', () => {
test('should return true for claude-sonnet-4', () => {
expect(supportsContext1m('claude-sonnet-4')).toBe(true);
test('should return false for claude-sonnet-4', () => {
expect(supportsContext1m('claude-sonnet-4')).toBe(false);
});
test('should return true for claude-sonnet-4-5', () => {
expect(supportsContext1m('claude-sonnet-4-5')).toBe(true);
test('should return false for claude-sonnet-4-5', () => {
expect(supportsContext1m('claude-sonnet-4-5')).toBe(false);
});
test('should return true for claude-sonnet-4-6', () => {
@ -176,14 +176,18 @@ describe('supportsContext1m', () => {
expect(supportsContext1m('gpt-4o')).toBe(false);
});
test('should return true for claude-4-sonnet (alternate naming)', () => {
expect(supportsContext1m('claude-4-sonnet')).toBe(true);
test('should return false for claude-4-sonnet (alternate naming, below threshold)', () => {
expect(supportsContext1m('claude-4-sonnet')).toBe(false);
});
test('should return true for claude-5-sonnet (alternate naming)', () => {
expect(supportsContext1m('claude-5-sonnet')).toBe(true);
});
test('should return true for claude-4-6-sonnet (alternate naming)', () => {
expect(supportsContext1m('claude-4-6-sonnet')).toBe(true);
});
test('should return true for claude-4-6-opus (alternate naming)', () => {
expect(supportsContext1m('claude-4-6-opus')).toBe(true);
});
@ -305,7 +309,7 @@ describe('bedrockInputParser', () => {
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
});
test('should match anthropic.claude-sonnet-4 model with 1M context header', () => {
test('should match anthropic.claude-sonnet-4 model without context beta header', () => {
const input = {
model: 'anthropic.claude-sonnet-4',
};
@ -313,13 +317,10 @@ describe('bedrockInputParser', () => {
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
expect(additionalFields.thinking).toBe(true);
expect(additionalFields.thinkingBudget).toBe(2000);
expect(additionalFields.anthropic_beta).toEqual([
'output-128k-2025-02-19',
'context-1m-2025-08-07',
]);
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
});
test('should match anthropic.claude-opus-5 model with adaptive thinking and 1M context', () => {
test('should match anthropic.claude-opus-5 model with adaptive thinking', () => {
const input = {
model: 'anthropic.claude-opus-5',
};
@ -327,13 +328,10 @@ describe('bedrockInputParser', () => {
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
expect(additionalFields.thinking).toEqual({ type: 'adaptive', display: 'summarized' });
expect(additionalFields.thinkingBudget).toBeUndefined();
expect(additionalFields.anthropic_beta).toEqual([
'output-128k-2025-02-19',
'context-1m-2025-08-07',
]);
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
});
test('should match anthropic.claude-haiku-6 model without 1M context header', () => {
test('should match anthropic.claude-haiku-6 model without context beta header', () => {
const input = {
model: 'anthropic.claude-haiku-6',
};
@ -344,7 +342,7 @@ describe('bedrockInputParser', () => {
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
});
test('should match anthropic.claude-4-sonnet model with 1M context header', () => {
test('should match anthropic.claude-4-sonnet model without context beta header', () => {
const input = {
model: 'anthropic.claude-4-sonnet',
};
@ -352,13 +350,10 @@ describe('bedrockInputParser', () => {
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
expect(additionalFields.thinking).toBe(true);
expect(additionalFields.thinkingBudget).toBe(2000);
expect(additionalFields.anthropic_beta).toEqual([
'output-128k-2025-02-19',
'context-1m-2025-08-07',
]);
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
});
test('should match anthropic.claude-4.5-sonnet model with 1M context header', () => {
test('should match anthropic.claude-4.5-sonnet model without context beta header', () => {
const input = {
model: 'anthropic.claude-4.5-sonnet',
};
@ -366,13 +361,10 @@ describe('bedrockInputParser', () => {
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
expect(additionalFields.thinking).toBe(true);
expect(additionalFields.thinkingBudget).toBe(2000);
expect(additionalFields.anthropic_beta).toEqual([
'output-128k-2025-02-19',
'context-1m-2025-08-07',
]);
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
});
test('should match anthropic.claude-sonnet-4-6 with adaptive thinking and 1M context header', () => {
test('should match anthropic.claude-sonnet-4-6 with adaptive thinking', () => {
const input = {
model: 'anthropic.claude-sonnet-4-6',
};
@ -380,13 +372,10 @@ describe('bedrockInputParser', () => {
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
expect(additionalFields.thinking).toEqual({ type: 'adaptive' });
expect(additionalFields.thinkingBudget).toBeUndefined();
expect(additionalFields.anthropic_beta).toEqual([
'output-128k-2025-02-19',
'context-1m-2025-08-07',
]);
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
});
test('should match us.anthropic.claude-sonnet-4-6 with adaptive thinking and 1M context header', () => {
test('should match us.anthropic.claude-sonnet-4-6 with adaptive thinking', () => {
const input = {
model: 'us.anthropic.claude-sonnet-4-6',
};
@ -394,13 +383,10 @@ describe('bedrockInputParser', () => {
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
expect(additionalFields.thinking).toEqual({ type: 'adaptive' });
expect(additionalFields.thinkingBudget).toBeUndefined();
expect(additionalFields.anthropic_beta).toEqual([
'output-128k-2025-02-19',
'context-1m-2025-08-07',
]);
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
});
test('should match anthropic.claude-4-7-sonnet model with adaptive thinking and 1M context header', () => {
test('should match anthropic.claude-4-7-sonnet model with adaptive thinking', () => {
const input = {
model: 'anthropic.claude-4-7-sonnet',
};
@ -408,10 +394,7 @@ describe('bedrockInputParser', () => {
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
expect(additionalFields.thinking).toEqual({ type: 'adaptive' });
expect(additionalFields.thinkingBudget).toBeUndefined();
expect(additionalFields.anthropic_beta).toEqual([
'output-128k-2025-02-19',
'context-1m-2025-08-07',
]);
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
});
test('should match anthropic.claude-sonnet-4-20250514-v1:0 with full model ID', () => {
@ -422,10 +405,7 @@ describe('bedrockInputParser', () => {
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
expect(additionalFields.thinking).toBe(true);
expect(additionalFields.thinkingBudget).toBe(2000);
expect(additionalFields.anthropic_beta).toEqual([
'output-128k-2025-02-19',
'context-1m-2025-08-07',
]);
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
});
test('should not match non-Claude models', () => {
@ -467,10 +447,7 @@ describe('bedrockInputParser', () => {
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
expect(additionalFields.thinking).toBeUndefined();
expect(additionalFields.thinkingBudget).toBeUndefined();
expect(additionalFields.anthropic_beta).toEqual([
'output-128k-2025-02-19',
'context-1m-2025-08-07',
]);
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
});
test('should respect custom thinking budget', () => {
@ -495,10 +472,7 @@ describe('bedrockInputParser', () => {
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
expect(additionalFields.thinking).toEqual({ type: 'adaptive' });
expect(additionalFields.thinkingBudget).toBeUndefined();
expect(additionalFields.anthropic_beta).toEqual([
'output-128k-2025-02-19',
'context-1m-2025-08-07',
]);
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
});
test('should handle cross-region model ID us.anthropic.claude-opus-4-6-v1', () => {
@ -509,10 +483,7 @@ describe('bedrockInputParser', () => {
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
expect(additionalFields.thinking).toEqual({ type: 'adaptive' });
expect(additionalFields.thinkingBudget).toBeUndefined();
expect(additionalFields.anthropic_beta).toEqual([
'output-128k-2025-02-19',
'context-1m-2025-08-07',
]);
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
});
test('should handle cross-region model ID global.anthropic.claude-opus-4-6-v1', () => {
@ -522,10 +493,7 @@ describe('bedrockInputParser', () => {
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
expect(additionalFields.thinking).toEqual({ type: 'adaptive' });
expect(additionalFields.anthropic_beta).toEqual([
'output-128k-2025-02-19',
'context-1m-2025-08-07',
]);
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
});
test('should pass effort parameter via output_config for adaptive models', () => {
@ -729,10 +697,7 @@ describe('bedrockInputParser', () => {
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
expect(additionalFields.thinking).toBeUndefined();
expect(additionalFields.thinkingBudget).toBeUndefined();
expect(additionalFields.anthropic_beta).toEqual([
'output-128k-2025-02-19',
'context-1m-2025-08-07',
]);
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
});
test('should preserve effort when thinking=false for adaptive models', () => {
@ -895,7 +860,7 @@ describe('bedrockInputParser', () => {
model: 'openai.gpt-oss-120b-1:0',
promptCache: true,
additionalModelRequestFields: {
anthropic_beta: ['output-128k-2025-02-19', 'context-1m-2025-08-07'],
anthropic_beta: ['output-128k-2025-02-19'],
thinking: { type: 'adaptive' },
output_config: { effort: 'high' },
},

View file

@ -134,10 +134,10 @@ export function omitsThinkingByDefault(model: string): boolean {
return false;
}
/** Checks if a model qualifies for the context-1m beta header (Sonnet 4+, Opus 4.6+, Opus 5+) */
/** Checks if a model has a 1M context window (Sonnet 4.6+, Opus 4.6+, Opus 5+) */
export function supportsContext1m(model: string): boolean {
const sonnet = parseSonnetVersion(model);
if (sonnet != null && sonnet.major >= 4) {
if (sonnet != null && (sonnet.major > 4 || (sonnet.major === 4 && sonnet.minor >= 6))) {
return true;
}
const opus = parseOpusVersion(model);
@ -151,7 +151,7 @@ export function supportsContext1m(model: string): boolean {
* Gets the appropriate anthropic_beta headers for Bedrock Anthropic models.
* Bedrock uses `anthropic_beta` (with underscore) in additionalModelRequestFields.
*
* @param model - The Bedrock model identifier (e.g., "anthropic.claude-sonnet-4-20250514-v1:0")
* @param model - The Bedrock model identifier (e.g., "anthropic.claude-sonnet-4-6")
* @returns Array of beta header strings, or empty array if not applicable
*/
function getBedrockAnthropicBetaHeaders(model: string): string[] {
@ -163,17 +163,10 @@ function getBedrockAnthropicBetaHeaders(model: string): string[] {
model,
);
const isSonnet4PlusModel =
/anthropic\.claude-(?:sonnet-[4-9]|[4-9](?:\.\d+)?(?:-\d+)?-sonnet)/.test(model);
if (isClaudeThinkingModel) {
betaHeaders.push('output-128k-2025-02-19');
}
if (isSonnet4PlusModel || supportsAdaptiveThinking(model)) {
betaHeaders.push('context-1m-2025-08-07');
}
return betaHeaders;
}

View file

@ -18,7 +18,7 @@ import type {
* These are the standard Anthropic model names as served by Vertex AI
*/
export const defaultVertexModels = [
'claude-sonnet-4-20250514',
'claude-sonnet-4-6',
'claude-3-7-sonnet-20250219',
'claude-3-5-sonnet-v2@20241022',
'claude-3-5-sonnet@20240620',

View file

@ -768,14 +768,14 @@ describe('spendTokens', () => {
});
describe('premium token pricing', () => {
it('should charge standard rates for claude-opus-4-6 when prompt tokens are below threshold', async () => {
it('should charge standard rates for gemini-3.1 when prompt tokens are below threshold', async () => {
const initialBalance = 100000000;
await Balance.create({
user: userId,
tokenCredits: initialBalance,
});
const model = 'claude-opus-4-6';
const model = 'gemini-3.1';
const promptTokens = 100000;
const completionTokens = 500;
@ -796,7 +796,7 @@ describe('spendTokens', () => {
expect(balance?.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
});
it('should charge premium rates for claude-opus-4-6 when prompt tokens exceed threshold', async () => {
it('should charge standard rates for claude-opus-4-6 when prompt tokens exceed the former premium threshold', async () => {
const initialBalance = 100000000;
await Balance.create({
user: userId,
@ -818,14 +818,13 @@ describe('spendTokens', () => {
await spendTokens(txData, { promptTokens, completionTokens });
const expectedCost =
promptTokens * premiumTokenValues[model].prompt +
completionTokens * premiumTokenValues[model].completion;
promptTokens * tokenValues[model].prompt + completionTokens * tokenValues[model].completion;
const balance = await Balance.findOne({ user: userId });
expect(balance?.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
});
it('should charge premium rates for both prompt and completion in structured tokens when above threshold', async () => {
it('should charge standard rates for Claude structured tokens above the former premium threshold', async () => {
const initialBalance = 100000000;
await Balance.create({
user: userId,
@ -852,16 +851,16 @@ describe('spendTokens', () => {
const result = await spendStructuredTokens(txData, tokenUsage);
const premiumPromptRate = premiumTokenValues[model].prompt;
const premiumCompletionRate = premiumTokenValues[model].completion;
const standardPromptRate = tokenValues[model].prompt;
const standardCompletionRate = tokenValues[model].completion;
const writeRate = getCacheMultiplier({ model, cacheType: 'write' });
const readRate = getCacheMultiplier({ model, cacheType: 'read' });
const expectedPromptCost =
tokenUsage.promptTokens.input * premiumPromptRate +
tokenUsage.promptTokens.input * standardPromptRate +
tokenUsage.promptTokens.write * (writeRate ?? 0) +
tokenUsage.promptTokens.read * (readRate ?? 0);
const expectedCompletionCost = tokenUsage.completionTokens * premiumCompletionRate;
const expectedCompletionCost = tokenUsage.completionTokens * standardCompletionRate;
expect(result).not.toBeNull();
expect(result!.prompt!.prompt).toBeCloseTo(-expectedPromptCost, 0);
@ -875,7 +874,7 @@ describe('spendTokens', () => {
tokenCredits: initialBalance,
});
const model = 'claude-opus-4-6';
const model = 'gemini-3.1';
const txData = {
user: userId,
conversationId: 'test-structured-standard',
@ -1106,7 +1105,7 @@ describe('spendTokens', () => {
tokenCredits: initialBalance,
});
const model = 'claude-opus-4-6';
const model = 'gemini-3.1';
const promptTokens = 250000;
const completionTokens = 500;
@ -1163,7 +1162,7 @@ describe('spendTokens', () => {
tokenCredits: initialBalance,
});
const model = 'claude-opus-4-6';
const model = 'gemini-3.1';
const txData = {
user: userId,
conversationId: 'test-negative-no-premium',

View file

@ -700,7 +700,7 @@ describe('Premium Token Pricing Integration Tests', () => {
const initialBalance = 100000000;
await Balance.create({ user: userId, tokenCredits: initialBalance });
const model = 'claude-opus-4-6';
const model = 'gemini-3.1';
const promptTokens = 100000;
const completionTokens = 500;
@ -729,7 +729,7 @@ describe('Premium Token Pricing Integration Tests', () => {
const initialBalance = 100000000;
await Balance.create({ user: userId, tokenCredits: initialBalance });
const model = 'claude-opus-4-6';
const model = 'gemini-3.1';
const promptTokens = 250000;
const completionTokens = 500;
@ -758,7 +758,7 @@ describe('Premium Token Pricing Integration Tests', () => {
const initialBalance = 100000000;
await Balance.create({ user: userId, tokenCredits: initialBalance });
const model = 'claude-opus-4-6';
const model = 'gemini-3.1';
const promptTokens = premiumTokenValues[model].threshold;
const completionTokens = 500;
@ -787,7 +787,7 @@ describe('Premium Token Pricing Integration Tests', () => {
const initialBalance = 100000000;
await Balance.create({ user: userId, tokenCredits: initialBalance });
const model = 'claude-opus-4-6';
const model = 'gemini-3.1';
const txData = {
user: userId,
conversationId: 'test-structured-premium',
@ -838,7 +838,7 @@ describe('Premium Token Pricing Integration Tests', () => {
const initialBalance = 100000000;
await Balance.create({ user: userId, tokenCredits: initialBalance });
const model = 'claude-opus-4-6';
const model = 'gemini-3.1';
const txData = {
user: userId,
conversationId: 'test-structured-standard',
@ -1027,7 +1027,7 @@ describe('Premium Token Pricing Integration Tests', () => {
const initialBalance = 100000000;
await Balance.create({ user: userId, tokenCredits: initialBalance });
const model = 'claude-opus-4-5';
const model = 'claude-opus-4-6';
const promptTokens = 300000;
const completionTokens = 500;

View file

@ -2322,14 +2322,14 @@ describe('Claude Model Tests', () => {
});
describe('Premium Token Pricing', () => {
const premiumModel = 'claude-opus-4-6';
const premiumModel = 'gemini-3.1';
const premiumEntry = premiumTokenValues[premiumModel];
const { threshold } = premiumEntry;
const belowThreshold = threshold - 1;
const aboveThreshold = threshold + 1;
const wellAboveThreshold = threshold * 2;
it('should have premium pricing defined for claude-opus-4-6', () => {
it('should have premium pricing defined for gemini-3.1', () => {
expect(premiumEntry).toBeDefined();
expect(premiumEntry.threshold).toBeDefined();
expect(premiumEntry.prompt).toBeDefined();
@ -2338,14 +2338,26 @@ describe('Premium Token Pricing', () => {
expect(premiumEntry.completion).toBeGreaterThan(tokenValues[premiumModel].completion);
});
it('should have premium pricing defined for claude-opus-4-7', () => {
const entry = premiumTokenValues['claude-opus-4-7'];
expect(entry).toBeDefined();
expect(entry.threshold).toBe(200000);
expect(entry.prompt).toBe(10);
expect(entry.completion).toBe(37.5);
expect(entry.prompt).toBeGreaterThan(tokenValues['claude-opus-4-7'].prompt);
expect(entry.completion).toBeGreaterThan(tokenValues['claude-opus-4-7'].completion);
it('should not apply premium pricing to Claude 1M GA models', () => {
const claudeModels = ['claude-opus-4-6', 'claude-opus-4-7', 'claude-sonnet-4-6'];
claudeModels.forEach((model) => {
expect(premiumTokenValues[model]).toBeUndefined();
expect(getPremiumRate(model, 'prompt', wellAboveThreshold)).toBeNull();
expect(
getMultiplier({
model,
tokenType: 'prompt',
inputTokenCount: wellAboveThreshold,
}),
).toBe(tokenValues[model].prompt);
expect(
getMultiplier({
model,
tokenType: 'completion',
inputTokenCount: wellAboveThreshold,
}),
).toBe(tokenValues[model].completion);
});
});
it('should return null from getPremiumRate when inputTokenCount is below threshold', () => {

View file

@ -159,6 +159,7 @@ export const tokenValues: Record<string, { prompt: number; completion: number }>
'claude-opus-4-6': { prompt: 5, completion: 25 },
'claude-opus-4-7': { prompt: 5, completion: 25 },
'claude-sonnet-4': { prompt: 3, completion: 15 },
'claude-sonnet-4-5': { prompt: 3, completion: 15 },
'claude-sonnet-4-6': { prompt: 3, completion: 15 },
'command-r': { prompt: 0.5, completion: 1.5 },
'command-r-plus': { prompt: 3, completion: 15 },
@ -286,6 +287,7 @@ export const cacheTokenValues: Record<string, { write: number; read: number }> =
'claude-3-haiku': { write: 0.3, read: 0.03 },
'claude-haiku-4-5': { write: 1.25, read: 0.1 },
'claude-sonnet-4': { write: 3.75, read: 0.3 },
'claude-sonnet-4-5': { write: 3.75, read: 0.3 },
'claude-sonnet-4-6': { write: 3.75, read: 0.3 },
'claude-opus-4': { write: 18.75, read: 1.5 },
'claude-opus-4-5': { write: 6.25, read: 0.5 },
@ -336,9 +338,6 @@ export const premiumTokenValues: Record<
string,
{ threshold: number; prompt: number; completion: number }
> = {
'claude-opus-4-6': { threshold: 200000, prompt: 10, completion: 37.5 },
'claude-opus-4-7': { threshold: 200000, prompt: 10, completion: 37.5 },
'claude-sonnet-4-6': { threshold: 200000, prompt: 6, completion: 22.5 },
'gemini-3.1': { threshold: 200000, prompt: 4, completion: 18 },
};