mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-05-13 07:46:47 +00:00
🧭 fix: Migrate Anthropic Long Context (#12911)
This commit is contained in:
parent
f3e1201ae7
commit
eb22bb6969
17 changed files with 121 additions and 155 deletions
|
|
@ -140,7 +140,7 @@ PROXY=
|
|||
#============#
|
||||
|
||||
ANTHROPIC_API_KEY=user_provided
|
||||
# ANTHROPIC_MODELS=claude-opus-4-7,claude-sonnet-4-6,claude-opus-4-6,claude-opus-4-20250514,claude-sonnet-4-20250514,claude-3-7-sonnet-20250219,claude-3-5-sonnet-20241022,claude-3-5-haiku-20241022,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307
|
||||
# ANTHROPIC_MODELS=claude-opus-4-7,claude-sonnet-4-6,claude-opus-4-6,claude-opus-4-20250514,claude-3-7-sonnet-20250219,claude-3-5-sonnet-20241022,claude-3-5-haiku-20241022,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307
|
||||
# ANTHROPIC_REVERSE_PROXY=
|
||||
|
||||
# Set to true to use Anthropic models through Google Vertex AI instead of direct API
|
||||
|
|
|
|||
|
|
@ -1185,13 +1185,22 @@ describe('Grok Model Tests - Tokens', () => {
|
|||
describe('Claude Model Tests', () => {
|
||||
it('should return correct context length for Claude 4 models', () => {
|
||||
expect(getModelMaxTokens('claude-sonnet-4')).toBe(
|
||||
maxTokensMap[EModelEndpoint.anthropic]['claude-sonnet-4'],
|
||||
maxTokensMap[EModelEndpoint.anthropic]['claude-4'],
|
||||
);
|
||||
expect(getModelMaxTokens('claude-opus-4')).toBe(
|
||||
maxTokensMap[EModelEndpoint.anthropic]['claude-opus-4'],
|
||||
);
|
||||
});
|
||||
|
||||
it('should return 200K for Claude Sonnet 4.5', () => {
|
||||
expect(getModelMaxTokens('claude-sonnet-4-5', EModelEndpoint.anthropic)).toBe(
|
||||
maxTokensMap[EModelEndpoint.anthropic]['claude-sonnet-4-5'],
|
||||
);
|
||||
expect(getModelMaxTokens('claude-sonnet-4-5-20250929')).toBe(
|
||||
maxTokensMap[EModelEndpoint.anthropic]['claude-sonnet-4-5'],
|
||||
);
|
||||
});
|
||||
|
||||
it('should return correct context length for Claude Haiku 4.5', () => {
|
||||
expect(getModelMaxTokens('claude-haiku-4-5', EModelEndpoint.anthropic)).toBe(
|
||||
maxTokensMap[EModelEndpoint.anthropic]['claude-haiku-4-5'],
|
||||
|
|
@ -1415,6 +1424,9 @@ describe('Claude Model Tests', () => {
|
|||
expect(getModelMaxTokens('claude-sonnet-4-6')).toBe(
|
||||
maxTokensMap[EModelEndpoint.anthropic]['claude-sonnet-4-6'],
|
||||
);
|
||||
expect(getModelMaxTokens('claude-sonnet-4-6')).toBeGreaterThan(
|
||||
getModelMaxTokens('claude-sonnet-4-5'),
|
||||
);
|
||||
});
|
||||
|
||||
it('should return correct max output tokens for Claude Sonnet 4.6 (64K)', () => {
|
||||
|
|
|
|||
|
|
@ -304,7 +304,7 @@ endpoints:
|
|||
# # Option 1: Simple array (legacy format - model name = deployment name)
|
||||
# # Use this if you want the technical model IDs to show in the UI
|
||||
# # models:
|
||||
# # - "claude-sonnet-4-20250514"
|
||||
# # - "claude-sonnet-4-6"
|
||||
# # - "claude-3-7-sonnet-20250219"
|
||||
# # - "claude-3-5-sonnet-v2@20241022"
|
||||
# # - "claude-3-5-haiku@20241022"
|
||||
|
|
@ -317,7 +317,7 @@ endpoints:
|
|||
# claude-opus-4.5:
|
||||
# deploymentName: claude-opus-4-5@20251101
|
||||
# claude-sonnet-4:
|
||||
# deploymentName: claude-sonnet-4-20250514
|
||||
# deploymentName: claude-sonnet-4-6
|
||||
# claude-3.7-sonnet:
|
||||
# deploymentName: claude-3-7-sonnet-20250219
|
||||
# claude-3.5-sonnet:
|
||||
|
|
@ -327,7 +327,7 @@ endpoints:
|
|||
#
|
||||
# # Option 3: Mixed format with default deploymentName
|
||||
# # Set a default deploymentName and use boolean values for models
|
||||
# # deploymentName: claude-sonnet-4-20250514
|
||||
# # deploymentName: claude-sonnet-4-6
|
||||
# # models:
|
||||
# # claude-sonnet-4: true # Will use the default deploymentName
|
||||
# # claude-3.5-haiku:
|
||||
|
|
@ -458,7 +458,7 @@ endpoints:
|
|||
# # The ARN (value) is the inference profile you wish to map to for that model
|
||||
# # Both the model ID and ARN are sent to AWS - the model ID for validation/metadata, the ARN for routing
|
||||
# inferenceProfiles:
|
||||
# "us.anthropic.claude-sonnet-4-20250514-v1:0": "${BEDROCK_INFERENCE_PROFILE_CLAUDE_SONNET}"
|
||||
# "us.anthropic.claude-sonnet-4-6": "${BEDROCK_INFERENCE_PROFILE_CLAUDE_SONNET}"
|
||||
# "anthropic.claude-3-7-sonnet-20250219-v1:0": "arn:aws:bedrock:us-west-2:123456789012:application-inference-profile/abc123"
|
||||
#
|
||||
# # Guardrail Configuration
|
||||
|
|
|
|||
|
|
@ -418,7 +418,7 @@ describe('Premium pricing parity', () => {
|
|||
const initialBalance = 100000000;
|
||||
await Balance.create({ user: userId, tokenCredits: initialBalance });
|
||||
|
||||
const model = 'claude-opus-4-6';
|
||||
const model = 'gemini-3.1';
|
||||
const promptTokens = 100000;
|
||||
const completionTokens = 500;
|
||||
|
||||
|
|
@ -445,7 +445,7 @@ describe('Premium pricing parity', () => {
|
|||
const initialBalance = 100000000;
|
||||
await Balance.create({ user: userId, tokenCredits: initialBalance });
|
||||
|
||||
const model = 'claude-opus-4-6';
|
||||
const model = 'gemini-3.1';
|
||||
const promptTokens = 250000;
|
||||
const completionTokens = 500;
|
||||
|
||||
|
|
@ -473,7 +473,7 @@ describe('Premium pricing parity', () => {
|
|||
const initialBalance = 100000000;
|
||||
await Balance.create({ user: userId, tokenCredits: initialBalance });
|
||||
|
||||
const model = 'claude-opus-4-6';
|
||||
const model = 'gemini-3.1';
|
||||
const promptTokens = (premiumTokenValues as Record<string, Record<string, number>>)[model]
|
||||
.threshold;
|
||||
const completionTokens = 500;
|
||||
|
|
@ -540,7 +540,7 @@ describe('Multi-entry batch parity', () => {
|
|||
const initialBalance = 100000000;
|
||||
await Balance.create({ user: userId, tokenCredits: initialBalance });
|
||||
|
||||
const model = 'claude-opus-4-6';
|
||||
const model = 'gemini-3.1';
|
||||
const tokenUsage = {
|
||||
promptTokens: { input: 200000, write: 10000, read: 5000 },
|
||||
completionTokens: 1000,
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ import {
|
|||
ThinkingDisplay,
|
||||
AnthropicEffort,
|
||||
anthropicSettings,
|
||||
supportsContext1m,
|
||||
resolveThinkingDisplay,
|
||||
supportsAdaptiveThinking,
|
||||
} from 'librechat-data-provider';
|
||||
|
|
@ -56,10 +55,6 @@ function getClaudeHeaders(
|
|||
return {
|
||||
'anthropic-beta': 'token-efficient-tools-2025-02-19,output-128k-2025-02-19',
|
||||
};
|
||||
} else if (supportsContext1m(model)) {
|
||||
return {
|
||||
'anthropic-beta': 'context-1m-2025-08-07',
|
||||
};
|
||||
}
|
||||
|
||||
return undefined;
|
||||
|
|
|
|||
|
|
@ -88,21 +88,18 @@ describe('getLLMConfig', () => {
|
|||
expect(result.llmConfig.thinking).toHaveProperty('budget_tokens', 2000);
|
||||
});
|
||||
|
||||
it('should add "context-1m" beta header and promptCache boolean for claude-sonnet-4 model', () => {
|
||||
it('should pass promptCache without long-context beta header for claude-sonnet-4 model', () => {
|
||||
const modelOptions = {
|
||||
model: 'claude-sonnet-4-20250514',
|
||||
promptCache: true,
|
||||
};
|
||||
const result = getLLMConfig('test-key', { modelOptions });
|
||||
const clientOptions = result.llmConfig.clientOptions;
|
||||
expect(clientOptions?.defaultHeaders).toBeDefined();
|
||||
expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta');
|
||||
const defaultHeaders = clientOptions?.defaultHeaders as Record<string, string>;
|
||||
expect(defaultHeaders['anthropic-beta']).toBe('context-1m-2025-08-07');
|
||||
expect(clientOptions?.defaultHeaders).toBeUndefined();
|
||||
expect(result.llmConfig.promptCache).toBe(true);
|
||||
});
|
||||
|
||||
it('should add "context-1m" beta header and promptCache boolean for claude-sonnet-4 model formats', () => {
|
||||
it('should pass promptCache without long-context beta header for claude-sonnet-4 model formats', () => {
|
||||
const modelVariations = [
|
||||
'claude-sonnet-4-20250514',
|
||||
'claude-sonnet-4-latest',
|
||||
|
|
@ -113,29 +110,23 @@ describe('getLLMConfig', () => {
|
|||
const modelOptions = { model, promptCache: true };
|
||||
const result = getLLMConfig('test-key', { modelOptions });
|
||||
const clientOptions = result.llmConfig.clientOptions;
|
||||
expect(clientOptions?.defaultHeaders).toBeDefined();
|
||||
expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta');
|
||||
const defaultHeaders = clientOptions?.defaultHeaders as Record<string, string>;
|
||||
expect(defaultHeaders['anthropic-beta']).toBe('context-1m-2025-08-07');
|
||||
expect(clientOptions?.defaultHeaders).toBeUndefined();
|
||||
expect(result.llmConfig.promptCache).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
it('should add "context-1m" beta header for claude-sonnet-4-6 model', () => {
|
||||
it('should pass promptCache without long-context beta header for claude-sonnet-4-6 model', () => {
|
||||
const modelOptions = {
|
||||
model: 'claude-sonnet-4-6',
|
||||
promptCache: true,
|
||||
};
|
||||
const result = getLLMConfig('test-key', { modelOptions });
|
||||
const clientOptions = result.llmConfig.clientOptions;
|
||||
expect(clientOptions?.defaultHeaders).toBeDefined();
|
||||
expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta');
|
||||
const defaultHeaders = clientOptions?.defaultHeaders as Record<string, string>;
|
||||
expect(defaultHeaders['anthropic-beta']).toBe('context-1m-2025-08-07');
|
||||
expect(clientOptions?.defaultHeaders).toBeUndefined();
|
||||
expect(result.llmConfig.promptCache).toBe(true);
|
||||
});
|
||||
|
||||
it('should add "context-1m" beta header for claude-sonnet-4-6 model formats', () => {
|
||||
it('should pass promptCache without long-context beta header for claude-sonnet-4-6 model formats', () => {
|
||||
const modelVariations = [
|
||||
'claude-sonnet-4-6',
|
||||
'claude-sonnet-4-6-20260101',
|
||||
|
|
@ -146,10 +137,7 @@ describe('getLLMConfig', () => {
|
|||
const modelOptions = { model, promptCache: true };
|
||||
const result = getLLMConfig('test-key', { modelOptions });
|
||||
const clientOptions = result.llmConfig.clientOptions;
|
||||
expect(clientOptions?.defaultHeaders).toBeDefined();
|
||||
expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta');
|
||||
const defaultHeaders = clientOptions?.defaultHeaders as Record<string, string>;
|
||||
expect(defaultHeaders['anthropic-beta']).toBe('context-1m-2025-08-07');
|
||||
expect(clientOptions?.defaultHeaders).toBeUndefined();
|
||||
expect(result.llmConfig.promptCache).toBe(true);
|
||||
});
|
||||
});
|
||||
|
|
@ -1485,7 +1473,7 @@ describe('getLLMConfig', () => {
|
|||
|
||||
it('should handle prompt cache support logic for different models', () => {
|
||||
const testCases = [
|
||||
// Models that support prompt cache (and have other beta headers)
|
||||
// Models that support prompt cache and have other beta headers
|
||||
{
|
||||
model: 'claude-3-5-sonnet',
|
||||
promptCache: true,
|
||||
|
|
@ -1513,7 +1501,13 @@ describe('getLLMConfig', () => {
|
|||
{
|
||||
model: 'claude-sonnet-4-20250514',
|
||||
promptCache: true,
|
||||
shouldHaveHeaders: true,
|
||||
shouldHaveHeaders: false,
|
||||
shouldHavePromptCache: true,
|
||||
},
|
||||
{
|
||||
model: 'claude-sonnet-4-6',
|
||||
promptCache: true,
|
||||
shouldHaveHeaders: false,
|
||||
shouldHavePromptCache: true,
|
||||
},
|
||||
// Models that support prompt cache but have no additional beta headers needed
|
||||
|
|
|
|||
|
|
@ -627,9 +627,7 @@ describe('initializeBedrock', () => {
|
|||
|
||||
expect(amrf.thinking).toEqual({ type: 'adaptive' });
|
||||
expect(result.llmConfig.maxTokens).toBeUndefined();
|
||||
expect(amrf.anthropic_beta).toEqual(
|
||||
expect.arrayContaining(['output-128k-2025-02-19', 'context-1m-2025-08-07']),
|
||||
);
|
||||
expect(amrf.anthropic_beta).toEqual(expect.arrayContaining(['output-128k-2025-02-19']));
|
||||
});
|
||||
|
||||
it('should pass effort via output_config for Opus 4.6', async () => {
|
||||
|
|
|
|||
|
|
@ -44,9 +44,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
|
|||
},
|
||||
configOptions: {
|
||||
baseURL: 'http://host.docker.internal:4000/v1',
|
||||
defaultHeaders: {
|
||||
'anthropic-beta': 'context-1m-2025-08-07',
|
||||
},
|
||||
defaultHeaders: {},
|
||||
},
|
||||
tools: [],
|
||||
});
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ async function validateAnthropicPdf(
|
|||
/**
|
||||
* Matches Bedrock Claude 4+ model identifiers, including cross-region inference profile IDs.
|
||||
* Pattern: [region.]anthropic.claude-{family}-{version≥4}-{date}-v{n}:{rev}
|
||||
* e.g. "anthropic.claude-sonnet-4-20250514-v1:0" or "us.anthropic.claude-sonnet-4-20250514-v1:0"
|
||||
* e.g. "anthropic.claude-sonnet-4-6" or "us.anthropic.claude-sonnet-4-6"
|
||||
*/
|
||||
const BEDROCK_CLAUDE_4_PLUS_RE = /(?:^|\.)anthropic\.claude-(?:sonnet|opus|haiku)-[4-9]\d*-/;
|
||||
const isBedrockClaude4Plus = (model?: string): boolean =>
|
||||
|
|
|
|||
|
|
@ -138,7 +138,8 @@ const anthropicModels = {
|
|||
'claude-haiku-4-5': 200000,
|
||||
'claude-opus-4': 200000,
|
||||
'claude-opus-4-5': 200000,
|
||||
'claude-sonnet-4': 1000000,
|
||||
'claude-sonnet-4': 200000,
|
||||
'claude-sonnet-4-5': 200000,
|
||||
'claude-sonnet-4-6': 1000000,
|
||||
'claude-opus-4-6': 1000000,
|
||||
'claude-opus-4-7': 1000000,
|
||||
|
|
|
|||
|
|
@ -124,12 +124,12 @@ describe('supportsAdaptiveThinking', () => {
|
|||
});
|
||||
|
||||
describe('supportsContext1m', () => {
|
||||
test('should return true for claude-sonnet-4', () => {
|
||||
expect(supportsContext1m('claude-sonnet-4')).toBe(true);
|
||||
test('should return false for claude-sonnet-4', () => {
|
||||
expect(supportsContext1m('claude-sonnet-4')).toBe(false);
|
||||
});
|
||||
|
||||
test('should return true for claude-sonnet-4-5', () => {
|
||||
expect(supportsContext1m('claude-sonnet-4-5')).toBe(true);
|
||||
test('should return false for claude-sonnet-4-5', () => {
|
||||
expect(supportsContext1m('claude-sonnet-4-5')).toBe(false);
|
||||
});
|
||||
|
||||
test('should return true for claude-sonnet-4-6', () => {
|
||||
|
|
@ -176,14 +176,18 @@ describe('supportsContext1m', () => {
|
|||
expect(supportsContext1m('gpt-4o')).toBe(false);
|
||||
});
|
||||
|
||||
test('should return true for claude-4-sonnet (alternate naming)', () => {
|
||||
expect(supportsContext1m('claude-4-sonnet')).toBe(true);
|
||||
test('should return false for claude-4-sonnet (alternate naming, below threshold)', () => {
|
||||
expect(supportsContext1m('claude-4-sonnet')).toBe(false);
|
||||
});
|
||||
|
||||
test('should return true for claude-5-sonnet (alternate naming)', () => {
|
||||
expect(supportsContext1m('claude-5-sonnet')).toBe(true);
|
||||
});
|
||||
|
||||
test('should return true for claude-4-6-sonnet (alternate naming)', () => {
|
||||
expect(supportsContext1m('claude-4-6-sonnet')).toBe(true);
|
||||
});
|
||||
|
||||
test('should return true for claude-4-6-opus (alternate naming)', () => {
|
||||
expect(supportsContext1m('claude-4-6-opus')).toBe(true);
|
||||
});
|
||||
|
|
@ -305,7 +309,7 @@ describe('bedrockInputParser', () => {
|
|||
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
|
||||
});
|
||||
|
||||
test('should match anthropic.claude-sonnet-4 model with 1M context header', () => {
|
||||
test('should match anthropic.claude-sonnet-4 model without context beta header', () => {
|
||||
const input = {
|
||||
model: 'anthropic.claude-sonnet-4',
|
||||
};
|
||||
|
|
@ -313,13 +317,10 @@ describe('bedrockInputParser', () => {
|
|||
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
|
||||
expect(additionalFields.thinking).toBe(true);
|
||||
expect(additionalFields.thinkingBudget).toBe(2000);
|
||||
expect(additionalFields.anthropic_beta).toEqual([
|
||||
'output-128k-2025-02-19',
|
||||
'context-1m-2025-08-07',
|
||||
]);
|
||||
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
|
||||
});
|
||||
|
||||
test('should match anthropic.claude-opus-5 model with adaptive thinking and 1M context', () => {
|
||||
test('should match anthropic.claude-opus-5 model with adaptive thinking', () => {
|
||||
const input = {
|
||||
model: 'anthropic.claude-opus-5',
|
||||
};
|
||||
|
|
@ -327,13 +328,10 @@ describe('bedrockInputParser', () => {
|
|||
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
|
||||
expect(additionalFields.thinking).toEqual({ type: 'adaptive', display: 'summarized' });
|
||||
expect(additionalFields.thinkingBudget).toBeUndefined();
|
||||
expect(additionalFields.anthropic_beta).toEqual([
|
||||
'output-128k-2025-02-19',
|
||||
'context-1m-2025-08-07',
|
||||
]);
|
||||
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
|
||||
});
|
||||
|
||||
test('should match anthropic.claude-haiku-6 model without 1M context header', () => {
|
||||
test('should match anthropic.claude-haiku-6 model without context beta header', () => {
|
||||
const input = {
|
||||
model: 'anthropic.claude-haiku-6',
|
||||
};
|
||||
|
|
@ -344,7 +342,7 @@ describe('bedrockInputParser', () => {
|
|||
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
|
||||
});
|
||||
|
||||
test('should match anthropic.claude-4-sonnet model with 1M context header', () => {
|
||||
test('should match anthropic.claude-4-sonnet model without context beta header', () => {
|
||||
const input = {
|
||||
model: 'anthropic.claude-4-sonnet',
|
||||
};
|
||||
|
|
@ -352,13 +350,10 @@ describe('bedrockInputParser', () => {
|
|||
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
|
||||
expect(additionalFields.thinking).toBe(true);
|
||||
expect(additionalFields.thinkingBudget).toBe(2000);
|
||||
expect(additionalFields.anthropic_beta).toEqual([
|
||||
'output-128k-2025-02-19',
|
||||
'context-1m-2025-08-07',
|
||||
]);
|
||||
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
|
||||
});
|
||||
|
||||
test('should match anthropic.claude-4.5-sonnet model with 1M context header', () => {
|
||||
test('should match anthropic.claude-4.5-sonnet model without context beta header', () => {
|
||||
const input = {
|
||||
model: 'anthropic.claude-4.5-sonnet',
|
||||
};
|
||||
|
|
@ -366,13 +361,10 @@ describe('bedrockInputParser', () => {
|
|||
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
|
||||
expect(additionalFields.thinking).toBe(true);
|
||||
expect(additionalFields.thinkingBudget).toBe(2000);
|
||||
expect(additionalFields.anthropic_beta).toEqual([
|
||||
'output-128k-2025-02-19',
|
||||
'context-1m-2025-08-07',
|
||||
]);
|
||||
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
|
||||
});
|
||||
|
||||
test('should match anthropic.claude-sonnet-4-6 with adaptive thinking and 1M context header', () => {
|
||||
test('should match anthropic.claude-sonnet-4-6 with adaptive thinking', () => {
|
||||
const input = {
|
||||
model: 'anthropic.claude-sonnet-4-6',
|
||||
};
|
||||
|
|
@ -380,13 +372,10 @@ describe('bedrockInputParser', () => {
|
|||
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
|
||||
expect(additionalFields.thinking).toEqual({ type: 'adaptive' });
|
||||
expect(additionalFields.thinkingBudget).toBeUndefined();
|
||||
expect(additionalFields.anthropic_beta).toEqual([
|
||||
'output-128k-2025-02-19',
|
||||
'context-1m-2025-08-07',
|
||||
]);
|
||||
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
|
||||
});
|
||||
|
||||
test('should match us.anthropic.claude-sonnet-4-6 with adaptive thinking and 1M context header', () => {
|
||||
test('should match us.anthropic.claude-sonnet-4-6 with adaptive thinking', () => {
|
||||
const input = {
|
||||
model: 'us.anthropic.claude-sonnet-4-6',
|
||||
};
|
||||
|
|
@ -394,13 +383,10 @@ describe('bedrockInputParser', () => {
|
|||
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
|
||||
expect(additionalFields.thinking).toEqual({ type: 'adaptive' });
|
||||
expect(additionalFields.thinkingBudget).toBeUndefined();
|
||||
expect(additionalFields.anthropic_beta).toEqual([
|
||||
'output-128k-2025-02-19',
|
||||
'context-1m-2025-08-07',
|
||||
]);
|
||||
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
|
||||
});
|
||||
|
||||
test('should match anthropic.claude-4-7-sonnet model with adaptive thinking and 1M context header', () => {
|
||||
test('should match anthropic.claude-4-7-sonnet model with adaptive thinking', () => {
|
||||
const input = {
|
||||
model: 'anthropic.claude-4-7-sonnet',
|
||||
};
|
||||
|
|
@ -408,10 +394,7 @@ describe('bedrockInputParser', () => {
|
|||
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
|
||||
expect(additionalFields.thinking).toEqual({ type: 'adaptive' });
|
||||
expect(additionalFields.thinkingBudget).toBeUndefined();
|
||||
expect(additionalFields.anthropic_beta).toEqual([
|
||||
'output-128k-2025-02-19',
|
||||
'context-1m-2025-08-07',
|
||||
]);
|
||||
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
|
||||
});
|
||||
|
||||
test('should match anthropic.claude-sonnet-4-20250514-v1:0 with full model ID', () => {
|
||||
|
|
@ -422,10 +405,7 @@ describe('bedrockInputParser', () => {
|
|||
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
|
||||
expect(additionalFields.thinking).toBe(true);
|
||||
expect(additionalFields.thinkingBudget).toBe(2000);
|
||||
expect(additionalFields.anthropic_beta).toEqual([
|
||||
'output-128k-2025-02-19',
|
||||
'context-1m-2025-08-07',
|
||||
]);
|
||||
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
|
||||
});
|
||||
|
||||
test('should not match non-Claude models', () => {
|
||||
|
|
@ -467,10 +447,7 @@ describe('bedrockInputParser', () => {
|
|||
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
|
||||
expect(additionalFields.thinking).toBeUndefined();
|
||||
expect(additionalFields.thinkingBudget).toBeUndefined();
|
||||
expect(additionalFields.anthropic_beta).toEqual([
|
||||
'output-128k-2025-02-19',
|
||||
'context-1m-2025-08-07',
|
||||
]);
|
||||
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
|
||||
});
|
||||
|
||||
test('should respect custom thinking budget', () => {
|
||||
|
|
@ -495,10 +472,7 @@ describe('bedrockInputParser', () => {
|
|||
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
|
||||
expect(additionalFields.thinking).toEqual({ type: 'adaptive' });
|
||||
expect(additionalFields.thinkingBudget).toBeUndefined();
|
||||
expect(additionalFields.anthropic_beta).toEqual([
|
||||
'output-128k-2025-02-19',
|
||||
'context-1m-2025-08-07',
|
||||
]);
|
||||
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
|
||||
});
|
||||
|
||||
test('should handle cross-region model ID us.anthropic.claude-opus-4-6-v1', () => {
|
||||
|
|
@ -509,10 +483,7 @@ describe('bedrockInputParser', () => {
|
|||
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
|
||||
expect(additionalFields.thinking).toEqual({ type: 'adaptive' });
|
||||
expect(additionalFields.thinkingBudget).toBeUndefined();
|
||||
expect(additionalFields.anthropic_beta).toEqual([
|
||||
'output-128k-2025-02-19',
|
||||
'context-1m-2025-08-07',
|
||||
]);
|
||||
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
|
||||
});
|
||||
|
||||
test('should handle cross-region model ID global.anthropic.claude-opus-4-6-v1', () => {
|
||||
|
|
@ -522,10 +493,7 @@ describe('bedrockInputParser', () => {
|
|||
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
|
||||
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
|
||||
expect(additionalFields.thinking).toEqual({ type: 'adaptive' });
|
||||
expect(additionalFields.anthropic_beta).toEqual([
|
||||
'output-128k-2025-02-19',
|
||||
'context-1m-2025-08-07',
|
||||
]);
|
||||
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
|
||||
});
|
||||
|
||||
test('should pass effort parameter via output_config for adaptive models', () => {
|
||||
|
|
@ -729,10 +697,7 @@ describe('bedrockInputParser', () => {
|
|||
const additionalFields = result.additionalModelRequestFields as Record<string, unknown>;
|
||||
expect(additionalFields.thinking).toBeUndefined();
|
||||
expect(additionalFields.thinkingBudget).toBeUndefined();
|
||||
expect(additionalFields.anthropic_beta).toEqual([
|
||||
'output-128k-2025-02-19',
|
||||
'context-1m-2025-08-07',
|
||||
]);
|
||||
expect(additionalFields.anthropic_beta).toEqual(['output-128k-2025-02-19']);
|
||||
});
|
||||
|
||||
test('should preserve effort when thinking=false for adaptive models', () => {
|
||||
|
|
@ -895,7 +860,7 @@ describe('bedrockInputParser', () => {
|
|||
model: 'openai.gpt-oss-120b-1:0',
|
||||
promptCache: true,
|
||||
additionalModelRequestFields: {
|
||||
anthropic_beta: ['output-128k-2025-02-19', 'context-1m-2025-08-07'],
|
||||
anthropic_beta: ['output-128k-2025-02-19'],
|
||||
thinking: { type: 'adaptive' },
|
||||
output_config: { effort: 'high' },
|
||||
},
|
||||
|
|
|
|||
|
|
@ -134,10 +134,10 @@ export function omitsThinkingByDefault(model: string): boolean {
|
|||
return false;
|
||||
}
|
||||
|
||||
/** Checks if a model qualifies for the context-1m beta header (Sonnet 4+, Opus 4.6+, Opus 5+) */
|
||||
/** Checks if a model has a 1M context window (Sonnet 4.6+, Opus 4.6+, Opus 5+) */
|
||||
export function supportsContext1m(model: string): boolean {
|
||||
const sonnet = parseSonnetVersion(model);
|
||||
if (sonnet != null && sonnet.major >= 4) {
|
||||
if (sonnet != null && (sonnet.major > 4 || (sonnet.major === 4 && sonnet.minor >= 6))) {
|
||||
return true;
|
||||
}
|
||||
const opus = parseOpusVersion(model);
|
||||
|
|
@ -151,7 +151,7 @@ export function supportsContext1m(model: string): boolean {
|
|||
* Gets the appropriate anthropic_beta headers for Bedrock Anthropic models.
|
||||
* Bedrock uses `anthropic_beta` (with underscore) in additionalModelRequestFields.
|
||||
*
|
||||
* @param model - The Bedrock model identifier (e.g., "anthropic.claude-sonnet-4-20250514-v1:0")
|
||||
* @param model - The Bedrock model identifier (e.g., "anthropic.claude-sonnet-4-6")
|
||||
* @returns Array of beta header strings, or empty array if not applicable
|
||||
*/
|
||||
function getBedrockAnthropicBetaHeaders(model: string): string[] {
|
||||
|
|
@ -163,17 +163,10 @@ function getBedrockAnthropicBetaHeaders(model: string): string[] {
|
|||
model,
|
||||
);
|
||||
|
||||
const isSonnet4PlusModel =
|
||||
/anthropic\.claude-(?:sonnet-[4-9]|[4-9](?:\.\d+)?(?:-\d+)?-sonnet)/.test(model);
|
||||
|
||||
if (isClaudeThinkingModel) {
|
||||
betaHeaders.push('output-128k-2025-02-19');
|
||||
}
|
||||
|
||||
if (isSonnet4PlusModel || supportsAdaptiveThinking(model)) {
|
||||
betaHeaders.push('context-1m-2025-08-07');
|
||||
}
|
||||
|
||||
return betaHeaders;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ import type {
|
|||
* These are the standard Anthropic model names as served by Vertex AI
|
||||
*/
|
||||
export const defaultVertexModels = [
|
||||
'claude-sonnet-4-20250514',
|
||||
'claude-sonnet-4-6',
|
||||
'claude-3-7-sonnet-20250219',
|
||||
'claude-3-5-sonnet-v2@20241022',
|
||||
'claude-3-5-sonnet@20240620',
|
||||
|
|
|
|||
|
|
@ -768,14 +768,14 @@ describe('spendTokens', () => {
|
|||
});
|
||||
|
||||
describe('premium token pricing', () => {
|
||||
it('should charge standard rates for claude-opus-4-6 when prompt tokens are below threshold', async () => {
|
||||
it('should charge standard rates for gemini-3.1 when prompt tokens are below threshold', async () => {
|
||||
const initialBalance = 100000000;
|
||||
await Balance.create({
|
||||
user: userId,
|
||||
tokenCredits: initialBalance,
|
||||
});
|
||||
|
||||
const model = 'claude-opus-4-6';
|
||||
const model = 'gemini-3.1';
|
||||
const promptTokens = 100000;
|
||||
const completionTokens = 500;
|
||||
|
||||
|
|
@ -796,7 +796,7 @@ describe('spendTokens', () => {
|
|||
expect(balance?.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
|
||||
});
|
||||
|
||||
it('should charge premium rates for claude-opus-4-6 when prompt tokens exceed threshold', async () => {
|
||||
it('should charge standard rates for claude-opus-4-6 when prompt tokens exceed the former premium threshold', async () => {
|
||||
const initialBalance = 100000000;
|
||||
await Balance.create({
|
||||
user: userId,
|
||||
|
|
@ -818,14 +818,13 @@ describe('spendTokens', () => {
|
|||
await spendTokens(txData, { promptTokens, completionTokens });
|
||||
|
||||
const expectedCost =
|
||||
promptTokens * premiumTokenValues[model].prompt +
|
||||
completionTokens * premiumTokenValues[model].completion;
|
||||
promptTokens * tokenValues[model].prompt + completionTokens * tokenValues[model].completion;
|
||||
|
||||
const balance = await Balance.findOne({ user: userId });
|
||||
expect(balance?.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
|
||||
});
|
||||
|
||||
it('should charge premium rates for both prompt and completion in structured tokens when above threshold', async () => {
|
||||
it('should charge standard rates for Claude structured tokens above the former premium threshold', async () => {
|
||||
const initialBalance = 100000000;
|
||||
await Balance.create({
|
||||
user: userId,
|
||||
|
|
@ -852,16 +851,16 @@ describe('spendTokens', () => {
|
|||
|
||||
const result = await spendStructuredTokens(txData, tokenUsage);
|
||||
|
||||
const premiumPromptRate = premiumTokenValues[model].prompt;
|
||||
const premiumCompletionRate = premiumTokenValues[model].completion;
|
||||
const standardPromptRate = tokenValues[model].prompt;
|
||||
const standardCompletionRate = tokenValues[model].completion;
|
||||
const writeRate = getCacheMultiplier({ model, cacheType: 'write' });
|
||||
const readRate = getCacheMultiplier({ model, cacheType: 'read' });
|
||||
|
||||
const expectedPromptCost =
|
||||
tokenUsage.promptTokens.input * premiumPromptRate +
|
||||
tokenUsage.promptTokens.input * standardPromptRate +
|
||||
tokenUsage.promptTokens.write * (writeRate ?? 0) +
|
||||
tokenUsage.promptTokens.read * (readRate ?? 0);
|
||||
const expectedCompletionCost = tokenUsage.completionTokens * premiumCompletionRate;
|
||||
const expectedCompletionCost = tokenUsage.completionTokens * standardCompletionRate;
|
||||
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.prompt!.prompt).toBeCloseTo(-expectedPromptCost, 0);
|
||||
|
|
@ -875,7 +874,7 @@ describe('spendTokens', () => {
|
|||
tokenCredits: initialBalance,
|
||||
});
|
||||
|
||||
const model = 'claude-opus-4-6';
|
||||
const model = 'gemini-3.1';
|
||||
const txData = {
|
||||
user: userId,
|
||||
conversationId: 'test-structured-standard',
|
||||
|
|
@ -1106,7 +1105,7 @@ describe('spendTokens', () => {
|
|||
tokenCredits: initialBalance,
|
||||
});
|
||||
|
||||
const model = 'claude-opus-4-6';
|
||||
const model = 'gemini-3.1';
|
||||
const promptTokens = 250000;
|
||||
const completionTokens = 500;
|
||||
|
||||
|
|
@ -1163,7 +1162,7 @@ describe('spendTokens', () => {
|
|||
tokenCredits: initialBalance,
|
||||
});
|
||||
|
||||
const model = 'claude-opus-4-6';
|
||||
const model = 'gemini-3.1';
|
||||
const txData = {
|
||||
user: userId,
|
||||
conversationId: 'test-negative-no-premium',
|
||||
|
|
|
|||
|
|
@ -700,7 +700,7 @@ describe('Premium Token Pricing Integration Tests', () => {
|
|||
const initialBalance = 100000000;
|
||||
await Balance.create({ user: userId, tokenCredits: initialBalance });
|
||||
|
||||
const model = 'claude-opus-4-6';
|
||||
const model = 'gemini-3.1';
|
||||
const promptTokens = 100000;
|
||||
const completionTokens = 500;
|
||||
|
||||
|
|
@ -729,7 +729,7 @@ describe('Premium Token Pricing Integration Tests', () => {
|
|||
const initialBalance = 100000000;
|
||||
await Balance.create({ user: userId, tokenCredits: initialBalance });
|
||||
|
||||
const model = 'claude-opus-4-6';
|
||||
const model = 'gemini-3.1';
|
||||
const promptTokens = 250000;
|
||||
const completionTokens = 500;
|
||||
|
||||
|
|
@ -758,7 +758,7 @@ describe('Premium Token Pricing Integration Tests', () => {
|
|||
const initialBalance = 100000000;
|
||||
await Balance.create({ user: userId, tokenCredits: initialBalance });
|
||||
|
||||
const model = 'claude-opus-4-6';
|
||||
const model = 'gemini-3.1';
|
||||
const promptTokens = premiumTokenValues[model].threshold;
|
||||
const completionTokens = 500;
|
||||
|
||||
|
|
@ -787,7 +787,7 @@ describe('Premium Token Pricing Integration Tests', () => {
|
|||
const initialBalance = 100000000;
|
||||
await Balance.create({ user: userId, tokenCredits: initialBalance });
|
||||
|
||||
const model = 'claude-opus-4-6';
|
||||
const model = 'gemini-3.1';
|
||||
const txData = {
|
||||
user: userId,
|
||||
conversationId: 'test-structured-premium',
|
||||
|
|
@ -838,7 +838,7 @@ describe('Premium Token Pricing Integration Tests', () => {
|
|||
const initialBalance = 100000000;
|
||||
await Balance.create({ user: userId, tokenCredits: initialBalance });
|
||||
|
||||
const model = 'claude-opus-4-6';
|
||||
const model = 'gemini-3.1';
|
||||
const txData = {
|
||||
user: userId,
|
||||
conversationId: 'test-structured-standard',
|
||||
|
|
@ -1027,7 +1027,7 @@ describe('Premium Token Pricing Integration Tests', () => {
|
|||
const initialBalance = 100000000;
|
||||
await Balance.create({ user: userId, tokenCredits: initialBalance });
|
||||
|
||||
const model = 'claude-opus-4-5';
|
||||
const model = 'claude-opus-4-6';
|
||||
const promptTokens = 300000;
|
||||
const completionTokens = 500;
|
||||
|
||||
|
|
|
|||
|
|
@ -2322,14 +2322,14 @@ describe('Claude Model Tests', () => {
|
|||
});
|
||||
|
||||
describe('Premium Token Pricing', () => {
|
||||
const premiumModel = 'claude-opus-4-6';
|
||||
const premiumModel = 'gemini-3.1';
|
||||
const premiumEntry = premiumTokenValues[premiumModel];
|
||||
const { threshold } = premiumEntry;
|
||||
const belowThreshold = threshold - 1;
|
||||
const aboveThreshold = threshold + 1;
|
||||
const wellAboveThreshold = threshold * 2;
|
||||
|
||||
it('should have premium pricing defined for claude-opus-4-6', () => {
|
||||
it('should have premium pricing defined for gemini-3.1', () => {
|
||||
expect(premiumEntry).toBeDefined();
|
||||
expect(premiumEntry.threshold).toBeDefined();
|
||||
expect(premiumEntry.prompt).toBeDefined();
|
||||
|
|
@ -2338,14 +2338,26 @@ describe('Premium Token Pricing', () => {
|
|||
expect(premiumEntry.completion).toBeGreaterThan(tokenValues[premiumModel].completion);
|
||||
});
|
||||
|
||||
it('should have premium pricing defined for claude-opus-4-7', () => {
|
||||
const entry = premiumTokenValues['claude-opus-4-7'];
|
||||
expect(entry).toBeDefined();
|
||||
expect(entry.threshold).toBe(200000);
|
||||
expect(entry.prompt).toBe(10);
|
||||
expect(entry.completion).toBe(37.5);
|
||||
expect(entry.prompt).toBeGreaterThan(tokenValues['claude-opus-4-7'].prompt);
|
||||
expect(entry.completion).toBeGreaterThan(tokenValues['claude-opus-4-7'].completion);
|
||||
it('should not apply premium pricing to Claude 1M GA models', () => {
|
||||
const claudeModels = ['claude-opus-4-6', 'claude-opus-4-7', 'claude-sonnet-4-6'];
|
||||
claudeModels.forEach((model) => {
|
||||
expect(premiumTokenValues[model]).toBeUndefined();
|
||||
expect(getPremiumRate(model, 'prompt', wellAboveThreshold)).toBeNull();
|
||||
expect(
|
||||
getMultiplier({
|
||||
model,
|
||||
tokenType: 'prompt',
|
||||
inputTokenCount: wellAboveThreshold,
|
||||
}),
|
||||
).toBe(tokenValues[model].prompt);
|
||||
expect(
|
||||
getMultiplier({
|
||||
model,
|
||||
tokenType: 'completion',
|
||||
inputTokenCount: wellAboveThreshold,
|
||||
}),
|
||||
).toBe(tokenValues[model].completion);
|
||||
});
|
||||
});
|
||||
|
||||
it('should return null from getPremiumRate when inputTokenCount is below threshold', () => {
|
||||
|
|
|
|||
|
|
@ -159,6 +159,7 @@ export const tokenValues: Record<string, { prompt: number; completion: number }>
|
|||
'claude-opus-4-6': { prompt: 5, completion: 25 },
|
||||
'claude-opus-4-7': { prompt: 5, completion: 25 },
|
||||
'claude-sonnet-4': { prompt: 3, completion: 15 },
|
||||
'claude-sonnet-4-5': { prompt: 3, completion: 15 },
|
||||
'claude-sonnet-4-6': { prompt: 3, completion: 15 },
|
||||
'command-r': { prompt: 0.5, completion: 1.5 },
|
||||
'command-r-plus': { prompt: 3, completion: 15 },
|
||||
|
|
@ -286,6 +287,7 @@ export const cacheTokenValues: Record<string, { write: number; read: number }> =
|
|||
'claude-3-haiku': { write: 0.3, read: 0.03 },
|
||||
'claude-haiku-4-5': { write: 1.25, read: 0.1 },
|
||||
'claude-sonnet-4': { write: 3.75, read: 0.3 },
|
||||
'claude-sonnet-4-5': { write: 3.75, read: 0.3 },
|
||||
'claude-sonnet-4-6': { write: 3.75, read: 0.3 },
|
||||
'claude-opus-4': { write: 18.75, read: 1.5 },
|
||||
'claude-opus-4-5': { write: 6.25, read: 0.5 },
|
||||
|
|
@ -336,9 +338,6 @@ export const premiumTokenValues: Record<
|
|||
string,
|
||||
{ threshold: number; prompt: number; completion: number }
|
||||
> = {
|
||||
'claude-opus-4-6': { threshold: 200000, prompt: 10, completion: 37.5 },
|
||||
'claude-opus-4-7': { threshold: 200000, prompt: 10, completion: 37.5 },
|
||||
'claude-sonnet-4-6': { threshold: 200000, prompt: 6, completion: 22.5 },
|
||||
'gemini-3.1': { threshold: 200000, prompt: 4, completion: 18 },
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue