mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-06-25 08:56:10 +00:00
🛰️ feat: Add GPT-5.5 + Frontier OpenAI Models, Drop Deprecated Defaults (#13636)
* 🛰️ feat: Add GPT-5.5 + Frontier OpenAI Models, Drop Deprecated Defaults
* 🛰️ fix: Address Codex Review on OpenAI Model Refresh
  - Replace nonexistent gpt-5.5-chat-latest with the actual chat-latest alias; register its context window, output cap, pricing, and cache rates, and pin explicit rates for legacy gpt-5.x-chat-latest aliases so the new chat-latest key cannot out-match their cheaper pricing
  - Add long-context premium tiers (>272K input) for gpt-5.5 and gpt-5.4
  - Disable streaming for pro reasoning models (o1-pro, gpt-5.x-pro), which OpenAI does not support, with spec coverage
* 🛰️ fix: Address Codex Round-2 Review and CI Spec Failure
  - Allow chat-latest through the official OpenAI fetched-model filter
  - Export isProReasoningModel and drop unsupported sampling parameters for versioned pro models (gpt-5.4-pro, gpt-5.5-pro), which the versioned-model exemption previously let through
  - Honor the pro-model streaming disable in both agent chat-completions routes, which decide SSE from model_parameters before llmConfig exists
  - Update models.spec default-list assertions for the refreshed defaults and cover chat-latest filter retention
* 🛰️ fix: Address Codex Round-3 Review
  - Convert max_tokens for chat-latest, which the gpt-[5-9] guard missed
  - Drop snake_case sampling params (top_p, logit_bias, penalties) in the reasoning-model exclusion list so addParams-sourced values are removed
  - Add createOpenAIAggregatorHandlers and wire them into the agent chat-completions service's non-streaming branch, which previously ran with no handlers and always returned an empty aggregated response
* 🛰️ ci: Fix Import Order Drift and Controller Spec Mock
  - Sort type import first in service.spec.ts per import-order convention
  - Register isProReasoningModel in the openai controller spec's @librechat/api mock factory, whose enumerated exports left the new helper undefined and broke the non-streaming flow under test
* 🛰️ chore: Trim Scope to Model Catalog Changes
  Revert the OpenAI endpoint and agent handler changes (pro-model streaming, sampling exclusions, non-streaming aggregation) — that surface is moving out of LibreChat into the agents SDK and belongs in its own change. Keep the model list, token windows, pricing, and the fetched-model filter for chat-latest.
* 🛰️ fix: Correct GPT-5.4 Context Windows and Pro Long-Context Pricing
  - Set gpt-5.4 and gpt-5.4-pro context to the documented 1,050,000 window — 272K is the long-context pricing breakpoint, not the cap, and using it truncated prompts before they could reach that tier
  - Add gpt-5.4-pro long-context premium rates ($60/$270 above 272K) per its model page; gpt-5.5-pro documents no long-context tier
* 🛰️ fix: Add gpt-5.4-nano and gpt-5.5-pro Long-Context Pricing
  - Register gpt-5.4-nano ($0.20/$1.25, cached $0.02, 400K context) in the model list, pricing, cache, and token maps — the longest-match fallback billed it at gpt-5.4's $2.50/$15
  - Add gpt-5.5-pro long-context premium rates ($60/$270 above 272K); the pricing table lists the tier even though the model page omits it
This commit is contained in:
parent
ba5778d0df
commit
ca26a2dc9c
6 changed files with 71 additions and 30 deletions
|
|
@ -209,7 +209,7 @@ describe('getOpenAIModels', () => {
|
|||
|
||||
it('returns default models when no environment configurations are provided (and fetch fails)', async () => {
|
||||
const models = await getOpenAIModels({ user: 'user456' });
|
||||
expect(models).toContain('gpt-4');
|
||||
expect(models).toContain('gpt-5.5');
|
||||
});
|
||||
|
||||
it('returns default models when OpenAI API key is user provided', async () => {
|
||||
|
|
@ -220,7 +220,7 @@ describe('getOpenAIModels', () => {
|
|||
|
||||
expect(mockedAxios.get).not.toHaveBeenCalled();
|
||||
expect(models).not.toContain('should-not-appear');
|
||||
expect(models).toContain('gpt-4');
|
||||
expect(models).toContain('gpt-5.5');
|
||||
});
|
||||
|
||||
it('fetches models when OpenAI API key is provided through options', async () => {
|
||||
|
|
@ -335,6 +335,26 @@ describe('getOpenAIModels sorting behavior', () => {
|
|||
];
|
||||
expect(models).toEqual(expectedOrder);
|
||||
});
|
||||
|
||||
it('keeps chat-latest when filtering official OpenAI results', async () => {
|
||||
mockedAxios.get.mockResolvedValue({
|
||||
data: {
|
||||
data: [
|
||||
{ id: 'chat-latest' },
|
||||
{ id: 'gpt-5.5' },
|
||||
{ id: 'dall-e-3' },
|
||||
{ id: 'gpt-realtime-2' },
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
const models = await getOpenAIModels({ user: 'user456' });
|
||||
|
||||
expect(models).toContain('chat-latest');
|
||||
expect(models).toContain('gpt-5.5');
|
||||
expect(models).not.toContain('dall-e-3');
|
||||
expect(models).not.toContain('gpt-realtime-2');
|
||||
});
|
||||
});
|
||||
|
||||
describe('fetchModels with Ollama specific logic', () => {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import crypto from 'crypto';
|
||||
import axios from 'axios';
|
||||
import crypto from 'crypto';
|
||||
import { logger } from '@librechat/data-schemas';
|
||||
import { HttpsProxyAgent } from 'https-proxy-agent';
|
||||
import {
|
||||
|
|
@ -274,7 +274,7 @@ export async function fetchOpenAIModels(
|
|||
}
|
||||
|
||||
if (baseURL === openaiBaseURL) {
|
||||
const regex = /(text-davinci-003|gpt-|o\d+)/;
|
||||
const regex = /(text-davinci-003|gpt-|o\d+|chat-latest)/;
|
||||
const excludeRegex = /audio|realtime/;
|
||||
models = models.filter((model) => regex.test(model) && !excludeRegex.test(model));
|
||||
const instructModels = models.filter((model) => model.includes('instruct'));
|
||||
|
|
|
|||
|
|
@ -55,10 +55,13 @@ const openAIModels = {
|
|||
'gpt-5.1': 400000,
|
||||
'gpt-5.2': 400000,
|
||||
'gpt-5.3': 400000,
|
||||
'gpt-5.4': 272000, // standard context; 1M experimental available via API opt-in (2x rate)
|
||||
'gpt-5.4-pro': 272000, // same window as gpt-5.4
|
||||
'gpt-5.4': 1050000, // >272K input prices at the long-context tier (2x input, 1.5x output)
|
||||
'gpt-5.4-pro': 1050000,
|
||||
'gpt-5.4-mini': 400000,
|
||||
'gpt-5.4-nano': 400000,
|
||||
'gpt-5.5': 1050000,
|
||||
'gpt-5.5-pro': 1050000,
|
||||
'chat-latest': 400000,
|
||||
'gpt-5-mini': 400000,
|
||||
'gpt-5-nano': 400000,
|
||||
'gpt-5-pro': 400000,
|
||||
|
|
@ -383,8 +386,11 @@ export const modelMaxOutputs = {
|
|||
'gpt-5.3': 128000,
|
||||
'gpt-5.4': 128000,
|
||||
'gpt-5.4-pro': 128000,
|
||||
'gpt-5.4-mini': 128000,
|
||||
'gpt-5.4-nano': 128000,
|
||||
'gpt-5.5': 128000,
|
||||
'gpt-5.5-pro': 128000,
|
||||
'chat-latest': 128000,
|
||||
'gpt-5-mini': 128000,
|
||||
'gpt-5-nano': 128000,
|
||||
'gpt-5-pro': 128000,
|
||||
|
|
|
|||
|
|
@ -1578,40 +1578,27 @@ export const alternateName = {
|
|||
};
|
||||
|
||||
const sharedOpenAIModels = [
|
||||
'gpt-5.5',
|
||||
'gpt-5.5-pro',
|
||||
'chat-latest',
|
||||
'gpt-5.4',
|
||||
// TODO: gpt-5.4-thinking may have separate reasoning token pricing — verify before release
|
||||
'gpt-5.4-thinking',
|
||||
'gpt-5.4-pro',
|
||||
'gpt-5.4-mini',
|
||||
'gpt-5.4-nano',
|
||||
'gpt-5.3-codex',
|
||||
'gpt-5.2',
|
||||
'gpt-5.1',
|
||||
'gpt-5.1-chat-latest',
|
||||
'gpt-5.1-codex',
|
||||
'gpt-5.1-codex-max',
|
||||
'gpt-5.1-codex-mini',
|
||||
'gpt-5',
|
||||
'gpt-5-mini',
|
||||
'gpt-5-nano',
|
||||
'gpt-5-chat-latest',
|
||||
'gpt-4.1',
|
||||
'gpt-4.1-mini',
|
||||
'gpt-4.1-nano',
|
||||
'gpt-4o-mini',
|
||||
'gpt-4o',
|
||||
'gpt-4.5-preview',
|
||||
'gpt-4.5-preview-2025-02-27',
|
||||
'gpt-3.5-turbo',
|
||||
'gpt-3.5-turbo-0125',
|
||||
'gpt-4-turbo',
|
||||
'gpt-4-turbo-2024-04-09',
|
||||
'gpt-4-0125-preview',
|
||||
'gpt-4-turbo-preview',
|
||||
'gpt-4-1106-preview',
|
||||
'gpt-3.5-turbo-1106',
|
||||
'gpt-3.5-turbo-16k-0613',
|
||||
'gpt-3.5-turbo-16k',
|
||||
'gpt-4',
|
||||
'gpt-4-0314',
|
||||
'gpt-4-32k-0314',
|
||||
'gpt-4-0613',
|
||||
'gpt-3.5-turbo-0613',
|
||||
];
|
||||
|
||||
const sharedAnthropicModels = [
|
||||
|
|
|
|||
|
|
@ -56,11 +56,19 @@ describe('getValueKey', () => {
|
|||
|
||||
it('should return "gpt-5.3" for model name containing "gpt-5.3"', () => {
|
||||
expect(getValueKey('gpt-5.3')).toBe('gpt-5.3');
|
||||
expect(getValueKey('gpt-5.3-chat-latest')).toBe('gpt-5.3');
|
||||
expect(getValueKey('gpt-5.3-codex')).toBe('gpt-5.3');
|
||||
expect(getValueKey('openai/gpt-5.3')).toBe('gpt-5.3');
|
||||
});
|
||||
|
||||
it('should return explicit keys for chat-latest aliases', () => {
|
||||
expect(getValueKey('chat-latest')).toBe('chat-latest');
|
||||
expect(getValueKey('openai/chat-latest')).toBe('chat-latest');
|
||||
expect(getValueKey('gpt-5-chat-latest')).toBe('gpt-5-chat-latest');
|
||||
expect(getValueKey('gpt-5.1-chat-latest')).toBe('gpt-5.1-chat-latest');
|
||||
expect(getValueKey('gpt-5.2-chat-latest')).toBe('gpt-5.2-chat-latest');
|
||||
expect(getValueKey('gpt-5.3-chat-latest')).toBe('gpt-5.3-chat-latest');
|
||||
});
|
||||
|
||||
it('should return "gpt-5.4" for model name containing "gpt-5.4"', () => {
|
||||
expect(getValueKey('gpt-5.4')).toBe('gpt-5.4');
|
||||
expect(getValueKey('gpt-5.4-thinking')).toBe('gpt-5.4');
|
||||
|
|
|
|||
|
|
@ -129,8 +129,16 @@ export const tokenValues: Record<string, { prompt: number; completion: number }>
|
|||
'gpt-5.2': { prompt: 1.75, completion: 14 },
|
||||
'gpt-5.3': { prompt: 1.75, completion: 14 },
|
||||
'gpt-5.4': { prompt: 2.5, completion: 15 },
|
||||
// TODO: gpt-5.4-pro pricing not yet officially published — verify before release
|
||||
'gpt-5.4-pro': { prompt: 5, completion: 30 },
|
||||
'gpt-5.4-pro': { prompt: 30, completion: 180 },
|
||||
'gpt-5.4-mini': { prompt: 0.75, completion: 4.5 },
|
||||
'gpt-5.4-nano': { prompt: 0.2, completion: 1.25 },
|
||||
'gpt-5.5': { prompt: 5, completion: 30 },
|
||||
'gpt-5.5-pro': { prompt: 30, completion: 180 },
|
||||
'chat-latest': { prompt: 5, completion: 30 },
|
||||
'gpt-5-chat-latest': { prompt: 1.25, completion: 10 },
|
||||
'gpt-5.1-chat-latest': { prompt: 1.25, completion: 10 },
|
||||
'gpt-5.2-chat-latest': { prompt: 1.75, completion: 14 },
|
||||
'gpt-5.3-chat-latest': { prompt: 1.75, completion: 14 },
|
||||
'gpt-5-nano': { prompt: 0.05, completion: 0.4 },
|
||||
'gpt-5-mini': { prompt: 0.25, completion: 2 },
|
||||
'gpt-5-pro': { prompt: 15, completion: 120 },
|
||||
|
|
@ -310,6 +318,14 @@ export const cacheTokenValues: Record<string, { write: number; read: number }> =
|
|||
'gpt-5.2': { write: 1.75, read: 0.175 },
|
||||
'gpt-5.3': { write: 1.75, read: 0.175 },
|
||||
'gpt-5.4': { write: 2.5, read: 0.25 },
|
||||
'gpt-5.4-mini': { write: 0.75, read: 0.075 },
|
||||
'gpt-5.4-nano': { write: 0.2, read: 0.02 },
|
||||
'gpt-5.5': { write: 5, read: 0.5 },
|
||||
'chat-latest': { write: 5, read: 0.5 },
|
||||
'gpt-5-chat-latest': { write: 1.25, read: 0.125 },
|
||||
'gpt-5.1-chat-latest': { write: 1.25, read: 0.125 },
|
||||
'gpt-5.2-chat-latest': { write: 1.75, read: 0.175 },
|
||||
'gpt-5.3-chat-latest': { write: 1.75, read: 0.175 },
|
||||
'gpt-5-mini': { write: 0.25, read: 0.025 },
|
||||
'gpt-5-nano': { write: 0.05, read: 0.005 },
|
||||
o1: { write: 15, read: 7.5 },
|
||||
|
|
@ -348,6 +364,10 @@ export const premiumTokenValues: Record<
|
|||
{ threshold: number; prompt: number; completion: number }
|
||||
> = {
|
||||
'gemini-3.1': { threshold: 200000, prompt: 4, completion: 18 },
|
||||
'gpt-5.4': { threshold: 272000, prompt: 5, completion: 22.5 },
|
||||
'gpt-5.4-pro': { threshold: 272000, prompt: 60, completion: 270 },
|
||||
'gpt-5.5': { threshold: 272000, prompt: 10, completion: 45 },
|
||||
'gpt-5.5-pro': { threshold: 272000, prompt: 60, completion: 270 },
|
||||
};
|
||||
|
||||
export function createTxMethods(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue