From ca26a2dc9cd2c1a6ffd8eb1f016e1ace142af4af Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Tue, 9 Jun 2026 20:12:31 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=B0=EF=B8=8F=20feat:=20Add=20GPT-5.5?= =?UTF-8?q?=20+=20Frontier=20OpenAI=20Models,=20Drop=20Deprecated=20Defaul?= =?UTF-8?q?ts=20(#13636)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🛰️ feat: Add GPT-5.5 + Frontier OpenAI Models, Drop Deprecated Defaults * 🛰️ fix: Address Codex Review on OpenAI Model Refresh - Replace nonexistent gpt-5.5-chat-latest with the actual chat-latest alias; register its context window, output cap, pricing, and cache rates, and pin explicit rates for legacy gpt-5.x-chat-latest aliases so the new chat-latest key cannot out-match their cheaper pricing - Add long-context premium tiers (>272K input) for gpt-5.5 and gpt-5.4 - Disable streaming for pro reasoning models (o1-pro, gpt-5.x-pro), which OpenAI does not support, with spec coverage * 🛰️ fix: Address Codex Round-2 Review and CI Spec Failure - Allow chat-latest through the official OpenAI fetched-model filter - Export isProReasoningModel and drop unsupported sampling parameters for versioned pro models (gpt-5.4-pro, gpt-5.5-pro), which the versioned-model exemption previously let through - Honor the pro-model streaming disable in both agent chat-completions routes, which decide SSE from model_parameters before llmConfig exists - Update models.spec default-list assertions for the refreshed defaults and cover chat-latest filter retention * 🛰️ fix: Address Codex Round-3 Review - Convert max_tokens for chat-latest, which the gpt-[5-9] guard missed - Drop snake_case sampling params (top_p, logit_bias, penalties) in the reasoning-model exclusion list so addParams-sourced values are removed - Add createOpenAIAggregatorHandlers and wire them into the agent chat-completions service's non-streaming branch, which previously ran with no handlers and always returned an empty aggregated response * 🛰️ ci: Fix Import Order Drift and Controller Spec Mock - Sort type import first in service.spec.ts per import-order convention - Register isProReasoningModel in the openai controller spec's @librechat/api mock factory, whose enumerated exports left the new helper undefined and broke the non-streaming flow under test * 🛰️ chore: Trim Scope to Model Catalog Changes Revert the OpenAI endpoint and agent handler changes (pro-model streaming, sampling exclusions, non-streaming aggregation) — that surface is moving out of LibreChat into the agents SDK and belongs in its own change. Keep the model list, token windows, pricing, and the fetched-model filter for chat-latest. * 🛰️ fix: Correct GPT-5.4 Context Windows and Pro Long-Context Pricing - Set gpt-5.4 and gpt-5.4-pro context to the documented 1,050,000 window — 272K is the long-context pricing breakpoint, not the cap, and using it truncated prompts before they could reach that tier - Add gpt-5.4-pro long-context premium rates ($60/$270 above 272K) per its model page; gpt-5.5-pro documents no long-context tier * 🛰️ fix: Add gpt-5.4-nano and gpt-5.5-pro Long-Context Pricing - Register gpt-5.4-nano ($0.20/$1.25, cached $0.02, 400K context) in the model list, pricing, cache, and token maps — the longest-match fallback billed it at gpt-5.4's $2.50/$15 - Add gpt-5.5-pro long-context premium rates ($60/$270 above 272K); the pricing table lists the tier even though the model page omits it --- packages/api/src/endpoints/models.spec.ts | 24 ++++++++++++++-- packages/api/src/endpoints/models.ts | 4 +-- packages/api/src/utils/tokens.ts | 10 +++++-- packages/data-provider/src/config.ts | 29 ++++++-------------- packages/data-schemas/src/methods/tx.spec.ts | 10 ++++++- packages/data-schemas/src/methods/tx.ts | 24 ++++++++++++++-- 6 files changed, 71 insertions(+), 30 deletions(-) diff --git a/packages/api/src/endpoints/models.spec.ts b/packages/api/src/endpoints/models.spec.ts index bb359968ab..a2101053d0 100644 --- a/packages/api/src/endpoints/models.spec.ts +++ b/packages/api/src/endpoints/models.spec.ts @@ -209,7 +209,7 @@ describe('getOpenAIModels', () => { it('returns default models when no environment configurations are provided (and fetch fails)', async () => { const models = await getOpenAIModels({ user: 'user456' }); - expect(models).toContain('gpt-4'); + expect(models).toContain('gpt-5.5'); }); it('returns default models when OpenAI API key is user provided', async () => { @@ -220,7 +220,7 @@ describe('getOpenAIModels', () => { expect(mockedAxios.get).not.toHaveBeenCalled(); expect(models).not.toContain('should-not-appear'); - expect(models).toContain('gpt-4'); + expect(models).toContain('gpt-5.5'); }); it('fetches models when OpenAI API key is provided through options', async () => { @@ -335,6 +335,26 @@ describe('getOpenAIModels sorting behavior', () => { ]; expect(models).toEqual(expectedOrder); }); + + it('keeps chat-latest when filtering official OpenAI results', async () => { + mockedAxios.get.mockResolvedValue({ + data: { + data: [ + { id: 'chat-latest' }, + { id: 'gpt-5.5' }, + { id: 'dall-e-3' }, + { id: 'gpt-realtime-2' }, + ], + }, + }); + + const models = await getOpenAIModels({ user: 'user456' }); + + expect(models).toContain('chat-latest'); + expect(models).toContain('gpt-5.5'); + expect(models).not.toContain('dall-e-3'); + expect(models).not.toContain('gpt-realtime-2'); + }); }); describe('fetchModels with Ollama specific logic', () => { diff --git a/packages/api/src/endpoints/models.ts b/packages/api/src/endpoints/models.ts index aac168ad7a..7246c83e5c 100644 --- a/packages/api/src/endpoints/models.ts +++ b/packages/api/src/endpoints/models.ts @@ -1,5 +1,5 @@ -import crypto from 'crypto'; import axios from 'axios'; +import crypto from 'crypto'; import { logger } from '@librechat/data-schemas'; import { HttpsProxyAgent } from 'https-proxy-agent'; import { @@ -274,7 +274,7 @@ export async function fetchOpenAIModels( } if (baseURL === openaiBaseURL) { - const regex = /(text-davinci-003|gpt-|o\d+)/; + const regex = /(text-davinci-003|gpt-|o\d+|chat-latest)/; const excludeRegex = /audio|realtime/; models = models.filter((model) => regex.test(model) && !excludeRegex.test(model)); const instructModels = models.filter((model) => model.includes('instruct')); diff --git a/packages/api/src/utils/tokens.ts b/packages/api/src/utils/tokens.ts index 7a1bc0b1c7..856b341b63 100644 --- a/packages/api/src/utils/tokens.ts +++ b/packages/api/src/utils/tokens.ts @@ -55,10 +55,13 @@ const openAIModels = { 'gpt-5.1': 400000, 'gpt-5.2': 400000, 'gpt-5.3': 400000, - 'gpt-5.4': 272000, // standard context; 1M experimental available via API opt-in (2x rate) - 'gpt-5.4-pro': 272000, // same window as gpt-5.4 + 'gpt-5.4': 1050000, // >272K input prices at the long-context tier (2x input, 1.5x output) + 'gpt-5.4-pro': 1050000, + 'gpt-5.4-mini': 400000, + 'gpt-5.4-nano': 400000, 'gpt-5.5': 1050000, 'gpt-5.5-pro': 1050000, + 'chat-latest': 400000, 'gpt-5-mini': 400000, 'gpt-5-nano': 400000, 'gpt-5-pro': 400000, @@ -383,8 +386,11 @@ export const modelMaxOutputs = { 'gpt-5.3': 128000, 'gpt-5.4': 128000, 'gpt-5.4-pro': 128000, + 'gpt-5.4-mini': 128000, + 'gpt-5.4-nano': 128000, 'gpt-5.5': 128000, 'gpt-5.5-pro': 128000, + 'chat-latest': 128000, 'gpt-5-mini': 128000, 'gpt-5-nano': 128000, 'gpt-5-pro': 128000, diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index 7426d20631..23d12c14d9 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -1578,40 +1578,27 @@ export const alternateName = { }; const sharedOpenAIModels = [ + 'gpt-5.5', + 'gpt-5.5-pro', + 'chat-latest', 'gpt-5.4', - // TODO: gpt-5.4-thinking may have separate reasoning token pricing — verify before release - 'gpt-5.4-thinking', 'gpt-5.4-pro', + 'gpt-5.4-mini', + 'gpt-5.4-nano', + 'gpt-5.3-codex', + 'gpt-5.2', 'gpt-5.1', - 'gpt-5.1-chat-latest', 'gpt-5.1-codex', + 'gpt-5.1-codex-max', 'gpt-5.1-codex-mini', 'gpt-5', 'gpt-5-mini', 'gpt-5-nano', - 'gpt-5-chat-latest', 'gpt-4.1', 'gpt-4.1-mini', 'gpt-4.1-nano', 'gpt-4o-mini', 'gpt-4o', - 'gpt-4.5-preview', - 'gpt-4.5-preview-2025-02-27', - 'gpt-3.5-turbo', - 'gpt-3.5-turbo-0125', - 'gpt-4-turbo', - 'gpt-4-turbo-2024-04-09', - 'gpt-4-0125-preview', - 'gpt-4-turbo-preview', - 'gpt-4-1106-preview', - 'gpt-3.5-turbo-1106', - 'gpt-3.5-turbo-16k-0613', - 'gpt-3.5-turbo-16k', - 'gpt-4', - 'gpt-4-0314', - 'gpt-4-32k-0314', - 'gpt-4-0613', - 'gpt-3.5-turbo-0613', ]; const sharedAnthropicModels = [ diff --git a/packages/data-schemas/src/methods/tx.spec.ts b/packages/data-schemas/src/methods/tx.spec.ts index 2ef86e8dfd..4c22218a2d 100644 --- a/packages/data-schemas/src/methods/tx.spec.ts +++ b/packages/data-schemas/src/methods/tx.spec.ts @@ -56,11 +56,19 @@ describe('getValueKey', () => { it('should return "gpt-5.3" for model name containing "gpt-5.3"', () => { expect(getValueKey('gpt-5.3')).toBe('gpt-5.3'); - expect(getValueKey('gpt-5.3-chat-latest')).toBe('gpt-5.3'); expect(getValueKey('gpt-5.3-codex')).toBe('gpt-5.3'); expect(getValueKey('openai/gpt-5.3')).toBe('gpt-5.3'); }); + it('should return explicit keys for chat-latest aliases', () => { + expect(getValueKey('chat-latest')).toBe('chat-latest'); + expect(getValueKey('openai/chat-latest')).toBe('chat-latest'); + expect(getValueKey('gpt-5-chat-latest')).toBe('gpt-5-chat-latest'); + expect(getValueKey('gpt-5.1-chat-latest')).toBe('gpt-5.1-chat-latest'); + expect(getValueKey('gpt-5.2-chat-latest')).toBe('gpt-5.2-chat-latest'); + expect(getValueKey('gpt-5.3-chat-latest')).toBe('gpt-5.3-chat-latest'); + }); + it('should return "gpt-5.4" for model name containing "gpt-5.4"', () => { expect(getValueKey('gpt-5.4')).toBe('gpt-5.4'); expect(getValueKey('gpt-5.4-thinking')).toBe('gpt-5.4'); diff --git a/packages/data-schemas/src/methods/tx.ts b/packages/data-schemas/src/methods/tx.ts index 827af52580..dd05312f0d 100644 --- a/packages/data-schemas/src/methods/tx.ts +++ b/packages/data-schemas/src/methods/tx.ts @@ -129,8 +129,16 @@ export const tokenValues: Record 'gpt-5.2': { prompt: 1.75, completion: 14 }, 'gpt-5.3': { prompt: 1.75, completion: 14 }, 'gpt-5.4': { prompt: 2.5, completion: 15 }, - // TODO: gpt-5.4-pro pricing not yet officially published — verify before release - 'gpt-5.4-pro': { prompt: 5, completion: 30 }, + 'gpt-5.4-pro': { prompt: 30, completion: 180 }, + 'gpt-5.4-mini': { prompt: 0.75, completion: 4.5 }, + 'gpt-5.4-nano': { prompt: 0.2, completion: 1.25 }, + 'gpt-5.5': { prompt: 5, completion: 30 }, + 'gpt-5.5-pro': { prompt: 30, completion: 180 }, + 'chat-latest': { prompt: 5, completion: 30 }, + 'gpt-5-chat-latest': { prompt: 1.25, completion: 10 }, + 'gpt-5.1-chat-latest': { prompt: 1.25, completion: 10 }, + 'gpt-5.2-chat-latest': { prompt: 1.75, completion: 14 }, + 'gpt-5.3-chat-latest': { prompt: 1.75, completion: 14 }, 'gpt-5-nano': { prompt: 0.05, completion: 0.4 }, 'gpt-5-mini': { prompt: 0.25, completion: 2 }, 'gpt-5-pro': { prompt: 15, completion: 120 }, @@ -310,6 +318,14 @@ export const cacheTokenValues: Record = 'gpt-5.2': { write: 1.75, read: 0.175 }, 'gpt-5.3': { write: 1.75, read: 0.175 }, 'gpt-5.4': { write: 2.5, read: 0.25 }, + 'gpt-5.4-mini': { write: 0.75, read: 0.075 }, + 'gpt-5.4-nano': { write: 0.2, read: 0.02 }, + 'gpt-5.5': { write: 5, read: 0.5 }, + 'chat-latest': { write: 5, read: 0.5 }, + 'gpt-5-chat-latest': { write: 1.25, read: 0.125 }, + 'gpt-5.1-chat-latest': { write: 1.25, read: 0.125 }, + 'gpt-5.2-chat-latest': { write: 1.75, read: 0.175 }, + 'gpt-5.3-chat-latest': { write: 1.75, read: 0.175 }, 'gpt-5-mini': { write: 0.25, read: 0.025 }, 'gpt-5-nano': { write: 0.05, read: 0.005 }, o1: { write: 15, read: 7.5 }, @@ -348,6 +364,10 @@ export const premiumTokenValues: Record< { threshold: number; prompt: number; completion: number } > = { 'gemini-3.1': { threshold: 200000, prompt: 4, completion: 18 }, + 'gpt-5.4': { threshold: 272000, prompt: 5, completion: 22.5 }, + 'gpt-5.4-pro': { threshold: 272000, prompt: 60, completion: 270 }, + 'gpt-5.5': { threshold: 272000, prompt: 10, completion: 45 }, + 'gpt-5.5-pro': { threshold: 272000, prompt: 60, completion: 270 }, }; export function createTxMethods(