🛰️ feat: Add GPT-5.5 + Frontier OpenAI Models, Drop Deprecated Defaults (#13636)

* 🛰️ feat: Add GPT-5.5 + Frontier OpenAI Models, Drop Deprecated Defaults

* 🛰️ fix: Address Codex Review on OpenAI Model Refresh
  - Replace nonexistent gpt-5.5-chat-latest with the actual chat-latest alias; register its context window, output cap, pricing, and cache rates, and pin explicit rates for legacy gpt-5.x-chat-latest aliases so the new chat-latest key cannot out-match their cheaper pricing
  - Add long-context premium tiers (>272K input) for gpt-5.5 and gpt-5.4
  - Disable streaming for pro reasoning models (o1-pro, gpt-5.x-pro), which OpenAI does not support, with spec coverage

* 🛰️ fix: Address Codex Round-2 Review and CI Spec Failure
  - Allow chat-latest through the official OpenAI fetched-model filter
  - Export isProReasoningModel and drop unsupported sampling parameters for versioned pro models (gpt-5.4-pro, gpt-5.5-pro), which the versioned-model exemption previously let through
  - Honor the pro-model streaming disable in both agent chat-completions routes, which decide SSE from model_parameters before llmConfig exists
  - Update models.spec default-list assertions for the refreshed defaults and cover chat-latest filter retention

* 🛰️ fix: Address Codex Round-3 Review
  - Convert max_tokens for chat-latest, which the gpt-[5-9] guard missed
  - Drop snake_case sampling params (top_p, logit_bias, penalties) in the reasoning-model exclusion list so addParams-sourced values are removed
  - Add createOpenAIAggregatorHandlers and wire them into the agent chat-completions service's non-streaming branch, which previously ran with no handlers and always returned an empty aggregated response

* 🛰️ ci: Fix Import Order Drift and Controller Spec Mock
  - Sort type import first in service.spec.ts per import-order convention
  - Register isProReasoningModel in the openai controller spec's @librechat/api mock factory, whose enumerated exports left the new helper undefined and broke the non-streaming flow under test

* 🛰️ chore: Trim Scope to Model Catalog Changes
  Revert the OpenAI endpoint and agent handler changes (pro-model streaming, sampling exclusions, non-streaming aggregation) — that surface is moving out of LibreChat into the agents SDK and belongs in its own change. Keep the model list, token windows, pricing, and the fetched-model filter for chat-latest.

* 🛰️ fix: Correct GPT-5.4 Context Windows and Pro Long-Context Pricing
  - Set gpt-5.4 and gpt-5.4-pro context to the documented 1,050,000 window — 272K is the long-context pricing breakpoint, not the cap, and using it truncated prompts before they could reach that tier
  - Add gpt-5.4-pro long-context premium rates ($60/$270 above 272K) per its model page; gpt-5.5-pro documents no long-context tier

* 🛰️ fix: Add gpt-5.4-nano and gpt-5.5-pro Long-Context Pricing
  - Register gpt-5.4-nano ($0.20/$1.25, cached $0.02, 400K context) in the model list, pricing, cache, and token maps — the longest-match fallback billed it at gpt-5.4's $2.50/$15
  - Add gpt-5.5-pro long-context premium rates ($60/$270 above 272K); the pricing table lists the tier even though the model page omits it
2026-06-25 08:56:10 +00:00 · 2026-06-09 20:12:31 -04:00 · 2026-06-09 20:12:31 -04:00 · ca26a2dc9c
commit ca26a2dc9c
parent ba5778d0df
6 changed files with 71 additions and 30 deletions
--- a/packages/api/src/endpoints/models.spec.ts
+++ b/packages/api/src/endpoints/models.spec.ts
@@ -209,7 +209,7 @@ describe('getOpenAIModels', () => {

  it('returns default models when no environment configurations are provided (and fetch fails)', async () => {
    const models = await getOpenAIModels({ user: 'user456' });
-    expect(models).toContain('gpt-4');
+    expect(models).toContain('gpt-5.5');
  });

  it('returns default models when OpenAI API key is user provided', async () => {
@@ -220,7 +220,7 @@ describe('getOpenAIModels', () => {

    expect(mockedAxios.get).not.toHaveBeenCalled();
    expect(models).not.toContain('should-not-appear');
-    expect(models).toContain('gpt-4');
+    expect(models).toContain('gpt-5.5');
  });

  it('fetches models when OpenAI API key is provided through options', async () => {
@@ -335,6 +335,26 @@ describe('getOpenAIModels sorting behavior', () => {
    ];
    expect(models).toEqual(expectedOrder);
  });
+
+  it('keeps chat-latest when filtering official OpenAI results', async () => {
+    mockedAxios.get.mockResolvedValue({
+      data: {
+        data: [
+          { id: 'chat-latest' },
+          { id: 'gpt-5.5' },
+          { id: 'dall-e-3' },
+          { id: 'gpt-realtime-2' },
+        ],
+      },
+    });
+
+    const models = await getOpenAIModels({ user: 'user456' });
+
+    expect(models).toContain('chat-latest');
+    expect(models).toContain('gpt-5.5');
+    expect(models).not.toContain('dall-e-3');
+    expect(models).not.toContain('gpt-realtime-2');
+  });
 });

 describe('fetchModels with Ollama specific logic', () => {
--- a/packages/api/src/endpoints/models.ts
+++ b/packages/api/src/endpoints/models.ts
@@ -1,5 +1,5 @@
-import crypto from 'crypto';
 import axios from 'axios';
+import crypto from 'crypto';
 import { logger } from '@librechat/data-schemas';
 import { HttpsProxyAgent } from 'https-proxy-agent';
 import {
@@ -274,7 +274,7 @@ export async function fetchOpenAIModels(
  }

  if (baseURL === openaiBaseURL) {
-    const regex = /(text-davinci-003|gpt-|o\d+)/;
+    const regex = /(text-davinci-003|gpt-|o\d+|chat-latest)/;
    const excludeRegex = /audio|realtime/;
    models = models.filter((model) => regex.test(model) && !excludeRegex.test(model));
    const instructModels = models.filter((model) => model.includes('instruct'));
--- a/packages/api/src/utils/tokens.ts
+++ b/packages/api/src/utils/tokens.ts
@@ -55,10 +55,13 @@ const openAIModels = {
  'gpt-5.1': 400000,
  'gpt-5.2': 400000,
  'gpt-5.3': 400000,
-  'gpt-5.4': 272000, // standard context; 1M experimental available via API opt-in (2x rate)
-  'gpt-5.4-pro': 272000, // same window as gpt-5.4
+  'gpt-5.4': 1050000, // >272K input prices at the long-context tier (2x input, 1.5x output)
+  'gpt-5.4-pro': 1050000,
+  'gpt-5.4-mini': 400000,
+  'gpt-5.4-nano': 400000,
  'gpt-5.5': 1050000,
  'gpt-5.5-pro': 1050000,
+  'chat-latest': 400000,
  'gpt-5-mini': 400000,
  'gpt-5-nano': 400000,
  'gpt-5-pro': 400000,
@@ -383,8 +386,11 @@ export const modelMaxOutputs = {
  'gpt-5.3': 128000,
  'gpt-5.4': 128000,
  'gpt-5.4-pro': 128000,
+  'gpt-5.4-mini': 128000,
+  'gpt-5.4-nano': 128000,
  'gpt-5.5': 128000,
  'gpt-5.5-pro': 128000,
+  'chat-latest': 128000,
  'gpt-5-mini': 128000,
  'gpt-5-nano': 128000,
  'gpt-5-pro': 128000,
--- a/packages/data-provider/src/config.ts
+++ b/packages/data-provider/src/config.ts
@@ -1578,40 +1578,27 @@ export const alternateName = {
 };

 const sharedOpenAIModels = [
+  'gpt-5.5',
+  'gpt-5.5-pro',
+  'chat-latest',
  'gpt-5.4',
-  // TODO: gpt-5.4-thinking may have separate reasoning token pricing — verify before release
-  'gpt-5.4-thinking',
  'gpt-5.4-pro',
+  'gpt-5.4-mini',
+  'gpt-5.4-nano',
+  'gpt-5.3-codex',
+  'gpt-5.2',
  'gpt-5.1',
-  'gpt-5.1-chat-latest',
  'gpt-5.1-codex',
+  'gpt-5.1-codex-max',
  'gpt-5.1-codex-mini',
  'gpt-5',
  'gpt-5-mini',
  'gpt-5-nano',
-  'gpt-5-chat-latest',
  'gpt-4.1',
  'gpt-4.1-mini',
  'gpt-4.1-nano',
  'gpt-4o-mini',
  'gpt-4o',
-  'gpt-4.5-preview',
-  'gpt-4.5-preview-2025-02-27',
-  'gpt-3.5-turbo',
-  'gpt-3.5-turbo-0125',
-  'gpt-4-turbo',
-  'gpt-4-turbo-2024-04-09',
-  'gpt-4-0125-preview',
-  'gpt-4-turbo-preview',
-  'gpt-4-1106-preview',
-  'gpt-3.5-turbo-1106',
-  'gpt-3.5-turbo-16k-0613',
-  'gpt-3.5-turbo-16k',
-  'gpt-4',
-  'gpt-4-0314',
-  'gpt-4-32k-0314',
-  'gpt-4-0613',
-  'gpt-3.5-turbo-0613',
 ];

 const sharedAnthropicModels = [
--- a/packages/data-schemas/src/methods/tx.spec.ts
+++ b/packages/data-schemas/src/methods/tx.spec.ts
@@ -56,11 +56,19 @@ describe('getValueKey', () => {

  it('should return "gpt-5.3" for model name containing "gpt-5.3"', () => {
    expect(getValueKey('gpt-5.3')).toBe('gpt-5.3');
-    expect(getValueKey('gpt-5.3-chat-latest')).toBe('gpt-5.3');
    expect(getValueKey('gpt-5.3-codex')).toBe('gpt-5.3');
    expect(getValueKey('openai/gpt-5.3')).toBe('gpt-5.3');
  });

+  it('should return explicit keys for chat-latest aliases', () => {
+    expect(getValueKey('chat-latest')).toBe('chat-latest');
+    expect(getValueKey('openai/chat-latest')).toBe('chat-latest');
+    expect(getValueKey('gpt-5-chat-latest')).toBe('gpt-5-chat-latest');
+    expect(getValueKey('gpt-5.1-chat-latest')).toBe('gpt-5.1-chat-latest');
+    expect(getValueKey('gpt-5.2-chat-latest')).toBe('gpt-5.2-chat-latest');
+    expect(getValueKey('gpt-5.3-chat-latest')).toBe('gpt-5.3-chat-latest');
+  });
+
  it('should return "gpt-5.4" for model name containing "gpt-5.4"', () => {
    expect(getValueKey('gpt-5.4')).toBe('gpt-5.4');
    expect(getValueKey('gpt-5.4-thinking')).toBe('gpt-5.4');
--- a/packages/data-schemas/src/methods/tx.ts
+++ b/packages/data-schemas/src/methods/tx.ts
@@ -129,8 +129,16 @@ export const tokenValues: Record<string, { prompt: number; completion: number }>
    'gpt-5.2': { prompt: 1.75, completion: 14 },
    'gpt-5.3': { prompt: 1.75, completion: 14 },
    'gpt-5.4': { prompt: 2.5, completion: 15 },
-    // TODO: gpt-5.4-pro pricing not yet officially published — verify before release
-    'gpt-5.4-pro': { prompt: 5, completion: 30 },
+    'gpt-5.4-pro': { prompt: 30, completion: 180 },
+    'gpt-5.4-mini': { prompt: 0.75, completion: 4.5 },
+    'gpt-5.4-nano': { prompt: 0.2, completion: 1.25 },
+    'gpt-5.5': { prompt: 5, completion: 30 },
+    'gpt-5.5-pro': { prompt: 30, completion: 180 },
+    'chat-latest': { prompt: 5, completion: 30 },
+    'gpt-5-chat-latest': { prompt: 1.25, completion: 10 },
+    'gpt-5.1-chat-latest': { prompt: 1.25, completion: 10 },
+    'gpt-5.2-chat-latest': { prompt: 1.75, completion: 14 },
+    'gpt-5.3-chat-latest': { prompt: 1.75, completion: 14 },
    'gpt-5-nano': { prompt: 0.05, completion: 0.4 },
    'gpt-5-mini': { prompt: 0.25, completion: 2 },
    'gpt-5-pro': { prompt: 15, completion: 120 },
@@ -310,6 +318,14 @@ export const cacheTokenValues: Record<string, { write: number; read: number }> =
  'gpt-5.2': { write: 1.75, read: 0.175 },
  'gpt-5.3': { write: 1.75, read: 0.175 },
  'gpt-5.4': { write: 2.5, read: 0.25 },
+  'gpt-5.4-mini': { write: 0.75, read: 0.075 },
+  'gpt-5.4-nano': { write: 0.2, read: 0.02 },
+  'gpt-5.5': { write: 5, read: 0.5 },
+  'chat-latest': { write: 5, read: 0.5 },
+  'gpt-5-chat-latest': { write: 1.25, read: 0.125 },
+  'gpt-5.1-chat-latest': { write: 1.25, read: 0.125 },
+  'gpt-5.2-chat-latest': { write: 1.75, read: 0.175 },
+  'gpt-5.3-chat-latest': { write: 1.75, read: 0.175 },
  'gpt-5-mini': { write: 0.25, read: 0.025 },
  'gpt-5-nano': { write: 0.05, read: 0.005 },
  o1: { write: 15, read: 7.5 },
@@ -348,6 +364,10 @@ export const premiumTokenValues: Record<
  { threshold: number; prompt: number; completion: number }
 > = {
  'gemini-3.1': { threshold: 200000, prompt: 4, completion: 18 },
+  'gpt-5.4': { threshold: 272000, prompt: 5, completion: 22.5 },
+  'gpt-5.4-pro': { threshold: 272000, prompt: 60, completion: 270 },
+  'gpt-5.5': { threshold: 272000, prompt: 10, completion: 45 },
+  'gpt-5.5-pro': { threshold: 272000, prompt: 60, completion: 270 },
 };

 export function createTxMethods(