From ca26a2dc9cd2c1a6ffd8eb1f016e1ace142af4af Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Tue, 9 Jun 2026 20:12:31 -0400
Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=B0=EF=B8=8F=20feat:=20Add=20GPT-5.5?=
 =?UTF-8?q?=20+=20Frontier=20OpenAI=20Models,=20Drop=20Deprecated=20Defaul?=
 =?UTF-8?q?ts=20(#13636)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 🛰️ feat: Add GPT-5.5 + Frontier OpenAI Models, Drop Deprecated Defaults

* 🛰️ fix: Address Codex Review on OpenAI Model Refresh

- Replace nonexistent gpt-5.5-chat-latest with the actual chat-latest
  alias; register its context window, output cap, pricing, and cache
  rates, and pin explicit rates for legacy gpt-5.x-chat-latest aliases
  so the new chat-latest key cannot out-match their cheaper pricing
- Add long-context premium tiers (>272K input) for gpt-5.5 and gpt-5.4
- Disable streaming for pro reasoning models (o1-pro, gpt-5.x-pro),
  since OpenAI does not support streaming for them; add spec coverage

* 🛰️ fix: Address Codex Round-2 Review and CI Spec Failure

- Allow chat-latest through the official OpenAI fetched-model filter
- Export isProReasoningModel and drop unsupported sampling parameters
  for versioned pro models (gpt-5.4-pro, gpt-5.5-pro), which the
  versioned-model exemption previously let through
- Honor the pro-model streaming disable in both agent chat-completions
  routes, which decide SSE from model_parameters before llmConfig exists
- Update models.spec default-list assertions for the refreshed defaults
  and cover chat-latest filter retention

* 🛰️ fix: Address Codex Round-3 Review

- Convert max_tokens for chat-latest, which the gpt-[5-9] guard missed
- Drop snake_case sampling params (top_p, logit_bias, penalties) in the
  reasoning-model exclusion list so addParams-sourced values are removed
- Add createOpenAIAggregatorHandlers and wire them into the agent
  chat-completions service's non-streaming branch, which previously ran
  with no handlers and always returned an empty aggregated response

* 🛰️ ci: Fix Import Order Drift and Controller Spec Mock

- Sort type import first in service.spec.ts per import-order convention
- Register isProReasoningModel in the openai controller spec's
  @librechat/api mock factory, whose enumerated exports left the new
  helper undefined and broke the non-streaming flow under test

* 🛰️ chore: Trim Scope to Model Catalog Changes

Revert the OpenAI endpoint and agent handler changes (pro-model
streaming, sampling exclusions, non-streaming aggregation) — that
surface is moving out of LibreChat into the agents SDK and belongs
in its own change. Keep the model list, token windows, pricing, and
the fetched-model filter for chat-latest.

* 🛰️ fix: Correct GPT-5.4 Context Windows and Pro Long-Context Pricing

- Set gpt-5.4 and gpt-5.4-pro context to the documented 1,050,000
  window — 272K is the long-context pricing breakpoint, not the cap,
  and using 272K as the cap truncated prompts before they could reach
  that tier
- Add gpt-5.4-pro long-context premium rates ($60/$270 above 272K)
  per its model page; gpt-5.5-pro documents no long-context tier

* 🛰️ fix: Add gpt-5.4-nano and gpt-5.5-pro Long-Context Pricing

- Register gpt-5.4-nano ($0.20/$1.25, cached $0.02, 400K context) in
  the model list, pricing, cache, and token maps — the longest-match
  fallback billed it at gpt-5.4's $2.50/$15
- Add gpt-5.5-pro long-context premium rates ($60/$270 above 272K);
  the pricing table lists the tier even though the model page omits it
---
 packages/api/src/endpoints/models.spec.ts    | 24 ++++++++++++++--
 packages/api/src/endpoints/models.ts         |  4 +--
 packages/api/src/utils/tokens.ts             | 10 +++++--
 packages/data-provider/src/config.ts         | 29 ++++++--------------
 packages/data-schemas/src/methods/tx.spec.ts | 10 ++++++-
 packages/data-schemas/src/methods/tx.ts      | 24 ++++++++++++++--
 6 files changed, 71 insertions(+), 30 deletions(-)

diff --git a/packages/api/src/endpoints/models.spec.ts b/packages/api/src/endpoints/models.spec.ts
index bb359968ab..a2101053d0 100644
--- a/packages/api/src/endpoints/models.spec.ts
+++ b/packages/api/src/endpoints/models.spec.ts
@@ -209,7 +209,7 @@ describe('getOpenAIModels', () => {
 
   it('returns default models when no environment configurations are provided (and fetch fails)', async () => {
     const models = await getOpenAIModels({ user: 'user456' });
-    expect(models).toContain('gpt-4');
+    expect(models).toContain('gpt-5.5');
   });
 
   it('returns default models when OpenAI API key is user provided', async () => {
@@ -220,7 +220,7 @@ describe('getOpenAIModels', () => {
 
     expect(mockedAxios.get).not.toHaveBeenCalled();
     expect(models).not.toContain('should-not-appear');
-    expect(models).toContain('gpt-4');
+    expect(models).toContain('gpt-5.5');
   });
 
   it('fetches models when OpenAI API key is provided through options', async () => {
@@ -335,6 +335,26 @@ describe('getOpenAIModels sorting behavior', () => {
     ];
     expect(models).toEqual(expectedOrder);
   });
+
+  it('keeps chat-latest when filtering official OpenAI results', async () => {
+    mockedAxios.get.mockResolvedValue({
+      data: {
+        data: [
+          { id: 'chat-latest' },
+          { id: 'gpt-5.5' },
+          { id: 'dall-e-3' },
+          { id: 'gpt-realtime-2' },
+        ],
+      },
+    });
+
+    const models = await getOpenAIModels({ user: 'user456' });
+
+    expect(models).toContain('chat-latest');
+    expect(models).toContain('gpt-5.5');
+    expect(models).not.toContain('dall-e-3');
+    expect(models).not.toContain('gpt-realtime-2');
+  });
 });
 
 describe('fetchModels with Ollama specific logic', () => {
diff --git a/packages/api/src/endpoints/models.ts b/packages/api/src/endpoints/models.ts
index aac168ad7a..7246c83e5c 100644
--- a/packages/api/src/endpoints/models.ts
+++ b/packages/api/src/endpoints/models.ts
@@ -1,5 +1,5 @@
-import crypto from 'crypto';
 import axios from 'axios';
+import crypto from 'crypto';
 import { logger } from '@librechat/data-schemas';
 import { HttpsProxyAgent } from 'https-proxy-agent';
 import {
@@ -274,7 +274,7 @@ export async function fetchOpenAIModels(
   }
 
   if (baseURL === openaiBaseURL) {
-    const regex = /(text-davinci-003|gpt-|o\d+)/;
+    const regex = /(text-davinci-003|gpt-|o\d+|chat-latest)/;
     const excludeRegex = /audio|realtime/;
     models = models.filter((model) => regex.test(model) && !excludeRegex.test(model));
     const instructModels = models.filter((model) => model.includes('instruct'));
diff --git a/packages/api/src/utils/tokens.ts b/packages/api/src/utils/tokens.ts
index 7a1bc0b1c7..856b341b63 100644
--- a/packages/api/src/utils/tokens.ts
+++ b/packages/api/src/utils/tokens.ts
@@ -55,10 +55,13 @@ const openAIModels = {
   'gpt-5.1': 400000,
   'gpt-5.2': 400000,
   'gpt-5.3': 400000,
-  'gpt-5.4': 272000, // standard context; 1M experimental available via API opt-in (2x rate)
-  'gpt-5.4-pro': 272000, // same window as gpt-5.4
+  'gpt-5.4': 1050000, // >272K input prices at the long-context tier (2x input, 1.5x output)
+  'gpt-5.4-pro': 1050000,
+  'gpt-5.4-mini': 400000,
+  'gpt-5.4-nano': 400000,
   'gpt-5.5': 1050000,
   'gpt-5.5-pro': 1050000,
+  'chat-latest': 400000,
   'gpt-5-mini': 400000,
   'gpt-5-nano': 400000,
   'gpt-5-pro': 400000,
@@ -383,8 +386,11 @@ export const modelMaxOutputs = {
   'gpt-5.3': 128000,
   'gpt-5.4': 128000,
   'gpt-5.4-pro': 128000,
+  'gpt-5.4-mini': 128000,
+  'gpt-5.4-nano': 128000,
   'gpt-5.5': 128000,
   'gpt-5.5-pro': 128000,
+  'chat-latest': 128000,
   'gpt-5-mini': 128000,
   'gpt-5-nano': 128000,
   'gpt-5-pro': 128000,
diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts
index 7426d20631..23d12c14d9 100644
--- a/packages/data-provider/src/config.ts
+++ b/packages/data-provider/src/config.ts
@@ -1578,40 +1578,27 @@ export const alternateName = {
 };
 
 const sharedOpenAIModels = [
+  'gpt-5.5',
+  'gpt-5.5-pro',
+  'chat-latest',
   'gpt-5.4',
-  // TODO: gpt-5.4-thinking may have separate reasoning token pricing — verify before release
-  'gpt-5.4-thinking',
   'gpt-5.4-pro',
+  'gpt-5.4-mini',
+  'gpt-5.4-nano',
+  'gpt-5.3-codex',
+  'gpt-5.2',
   'gpt-5.1',
-  'gpt-5.1-chat-latest',
   'gpt-5.1-codex',
+  'gpt-5.1-codex-max',
   'gpt-5.1-codex-mini',
   'gpt-5',
   'gpt-5-mini',
   'gpt-5-nano',
-  'gpt-5-chat-latest',
   'gpt-4.1',
   'gpt-4.1-mini',
   'gpt-4.1-nano',
   'gpt-4o-mini',
   'gpt-4o',
-  'gpt-4.5-preview',
-  'gpt-4.5-preview-2025-02-27',
-  'gpt-3.5-turbo',
-  'gpt-3.5-turbo-0125',
-  'gpt-4-turbo',
-  'gpt-4-turbo-2024-04-09',
-  'gpt-4-0125-preview',
-  'gpt-4-turbo-preview',
-  'gpt-4-1106-preview',
-  'gpt-3.5-turbo-1106',
-  'gpt-3.5-turbo-16k-0613',
-  'gpt-3.5-turbo-16k',
-  'gpt-4',
-  'gpt-4-0314',
-  'gpt-4-32k-0314',
-  'gpt-4-0613',
-  'gpt-3.5-turbo-0613',
 ];
 
 const sharedAnthropicModels = [
diff --git a/packages/data-schemas/src/methods/tx.spec.ts b/packages/data-schemas/src/methods/tx.spec.ts
index 2ef86e8dfd..4c22218a2d 100644
--- a/packages/data-schemas/src/methods/tx.spec.ts
+++ b/packages/data-schemas/src/methods/tx.spec.ts
@@ -56,11 +56,19 @@ describe('getValueKey', () => {
 
   it('should return "gpt-5.3" for model name containing "gpt-5.3"', () => {
     expect(getValueKey('gpt-5.3')).toBe('gpt-5.3');
-    expect(getValueKey('gpt-5.3-chat-latest')).toBe('gpt-5.3');
     expect(getValueKey('gpt-5.3-codex')).toBe('gpt-5.3');
     expect(getValueKey('openai/gpt-5.3')).toBe('gpt-5.3');
   });
 
+  it('should return explicit keys for chat-latest aliases', () => {
+    expect(getValueKey('chat-latest')).toBe('chat-latest');
+    expect(getValueKey('openai/chat-latest')).toBe('chat-latest');
+    expect(getValueKey('gpt-5-chat-latest')).toBe('gpt-5-chat-latest');
+    expect(getValueKey('gpt-5.1-chat-latest')).toBe('gpt-5.1-chat-latest');
+    expect(getValueKey('gpt-5.2-chat-latest')).toBe('gpt-5.2-chat-latest');
+    expect(getValueKey('gpt-5.3-chat-latest')).toBe('gpt-5.3-chat-latest');
+  });
+
   it('should return "gpt-5.4" for model name containing "gpt-5.4"', () => {
     expect(getValueKey('gpt-5.4')).toBe('gpt-5.4');
     expect(getValueKey('gpt-5.4-thinking')).toBe('gpt-5.4');
diff --git a/packages/data-schemas/src/methods/tx.ts b/packages/data-schemas/src/methods/tx.ts
index 827af52580..dd05312f0d 100644
--- a/packages/data-schemas/src/methods/tx.ts
+++ b/packages/data-schemas/src/methods/tx.ts
@@ -129,8 +129,16 @@ export const tokenValues: Record<string, { prompt: number; completion: number }>
     'gpt-5.2': { prompt: 1.75, completion: 14 },
     'gpt-5.3': { prompt: 1.75, completion: 14 },
     'gpt-5.4': { prompt: 2.5, completion: 15 },
-    // TODO: gpt-5.4-pro pricing not yet officially published — verify before release
-    'gpt-5.4-pro': { prompt: 5, completion: 30 },
+    'gpt-5.4-pro': { prompt: 30, completion: 180 },
+    'gpt-5.4-mini': { prompt: 0.75, completion: 4.5 },
+    'gpt-5.4-nano': { prompt: 0.2, completion: 1.25 },
+    'gpt-5.5': { prompt: 5, completion: 30 },
+    'gpt-5.5-pro': { prompt: 30, completion: 180 },
+    'chat-latest': { prompt: 5, completion: 30 },
+    'gpt-5-chat-latest': { prompt: 1.25, completion: 10 },
+    'gpt-5.1-chat-latest': { prompt: 1.25, completion: 10 },
+    'gpt-5.2-chat-latest': { prompt: 1.75, completion: 14 },
+    'gpt-5.3-chat-latest': { prompt: 1.75, completion: 14 },
     'gpt-5-nano': { prompt: 0.05, completion: 0.4 },
     'gpt-5-mini': { prompt: 0.25, completion: 2 },
     'gpt-5-pro': { prompt: 15, completion: 120 },
@@ -310,6 +318,14 @@ export const cacheTokenValues: Record<string, { write: number; read: number }> =
   'gpt-5.2': { write: 1.75, read: 0.175 },
   'gpt-5.3': { write: 1.75, read: 0.175 },
   'gpt-5.4': { write: 2.5, read: 0.25 },
+  'gpt-5.4-mini': { write: 0.75, read: 0.075 },
+  'gpt-5.4-nano': { write: 0.2, read: 0.02 },
+  'gpt-5.5': { write: 5, read: 0.5 },
+  'chat-latest': { write: 5, read: 0.5 },
+  'gpt-5-chat-latest': { write: 1.25, read: 0.125 },
+  'gpt-5.1-chat-latest': { write: 1.25, read: 0.125 },
+  'gpt-5.2-chat-latest': { write: 1.75, read: 0.175 },
+  'gpt-5.3-chat-latest': { write: 1.75, read: 0.175 },
   'gpt-5-mini': { write: 0.25, read: 0.025 },
   'gpt-5-nano': { write: 0.05, read: 0.005 },
   o1: { write: 15, read: 7.5 },
@@ -348,6 +364,10 @@ export const premiumTokenValues: Record<
   { threshold: number; prompt: number; completion: number }
 > = {
   'gemini-3.1': { threshold: 200000, prompt: 4, completion: 18 },
+  'gpt-5.4': { threshold: 272000, prompt: 5, completion: 22.5 },
+  'gpt-5.4-pro': { threshold: 272000, prompt: 60, completion: 270 },
+  'gpt-5.5': { threshold: 272000, prompt: 10, completion: 45 },
+  'gpt-5.5-pro': { threshold: 272000, prompt: 60, completion: 270 },
 };
 
 export function createTxMethods(