LibreChat/packages/api/src/endpoints/models.ts
Danny Avila ca26a2dc9c
🛰️ feat: Add GPT-5.5 + Frontier OpenAI Models, Drop Deprecated Defaults (#13636)
* 🛰️ feat: Add GPT-5.5 + Frontier OpenAI Models, Drop Deprecated Defaults

* 🛰️ fix: Address Codex Review on OpenAI Model Refresh

- Replace nonexistent gpt-5.5-chat-latest with the actual chat-latest
  alias; register its context window, output cap, pricing, and cache
  rates, and pin explicit rates for legacy gpt-5.x-chat-latest aliases
  so the new chat-latest key cannot out-match their cheaper pricing
- Add long-context premium tiers (>272K input) for gpt-5.5 and gpt-5.4
- Disable streaming for pro reasoning models (o1-pro, gpt-5.x-pro),
  which OpenAI does not support, with spec coverage

* 🛰️ fix: Address Codex Round-2 Review and CI Spec Failure

- Allow chat-latest through the official OpenAI fetched-model filter
- Export isProReasoningModel and drop unsupported sampling parameters
  for versioned pro models (gpt-5.4-pro, gpt-5.5-pro), which the
  versioned-model exemption previously let through
- Honor the pro-model streaming disable in both agent chat-completions
  routes, which decide SSE from model_parameters before llmConfig exists
- Update models.spec default-list assertions for the refreshed defaults
  and cover chat-latest filter retention

* 🛰️ fix: Address Codex Round-3 Review

- Convert max_tokens for chat-latest, which the gpt-[5-9] guard missed
- Drop snake_case sampling params (top_p, logit_bias, penalties) in the
  reasoning-model exclusion list so addParams-sourced values are removed
- Add createOpenAIAggregatorHandlers and wire them into the agent
  chat-completions service's non-streaming branch, which previously ran
  with no handlers and always returned an empty aggregated response

* 🛰️ ci: Fix Import Order Drift and Controller Spec Mock

- Sort type import first in service.spec.ts per import-order convention
- Register isProReasoningModel in the openai controller spec's
  @librechat/api mock factory, whose enumerated exports left the new
  helper undefined and broke the non-streaming flow under test

* 🛰️ chore: Trim Scope to Model Catalog Changes

Revert the OpenAI endpoint and agent handler changes (pro-model
streaming, sampling exclusions, non-streaming aggregation) — that
surface is moving out of LibreChat into the agents SDK and belongs
in its own change. Keep the model list, token windows, pricing, and
the fetched-model filter for chat-latest.

* 🛰️ fix: Correct GPT-5.4 Context Windows and Pro Long-Context Pricing

- Set gpt-5.4 and gpt-5.4-pro context to the documented 1,050,000
  window — 272K is the long-context pricing breakpoint, not the cap,
  and using it truncated prompts before they could reach that tier
- Add gpt-5.4-pro long-context premium rates ($60/$270 above 272K)
  per its model page; gpt-5.5-pro documents no long-context tier

* 🛰️ fix: Add gpt-5.4-nano and gpt-5.5-pro Long-Context Pricing

- Register gpt-5.4-nano ($0.20/$1.25, cached $0.02, 400K context) in
  the model list, pricing, cache, and token maps — the longest-match
  fallback billed it at gpt-5.4's $2.50/$15
- Add gpt-5.5-pro long-context premium rates ($60/$270 above 272K);
  the pricing table lists the tier even though the model page omits it
2026-06-09 20:12:31 -04:00

417 lines
11 KiB
TypeScript

import axios from 'axios';
import crypto from 'crypto';
import { logger } from '@librechat/data-schemas';
import { HttpsProxyAgent } from 'https-proxy-agent';
import {
Time,
CacheKeys,
KnownEndpoints,
EModelEndpoint,
defaultModels,
} from 'librechat-data-provider';
import type { IUser } from '@librechat/data-schemas';
import {
processModelData,
extractBaseURL,
isUserProvided,
resolveHeaders,
deriveBaseURL,
logAxiosError,
inputSchema,
} from '~/utils';
import { standardCache, tokenConfigCache } from '~/cache';
/**
* Parameters accepted by `fetchModels`.
* Only `apiKey` is required; `fetchModels` returns an empty list when it is empty,
* or when no `baseURL` is available and `azure` is not set.
*/
export interface FetchModelsParams {
/** User ID for API requests; sent as the `user` query parameter only when `userIdQuery` is true */
user?: string;
/** API key for authentication (`x-api-key` for the Anthropic endpoint, `Authorization: Bearer` otherwise) */
apiKey: string;
/** Base URL for the API; `/models` is appended to it unless `azure` is true */
baseURL?: string;
/** Endpoint name (defaults to 'openAI'); names starting with the Ollama prefix try the Ollama API first */
name?: string;
/** Whether directEndpoint was configured; when true, `baseURL` is passed through `extractBaseURL` first */
direct?: boolean;
/** Whether to fetch from Azure; when true the URL is used as given, without the `/models` suffix */
azure?: boolean;
/** Whether to send user ID as query parameter; together with `user` this also disables the models cache */
userIdQuery?: boolean;
/** Whether to create token configuration from API response (only done when the response passes `inputSchema`) */
createTokenConfig?: boolean;
/** Cache key for token configuration (uses name if omitted) */
tokenKey?: string;
/** Optional headers for the request; replaced entirely by the API-key and version headers for the Anthropic endpoint */
headers?: Record<string, string> | null;
/** Optional user object for header resolution; forwarded to the Ollama fetch together with `headers` */
userObject?: Partial<IUser>;
/** Skip MODEL_QUERIES cache (e.g., for user-provided keys) */
skipCache?: boolean;
}
/**
 * Lists the models known to an Ollama server by querying its `/api/tags` route.
 *
 * The tags URL is built from `deriveBaseURL(baseURL)`, and the request headers come
 * from `resolveHeaders`. Errors from the request, or from an unexpected response
 * shape, propagate to the caller.
 *
 * @param baseURL - The Ollama server URL; an empty value short-circuits to `[]`
 * @param options - Optional headers and user object used for header resolution
 * @returns Promise resolving to array of model names
 */
async function fetchOllamaModels(
  baseURL: string,
  options: { headers?: Record<string, string> | null; user?: Partial<IUser> } = {},
): Promise<string[]> {
  if (!baseURL) {
    return [];
  }

  const { headers, user } = options;
  const tagsUrl = `${deriveBaseURL(baseURL)}/api/tags`;
  // `null` headers are normalised to `undefined` before resolution.
  const requestHeaders = resolveHeaders({ headers: headers ?? undefined, user });

  const { data } = await axios.get<{ models: Array<{ name: string }> }>(tagsUrl, {
    headers: requestHeaders,
    timeout: 5000,
  });

  return data.models.map(({ name }) => name);
}
/**
 * Breaks a comma-separated string into its individual entries.
 * Each entry is trimmed, and entries that are empty after trimming are dropped.
 * @param input - The input string to split; nullish, empty, or non-string values yield `[]`.
 * @returns An array of trimmed, non-empty values in their original order.
 */
export function splitAndTrim(input: string | null | undefined): string[] {
  if (typeof input !== 'string' || input.length === 0) {
    return [];
  }

  const values: string[] = [];
  for (const piece of input.split(',')) {
    const trimmed = piece.trim();
    if (trimmed) {
      values.push(trimmed);
    }
  }
  return values;
}
/**
 * Retrieves the model identifiers exposed by a provider's model-listing endpoint.
 *
 * Flow:
 * 1. Bail out with `[]` when there is no API key, or no base URL outside Azure mode.
 * 2. Serve from the MODEL_QUERIES cache when caching is allowed (it is disabled by
 *    `skipCache`, or when the request carries a per-user `user` query parameter).
 * 3. For endpoints whose name starts with the Ollama prefix, try the native Ollama
 *    API first; only a thrown error falls through to the OpenAI-compatible route.
 * 4. Request `<baseURL>/models` (or `baseURL` as-is for Azure), optionally store a
 *    token configuration built from the response, and map the entries to their ids.
 *
 * Failures inside step 4 are logged through `logAxiosError` and yield an empty list.
 * Non-empty results are written to the cache with `Time.TWO_MINUTES`.
 *
 * @param params - The parameters for fetching the models.
 * @returns A promise that resolves to an array of model identifiers.
 */
export async function fetchModels({
  user,
  apiKey,
  baseURL: _baseURL,
  name = EModelEndpoint.openAI,
  direct = false,
  azure = false,
  userIdQuery = false,
  createTokenConfig = true,
  tokenKey,
  headers,
  userObject,
  skipCache = false,
}: FetchModelsParams): Promise<string[]> {
  const baseURL = direct ? extractBaseURL(_baseURL ?? '') : _baseURL;
  if ((!baseURL && !azure) || !apiKey) {
    return [];
  }

  // Per-user listings must never be shared through the cache.
  const cachingDisabled = skipCache || Boolean(userIdQuery && user);
  const cacheKey = cachingDisabled ? '' : modelsCacheKey(baseURL ?? '', apiKey);
  const modelsCache = cachingDisabled ? null : standardCache(CacheKeys.MODEL_QUERIES);

  /** Stores a non-empty result when caching is active for this call. */
  const remember = async (list: string[]): Promise<void> => {
    if (modelsCache && cacheKey && list.length > 0) {
      await modelsCache.set(cacheKey, list, Time.TWO_MINUTES);
    }
  };

  if (modelsCache && cacheKey) {
    const cached = await modelsCache.get(cacheKey);
    if (cached) {
      return cached as string[];
    }
  }

  if (name && name.toLowerCase().startsWith(KnownEndpoints.ollama)) {
    let ollamaModels: string[] | null;
    try {
      ollamaModels = await fetchOllamaModels(baseURL ?? '', { headers, user: userObject });
    } catch (ollamaError) {
      ollamaModels = null;
      logAxiosError({
        message:
          'Failed to fetch models from Ollama API. Attempting to fetch via OpenAI-compatible endpoint.',
        error: ollamaError as Error,
      });
    }

    // A successful (even empty) Ollama answer is final; only a throw falls through.
    if (ollamaModels !== null) {
      await remember(ollamaModels);
      return ollamaModels;
    }
  }

  let models: string[] = [];
  try {
    // Anthropic uses its own auth headers and ignores caller-supplied ones.
    const requestHeaders: Record<string, string> =
      name === EModelEndpoint.anthropic
        ? {
            'x-api-key': apiKey,
            'anthropic-version': process.env.ANTHROPIC_VERSION || '2023-06-01',
          }
        : { ...(headers ?? {}), Authorization: `Bearer ${apiKey}` };

    const requestConfig: {
      headers: Record<string, string>;
      timeout: number;
      httpsAgent?: HttpsProxyAgent<string>;
    } = { headers: requestHeaders, timeout: 5000 };

    if (process.env.PROXY) {
      requestConfig.httpsAgent = new HttpsProxyAgent(process.env.PROXY);
    }

    if (process.env.OPENAI_ORGANIZATION && baseURL?.includes('openai')) {
      requestHeaders['OpenAI-Organization'] = process.env.OPENAI_ORGANIZATION;
    }

    // Strip trailing slashes so the suffix never produces `//models`.
    const trimmedBase = (baseURL ?? '').replace(/\/+$/, '');
    const endpoint = new URL(azure ? trimmedBase : `${trimmedBase}/models`);
    if (user && userIdQuery) {
      endpoint.searchParams.append('user', user);
    }

    const { data: payload } = await axios.get(endpoint.toString(), requestConfig);

    const parsed = inputSchema.safeParse(payload);
    if (parsed.success && createTokenConfig) {
      const tokenConfig = processModelData(payload);
      await tokenConfigCache().set(tokenKey ?? name, tokenConfig);
    }

    models = payload.data.map(({ id }: { id: string }) => id);
  } catch (error) {
    logAxiosError({
      message: `Failed to fetch models from ${azure ? 'Azure ' : ''}${name} API`,
      error: error as Error,
    });
  }

  await remember(models);
  return models;
}
/**
 * Derives the MODEL_QUERIES cache key for a base URL / API key pair.
 * The key is the first 32 hex characters of the SHA-256 digest of `<baseURL>:<apiKey>`,
 * so the raw API key is never used as a cache key itself.
 * @param baseURL - Base URL the models are fetched from
 * @param apiKey - API key used for the request
 * @returns A 32-character lowercase hex string
 */
function modelsCacheKey(baseURL: string, apiKey: string): string {
  const hash = crypto.createHash('sha256');
  hash.update(`${baseURL}:${apiKey}`);
  const hexDigest = hash.digest('hex');
  return hexDigest.slice(0, 32);
}
/**
* Options for fetching OpenAI models.
* Accepted by both `getOpenAIModels` and `fetchOpenAIModels`.
*/
export interface GetOpenAIModelsOptions {
/** User ID for API requests */
user?: string;
/**
* Azure mode. `getOpenAIModels` reads the `AZURE_OPENAI_MODELS` env var for it;
* `fetchOpenAIModels` returns the fallback list without a request unless
* `assistants` is set together with `ASSISTANTS_BASE_URL`.
*/
azure?: boolean;
/** Whether to fetch models for the Assistants endpoint (uses `ASSISTANTS_MODELS` and, when set, `ASSISTANTS_BASE_URL`) */
assistants?: boolean;
/** OpenAI API key (if not using environment variable); falls back to `OPENAI_API_KEY` when empty or omitted */
openAIApiKey?: string;
/** Skip MODEL_QUERIES cache (e.g., for user-provided keys) */
skipCache?: boolean;
}
/**
 * Picks the OpenAI API key to use for a request.
 * An explicitly supplied, non-empty `openAIApiKey` wins; otherwise the
 * `OPENAI_API_KEY` environment variable is used (which may be unset).
 * @param opts - Options that may carry an explicit API key
 * @returns The resolved key, or `undefined` when neither source provides one
 */
function resolveOpenAIApiKey(opts: GetOpenAIModelsOptions): string | undefined {
  const { openAIApiKey } = opts;
  if (openAIApiKey) {
    return openAIApiKey;
  }
  return process.env.OPENAI_API_KEY;
}
/**
 * Queries the OpenAI models endpoint (or a configured reverse proxy) and returns
 * the resulting model ids.
 *
 * - In Azure mode nothing is fetched and a copy of the fallback list is returned,
 *   unless `assistants` is set and `ASSISTANTS_BASE_URL` is configured.
 * - `ASSISTANTS_BASE_URL` (assistants only) takes precedence over `OPENAI_REVERSE_PROXY`.
 * - An empty fetch result yields the fallback list itself.
 * - Only when talking to the official OpenAI URL is the list filtered to chat-style
 *   model ids (excluding audio/realtime) and reordered so `instruct` models come last.
 *
 * @param opts - Options for fetching models
 * @param _models - Fallback models array
 * @returns Promise resolving to array of model IDs
 */
export async function fetchOpenAIModels(
  opts: GetOpenAIModelsOptions,
  _models: string[] = [],
): Promise<string[]> {
  const openaiBaseURL = 'https://api.openai.com/v1';
  const apiKey = resolveOpenAIApiKey(opts);

  const assistantsURL = opts.assistants ? process.env.ASSISTANTS_BASE_URL : undefined;
  if (!assistantsURL && opts.azure) {
    return _models.slice();
  }

  const reverseProxyUrl = assistantsURL || process.env.OPENAI_REVERSE_PROXY;
  const baseURL = reverseProxyUrl
    ? (extractBaseURL(reverseProxyUrl) ?? openaiBaseURL)
    : openaiBaseURL;

  let fetched = _models.slice();
  if (baseURL || opts.azure) {
    fetched = await fetchModels({
      apiKey: apiKey ?? '',
      baseURL,
      azure: opts.azure,
      user: opts.user,
      name: EModelEndpoint.openAI,
      skipCache: opts.skipCache,
    });
  }

  if (fetched.length === 0) {
    return _models;
  }

  // Reverse proxies may expose arbitrary model ids, so they are returned untouched.
  if (baseURL !== openaiBaseURL) {
    return fetched;
  }

  const regex = /(text-davinci-003|gpt-|o\d+|chat-latest)/;
  const excludeRegex = /audio|realtime/;
  const instructModels: string[] = [];
  const otherModels: string[] = [];
  for (const model of fetched) {
    if (!regex.test(model) || excludeRegex.test(model)) {
      continue;
    }
    const bucket = model.includes('instruct') ? instructModels : otherModels;
    bucket.push(model);
  }
  return [...otherModels, ...instructModels];
}
/**
 * Resolves the model list for the OpenAI, Assistants, or Azure variants.
 *
 * Resolution order:
 * 1. A comma-separated override from `ASSISTANTS_MODELS`, `AZURE_OPENAI_MODELS`,
 *    or `OPENAI_MODELS`, depending on the options.
 * 2. The built-in defaults when the API key is user-provided.
 * 3. Otherwise the list fetched via `fetchOpenAIModels`, with the defaults as fallback.
 *
 * @param opts - Options for getting models
 * @returns Promise resolving to array of model IDs
 */
export async function getOpenAIModels(opts: GetOpenAIModelsOptions = {}): Promise<string[]> {
  let envVar = 'OPENAI_MODELS';
  let fallbackModels = defaultModels[EModelEndpoint.openAI];

  // `assistants` wins over `azure` when both flags are set.
  if (opts.assistants) {
    envVar = 'ASSISTANTS_MODELS';
    fallbackModels = defaultModels[EModelEndpoint.assistants];
  } else if (opts.azure) {
    envVar = 'AZURE_OPENAI_MODELS';
    fallbackModels = defaultModels[EModelEndpoint.azureAssistants];
  }

  const override = process.env[envVar];
  if (override) {
    return splitAndTrim(override);
  }

  const apiKey = resolveOpenAIApiKey(opts);
  if (isUserProvided(apiKey)) {
    return fallbackModels;
  }

  return await fetchOpenAIModels(opts, fallbackModels);
}
/**
 * Queries the Anthropic models endpoint (or `ANTHROPIC_REVERSE_PROXY`, when set)
 * using the `ANTHROPIC_API_KEY` environment variable.
 *
 * Without an API key nothing is fetched and a copy of the fallback list is returned.
 * An empty result yields the fallback list itself.
 *
 * @param opts - Options for fetching models
 * @param _models - Fallback models array
 * @returns Promise resolving to array of model IDs
 */
export async function fetchAnthropicModels(
  opts: { user?: string; skipCache?: boolean } = {},
  _models: string[] = [],
): Promise<string[]> {
  const anthropicBaseURL = 'https://api.anthropic.com/v1';
  const reverseProxyUrl = process.env.ANTHROPIC_REVERSE_PROXY;
  const baseURL = reverseProxyUrl
    ? (extractBaseURL(reverseProxyUrl) ?? anthropicBaseURL)
    : anthropicBaseURL;

  const apiKey = process.env.ANTHROPIC_API_KEY;
  if (!apiKey) {
    return _models.slice();
  }

  const fetched = baseURL
    ? await fetchModels({
        apiKey,
        baseURL,
        user: opts.user,
        name: EModelEndpoint.anthropic,
        tokenKey: EModelEndpoint.anthropic,
        skipCache: opts.skipCache,
      })
    : _models.slice();

  return fetched.length === 0 ? _models : fetched;
}
/**
 * Resolves the Anthropic model list.
 *
 * Resolution order:
 * 1. Non-empty `vertexModels` (Vertex AI models from YAML config take priority).
 * 2. A comma-separated override from `ANTHROPIC_MODELS`.
 * 3. The built-in defaults when `ANTHROPIC_API_KEY` is user-provided.
 * 4. Otherwise the list fetched from the API; a thrown error is logged and the
 *    defaults are returned instead.
 *
 * @param opts - Options for fetching models
 * @returns Promise resolving to array of model IDs
 */
export async function getAnthropicModels(
  opts: { user?: string; vertexModels?: string[] } = {},
): Promise<string[]> {
  const { vertexModels } = opts;
  if (vertexModels && vertexModels.length > 0) {
    return vertexModels;
  }

  const override = process.env.ANTHROPIC_MODELS;
  if (override) {
    return splitAndTrim(override);
  }

  const fallbackModels = defaultModels[EModelEndpoint.anthropic];
  if (isUserProvided(process.env.ANTHROPIC_API_KEY)) {
    return fallbackModels;
  }

  try {
    return await fetchAnthropicModels(opts, fallbackModels);
  } catch (error) {
    logger.error('Error fetching Anthropic models:', error);
    return fallbackModels;
  }
}
/**
 * Resolves the Google model list.
 * A non-empty `GOOGLE_MODELS` environment variable (comma-separated) overrides
 * the built-in defaults.
 * @returns Array of model IDs
 */
export function getGoogleModels(): string[] {
  const override = process.env.GOOGLE_MODELS;
  return override ? splitAndTrim(override) : defaultModels[EModelEndpoint.google];
}
/**
 * Resolves the Bedrock model list.
 * A non-empty `BEDROCK_AWS_MODELS` environment variable (comma-separated) overrides
 * the built-in defaults.
 * @returns Array of model IDs
 */
export function getBedrockModels(): string[] {
  const override = process.env.BEDROCK_AWS_MODELS;
  return override ? splitAndTrim(override) : defaultModels[EModelEndpoint.bedrock];
}