import type { Response as ServerResponse } from 'express';
import type { OpenAIResponseContext } from './types';
import { sendFinalChunk, OpenAIModelEndHandler, createOpenAIStreamTracker } from './handlers';

/**
 * Verifies that reasoning-token counts survive both stages of the
 * OpenAI-compatible stream: accumulation on `on_chat_model_end`, and
 * serialization into the usage object of the final streamed chunk.
 */
describe('OpenAI-compatible agent stream handlers', () => {
  const PROMPT_TOKENS = 64;
  const COMPLETION_TOKENS = 3315;
  const REASONING_TOKENS = 641;

  const context: OpenAIResponseContext = {
    requestId: 'chatcmpl-test',
    created: 1778317637,
    model: 'anthropic/claude-sonnet-4.6',
  };

  it('preserves reasoning token usage from model end metadata', () => {
    const tracker = createOpenAIStreamTracker();
    // Only `write` is stubbed; the handler is given nothing else of the response.
    const res = { write: jest.fn() } as unknown as ServerResponse;
    const handler = new OpenAIModelEndHandler({ context, tracker, res });

    const usageMetadata = {
      input_tokens: PROMPT_TOKENS,
      output_tokens: COMPLETION_TOKENS,
      output_token_details: {
        reasoning: REASONING_TOKENS,
      },
    };
    handler.handle('on_chat_model_end', { output: { usage_metadata: usageMetadata } });

    expect(tracker.usage).toEqual({
      promptTokens: PROMPT_TOKENS,
      completionTokens: COMPLETION_TOKENS,
      reasoningTokens: REASONING_TOKENS,
    });
  });

  it('includes reasoning token details in the final streamed usage chunk', () => {
    const tracker = createOpenAIStreamTracker();
    tracker.usage.promptTokens = PROMPT_TOKENS;
    tracker.usage.completionTokens = COMPLETION_TOKENS;
    tracker.usage.reasoningTokens = REASONING_TOKENS;

    // Record every chunk handed to `write` so the first one can be decoded below.
    const captured: string[] = [];
    const res = {
      write(chunk: string) {
        captured.push(chunk);
      },
    } as unknown as ServerResponse;

    sendFinalChunk({ context, tracker, res });

    // Strip the SSE `data: ` prefix before parsing the JSON payload.
    const payload = captured[0].replace(/^data: /, '').trim();
    const finalChunk = JSON.parse(payload);
    expect(finalChunk.usage).toEqual({
      prompt_tokens: PROMPT_TOKENS,
      completion_tokens: COMPLETION_TOKENS,
      total_tokens: 3379,
      completion_tokens_details: {
        reasoning_tokens: REASONING_TOKENS,
      },
    });
  });
});
b/packages/api/src/agents/openai/handlers.ts index 0eea609771..a4b871c4cd 100644 --- a/packages/api/src/agents/openai/handlers.ts +++ b/packages/api/src/agents/openai/handlers.ts @@ -218,6 +218,10 @@ export interface ModelEndData { input_tokens?: number; output_tokens?: number; model?: string; + output_token_details?: { + reasoning?: number; + reasoning_tokens?: number; + }; }; }; } @@ -354,6 +358,8 @@ export class OpenAIModelEndHandler implements EventHandler { this.config.tracker.usage.promptTokens += usage.input_tokens ?? 0; this.config.tracker.usage.completionTokens += usage.output_tokens ?? 0; + this.config.tracker.usage.reasoningTokens += + usage.output_token_details?.reasoning ?? usage.output_token_details?.reasoning_tokens ?? 0; } } diff --git a/packages/api/src/endpoints/openai/config.spec.ts b/packages/api/src/endpoints/openai/config.spec.ts index baea92b3d9..9de28d4049 100644 --- a/packages/api/src/endpoints/openai/config.spec.ts +++ b/packages/api/src/endpoints/openai/config.spec.ts @@ -1255,9 +1255,9 @@ describe('getOpenAIConfig', () => { // Should NOT have useResponsesApi for OpenRouter expect(result.llmConfig.useResponsesApi).toBeUndefined(); expect(result.llmConfig.maxTokens).toBe(2000); + expect(result.llmConfig.verbosity).toBe(Verbosity.medium); expect(result.llmConfig.modelKwargs).toEqual({ reasoning: { effort: ReasoningEffort.high }, - verbosity: Verbosity.medium, customParam: 'custom-value', plugins: [{ id: 'web' }], // OpenRouter web search format }); @@ -1573,8 +1573,9 @@ describe('getOpenAIConfig', () => { promptCache: true, }); expect(result.llmConfig.include_reasoning).toBeUndefined(); + expect(result.llmConfig.verbosity).toBe(ReasoningEffort.high); expect(result.llmConfig.modelKwargs).toMatchObject({ - reasoning: { effort: ReasoningEffort.high }, + reasoning: { enabled: true }, }); expect(result.configOptions?.baseURL).toBe(baseURL); expect(result.configOptions?.defaultHeaders).toMatchObject({ diff --git 
/** Options each OpenRouter case supplies itself; the shared ones are filled in by the builder. */
type OpenRouterCaseOptions = Omit<
  Parameters<typeof getOpenAILLMConfig>[0],
  'apiKey' | 'streaming' | 'useOpenRouter'
>;

/** Calls `getOpenAILLMConfig` with the API key, streaming and OpenRouter flags used by every case. */
const buildOpenRouterConfig = (options: OpenRouterCaseOptions) =>
  getOpenAILLMConfig({
    apiKey: 'test-api-key',
    streaming: true,
    useOpenRouter: true,
    ...options,
  });

/** Asserts that the adaptive path emitted `reasoning: { enabled: true }` in modelKwargs. */
const expectReasoningEnabled = (result: ReturnType<typeof getOpenAILLMConfig>) => {
  expect(result.llmConfig.modelKwargs).toHaveProperty('reasoning', {
    enabled: true,
  });
};

it('should map OpenRouter adaptive Claude reasoning effort to enabled reasoning and verbosity', () => {
  const result = buildOpenRouterConfig({
    modelOptions: {
      model: 'anthropic/claude-sonnet-4.6',
      reasoning_effort: ReasoningEffort.high,
    },
  });

  expectReasoningEnabled(result);
  expect(result.llmConfig).toHaveProperty('verbosity', ReasoningEffort.high);
  expect(result.llmConfig).not.toHaveProperty('include_reasoning');
});

it('should not override explicit OpenRouter verbosity for adaptive Claude models', () => {
  const result = buildOpenRouterConfig({
    modelOptions: {
      model: 'anthropic/claude-opus-4.7',
      verbosity: Verbosity.low,
      reasoning_effort: ReasoningEffort.xhigh,
    },
  });

  expectReasoningEnabled(result);
  expect(result.llmConfig).toHaveProperty('verbosity', Verbosity.low);
});

it('should handle OpenRouter adaptive Claude model ids with latest routing prefix', () => {
  const result = buildOpenRouterConfig({
    modelOptions: {
      model: '~anthropic/claude-4.7-opus-20260416',
      reasoning_effort: ReasoningEffort.xhigh,
    },
  });

  expectReasoningEnabled(result);
  expect(result.llmConfig).toHaveProperty('verbosity', ReasoningEffort.xhigh);
});

it('should map extra-high OpenRouter Claude 4.6 effort to max verbosity', () => {
  const result = buildOpenRouterConfig({
    modelOptions: {
      model: 'anthropic/claude-sonnet-4.6',
      reasoning_effort: ReasoningEffort.xhigh,
    },
  });

  expectReasoningEnabled(result);
  expect(result.llmConfig).toHaveProperty('verbosity', 'max');
});

it('should preserve extra-high OpenRouter verbosity for future adaptive Claude models', () => {
  const result = buildOpenRouterConfig({
    modelOptions: {
      model: 'anthropic/claude-sonnet-5',
      reasoning_effort: ReasoningEffort.xhigh,
    },
  });

  expectReasoningEnabled(result);
  expect(result.llmConfig).toHaveProperty('verbosity', ReasoningEffort.xhigh);
});

it('should pass OpenRouter verbosity as a top-level parameter', () => {
  const { llmConfig } = buildOpenRouterConfig({
    modelOptions: {
      model: 'anthropic/claude-sonnet-4.6',
      verbosity: Verbosity.high,
    },
  });

  expect(llmConfig).toHaveProperty('verbosity', Verbosity.high);
  expect(llmConfig.modelKwargs).toBeUndefined();
});

it('should pass OpenRouter default verbosity as a top-level parameter', () => {
  const { llmConfig } = buildOpenRouterConfig({
    defaultParams: {
      verbosity: Verbosity.high,
    },
    modelOptions: {
      model: 'anthropic/claude-sonnet-4.6',
    },
  });

  expect(llmConfig).toHaveProperty('verbosity', Verbosity.high);
  expect(llmConfig.modelKwargs).toBeUndefined();
});

it('should pass OpenRouter max verbosity as a top-level parameter', () => {
  const { llmConfig } = buildOpenRouterConfig({
    addParams: {
      verbosity: 'max',
    },
    modelOptions: {
      model: 'anthropic/claude-sonnet-4.6',
    },
  });

  expect(llmConfig).toHaveProperty('verbosity', 'max');
  expect(llmConfig.modelKwargs).toBeUndefined();
});

it('should preserve provider-specific OpenRouter verbosity values', () => {
  const { llmConfig } = buildOpenRouterConfig({
    addParams: {
      verbosity: 'ultra',
    },
    modelOptions: {
      model: 'custom/openrouter-model',
    },
  });

  expect(llmConfig).toHaveProperty('verbosity', 'ultra');
  expect(llmConfig.modelKwargs).toBeUndefined();
});

it('should pass OpenRouter Responses API verbosity under text', () => {
  const { llmConfig } = buildOpenRouterConfig({
    addParams: {
      verbosity: 'xhigh',
    },
    modelOptions: {
      model: 'anthropic/claude-opus-4.7',
      useResponsesApi: true,
    },
  });

  expect(llmConfig).not.toHaveProperty('verbosity');
  expect(llmConfig.modelKwargs).toHaveProperty('text', {
    verbosity: 'xhigh',
  });
});

it('should pass adaptive OpenRouter Responses API effort verbosity under text', () => {
  const { llmConfig } = buildOpenRouterConfig({
    modelOptions: {
      model: '~anthropic/claude-4.7-opus-20260416',
      useResponsesApi: true,
      reasoning_effort: ReasoningEffort.xhigh,
    },
  });

  expect(llmConfig).not.toHaveProperty('verbosity');
  expect(llmConfig.modelKwargs).toMatchObject({
    reasoning: { enabled: true },
    text: { verbosity: ReasoningEffort.xhigh },
  });
});

it('should let OpenRouter added verbosity override model verbosity', () => {
  const { llmConfig } = buildOpenRouterConfig({
    addParams: {
      verbosity: Verbosity.high,
    },
    modelOptions: {
      model: 'anthropic/claude-sonnet-4.6',
      verbosity: Verbosity.low,
    },
  });

  expect(llmConfig).toHaveProperty('verbosity', Verbosity.high);
  expect(llmConfig.modelKwargs).toBeUndefined();
});

it('should disable adaptive Claude reasoning when OpenRouter reasoning_effort is none', () => {
  const { llmConfig } = buildOpenRouterConfig({
    modelOptions: {
      model: 'anthropic/claude-opus-4.7',
      reasoning_effort: ReasoningEffort.none,
    },
  });

  expect(llmConfig).toHaveProperty('include_reasoning', false);
  expect(llmConfig).not.toHaveProperty('modelKwargs');
});

// Non-OpenRouter case: verbosity stays inside modelKwargs rather than moving to the top level.
it('should preserve provider-specific verbosity values in modelKwargs', () => {
  const { llmConfig } = getOpenAILLMConfig({
    apiKey: 'test-api-key',
    streaming: true,
    defaultParams: {
      verbosity: 'detailed',
    },
    modelOptions: {
      model: 'custom-model',
    },
  });

  expect(llmConfig.modelKwargs).toHaveProperty('verbosity', 'detailed');
});
/**
 * Shape of the config object assembled by `getOpenAILLMConfig`.
 * `verbosity` is re-declared as a free-form string so values outside the
 * upstream client typings (e.g. `max`, `xhigh`) can be carried through.
 */
type OpenAILLMConfig = Omit<Partial<t.ClientOptions>, 'verbosity'> &
  Omit<Partial<t.OpenAIParameters>, 'verbosity'> &
  Omit<Partial<AzureOpenAIInput>, 'verbosity'> & {
    verbosity?: string | null;
  };

/**
 * Reasoning-effort → verbosity translation used for adaptive Anthropic models on OpenRouter.
 * Efforts not listed here (including `none`) have no verbosity equivalent.
 */
const openRouterAnthropicVerbosityByEffort: Record<
  string,
  NonNullable<OpenAILLMConfig['verbosity']>
> = {
  minimal: 'low',
  low: 'low',
  medium: 'medium',
  high: 'high',
  xhigh: 'xhigh',
};

/** Type guard: `value` is a non-empty string and therefore usable as a verbosity level. */
function isStringVerbosity(value: unknown): value is string {
  return typeof value === 'string' && value !== '';
}

/**
 * Routes a `verbosity` value coming from default/added params to the right place.
 *
 * - OpenRouter: written to `llmConfig.verbosity` (top-level request parameter).
 * - Everything else: written to `modelKwargs.verbosity`.
 *
 * @param value - Candidate verbosity; ignored unless it is a non-empty string.
 * @param override - `true` replaces an existing value, `false` only fills a gap.
 * @param llmConfig - Config object mutated on the OpenRouter path.
 * @param modelKwargs - Kwargs object mutated on the non-OpenRouter path.
 * @param useOpenRouter - Selects the OpenRouter path when truthy.
 * @returns `true` when `modelKwargs` holds a verbosity after the call, so the
 *   caller can fold the result into its "has model kwargs" flag.
 */
function applyVerbosityParam({
  value,
  override,
  llmConfig,
  modelKwargs,
  useOpenRouter,
}: {
  value: unknown;
  override: boolean;
  llmConfig: OpenAILLMConfig;
  modelKwargs: Record<string, unknown>;
  useOpenRouter?: boolean;
}): boolean {
  if (!isStringVerbosity(value)) {
    return false;
  }

  if (useOpenRouter) {
    // `null` counts as "unset", matching the `== null` check in applyOpenRouterReasoningConfig.
    if (override || llmConfig.verbosity == null) {
      llmConfig.verbosity = value;
    }
    // OpenRouter verbosity never lives in modelKwargs at this stage.
    return false;
  }

  if (!override && modelKwargs.verbosity !== undefined) {
    // A default must not clobber a value that is already present.
    return true;
  }

  modelKwargs.verbosity = value;
  return true;
}

/**
 * Whether `model` is an Anthropic model on OpenRouter that supports adaptive thinking.
 * The vendor prefix is checked on the normalized id; `supportsAdaptiveThinking`
 * receives the id exactly as supplied.
 */
function isOpenRouterAnthropicAdaptiveModel(model?: string | null): boolean {
  if (typeof model !== 'string') {
    return false;
  }
  const normalizedModel = normalizeOpenRouterModel(model);
  return normalizedModel.startsWith('anthropic/') && supportsAdaptiveThinking(model);
}

/** Lower-cases an OpenRouter model id and strips a leading `~` prefix. */
function normalizeOpenRouterModel(model: string): string {
  return model.toLowerCase().replace(/^~/, '');
}

/**
 * Whether `model` names Claude Opus/Sonnet 4.6, accepting both orderings
 * (`claude-sonnet-4.6` and `claude-4.6-sonnet`) with `-` or `.` separators.
 */
function isOpenRouterClaude46Model(model: string): boolean {
  const normalizedModel = normalizeOpenRouterModel(model);
  return (
    /claude[-.](?:opus|sonnet)[-.]4[-.]6/.test(normalizedModel) ||
    /claude[-.]4[-.]6[-.](?:opus|sonnet)/.test(normalizedModel)
  );
}

/**
 * Translates a reasoning effort into the verbosity sent for adaptive Anthropic models.
 *
 * @param reasoningEffort - Requested effort; empty/nullish or unlisted efforts yield `undefined`.
 * @param model - Model id; only consulted to turn `xhigh` into `max` for Claude 4.6.
 * @returns The mapped verbosity, or `undefined` when the effort has no mapping.
 */
function getOpenRouterAnthropicVerbosity(
  reasoningEffort?: string | null,
  model?: string | null,
): OpenAILLMConfig['verbosity'] | undefined {
  if (!reasoningEffort) {
    return undefined;
  }
  // Own-property check: a bare index would resolve names such as `constructor`
  // or `toString` to inherited Object.prototype members and leak a function
  // into the request as the verbosity.
  if (
    !Object.prototype.hasOwnProperty.call(openRouterAnthropicVerbosityByEffort, reasoningEffort)
  ) {
    return undefined;
  }
  const verbosity = openRouterAnthropicVerbosityByEffort[reasoningEffort];
  if (verbosity !== 'xhigh' || typeof model !== 'string') {
    return verbosity;
  }
  return isOpenRouterClaude46Model(model) ? 'max' : 'xhigh';
}

/**
 * Applies OpenRouter reasoning settings to `llmConfig` / `modelKwargs`.
 *
 * - No reasoning effort: sets the legacy `include_reasoning = true` flag.
 * - Non-adaptive model: forwards `reasoning: { effort }`.
 * - Adaptive Anthropic model: derives a verbosity from the effort (unless one is
 *   already set), then either disables reasoning (`none`) or sends
 *   `reasoning: { enabled: true }`.
 *
 * @returns `true` when `modelKwargs.reasoning` was written.
 */
function applyOpenRouterReasoningConfig({
  model,
  llmConfig,
  modelKwargs,
  reasoningEffort,
}: {
  model?: string | null;
  llmConfig: OpenAILLMConfig;
  modelKwargs: Record<string, unknown>;
  reasoningEffort?: string | null;
}): boolean {
  if (!hasReasoningParams({ reasoning_effort: reasoningEffort })) {
    llmConfig.include_reasoning = true;
    return false;
  }

  if (!isOpenRouterAnthropicAdaptiveModel(model)) {
    modelKwargs.reasoning = { effort: reasoningEffort };
    return true;
  }

  // An explicitly configured verbosity always wins over the effort-derived one.
  const adaptiveVerbosity = getOpenRouterAnthropicVerbosity(reasoningEffort, model);
  if (adaptiveVerbosity != null && llmConfig.verbosity == null) {
    llmConfig.verbosity = adaptiveVerbosity;
  }

  if (reasoningEffort === 'none') {
    llmConfig.include_reasoning = false;
    return false;
  }

  modelKwargs.reasoning = { enabled: true };
  return true;
}

/** Returns `modelKwargs.text` when it is a plain (non-array) object, otherwise an empty object. */
function getModelKwargsText(modelKwargs: Record<string, unknown>): Record<string, unknown> {
  const { text } = modelKwargs;
  if (text == null || typeof text !== 'object' || Array.isArray(text)) {
    return {};
  }
  return text as Record<string, unknown>;
}

/**
 * For Responses API requests, relocates verbosity to `modelKwargs.text.verbosity`,
 * merging with any existing `text` object and removing it from where it was staged
 * (`llmConfig.verbosity` for OpenRouter, `modelKwargs.verbosity` otherwise).
 *
 * @returns `true` when `modelKwargs.text` was written.
 */
function applyResponsesVerbosity({
  llmConfig,
  modelKwargs,
  useOpenRouter,
}: {
  llmConfig: OpenAILLMConfig;
  modelKwargs: Record<string, unknown>;
  useOpenRouter?: boolean;
}): boolean {
  if (llmConfig.useResponsesApi !== true) {
    return false;
  }

  if (useOpenRouter && llmConfig.verbosity) {
    modelKwargs.text = {
      ...getModelKwargsText(modelKwargs),
      verbosity: llmConfig.verbosity,
    };
    delete llmConfig.verbosity;
    return true;
  }

  if (!useOpenRouter && modelKwargs.verbosity) {
    modelKwargs.text = {
      ...getModelKwargsText(modelKwargs),
      verbosity: modelKwargs.verbosity,
    };
    delete modelKwargs.verbosity;
    return true;
  }

  return false;
}
/**
 * OpenAI client options extended with LibreChat-specific fields.
 * The upstream `verbosity` typing is dropped and re-declared as a free-form
 * string so values outside the upstream typing can be assigned.
 */
export type OAIClientOptions = Omit<OpenAIClientOptions, 'verbosity'> & {
  include_reasoning?: boolean;
  promptCache?: boolean;
  _lc_stream_delay?: number;
  verbosity?: string | null;
};