diff --git a/api/server/controllers/ContextProjectionController.js b/api/server/controllers/ContextProjectionController.js
new file mode 100644
index 0000000000..df5c79d3bf
--- /dev/null
+++ b/api/server/controllers/ContextProjectionController.js
@@ -0,0 +1,48 @@
+const { logger } = require('@librechat/data-schemas');
+const { resolveContextProjection } = require('@librechat/api');
+const db = require('~/models');
+
+/**
+ * Returns a server-side context-usage projection for the viewed branch + config
+ * (agents SDK, no model call) — powers the gauge for snapshot-less branches and
+ * after a model/window switch. Resolution lives in `@librechat/api`; this
+ * controller validates the request body and injects request-scoped model
+ * accessors.
+ *
+ * Responds with `null` when the body lacks a non-empty string `conversationId`
+ * or `messageId`, or when no projection can be resolved; responds with HTTP 500
+ * when resolution throws.
+ * @param {ServerRequest} req
+ * @param {ServerResponse} res
+ */
+async function contextProjectionController(req, res) {
+  try {
+    const params = req.body ?? {};
+    const { conversationId, messageId } = params;
+    /** Only non-empty strings may reach the query filter built from these ids. */
+    const hasIds =
+      typeof conversationId === 'string' &&
+      conversationId !== '' &&
+      typeof messageId === 'string' &&
+      messageId !== '';
+    if (!hasIds) {
+      res.json(null);
+      return;
+    }
+    const userId = req.user.id;
+    const projection = await resolveContextProjection(
+      {
+        /** Restrict the lookup to the caller's own messages. */
+        getMessages: (filter, select) => db.getMessages({ ...filter, user: userId }, select),
+        getAgent: db.getAgent,
+      },
+      params,
+    );
+    res.json(projection ?? null);
+  } catch (error) {
+    logger.error('[contextProjectionController]', error);
+    res.status(500).json({ error: 'Failed to resolve context projection' });
+  }
+}
+
+module.exports = contextProjectionController;
diff --git a/api/server/routes/endpoints.js b/api/server/routes/endpoints.js
index 8b1fceccc4..b11de153df 100644
--- a/api/server/routes/endpoints.js
+++ b/api/server/routes/endpoints.js
@@ -3,10 +3,12 @@ const requireJwtAuth = require('~/server/middleware/requireJwtAuth');
 const configMiddleware = require('~/server/middleware/config/app');
 const endpointController = require('~/server/controllers/EndpointController');
 const tokenConfigController = require('~/server/controllers/TokenConfigController');
+const contextProjectionController = require('~/server/controllers/ContextProjectionController');
 
 const router = express.Router();
 /** Auth required for role/tenant-scoped endpoint config resolution. */
 router.get('/', requireJwtAuth, endpointController);
 router.get('/token-config', requireJwtAuth, configMiddleware, tokenConfigController);
+router.post('/context-projection', requireJwtAuth, configMiddleware, contextProjectionController);
 
 module.exports = router;
diff --git a/packages/api/src/endpoints/index.ts b/packages/api/src/endpoints/index.ts
index 9e6e9dbac0..4be03df1e3 100644
--- a/packages/api/src/endpoints/index.ts
+++ b/packages/api/src/endpoints/index.ts
@@ -6,4 +6,5 @@ export * from './google';
 export * from './models';
 export * from './openai';
 export * from './pricing';
+export * from './projection';
 export * from './tokenConfig';
diff --git a/packages/api/src/endpoints/projection.ts b/packages/api/src/endpoints/projection.ts
new file mode 100644
index 0000000000..a126545eb6
--- /dev/null
+++ b/packages/api/src/endpoints/projection.ts
@@ -0,0 +1,162 @@
+import { HumanMessage, AIMessage } from '@langchain/core/messages';
+import { Providers, createTokenCounter, projectAgentContextUsage } from '@librechat/agents';
+import type { BaseMessage } from '@langchain/core/messages';
+import type { TContextProjectionRequest, TContextUsageEvent } from 'librechat-data-provider';
+
+/** Message fields read by the projection; matches the `select` string passed to `getMessages`. */
+interface ProjectionMessage {
+  messageId: string;
+  parentMessageId?: string | null;
+  tokenCount?: number;
+  isCreatedByUser?: boolean;
+  text?: string;
+}
+
+/** Agent fields that can override the request's provider, model and context window. */
+interface ProjectionAgent {
+  instructions?: string;
+  provider?: string;
+  model?: string;
+  model_parameters?: { maxContextTokens?: number };
+}
+
+/** Data accessors supplied by the caller of `resolveContextProjection`. */
+export interface ContextProjectionDeps {
+  getMessages: (
+    filter: { conversationId: string },
+    select?: string,
+  ) => Promise<ProjectionMessage[]>;
+  getAgent: (filter: { id: string }) => Promise<ProjectionAgent | null>;
+}
+
+/**
+ * Walks the parent chain from `tailId` to root and returns the branch messages
+ * oldest→newest. The visited set guards against cycles / self-referential links.
+ * Returns an empty array when `tailId` is not among `messages`; the walk also
+ * stops at the first parent id that cannot be found.
+ */
+function resolveBranch(messages: ProjectionMessage[], tailId: string): ProjectionMessage[] {
+  const byId = new Map<string, ProjectionMessage>();
+  for (const message of messages) {
+    byId.set(message.messageId, message);
+  }
+  const branch: ProjectionMessage[] = [];
+  const seen = new Set<string>();
+  let currentId: string | null | undefined = tailId;
+  while (currentId != null && !seen.has(currentId)) {
+    const message = byId.get(currentId);
+    if (message == null) {
+      break;
+    }
+    seen.add(currentId);
+    branch.push(message);
+    currentId = message.parentMessageId;
+  }
+  return branch.reverse();
+}
+
+/**
+ * Maps an endpoint/provider string to the agents `Providers` enum. Tries a
+ * case-insensitive exact match first, then substring heuristics, and defaults
+ * to `Providers.OPENAI` for empty or unrecognized values.
+ */
+function resolveProvider(value?: string): Providers {
+  if (value == null || value === '') {
+    return Providers.OPENAI;
+  }
+  const lower = value.toLowerCase();
+  for (const provider of Object.values(Providers)) {
+    if (provider.toLowerCase() === lower) {
+      return provider;
+    }
+  }
+  if (lower.includes('anthropic') || lower.includes('claude')) {
+    return Providers.ANTHROPIC;
+  }
+  if (lower.includes('google') || lower.includes('gemini') || lower.includes('vertex')) {
+    return Providers.GOOGLE;
+  }
+  if (lower.includes('bedrock')) {
+    return Providers.BEDROCK;
+  }
+  return Providers.OPENAI;
+}
+
+/**
+ * Server-side context-usage projection: reconstructs the viewed branch + the
+ * resolved agent config and asks the agents SDK what the next call's context
+ * would be, WITHOUT invoking the model. Reuses LibreChat's already-calibrated
+ * per-message `tokenCount`s (no re-tokenizing). Returns null when there is no
+ * resolvable context window. NOTE: this first cut targets message-windowing
+ * accuracy — tool-schema tokens are not yet included; a follow-up will reuse the
+ * full `initializeAgent` path for exact instruction/tool overhead.
+ *
+ * @param deps - Message and agent accessors; the caller is responsible for any
+ * access scoping they apply.
+ * @param params - Branch tail (`conversationId`, `messageId`) plus the optional
+ * `agentId`, `endpoint`, `model`, `maxContextTokens` and `calibrationRatio`.
+ * @returns The SDK projection, or null when the branch is empty or no positive,
+ * finite `maxContextTokens` can be resolved.
+ */
+export async function resolveContextProjection(
+  deps: ContextProjectionDeps,
+  params: TContextProjectionRequest,
+): Promise<TContextUsageEvent | null> {
+  const stored = await deps.getMessages(
+    { conversationId: params.conversationId },
+    'messageId parentMessageId tokenCount isCreatedByUser text',
+  );
+  const branch = resolveBranch(stored, params.messageId);
+  if (branch.length === 0) {
+    return null;
+  }
+
+  /** Anything other than a non-empty string `agentId` is treated as absent. */
+  const agentId =
+    typeof params.agentId === 'string' && params.agentId !== '' ? params.agentId : undefined;
+  let instructions: string | undefined;
+  let providerValue: string | undefined = params.endpoint;
+  let model = params.model;
+  let maxContextTokens = params.maxContextTokens;
+  if (agentId != null) {
+    const agent = await deps.getAgent({ id: agentId });
+    if (agent != null) {
+      instructions = agent.instructions;
+      providerValue = agent.provider ?? providerValue;
+      model = agent.model ?? model;
+      /** A window supplied in the request wins over the agent's stored setting. */
+      maxContextTokens = maxContextTokens ?? agent.model_parameters?.maxContextTokens;
+    }
+  }
+  /** `Number.isFinite` also rejects NaN, which would slip past the `<= 0` comparison. */
+  if (maxContextTokens == null || !Number.isFinite(maxContextTokens) || maxContextTokens <= 0) {
+    return null;
+  }
+
+  const messages: BaseMessage[] = [];
+  const indexTokenCountMap: Record<string, number> = {};
+  for (let i = 0; i < branch.length; i++) {
+    const message = branch[i];
+    const text = message.text ?? '';
+    messages.push(message.isCreatedByUser === true ? new HumanMessage(text) : new AIMessage(text));
+    /** Messages stored without a `tokenCount` are recorded with a count of 0. */
+    indexTokenCountMap[String(i)] = message.tokenCount ?? 0;
+  }
+
+  /** The tokenizer encoding is chosen from the model name only. */
+  const encoding = (model ?? '').toLowerCase().includes('claude') ? 'claude' : 'o200k_base';
+  const tokenCounter = await createTokenCounter(encoding);
+
+  return projectAgentContextUsage({
+    agent: {
+      agentId: agentId ?? 'projection',
+      provider: resolveProvider(providerValue),
+      instructions,
+      maxContextTokens,
+    },
+    messages,
+    tokenCounter,
+    indexTokenCountMap,
+    calibrationRatio: params.calibrationRatio,
+  });
+}