diff --git a/api/server/controllers/ContextProjectionController.js b/api/server/controllers/ContextProjectionController.js index 88bbbed366..eaf9592e73 100644 --- a/api/server/controllers/ContextProjectionController.js +++ b/api/server/controllers/ContextProjectionController.js @@ -18,7 +18,7 @@ async function contextProjectionController(req, res) { return; } const projection = await resolveContextProjection( - { userId: req.user?.id, getMessages: db.getMessages, getAgent: db.getAgent }, + { userId: req.user?.id, getMessages: db.getMessages }, params, ); res.json(projection ?? null); diff --git a/client/src/data-provider/Endpoints/queries.ts b/client/src/data-provider/Endpoints/queries.ts index 29cacf3300..1ea8b3c0aa 100644 --- a/client/src/data-provider/Endpoints/queries.ts +++ b/client/src/data-provider/Endpoints/queries.ts @@ -61,6 +61,7 @@ export const useContextProjectionQuery = ( params?.model, params?.agentId, params?.maxContextTokens, + params?.revision, ], () => dataService.getContextProjection(params as t.TContextProjectionRequest), { diff --git a/client/src/hooks/Chat/useTokenUsage.ts b/client/src/hooks/Chat/useTokenUsage.ts index f586c3db22..42360f5b3c 100644 --- a/client/src/hooks/Chat/useTokenUsage.ts +++ b/client/src/hooks/Chat/useTokenUsage.ts @@ -121,12 +121,16 @@ export default function useTokenUsage({ ? { conversationId: conversation.conversationId, messageId: branchTotals.tailId, - endpoint: conversation.endpoint, - model: conversation.model ?? undefined, + /** Resolved provider/model (e.g. an agent's actual provider, not the + * `agents` endpoint) so the server picks the right tokenizer. */ + endpoint: limits.endpoint || conversation.endpoint, + model: limits.model || conversation.model || undefined, agentId: conversation.agent_id ?? undefined, spec: conversation.spec ?? undefined, maxContextTokens: resolvedMax, calibrationRatio: branchSnapshot?.calibrationRatio, + /** Content revision so an in-place message edit (same tail id) refetches. */ + revision: branchTotals.input + branchTotals.output, } : null; const { data: projectionData } = useContextProjectionQuery(projectionParams); diff --git a/packages/api/src/endpoints/projection.ts b/packages/api/src/endpoints/projection.ts index 2362e219e0..a7703f41b7 100644 --- a/packages/api/src/endpoints/projection.ts +++ b/packages/api/src/endpoints/projection.ts @@ -7,16 +7,11 @@ interface ProjectionMessage { messageId: string; parentMessageId?: string | null; tokenCount?: number; - summaryTokenCount?: number; isCreatedByUser?: boolean; text?: string; -} - -interface ProjectionAgent { - instructions?: string; - provider?: string; - model?: string; - model_parameters?: { maxContextTokens?: number }; + /** Compaction marker written by the live path (`agents/usage.ts`); its + * presence means the next call sends the summary + tail, not this raw chain. */ + metadata?: { summaryUsedTokens?: number }; } export interface ContextProjectionDeps { @@ -26,7 +21,6 @@ export interface ContextProjectionDeps { filter: { conversationId: string; user?: string }, select?: string, ) => Promise; - getAgent: (filter: { id: string }) => Promise; } /** @@ -77,21 +71,29 @@ function resolveProvider(value?: string): Providers { } /** - * Server-side context-usage projection: reconstructs the viewed branch + the - * resolved agent config and asks the agents SDK what the next call's context - * would be, WITHOUT invoking the model. Reuses LibreChat's already-calibrated - * per-message `tokenCount`s (no re-tokenizing). Returns null when there is no - * resolvable context window. NOTE: this first cut targets message-windowing - * accuracy — tool-schema tokens are not yet included; a follow-up will reuse the - * full `initializeAgent` path for exact instruction/tool overhead. + * Server-side context-usage projection: reconstructs the viewed branch and asks + * the agents SDK what the next call's context would be, WITHOUT invoking the + * model. Provider/model/window come from the (client-resolved) request — no + * agent or model-spec config is loaded here, so there is no cross-user config + * exposure. Reuses LibreChat's already-calibrated per-message `tokenCount`s (no + * re-tokenizing). Returns null when there is no resolvable context window. + * NOTE: this first cut targets message-windowing accuracy — instruction and + * tool-schema tokens (agent instructions, `promptPrefix`, model-spec presets, + * tool schemas) are NOT yet included; a follow-up will reuse the full + * `initializeAgent`/send path for exact overhead and proper access control. */ export async function resolveContextProjection( deps: ContextProjectionDeps, params: TContextProjectionRequest, ): Promise { + const maxContextTokens = params.maxContextTokens; + if (maxContextTokens == null || maxContextTokens <= 0) { + return null; + } + const stored = await deps.getMessages( { conversationId: params.conversationId, user: deps.userId }, - 'messageId parentMessageId tokenCount summaryTokenCount isCreatedByUser text', + 'messageId parentMessageId tokenCount isCreatedByUser text metadata', ); const branch = resolveBranch(stored, params.messageId); if (branch.length === 0) { @@ -100,30 +102,15 @@ export async function resolveContextProjection( /** A summarized/compacted branch's next call sends the saved summary + the * post-summary tail, NOT this raw parent chain — projecting from the full - * history would prune/count the wrong context and omit the summary. Until the - * follow-up replays the summary boundary, fall back (null) so the client's - * summary-baseline-aware estimate handles these branches. */ - if (branch.some((message) => (message.summaryTokenCount ?? 0) > 0)) { - return null; - } - - let instructions: string | undefined; - let providerValue: string | undefined = params.endpoint; - let model = params.model; - let maxContextTokens = params.maxContextTokens; - if (params.agentId != null && params.agentId !== '') { - const agent = await deps.getAgent({ id: params.agentId }); - if (agent != null) { - instructions = agent.instructions; - providerValue = agent.provider ?? providerValue; - model = agent.model ?? model; - maxContextTokens = maxContextTokens ?? agent.model_parameters?.maxContextTokens; - } - } - if (maxContextTokens == null || maxContextTokens <= 0) { + * history would prune/count the wrong context and omit the summary. Detect it + * via the live path's `metadata.summaryUsedTokens` marker and fall back (null) + * so the client's summary-baseline-aware estimate handles these branches until + * a follow-up replays the summary boundary. */ + if (branch.some((message) => (message.metadata?.summaryUsedTokens ?? 0) > 0)) { return null; } + const model = params.model; const encoding = (model ?? '').toLowerCase().includes('claude') ? 'claude' : 'o200k_base'; const tokenCounter = await createTokenCounter(encoding); @@ -147,8 +134,7 @@ export async function resolveContextProjection( return projectAgentContextUsage({ agent: { agentId: params.agentId ?? 'projection', - provider: resolveProvider(providerValue), - instructions, + provider: resolveProvider(params.endpoint), maxContextTokens, }, messages, diff --git a/packages/data-provider/src/types/runs.ts b/packages/data-provider/src/types/runs.ts index cb87d7f512..8e1033c07f 100644 --- a/packages/data-provider/src/types/runs.ts +++ b/packages/data-provider/src/types/runs.ts @@ -106,6 +106,9 @@ export type TContextProjectionRequest = { maxContextTokens?: number; /** Provider-calibrated ratio from a prior snapshot, applied as a static seed. */ calibrationRatio?: number; + /** Client-only cache-bust: a branch content revision so a message edit + * (which keeps the same tail id) refetches. The server ignores it. */ + revision?: number; }; /**