mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-06-27 01:41:19 +00:00
🩹 fix: Codex round 2 — drop agent load, summary marker, edit-invalidation
- Stop loading agent/model-spec config server-side (closes the agent-access IDOR and the spec-prompt special-casing). Provider/model/window now come from the client-resolved request (`limits.endpoint`/model — the agent's real provider, not the `agents` endpoint, so the tokenizer is right). Agent/spec/promptPrefix instructions are uniformly deferred to the full-fidelity follow-up. - Detect summarized branches via the live path's `metadata.summaryUsedTokens` marker (was the wrong `summaryTokenCount` field) and fall back to the summary-aware estimate. - Invalidate the projection query on in-place message edits via a branch content `revision` in the cache key (the tail id is unchanged on edit). Deferred (valid, not a regression): same-window endpoint/model switch keeps a window-matched snapshot — needs endpoint/model persisted on the snapshot, which lands with the fidelity follow-up. Smoke-tested: fits / prunes / summarized→null / no-window→null.
This commit is contained in:
parent
5701a9da9c
commit
0639c5fd21
5 changed files with 37 additions and 43 deletions
|
|
@ -18,7 +18,7 @@ async function contextProjectionController(req, res) {
|
|||
return;
|
||||
}
|
||||
const projection = await resolveContextProjection(
|
||||
{ userId: req.user?.id, getMessages: db.getMessages, getAgent: db.getAgent },
|
||||
{ userId: req.user?.id, getMessages: db.getMessages },
|
||||
params,
|
||||
);
|
||||
res.json(projection ?? null);
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ export const useContextProjectionQuery = (
|
|||
params?.model,
|
||||
params?.agentId,
|
||||
params?.maxContextTokens,
|
||||
params?.revision,
|
||||
],
|
||||
() => dataService.getContextProjection(params as t.TContextProjectionRequest),
|
||||
{
|
||||
|
|
|
|||
|
|
@ -121,12 +121,16 @@ export default function useTokenUsage({
|
|||
? {
|
||||
conversationId: conversation.conversationId,
|
||||
messageId: branchTotals.tailId,
|
||||
endpoint: conversation.endpoint,
|
||||
model: conversation.model ?? undefined,
|
||||
/** Resolved provider/model (e.g. an agent's actual provider, not the
|
||||
* `agents` endpoint) so the server picks the right tokenizer. */
|
||||
endpoint: limits.endpoint || conversation.endpoint,
|
||||
model: limits.model || conversation.model || undefined,
|
||||
agentId: conversation.agent_id ?? undefined,
|
||||
spec: conversation.spec ?? undefined,
|
||||
maxContextTokens: resolvedMax,
|
||||
calibrationRatio: branchSnapshot?.calibrationRatio,
|
||||
/** Content revision so an in-place message edit (same tail id) refetches. */
|
||||
revision: branchTotals.input + branchTotals.output,
|
||||
}
|
||||
: null;
|
||||
const { data: projectionData } = useContextProjectionQuery(projectionParams);
|
||||
|
|
|
|||
|
|
@ -7,16 +7,11 @@ interface ProjectionMessage {
|
|||
messageId: string;
|
||||
parentMessageId?: string | null;
|
||||
tokenCount?: number;
|
||||
summaryTokenCount?: number;
|
||||
isCreatedByUser?: boolean;
|
||||
text?: string;
|
||||
}
|
||||
|
||||
interface ProjectionAgent {
|
||||
instructions?: string;
|
||||
provider?: string;
|
||||
model?: string;
|
||||
model_parameters?: { maxContextTokens?: number };
|
||||
/** Compaction marker written by the live path (`agents/usage.ts`); its
|
||||
* presence means the next call sends the summary + tail, not this raw chain. */
|
||||
metadata?: { summaryUsedTokens?: number };
|
||||
}
|
||||
|
||||
export interface ContextProjectionDeps {
|
||||
|
|
@ -26,7 +21,6 @@ export interface ContextProjectionDeps {
|
|||
filter: { conversationId: string; user?: string },
|
||||
select?: string,
|
||||
) => Promise<ProjectionMessage[]>;
|
||||
getAgent: (filter: { id: string }) => Promise<ProjectionAgent | null>;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -77,21 +71,29 @@ function resolveProvider(value?: string): Providers {
|
|||
}
|
||||
|
||||
/**
|
||||
* Server-side context-usage projection: reconstructs the viewed branch + the
|
||||
* resolved agent config and asks the agents SDK what the next call's context
|
||||
* would be, WITHOUT invoking the model. Reuses LibreChat's already-calibrated
|
||||
* per-message `tokenCount`s (no re-tokenizing). Returns null when there is no
|
||||
* resolvable context window. NOTE: this first cut targets message-windowing
|
||||
* accuracy — tool-schema tokens are not yet included; a follow-up will reuse the
|
||||
* full `initializeAgent` path for exact instruction/tool overhead.
|
||||
* Server-side context-usage projection: reconstructs the viewed branch and asks
|
||||
* the agents SDK what the next call's context would be, WITHOUT invoking the
|
||||
* model. Provider/model/window come from the (client-resolved) request — no
|
||||
* agent or model-spec config is loaded here, so there is no cross-user config
|
||||
* exposure. Reuses LibreChat's already-calibrated per-message `tokenCount`s (no
|
||||
* re-tokenizing). Returns null when there is no resolvable context window.
|
||||
* NOTE: this first cut targets message-windowing accuracy — instruction and
|
||||
* tool-schema tokens (agent instructions, `promptPrefix`, model-spec presets,
|
||||
* tool schemas) are NOT yet included; a follow-up will reuse the full
|
||||
* `initializeAgent`/send path for exact overhead and proper access control.
|
||||
*/
|
||||
export async function resolveContextProjection(
|
||||
deps: ContextProjectionDeps,
|
||||
params: TContextProjectionRequest,
|
||||
): Promise<TContextUsageEvent | null> {
|
||||
const maxContextTokens = params.maxContextTokens;
|
||||
if (maxContextTokens == null || maxContextTokens <= 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const stored = await deps.getMessages(
|
||||
{ conversationId: params.conversationId, user: deps.userId },
|
||||
'messageId parentMessageId tokenCount summaryTokenCount isCreatedByUser text',
|
||||
'messageId parentMessageId tokenCount isCreatedByUser text metadata',
|
||||
);
|
||||
const branch = resolveBranch(stored, params.messageId);
|
||||
if (branch.length === 0) {
|
||||
|
|
@ -100,30 +102,15 @@ export async function resolveContextProjection(
|
|||
|
||||
/** A summarized/compacted branch's next call sends the saved summary + the
|
||||
* post-summary tail, NOT this raw parent chain — projecting from the full
|
||||
* history would prune/count the wrong context and omit the summary. Until the
|
||||
* follow-up replays the summary boundary, fall back (null) so the client's
|
||||
* summary-baseline-aware estimate handles these branches. */
|
||||
if (branch.some((message) => (message.summaryTokenCount ?? 0) > 0)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
let instructions: string | undefined;
|
||||
let providerValue: string | undefined = params.endpoint;
|
||||
let model = params.model;
|
||||
let maxContextTokens = params.maxContextTokens;
|
||||
if (params.agentId != null && params.agentId !== '') {
|
||||
const agent = await deps.getAgent({ id: params.agentId });
|
||||
if (agent != null) {
|
||||
instructions = agent.instructions;
|
||||
providerValue = agent.provider ?? providerValue;
|
||||
model = agent.model ?? model;
|
||||
maxContextTokens = maxContextTokens ?? agent.model_parameters?.maxContextTokens;
|
||||
}
|
||||
}
|
||||
if (maxContextTokens == null || maxContextTokens <= 0) {
|
||||
* history would prune/count the wrong context and omit the summary. Detect it
|
||||
* via the live path's `metadata.summaryUsedTokens` marker and fall back (null)
|
||||
* so the client's summary-baseline-aware estimate handles these branches until
|
||||
* a follow-up replays the summary boundary. */
|
||||
if (branch.some((message) => (message.metadata?.summaryUsedTokens ?? 0) > 0)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const model = params.model;
|
||||
const encoding = (model ?? '').toLowerCase().includes('claude') ? 'claude' : 'o200k_base';
|
||||
const tokenCounter = await createTokenCounter(encoding);
|
||||
|
||||
|
|
@ -147,8 +134,7 @@ export async function resolveContextProjection(
|
|||
return projectAgentContextUsage({
|
||||
agent: {
|
||||
agentId: params.agentId ?? 'projection',
|
||||
provider: resolveProvider(providerValue),
|
||||
instructions,
|
||||
provider: resolveProvider(params.endpoint),
|
||||
maxContextTokens,
|
||||
},
|
||||
messages,
|
||||
|
|
|
|||
|
|
@ -106,6 +106,9 @@ export type TContextProjectionRequest = {
|
|||
maxContextTokens?: number;
|
||||
/** Provider-calibrated ratio from a prior snapshot, applied as a static seed. */
|
||||
calibrationRatio?: number;
|
||||
/** Client-only cache-bust: a branch content revision so a message edit
|
||||
* (which keeps the same tail id) refetches. The server ignores it. */
|
||||
revision?: number;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue