🩹 fix: Codex round 2 — drop agent load, summary marker, edit-invalidation

- Stop loading agent/model-spec config server-side (closes the agent-access IDOR and the spec-prompt special-casing). Provider/model/window now come from the client-resolved request (`limits.endpoint`/`limits.model` — the agent's real provider, not the `agents` endpoint, so the tokenizer is right). Agent/spec/`promptPrefix` instructions are uniformly deferred to the full-fidelity follow-up. - Detect summarized branches via the live path's `metadata.summaryUsedTokens` marker (was the wrong `summaryTokenCount` field) and fall back to the summary-aware estimate. - Invalidate the projection query on in-place message edits via a branch content `revision` in the cache key (the tail id is unchanged on edit). Deferred (valid, not a regression): same-window endpoint/model switch keeps a window-matched snapshot — needs endpoint/model persisted on the snapshot, which lands with the fidelity follow-up. Smoke-tested: fits / prunes / summarized→null / no-window→null.
2026-06-27 01:41:19 +00:00 · 2026-06-16 15:47:40 -04:00 · 2026-06-16 15:47:40 -04:00 · 0639c5fd21
commit 0639c5fd21
parent 5701a9da9c
5 changed files with 37 additions and 43 deletions
--- a/api/server/controllers/ContextProjectionController.js
+++ b/api/server/controllers/ContextProjectionController.js
@ -18,7 +18,7 @@ async function contextProjectionController(req, res) {
      return;
    }
    const projection = await resolveContextProjection(
-      { userId: req.user?.id, getMessages: db.getMessages, getAgent: db.getAgent },
+      { userId: req.user?.id, getMessages: db.getMessages },
      params,
    );
    res.json(projection ?? null);
--- a/client/src/data-provider/Endpoints/queries.ts
+++ b/client/src/data-provider/Endpoints/queries.ts
@ -61,6 +61,7 @@ export const useContextProjectionQuery = (
      params?.model,
      params?.agentId,
      params?.maxContextTokens,
+      params?.revision,
    ],
    () => dataService.getContextProjection(params as t.TContextProjectionRequest),
    {
--- a/client/src/hooks/Chat/useTokenUsage.ts
+++ b/client/src/hooks/Chat/useTokenUsage.ts
@ -121,12 +121,16 @@ export default function useTokenUsage({
      ? {
          conversationId: conversation.conversationId,
          messageId: branchTotals.tailId,
-          endpoint: conversation.endpoint,
-          model: conversation.model ?? undefined,
+          /** Resolved provider/model (e.g. an agent's actual provider, not the
+           *  `agents` endpoint) so the server picks the right tokenizer. */
+          endpoint: limits.endpoint || conversation.endpoint,
+          model: limits.model || conversation.model || undefined,
          agentId: conversation.agent_id ?? undefined,
          spec: conversation.spec ?? undefined,
          maxContextTokens: resolvedMax,
          calibrationRatio: branchSnapshot?.calibrationRatio,
+          /** Content revision so an in-place message edit (same tail id) refetches. */
+          revision: branchTotals.input + branchTotals.output,
        }
      : null;
  const { data: projectionData } = useContextProjectionQuery(projectionParams);
--- a/packages/api/src/endpoints/projection.ts
+++ b/packages/api/src/endpoints/projection.ts
@ -7,16 +7,11 @@ interface ProjectionMessage {
  messageId: string;
  parentMessageId?: string | null;
  tokenCount?: number;
-  summaryTokenCount?: number;
  isCreatedByUser?: boolean;
  text?: string;
-}
-
-interface ProjectionAgent {
-  instructions?: string;
-  provider?: string;
-  model?: string;
-  model_parameters?: { maxContextTokens?: number };
+  /** Compaction marker written by the live path (`agents/usage.ts`); its
+   *  presence means the next call sends the summary + tail, not this raw chain. */
+  metadata?: { summaryUsedTokens?: number };
 }

 export interface ContextProjectionDeps {
@ -26,7 +21,6 @@ export interface ContextProjectionDeps {
    filter: { conversationId: string; user?: string },
    select?: string,
  ) => Promise<ProjectionMessage[]>;
-  getAgent: (filter: { id: string }) => Promise<ProjectionAgent | null>;
 }

 /**
@ -77,21 +71,29 @@ function resolveProvider(value?: string): Providers {
 }

 /**
- * Server-side context-usage projection: reconstructs the viewed branch + the
- * resolved agent config and asks the agents SDK what the next call's context
- * would be, WITHOUT invoking the model. Reuses LibreChat's already-calibrated
- * per-message `tokenCount`s (no re-tokenizing). Returns null when there is no
- * resolvable context window. NOTE: this first cut targets message-windowing
- * accuracy — tool-schema tokens are not yet included; a follow-up will reuse the
- * full `initializeAgent` path for exact instruction/tool overhead.
+ * Server-side context-usage projection: reconstructs the viewed branch and asks
+ * the agents SDK what the next call's context would be, WITHOUT invoking the
+ * model. Provider/model/window come from the (client-resolved) request — no
+ * agent or model-spec config is loaded here, so there is no cross-user config
+ * exposure. Reuses LibreChat's already-calibrated per-message `tokenCount`s (no
+ * re-tokenizing). Returns null when there is no resolvable context window.
+ * NOTE: this first cut targets message-windowing accuracy — instruction and
+ * tool-schema tokens (agent instructions, `promptPrefix`, model-spec presets,
+ * tool schemas) are NOT yet included; a follow-up will reuse the full
+ * `initializeAgent`/send path for exact overhead and proper access control.
 */
 export async function resolveContextProjection(
  deps: ContextProjectionDeps,
  params: TContextProjectionRequest,
 ): Promise<TContextUsageEvent | null> {
+  const maxContextTokens = params.maxContextTokens;
+  if (maxContextTokens == null || maxContextTokens <= 0) {
+    return null;
+  }
+
  const stored = await deps.getMessages(
    { conversationId: params.conversationId, user: deps.userId },
-    'messageId parentMessageId tokenCount summaryTokenCount isCreatedByUser text',
+    'messageId parentMessageId tokenCount isCreatedByUser text metadata',
  );
  const branch = resolveBranch(stored, params.messageId);
  if (branch.length === 0) {
@ -100,30 +102,15 @@ export async function resolveContextProjection(

  /** A summarized/compacted branch's next call sends the saved summary + the
   *  post-summary tail, NOT this raw parent chain — projecting from the full
-   *  history would prune/count the wrong context and omit the summary. Until the
-   *  follow-up replays the summary boundary, fall back (null) so the client's
-   *  summary-baseline-aware estimate handles these branches. */
-  if (branch.some((message) => (message.summaryTokenCount ?? 0) > 0)) {
-    return null;
-  }
-
-  let instructions: string | undefined;
-  let providerValue: string | undefined = params.endpoint;
-  let model = params.model;
-  let maxContextTokens = params.maxContextTokens;
-  if (params.agentId != null && params.agentId !== '') {
-    const agent = await deps.getAgent({ id: params.agentId });
-    if (agent != null) {
-      instructions = agent.instructions;
-      providerValue = agent.provider ?? providerValue;
-      model = agent.model ?? model;
-      maxContextTokens = maxContextTokens ?? agent.model_parameters?.maxContextTokens;
-    }
-  }
-  if (maxContextTokens == null || maxContextTokens <= 0) {
+   *  history would prune/count the wrong context and omit the summary. Detect it
+   *  via the live path's `metadata.summaryUsedTokens` marker and fall back (null)
+   *  so the client's summary-baseline-aware estimate handles these branches until
+   *  a follow-up replays the summary boundary. */
+  if (branch.some((message) => (message.metadata?.summaryUsedTokens ?? 0) > 0)) {
    return null;
  }

+  const model = params.model;
  const encoding = (model ?? '').toLowerCase().includes('claude') ? 'claude' : 'o200k_base';
  const tokenCounter = await createTokenCounter(encoding);

@ -147,8 +134,7 @@ export async function resolveContextProjection(
  return projectAgentContextUsage({
    agent: {
      agentId: params.agentId ?? 'projection',
-      provider: resolveProvider(providerValue),
-      instructions,
+      provider: resolveProvider(params.endpoint),
      maxContextTokens,
    },
    messages,
--- a/packages/data-provider/src/types/runs.ts
+++ b/packages/data-provider/src/types/runs.ts
@ -106,6 +106,9 @@ export type TContextProjectionRequest = {
  maxContextTokens?: number;
  /** Provider-calibrated ratio from a prior snapshot, applied as a static seed. */
  calibrationRatio?: number;
+  /** Client-only cache-bust: a branch content revision so a message edit
+   *  (which keeps the same tail id) refetches. The server ignores it. */
+  revision?: number;
 };

 /**