diff --git a/api/server/controllers/ContextProjectionController.js b/api/server/controllers/ContextProjectionController.js
index 88bbbed366..eaf9592e73 100644
--- a/api/server/controllers/ContextProjectionController.js
+++ b/api/server/controllers/ContextProjectionController.js
@@ -18,7 +18,7 @@ async function contextProjectionController(req, res) {
       return;
     }
     const projection = await resolveContextProjection(
-      { userId: req.user?.id, getMessages: db.getMessages, getAgent: db.getAgent },
+      { userId: req.user?.id, getMessages: db.getMessages },
       params,
     );
     res.json(projection ?? null);
diff --git a/client/src/data-provider/Endpoints/queries.ts b/client/src/data-provider/Endpoints/queries.ts
index 29cacf3300..1ea8b3c0aa 100644
--- a/client/src/data-provider/Endpoints/queries.ts
+++ b/client/src/data-provider/Endpoints/queries.ts
@@ -61,6 +61,7 @@ export const useContextProjectionQuery = (
       params?.model,
       params?.agentId,
       params?.maxContextTokens,
+      params?.revision,
     ],
     () => dataService.getContextProjection(params as t.TContextProjectionRequest),
     {
diff --git a/client/src/hooks/Chat/useTokenUsage.ts b/client/src/hooks/Chat/useTokenUsage.ts
index f586c3db22..42360f5b3c 100644
--- a/client/src/hooks/Chat/useTokenUsage.ts
+++ b/client/src/hooks/Chat/useTokenUsage.ts
@@ -121,12 +121,16 @@ export default function useTokenUsage({
       ? {
           conversationId: conversation.conversationId,
           messageId: branchTotals.tailId,
-          endpoint: conversation.endpoint,
-          model: conversation.model ?? undefined,
+          /** Resolved provider/model (e.g. an agent's actual provider, not the
+           *  `agents` endpoint) so the server picks the right tokenizer. */
+          endpoint: limits.endpoint || conversation.endpoint,
+          model: limits.model || conversation.model || undefined,
           agentId: conversation.agent_id ?? undefined,
           spec: conversation.spec ?? undefined,
           maxContextTokens: resolvedMax,
           calibrationRatio: branchSnapshot?.calibrationRatio,
+          /** Revision (sum of branch totals) so an in-place message edit (same tail id) refetches. */
+          revision: branchTotals.input + branchTotals.output,
         }
       : null;
   const { data: projectionData } = useContextProjectionQuery(projectionParams);
diff --git a/packages/api/src/endpoints/projection.ts b/packages/api/src/endpoints/projection.ts
index 2362e219e0..a7703f41b7 100644
--- a/packages/api/src/endpoints/projection.ts
+++ b/packages/api/src/endpoints/projection.ts
@@ -7,16 +7,11 @@ interface ProjectionMessage {
   messageId: string;
   parentMessageId?: string | null;
   tokenCount?: number;
-  summaryTokenCount?: number;
   isCreatedByUser?: boolean;
   text?: string;
-}
-
-interface ProjectionAgent {
-  instructions?: string;
-  provider?: string;
-  model?: string;
-  model_parameters?: { maxContextTokens?: number };
+  /** Compaction marker written by the live path (`agents/usage.ts`); its
+   *  presence means the next call sends the summary + tail, not this raw chain. */
+  metadata?: { summaryUsedTokens?: number };
 }
 
 export interface ContextProjectionDeps {
@@ -26,7 +21,6 @@ export interface ContextProjectionDeps {
     filter: { conversationId: string; user?: string },
     select?: string,
   ) => Promise<ProjectionMessage[]>;
-  getAgent: (filter: { id: string }) => Promise<ProjectionAgent | null>;
 }
 
 /**
@@ -77,21 +71,29 @@ function resolveProvider(value?: string): Providers {
 }
 
 /**
- * Server-side context-usage projection: reconstructs the viewed branch + the
- * resolved agent config and asks the agents SDK what the next call's context
- * would be, WITHOUT invoking the model. Reuses LibreChat's already-calibrated
- * per-message `tokenCount`s (no re-tokenizing). Returns null when there is no
- * resolvable context window. NOTE: this first cut targets message-windowing
- * accuracy — tool-schema tokens are not yet included; a follow-up will reuse the
- * full `initializeAgent` path for exact instruction/tool overhead.
+ * Server-side context-usage projection: reconstructs the viewed branch and asks
+ * the agents SDK what the next call's context would be, WITHOUT invoking the
+ * model. Provider/model/window come from the (client-resolved) request — no
+ * agent or model-spec config is loaded here, so there is no cross-user config
+ * exposure. Reuses LibreChat's already-calibrated per-message `tokenCount`s (no
+ * re-tokenizing). Returns null when there is no resolvable context window.
+ * NOTE: this first cut targets message-windowing accuracy — instruction and
+ * tool-schema tokens (agent instructions, `promptPrefix`, model-spec presets,
+ * tool schemas) are NOT yet included; a follow-up will reuse the full
+ * `initializeAgent`/send path for exact overhead and proper access control.
  */
 export async function resolveContextProjection(
   deps: ContextProjectionDeps,
   params: TContextProjectionRequest,
 ): Promise<TContextUsageEvent | null> {
+  const maxContextTokens = params.maxContextTokens;
+  if (maxContextTokens == null || maxContextTokens <= 0) {
+    return null;
+  }
+
   const stored = await deps.getMessages(
     { conversationId: params.conversationId, user: deps.userId },
-    'messageId parentMessageId tokenCount summaryTokenCount isCreatedByUser text',
+    'messageId parentMessageId tokenCount isCreatedByUser text metadata',
   );
   const branch = resolveBranch(stored, params.messageId);
   if (branch.length === 0) {
@@ -100,30 +102,15 @@ export async function resolveContextProjection(
 
   /** A summarized/compacted branch's next call sends the saved summary + the
    *  post-summary tail, NOT this raw parent chain — projecting from the full
-   *  history would prune/count the wrong context and omit the summary. Until the
-   *  follow-up replays the summary boundary, fall back (null) so the client's
-   *  summary-baseline-aware estimate handles these branches. */
-  if (branch.some((message) => (message.summaryTokenCount ?? 0) > 0)) {
-    return null;
-  }
-
-  let instructions: string | undefined;
-  let providerValue: string | undefined = params.endpoint;
-  let model = params.model;
-  let maxContextTokens = params.maxContextTokens;
-  if (params.agentId != null && params.agentId !== '') {
-    const agent = await deps.getAgent({ id: params.agentId });
-    if (agent != null) {
-      instructions = agent.instructions;
-      providerValue = agent.provider ?? providerValue;
-      model = agent.model ?? model;
-      maxContextTokens = maxContextTokens ?? agent.model_parameters?.maxContextTokens;
-    }
-  }
-  if (maxContextTokens == null || maxContextTokens <= 0) {
+   *  history would prune/count the wrong context and omit the summary. Detect it
+   *  via the live path's `metadata.summaryUsedTokens` marker and fall back (null)
+   *  so the client's summary-baseline-aware estimate handles these branches until
+   *  a follow-up replays the summary boundary. */
+  if (branch.some((message) => (message.metadata?.summaryUsedTokens ?? 0) > 0)) {
     return null;
   }
 
+  const model = params.model;
   const encoding = (model ?? '').toLowerCase().includes('claude') ? 'claude' : 'o200k_base';
   const tokenCounter = await createTokenCounter(encoding);
 
@@ -147,8 +134,7 @@ export async function resolveContextProjection(
   return projectAgentContextUsage({
     agent: {
       agentId: params.agentId ?? 'projection',
-      provider: resolveProvider(providerValue),
-      instructions,
+      provider: resolveProvider(params.endpoint),
       maxContextTokens,
     },
     messages,
diff --git a/packages/data-provider/src/types/runs.ts b/packages/data-provider/src/types/runs.ts
index cb87d7f512..8e1033c07f 100644
--- a/packages/data-provider/src/types/runs.ts
+++ b/packages/data-provider/src/types/runs.ts
@@ -106,6 +106,9 @@ export type TContextProjectionRequest = {
   maxContextTokens?: number;
   /** Provider-calibrated ratio from a prior snapshot, applied as a static seed. */
   calibrationRatio?: number;
+  /** Client-only cache-bust: a branch content revision so a message edit
+   *  (which keeps the same tail id) refetches. The server ignores it. */
+  revision?: number;
 };
 
 /**