diff --git a/packages/api/src/agents/usage.spec.ts b/packages/api/src/agents/usage.spec.ts
index b9656ba2ec..bee0c5248b 100644
--- a/packages/api/src/agents/usage.spec.ts
+++ b/packages/api/src/agents/usage.spec.ts
@@ -364,6 +364,128 @@ describe('recordCollectedUsage', () => {
     });
   });
 
+  describe('reasoning token handling - issue #13006', () => {
+    it('uses total - input when output_tokens undercounts (Vertex stream undercount with details present)', async () => {
+      const collectedUsage: UsageMetadata[] = [
+        {
+          input_tokens: 80657,
+          output_tokens: 766,
+          total_tokens: 83265,
+          output_token_details: { reasoning: 1842 },
+          model: 'gemini-3-flash-preview',
+          provider: 'vertexai',
+        },
+      ];
+
+      const result = await recordCollectedUsage(deps, {
+        ...baseParams,
+        collectedUsage,
+      });
+
+      expect(mockSpendTokens).toHaveBeenCalledWith(
+        expect.objectContaining({ model: 'gemini-3-flash-preview' }),
+        { promptTokens: 80657, completionTokens: 2608 },
+      );
+      expect(result?.output_tokens).toBe(2608);
+    });
+
+    it('uses total - input even when output_token_details is missing (raw langchain google-common path)', async () => {
+      const collectedUsage: UsageMetadata[] = [
+        {
+          input_tokens: 12,
+          output_tokens: 135,
+          total_tokens: 309,
+          model: 'gemini-3-flash-preview',
+          provider: 'vertexai',
+        },
+      ];
+
+      const result = await recordCollectedUsage(deps, {
+        ...baseParams,
+        collectedUsage,
+      });
+
+      expect(mockSpendTokens).toHaveBeenCalledWith(
+        expect.objectContaining({ model: 'gemini-3-flash-preview' }),
+        { promptTokens: 12, completionTokens: 297 },
+      );
+      expect(result?.output_tokens).toBe(297);
+    });
+
+    it('does not change output when invariant already holds (OpenAI o-series, reasoning already a subset)', async () => {
+      const collectedUsage: UsageMetadata[] = [
+        {
+          input_tokens: 100,
+          output_tokens: 500,
+          total_tokens: 600,
+          output_token_details: { reasoning: 200 },
+          model: 'o1-preview',
+          provider: 'openAI',
+        },
+      ];
+
+      const result = await recordCollectedUsage(deps, {
+        ...baseParams,
+        collectedUsage,
+      });
+
+      expect(mockSpendTokens).toHaveBeenCalledWith(
+        expect.objectContaining({ model: 'o1-preview' }),
+        { promptTokens: 100, completionTokens: 500 },
+      );
+      expect(result?.output_tokens).toBe(500);
+    });
+
+    it('routes correction through structured spend when cache tokens are present', async () => {
+      const collectedUsage: UsageMetadata[] = [
+        {
+          input_tokens: 80657,
+          output_tokens: 766,
+          total_tokens: 83265,
+          output_token_details: { reasoning: 1842 },
+          input_token_details: { cache_read: 30000 },
+          model: 'gemini-3-flash-preview',
+          provider: 'vertexai',
+        },
+      ];
+
+      await recordCollectedUsage(deps, {
+        ...baseParams,
+        collectedUsage,
+      });
+
+      expect(mockSpendStructuredTokens).toHaveBeenCalledWith(
+        expect.objectContaining({ model: 'gemini-3-flash-preview' }),
+        {
+          promptTokens: { input: 50657, write: 0, read: 30000 },
+          completionTokens: 2608,
+        },
+      );
+    });
+
+    it('no-op when total_tokens is absent or zero', async () => {
+      const collectedUsage: UsageMetadata[] = [
+        {
+          input_tokens: 100,
+          output_tokens: 50,
+          model: 'gpt-4',
+          provider: 'openAI',
+        },
+      ];
+
+      const result = await recordCollectedUsage(deps, {
+        ...baseParams,
+        collectedUsage,
+      });
+
+      expect(mockSpendTokens).toHaveBeenCalledWith(
+        expect.anything(),
+        { promptTokens: 100, completionTokens: 50 },
+      );
+      expect(result?.output_tokens).toBe(50);
+    });
+  });
+
   describe('mixed cache and non-cache entries', () => {
     it('should handle mixed entries correctly', async () => {
       const collectedUsage: UsageMetadata[] = [
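Reviewer note, not part of the patch: every expected figure in the tests above follows from the `UsageMetadata` invariant `total_tokens = input_tokens + output_tokens`, worked out here on the tests' own numbers:

  83265 - 80657 = 2608   (766 reported output + 1842 dropped reasoning tokens)
  309   - 12    = 297    (same repair with output_token_details absent)
  600   - 100   = 500    (zero gap, so the reported 500 stands)
  80657 - 30000 = 50657  (non-cached prompt portion after subtracting cache_read)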
diff --git a/packages/api/src/agents/usage.ts b/packages/api/src/agents/usage.ts
index b5a2fa1f4d..3d8ddf3799 100644
--- a/packages/api/src/agents/usage.ts
+++ b/packages/api/src/agents/usage.ts
@@ -50,6 +50,34 @@ function inputTokensIncludesCache(provider?: string): boolean {
   return provider != null && SUBSET_PROVIDERS.has(provider);
 }
 
+/**
+ * Resolves `completionTokens` for billing, repairing providers whose
+ * `usage_metadata.output_tokens` undercounts.
+ *
+ * The documented `UsageMetadata` contract (`@langchain/core`) is
+ * `total_tokens === input_tokens + output_tokens`. Compliant providers
+ * (OpenAI, Anthropic, Google API via agents' `CustomChatGoogleGenerativeAI`)
+ * include any reasoning/thinking tokens inside `output_tokens` already,
+ * so the invariant holds.
+ *
+ * Vertex AI Gemini through `@langchain/google-common`'s streaming path
+ * emits `output_tokens = candidatesTokenCount` and drops `thoughtsTokenCount`,
+ * leaving `total - input > output`. When that gap shows up we use the
+ * invariant to recover the correct billable output (`total - input`).
+ * Compliant providers have a zero gap, so this is a no-op for them.
+ *
+ * Tracked in: https://github.com/danny-avila/LibreChat/issues/13006
+ */
+function resolveCompletionTokens(usage: UsageMetadata): number {
+  const output = Number(usage.output_tokens) || 0;
+  const total = Number(usage.total_tokens) || 0;
+  const input = Number(usage.input_tokens) || 0;
+  if (total > input + output) {
+    return total - input;
+  }
+  return output;
+}
+
 interface SplitUsage {
   /** Non-cached input portion — what gets billed at the standard input rate */
   inputOnly: number;
@@ -57,6 +85,8 @@ interface SplitUsage {
   cacheRead: number;
   /** Total prompt tokens including cached portion */
   totalInput: number;
+  /** Output tokens for billing (includes reasoning when omitted from `output_tokens`) */
+  completion: number;
 }
 
 function splitUsage(usage: UsageMetadata): SplitUsage {
@@ -67,12 +97,14 @@ function splitUsage(usage: UsageMetadata): SplitUsage {
   const cacheRead =
     Number(usage.input_token_details?.cache_read) || Number(usage.cache_read_input_tokens) || 0;
   const rawInput = Number(usage.input_tokens) || 0;
+  const completion = resolveCompletionTokens(usage);
   if (inputTokensIncludesCache(usage.provider)) {
     return {
       inputOnly: Math.max(0, rawInput - cacheCreation - cacheRead),
       cacheCreation,
       cacheRead,
       totalInput: rawInput,
+      completion,
     };
   }
   return {
@@ -80,6 +112,7 @@ function splitUsage(usage: UsageMetadata): SplitUsage {
     cacheCreation,
     cacheRead,
     totalInput: rawInput + cacheCreation + cacheRead,
+    completion,
   };
 }
 
@@ -161,9 +194,9 @@ export async function recordCollectedUsage(
       continue;
     }
 
-    const { inputOnly, cacheCreation, cacheRead } = splitUsage(usage);
+    const { inputOnly, cacheCreation, cacheRead, completion } = splitUsage(usage);
 
-    total_output_tokens += Number(usage.output_tokens) || 0;
+    total_output_tokens += completion;
 
     const txMetadata: TxMetadata = {
       user,
@@ -187,7 +220,7 @@ export async function recordCollectedUsage(
             write: cacheCreation,
             read: cacheRead,
           },
-          completionTokens: usage.output_tokens,
+          completionTokens: completion,
         },
         pricing,
       )
@@ -195,7 +228,7 @@ export async function recordCollectedUsage(
         txMetadata,
         {
           promptTokens: inputOnly,
-          completionTokens: usage.output_tokens,
+          completionTokens: completion,
         },
         pricing,
       );
@@ -211,7 +244,7 @@ export async function recordCollectedUsage(
             write: cacheCreation,
             read: cacheRead,
           },
-          completionTokens: usage.output_tokens,
+          completionTokens: completion,
         })
         .catch((err) => {
           logger.error(
@@ -225,7 +258,7 @@ export async function recordCollectedUsage(
       deps
         .spendTokens(txMetadata, {
           promptTokens: inputOnly,
-          completionTokens: usage.output_tokens,
+          completionTokens: completion,
         })
         .catch((err) => {
           logger.error(
diff --git a/packages/api/src/stream/interfaces/IJobStore.ts b/packages/api/src/stream/interfaces/IJobStore.ts
index 0d07b19538..afd6ac68a0 100644
--- a/packages/api/src/stream/interfaces/IJobStore.ts
+++ b/packages/api/src/stream/interfaces/IJobStore.ts
@@ -98,6 +98,16 @@ export interface UsageMetadata {
    * Present for Claude models. Mutually exclusive with input_token_details.
    */
   cache_read_input_tokens?: number;
+  /**
+   * Breakdown of output token counts. Per the LangChain core contract,
+   * `output_tokens` is the sum of all output token types — these fields
+   * are subsets of `output_tokens`, *not* additional charges.
+   */
+  output_token_details?: {
+    /** Reasoning/thinking tokens generated as chain-of-thought (o1, Gemini thinking, etc.) */
+    reasoning?: number;
+    audio?: number;
+  };
 }
 
 /**
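Reviewer note, not part of the patch: a standalone TypeScript sketch of how the new repair behaves on the exact figures exercised by the tests. The trimmed `Usage` shape is illustrative only; the patched code operates on the full `UsageMetadata` from IJobStore.ts.

// Sketch only: re-declares a minimal Usage shape instead of importing UsageMetadata.
interface Usage {
  input_tokens?: number;
  output_tokens?: number;
  total_tokens?: number;
}

// Same logic as the patched resolveCompletionTokens in usage.ts.
function resolveCompletionTokens(usage: Usage): number {
  const output = Number(usage.output_tokens) || 0;
  const total = Number(usage.total_tokens) || 0;
  const input = Number(usage.input_tokens) || 0;
  // A positive gap means reasoning tokens were dropped from output_tokens;
  // the invariant total === input + output recovers them as total - input.
  return total > input + output ? total - input : output;
}

// Vertex stream undercount: 83265 - 80657 = 2608 (766 visible + 1842 reasoning).
console.log(resolveCompletionTokens({ input_tokens: 80657, output_tokens: 766, total_tokens: 83265 })); // 2608

// Compliant provider (o-series): zero gap, reported output passes through.
console.log(resolveCompletionTokens({ input_tokens: 100, output_tokens: 500, total_tokens: 600 })); // 500

// total_tokens absent: falls back to output_tokens, a no-op.
console.log(resolveCompletionTokens({ input_tokens: 100, output_tokens: 50 })); // 50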