From cb6bd71ab9c565a1bf51b080ad7847cfd63ce04f Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Sat, 30 May 2026 00:29:19 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=AE=20chore:=20Update=20Gemma=20Contex?= =?UTF-8?q?t=20Token=20Defaults=20(#13410)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/utils/tokens.spec.js | 31 +++++++++++++++++++++++++++++++ packages/api/src/utils/tokens.ts | 11 ++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/api/utils/tokens.spec.js b/api/utils/tokens.spec.js index a97694a872..e2c4ac9ba4 100644 --- a/api/utils/tokens.spec.js +++ b/api/utils/tokens.spec.js @@ -356,6 +356,37 @@ describe('getModelMaxTokens', () => { ); }); + test('should return correct context tokens for Gemma models', () => { + expect(maxTokensMap[EModelEndpoint.google].gemma).toBe(32768); + expect(getModelMaxTokens('gemma', EModelEndpoint.google)).toBe( + maxTokensMap[EModelEndpoint.google].gemma, + ); + expect(getModelMaxTokens('gemma-2-9b-it', EModelEndpoint.google)).toBe( + maxTokensMap[EModelEndpoint.google]['gemma-2'], + ); + expect(getModelMaxTokens('gemma-3-27b-it', EModelEndpoint.google)).toBe( + maxTokensMap[EModelEndpoint.google]['gemma-3-27b'], + ); + expect(getModelMaxTokens('gemma4:latest', EModelEndpoint.google)).toBe( + maxTokensMap[EModelEndpoint.google].gemma4, + ); + expect(getModelMaxTokens('gemma4:e4b', EModelEndpoint.google)).toBe( + maxTokensMap[EModelEndpoint.google].gemma4, + ); + expect(getModelMaxTokens('Gemma4:31B', EModelEndpoint.custom)).toBe( + maxTokensMap[EModelEndpoint.custom]['gemma4:31b'], + ); + expect(getModelMaxTokens('ollama/gemma4:31b', EModelEndpoint.custom)).toBe( + maxTokensMap[EModelEndpoint.custom]['gemma4:31b'], + ); + expect(getModelMaxTokens('google/gemma-4-31B-it', EModelEndpoint.google)).toBe( + maxTokensMap[EModelEndpoint.google]['gemma-4-31b'], + ); + expect(getModelMaxTokens('google/gemma-4-26B-A4B-it', EModelEndpoint.google)).toBe( + maxTokensMap[EModelEndpoint.google]['gemma-4-26b-a4b'], + ); + }); + test('should return correct tokens for partial match - Cohere models', () => { expect(getModelMaxTokens('command', EModelEndpoint.custom)).toBe( maxTokensMap[EModelEndpoint.custom]['command'], diff --git a/packages/api/src/utils/tokens.ts b/packages/api/src/utils/tokens.ts index 7e5cb53fff..fadec1ffe2 100644 --- a/packages/api/src/utils/tokens.ts +++ b/packages/api/src/utils/tokens.ts @@ -95,10 +95,19 @@ const cohereModels = { const googleModels = { /* Max I/O is combined so we subtract the amount from max response tokens for actual total */ - gemma: 8196, + gemma: 32768, 'gemma-2': 32768, 'gemma-3': 32768, 'gemma-3-27b': 131072, + 'gemma4:31b': 256000, + 'gemma4-31b': 256000, + 'gemma-4-31b': 256000, + 'gemma4:26b': 256000, + 'gemma4-26b': 256000, + 'gemma-4-26b-a4b': 256000, + 'gemma-4-26b': 256000, + gemma4: 128000, + 'gemma-4': 128000, gemini: 30720, // -2048 from max 'gemini-pro-vision': 12288, 'gemini-1.5': 1000000,