From ee4dd1b2e998afda7a1b98a7f826920f3c7c81bd Mon Sep 17 00:00:00 2001 From: Marco Beretta <81851188+berry-13@users.noreply.github.com> Date: Fri, 19 Jul 2024 13:59:07 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=80=20feat:=20`gpt-4o-mini`=20(#3384)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: `gpt-4o-mini` * feat: retrival * fix: Update order of model token values for 'gpt-4o' and 'gpt-4o-mini' * fix: Update order of model token values for 'gpt-4o' and 'gpt-4o-mini' * fix: Update order of model token values for 'gpt-4o' and 'gpt-4o-mini' * fix: add jsdoc * fix: Update order of model token values for 'gpt-4o' and 'gpt-4o-mini' --------- Co-authored-by: Danny Avila --- .env.example | 6 +- README.md | 2 +- api/models/tx.js | 3 + api/models/tx.spec.js | 19 ++++ api/utils/tokens.js | 122 ++++++++++++++------------ packages/data-provider/package.json | 2 +- packages/data-provider/src/config.ts | 8 +- packages/data-provider/src/schemas.ts | 2 +- 8 files changed, 99 insertions(+), 65 deletions(-) diff --git a/.env.example b/.env.example index a17cb64ed1..5f0e40ac32 100644 --- a/.env.example +++ b/.env.example @@ -144,7 +144,7 @@ GOOGLE_KEY=user_provided #============# OPENAI_API_KEY=user_provided -# OPENAI_MODELS=gpt-4o,gpt-3.5-turbo-0125,gpt-3.5-turbo-0301,gpt-3.5-turbo,gpt-4,gpt-4-0613,gpt-4-vision-preview,gpt-3.5-turbo-0613,gpt-3.5-turbo-16k-0613,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview,gpt-3.5-turbo-1106,gpt-3.5-turbo-instruct,gpt-3.5-turbo-instruct-0914,gpt-3.5-turbo-16k +# OPENAI_MODELS=gpt-4o,gpt-4o-mini,gpt-3.5-turbo-0125,gpt-3.5-turbo-0301,gpt-3.5-turbo,gpt-4,gpt-4-0613,gpt-4-vision-preview,gpt-3.5-turbo-0613,gpt-3.5-turbo-16k-0613,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview,gpt-3.5-turbo-1106,gpt-3.5-turbo-instruct,gpt-3.5-turbo-instruct-0914,gpt-3.5-turbo-16k DEBUG_OPENAI=false @@ -166,7 +166,7 @@ DEBUG_OPENAI=false ASSISTANTS_API_KEY=user_provided # ASSISTANTS_BASE_URL= -# ASSISTANTS_MODELS=gpt-4o,gpt-3.5-turbo-0125,gpt-3.5-turbo-16k-0613,gpt-3.5-turbo-16k,gpt-3.5-turbo,gpt-4,gpt-4-0314,gpt-4-32k-0314,gpt-4-0613,gpt-3.5-turbo-0613,gpt-3.5-turbo-1106,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview +# ASSISTANTS_MODELS=gpt-4o,gpt-4o-mini,gpt-3.5-turbo-0125,gpt-3.5-turbo-16k-0613,gpt-3.5-turbo-16k,gpt-3.5-turbo,gpt-4,gpt-4-0314,gpt-4-32k-0314,gpt-4-0613,gpt-3.5-turbo-0613,gpt-3.5-turbo-1106,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview #==========================# # Azure Assistants API # @@ -188,7 +188,7 @@ ASSISTANTS_API_KEY=user_provided # Plugins # #============# -# PLUGIN_MODELS=gpt-4o,gpt-4,gpt-4-turbo-preview,gpt-4-0125-preview,gpt-4-1106-preview,gpt-4-0613,gpt-3.5-turbo,gpt-3.5-turbo-0125,gpt-3.5-turbo-1106,gpt-3.5-turbo-0613 +# PLUGIN_MODELS=gpt-4o,gpt-4o-mini,gpt-4,gpt-4-turbo-preview,gpt-4-0125-preview,gpt-4-1106-preview,gpt-4-0613,gpt-3.5-turbo,gpt-3.5-turbo-0125,gpt-3.5-turbo-1106,gpt-3.5-turbo-0613 DEBUG_PLUGINS=true diff --git a/README.md b/README.md index 0fbb1c92b6..93f80444ae 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ - 🔄 Edit, Resubmit, and Continue Messages with Conversation branching - 🌿 Fork Messages & Conversations for Advanced Context control - 💬 Multimodal Chat: - - Upload and analyze images with Claude 3, GPT-4 (including `gpt-4o`), and Gemini Vision 📸 + - Upload and analyze images with Claude 3, GPT-4 (including `gpt-4o` and `gpt-4o-mini`), and Gemini Vision 📸 - Chat with Files using Custom Endpoints, OpenAI, Azure, Anthropic, & Google. 🗃️ - Advanced Agents with Files, Code Interpreter, Tools, and API Actions 🔦 - Available through the [OpenAI Assistants API](https://platform.openai.com/docs/assistants/overview) 🌤️ diff --git a/api/models/tx.js b/api/models/tx.js index ccd865fc8d..778d2ce048 100644 --- a/api/models/tx.js +++ b/api/models/tx.js @@ -12,6 +12,7 @@ const tokenValues = { '4k': { prompt: 1.5, completion: 2 }, '16k': { prompt: 3, completion: 4 }, 'gpt-3.5-turbo-1106': { prompt: 1, completion: 2 }, + 'gpt-4o-mini': { prompt: 0.15, completion: 0.6 }, 'gpt-4o': { prompt: 5, completion: 15 }, 'gpt-4-1106': { prompt: 10, completion: 30 }, 'gpt-3.5-turbo-0125': { prompt: 0.5, completion: 1.5 }, @@ -54,6 +55,8 @@ const getValueKey = (model, endpoint) => { return 'gpt-3.5-turbo-1106'; } else if (modelName.includes('gpt-3.5')) { return '4k'; + } else if (modelName.includes('gpt-4o-mini')) { + return 'gpt-4o-mini'; } else if (modelName.includes('gpt-4o')) { return 'gpt-4o'; } else if (modelName.includes('gpt-4-vision')) { diff --git a/api/models/tx.spec.js b/api/models/tx.spec.js index 560b7da33d..d59a79a33e 100644 --- a/api/models/tx.spec.js +++ b/api/models/tx.spec.js @@ -49,6 +49,12 @@ describe('getValueKey', () => { expect(getValueKey('gpt-4o-0125')).toBe('gpt-4o'); }); + it('should return "gpt-4o-mini" for model type of "gpt-4o-mini"', () => { + expect(getValueKey('gpt-4o-mini-2024-07-18')).toBe('gpt-4o-mini'); + expect(getValueKey('openai/gpt-4o-mini')).toBe('gpt-4o-mini'); + expect(getValueKey('gpt-4o-mini-0718')).toBe('gpt-4o-mini'); + }); + it('should return "claude-3-5-sonnet" for model type of "claude-3-5-sonnet-"', () => { expect(getValueKey('claude-3-5-sonnet-20240620')).toBe('claude-3-5-sonnet'); expect(getValueKey('anthropic/claude-3-5-sonnet')).toBe('claude-3-5-sonnet'); @@ -109,6 +115,19 @@ describe('getMultiplier', () => { ); }); + it('should return the correct multiplier for gpt-4o-mini', () => { + const valueKey = getValueKey('gpt-4o-mini-2024-07-18'); + expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe( + tokenValues['gpt-4o-mini'].prompt, + ); + expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe( + tokenValues['gpt-4o-mini'].completion, + ); + expect(getMultiplier({ valueKey, tokenType: 'completion' })).not.toBe( + tokenValues['gpt-4-1106'].completion, + ); + }); + it('should derive the valueKey from the model if not provided for new models', () => { expect( getMultiplier({ tokenType: 'prompt', model: 'gpt-3.5-turbo-1106-some-other-info' }), diff --git a/api/utils/tokens.js b/api/utils/tokens.js index 1ff0c4340d..dec669be2c 100644 --- a/api/utils/tokens.js +++ b/api/utils/tokens.js @@ -1,45 +1,6 @@ const z = require('zod'); const { EModelEndpoint } = require('librechat-data-provider'); -const models = [ - 'text-davinci-003', - 'text-davinci-002', - 'text-davinci-001', - 'text-curie-001', - 'text-babbage-001', - 'text-ada-001', - 'davinci', - 'curie', - 'babbage', - 'ada', - 'code-davinci-002', - 'code-davinci-001', - 'code-cushman-002', - 'code-cushman-001', - 'davinci-codex', - 'cushman-codex', - 'text-davinci-edit-001', - 'code-davinci-edit-001', - 'text-embedding-ada-002', - 'text-similarity-davinci-001', - 'text-similarity-curie-001', - 'text-similarity-babbage-001', - 'text-similarity-ada-001', - 'text-search-davinci-doc-001', - 'text-search-curie-doc-001', - 'text-search-babbage-doc-001', - 'text-search-ada-doc-001', - 'code-search-babbage-code-001', - 'code-search-ada-code-001', - 'gpt2', - 'gpt-4', - 'gpt-4-0314', - 'gpt-4-32k', - 'gpt-4-32k-0314', - 'gpt-3.5-turbo', - 'gpt-3.5-turbo-0301', -]; - const openAIModels = { 'gpt-4': 8187, // -5 from max 'gpt-4-0613': 8187, // -5 from max @@ -49,6 +10,7 @@ const openAIModels = { 'gpt-4-1106': 127990, // -10 from max 'gpt-4-0125': 127990, // -10 from max 'gpt-4o': 127990, // -10 from max + 'gpt-4o-mini': 127990, // -10 from max 'gpt-4-turbo': 127990, // -10 from max 'gpt-4-vision': 127990, // -10 from max 'gpt-3.5-turbo': 16375, // -10 from max @@ -101,7 +63,6 @@ const anthropicModels = { const aggregateModels = { ...openAIModels, ...googleModels, ...anthropicModels, ...cohereModels }; -// Order is important here: by model series and context size (gpt-4 then gpt-3, ascending) const maxTokensMap = { [EModelEndpoint.azureOpenAI]: openAIModels, [EModelEndpoint.openAI]: aggregateModels, @@ -110,6 +71,24 @@ const maxTokensMap = { [EModelEndpoint.anthropic]: anthropicModels, }; +/** + * Finds the first matching pattern in the tokens map. + * @param {string} modelName + * @param {Record} tokensMap + * @returns {string|null} + */ +function findMatchingPattern(modelName, tokensMap) { + const keys = Object.keys(tokensMap); + for (let i = keys.length - 1; i >= 0; i--) { + const modelKey = keys[i]; + if (modelName.includes(modelKey)) { + return modelKey; + } + } + + return null; +} + /** * Retrieves the maximum tokens for a given model name. If the exact model name isn't found, * it searches for partial matches within the model name, checking keys in reverse order. @@ -143,12 +122,11 @@ function getModelMaxTokens(modelName, endpoint = EModelEndpoint.openAI, endpoint return tokensMap[modelName]; } - const keys = Object.keys(tokensMap); - for (let i = keys.length - 1; i >= 0; i--) { - if (modelName.includes(keys[i])) { - const result = tokensMap[keys[i]]; - return result?.context ?? result; - } + const matchedPattern = findMatchingPattern(modelName, tokensMap); + + if (matchedPattern) { + const result = tokensMap[matchedPattern]; + return result?.context ?? result; } return undefined; @@ -181,15 +159,8 @@ function matchModelName(modelName, endpoint = EModelEndpoint.openAI) { return modelName; } - const keys = Object.keys(tokensMap); - for (let i = keys.length - 1; i >= 0; i--) { - const modelKey = keys[i]; - if (modelName.includes(modelKey)) { - return modelKey; - } - } - - return modelName; + const matchedPattern = findMatchingPattern(modelName, tokensMap); + return matchedPattern || modelName; } const modelSchema = z.object({ @@ -241,8 +212,47 @@ function processModelData(input) { return tokenConfig; } +const tiktokenModels = new Set([ + 'text-davinci-003', + 'text-davinci-002', + 'text-davinci-001', + 'text-curie-001', + 'text-babbage-001', + 'text-ada-001', + 'davinci', + 'curie', + 'babbage', + 'ada', + 'code-davinci-002', + 'code-davinci-001', + 'code-cushman-002', + 'code-cushman-001', + 'davinci-codex', + 'cushman-codex', + 'text-davinci-edit-001', + 'code-davinci-edit-001', + 'text-embedding-ada-002', + 'text-similarity-davinci-001', + 'text-similarity-curie-001', + 'text-similarity-babbage-001', + 'text-similarity-ada-001', + 'text-search-davinci-doc-001', + 'text-search-curie-doc-001', + 'text-search-babbage-doc-001', + 'text-search-ada-doc-001', + 'code-search-babbage-code-001', + 'code-search-ada-code-001', + 'gpt2', + 'gpt-4', + 'gpt-4-0314', + 'gpt-4-32k', + 'gpt-4-32k-0314', + 'gpt-3.5-turbo', + 'gpt-3.5-turbo-0301', +]); + module.exports = { - tiktokenModels: new Set(models), + tiktokenModels, maxTokensMap, inputSchema, modelSchema, diff --git a/packages/data-provider/package.json b/packages/data-provider/package.json index 393dacd051..8c9a7d4429 100644 --- a/packages/data-provider/package.json +++ b/packages/data-provider/package.json @@ -1,6 +1,6 @@ { "name": "librechat-data-provider", - "version": "0.7.2", + "version": "0.7.3", "description": "data services for librechat apps", "main": "dist/index.js", "module": "dist/index.es.js", diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index cacfdd7b0b..d977f69709 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -12,6 +12,8 @@ export const defaultSocialLogins = ['google', 'facebook', 'openid', 'github', 'd export const defaultRetrievalModels = [ 'gpt-4o', 'gpt-4o-2024-05-13', + 'gpt-4o-mini', + 'gpt-4o-mini-2024-07-18', 'gpt-4-turbo-preview', 'gpt-3.5-turbo-0125', 'gpt-4-0125-preview', @@ -530,7 +532,7 @@ const sharedOpenAIModels = [ export const defaultModels = { [EModelEndpoint.azureAssistants]: sharedOpenAIModels, - [EModelEndpoint.assistants]: ['gpt-4o', ...sharedOpenAIModels], + [EModelEndpoint.assistants]: ['gpt-4o-mini', 'gpt-4o', ...sharedOpenAIModels], [EModelEndpoint.google]: [ 'gemini-pro', 'gemini-pro-vision', @@ -559,13 +561,12 @@ export const defaultModels = { 'claude-instant-1-100k', ], [EModelEndpoint.openAI]: [ + 'gpt-4o-mini', 'gpt-4o', ...sharedOpenAIModels, 'gpt-4-vision-preview', 'gpt-3.5-turbo-instruct-0914', - 'gpt-3.5-turbo-0301', 'gpt-3.5-turbo-instruct', - 'text-davinci-003', ], }; @@ -621,6 +622,7 @@ export const supportsBalanceCheck = { export const visionModels = [ 'gpt-4o', + 'gpt-4o-mini', 'gpt-4-turbo', 'gpt-4-vision', 'llava', diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts index ea4624295c..049e84a87f 100644 --- a/packages/data-provider/src/schemas.ts +++ b/packages/data-provider/src/schemas.ts @@ -219,7 +219,7 @@ export enum EAgent { export const agentOptionSettings = { model: { - default: 'gpt-4o', + default: 'gpt-4o-mini', }, temperature: { min: 0,