diff --git a/.env.example b/.env.example index c9bb6ac820..4ab6ded239 100644 --- a/.env.example +++ b/.env.example @@ -185,8 +185,16 @@ GOOGLE_KEY=user_provided # GOOGLE_TITLE_MODEL=gemini-2.0-flash-lite-001 +# Google Cloud region for Vertex AI (used by both chat and image generation) # GOOGLE_LOC=us-central1 +# Alternative region env var for Gemini Image Generation +# GOOGLE_CLOUD_LOCATION=global + +# Vertex AI Service Account Configuration +# Path to your Google Cloud service account JSON file +# GOOGLE_SERVICE_KEY_FILE=/path/to/service-account.json + # Google Safety Settings # NOTE: These settings apply to both Vertex AI and Gemini API (AI Studio) # @@ -206,6 +214,27 @@ GOOGLE_KEY=user_provided # GOOGLE_SAFETY_DANGEROUS_CONTENT=BLOCK_ONLY_HIGH # GOOGLE_SAFETY_CIVIC_INTEGRITY=BLOCK_ONLY_HIGH +#========================# +# Gemini Image Generation # +#========================# + +# Gemini Image Generation Tool (for Agents) +# Supports multiple authentication methods in priority order: +# 1. User-provided API key (via GUI) +# 2. GEMINI_API_KEY env var (admin-configured) +# 3. GOOGLE_KEY env var (shared with Google chat endpoint) +# 4. 
Vertex AI service account (via GOOGLE_SERVICE_KEY_FILE) + +# Option A: Use dedicated Gemini API key for image generation +# GEMINI_API_KEY=your-gemini-api-key + +# Option B: Use Vertex AI (no API key needed, uses service account) +# Set this to enable Vertex AI and allow tool without requiring API keys +# GEMINI_VERTEX_ENABLED=true + +# Vertex AI model for image generation (defaults to gemini-2.5-flash-image) +# GEMINI_IMAGE_MODEL=gemini-2.5-flash-image + #============# # OpenAI # #============# diff --git a/api/app/clients/tools/index.js b/api/app/clients/tools/index.js index 90d1545a5a..1a7c4ff47f 100644 --- a/api/app/clients/tools/index.js +++ b/api/app/clients/tools/index.js @@ -12,6 +12,7 @@ const GoogleSearchAPI = require('./structured/GoogleSearch'); const TraversaalSearch = require('./structured/TraversaalSearch'); const createOpenAIImageTools = require('./structured/OpenAIImageTools'); const TavilySearchResults = require('./structured/TavilySearchResults'); +const createGeminiImageTool = require('./structured/GeminiImageGen'); module.exports = { ...manifest, @@ -27,4 +28,5 @@ module.exports = { createYouTubeTools, TavilySearchResults, createOpenAIImageTools, + createGeminiImageTool, }; diff --git a/api/app/clients/tools/manifest.json b/api/app/clients/tools/manifest.json index c12b962fee..fc037caa4b 100644 --- a/api/app/clients/tools/manifest.json +++ b/api/app/clients/tools/manifest.json @@ -179,5 +179,19 @@ "description": "Provide your Flux API key from your user profile." } ] + }, + { + "name": "Gemini Image Tools", + "pluginKey": "gemini_image_gen", + "toolkit": true, + "description": "Generate high-quality images using Google's Gemini Image Models. Supports Gemini API or Vertex AI.", + "icon": "assets/gemini_image_gen.svg", + "authConfig": [ + { + "authField": "GEMINI_API_KEY||GOOGLE_KEY||GEMINI_VERTEX_ENABLED", + "label": "Gemini API Key (Optional if Vertex AI is configured)", + "description": "Your Google Gemini API Key from Google AI Studio. 
Leave blank if using Vertex AI with service account." + } + ] } ] diff --git a/api/app/clients/tools/structured/GeminiImageGen.js b/api/app/clients/tools/structured/GeminiImageGen.js new file mode 100644 index 0000000000..c6ee58a61e --- /dev/null +++ b/api/app/clients/tools/structured/GeminiImageGen.js @@ -0,0 +1,576 @@ +const fs = require('fs'); +const path = require('path'); +const sharp = require('sharp'); +const { v4 } = require('uuid'); +const { GoogleGenAI } = require('@google/genai'); +const { tool } = require('@langchain/core/tools'); +const { logger } = require('@librechat/data-schemas'); +const { + FileContext, + ContentTypes, + FileSources, + EImageOutputType, +} = require('librechat-data-provider'); +const { + geminiToolkit, + loadServiceKey, + getBalanceConfig, + getTransactionsConfig, +} = require('@librechat/api'); +const { getStrategyFunctions } = require('~/server/services/Files/strategies'); +const { spendTokens } = require('~/models/spendTokens'); +const { getFiles } = require('~/models/File'); + +/** + * Get the default service key file path (consistent with main Google endpoint) + * @returns {string} - The default path to the service key file + */ +function getDefaultServiceKeyPath() { + return ( + process.env.GOOGLE_SERVICE_KEY_FILE || path.join(process.cwd(), 'api', 'data', 'auth.json') + ); +} + +const displayMessage = + "Gemini displayed an image. All generated images are already plainly visible, so don't repeat the descriptions in detail. Do not list download links as they are available in the UI already. 
The user may download the images by clicking on them, but do not mention anything about downloading to the user."; + +/** + * Replaces unwanted characters from the input string + * @param {string} inputString - The input string to process + * @returns {string} - The processed string + */ +function replaceUnwantedChars(inputString) { + return inputString?.replace(/[^\w\s\-_.,!?()]/g, '') || ''; +} + +/** + * Validate and sanitize image format + * @param {string} format - The format to validate + * @returns {string} - Safe format + */ +function getSafeFormat(format) { + const allowedFormats = ['png', 'jpg', 'jpeg', 'webp', 'gif']; + return allowedFormats.includes(format?.toLowerCase()) ? format.toLowerCase() : 'png'; +} + +/** + * Convert image buffer to target format if needed + * @param {Buffer} inputBuffer - The input image buffer + * @param {string} targetFormat - The target format (png, jpeg, webp) + * @returns {Promise<{buffer: Buffer, format: string}>} - Converted buffer and format + */ +async function convertImageFormat(inputBuffer, targetFormat) { + const metadata = await sharp(inputBuffer).metadata(); + const currentFormat = metadata.format; + + // Normalize format names (jpg -> jpeg) + const normalizedTarget = targetFormat === 'jpg' ? 'jpeg' : targetFormat.toLowerCase(); + const normalizedCurrent = currentFormat === 'jpg' ? 
'jpeg' : currentFormat; + + // If already in target format, return as-is + if (normalizedCurrent === normalizedTarget) { + return { buffer: inputBuffer, format: normalizedTarget }; + } + + // Convert to target format + const convertedBuffer = await sharp(inputBuffer).toFormat(normalizedTarget).toBuffer(); + return { buffer: convertedBuffer, format: normalizedTarget }; +} + +/** + * Initialize Gemini client (supports both Gemini API and Vertex AI) + * Priority: API key (from options, resolved by loadAuthValues) > Vertex AI service account + * @param {Object} options - Initialization options + * @param {string} [options.GEMINI_API_KEY] - Gemini API key (resolved by loadAuthValues) + * @param {string} [options.GOOGLE_KEY] - Google API key (resolved by loadAuthValues) + * @returns {Promise} - The initialized client + */ +async function initializeGeminiClient(options = {}) { + const geminiKey = options.GEMINI_API_KEY; + if (geminiKey) { + logger.debug('[GeminiImageGen] Using Gemini API with GEMINI_API_KEY'); + return new GoogleGenAI({ apiKey: geminiKey }); + } + + const googleKey = options.GOOGLE_KEY; + if (googleKey) { + logger.debug('[GeminiImageGen] Using Gemini API with GOOGLE_KEY'); + return new GoogleGenAI({ apiKey: googleKey }); + } + + // Fall back to Vertex AI with service account + logger.debug('[GeminiImageGen] Using Vertex AI with service account'); + const credentialsPath = getDefaultServiceKeyPath(); + + // Use loadServiceKey for consistent loading (supports file paths, JSON strings, base64) + const serviceKey = await loadServiceKey(credentialsPath); + + if (!serviceKey || !serviceKey.project_id) { + throw new Error( + 'Gemini Image Generation requires one of: user-provided API key, GEMINI_API_KEY or GOOGLE_KEY env var, or a valid Google service account. 
' + + `Service account file not found or invalid at: ${credentialsPath}`, + ); + } + + // Set GOOGLE_APPLICATION_CREDENTIALS for any Google Cloud SDK dependencies + try { + await fs.promises.access(credentialsPath); + process.env.GOOGLE_APPLICATION_CREDENTIALS = credentialsPath; + } catch { + // File doesn't exist, skip setting env var + } + + return new GoogleGenAI({ + vertexai: true, + project: serviceKey.project_id, + location: process.env.GOOGLE_LOC || process.env.GOOGLE_CLOUD_LOCATION || 'global', + }); +} + +/** + * Save image to local filesystem + * @param {string} base64Data - Base64 encoded image data + * @param {string} format - Image format + * @param {string} userId - User ID + * @returns {Promise} - The relative URL + */ +async function saveImageLocally(base64Data, format, userId) { + const safeFormat = getSafeFormat(format); + const safeUserId = userId ? path.basename(userId) : 'default'; + const imageName = `gemini-img-${v4()}.${safeFormat}`; + const userDir = path.join(process.cwd(), 'client/public/images', safeUserId); + + await fs.promises.mkdir(userDir, { recursive: true }); + + const filePath = path.join(userDir, imageName); + await fs.promises.writeFile(filePath, Buffer.from(base64Data, 'base64')); + + logger.debug('[GeminiImageGen] Image saved locally to:', filePath); + return `/images/${safeUserId}/${imageName}`; +} + +/** + * Save image to cloud storage + * @param {Object} params - Parameters + * @returns {Promise} - The storage URL or null + */ +async function saveToCloudStorage({ base64Data, format, processFileURL, fileStrategy, userId }) { + if (!processFileURL || !fileStrategy || !userId) { + return null; + } + + try { + const safeFormat = getSafeFormat(format); + const safeUserId = path.basename(userId); + const dataURL = `data:image/${safeFormat};base64,${base64Data}`; + const imageName = `gemini-img-${v4()}.${safeFormat}`; + + const result = await processFileURL({ + URL: dataURL, + basePath: 'images', + userId: safeUserId, + fileName: 
imageName, + fileStrategy, + context: FileContext.image_generation, + }); + + return result.filepath; + } catch (error) { + logger.error('[GeminiImageGen] Error saving to cloud storage:', error); + return null; + } +} + +/** + * Convert image files to Gemini inline data format + * @param {Object} params - Parameters + * @returns {Promise} - Array of inline data objects + */ +async function convertImagesToInlineData({ imageFiles, image_ids, req, fileStrategy }) { + if (!image_ids || image_ids.length === 0) { + return []; + } + + const streamMethods = {}; + const requestFilesMap = Object.fromEntries(imageFiles.map((f) => [f.file_id, { ...f }])); + const orderedFiles = new Array(image_ids.length); + const idsToFetch = []; + const indexOfMissing = Object.create(null); + + for (let i = 0; i < image_ids.length; i++) { + const id = image_ids[i]; + const file = requestFilesMap[id]; + if (file) { + orderedFiles[i] = file; + } else { + idsToFetch.push(id); + indexOfMissing[id] = i; + } + } + + if (idsToFetch.length && req?.user?.id) { + const fetchedFiles = await getFiles( + { + user: req.user.id, + file_id: { $in: idsToFetch }, + height: { $exists: true }, + width: { $exists: true }, + }, + {}, + {}, + ); + + for (const file of fetchedFiles) { + requestFilesMap[file.file_id] = file; + orderedFiles[indexOfMissing[file.file_id]] = file; + } + } + + const inlineDataArray = []; + for (const imageFile of orderedFiles) { + if (!imageFile) continue; + + try { + const source = imageFile.source || fileStrategy; + if (!source) continue; + + let getDownloadStream = streamMethods[source]; + if (!getDownloadStream) { + ({ getDownloadStream } = getStrategyFunctions(source)); + streamMethods[source] = getDownloadStream; + } + if (!getDownloadStream) continue; + + const stream = await getDownloadStream(req, imageFile.filepath); + if (!stream) continue; + + const chunks = []; + for await (const chunk of stream) { + chunks.push(chunk); + } + const buffer = Buffer.concat(chunks); + const 
base64Data = buffer.toString('base64'); + const mimeType = imageFile.type || 'image/png'; + + inlineDataArray.push({ + inlineData: { mimeType, data: base64Data }, + }); + } catch (error) { + logger.error('[GeminiImageGen] Error processing image:', imageFile.file_id, error); + } + } + + return inlineDataArray; +} + +/** + * Check for safety blocks in API response + * @param {Object} response - The API response + * @returns {Object|null} - Safety block info or null + */ +function checkForSafetyBlock(response) { + if (!response?.candidates?.length) { + return { reason: 'NO_CANDIDATES', message: 'No candidates returned' }; + } + + const candidate = response.candidates[0]; + const finishReason = candidate.finishReason; + + if (finishReason === 'SAFETY' || finishReason === 'PROHIBITED_CONTENT') { + return { reason: finishReason, message: 'Content blocked by safety filters' }; + } + + if (finishReason === 'RECITATION') { + return { reason: finishReason, message: 'Content blocked due to recitation concerns' }; + } + + if (candidate.safetyRatings) { + for (const rating of candidate.safetyRatings) { + if (rating.probability === 'HIGH' || rating.blocked === true) { + return { + reason: 'SAFETY_RATING', + message: `Blocked due to ${rating.category}`, + category: rating.category, + }; + } + } + } + + return null; +} + +/** + * Record token usage for balance tracking + * @param {Object} params - Parameters + * @param {Object} params.usageMetadata - The usage metadata from API response + * @param {Object} params.req - The request object + * @param {string} params.userId - The user ID + * @param {string} params.conversationId - The conversation ID + * @param {string} params.model - The model name + */ +async function recordTokenUsage({ usageMetadata, req, userId, conversationId, model }) { + if (!usageMetadata) { + logger.debug('[GeminiImageGen] No usage metadata available for balance tracking'); + return; + } + + const appConfig = req?.config; + const balance = 
getBalanceConfig(appConfig); + const transactions = getTransactionsConfig(appConfig); + + // Skip if neither balance nor transactions are enabled + if (!balance?.enabled && transactions?.enabled === false) { + return; + } + + const promptTokens = usageMetadata.prompt_token_count || usageMetadata.promptTokenCount || 0; + const completionTokens = + usageMetadata.candidates_token_count || usageMetadata.candidatesTokenCount || 0; + + if (promptTokens === 0 && completionTokens === 0) { + logger.debug('[GeminiImageGen] No tokens to record'); + return; + } + + logger.debug('[GeminiImageGen] Recording token usage:', { + promptTokens, + completionTokens, + model, + conversationId, + }); + + try { + await spendTokens( + { + user: userId, + model, + conversationId, + context: 'image_generation', + balance, + transactions, + }, + { + promptTokens, + completionTokens, + }, + ); + } catch (error) { + logger.error('[GeminiImageGen] Error recording token usage:', error); + } +} + +/** + * Creates Gemini Image Generation tool + * @param {Object} fields - Configuration fields + * @returns {ReturnType} - The image generation tool + */ +function createGeminiImageTool(fields = {}) { + const override = fields.override ?? 
false; + + if (!override && !fields.isAgent) { + throw new Error('This tool is only available for agents.'); + } + + // Skip validation during tool creation - validation happens at runtime in initializeGeminiClient + // This allows the tool to be added to agents when using Vertex AI without requiring API keys + // The actual credentials check happens when the tool is invoked + + const { + req, + imageFiles = [], + processFileURL, + userId, + fileStrategy, + GEMINI_API_KEY, + GOOGLE_KEY, + // GEMINI_VERTEX_ENABLED is used for auth validation only (not used in code) + // When set as env var, it signals Vertex AI is configured and bypasses API key requirement + } = fields; + + const imageOutputType = fields.imageOutputType || EImageOutputType.PNG; + + const geminiImageGenTool = tool( + async ({ prompt, image_ids, aspectRatio, imageSize }, _runnableConfig) => { + if (!prompt) { + throw new Error('Missing required field: prompt'); + } + + logger.debug('[GeminiImageGen] Generating image with prompt:', prompt?.substring(0, 100)); + logger.debug('[GeminiImageGen] Options:', { aspectRatio, imageSize }); + + // Initialize Gemini client with user-provided credentials + let ai; + try { + ai = await initializeGeminiClient({ + GEMINI_API_KEY, + GOOGLE_KEY, + }); + } catch (error) { + logger.error('[GeminiImageGen] Failed to initialize client:', error); + return [ + [{ type: ContentTypes.TEXT, text: `Failed to initialize Gemini: ${error.message}` }], + { content: [], file_ids: [] }, + ]; + } + + // Build request contents + const contents = [{ text: replaceUnwantedChars(prompt) }]; + + // Add context images if provided + if (image_ids?.length > 0) { + const contextImages = await convertImagesToInlineData({ + imageFiles, + image_ids, + req, + fileStrategy, + }); + contents.push(...contextImages); + logger.debug('[GeminiImageGen] Added', contextImages.length, 'context images'); + } + + // Generate image + let apiResponse; + const geminiModel = process.env.GEMINI_IMAGE_MODEL || 
'gemini-2.5-flash-image'; + try { + // Build config with optional imageConfig + const config = { + responseModalities: ['TEXT', 'IMAGE'], + }; + + // Add imageConfig if aspectRatio or imageSize is specified + // Note: gemini-2.5-flash-image doesn't support imageSize + const supportsImageSize = !geminiModel.includes('gemini-2.5-flash-image'); + if (aspectRatio || (imageSize && supportsImageSize)) { + config.imageConfig = {}; + if (aspectRatio) { + config.imageConfig.aspectRatio = aspectRatio; + } + if (imageSize && supportsImageSize) { + config.imageConfig.imageSize = imageSize; + } + } + + apiResponse = await ai.models.generateContent({ + model: geminiModel, + contents, + config, + }); + } catch (error) { + logger.error('[GeminiImageGen] API error:', error); + return [ + [{ type: ContentTypes.TEXT, text: `Image generation failed: ${error.message}` }], + { content: [], file_ids: [] }, + ]; + } + + // Check for safety blocks + const safetyBlock = checkForSafetyBlock(apiResponse); + if (safetyBlock) { + logger.warn('[GeminiImageGen] Safety block:', safetyBlock); + const errorMsg = 'Image blocked by content safety filters. Please try different content.'; + return [[{ type: ContentTypes.TEXT, text: errorMsg }], { content: [], file_ids: [] }]; + } + + const rawImageData = apiResponse.candidates?.[0]?.content?.parts?.find((p) => p.inlineData) + ?.inlineData?.data; + + if (!rawImageData) { + logger.warn('[GeminiImageGen] No image data in response'); + return [ + [{ type: ContentTypes.TEXT, text: 'No image was generated. Please try again.' }], + { content: [], file_ids: [] }, + ]; + } + + const rawBuffer = Buffer.from(rawImageData, 'base64'); + const { buffer: convertedBuffer, format: outputFormat } = await convertImageFormat( + rawBuffer, + imageOutputType, + ); + const imageData = convertedBuffer.toString('base64'); + const mimeType = outputFormat === 'jpeg' ? 
'image/jpeg' : `image/${outputFormat}`; + + logger.debug('[GeminiImageGen] Image format:', { outputFormat, mimeType }); + + let imageUrl; + const useLocalStorage = !fileStrategy || fileStrategy === FileSources.local; + + if (useLocalStorage) { + try { + imageUrl = await saveImageLocally(imageData, outputFormat, userId); + } catch (error) { + logger.error('[GeminiImageGen] Local save failed:', error); + imageUrl = `data:${mimeType};base64,${imageData}`; + } + } else { + const cloudUrl = await saveToCloudStorage({ + base64Data: imageData, + format: outputFormat, + processFileURL, + fileStrategy, + userId, + }); + + if (cloudUrl) { + imageUrl = cloudUrl; + } else { + // Fallback to local + try { + imageUrl = await saveImageLocally(imageData, outputFormat, userId); + } catch (_error) { + imageUrl = `data:${mimeType};base64,${imageData}`; + } + } + } + + logger.debug('[GeminiImageGen] Image URL:', imageUrl); + + // For the artifact, we need a data URL (same as OpenAI) + // The local file save is for persistence, but the response needs a data URL + const dataUrl = `data:${mimeType};base64,${imageData}`; + + // Return in content_and_artifact format (same as OpenAI) + const file_ids = [v4()]; + const content = [ + { + type: ContentTypes.IMAGE_URL, + image_url: { url: dataUrl }, + }, + ]; + + const textResponse = [ + { + type: ContentTypes.TEXT, + text: + displayMessage + + `\n\ngenerated_image_id: "${file_ids[0]}"` + + (image_ids?.length > 0 ? 
`\nreferenced_image_ids: ["${image_ids.join('", "')}"]` : ''), + }, + ]; + + // Record token usage for balance tracking (don't await to avoid blocking response) + const conversationId = _runnableConfig?.configurable?.thread_id; + recordTokenUsage({ + usageMetadata: apiResponse.usageMetadata, + req, + userId, + conversationId, + model: geminiModel, + }).catch((error) => { + logger.error('[GeminiImageGen] Failed to record token usage:', error); + }); + + return [textResponse, { content, file_ids }]; + }, + { + ...geminiToolkit.gemini_image_gen, + responseFormat: 'content_and_artifact', + }, + ); + + return geminiImageGenTool; +} + +// Export both for compatibility +module.exports = createGeminiImageTool; +module.exports.createGeminiImageTool = createGeminiImageTool; diff --git a/api/app/clients/tools/util/handleTools.js b/api/app/clients/tools/util/handleTools.js index e9361a70d9..e39bebd36a 100644 --- a/api/app/clients/tools/util/handleTools.js +++ b/api/app/clients/tools/util/handleTools.js @@ -10,6 +10,7 @@ const { createSafeUser, mcpToolPattern, loadWebSearchAuth, + buildImageToolContext, } = require('@librechat/api'); const { getMCPServersRegistry } = require('~/config'); const { @@ -35,6 +36,7 @@ const { StructuredWolfram, createYouTubeTools, TavilySearchResults, + createGeminiImageTool, createOpenAIImageTools, } = require('../'); const { primeFiles: primeCodeFiles } = require('~/server/services/Files/Code/process'); @@ -192,21 +194,11 @@ const loadTools = async ({ const authFields = getAuthFields('image_gen_oai'); const authValues = await loadAuthValues({ userId: user, authFields }); const imageFiles = options.tool_resources?.[EToolResources.image_edit]?.files ?? 
[]; - let toolContext = ''; - for (let i = 0; i < imageFiles.length; i++) { - const file = imageFiles[i]; - if (!file) { - continue; - } - if (i === 0) { - toolContext = - 'Image files provided in this request (their image IDs listed in order of appearance) available for image editing:'; - } - toolContext += `\n\t- ${file.file_id}`; - if (i === imageFiles.length - 1) { - toolContext += `\n\nInclude any you need in the \`image_ids\` array when calling \`${EToolResources.image_edit}_oai\`. You may also include previously referenced or generated image IDs.`; - } - } + const toolContext = buildImageToolContext({ + imageFiles, + toolName: `${EToolResources.image_edit}_oai`, + contextDescription: 'image editing', + }); if (toolContext) { toolContextMap.image_edit_oai = toolContext; } @@ -219,6 +211,28 @@ const loadTools = async ({ imageFiles, }); }, + gemini_image_gen: async (toolContextMap) => { + const authFields = getAuthFields('gemini_image_gen'); + const authValues = await loadAuthValues({ userId: user, authFields }); + const imageFiles = options.tool_resources?.[EToolResources.image_edit]?.files ?? 
[]; + const toolContext = buildImageToolContext({ + imageFiles, + toolName: 'gemini_image_gen', + contextDescription: 'image context', + }); + if (toolContext) { + toolContextMap.gemini_image_gen = toolContext; + } + return createGeminiImageTool({ + ...authValues, + isAgent: !!agent, + req: options.req, + imageFiles, + processFileURL: options.processFileURL, + userId: user, + fileStrategy, + }); + }, }; const requestedTools = {}; @@ -241,6 +255,7 @@ const loadTools = async ({ flux: imageGenOptions, dalle: imageGenOptions, 'stable-diffusion': imageGenOptions, + gemini_image_gen: imageGenOptions, }; /** @type {Record} */ diff --git a/api/models/tx.js b/api/models/tx.js index b92ee62bfc..6ff105a458 100644 --- a/api/models/tx.js +++ b/api/models/tx.js @@ -160,7 +160,9 @@ const tokenValues = Object.assign( 'gemini-2.5-flash': { prompt: 0.3, completion: 2.5 }, 'gemini-2.5-flash-lite': { prompt: 0.1, completion: 0.4 }, 'gemini-2.5-pro': { prompt: 1.25, completion: 10 }, + 'gemini-2.5-flash-image': { prompt: 0.15, completion: 30 }, 'gemini-3': { prompt: 2, completion: 12 }, + 'gemini-3-pro-image': { prompt: 2, completion: 120 }, 'gemini-pro-vision': { prompt: 0.5, completion: 1.5 }, grok: { prompt: 2.0, completion: 10.0 }, // Base pattern defaults to grok-2 'grok-beta': { prompt: 5.0, completion: 15.0 }, diff --git a/api/package.json b/api/package.json index 68bde06c5e..727b44e6e1 100644 --- a/api/package.json +++ b/api/package.json @@ -42,6 +42,7 @@ "@azure/identity": "^4.7.0", "@azure/search-documents": "^12.0.0", "@azure/storage-blob": "^12.27.0", + "@google/genai": "^1.19.0", "@googleapis/youtube": "^20.0.0", "@keyv/redis": "^4.3.3", "@langchain/core": "^0.3.80", diff --git a/api/server/services/start/tools.js b/api/server/services/start/tools.js index f139eaac4d..4fd35755bc 100644 --- a/api/server/services/start/tools.js +++ b/api/server/services/start/tools.js @@ -5,7 +5,7 @@ const { Calculator } = require('@librechat/agents'); const { logger } = 
require('@librechat/data-schemas'); const { zodToJsonSchema } = require('zod-to-json-schema'); const { Tools, ImageVisionTool } = require('librechat-data-provider'); -const { getToolkitKey, oaiToolkit, ytToolkit } = require('@librechat/api'); +const { getToolkitKey, oaiToolkit, ytToolkit, geminiToolkit } = require('@librechat/api'); const { toolkits } = require('~/app/clients/tools/manifest'); /** @@ -84,6 +84,7 @@ function loadAndFormatTools({ directory, adminFilter = [], adminIncluded = [] }) new Calculator(), ...Object.values(oaiToolkit), ...Object.values(ytToolkit), + ...Object.values(geminiToolkit), ]; for (const toolInstance of basicToolInstances) { const formattedTool = formatToOpenAIAssistantTool(toolInstance); diff --git a/client/public/assets/gemini_image_gen.svg b/client/public/assets/gemini_image_gen.svg new file mode 100644 index 0000000000..25f7457266 --- /dev/null +++ b/client/public/assets/gemini_image_gen.svg @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/client/src/components/Chat/Messages/Content/Part.tsx b/client/src/components/Chat/Messages/Content/Part.tsx index 16de45d476..bfa2b28fac 100644 --- a/client/src/components/Chat/Messages/Content/Part.tsx +++ b/client/src/components/Chat/Messages/Content/Part.tsx @@ -103,7 +103,9 @@ const Part = memo( ); } else if ( isToolCall && - (toolCall.name === 'image_gen_oai' || toolCall.name === 'image_edit_oai') + (toolCall.name === 'image_gen_oai' || + toolCall.name === 'image_edit_oai' || + toolCall.name === 'gemini_image_gen') ) { return ( = 1) { + return localize('com_ui_image_created'); + } + if (progress >= 0.7) { + return localize('com_ui_final_touch'); + } + if (progress >= 0.5) { + return localize('com_ui_adding_details'); + } + if (progress >= 0.3) { + return localize('com_ui_creating_image'); + } + return localize('com_ui_getting_started'); + } + if (progress >= 1) { return localize('com_ui_image_created'); } diff --git a/package-lock.json 
b/package-lock.json index 8678ea73eb..e4cc241f7a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -56,6 +56,7 @@ "@azure/identity": "^4.7.0", "@azure/search-documents": "^12.0.0", "@azure/storage-blob": "^12.27.0", + "@google/genai": "^1.19.0", "@googleapis/youtube": "^20.0.0", "@keyv/redis": "^4.3.3", "@langchain/core": "^0.3.80", @@ -10621,6 +10622,136 @@ "integrity": "sha512-kym7SodPp8/wloecOpcmSnWJsK7M0E5Wg8UcFA+uO4B9s5d0ywXOEro/8HM9x0rW+TljRzul/14UYz3TleT3ig==", "license": "MIT" }, + "node_modules/@google/genai": { + "version": "1.33.0", + "resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.33.0.tgz", + "integrity": "sha512-ThUjFZ1N0DU88peFjnQkb8K198EWaW2RmmnDShFQ+O+xkIH9itjpRe358x3L/b4X/A7dimkvq63oz49Vbh7Cog==", + "license": "Apache-2.0", + "dependencies": { + "google-auth-library": "^10.3.0", + "ws": "^8.18.0" + }, + "engines": { + "node": ">=20.0.0" + }, + "peerDependencies": { + "@modelcontextprotocol/sdk": "^1.24.0" + }, + "peerDependenciesMeta": { + "@modelcontextprotocol/sdk": { + "optional": true + } + } + }, + "node_modules/@google/genai/node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, + "node_modules/@google/genai/node_modules/gaxios": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-7.1.3.tgz", + "integrity": "sha512-YGGyuEdVIjqxkxVH1pUTMY/XtmmsApXrCVv5EU25iX6inEPbV+VakJfLealkBtJN69AQmh1eGOdCl9Sm1UP6XQ==", + "license": "Apache-2.0", + "dependencies": { + "extend": "^3.0.2", + "https-proxy-agent": "^7.0.1", + "node-fetch": "^3.3.2", + "rimraf": "^5.0.1" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@google/genai/node_modules/gcp-metadata": { + "version": "8.1.2", + "resolved": 
"https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-8.1.2.tgz", + "integrity": "sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg==", + "license": "Apache-2.0", + "dependencies": { + "gaxios": "^7.0.0", + "google-logging-utils": "^1.0.0", + "json-bigint": "^1.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@google/genai/node_modules/google-auth-library": { + "version": "10.5.0", + "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-10.5.0.tgz", + "integrity": "sha512-7ABviyMOlX5hIVD60YOfHw4/CxOfBhyduaYB+wbFWCWoni4N7SLcV46hrVRktuBbZjFC9ONyqamZITN7q3n32w==", + "license": "Apache-2.0", + "dependencies": { + "base64-js": "^1.3.0", + "ecdsa-sig-formatter": "^1.0.11", + "gaxios": "^7.0.0", + "gcp-metadata": "^8.0.0", + "google-logging-utils": "^1.0.0", + "gtoken": "^8.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@google/genai/node_modules/google-logging-utils": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-1.1.3.tgz", + "integrity": "sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA==", + "license": "Apache-2.0", + "engines": { + "node": ">=14" + } + }, + "node_modules/@google/genai/node_modules/gtoken": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/gtoken/-/gtoken-8.0.0.tgz", + "integrity": "sha512-+CqsMbHPiSTdtSO14O51eMNlrp9N79gmeqmXeouJOhfucAedHw9noVe/n5uJk3tbKE6a+6ZCQg3RPhVhHByAIw==", + "license": "MIT", + "dependencies": { + "gaxios": "^7.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@google/genai/node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + 
"license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/@google/genai/node_modules/node-fetch": { + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz", + "integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==", + "license": "MIT", + "dependencies": { + "data-uri-to-buffer": "^4.0.0", + "fetch-blob": "^3.1.4", + "formdata-polyfill": "^4.0.10" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/node-fetch" + } + }, "node_modules/@google/generative-ai": { "version": "0.24.0", "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.24.0.tgz", @@ -12144,6 +12275,15 @@ "@langchain/core": ">=0.3.58 <0.4.0" } }, + "node_modules/@langchain/google-gauth/node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, "node_modules/@langchain/google-gauth/node_modules/brace-expansion": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", @@ -12182,26 +12322,6 @@ "node": ">=18" } }, - "node_modules/@langchain/google-gauth/node_modules/glob": { - "version": "10.5.0", - "resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz", - "integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==", - "license": "ISC", - "dependencies": { - "foreground-child": "^3.1.0", - "jackspeak": "^3.1.2", - "minimatch": "^9.0.4", - "minipass": "^7.1.2", - "package-json-from-dist": "^1.0.0", - "path-scurry": "^1.11.1" - }, - "bin": { - "glob": 
"dist/esm/bin.mjs" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/@langchain/google-gauth/node_modules/google-auth-library": { "version": "10.5.0", "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-10.5.0.tgz", @@ -12242,6 +12362,19 @@ "node": ">=18" } }, + "node_modules/@langchain/google-gauth/node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/@langchain/google-gauth/node_modules/minimatch": { "version": "9.0.5", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", @@ -12275,21 +12408,6 @@ "url": "https://opencollective.com/node-fetch" } }, - "node_modules/@langchain/google-gauth/node_modules/rimraf": { - "version": "5.0.10", - "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-5.0.10.tgz", - "integrity": "sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ==", - "license": "ISC", - "dependencies": { - "glob": "^10.3.7" - }, - "bin": { - "rimraf": "dist/esm/bin.mjs" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/@langchain/google-genai": { "version": "0.2.18", "resolved": "https://registry.npmjs.org/@langchain/google-genai/-/google-genai-0.2.18.tgz", @@ -38437,6 +38555,65 @@ "dev": true, "license": "MIT" }, + "node_modules/rimraf": { + "version": "5.0.10", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-5.0.10.tgz", + "integrity": "sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ==", + "license": "ISC", + "dependencies": { + "glob": "^10.3.7" + }, + "bin": { + "rimraf": 
"dist/esm/bin.mjs" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/rimraf/node_modules/brace-expansion": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", + "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/rimraf/node_modules/glob": { + "version": "10.5.0", + "resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz", + "integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==", + "license": "ISC", + "dependencies": { + "foreground-child": "^3.1.0", + "jackspeak": "^3.1.2", + "minimatch": "^9.0.4", + "minipass": "^7.1.2", + "package-json-from-dist": "^1.0.0", + "path-scurry": "^1.11.1" + }, + "bin": { + "glob": "dist/esm/bin.mjs" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/rimraf/node_modules/minimatch": { + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", + "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", + "license": "ISC", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, "node_modules/ripemd160": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/ripemd160/-/ripemd160-2.0.2.tgz", @@ -42747,7 +42924,6 @@ "version": "8.18.0", "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.0.tgz", "integrity": "sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==", - "devOptional": true, "engines": { "node": ">=10.0.0" }, @@ -43055,6 +43231,7 @@ "@azure/identity": "^4.7.0", "@azure/search-documents": "^12.0.0", 
import { z } from 'zod';

/** Built-in tool description, used when `GEMINI_IMAGE_GEN_DESCRIPTION` is unset or empty. */
const DEFAULT_GEMINI_IMAGE_GEN_DESCRIPTION =
  `Generates high-quality, original images based on text prompts, with optional image context.

When to use \`gemini_image_gen\`:
- To create entirely new images from detailed text descriptions
- To generate images using existing images as context or inspiration
- When the user requests image generation, creation, or asks to "generate an image"
- When the user asks to "edit", "modify", "change", or "swap" elements in an image (generates new image with changes)

When NOT to use \`gemini_image_gen\`:
- For uploading or saving existing images without modification

Generated image IDs will be returned in the response, so you can refer to them in future requests.` as const;

/** Built-in description of the `prompt` field, used when `GEMINI_IMAGE_GEN_PROMPT_DESCRIPTION` is unset or empty. */
const DEFAULT_GEMINI_IMAGE_GEN_PROMPT_DESCRIPTION =
  `A detailed text description of the desired image, up to 32000 characters. For "editing" requests, describe the changes you want to make to the referenced image. Be specific about composition, style, lighting, and subject matter.` as const;

/** Built-in description of the `image_ids` field, used when `GEMINI_IMAGE_IDS_DESCRIPTION` is unset or empty. */
const DEFAULT_GEMINI_IMAGE_IDS_DESCRIPTION = `
Optional array of image IDs to use as visual context for generation.

Guidelines:
- For "editing" requests: ALWAYS include the image ID being "edited"
- For new generation with context: Include any relevant reference image IDs
- If the user's request references any prior images, include their image IDs in this array
- These images will be used as visual context/inspiration for the new generation
- Never invent or hallucinate IDs; only use IDs that are visible in the conversation
- If no images are relevant, omit this field entirely
`.trim();

/*
 * Each getter prefers its environment override and falls back to the built-in text.
 * `||` (not `??`) is deliberate: an env var set to the empty string also falls back.
 */

/** Resolves the tool description shown for `gemini_image_gen`. */
const getGeminiImageGenDescription = () =>
  process.env.GEMINI_IMAGE_GEN_DESCRIPTION || DEFAULT_GEMINI_IMAGE_GEN_DESCRIPTION;

/** Resolves the description attached to the schema's `prompt` field. */
const getGeminiImageGenPromptDescription = () =>
  process.env.GEMINI_IMAGE_GEN_PROMPT_DESCRIPTION || DEFAULT_GEMINI_IMAGE_GEN_PROMPT_DESCRIPTION;

/** Resolves the description attached to the schema's `image_ids` field. */
const getGeminiImageIdsDescription = () =>
  process.env.GEMINI_IMAGE_IDS_DESCRIPTION || DEFAULT_GEMINI_IMAGE_IDS_DESCRIPTION;

/** Model-facing usage instructions for `gemini_image_gen` (not overridable via environment). */
const GEMINI_IMAGE_GEN_MODEL_INSTRUCTIONS = `Use this tool to generate images from text descriptions using Vertex AI Gemini.
1. Prompts should be detailed and specific for best results.
2. One image per function call. Create only 1 image per request.
3. IMPORTANT: When user asks to "edit", "modify", "change", or "swap" elements in an existing image:
   - ALWAYS include the original image ID in the image_ids array
   - Describe the desired changes clearly in the prompt
   - The tool will generate a new image based on the original image context + your prompt
4. IMPORTANT: For editing requests, use DIRECT editing instructions:
   - User says "remove the gun" → prompt should be "remove the gun from this image"
   - User says "make it blue" → prompt should be "make this image blue"
   - User says "add sunglasses" → prompt should be "add sunglasses to this image"
   - DO NOT reconstruct or modify the original prompt - use the user's editing instruction directly
   - ALWAYS include the image being edited in image_ids array
5. OPTIONAL: Use image_ids to provide context images that will influence the generation:
   - Include any relevant image IDs from the conversation in the image_ids array
   - These images will be used as visual context/inspiration for the new generation
   - For "editing" requests, always include the image being "edited"
6. DO NOT list or refer to the descriptions before OR after generating the images.
7. Always mention the image type (photo, oil painting, watercolor painting, illustration, cartoon, drawing, vector, render, etc.) at the beginning of the prompt.
8. Use aspectRatio to control the shape of the image:
   - 16:9 or 3:2 for landscape/wide images
   - 9:16 or 2:3 for portrait/tall images
   - 21:9 for ultra-wide/cinematic images
   - 1:1 for square images (default)
9. Use imageSize to control the resolution: 1K (standard), 2K (high), 4K (maximum quality).

The prompt should be a detailed paragraph describing every part of the image in concrete, objective detail.`;

/**
 * Input schema for `gemini_image_gen`.
 * Only `prompt` is required (max 32000 characters). The schema itself applies no defaults
 * for `aspectRatio` / `imageSize`; the "defaults to" wording lives in the field descriptions only.
 */
const geminiImageGenSchema = z.object({
  prompt: z.string().max(32000).describe(getGeminiImageGenPromptDescription()),
  image_ids: z.array(z.string()).optional().describe(getGeminiImageIdsDescription()),
  aspectRatio: z
    .enum(['1:1', '2:3', '3:2', '3:4', '4:3', '4:5', '5:4', '9:16', '16:9', '21:9'])
    .optional()
    .describe(
      'The aspect ratio of the generated image. Use 16:9 or 3:2 for landscape, 9:16 or 2:3 for portrait, 21:9 for ultra-wide/cinematic, 1:1 for square. Defaults to 1:1 if not specified.',
    ),
  imageSize: z
    .enum(['1K', '2K', '4K'])
    .optional()
    .describe(
      'The resolution of the generated image. Use 1K for standard, 2K for high, 4K for maximum quality. Defaults to 1K if not specified.',
    ),
});

/**
 * Toolkit definition for the Gemini image generation tool.
 * The environment-overridable descriptions are resolved once, when this module is first
 * evaluated; later changes to `process.env` are not reflected in this object.
 */
export const geminiToolkit = {
  gemini_image_gen: {
    name: 'gemini_image_gen' as const,
    description: getGeminiImageGenDescription(),
    description_for_model: GEMINI_IMAGE_GEN_MODEL_INSTRUCTIONS,
    schema: geminiImageGenSchema,
    responseFormat: 'content_and_artifact' as const,
  },
} as const;

/** Static type of {@link geminiToolkit}. */
export type GeminiToolkit = typeof geminiToolkit;
/**
 * Builds the tool-context string that tells the model which image IDs from the current
 * request it may pass to an image generation tool.
 *
 * Entries that are missing, or whose `file_id` is not a non-empty string, are ignored.
 * They are dropped before the text is assembled, so the header and the closing
 * instructions are always present when at least one usable ID exists, wherever the
 * unusable entries sit in the array.
 *
 * @param params - The parameters for building image context
 * @param params.imageFiles - Array of image file objects with file_id property
 * @param params.toolName - The name of the tool (e.g., 'gemini_image_gen', 'image_edit_oai')
 * @param params.contextDescription - Description of what the images are for (e.g., 'image context', 'image editing')
 * @returns The tool context string, or an empty string when there are no usable image IDs
 */
export function buildImageToolContext({
  imageFiles,
  toolName,
  contextDescription = 'image context',
}: {
  imageFiles: Array<{ file_id: string }>;
  toolName: string;
  contextDescription?: string;
}): string {
  // Collect usable IDs first; listing a hole or an `undefined` ID would invite the model
  // to reference an image that does not exist.
  const fileIds = (imageFiles ?? [])
    .map((file) => file?.file_id)
    .filter((fileId): fileId is string => typeof fileId === 'string' && fileId.length > 0);

  if (fileIds.length === 0) {
    return '';
  }

  const header = `Image files provided in this request (their image IDs listed in order of appearance) available for ${contextDescription}:`;
  const idList = fileIds.map((fileId) => `\n\t- ${fileId}`).join('');
  const footer = `\n\nInclude any you need in the \`image_ids\` array when calling \`${toolName}\` to use them as visual context for generation. You may also include previously referenced or generated image IDs.`;

  return `${header}${idList}${footer}`;
}