From 3da1d8c96198e4eac5e86a51d4d6e329e1427baa Mon Sep 17 00:00:00 2001 From: Yashwanth Alapati <145064639+yashwanth-alapati@users.noreply.github.com> Date: Sun, 3 May 2026 22:29:13 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=8D=20feat:=20add=20Tavily=20as=20Sear?= =?UTF-8?q?ch=20and=20Scraper=20Provider=20(#12581)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add Tavily integration as search provider and scraper provider * chore: update tavily web search parameters * chore: tavily parameter update * chore: update data-schemas test for tavily * fix: allow Tavily string option modes * fix: align Tavily config options * fix: scope Tavily scraper timeout * fix: use resolved scraper provider timeout * fix: widen Tavily search provider types * fix: harden Tavily web search config * fix: cap Tavily option timeouts --------- Co-authored-by: Danny Avila --- .env.example | 3 + .../SidePanel/Agents/Search/ApiKeyDialog.tsx | 28 ++ client/src/hooks/Plugins/useAuthSearchTool.ts | 2 + client/src/locales/en/translation.json | 4 + librechat.example.yaml | 34 +++ packages/api/src/web/web.spec.ts | 269 ++++++++++++++++++ packages/api/src/web/web.ts | 16 +- packages/data-provider/src/config.spec.ts | 74 ++++- packages/data-provider/src/config.ts | 32 +++ packages/data-provider/src/types/web.ts | 16 +- packages/data-schemas/src/app/web.spec.ts | 3 + packages/data-schemas/src/app/web.ts | 16 +- packages/data-schemas/src/types/web.ts | 3 + 13 files changed, 495 insertions(+), 5 deletions(-) diff --git a/.env.example b/.env.example index d069599476..9a5514ee9f 100644 --- a/.env.example +++ b/.env.example @@ -823,6 +823,9 @@ OPENWEATHER_API_KEY= # Search Provider (Required) # SERPER_API_KEY=your_serper_api_key +# Tavily (Search Provider and/or Scraper) +# TAVILY_API_KEY=your_tavily_api_key + # Scraper (Required) # FIRECRAWL_API_KEY=your_firecrawl_api_key # Optional: Custom Firecrawl API URL diff --git 
a/client/src/components/SidePanel/Agents/Search/ApiKeyDialog.tsx b/client/src/components/SidePanel/Agents/Search/ApiKeyDialog.tsx index a7ec8fdc17..0202ec96ee 100644 --- a/client/src/components/SidePanel/Agents/Search/ApiKeyDialog.tsx +++ b/client/src/components/SidePanel/Agents/Search/ApiKeyDialog.tsx @@ -78,6 +78,20 @@ export default function ApiKeyDialog({ }, }, }, + { + key: SearchProviders.TAVILY, + label: localize('com_ui_web_search_provider_tavily'), + inputs: { + tavilyApiKey: { + placeholder: localize('com_ui_enter_api_key'), + type: 'password' as const, + link: { + url: 'https://app.tavily.com/home', + text: localize('com_ui_web_search_provider_tavily_key'), + }, + }, + }, + }, ]; const rerankerOptions: DropdownOption[] = [ @@ -152,6 +166,20 @@ export default function ApiKeyDialog({ }, }, }, + { + key: ScraperProviders.TAVILY, + label: localize('com_ui_web_search_scraper_tavily'), + inputs: { + tavilyApiKey: { + placeholder: localize('com_ui_enter_api_key'), + type: 'password' as const, + link: { + url: 'https://app.tavily.com/home', + text: localize('com_ui_web_search_scraper_tavily_key'), + }, + }, + }, + }, ]; const [dropdownOpen, setDropdownOpen] = useState({ diff --git a/client/src/hooks/Plugins/useAuthSearchTool.ts b/client/src/hooks/Plugins/useAuthSearchTool.ts index bd5f41fe78..ffb156dba1 100644 --- a/client/src/hooks/Plugins/useAuthSearchTool.ts +++ b/client/src/hooks/Plugins/useAuthSearchTool.ts @@ -14,6 +14,7 @@ export type SearchApiKeyFormData = { searxngApiKey: string; firecrawlApiKey: string; firecrawlApiUrl: string; + tavilyApiKey: string; jinaApiKey: string; jinaApiUrl: string; cohereApiKey: string; @@ -54,6 +55,7 @@ const useAuthSearchTool = (options?: { isEntityTool: boolean }) => { searxngApiKey: data.searxngApiKey, firecrawlApiKey: data.firecrawlApiKey, firecrawlApiUrl: data.firecrawlApiUrl, + tavilyApiKey: data.tavilyApiKey, jinaApiKey: data.jinaApiKey, jinaApiUrl: data.jinaApiUrl, cohereApiKey: data.cohereApiKey, diff --git 
a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json index 2c7cd6c284..efeaced119 100644 --- a/client/src/locales/en/translation.json +++ b/client/src/locales/en/translation.json @@ -1678,6 +1678,8 @@ "com_ui_web_search_provider_searxng": "SearXNG", "com_ui_web_search_provider_serper": "Serper API", "com_ui_web_search_provider_serper_key": "Get your Serper API key", + "com_ui_web_search_provider_tavily": "Tavily API", + "com_ui_web_search_provider_tavily_key": "Get your Tavily API key", "com_ui_web_search_reading": "Reading results", "com_ui_web_search_reranker": "Reranker", "com_ui_web_search_reranker_cohere": "Cohere", @@ -1690,6 +1692,8 @@ "com_ui_web_search_scraper_firecrawl_key": "Get your Firecrawl API key", "com_ui_web_search_scraper_serper": "Serper Scrape API", "com_ui_web_search_scraper_serper_key": "Get your Serper API key", + "com_ui_web_search_scraper_tavily": "Tavily Extract API", + "com_ui_web_search_scraper_tavily_key": "Get your Tavily API key", "com_ui_web_search_searxng_api_key": "Enter SearXNG API Key (optional)", "com_ui_web_search_searxng_instance_url": "SearXNG Instance URL", "com_ui_web_search_source": "{{count}} source", diff --git a/librechat.example.yaml b/librechat.example.yaml index c8bf99fe31..98de2c6541 100644 --- a/librechat.example.yaml +++ b/librechat.example.yaml @@ -628,9 +628,43 @@ endpoints: # serperApiKey: '${SERPER_API_KEY}' # searxngInstanceUrl: '${SEARXNG_INSTANCE_URL}' # searxngApiKey: '${SEARXNG_API_KEY}' +# # Tavily (search provider and/or scraper) +# tavilyApiKey: '${TAVILY_API_KEY}' # # Content scrapers # firecrawlApiKey: '${FIRECRAWL_API_KEY}' # firecrawlApiUrl: '${FIRECRAWL_API_URL}' +# +# Tavily as both search and scraper provider example: +# webSearch: +# searchProvider: tavily +# scraperProvider: tavily +# tavilyApiKey: '${TAVILY_API_KEY}' +# # Optional: custom API URLs (defaults to https://api.tavily.com/search and https://api.tavily.com/extract) +# # tavilySearchUrl: 
'${TAVILY_SEARCH_URL}' +# # tavilyExtractUrl: '${TAVILY_EXTRACT_URL}' +# tavilySearchOptions: +# searchDepth: basic # 'basic', 'advanced', 'fast', or 'ultra-fast' (default: basic) +# maxResults: 5 # 1-20 results per search (default: 5) +# topic: general # 'general', 'news', or 'finance' +# # includeAnswer: basic # Include answer summary: true, 'basic', or 'advanced' +# # includeRawContent: markdown # Include raw content: true, 'markdown', or 'text' +# # includeImages: true # Include images in results +# # includeFavicon: true # Include favicon URL for each result +# # chunksPerSource: 3 # Chunks per source, only with 'advanced' depth (1-3) +# # safeSearch: false # Override Tavily safe search filtering +# # includeDomains: # Restrict search to specific domains (max 300) +# # - 'example.com' +# # - 'docs.example.com' +# # excludeDomains: # Exclude specific domains from results (max 150) +# # - 'spam.com' +# # timeRange: week # 'day', 'week', 'month', or 'year' +# # timeout: 15000 # HTTP request timeout in milliseconds (max 120000) +# tavilyScraperOptions: +# extractDepth: basic # 'basic' (1 credit/5 URLs) or 'advanced' (2 credits/5 URLs, more thorough) +# # includeImages: false # Include images extracted from URLs +# # includeFavicon: false # Include favicon URL for each result +# # format: markdown # 'markdown' (default) or 'text' (plain text, may increase latency) +# # timeout: 15000 # HTTP request timeout in milliseconds (max 120000); Tavily Extract receives seconds clamped to 1-60 # Memory configuration for user memories # memory: diff --git a/packages/api/src/web/web.spec.ts b/packages/api/src/web/web.spec.ts index 5a7bf4a53a..81f9a087b4 100644 --- a/packages/api/src/web/web.spec.ts +++ b/packages/api/src/web/web.spec.ts @@ -730,6 +730,130 @@ describe('web.ts', () => { expect(providerCalls.length).toBe(1); }); + it('should authenticate Tavily as a search provider and pass options through', async () => { + const webSearchConfig: TCustomConfig['webSearch'] = { + 
tavilyApiKey: '${TAVILY_API_KEY}', + tavilySearchUrl: '${TAVILY_SEARCH_URL}', + firecrawlApiKey: '${FIRECRAWL_API_KEY}', + firecrawlApiUrl: '${FIRECRAWL_API_URL}', + safeSearch: SafeSearchTypes.MODERATE, + searchProvider: 'tavily' as SearchProviders, + scraperProvider: 'firecrawl' as ScraperProviders, + rerankerType: 'none' as RerankerTypes, + tavilySearchOptions: { + searchDepth: 'advanced', + maxResults: 5, + includeRawContent: 'markdown', + }, + }; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record = {}; + authFields.forEach((field: string) => { + if (field === 'TAVILY_API_KEY') { + result[field] = 'tavily-api-key'; + } else if (field === 'TAVILY_SEARCH_URL') { + result[field] = 'https://api.tavily.com/search'; + } else if (field === 'FIRECRAWL_API_URL') { + result[field] = 'https://api.firecrawl.dev'; + } else { + result[field] = 'test-api-key'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authenticated).toBe(true); + expect(result.authResult.searchProvider).toBe('tavily'); + expect(result.authResult.tavilyApiKey).toBe('tavily-api-key'); + expect(result.authResult.tavilySearchUrl).toBe('https://api.tavily.com/search'); + expect(result.authResult.tavilySearchOptions).toEqual(webSearchConfig.tavilySearchOptions); + }); + + it('should fail authentication when Tavily search API key is missing', async () => { + const webSearchConfig: TCustomConfig['webSearch'] = { + tavilyApiKey: '${TAVILY_API_KEY}', + firecrawlApiKey: '${FIRECRAWL_API_KEY}', + firecrawlApiUrl: '${FIRECRAWL_API_URL}', + safeSearch: SafeSearchTypes.MODERATE, + searchProvider: 'tavily' as SearchProviders, + scraperProvider: 'firecrawl' as ScraperProviders, + rerankerType: 'none' as RerankerTypes, + }; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record = {}; + authFields.forEach((field: 
string) => { + if (field !== 'TAVILY_API_KEY') { + result[field] = + field === 'FIRECRAWL_API_URL' ? 'https://api.firecrawl.dev' : 'test-api-key'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authenticated).toBe(false); + const providersAuthType = result.authTypes.find( + ([category]) => category === 'providers', + )?.[1]; + expect(providersAuthType).toBe(AuthType.USER_PROVIDED); + }); + + it('should authenticate Tavily as both search provider and scraper with a shared key', async () => { + const webSearchConfig: TCustomConfig['webSearch'] = { + tavilyApiKey: '${TAVILY_API_KEY}', + tavilySearchUrl: '${TAVILY_SEARCH_URL}', + tavilyExtractUrl: '${TAVILY_EXTRACT_URL}', + safeSearch: SafeSearchTypes.MODERATE, + searchProvider: 'tavily' as SearchProviders, + scraperProvider: 'tavily' as ScraperProviders, + rerankerType: 'none' as RerankerTypes, + tavilyScraperOptions: { + extractDepth: 'advanced', + timeout: 20000, + }, + }; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record = {}; + authFields.forEach((field: string) => { + if (field === 'TAVILY_API_KEY') { + result[field] = 'tavily-api-key'; + } else if (field === 'TAVILY_SEARCH_URL') { + result[field] = 'https://api.tavily.com/search'; + } else if (field === 'TAVILY_EXTRACT_URL') { + result[field] = 'https://api.tavily.com/extract'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authenticated).toBe(true); + expect(result.authResult.searchProvider).toBe('tavily'); + expect(result.authResult.scraperProvider).toBe('tavily'); + expect(result.authResult.tavilyApiKey).toBe('tavily-api-key'); + expect(result.authResult.tavilyScraperOptions).toEqual(webSearchConfig.tavilyScraperOptions); + 
expect(result.authResult.scraperTimeout).toBe(20000); + }); + it('should only check the specified scraperProvider', async () => { // Initialize a webSearchConfig with a specific scraperProvider const webSearchConfig: TCustomConfig['webSearch'] = { @@ -1165,6 +1289,76 @@ describe('web.ts', () => { }); }); + it('should not use tavilyScraperOptions.timeout for firecrawl scraper', async () => { + const webSearchConfig = { + serperApiKey: '${SERPER_API_KEY}', + firecrawlApiKey: '${FIRECRAWL_API_KEY}', + firecrawlApiUrl: '${FIRECRAWL_API_URL}', + tavilyApiKey: '${TAVILY_API_KEY}', + jinaApiKey: '${JINA_API_KEY}', + jinaApiUrl: '${JINA_API_URL}', + safeSearch: SafeSearchTypes.MODERATE, + scraperProvider: 'firecrawl' as ScraperProviders, + tavilyScraperOptions: { + timeout: 22000, + }, + } as TCustomConfig['webSearch']; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record = {}; + authFields.forEach((field: string) => { + result[field] = + field === 'FIRECRAWL_API_URL' ? 
'https://api.firecrawl.dev' : 'test-api-key'; + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authenticated).toBe(true); + expect(result.authResult.scraperTimeout).toBe(7500); + }); + + it('should use tavilyScraperOptions.timeout for tavily scraper', async () => { + const webSearchConfig = { + serperApiKey: '${SERPER_API_KEY}', + firecrawlApiKey: '${FIRECRAWL_API_KEY}', + firecrawlApiUrl: '${FIRECRAWL_API_URL}', + tavilyApiKey: '${TAVILY_API_KEY}', + jinaApiKey: '${JINA_API_KEY}', + jinaApiUrl: '${JINA_API_URL}', + safeSearch: SafeSearchTypes.MODERATE, + scraperProvider: 'tavily' as ScraperProviders, + firecrawlOptions: { + timeout: 12000, + }, + tavilyScraperOptions: { + timeout: 22000, + }, + } as TCustomConfig['webSearch']; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record = {}; + authFields.forEach((field: string) => { + result[field] = + field === 'FIRECRAWL_API_URL' ? 
'https://api.firecrawl.dev' : 'test-api-key'; + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authResult.scraperTimeout).toBe(22000); + }); + it('should handle firecrawlOptions.formats when only formats is provided', async () => { // Initialize a webSearchConfig with only firecrawlOptions.formats const webSearchConfig = { @@ -1359,6 +1553,81 @@ describe('web.ts', () => { expect(scrapersAuth).toBe(AuthType.USER_PROVIDED); }); + it('should block user-provided tavilySearchUrl targeting localhost', async () => { + mockIsSSRFTarget.mockImplementation((hostname: string) => hostname === 'localhost'); + + const webSearchConfig: TCustomConfig['webSearch'] = { + tavilyApiKey: '${TAVILY_API_KEY}', + tavilySearchUrl: '${TAVILY_SEARCH_URL}', + firecrawlApiKey: '${FIRECRAWL_API_KEY}', + safeSearch: SafeSearchTypes.MODERATE, + searchProvider: 'tavily' as SearchProviders, + rerankerType: 'none' as RerankerTypes, + }; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record = {}; + authFields.forEach((field: string) => { + if (field === 'TAVILY_SEARCH_URL') { + result[field] = 'http://localhost:8080/search'; + } else { + result[field] = 'test-api-key'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authResult.tavilySearchUrl).toBeUndefined(); + expect(result.authResult.searchProvider).toBe('tavily'); + expect(result.authenticated).toBe(true); + expect(mockIsSSRFTarget).toHaveBeenCalledWith('localhost'); + }); + + it('should block user-provided tavilyExtractUrl resolving to private IP', async () => { + mockResolveHostnameSSRF.mockImplementation((hostname: string) => + Promise.resolve(hostname === 'extract.internal-service.com'), + ); + + const webSearchConfig: 
TCustomConfig['webSearch'] = { + serperApiKey: '${SERPER_API_KEY}', + tavilyApiKey: '${TAVILY_API_KEY}', + tavilyExtractUrl: '${TAVILY_EXTRACT_URL}', + safeSearch: SafeSearchTypes.MODERATE, + scraperProvider: 'tavily' as ScraperProviders, + rerankerType: 'none' as RerankerTypes, + }; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record = {}; + authFields.forEach((field: string) => { + if (field === 'TAVILY_EXTRACT_URL') { + result[field] = 'https://extract.internal-service.com/extract'; + } else { + result[field] = 'test-api-key'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authResult.tavilyExtractUrl).toBeUndefined(); + expect(result.authResult.scraperProvider).toBe('tavily'); + expect(result.authenticated).toBe(true); + const scrapersAuth = result.authTypes.find(([c]) => c === 'scrapers')?.[1]; + expect(scrapersAuth).toBe(AuthType.USER_PROVIDED); + }); + it('should block user-provided searxngInstanceUrl targeting metadata endpoint', async () => { mockIsSSRFTarget.mockImplementation((hostname: string) => hostname === '169.254.169.254'); diff --git a/packages/api/src/web/web.ts b/packages/api/src/web/web.ts index 0b66a49510..59d071a8d7 100644 --- a/packages/api/src/web/web.ts +++ b/packages/api/src/web/web.ts @@ -23,6 +23,8 @@ const WEB_SEARCH_URL_KEYS = new Set([ 'searxngInstanceUrl', 'firecrawlApiUrl', 'jinaApiUrl', + 'tavilySearchUrl', + 'tavilyExtractUrl', ]); /** @@ -245,10 +247,20 @@ export async function loadWebSearchAuth({ authTypes.push([category, isUserProvided ? AuthType.USER_PROVIDED : AuthType.SYSTEM_DEFINED]); } + const scraperProvider = + authResult.scraperProvider ?? webSearchConfig?.scraperProvider ?? 
'firecrawl'; + let scraperOptionsTimeout: number | undefined; + if (scraperProvider === 'tavily') { + scraperOptionsTimeout = webSearchConfig?.tavilyScraperOptions?.timeout; + } else if (scraperProvider === 'firecrawl') { + scraperOptionsTimeout = webSearchConfig?.firecrawlOptions?.timeout; + } + authResult.safeSearch = webSearchConfig?.safeSearch ?? SafeSearchTypes.MODERATE; - authResult.scraperTimeout = - webSearchConfig?.scraperTimeout ?? webSearchConfig?.firecrawlOptions?.timeout ?? 7500; + authResult.scraperTimeout = webSearchConfig?.scraperTimeout ?? scraperOptionsTimeout ?? 7500; authResult.firecrawlOptions = webSearchConfig?.firecrawlOptions; + authResult.tavilySearchOptions = webSearchConfig?.tavilySearchOptions; + authResult.tavilyScraperOptions = webSearchConfig?.tavilyScraperOptions; return { authTypes, diff --git a/packages/data-provider/src/config.spec.ts b/packages/data-provider/src/config.spec.ts index 7160404366..a6326b2635 100644 --- a/packages/data-provider/src/config.spec.ts +++ b/packages/data-provider/src/config.spec.ts @@ -1,7 +1,13 @@ import type { TEndpointsConfig } from './types'; import { EModelEndpoint, isDocumentSupportedProvider } from './schemas'; import { getEndpointFileConfig, mergeFileConfig } from './file-config'; -import { allowedAddressesSchema, configSchema, resolveEndpointType, excludedKeys } from './config'; +import { + allowedAddressesSchema, + configSchema, + excludedKeys, + resolveEndpointType, + webSearchSchema, +} from './config'; const endpointsConfig: TEndpointsConfig = { [EModelEndpoint.openAI]: { userProvide: false, order: 0 }, @@ -454,3 +460,69 @@ describe('allowedAddressesSchema', () => { }); }); }); + +describe('webSearchSchema', () => { + it('accepts Tavily string modes for answer and raw content options', () => { + const result = webSearchSchema.parse({ + tavilySearchOptions: { + includeAnswer: 'advanced', + includeRawContent: 'markdown', + safeSearch: false, + }, + }); + + 
expect(result.tavilySearchOptions?.includeAnswer).toBe('advanced'); + expect(result.tavilySearchOptions?.includeRawContent).toBe('markdown'); + expect(result.tavilySearchOptions?.safeSearch).toBe(false); + }); + + it('accepts Tavily scraper options', () => { + const result = webSearchSchema.parse({ + tavilyScraperOptions: { + extractDepth: 'advanced', + format: 'text', + includeFavicon: true, + timeout: 15000, + }, + }); + + expect(result.tavilyScraperOptions?.extractDepth).toBe('advanced'); + expect(result.tavilyScraperOptions?.format).toBe('text'); + expect(result.tavilyScraperOptions?.includeFavicon).toBe(true); + expect(result.tavilyScraperOptions?.timeout).toBe(15000); + }); + + it('rejects invalid Tavily search options', () => { + expect(() => + webSearchSchema.parse({ + tavilySearchOptions: { + searchDepth: 'invalid', + }, + }), + ).toThrow(); + + expect(() => + webSearchSchema.parse({ + tavilySearchOptions: { + maxResults: 0, + }, + }), + ).toThrow(); + + expect(() => + webSearchSchema.parse({ + tavilySearchOptions: { + timeout: 120001, + }, + }), + ).toThrow(); + + expect(() => + webSearchSchema.parse({ + tavilyScraperOptions: { + timeout: 120001, + }, + }), + ).toThrow(); + }); +}); diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index 00d4f1a820..72228d547f 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -1026,11 +1026,13 @@ export enum SearchCategories { export enum SearchProviders { SERPER = 'serper', SEARXNG = 'searxng', + TAVILY = 'tavily', } export enum ScraperProviders { FIRECRAWL = 'firecrawl', SERPER = 'serper', + TAVILY = 'tavily', } export enum RerankerTypes { @@ -1052,6 +1054,9 @@ export const webSearchSchema = z.object({ firecrawlApiKey: z.string().optional().default('${FIRECRAWL_API_KEY}'), firecrawlApiUrl: z.string().optional().default('${FIRECRAWL_API_URL}'), firecrawlVersion: z.string().optional().default('${FIRECRAWL_VERSION}'), + tavilyApiKey: 
z.string().optional().default('${TAVILY_API_KEY}'), + tavilySearchUrl: z.string().optional().default('${TAVILY_SEARCH_URL}'), + tavilyExtractUrl: z.string().optional().default('${TAVILY_EXTRACT_URL}'), jinaApiKey: z.string().optional().default('${JINA_API_KEY}'), jinaApiUrl: z.string().optional().default('${JINA_API_URL}'), cohereApiKey: z.string().optional().default('${COHERE_API_KEY}'), @@ -1093,6 +1098,33 @@ export const webSearchSchema = z.object({ .optional(), }) .optional(), + tavilySearchOptions: z + .object({ + searchDepth: z.enum(['basic', 'advanced', 'fast', 'ultra-fast']).optional(), + maxResults: z.number().int().min(1).max(20).optional(), + includeImages: z.boolean().optional(), + includeAnswer: z.union([z.boolean(), z.enum(['basic', 'advanced'])]).optional(), + includeRawContent: z.union([z.boolean(), z.enum(['markdown', 'text'])]).optional(), + includeDomains: z.array(z.string()).optional(), + excludeDomains: z.array(z.string()).optional(), + topic: z.enum(['general', 'news', 'finance']).optional(), + timeRange: z.enum(['day', 'week', 'month', 'year', 'd', 'w', 'm', 'y']).optional(), + includeImageDescriptions: z.boolean().optional(), + includeFavicon: z.boolean().optional(), + chunksPerSource: z.number().int().min(1).max(3).optional(), + safeSearch: z.boolean().optional(), + timeout: z.number().int().nonnegative().max(120000).optional(), + }) + .optional(), + tavilyScraperOptions: z + .object({ + extractDepth: z.enum(['basic', 'advanced']).optional(), + includeImages: z.boolean().optional(), + includeFavicon: z.boolean().optional(), + format: z.enum(['markdown', 'text']).optional(), + timeout: z.number().int().nonnegative().max(120000).optional(), + }) + .optional(), }); export type TWebSearchConfig = DeepPartial>; diff --git a/packages/data-provider/src/types/web.ts b/packages/data-provider/src/types/web.ts index 0a95e0e6a5..2196b3cd85 100644 --- a/packages/data-provider/src/types/web.ts +++ b/packages/data-provider/src/types/web.ts @@ -1,4 +1,6 @@ 
import type { Logger as WinstonLogger } from 'winston'; +import type { z } from 'zod'; +import type { webSearchSchema } from '../config'; export type SearchRefType = 'search' | 'image' | 'news' | 'video' | 'ref'; @@ -10,7 +12,8 @@ export enum DATE_RANGE { PAST_YEAR = 'y', } -export type SearchProvider = 'serper' | 'searxng'; +export type SearchProvider = 'serper' | 'searxng' | 'tavily'; +export type ScraperProvider = 'firecrawl' | 'serper' | 'tavily'; export type RerankerType = 'infinity' | 'jina' | 'cohere' | 'none'; export interface Highlight { @@ -73,6 +76,9 @@ export interface SearchConfig { serperApiKey?: string; searxngInstanceUrl?: string; searxngApiKey?: string; + tavilyApiKey?: string; + tavilySearchUrl?: string; + tavilySearchOptions?: TavilyConfig['tavilySearchOptions']; } export type References = { @@ -129,6 +135,14 @@ export interface FirecrawlConfig { }; } +export interface TavilyConfig { + tavilyApiKey?: string; + tavilySearchUrl?: string; + tavilyExtractUrl?: string; + tavilySearchOptions?: z.infer['tavilySearchOptions']; + tavilyScraperOptions?: z.infer['tavilyScraperOptions']; +} + export interface ScraperContentResult { content: string; } diff --git a/packages/data-schemas/src/app/web.spec.ts b/packages/data-schemas/src/app/web.spec.ts index 2f188778ff..9a9a0596dc 100644 --- a/packages/data-schemas/src/app/web.spec.ts +++ b/packages/data-schemas/src/app/web.spec.ts @@ -57,6 +57,9 @@ describe('loadWebSearchConfig', () => { cohereApiKey: '${COHERE_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, rerankerType: undefined, + tavilyApiKey: '${TAVILY_API_KEY}', + tavilySearchUrl: '${TAVILY_SEARCH_URL}', + tavilyExtractUrl: '${TAVILY_EXTRACT_URL}', }); }); diff --git a/packages/data-schemas/src/app/web.ts b/packages/data-schemas/src/app/web.ts index f416e67e2d..989931db87 100644 --- a/packages/data-schemas/src/app/web.ts +++ b/packages/data-schemas/src/app/web.ts @@ -12,6 +12,10 @@ export const webSearchAuth = { /** Optional (0) */ searxngApiKey: 0 as 
const, }, + tavily: { + tavilyApiKey: 1 as const, + tavilySearchUrl: 0 as const, + }, }, scrapers: { firecrawl: { @@ -23,6 +27,10 @@ export const webSearchAuth = { serper: { serperApiKey: 1 as const, }, + tavily: { + tavilyApiKey: 1 as const, + tavilyExtractUrl: 0 as const, + }, }, rerankers: { jina: { @@ -69,6 +77,9 @@ export function loadWebSearchConfig( const firecrawlApiKey = config?.firecrawlApiKey ?? '${FIRECRAWL_API_KEY}'; const firecrawlApiUrl = config?.firecrawlApiUrl ?? '${FIRECRAWL_API_URL}'; const firecrawlVersion = config?.firecrawlVersion ?? '${FIRECRAWL_VERSION}'; + const tavilyApiKey = config?.tavilyApiKey ?? '${TAVILY_API_KEY}'; + const tavilySearchUrl = config?.tavilySearchUrl ?? '${TAVILY_SEARCH_URL}'; + const tavilyExtractUrl = config?.tavilyExtractUrl ?? '${TAVILY_EXTRACT_URL}'; const jinaApiKey = config?.jinaApiKey ?? '${JINA_API_KEY}'; const jinaApiUrl = config?.jinaApiUrl ?? '${JINA_API_URL}'; const cohereApiKey = config?.cohereApiKey ?? '${COHERE_API_KEY}'; @@ -76,13 +87,16 @@ export function loadWebSearchConfig( const rerankerType = config?.rerankerType; return { - ...config, + ...config, // Preserve provider-specific option blocks such as firecrawlOptions and tavilySearchOptions. safeSearch, jinaApiKey, jinaApiUrl, cohereApiKey, serperApiKey, searxngApiKey, + tavilyApiKey, + tavilySearchUrl, + tavilyExtractUrl, firecrawlApiKey, firecrawlApiUrl, firecrawlVersion, diff --git a/packages/data-schemas/src/types/web.ts b/packages/data-schemas/src/types/web.ts index a9cc1f0cc6..69018234a3 100644 --- a/packages/data-schemas/src/types/web.ts +++ b/packages/data-schemas/src/types/web.ts @@ -7,6 +7,9 @@ export type TWebSearchKeys = | 'firecrawlApiKey' | 'firecrawlApiUrl' | 'firecrawlVersion' + | 'tavilyApiKey' + | 'tavilySearchUrl' + | 'tavilyExtractUrl' | 'jinaApiKey' | 'jinaApiUrl' | 'cohereApiKey';