mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-05-13 07:46:47 +00:00
🔍 feat: add Tavily as Search and Scraper Provider (#12581)
* feat: add Tavily integration as search provider and scraper provider * chore: update tavily web search parameters * chore: tavily parameter update * chore: update data-schemas test for tavily * fix: allow Tavily string option modes * fix: align Tavily config options * fix: scope Tavily scraper timeout * fix: use resolved scraper provider timeout * fix: widen Tavily search provider types * fix: harden Tavily web search config * fix: cap Tavily option timeouts --------- Co-authored-by: Danny Avila <danny@librechat.ai>
This commit is contained in:
parent
d6d70eeb26
commit
3da1d8c961
13 changed files with 495 additions and 5 deletions
|
|
@ -823,6 +823,9 @@ OPENWEATHER_API_KEY=
|
|||
# Search Provider (Required)
|
||||
# SERPER_API_KEY=your_serper_api_key
|
||||
|
||||
# Tavily (Search Provider and/or Scraper)
|
||||
# TAVILY_API_KEY=your_tavily_api_key
|
||||
|
||||
# Scraper (Required)
|
||||
# FIRECRAWL_API_KEY=your_firecrawl_api_key
|
||||
# Optional: Custom Firecrawl API URL
|
||||
|
|
|
|||
|
|
@ -78,6 +78,20 @@ export default function ApiKeyDialog({
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
key: SearchProviders.TAVILY,
|
||||
label: localize('com_ui_web_search_provider_tavily'),
|
||||
inputs: {
|
||||
tavilyApiKey: {
|
||||
placeholder: localize('com_ui_enter_api_key'),
|
||||
type: 'password' as const,
|
||||
link: {
|
||||
url: 'https://app.tavily.com/home',
|
||||
text: localize('com_ui_web_search_provider_tavily_key'),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const rerankerOptions: DropdownOption[] = [
|
||||
|
|
@ -152,6 +166,20 @@ export default function ApiKeyDialog({
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
key: ScraperProviders.TAVILY,
|
||||
label: localize('com_ui_web_search_scraper_tavily'),
|
||||
inputs: {
|
||||
tavilyApiKey: {
|
||||
placeholder: localize('com_ui_enter_api_key'),
|
||||
type: 'password' as const,
|
||||
link: {
|
||||
url: 'https://app.tavily.com/home',
|
||||
text: localize('com_ui_web_search_scraper_tavily_key'),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const [dropdownOpen, setDropdownOpen] = useState({
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ export type SearchApiKeyFormData = {
|
|||
searxngApiKey: string;
|
||||
firecrawlApiKey: string;
|
||||
firecrawlApiUrl: string;
|
||||
tavilyApiKey: string;
|
||||
jinaApiKey: string;
|
||||
jinaApiUrl: string;
|
||||
cohereApiKey: string;
|
||||
|
|
@ -54,6 +55,7 @@ const useAuthSearchTool = (options?: { isEntityTool: boolean }) => {
|
|||
searxngApiKey: data.searxngApiKey,
|
||||
firecrawlApiKey: data.firecrawlApiKey,
|
||||
firecrawlApiUrl: data.firecrawlApiUrl,
|
||||
tavilyApiKey: data.tavilyApiKey,
|
||||
jinaApiKey: data.jinaApiKey,
|
||||
jinaApiUrl: data.jinaApiUrl,
|
||||
cohereApiKey: data.cohereApiKey,
|
||||
|
|
|
|||
|
|
@ -1678,6 +1678,8 @@
|
|||
"com_ui_web_search_provider_searxng": "SearXNG",
|
||||
"com_ui_web_search_provider_serper": "Serper API",
|
||||
"com_ui_web_search_provider_serper_key": "Get your Serper API key",
|
||||
"com_ui_web_search_provider_tavily": "Tavily API",
|
||||
"com_ui_web_search_provider_tavily_key": "Get your Tavily API key",
|
||||
"com_ui_web_search_reading": "Reading results",
|
||||
"com_ui_web_search_reranker": "Reranker",
|
||||
"com_ui_web_search_reranker_cohere": "Cohere",
|
||||
|
|
@ -1690,6 +1692,8 @@
|
|||
"com_ui_web_search_scraper_firecrawl_key": "Get your Firecrawl API key",
|
||||
"com_ui_web_search_scraper_serper": "Serper Scrape API",
|
||||
"com_ui_web_search_scraper_serper_key": "Get your Serper API key",
|
||||
"com_ui_web_search_scraper_tavily": "Tavily Extract API",
|
||||
"com_ui_web_search_scraper_tavily_key": "Get your Tavily API key",
|
||||
"com_ui_web_search_searxng_api_key": "Enter SearXNG API Key (optional)",
|
||||
"com_ui_web_search_searxng_instance_url": "SearXNG Instance URL",
|
||||
"com_ui_web_search_source": "{{count}} source",
|
||||
|
|
|
|||
|
|
@ -628,9 +628,43 @@ endpoints:
|
|||
# serperApiKey: '${SERPER_API_KEY}'
|
||||
# searxngInstanceUrl: '${SEARXNG_INSTANCE_URL}'
|
||||
# searxngApiKey: '${SEARXNG_API_KEY}'
|
||||
# # Tavily (search provider and/or scraper)
|
||||
# tavilyApiKey: '${TAVILY_API_KEY}'
|
||||
# # Content scrapers
|
||||
# firecrawlApiKey: '${FIRECRAWL_API_KEY}'
|
||||
# firecrawlApiUrl: '${FIRECRAWL_API_URL}'
|
||||
#
|
||||
# Tavily as both search and scraper provider example:
|
||||
# webSearch:
|
||||
# searchProvider: tavily
|
||||
# scraperProvider: tavily
|
||||
# tavilyApiKey: '${TAVILY_API_KEY}'
|
||||
# # Optional: custom API URLs (defaults to https://api.tavily.com/search and https://api.tavily.com/extract)
|
||||
# # tavilySearchUrl: '${TAVILY_SEARCH_URL}'
|
||||
# # tavilyExtractUrl: '${TAVILY_EXTRACT_URL}'
|
||||
# tavilySearchOptions:
|
||||
# searchDepth: basic # 'basic', 'advanced', 'fast', or 'ultra-fast' (default: basic)
|
||||
# maxResults: 5 # 1-20 results per search (default: 5)
|
||||
# topic: general # 'general', 'news', or 'finance'
|
||||
# # includeAnswer: basic # Include answer summary: true, 'basic', or 'advanced'
|
||||
# # includeRawContent: markdown # Include raw content: true, 'markdown', or 'text'
|
||||
# # includeImages: true # Include images in results
|
||||
# # includeFavicon: true # Include favicon URL for each result
|
||||
# # chunksPerSource: 3 # Chunks per source, only with 'advanced' depth (1-3)
|
||||
# # safeSearch: false # Override Tavily safe search filtering
|
||||
# # includeDomains: # Restrict search to specific domains (max 300)
|
||||
# # - 'example.com'
|
||||
# # - 'docs.example.com'
|
||||
# # excludeDomains: # Exclude specific domains from results (max 150)
|
||||
# # - 'spam.com'
|
||||
# # timeRange: week # 'day', 'week', 'month', or 'year'
|
||||
# # timeout: 15000 # HTTP request timeout in milliseconds (max 120000)
|
||||
# tavilyScraperOptions:
|
||||
# extractDepth: basic # 'basic' (1 credit/5 URLs) or 'advanced' (2 credits/5 URLs, more thorough)
|
||||
# # includeImages: false # Include images extracted from URLs
|
||||
# # includeFavicon: false # Include favicon URL for each result
|
||||
# # format: markdown # 'markdown' (default) or 'text' (plain text, may increase latency)
|
||||
# # timeout: 15000 # HTTP request timeout in milliseconds (max 120000); Tavily Extract receives seconds clamped to 1-60
|
||||
|
||||
# Memory configuration for user memories
|
||||
# memory:
|
||||
|
|
|
|||
|
|
@ -730,6 +730,130 @@ describe('web.ts', () => {
|
|||
expect(providerCalls.length).toBe(1);
|
||||
});
|
||||
|
||||
it('should authenticate Tavily as a search provider and pass options through', async () => {
|
||||
const webSearchConfig: TCustomConfig['webSearch'] = {
|
||||
tavilyApiKey: '${TAVILY_API_KEY}',
|
||||
tavilySearchUrl: '${TAVILY_SEARCH_URL}',
|
||||
firecrawlApiKey: '${FIRECRAWL_API_KEY}',
|
||||
firecrawlApiUrl: '${FIRECRAWL_API_URL}',
|
||||
safeSearch: SafeSearchTypes.MODERATE,
|
||||
searchProvider: 'tavily' as SearchProviders,
|
||||
scraperProvider: 'firecrawl' as ScraperProviders,
|
||||
rerankerType: 'none' as RerankerTypes,
|
||||
tavilySearchOptions: {
|
||||
searchDepth: 'advanced',
|
||||
maxResults: 5,
|
||||
includeRawContent: 'markdown',
|
||||
},
|
||||
};
|
||||
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field: string) => {
|
||||
if (field === 'TAVILY_API_KEY') {
|
||||
result[field] = 'tavily-api-key';
|
||||
} else if (field === 'TAVILY_SEARCH_URL') {
|
||||
result[field] = 'https://api.tavily.com/search';
|
||||
} else if (field === 'FIRECRAWL_API_URL') {
|
||||
result[field] = 'https://api.firecrawl.dev';
|
||||
} else {
|
||||
result[field] = 'test-api-key';
|
||||
}
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authenticated).toBe(true);
|
||||
expect(result.authResult.searchProvider).toBe('tavily');
|
||||
expect(result.authResult.tavilyApiKey).toBe('tavily-api-key');
|
||||
expect(result.authResult.tavilySearchUrl).toBe('https://api.tavily.com/search');
|
||||
expect(result.authResult.tavilySearchOptions).toEqual(webSearchConfig.tavilySearchOptions);
|
||||
});
|
||||
|
||||
it('should fail authentication when Tavily search API key is missing', async () => {
|
||||
const webSearchConfig: TCustomConfig['webSearch'] = {
|
||||
tavilyApiKey: '${TAVILY_API_KEY}',
|
||||
firecrawlApiKey: '${FIRECRAWL_API_KEY}',
|
||||
firecrawlApiUrl: '${FIRECRAWL_API_URL}',
|
||||
safeSearch: SafeSearchTypes.MODERATE,
|
||||
searchProvider: 'tavily' as SearchProviders,
|
||||
scraperProvider: 'firecrawl' as ScraperProviders,
|
||||
rerankerType: 'none' as RerankerTypes,
|
||||
};
|
||||
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field: string) => {
|
||||
if (field !== 'TAVILY_API_KEY') {
|
||||
result[field] =
|
||||
field === 'FIRECRAWL_API_URL' ? 'https://api.firecrawl.dev' : 'test-api-key';
|
||||
}
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authenticated).toBe(false);
|
||||
const providersAuthType = result.authTypes.find(
|
||||
([category]) => category === 'providers',
|
||||
)?.[1];
|
||||
expect(providersAuthType).toBe(AuthType.USER_PROVIDED);
|
||||
});
|
||||
|
||||
it('should authenticate Tavily as both search provider and scraper with a shared key', async () => {
|
||||
const webSearchConfig: TCustomConfig['webSearch'] = {
|
||||
tavilyApiKey: '${TAVILY_API_KEY}',
|
||||
tavilySearchUrl: '${TAVILY_SEARCH_URL}',
|
||||
tavilyExtractUrl: '${TAVILY_EXTRACT_URL}',
|
||||
safeSearch: SafeSearchTypes.MODERATE,
|
||||
searchProvider: 'tavily' as SearchProviders,
|
||||
scraperProvider: 'tavily' as ScraperProviders,
|
||||
rerankerType: 'none' as RerankerTypes,
|
||||
tavilyScraperOptions: {
|
||||
extractDepth: 'advanced',
|
||||
timeout: 20000,
|
||||
},
|
||||
};
|
||||
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field: string) => {
|
||||
if (field === 'TAVILY_API_KEY') {
|
||||
result[field] = 'tavily-api-key';
|
||||
} else if (field === 'TAVILY_SEARCH_URL') {
|
||||
result[field] = 'https://api.tavily.com/search';
|
||||
} else if (field === 'TAVILY_EXTRACT_URL') {
|
||||
result[field] = 'https://api.tavily.com/extract';
|
||||
}
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authenticated).toBe(true);
|
||||
expect(result.authResult.searchProvider).toBe('tavily');
|
||||
expect(result.authResult.scraperProvider).toBe('tavily');
|
||||
expect(result.authResult.tavilyApiKey).toBe('tavily-api-key');
|
||||
expect(result.authResult.tavilyScraperOptions).toEqual(webSearchConfig.tavilyScraperOptions);
|
||||
expect(result.authResult.scraperTimeout).toBe(20000);
|
||||
});
|
||||
|
||||
it('should only check the specified scraperProvider', async () => {
|
||||
// Initialize a webSearchConfig with a specific scraperProvider
|
||||
const webSearchConfig: TCustomConfig['webSearch'] = {
|
||||
|
|
@ -1165,6 +1289,76 @@ describe('web.ts', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('should not use tavilyScraperOptions.timeout for firecrawl scraper', async () => {
|
||||
const webSearchConfig = {
|
||||
serperApiKey: '${SERPER_API_KEY}',
|
||||
firecrawlApiKey: '${FIRECRAWL_API_KEY}',
|
||||
firecrawlApiUrl: '${FIRECRAWL_API_URL}',
|
||||
tavilyApiKey: '${TAVILY_API_KEY}',
|
||||
jinaApiKey: '${JINA_API_KEY}',
|
||||
jinaApiUrl: '${JINA_API_URL}',
|
||||
safeSearch: SafeSearchTypes.MODERATE,
|
||||
scraperProvider: 'firecrawl' as ScraperProviders,
|
||||
tavilyScraperOptions: {
|
||||
timeout: 22000,
|
||||
},
|
||||
} as TCustomConfig['webSearch'];
|
||||
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field: string) => {
|
||||
result[field] =
|
||||
field === 'FIRECRAWL_API_URL' ? 'https://api.firecrawl.dev' : 'test-api-key';
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authenticated).toBe(true);
|
||||
expect(result.authResult.scraperTimeout).toBe(7500);
|
||||
});
|
||||
|
||||
it('should use tavilyScraperOptions.timeout for tavily scraper', async () => {
|
||||
const webSearchConfig = {
|
||||
serperApiKey: '${SERPER_API_KEY}',
|
||||
firecrawlApiKey: '${FIRECRAWL_API_KEY}',
|
||||
firecrawlApiUrl: '${FIRECRAWL_API_URL}',
|
||||
tavilyApiKey: '${TAVILY_API_KEY}',
|
||||
jinaApiKey: '${JINA_API_KEY}',
|
||||
jinaApiUrl: '${JINA_API_URL}',
|
||||
safeSearch: SafeSearchTypes.MODERATE,
|
||||
scraperProvider: 'tavily' as ScraperProviders,
|
||||
firecrawlOptions: {
|
||||
timeout: 12000,
|
||||
},
|
||||
tavilyScraperOptions: {
|
||||
timeout: 22000,
|
||||
},
|
||||
} as TCustomConfig['webSearch'];
|
||||
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field: string) => {
|
||||
result[field] =
|
||||
field === 'FIRECRAWL_API_URL' ? 'https://api.firecrawl.dev' : 'test-api-key';
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authResult.scraperTimeout).toBe(22000);
|
||||
});
|
||||
|
||||
it('should handle firecrawlOptions.formats when only formats is provided', async () => {
|
||||
// Initialize a webSearchConfig with only firecrawlOptions.formats
|
||||
const webSearchConfig = {
|
||||
|
|
@ -1359,6 +1553,81 @@ describe('web.ts', () => {
|
|||
expect(scrapersAuth).toBe(AuthType.USER_PROVIDED);
|
||||
});
|
||||
|
||||
it('should block user-provided tavilySearchUrl targeting localhost', async () => {
|
||||
mockIsSSRFTarget.mockImplementation((hostname: string) => hostname === 'localhost');
|
||||
|
||||
const webSearchConfig: TCustomConfig['webSearch'] = {
|
||||
tavilyApiKey: '${TAVILY_API_KEY}',
|
||||
tavilySearchUrl: '${TAVILY_SEARCH_URL}',
|
||||
firecrawlApiKey: '${FIRECRAWL_API_KEY}',
|
||||
safeSearch: SafeSearchTypes.MODERATE,
|
||||
searchProvider: 'tavily' as SearchProviders,
|
||||
rerankerType: 'none' as RerankerTypes,
|
||||
};
|
||||
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field: string) => {
|
||||
if (field === 'TAVILY_SEARCH_URL') {
|
||||
result[field] = 'http://localhost:8080/search';
|
||||
} else {
|
||||
result[field] = 'test-api-key';
|
||||
}
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authResult.tavilySearchUrl).toBeUndefined();
|
||||
expect(result.authResult.searchProvider).toBe('tavily');
|
||||
expect(result.authenticated).toBe(true);
|
||||
expect(mockIsSSRFTarget).toHaveBeenCalledWith('localhost');
|
||||
});
|
||||
|
||||
it('should block user-provided tavilyExtractUrl resolving to private IP', async () => {
|
||||
mockResolveHostnameSSRF.mockImplementation((hostname: string) =>
|
||||
Promise.resolve(hostname === 'extract.internal-service.com'),
|
||||
);
|
||||
|
||||
const webSearchConfig: TCustomConfig['webSearch'] = {
|
||||
serperApiKey: '${SERPER_API_KEY}',
|
||||
tavilyApiKey: '${TAVILY_API_KEY}',
|
||||
tavilyExtractUrl: '${TAVILY_EXTRACT_URL}',
|
||||
safeSearch: SafeSearchTypes.MODERATE,
|
||||
scraperProvider: 'tavily' as ScraperProviders,
|
||||
rerankerType: 'none' as RerankerTypes,
|
||||
};
|
||||
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field: string) => {
|
||||
if (field === 'TAVILY_EXTRACT_URL') {
|
||||
result[field] = 'https://extract.internal-service.com/extract';
|
||||
} else {
|
||||
result[field] = 'test-api-key';
|
||||
}
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authResult.tavilyExtractUrl).toBeUndefined();
|
||||
expect(result.authResult.scraperProvider).toBe('tavily');
|
||||
expect(result.authenticated).toBe(true);
|
||||
const scrapersAuth = result.authTypes.find(([c]) => c === 'scrapers')?.[1];
|
||||
expect(scrapersAuth).toBe(AuthType.USER_PROVIDED);
|
||||
});
|
||||
|
||||
it('should block user-provided searxngInstanceUrl targeting metadata endpoint', async () => {
|
||||
mockIsSSRFTarget.mockImplementation((hostname: string) => hostname === '169.254.169.254');
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,8 @@ const WEB_SEARCH_URL_KEYS = new Set<TWebSearchKeys>([
|
|||
'searxngInstanceUrl',
|
||||
'firecrawlApiUrl',
|
||||
'jinaApiUrl',
|
||||
'tavilySearchUrl',
|
||||
'tavilyExtractUrl',
|
||||
]);
|
||||
|
||||
/**
|
||||
|
|
@ -245,10 +247,20 @@ export async function loadWebSearchAuth({
|
|||
authTypes.push([category, isUserProvided ? AuthType.USER_PROVIDED : AuthType.SYSTEM_DEFINED]);
|
||||
}
|
||||
|
||||
const scraperProvider =
|
||||
authResult.scraperProvider ?? webSearchConfig?.scraperProvider ?? 'firecrawl';
|
||||
let scraperOptionsTimeout: number | undefined;
|
||||
if (scraperProvider === 'tavily') {
|
||||
scraperOptionsTimeout = webSearchConfig?.tavilyScraperOptions?.timeout;
|
||||
} else if (scraperProvider === 'firecrawl') {
|
||||
scraperOptionsTimeout = webSearchConfig?.firecrawlOptions?.timeout;
|
||||
}
|
||||
|
||||
authResult.safeSearch = webSearchConfig?.safeSearch ?? SafeSearchTypes.MODERATE;
|
||||
authResult.scraperTimeout =
|
||||
webSearchConfig?.scraperTimeout ?? webSearchConfig?.firecrawlOptions?.timeout ?? 7500;
|
||||
authResult.scraperTimeout = webSearchConfig?.scraperTimeout ?? scraperOptionsTimeout ?? 7500;
|
||||
authResult.firecrawlOptions = webSearchConfig?.firecrawlOptions;
|
||||
authResult.tavilySearchOptions = webSearchConfig?.tavilySearchOptions;
|
||||
authResult.tavilyScraperOptions = webSearchConfig?.tavilyScraperOptions;
|
||||
|
||||
return {
|
||||
authTypes,
|
||||
|
|
|
|||
|
|
@ -1,7 +1,13 @@
|
|||
import type { TEndpointsConfig } from './types';
|
||||
import { EModelEndpoint, isDocumentSupportedProvider } from './schemas';
|
||||
import { getEndpointFileConfig, mergeFileConfig } from './file-config';
|
||||
import { allowedAddressesSchema, configSchema, resolveEndpointType, excludedKeys } from './config';
|
||||
import {
|
||||
allowedAddressesSchema,
|
||||
configSchema,
|
||||
excludedKeys,
|
||||
resolveEndpointType,
|
||||
webSearchSchema,
|
||||
} from './config';
|
||||
|
||||
const endpointsConfig: TEndpointsConfig = {
|
||||
[EModelEndpoint.openAI]: { userProvide: false, order: 0 },
|
||||
|
|
@ -454,3 +460,69 @@ describe('allowedAddressesSchema', () => {
|
|||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('webSearchSchema', () => {
|
||||
it('accepts Tavily string modes for answer and raw content options', () => {
|
||||
const result = webSearchSchema.parse({
|
||||
tavilySearchOptions: {
|
||||
includeAnswer: 'advanced',
|
||||
includeRawContent: 'markdown',
|
||||
safeSearch: false,
|
||||
},
|
||||
});
|
||||
|
||||
expect(result.tavilySearchOptions?.includeAnswer).toBe('advanced');
|
||||
expect(result.tavilySearchOptions?.includeRawContent).toBe('markdown');
|
||||
expect(result.tavilySearchOptions?.safeSearch).toBe(false);
|
||||
});
|
||||
|
||||
it('accepts Tavily scraper options', () => {
|
||||
const result = webSearchSchema.parse({
|
||||
tavilyScraperOptions: {
|
||||
extractDepth: 'advanced',
|
||||
format: 'text',
|
||||
includeFavicon: true,
|
||||
timeout: 15000,
|
||||
},
|
||||
});
|
||||
|
||||
expect(result.tavilyScraperOptions?.extractDepth).toBe('advanced');
|
||||
expect(result.tavilyScraperOptions?.format).toBe('text');
|
||||
expect(result.tavilyScraperOptions?.includeFavicon).toBe(true);
|
||||
expect(result.tavilyScraperOptions?.timeout).toBe(15000);
|
||||
});
|
||||
|
||||
it('rejects invalid Tavily search options', () => {
|
||||
expect(() =>
|
||||
webSearchSchema.parse({
|
||||
tavilySearchOptions: {
|
||||
searchDepth: 'invalid',
|
||||
},
|
||||
}),
|
||||
).toThrow();
|
||||
|
||||
expect(() =>
|
||||
webSearchSchema.parse({
|
||||
tavilySearchOptions: {
|
||||
maxResults: 0,
|
||||
},
|
||||
}),
|
||||
).toThrow();
|
||||
|
||||
expect(() =>
|
||||
webSearchSchema.parse({
|
||||
tavilySearchOptions: {
|
||||
timeout: 120001,
|
||||
},
|
||||
}),
|
||||
).toThrow();
|
||||
|
||||
expect(() =>
|
||||
webSearchSchema.parse({
|
||||
tavilyScraperOptions: {
|
||||
timeout: 120001,
|
||||
},
|
||||
}),
|
||||
).toThrow();
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1026,11 +1026,13 @@ export enum SearchCategories {
|
|||
export enum SearchProviders {
|
||||
SERPER = 'serper',
|
||||
SEARXNG = 'searxng',
|
||||
TAVILY = 'tavily',
|
||||
}
|
||||
|
||||
export enum ScraperProviders {
|
||||
FIRECRAWL = 'firecrawl',
|
||||
SERPER = 'serper',
|
||||
TAVILY = 'tavily',
|
||||
}
|
||||
|
||||
export enum RerankerTypes {
|
||||
|
|
@ -1052,6 +1054,9 @@ export const webSearchSchema = z.object({
|
|||
firecrawlApiKey: z.string().optional().default('${FIRECRAWL_API_KEY}'),
|
||||
firecrawlApiUrl: z.string().optional().default('${FIRECRAWL_API_URL}'),
|
||||
firecrawlVersion: z.string().optional().default('${FIRECRAWL_VERSION}'),
|
||||
tavilyApiKey: z.string().optional().default('${TAVILY_API_KEY}'),
|
||||
tavilySearchUrl: z.string().optional().default('${TAVILY_SEARCH_URL}'),
|
||||
tavilyExtractUrl: z.string().optional().default('${TAVILY_EXTRACT_URL}'),
|
||||
jinaApiKey: z.string().optional().default('${JINA_API_KEY}'),
|
||||
jinaApiUrl: z.string().optional().default('${JINA_API_URL}'),
|
||||
cohereApiKey: z.string().optional().default('${COHERE_API_KEY}'),
|
||||
|
|
@ -1093,6 +1098,33 @@ export const webSearchSchema = z.object({
|
|||
.optional(),
|
||||
})
|
||||
.optional(),
|
||||
tavilySearchOptions: z
|
||||
.object({
|
||||
searchDepth: z.enum(['basic', 'advanced', 'fast', 'ultra-fast']).optional(),
|
||||
maxResults: z.number().int().min(1).max(20).optional(),
|
||||
includeImages: z.boolean().optional(),
|
||||
includeAnswer: z.union([z.boolean(), z.enum(['basic', 'advanced'])]).optional(),
|
||||
includeRawContent: z.union([z.boolean(), z.enum(['markdown', 'text'])]).optional(),
|
||||
includeDomains: z.array(z.string()).optional(),
|
||||
excludeDomains: z.array(z.string()).optional(),
|
||||
topic: z.enum(['general', 'news', 'finance']).optional(),
|
||||
timeRange: z.enum(['day', 'week', 'month', 'year', 'd', 'w', 'm', 'y']).optional(),
|
||||
includeImageDescriptions: z.boolean().optional(),
|
||||
includeFavicon: z.boolean().optional(),
|
||||
chunksPerSource: z.number().int().min(1).max(3).optional(),
|
||||
safeSearch: z.boolean().optional(),
|
||||
timeout: z.number().int().nonnegative().max(120000).optional(),
|
||||
})
|
||||
.optional(),
|
||||
tavilyScraperOptions: z
|
||||
.object({
|
||||
extractDepth: z.enum(['basic', 'advanced']).optional(),
|
||||
includeImages: z.boolean().optional(),
|
||||
includeFavicon: z.boolean().optional(),
|
||||
format: z.enum(['markdown', 'text']).optional(),
|
||||
timeout: z.number().int().nonnegative().max(120000).optional(),
|
||||
})
|
||||
.optional(),
|
||||
});
|
||||
|
||||
export type TWebSearchConfig = DeepPartial<z.infer<typeof webSearchSchema>>;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
import type { Logger as WinstonLogger } from 'winston';
|
||||
import type { z } from 'zod';
|
||||
import type { webSearchSchema } from '../config';
|
||||
|
||||
export type SearchRefType = 'search' | 'image' | 'news' | 'video' | 'ref';
|
||||
|
||||
|
|
@ -10,7 +12,8 @@ export enum DATE_RANGE {
|
|||
PAST_YEAR = 'y',
|
||||
}
|
||||
|
||||
export type SearchProvider = 'serper' | 'searxng';
|
||||
export type SearchProvider = 'serper' | 'searxng' | 'tavily';
|
||||
export type ScraperProvider = 'firecrawl' | 'serper' | 'tavily';
|
||||
export type RerankerType = 'infinity' | 'jina' | 'cohere' | 'none';
|
||||
|
||||
export interface Highlight {
|
||||
|
|
@ -73,6 +76,9 @@ export interface SearchConfig {
|
|||
serperApiKey?: string;
|
||||
searxngInstanceUrl?: string;
|
||||
searxngApiKey?: string;
|
||||
tavilyApiKey?: string;
|
||||
tavilySearchUrl?: string;
|
||||
tavilySearchOptions?: TavilyConfig['tavilySearchOptions'];
|
||||
}
|
||||
|
||||
export type References = {
|
||||
|
|
@ -129,6 +135,14 @@ export interface FirecrawlConfig {
|
|||
};
|
||||
}
|
||||
|
||||
export interface TavilyConfig {
|
||||
tavilyApiKey?: string;
|
||||
tavilySearchUrl?: string;
|
||||
tavilyExtractUrl?: string;
|
||||
tavilySearchOptions?: z.infer<typeof webSearchSchema>['tavilySearchOptions'];
|
||||
tavilyScraperOptions?: z.infer<typeof webSearchSchema>['tavilyScraperOptions'];
|
||||
}
|
||||
|
||||
export interface ScraperContentResult {
|
||||
content: string;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -57,6 +57,9 @@ describe('loadWebSearchConfig', () => {
|
|||
cohereApiKey: '${COHERE_API_KEY}',
|
||||
safeSearch: SafeSearchTypes.MODERATE,
|
||||
rerankerType: undefined,
|
||||
tavilyApiKey: '${TAVILY_API_KEY}',
|
||||
tavilySearchUrl: '${TAVILY_SEARCH_URL}',
|
||||
tavilyExtractUrl: '${TAVILY_EXTRACT_URL}',
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -12,6 +12,10 @@ export const webSearchAuth = {
|
|||
/** Optional (0) */
|
||||
searxngApiKey: 0 as const,
|
||||
},
|
||||
tavily: {
|
||||
tavilyApiKey: 1 as const,
|
||||
tavilySearchUrl: 0 as const,
|
||||
},
|
||||
},
|
||||
scrapers: {
|
||||
firecrawl: {
|
||||
|
|
@ -23,6 +27,10 @@ export const webSearchAuth = {
|
|||
serper: {
|
||||
serperApiKey: 1 as const,
|
||||
},
|
||||
tavily: {
|
||||
tavilyApiKey: 1 as const,
|
||||
tavilyExtractUrl: 0 as const,
|
||||
},
|
||||
},
|
||||
rerankers: {
|
||||
jina: {
|
||||
|
|
@ -69,6 +77,9 @@ export function loadWebSearchConfig(
|
|||
const firecrawlApiKey = config?.firecrawlApiKey ?? '${FIRECRAWL_API_KEY}';
|
||||
const firecrawlApiUrl = config?.firecrawlApiUrl ?? '${FIRECRAWL_API_URL}';
|
||||
const firecrawlVersion = config?.firecrawlVersion ?? '${FIRECRAWL_VERSION}';
|
||||
const tavilyApiKey = config?.tavilyApiKey ?? '${TAVILY_API_KEY}';
|
||||
const tavilySearchUrl = config?.tavilySearchUrl ?? '${TAVILY_SEARCH_URL}';
|
||||
const tavilyExtractUrl = config?.tavilyExtractUrl ?? '${TAVILY_EXTRACT_URL}';
|
||||
const jinaApiKey = config?.jinaApiKey ?? '${JINA_API_KEY}';
|
||||
const jinaApiUrl = config?.jinaApiUrl ?? '${JINA_API_URL}';
|
||||
const cohereApiKey = config?.cohereApiKey ?? '${COHERE_API_KEY}';
|
||||
|
|
@ -76,13 +87,16 @@ export function loadWebSearchConfig(
|
|||
const rerankerType = config?.rerankerType;
|
||||
|
||||
return {
|
||||
...config,
|
||||
...config, // Preserve provider-specific option blocks such as firecrawlOptions and tavilySearchOptions.
|
||||
safeSearch,
|
||||
jinaApiKey,
|
||||
jinaApiUrl,
|
||||
cohereApiKey,
|
||||
serperApiKey,
|
||||
searxngApiKey,
|
||||
tavilyApiKey,
|
||||
tavilySearchUrl,
|
||||
tavilyExtractUrl,
|
||||
firecrawlApiKey,
|
||||
firecrawlApiUrl,
|
||||
firecrawlVersion,
|
||||
|
|
|
|||
|
|
@ -7,6 +7,9 @@ export type TWebSearchKeys =
|
|||
| 'firecrawlApiKey'
|
||||
| 'firecrawlApiUrl'
|
||||
| 'firecrawlVersion'
|
||||
| 'tavilyApiKey'
|
||||
| 'tavilySearchUrl'
|
||||
| 'tavilyExtractUrl'
|
||||
| 'jinaApiKey'
|
||||
| 'jinaApiUrl'
|
||||
| 'cohereApiKey';
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue