LibreChat/api/server/services/Endpoints/agents/initialize.js
Danny Avila db7011d567
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
GitNexus Index / index (push) Waiting to run
GitNexus Index / post-index (push) Blocked by required conditions
📊 feat: Real-Time Context Window & Token Usage Tracking (#13670)
* 📊 feat: Real-Time Context Window & Token Usage Tracking

* 🧪 fix: Align Pricing Spec Dep Signatures with TxDeps

* 🩹 fix: Resolve Codex Findings for Context Usage Tracking

* 📊 feat: Granular Tool Token Breakdown with Deferred Splits

* 🧪 test: Cover Session Cost in Mock E2E and Scope Usage Selectors

* 🧪 test: Live Host-Pipeline Usage Verification (Env-Gated)

* 🧪 test: Local Real-Provider Multi-Turn E2E Harness

* 🪙 fix: Keep Tagged Usage Buckets Out of the Live Context Estimate

* 🩹 fix: Scoped Token-Config Fallback and Sequential Visibility for Usage Events

* 🩹 fix: Address Usage Review Findings — Cost Timing, Scoped Caches, Finalized Output

- carry the post-snapshot output estimate into the context snapshot at
  finalize so the gauge keeps the last response after live resets
- accumulate per-rate billable units and price the session cost at
  render, so usage events arriving before the token-config load still
  count once it resolves
- pass user-scoped token-config cache keys through loadConfigModels
  fetches and drop the controller's unscoped fallback to prevent serving
  another user's resolved config
- tag emitted usage events with a per-run seq so resume dedupe never
  drops a distinct call with an identical payload
- admit the static tokenConfig override in the custom endpoint schema so
  it survives zod parsing into req.config

* 🩹 fix: Align Client Usage Accounting with Backend Cost Semantics

- classify cache tokens by provider (shared inputTokensIncludesCache from
  data-provider, consumed by both the backend billing path and the client)
  instead of a magnitude heuristic, so Anthropic/Bedrock turns where cache
  is smaller than uncached input no longer under-bill input
- mirror resolveCompletionTokens on the client so Vertex-style hidden
  thinking tokens are reflected in the Output row and session cost
- prefer endpoint pricing over adapter-provider pricing so a custom
  endpoint can price a known model name without built-in rates shadowing it
- carry static cacheRead/cacheWrite overrides through the tokenConfig
  schema and buildTokenConfigMap

* 🩹 fix: Honor Static Token Config in Billing; Tighten Usage Freshness

- initializeCustom now uses a static endpoint tokenConfig as the agent's
  endpointTokenConfig (billing + balance checks), not just the advertised
  UI config — previously the gauge showed admin rates while the agent
  billed against built-in tables
- invalidate the token-config query alongside models on user-key add/
  revoke so context windows and pricing refresh without a reload
- include maxContextTokens in ChatForm's stabilized conversation memo so
  the gauge reflects a changed context-window setting immediately
- feed the live output estimate from the legacy content path (direct and
  assistants streams), setting from cumulative part text rather than
  accumulating deltas

* 🩹 fix: Resume Usage Dedup, Agent Pricing, and Partial Override Billing

- fold usage events idempotently by (runId, seq) so resume backfill no
  longer resets the conversation totals — a mid-stream reconnect keeps the
  usage of prompts already completed earlier in the session
- tap replayed pending message/reasoning/content events so output streamed
  past the resume snapshot reaches the live estimate, not just the message
- resolve cost against the agent's backing endpoint (Agents conversations
  report endpoint `agents` / provider `openAI`, neither of which keys a
  custom endpoint's tokenConfig)
- getMultiplier/getCacheMultiplier fall back to the standard tables for
  models absent from a partial endpointTokenConfig, so a partial static
  override no longer bills non-listed models at defaultRate while the UI
  shows the correct pattern rate

* 🩹 fix: Repaired Output in Gauge, Cache-Rate Keys, Config Gate, Usage Cleanup

- live/completed gauge counts the repaired completion (normalized output),
  so under-reporting providers don't drop the response from used context
- translate static tokenConfig cacheWrite/cacheRead onto the write/read
  keys getCacheMultiplier reads, so cache tokens bill at the configured
  rate instead of the prompt-rate fallback
- clear the token index and usage atoms when leaving a conversation, so
  visited histories don't accumulate in memory for the tab's lifetime
- wait for startupConfig before mounting the gauge, so a deployment with
  contextUsage disabled never briefly mounts it or fires the token-config
  query on first load

* 🩹 fix: Move Token-Config Resolution to TS; Key Live Usage by Created Convo

- extract the token-config resolution (override gathering + cache lookup +
  buildTokenConfigMap) into resolveTokenConfigMap in packages/api, leaving
  the /api controller a thin request-scoped wrapper (CLAUDE.md TS rule)
- getConvoKey prefers the user message's real conversationId once the
  `created` event stamps it, so a new chat's first-response live gauge and
  totals land under the id TokenUsage subscribes to instead of NEW_CONVO

* 🩹 fix: Clear Stale Redis Job Usage; Live-Tap Legacy Streams; Share Fetched Config

- DEL the Redis job hash before re-creating it so a reused streamId can't
  inherit a prior run's contextUsage/tokenUsage and backfill stale usage
- tap the legacy {message,text} stream branch (non-agent OpenAI/Anthropic
  streams) into the live estimate, not just the content path
- copy a deduped fetch's token config to every sibling endpoint sharing the
  baseURL/key/headers, so /token-config resolves each by its own name

* revert: Don't DEL Redis job hash in createJob (breaks cross-replica resume)

createJob is an idempotent join — a second replica calls it for the same
streamId to share an in-flight stream's state. DELeting the hash wiped the
prior replica's persisted created/usage state, so a joining replica missed
the created event (GenerationJobManager cross-replica integration test).
Reverts the F1 change from 2bfce0c34b; the stale-usage concern doesn't
arise in practice (streamId is unique per generation).

* 🩹 fix: Best-Effort Usage Emit; Tag Hidden Sequential-Agent Usage

- wrap the ModelEndHandler usage emit in try/catch so a failed telemetry
  delivery (closed SSE / Redis publish error) can't abort the handler
  before thought-signature capture, which would break resumed tool calls
- tag hidden sequential-agent usage as 'sequential' (non-primary) so the
  client folds it into session cost/totals but not the live context gauge,
  instead of letting an undefined usage_type inflate the visible gauge

* 🩹 fix: Refetch Stale Token Config on Mount; Normalize Vertex for Lookup

- useTokenConfigQuery refetches on mount when stale, so a user-key change
  that invalidates tokenConfig while the gauge is unmounted takes effect on
  return instead of serving the prior key's resolved config
- normalize a Vertex-backed agent's provider (vertexai) to the google
  token-config key, so Gemini context windows and rates resolve instead of
  showing unknown context / $0 cost

* feat: Server-Side Per-Event Cost (Authoritative Pricing for the Gauge)

Move usage-cost pricing to the single source of truth. The backend prices
each model call with the same billing functions (premium tiers via
getMultiplier(inputTokenCount), cache rates) and emits the USD cost on
on_token_usage when interface.contextCost is enabled; the client sums
emitted costs instead of re-deriving from base token-config rates.

- computeUsageCostUSD reuses prepareTokenSpend/prepareStructuredTokenSpend
  so the emitted cost matches what is billed (incl. premium thresholds)
- getDefaultHandlers gains a usageCost pricing context; initialize.js wires
  db.getMultiplier/getCacheMultiplier gated on contextCost (agents path)
- client UsageTotals carries a summed costUSD; retire the client-side rate
  lookups (costFromUnits/calcUsageCost) that drifted from backend pricing
  and produced the provider-keying / cache-key / Vertex / premium findings
- keep normalizeUsageUnits for the displayed token counts; token-config is
  still used for the context-window meter

Fixes the premium-tier session-cost under-report (gpt-5.x / gemini-3.1
above their input thresholds).

* 🩹 fix: Branch-Accurate Usage Snapshot + Clearer Gauge Track Contrast

- re-anchor the context snapshot from the user message to the response
  message at finalize. Regenerating a response branches off a shared user
  message, so anchoring on it made the snapshot read as "active" on both
  branches — switching to the sibling branch showed the wrong (other
  branch's) context. The response message is branch-unique, so sibling
  branches now correctly fall back to their own per-branch totals.
- raise the gauge ring's track/fill contrast (muted track, prominent fill)
  so the used portion reads clearly as a fill-level indicator

* 🩹 fix: Tag Sequential Usage in Billing; Emit Subagent Cost; Reset Live on Resume Errors

- tag hidden sequential-agent usage `usage_type: 'sequential'` on the
  COLLECTED usage (not just the emit), and treat it as non-primary in
  recordCollectedUsage (billed, excluded from the reported output total) so
  hidden intermediate output stops inflating the parent's tokenCount/pruning
- emit on_token_usage from the subagent usage sink (tagged `subagent`, with
  authoritative cost when contextCost is on) so the gauge's session
  cost/totals include billed subagent usage; it stays out of the live meter
- call resetLive on the resumable 404 and max-retry terminal branches so the
  gauge doesn't keep counting stale in-flight tokens after the stream ends

* 🎨 fix: Contrast the Popup Context Bar; Revert Ring Restyle

- raise the popup breakdown's context progressbar contrast (muted
  surface-tertiary track, prominent text-primary fill) — that's the bar the
  contrast feedback was about
- revert the gauge ring restyle (kept its original border-heavy track /
  text-secondary fill); the ring wasn't the element in question

* 🩹 fix: Stop Snapshot Granularity Leaking Across Branches; Revert Tree Memo

- a null-anchor context snapshot was treated as active on every branch,
  leaking one generation's granular breakdown onto sibling branches. Require
  a non-null (response-message) anchor on the viewed branch instead, so
  siblings without a matching snapshot fall back to their own totals.
- revert the buildTree WeakMap memo in messages.ts. buildTree is pure (builds
  from shallow copies) so the memo was behaviorally identical, but it was the
  feature's only change to core branch-navigation selectors — removing it
  matches upstream and rules it out of branch-navigation debugging.

* 🪙 fix: Thread Endpoint Token Config to Agent Billing, Cost, and Context Limits

Custom-endpoint agents resolve an endpointTokenConfig during agent init but
it never reached the AgentClient, so spending, emitted cost, and runtime
max-token resolution all fell back to default rates for those agents.

- Surface options.endpointTokenConfig on the returned InitializedAgent.
- Pass it to the AgentClient (this.options.endpointTokenConfig) so the
  spending path bills at configured rates.
- Thread it through usageCost to computeUsageCostUSD so emitted per-event
  cost matches billing.
- getModelMaxTokens/getModelMaxOutputTokens fall back to the built-in map
  for models absent from a partial override (matches buildTokenConfigMap);
  consolidates the duplicated fallback in pricing.ts.

* 🪙 fix: Preserve Granular Breakdown Across Branch Switches

The granular context breakdown lives only in the live on_context_usage
snapshot — a single per-conversation slot, anchored to the latest response
and overwritten by each generation. Switching to a branch generated earlier
this session lost its tool/skill/system rows and fell back to coarse totals.

Retain each generation's finalized snapshot in a per-conversation map keyed
by its branch-unique response id (snapshotsByAnchorFamily). When the live
snapshot is off the viewed branch, walk the branch tail for its deepest
stored anchor and render that breakdown. Bounded by generation count and
cleared on conversation switch; the live/just-generated path is unchanged.

* 🪙 fix: Harden Resume Seeding and Subagent Usage Emission

- useResumableSSE: skip the trailing-output live seed when the resume
  carries a context snapshot; the snapshot's messageTokens already counts
  produced output, so seeding it again inflated usage until the next reset.
- AgentClient subagent emitter: await GenerationJobManager.emitChunk like
  every other caller (it persists before publishing), so a floating promise
  can't race job cleanup and a Redis/publish failure is caught by the
  emitter's try/catch instead of surfacing as an unhandled rejection.

* 🧪 test: Playwright Coverage for Context Breakdown Granularity

Add a test-only data-testid distinguishing the granular snapshot breakdown
(context-breakdown) from the coarse message-history estimate
(context-estimate), then assert granularity in the mock e2e harness:

- renders the granular breakdown from the live on_context_usage snapshot
  (guards that the snapshot event actually reaches the popover, not just the
  usage totals).
- preserves the granular breakdown after switching branches — regenerate to
  overwrite the single live snapshot, switch back, and confirm the rows
  survive via the per-anchor snapshot history map.

Branch regenerate/sibling selectors mirror the existing chat.spec branch test.
All three usage specs pass against the mock pipeline.

* 🪙 fix: Correct Resume Live-Seed, Fallback Re-index, and Subagent Emit Flush

Codex round on the prior commit:

- countTrailingOutputChars now counts only output at the very END of the
  aggregated content (0 when the model paused at a tool call), and the resume
  path always seeds it. The earlier skip-trailing-tool-parts behavior plus the
  skip-seed-when-snapshot gate together over- or under-counted in-flight
  output on resume; one rule fixes both — pre-invoke snapshot budget is never
  double-counted, and genuine in-flight output is no longer dropped.
- useTokenUsage re-indexes from the messages cache on tail change while
  submitting. The cache subscriber is muted during streaming, so without a
  context snapshot (non-agent streams) sumBranch missed the created tail and
  dropped history + prompt until finalize. Bounded — tailId only shifts on
  created/finalize/branch-switch.
- AgentClient tracks subagent usage emit promises and flushes them in
  chatCompletion's finally. The sink fires the emitter without awaiting, and
  resume reads the usage emitChunk persists (HSET), so cleanup must not race
  it or resumed clients miss billed subagent usage.
2026-06-13 19:38:28 -04:00

932 lines
33 KiB
JavaScript

const { logger } = require('@librechat/data-schemas');
const { createContentAggregator } = require('@librechat/agents');
const {
loadSkillStates,
initializeAgent,
primeInvokedSkills,
validateAgentModel,
extractManualSkills,
GenerationJobManager,
getCustomEndpointConfig,
discoverConnectedAgents,
resolveAgentScopedSkillIds,
resolveModelSpecSkillIds,
buildAgentContextAttachmentsByAgentId,
} = require('@librechat/api');
const {
ResourceType,
EModelEndpoint,
PermissionBits,
MAX_SUBAGENT_DEPTH,
isAgentsEndpoint,
getResponseSender,
AgentCapabilities,
MAX_SUBAGENT_GRAPH_NODES,
isEphemeralAgentId,
} = require('librechat-data-provider');
const {
createToolEndCallback,
getDefaultHandlers,
} = require('~/server/controllers/agents/callbacks');
const { loadAgentTools, loadToolsForExecution } = require('~/server/services/ToolService');
const { filterFilesByAgentAccess } = require('~/server/services/Files/permissions');
const {
getSkillToolDeps,
getSkillDbMethods,
canAuthorSkillFiles,
withDeploymentSkillIds,
buildAgentToolContext,
enrichLoadedToolsWithAgentContext,
} = require('./skillDeps');
const { getModelsConfig } = require('~/server/controllers/ModelController');
const { checkPermission, findAccessibleResources } = require('~/server/services/PermissionService');
const AgentClient = require('~/server/controllers/agents/client');
const { processAddedConvo } = require('./addedConvo');
const { logViolation } = require('~/cache');
const db = require('~/models');
/**
 * Builds the tool loader that is passed to agent initialization.
 *
 * The returned `loadTools` function gathers the agent fields it is called with
 * into a single `agent` object and delegates to `loadAgentTools`, forwarding the
 * `signal`, `streamId`, and `definitionsOnly` values captured by this factory.
 *
 * @param {AbortSignal} signal - The abort signal
 * @param {string | null} [streamId] - The stream ID for resumable mode
 * @param {boolean} [definitionsOnly=false] - When true, returns only serializable
 * tool definitions without creating full tool instances (for event-driven mode)
 * @returns {Function} The async `loadTools` function described below
 */
function createToolLoader(signal, streamId = null, definitionsOnly = false) {
  /**
   * Loads the tools for one agent via `loadAgentTools`.
   *
   * @param {object} params
   * @param {ServerRequest} params.req
   * @param {ServerResponse} params.res
   * @param {string} params.agentId
   * @param {string[]} params.tools
   * @param {string} params.provider
   * @param {string} params.model
   * @param {unknown} [params.tool_options] - Placed on the agent object unchanged
   * @param {AgentToolResources} params.tool_resources
   * @returns {Promise<{
   *   tools?: StructuredTool[],
   *   toolContextMap: Record<string, unknown>,
   *   toolDefinitions?: import('@librechat/agents').LCTool[],
   *   userMCPAuthMap?: Record<string, Record<string, string>>,
   *   toolRegistry?: import('@librechat/agents').LCToolRegistry
   * } | undefined>} Whatever `loadAgentTools` resolves with, or `undefined`
   * when it throws (the error is logged, not rethrown).
   */
  const loadTools = async (params) => {
    const { req, res, tools, model, agentId, provider, tool_options, tool_resources } = params;

    /** Argument bag for `loadAgentTools`; the agent fields travel as one object. */
    const loaderArgs = {
      req,
      res,
      agent: { id: agentId, tools, provider, model, tool_options },
      signal,
      streamId,
      tool_resources,
      definitionsOnly,
    };

    try {
      const loaded = await loadAgentTools(loaderArgs);
      return loaded;
    } catch (error) {
      /** Best-effort: a failed load is logged and surfaces to the caller as `undefined`. */
      logger.error(`Error loading tools for agent ${agentId}`, error);
      return undefined;
    }
  };

  return loadTools;
}
/**
* Initializes the AgentClient for a given request/response cycle.
* @param {Object} params
* @param {Express.Request} params.req
* @param {Express.Response} params.res
* @param {AbortSignal} params.signal
* @param {Object} params.endpointOption
*/
const initializeClient = async ({ req, res, signal, endpointOption }) => {
if (!endpointOption) {
throw new Error('Endpoint option not provided');
}
const appConfig = req.config;
/** @type {string | null} */
const streamId = req._resumableStreamId || null;
/** @type {Array<UsageMetadata>} */
const collectedUsage = [];
/**
* Vertex Gemini 3 thought signatures captured from `chat_model_end` events,
* keyed by `tool_call_id`. Persisted on
* `responseMessage.metadata.thoughtSignatures` so subsequent conversation
* turns can restore each signature onto the right reconstructed AIMessage's
* `additional_kwargs.signatures` and avoid 400s when resuming after a tool
* round-trip without a final text reply. Always allocated; capture path
* is a no-op for providers that don't emit signatures (OpenAI, Anthropic,
* Bedrock, etc.).
* @type {Record<string, string>}
*/
const collectedThoughtSignatures = {};
/** @type {ArtifactPromises} */
const artifactPromises = [];
const { contentParts, aggregateContent } = createContentAggregator();
const toolEndCallback = createToolEndCallback({ req, res, artifactPromises, streamId });
/** Query accessible skill IDs once per run (shared across all agents).
* Skills activate under strict opt-in semantics — see
* `resolveAgentScopedSkillIds` for the per-agent activation predicate:
* - Ephemeral agent → model-spec `skills` config first, otherwise the
* per-conversation skills badge toggle (full catalog).
* - Persisted agent → `agent.skills_enabled === true`. Optional
* `agent.skills` allowlist narrows the catalog; empty/undefined
* allowlist with the toggle on = full accessible catalog. */
const enabledCapabilities = new Set(appConfig?.endpoints?.[EModelEndpoint.agents]?.capabilities);
const skillsCapabilityEnabled = enabledCapabilities.has(AgentCapabilities.skills);
const codeEnvAvailable = enabledCapabilities.has(AgentCapabilities.execute_code);
const ephemeralSkillsToggle = req.body?.ephemeralAgent?.skills === true;
const skillDbMethods = getSkillDbMethods();
const accessibleSkillIds = skillsCapabilityEnabled
? withDeploymentSkillIds(
await findAccessibleResources({
userId: req.user.id,
role: req.user.role,
resourceType: ResourceType.SKILL,
requiredPermissions: PermissionBits.VIEW,
}),
)
: [];
const editableSkillIds = skillsCapabilityEnabled
? await findAccessibleResources({
userId: req.user.id,
role: req.user.role,
resourceType: ResourceType.SKILL,
requiredPermissions: PermissionBits.EDIT,
})
: [];
const skillCreateAllowed = skillsCapabilityEnabled
? await getSkillToolDeps().canCreateSkill({ req })
: false;
const { skillStates, defaultActiveOnShare } = await loadSkillStates({
userId: req.user.id,
appConfig,
getUserById: db.getUserById,
accessibleSkillIds,
});
/**
* Agent context store - populated after initialization, accessed by callback via closure.
* Maps agentId -> { userMCPAuthMap, agent, tool_resources, toolRegistry, openAIApiKey }
* @type {Map<string, {
* userMCPAuthMap?: Record<string, Record<string, string>>,
* agent?: object,
* tool_resources?: object,
* toolRegistry?: import('@librechat/agents').LCToolRegistry,
* requestScopedConnections?: import('@librechat/api').RequestScopedMCPConnectionStore,
* openAIApiKey?: string
* }>}
*/
const agentToolContexts = new Map();
const toolExecuteOptions = {
loadTools: async (toolNames, agentId) => {
const ctx = agentToolContexts.get(agentId) ?? {};
logger.debug(`[ON_TOOL_EXECUTE] ctx found: ${!!ctx.userMCPAuthMap}, agent: ${ctx.agent?.id}`);
logger.debug(`[ON_TOOL_EXECUTE] toolRegistry size: ${ctx.toolRegistry?.size ?? 'undefined'}`);
const result = await loadToolsForExecution({
req,
res,
signal,
streamId,
toolNames,
agent: ctx.agent,
toolRegistry: ctx.toolRegistry,
mcpAvailableTools: ctx.mcpAvailableTools,
requestScopedConnections: ctx.requestScopedConnections,
userMCPAuthMap: ctx.userMCPAuthMap,
tool_resources: ctx.tool_resources,
actionsEnabled: ctx.actionsEnabled,
});
logger.debug(`[ON_TOOL_EXECUTE] loaded ${result.loadedTools?.length ?? 0} tools`);
/** Per-agent narrowed flag (admin capability AND agent.tools
* includes execute_code), captured in `agentToolContexts` when
* the agent initialized. Falls back to `false` on any stray
* ctx miss so a skills-only agent never gains sandbox access
* even if capability lookup somehow skips. */
return enrichLoadedToolsWithAgentContext({
result,
req,
ctx,
});
},
toolEndCallback,
...getSkillToolDeps(),
};
const summarizationOptions =
appConfig?.summarization?.enabled === false ? { enabled: false } : { enabled: true };
/**
* Per-request map of per-subagent `createContentAggregator` instances
* keyed by the parent's `tool_call_id`. The handler in `callbacks.js`
* lazily creates an aggregator for each distinct `parentToolCallId`
* and folds every `ON_SUBAGENT_UPDATE` event into it as they stream
* in. `AgentClient` pulls each aggregator's `contentParts` at message
* save time and attaches them to the matching `subagent` tool_call so
* the child's reasoning / tool calls / final text survive a page
* refresh — the client-side Recoil atom is best-effort live-only.
*/
const subagentAggregatorsByToolCallId = new Map();
/** Backend prices each model call authoritatively (premium tiers, cache
* rates) and emits the cost on on_token_usage when contextCost is on, so
* the gauge sums real costs instead of re-deriving from base rates.
* `endpointTokenConfig` is filled in once `primaryConfig` resolves below so
* custom-endpoint agents price with their configured rates, not defaults. */
const usageCost = {
enabled: appConfig?.interfaceConfig?.contextCost === true,
pricing: { getMultiplier: db.getMultiplier, getCacheMultiplier: db.getCacheMultiplier },
};
const eventHandlers = getDefaultHandlers({
res,
toolExecuteOptions,
summarizationOptions,
aggregateContent,
toolEndCallback,
collectedUsage,
collectedThoughtSignatures,
streamId,
subagentAggregatorsByToolCallId,
usageCost,
});
if (!endpointOption.agent) {
throw new Error('No agent promise provided');
}
const primaryAgent = await endpointOption.agent;
delete endpointOption.agent;
if (!primaryAgent) {
throw new Error('Agent not found');
}
const modelsConfig = await getModelsConfig(req);
const validationResult = await validateAgentModel({
req,
res,
modelsConfig,
logViolation,
agent: primaryAgent,
});
if (!validationResult.isValid) {
throw new Error(validationResult.error?.message);
}
const agentConfigs = new Map();
const allowedProviders = new Set(appConfig?.endpoints?.[EModelEndpoint.agents]?.allowedProviders);
/** Event-driven mode: only load tool definitions, not full instances */
const loadTools = createToolLoader(signal, streamId, true);
/** @type {Array<MongoFile>} */
const requestFiles = req.body.files ?? [];
/** @type {string} */
const conversationId = req.body.conversationId;
/** @type {string | undefined} */
const parentMessageId = req.body.parentMessageId;
/**
* Skill names the user invoked via the `$` popover for this turn. Only flows
* to the primary agent — handoff agents are follow-up turns that don't see
* the user's per-submission `$` selections. `extractManualSkills` also
* drops non-string / empty elements so a crafted payload can't reach the
* `getSkillByName` DB query with nonsense values.
* @type {string[] | undefined}
*/
const manualSkills = extractManualSkills(req.body);
const selectedModelSpec =
endpointOption.spec && Array.isArray(appConfig?.modelSpecs?.list)
? appConfig.modelSpecs.list.find((modelSpec) => modelSpec.name === endpointOption.spec)
: null;
if (
primaryAgent &&
isEphemeralAgentId(primaryAgent.id) &&
selectedModelSpec &&
Object.hasOwn(selectedModelSpec, 'skills')
) {
if (selectedModelSpec.skills === true) {
primaryAgent.skills_enabled = true;
delete primaryAgent.skills;
} else if (selectedModelSpec.skills === false) {
primaryAgent.skills_enabled = false;
primaryAgent.skills = [];
} else if (Array.isArray(selectedModelSpec.skills)) {
const resolvedSkillIds = await resolveModelSpecSkillIds({
names: selectedModelSpec.skills,
accessibleSkillIds,
getSkillByName: db.getSkillByName,
});
primaryAgent.skills_enabled = true;
primaryAgent.skills = resolvedSkillIds.map((id) => id.toString());
}
}
const primaryScopedSkillIds = resolveAgentScopedSkillIds({
agent: primaryAgent,
accessibleSkillIds,
skillsCapabilityEnabled,
ephemeralSkillsToggle,
});
const primaryScopedEditableSkillIds = resolveAgentScopedSkillIds({
agent: primaryAgent,
accessibleSkillIds: editableSkillIds,
skillsCapabilityEnabled,
ephemeralSkillsToggle,
});
const primarySkillAuthoringAvailable = canAuthorSkillFiles({
agent: primaryAgent,
scopedEditableSkillIds: primaryScopedEditableSkillIds,
skillCreateAllowed,
skillsCapabilityEnabled,
ephemeralSkillsToggle,
});
const primaryConfig = await initializeAgent(
{
req,
res,
loadTools,
requestFiles,
conversationId,
parentMessageId,
agent: primaryAgent,
endpointOption,
allowedProviders,
isInitialAgent: true,
accessibleSkillIds: primaryScopedSkillIds,
skillAuthoringAvailable: primarySkillAuthoringAvailable,
codeEnvAvailable,
skillStates,
defaultActiveOnShare,
manualSkills,
},
{
getFiles: db.getFiles,
getUserKey: db.getUserKey,
getMessages: db.getMessages,
getConvoFiles: db.getConvoFiles,
updateFilesUsage: db.updateFilesUsage,
getUserKeyValues: db.getUserKeyValues,
getUserCodeFiles: db.getUserCodeFiles,
getToolFilesByIds: db.getToolFilesByIds,
getCodeGeneratedFiles: db.getCodeGeneratedFiles,
filterFilesByAgentAccess,
listSkillsByAccess: skillDbMethods.listSkillsByAccess,
listAlwaysApplySkills: skillDbMethods.listAlwaysApplySkills,
getSkillByName: skillDbMethods.getSkillByName,
},
);
/** Price emitted usage with the primary agent's resolved endpoint config so
* custom-endpoint agents reflect configured rates (mirrors the AgentClient
* spending path, which reads the same config). */
usageCost.endpointTokenConfig = primaryConfig.endpointTokenConfig;
logger.debug(
`[initializeClient] Storing tool context for ${primaryConfig.id}: ${primaryConfig.toolDefinitions?.length ?? 0} tools, registry size: ${primaryConfig.toolRegistry?.size ?? '0'}`,
);
agentToolContexts.set(
primaryConfig.id,
buildAgentToolContext({ agent: primaryAgent, config: primaryConfig }),
);
const {
agentConfigs: discoveredConfigs,
edges: discoveredEdges,
userMCPAuthMap: discoveredMCPAuthMap,
skippedAgentIds: discoveredSkippedIds,
} = await discoverConnectedAgents(
{
req,
res,
primaryConfig,
agent_ids: primaryConfig.agent_ids,
endpointOption,
allowedProviders,
modelsConfig,
loadTools,
requestFiles,
conversationId,
parentMessageId,
computeAccessibleSkillIds: (agent) =>
resolveAgentScopedSkillIds({
agent,
accessibleSkillIds,
skillsCapabilityEnabled,
ephemeralSkillsToggle,
}),
computeSkillAuthoringAvailable: (agent) =>
canAuthorSkillFiles({
agent,
scopedEditableSkillIds: resolveAgentScopedSkillIds({
agent,
accessibleSkillIds: editableSkillIds,
skillsCapabilityEnabled,
ephemeralSkillsToggle,
}),
skillCreateAllowed,
skillsCapabilityEnabled,
ephemeralSkillsToggle,
}),
skillStates,
defaultActiveOnShare,
codeEnvAvailable,
},
{
getAgent: db.getAgent,
checkPermission,
logViolation,
db: {
getFiles: db.getFiles,
getUserKey: db.getUserKey,
getMessages: db.getMessages,
getConvoFiles: db.getConvoFiles,
updateFilesUsage: db.updateFilesUsage,
getUserKeyValues: db.getUserKeyValues,
getUserCodeFiles: db.getUserCodeFiles,
getToolFilesByIds: db.getToolFilesByIds,
getCodeGeneratedFiles: db.getCodeGeneratedFiles,
filterFilesByAgentAccess,
listSkillsByAccess: skillDbMethods.listSkillsByAccess,
listAlwaysApplySkills: skillDbMethods.listAlwaysApplySkills,
getSkillByName: skillDbMethods.getSkillByName,
},
// The callback fires during BFS, before the helper prunes agents
// whose edges end up filtered. Don't populate `agentConfigs` here —
// `discoveredConfigs` (returned below) is the authoritative pruned
// set. The per-agent tool context map is OK to keep populated even
// for pruned ids: it's only read by closure in ON_TOOL_EXECUTE,
// stale entries are unreachable at runtime.
onAgentInitialized: (agentId, agent, config) => {
agentToolContexts.set(agentId, buildAgentToolContext({ agent, config }));
},
// Pass through the `@librechat/api` exports so that tests which
// `jest.mock('@librechat/api')` can override the initializer/validator.
initializeAgent,
validateAgentModel,
},
);
// Copy the pruned discovery result into the outer map. Anything the
// helper dropped (skipped or unreachable after edge filtering) is
// intentionally absent. `processAddedConvo` below may still add more
// entries for parallel multi-convo execution.
for (const [agentId, config] of discoveredConfigs) {
agentConfigs.set(agentId, config);
}
let userMCPAuthMap = discoveredMCPAuthMap;
let edges = discoveredEdges;
/** Multi-Convo: Process addedConvo for parallel agent execution */
const { userMCPAuthMap: updatedMCPAuthMap } = await processAddedConvo({
req,
res,
loadTools,
logViolation,
modelsConfig,
requestFiles,
agentConfigs,
primaryAgent,
endpointOption,
userMCPAuthMap,
conversationId,
parentMessageId,
allowedProviders,
primaryAgentId: primaryConfig.id,
accessibleSkillIds,
editableSkillIds,
skillsCapabilityEnabled,
ephemeralSkillsToggle,
skillCreateAllowed,
skillStates,
defaultActiveOnShare,
codeEnvAvailable,
});
if (updatedMCPAuthMap) {
userMCPAuthMap = updatedMCPAuthMap;
}
for (const [agentId, config] of agentConfigs) {
if (agentToolContexts.has(agentId)) {
continue;
}
agentToolContexts.set(agentId, buildAgentToolContext({ agent: config, config }));
}
// `discoverConnectedAgents` always returns a concrete array, so no
// further normalization is needed before handing this to `createRun`.
primaryConfig.edges = edges;
// Subagents: load any explicit subagent configs. Subagents run in isolated
// context windows and are invoked via a dedicated spawn tool (not handoff
// edges). An agent that is ONLY referenced as a subagent is dropped from
// `agentConfigs` so the LangGraph pipeline doesn't treat it as a
// parallel/handoff node, but it is KEPT in `agentToolContexts` — the child's
// `ON_TOOL_EXECUTE` dispatches resolve tool execution context (agent,
// tool_resources, skill ACLs, ...) from that map, so removing it would leave
// action tools skipped and resource-scoped tools running without their
// configured resources.
const subagentsCapabilityEnabled = enabledCapabilities.has(AgentCapabilities.subagents);
/** Track skipped ids locally so repeated failures short-circuit within
* the subagent loading loop. Seeded from the discovery helper's skip
* list so agents that already failed handoff loading don't get retried. */
const skippedAgentIds = new Set(discoveredSkippedIds ?? []);
/** Every agent id that appears on a handoff edge, as a source or a
 * target, plus the primary agent's own id. `loadSubagentsFor` checks
 * this set to tell "only ever referenced as a subagent" apart from
 * "also part of the edge graph"; only the former is later pruned from
 * `agentConfigs`. */
const edgeAgentIds = new Set([primaryConfig.id]);
for (const { from, to } of edges ?? []) {
  /** `from` / `to` may each be a single id or an array of ids; flatten
   * one level so both shapes are walked the same way (sources first,
   * then targets). Non-string entries are ignored. */
  for (const endpointId of [from, to].flat()) {
    if (typeof endpointId === 'string') {
      edgeAgentIds.add(endpointId);
    }
  }
}
/**
 * Resolves an agent config by id on demand, for subagent references whose
 * config is not already present in `agentConfigs`.
 *
 * Steps, in order: DB lookup, VIEW permission check for the requesting
 * user, model validation, then `initializeAgent`. A successful load is
 * stored in both `agentConfigs` and `agentToolContexts`. Every failure
 * path (including a thrown error) records the id in `skippedAgentIds`, so
 * later references to the same id return `null` immediately. Log lines
 * keep the `[processAgent]` prefix.
 *
 * @param {string} agentId - Id of the agent to load.
 * @returns {Promise<Object|null>} The cached or freshly initialized config,
 * or `null` when the agent is missing, the request has no user id, access
 * is denied, the model is invalid, or loading threw.
 */
const loadAgentById = async (agentId) => {
  /** Shared exit for every failure path: remember the id, yield `null`. */
  const markSkipped = () => {
    skippedAgentIds.add(agentId);
    return null;
  };

  if (skippedAgentIds.has(agentId)) {
    return null;
  }
  const cachedConfig = agentConfigs.get(agentId);
  if (cachedConfig) {
    return cachedConfig;
  }

  try {
    const agent = await db.getAgent({ id: agentId });
    if (!agent) {
      return markSkipped();
    }

    const userId = req.user?.id;
    if (!userId) {
      return markSkipped();
    }

    const canView = await checkPermission({
      userId,
      role: req.user?.role,
      resourceType: ResourceType.AGENT,
      resourceId: agent._id,
      requiredPermission: PermissionBits.VIEW,
    });
    if (!canView) {
      logger.warn(
        `[processAgent] User ${userId} lacks VIEW access to subagent ${agentId}, skipping`,
      );
      return markSkipped();
    }

    const modelCheck = await validateAgentModel({ req, res, agent, modelsConfig, logViolation });
    if (!modelCheck.isValid) {
      logger.warn(
        `[processAgent] Subagent ${agentId} failed model validation: ${modelCheck.error?.message}`,
      );
      return markSkipped();
    }

    /** Inputs shared by the skill-scoping helpers below. */
    const skillScope = { agent, skillsCapabilityEnabled, ephemeralSkillsToggle };
    const scopedSkillIds = resolveAgentScopedSkillIds({ ...skillScope, accessibleSkillIds });
    const scopedEditableSkillIds = resolveAgentScopedSkillIds({
      ...skillScope,
      accessibleSkillIds: editableSkillIds,
    });
    const skillAuthoringAvailable = canAuthorSkillFiles({
      ...skillScope,
      scopedEditableSkillIds,
      skillCreateAllowed,
    });

    const initParams = {
      req,
      res,
      agent,
      loadTools,
      requestFiles,
      conversationId,
      parentMessageId,
      /** The agents endpoint is forced here regardless of the incoming option. */
      endpointOption: { ...endpointOption, endpoint: EModelEndpoint.agents },
      allowedProviders,
      accessibleSkillIds: scopedSkillIds,
      skillAuthoringAvailable,
      /** Forward the endpoint-level flag, as the primary / handoff /
       * addedConvo paths do, so `initializeAgent` can derive the
       * per-agent `InitializedAgent.codeEnvAvailable`. Omitting it
       * would initialize a code-enabled subagent with
       * `codeEnvAvailable: false` and silently gate off the
       * `bash_tool` / `read_file` sandbox fallback. */
      codeEnvAvailable,
      skillStates,
      defaultActiveOnShare,
    };
    const initDeps = {
      getFiles: db.getFiles,
      getUserKey: db.getUserKey,
      getMessages: db.getMessages,
      getConvoFiles: db.getConvoFiles,
      updateFilesUsage: db.updateFilesUsage,
      getUserKeyValues: db.getUserKeyValues,
      getUserCodeFiles: db.getUserCodeFiles,
      getToolFilesByIds: db.getToolFilesByIds,
      getCodeGeneratedFiles: db.getCodeGeneratedFiles,
      filterFilesByAgentAccess,
      listSkillsByAccess: skillDbMethods.listSkillsByAccess,
      listAlwaysApplySkills: skillDbMethods.listAlwaysApplySkills,
      getSkillByName: skillDbMethods.getSkillByName,
    };

    const config = await initializeAgent(initParams, initDeps);
    agentConfigs.set(agentId, config);
    agentToolContexts.set(agentId, buildAgentToolContext({ agent, config }));
    return config;
  } catch (err) {
    logger.error(`[processAgent] Error processing subagent ${agentId}:`, err);
    return markSkipped();
  }
};
/** Collected during resolution; applied to `agentConfigs` only after
* every config has had its subagents resolved. Eager pruning would
* hide pure-subagent ids from the subsequent `loadSubagentsFor`
* loop, which would leave *their* `subagentAgentConfigs` empty and
* silently break nested delegation like A → B → C where B is only
* a subagent of A. */
const pureSubagentIds = new Set();
const subagentGraphIds = new Set();
const loadedSubagentConfigIds = new Set();
const assertSubagentGraphRoom = (agentId) => {
  /** An id already counted in `subagentGraphIds` never consumes a new
   * slot, so it passes even when the graph is full. */
  const alreadyCounted = subagentGraphIds.has(agentId);
  const hasFreeSlot = subagentGraphIds.size < MAX_SUBAGENT_GRAPH_NODES;
  if (alreadyCounted || hasFreeSlot) {
    return;
  }

  /** Admitting this id would exceed the cap: log the context, then throw. */
  logger.warn('[initializeClient] Subagent graph node limit exceeded', {
    agentId,
    primaryAgentId: primaryConfig.id,
    loadedSubagentCount: subagentGraphIds.size,
    maxSubagentGraphNodes: MAX_SUBAGENT_GRAPH_NODES,
  });
  throw new Error(
    `Subagent graph exceeds the maximum of ${MAX_SUBAGENT_GRAPH_NODES} unique agents.`,
  );
};
/**
 * Fills in `config.subagentAgentConfigs` for a single agent config.
 *
 * The same routine serves the primary agent, handoff targets, and
 * nested subagents, so any of them can declare its own
 * `subagents.agent_ids`. A config's own id is filtered out of the
 * explicit list. Nothing is removed from `agentConfigs` here; ids that
 * are not on any edge are only recorded in `pureSubagentIds`.
 *
 * @param {Object} config - Agent config to resolve subagents for (mutated).
 * @param {number} [depth=0] - Depth at which `config` was reached.
 * @returns {Promise<void>}
 * @throws {Error} When `config` has subagents at or beyond
 * `MAX_SUBAGENT_DEPTH`, or when `assertSubagentGraphRoom` rejects an id.
 */
const loadSubagentsFor = async (config, depth = 0) => {
  /** Logs and throws the depth-limit failure for this config. */
  const failDepthLimit = (childCount) => {
    logger.warn('[initializeClient] Subagent graph depth limit exceeded', {
      agentId: config.id,
      primaryAgentId: primaryConfig.id,
      depth,
      maxSubagentDepth: MAX_SUBAGENT_DEPTH,
      childCount,
    });
    throw new Error(
      `Subagent graph exceeds the maximum depth of ${MAX_SUBAGENT_DEPTH} at agent ${config.id}.`,
    );
  };

  const sub = config.subagents;
  const subagentsActive = subagentsCapabilityEnabled && sub?.enabled;
  if (!subagentsActive) {
    config.subagentAgentConfigs = [];
    return;
  }

  const atDepthLimit = depth >= MAX_SUBAGENT_DEPTH;

  /** Already resolved once: only re-check the depth guard, since this
   * visit may have arrived via a deeper path than the first one. */
  if (loadedSubagentConfigIds.has(config.id)) {
    const knownChildCount = config.subagentAgentConfigs?.length ?? 0;
    if (knownChildCount > 0 && atDepthLimit) {
      failDepthLimit(knownChildCount);
    }
    return;
  }

  /** Keep only non-empty string ids other than the config's own, and
   * collapse repeats so one agent never yields two spawn targets. */
  const requestedIds = Array.isArray(sub.agent_ids) ? sub.agent_ids : [];
  const explicitSubagentIds = [
    ...new Set(
      requestedIds.filter((id) => typeof id === 'string' && id !== '' && id !== config.id),
    ),
  ];
  if (atDepthLimit && explicitSubagentIds.length > 0) {
    failDepthLimit(explicitSubagentIds.length);
  }

  loadedSubagentConfigIds.add(config.id);

  /** @type {Array<Object>} */
  const resolved = [];
  for (const subagentId of explicitSubagentIds) {
    if (skippedAgentIds.has(subagentId)) {
      continue;
    }

    if (subagentId === primaryConfig.id) {
      /** Cycle guard: a reference back to the primary (e.g. A ↔ B)
       * reuses the existing primary config instead of loading a second
       * config for the same id. */
      resolved.push(primaryConfig);
      continue;
    }

    assertSubagentGraphRoom(subagentId);
    const subagentConfig = await loadAgentById(subagentId);
    if (subagentConfig) {
      subagentGraphIds.add(subagentConfig.id ?? subagentId);
      resolved.push(subagentConfig);
      /** Not on any handoff edge: mark for deferred pruning. */
      if (!edgeAgentIds.has(subagentId)) {
        pureSubagentIds.add(subagentId);
      }
    }
  }
  config.subagentAgentConfigs = resolved;
};
const maxResolvedDepthByConfigId = new Map();
/**
 * Breadth-first walk over the subagent trees rooted at `rootConfigs`,
 * calling `loadSubagentsFor` on each reached config so that nested
 * chains (A → B → C) are resolved before any pruning happens.
 *
 * `maxResolvedDepthByConfigId` records the greatest depth at which each
 * config id has been processed. A config is processed again only when
 * it is reached at a strictly greater depth, which lets the depth guard
 * in `loadSubagentsFor` observe the deepest path to it.
 *
 * @param {Array<Object>} rootConfigs - Configs to start from (depth 0).
 * @returns {Promise<void>}
 */
const resolveSubagentTrees = async (rootConfigs) => {
  const queue = rootConfigs.map((root) => ({ cfg: root, depth: 0 }));
  let cursor = 0;
  while (cursor < queue.length) {
    const { cfg, depth } = queue[cursor];
    cursor += 1;

    if (!cfg?.id) {
      continue;
    }
    const seenDepth = maxResolvedDepthByConfigId.get(cfg.id);
    const alreadyCovered = seenDepth != null && seenDepth >= depth;
    if (alreadyCovered) {
      continue;
    }

    maxResolvedDepthByConfigId.set(cfg.id, depth);
    await loadSubagentsFor(cfg, depth);

    /** Enqueue children not yet processed at this depth or deeper. */
    const nextDepth = depth + 1;
    for (const child of cfg.subagentAgentConfigs ?? []) {
      if (!child?.id) {
        continue;
      }
      const childSeenDepth = maxResolvedDepthByConfigId.get(child.id);
      if (childSeenDepth == null || childSeenDepth < nextDepth) {
        queue.push({ cfg: child, depth: nextDepth });
      }
    }
  }
};
await resolveSubagentTrees([primaryConfig, ...agentConfigs.values()]);
/** Drop pure-subagent entries now that every reachable config has
* had its subagents resolved. They stay in `agentToolContexts` so
* their tools still execute with the right scoping. */
for (const id of pureSubagentIds) {
agentConfigs.delete(id);
}
primaryConfig.subagents = subagentsCapabilityEnabled ? primaryConfig.subagents : undefined;
/** If the capability is off at the endpoint level, strip `subagents` on
* every loaded config — not just the primary. `run.ts` calls
* `buildSubagentConfigs` for every agent in the array, so a handoff
* agent with `subagents.enabled: true` persisted on its document would
* otherwise still expose self-spawn at runtime even though the admin
* has disabled the capability globally. */
if (!subagentsCapabilityEnabled) {
for (const config of agentConfigs.values()) {
config.subagents = undefined;
config.subagentAgentConfigs = undefined;
}
}
const agentContextAttachmentsByAgentId = buildAgentContextAttachmentsByAgentId([
primaryConfig,
...agentConfigs.values(),
]);
/** Endpoint-level settings for the primary agent's endpoint. Start with
 * the entry keyed directly in `appConfig.endpoints`; when there is none
 * and the endpoint is not the agents endpoint, fall back to the custom
 * endpoint lookup. */
let endpointConfig = appConfig.endpoints?.[primaryConfig.endpoint];
if (!isAgentsEndpoint(primaryConfig.endpoint) && !endpointConfig) {
  try {
    endpointConfig = getCustomEndpointConfig({
      endpoint: primaryConfig.endpoint,
      appConfig,
    });
  } catch (err) {
    /** Best-effort: a failed lookup leaves `endpointConfig` undefined
     * and is only logged. The prefix names this function, matching the
     * other `[initializeClient]` log lines, so the failure is
     * attributed to the right place. */
    logger.error('[initializeClient] Error getting custom endpoint config', err);
  }
}
const sender =
primaryAgent.name ??
getResponseSender({
...endpointOption,
model: endpointOption.model_parameters.model,
modelDisplayLabel: endpointConfig?.modelDisplayLabel,
modelLabel: endpointOption.model_parameters.modelLabel,
});
/** History priming uses the user's full ACL-accessible skill set (not
* per-agent scoped) because prior turns may reference skills no longer
* in any active agent's scope; the ACL check is the security gate.
* `codeEnvAvailable` comes from `primaryConfig` — @see
* `InitializedAgent.codeEnvAvailable` for the per-agent narrowing. */
const handlePrimeInvokedSkills = skillsCapabilityEnabled
? (payload) =>
primeInvokedSkills({
req,
payload,
accessibleSkillIds,
codeEnvAvailable: primaryConfig.codeEnvAvailable === true,
...getSkillToolDeps(),
})
: undefined;
const client = new AgentClient({
req,
res,
sender,
contentParts,
agentConfigs,
eventHandlers,
collectedUsage,
collectedThoughtSignatures,
aggregateContent,
artifactPromises,
primeInvokedSkills: handlePrimeInvokedSkills,
agent: primaryConfig,
spec: endpointOption.spec,
iconURL: endpointOption.iconURL,
chatProjectId: endpointOption.chatProjectId,
attachments: primaryConfig.requestAttachments ?? primaryConfig.attachments,
agentContextAttachmentsByAgentId,
endpointType: endpointOption.endpointType,
resendFiles: primaryConfig.resendFiles ?? true,
maxContextTokens: primaryConfig.maxContextTokens,
endpoint: isEphemeralAgentId(primaryConfig.id) ? primaryConfig.endpoint : EModelEndpoint.agents,
subagentAggregatorsByToolCallId,
/** Resolved endpoint token/pricing config so spending and cost reflect
* configured rates for custom-endpoint agents instead of defaults. */
endpointTokenConfig: primaryConfig.endpointTokenConfig,
});
if (streamId) {
GenerationJobManager.setCollectedUsage(streamId, collectedUsage);
}
return { client, userMCPAuthMap };
};
module.exports = { initializeClient };