mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-06-09 17:31:19 +00:00
🧊 perf: Memoize Completed Markdown Blocks During Streaming (#13576)
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
GitNexus Index / index (push) Waiting to run
GitNexus Index / post-index (push) Blocked by required conditions
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
GitNexus Index / index (push) Waiting to run
GitNexus Index / post-index (push) Blocked by required conditions
Render assistant markdown as independently memoized top-level blocks instead of a single ReactMarkdown that re-parses and re-highlights the entire message on every streamed token. Once a block's source slice is stable, it skips re-parse/re-render; only the final, still-growing block re-parses.

- splitMarkdown: split a message into top-level blocks via mdast-util-from-markdown (+ gfm/directive/math extensions) using node source offsets; also report per-block executable-code and artifact index counts.
- MarkdownBlocks: render each block memoized on its raw slice, each wrapped in its own CodeBlock/Artifact providers seeded with prefix-summed base indices, so the document-order indices used to match code-execution results stay stable under memoization (verified by OLD-vs-NEW parity tests across direct + streamed renders).
- CodeBlockContext/ArtifactContext: add optional baseIndex (default 0, fully backward compatible) so per-block providers continue the running index.
- markdownConfig: extract the shared remark/rehype plugins + components map.
- deps: declare mdast-util-from-markdown, mdast-util-gfm/math/directive and the micromark gfm/math/directive extensions as direct client dependencies (previously resolved transitively via react-markdown).
- Tests: splitter unit tests; index parity + DOM equivalence vs the whole-message renderer; rendering smoke tests.
- Bench (MarkdownBlocks.bench.tsx, outside __tests__ so the default jest run skips it): ~88% fewer code-block renders and ~2.3x faster cumulative render across a simulated stream.
This commit is contained in:
parent
90ebecb254
commit
15ea03624d
12 changed files with 1107 additions and 91 deletions
|
|
@ -82,8 +82,15 @@
|
|||
"lodash": "^4.17.23",
|
||||
"lucide-react": "^0.394.0",
|
||||
"match-sorter": "^8.1.0",
|
||||
"mdast-util-directive": "^3.0.0",
|
||||
"mdast-util-from-markdown": "^2.0.1",
|
||||
"mdast-util-gfm": "^3.0.0",
|
||||
"mdast-util-math": "^3.0.0",
|
||||
"mermaid": "^11.15.0",
|
||||
"micromark-extension-directive": "^3.0.1",
|
||||
"micromark-extension-gfm": "^3.0.0",
|
||||
"micromark-extension-llm-math": "^3.1.0",
|
||||
"micromark-extension-math": "^3.1.0",
|
||||
"qrcode.react": "^4.2.0",
|
||||
"rc-input-number": "^7.4.2",
|
||||
"react": "^18.2.0",
|
||||
|
|
|
|||
|
|
@ -8,17 +8,31 @@ type TArtifactContext = {
|
|||
export const ArtifactContext = createContext<TArtifactContext>({} as TArtifactContext);
|
||||
export const useArtifactContext = () => useContext(ArtifactContext);
|
||||
|
||||
export function ArtifactProvider({ children }: { children: ReactNode }) {
|
||||
export function ArtifactProvider({
|
||||
children,
|
||||
baseIndex = 0,
|
||||
}: {
|
||||
children: ReactNode;
|
||||
/**
|
||||
* Offset added to every assigned index, so per-block memoized rendering can
|
||||
* seed each block's provider with the count of artifacts in earlier blocks
|
||||
* and keep document-order indices stable.
|
||||
*/
|
||||
baseIndex?: number;
|
||||
}) {
|
||||
const counterRef = useRef(0);
|
||||
|
||||
const getNextIndex = useCallback((skip: boolean) => {
|
||||
if (skip) {
|
||||
return counterRef.current;
|
||||
}
|
||||
const nextIndex = counterRef.current;
|
||||
counterRef.current += 1;
|
||||
return nextIndex;
|
||||
}, []);
|
||||
const getNextIndex = useCallback(
|
||||
(skip: boolean) => {
|
||||
if (skip) {
|
||||
return baseIndex + counterRef.current;
|
||||
}
|
||||
const nextIndex = counterRef.current;
|
||||
counterRef.current += 1;
|
||||
return baseIndex + nextIndex;
|
||||
},
|
||||
[baseIndex],
|
||||
);
|
||||
|
||||
const resetCounter = useCallback(() => {
|
||||
counterRef.current = 0;
|
||||
|
|
|
|||
|
|
@ -8,17 +8,33 @@ type TCodeBlockContext = {
|
|||
export const CodeBlockContext = createContext<TCodeBlockContext>({} as TCodeBlockContext);
|
||||
export const useCodeBlockContext = () => useContext(CodeBlockContext);
|
||||
|
||||
export function CodeBlockProvider({ children }: { children: ReactNode }) {
|
||||
export function CodeBlockProvider({
|
||||
children,
|
||||
baseIndex = 0,
|
||||
}: {
|
||||
children: ReactNode;
|
||||
/**
|
||||
* Offset added to every assigned index. When rendering a message as
|
||||
* independently memoized blocks, each block gets its own provider seeded with
|
||||
* the running count of executable code blocks in earlier blocks, so document-
|
||||
* order indices are preserved without a single shared (memoization-fragile)
|
||||
* counter.
|
||||
*/
|
||||
baseIndex?: number;
|
||||
}) {
|
||||
const counterRef = useRef(0);
|
||||
|
||||
const getNextIndex = useCallback((skip: boolean) => {
|
||||
if (skip) {
|
||||
return counterRef.current;
|
||||
}
|
||||
const nextIndex = counterRef.current;
|
||||
counterRef.current += 1;
|
||||
return nextIndex;
|
||||
}, []);
|
||||
const getNextIndex = useCallback(
|
||||
(skip: boolean) => {
|
||||
if (skip) {
|
||||
return baseIndex + counterRef.current;
|
||||
}
|
||||
const nextIndex = counterRef.current;
|
||||
counterRef.current += 1;
|
||||
return baseIndex + nextIndex;
|
||||
},
|
||||
[baseIndex],
|
||||
);
|
||||
|
||||
const resetCounter = useCallback(() => {
|
||||
counterRef.current = 0;
|
||||
|
|
|
|||
|
|
@ -1,25 +1,9 @@
|
|||
import React, { memo, useMemo } from 'react';
|
||||
import remarkGfm from 'remark-gfm';
|
||||
import remarkMath from 'remark-math';
|
||||
import supersub from 'remark-supersub';
|
||||
import rehypeKatex from 'rehype-katex';
|
||||
import { useRecoilValue } from 'recoil';
|
||||
import ReactMarkdown from 'react-markdown';
|
||||
import rehypeHighlight from 'rehype-highlight';
|
||||
import remarkDirective from 'remark-directive';
|
||||
import type { Pluggable } from 'unified';
|
||||
import { Citation, CompositeCitation, HighlightedText } from '~/components/Web/Citation';
|
||||
import {
|
||||
mcpUIResourcePlugin,
|
||||
MCPUIResource,
|
||||
MCPUIResourceCarousel,
|
||||
} from '~/components/MCPUIResource';
|
||||
import { Artifact, artifactPlugin } from '~/components/Artifacts/Artifact';
|
||||
import { ArtifactProvider, CodeBlockProvider } from '~/Providers';
|
||||
import { getRemarkPlugins, getRehypePlugins, getMarkdownComponents } from './markdownConfig';
|
||||
import MarkdownErrorBoundary from './MarkdownErrorBoundary';
|
||||
import { langSubset, preprocessLaTeX } from '~/utils';
|
||||
import { unicodeCitation } from '~/components/Web';
|
||||
import { code, a, p, img, table } from './MarkdownComponents';
|
||||
import MarkdownBlocks from './MarkdownBlocks';
|
||||
import { preprocessLaTeX } from '~/utils';
|
||||
import store from '~/store';
|
||||
|
||||
type TContentProps = {
|
||||
|
|
@ -38,31 +22,6 @@ const Markdown = memo(function Markdown({ content = '', isLatestMessage }: TCont
|
|||
return LaTeXParsing ? preprocessLaTeX(content) : content;
|
||||
}, [content, LaTeXParsing, isInitializing]);
|
||||
|
||||
const rehypePlugins = useMemo(
|
||||
() => [
|
||||
[rehypeKatex],
|
||||
[
|
||||
rehypeHighlight,
|
||||
{
|
||||
detect: true,
|
||||
ignoreMissing: true,
|
||||
subset: langSubset,
|
||||
},
|
||||
],
|
||||
],
|
||||
[],
|
||||
);
|
||||
|
||||
const remarkPlugins: Pluggable[] = [
|
||||
supersub,
|
||||
remarkGfm,
|
||||
remarkDirective,
|
||||
artifactPlugin,
|
||||
[remarkMath, { singleDollarTextMath: false }],
|
||||
unicodeCitation,
|
||||
mcpUIResourcePlugin,
|
||||
];
|
||||
|
||||
if (isInitializing) {
|
||||
return (
|
||||
<div className="absolute">
|
||||
|
|
@ -75,35 +34,12 @@ const Markdown = memo(function Markdown({ content = '', isLatestMessage }: TCont
|
|||
|
||||
return (
|
||||
<MarkdownErrorBoundary content={content} codeExecution={true}>
|
||||
<ArtifactProvider>
|
||||
<CodeBlockProvider>
|
||||
<ReactMarkdown
|
||||
/** @ts-ignore */
|
||||
remarkPlugins={remarkPlugins}
|
||||
/* @ts-ignore */
|
||||
rehypePlugins={rehypePlugins}
|
||||
components={
|
||||
{
|
||||
code,
|
||||
a,
|
||||
p,
|
||||
img,
|
||||
table,
|
||||
artifact: Artifact,
|
||||
citation: Citation,
|
||||
'highlighted-text': HighlightedText,
|
||||
'composite-citation': CompositeCitation,
|
||||
'mcp-ui-resource': MCPUIResource,
|
||||
'mcp-ui-carousel': MCPUIResourceCarousel,
|
||||
} as {
|
||||
[nodeType: string]: React.ElementType;
|
||||
}
|
||||
}
|
||||
>
|
||||
{currentContent}
|
||||
</ReactMarkdown>
|
||||
</CodeBlockProvider>
|
||||
</ArtifactProvider>
|
||||
<MarkdownBlocks
|
||||
content={currentContent}
|
||||
remarkPlugins={getRemarkPlugins()}
|
||||
rehypePlugins={getRehypePlugins()}
|
||||
components={getMarkdownComponents()}
|
||||
/>
|
||||
</MarkdownErrorBoundary>
|
||||
);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -0,0 +1,165 @@
|
|||
import React, { Profiler } from 'react';
|
||||
import { RecoilRoot } from 'recoil';
|
||||
import ReactMarkdown from 'react-markdown';
|
||||
import { render } from '@testing-library/react';
|
||||
import { getRemarkPlugins, getRehypePlugins, getMarkdownComponents } from './markdownConfig';
|
||||
import { ArtifactProvider, CodeBlockProvider } from '~/Providers';
|
||||
import CodeBlock from '~/components/Messages/Content/CodeBlock';
|
||||
import Markdown from './Markdown';
|
||||
|
||||
/**
|
||||
* Streaming render benchmark comparing the previous whole-message renderer
|
||||
* (one ReactMarkdown re-parsing everything per token) against the per-block
|
||||
* memoized renderer. This file lives outside `__tests__/` and is named
|
||||
* `.bench.tsx` so the default jest run skips it; execute it explicitly with:
|
||||
*
|
||||
* node node_modules/jest/bin/jest.js --runInBand --coverage=false \
|
||||
* --testMatch '**\/MarkdownBlocks.bench.tsx'
|
||||
*
|
||||
* Two metrics are reported:
|
||||
* - codeBlockRenders: deterministic structural metric — how many times code
|
||||
* blocks render across the whole stream (the memoization win, noise-free).
|
||||
* - totalMs: summed React Profiler actualDuration (wall-clock; jsdom absolute
|
||||
* numbers are not browser-accurate, but the OLD/NEW ratio is indicative).
|
||||
*/
|
||||
|
||||
jest.mock('~/components/Messages/Content/CodeBlock', () => ({
|
||||
__esModule: true,
|
||||
default: jest.fn(() => null),
|
||||
}));
|
||||
|
||||
const codeBlockMock = CodeBlock as unknown as jest.Mock;
|
||||
|
||||
const LANGS = ['python', 'javascript', 'typescript', 'bash', 'json', 'sql', 'go', 'rust'];
|
||||
|
||||
/**
 * Builds a synthetic markdown message made of `sections` consecutive sections.
 *
 * Every section contributes a heading, one paragraph, and an eight-line fenced
 * code block whose language cycles through `LANGS`. Every third section
 * (indices 0, 3, 6, ...) additionally gets a five-row GFM table.
 *
 * @param sections Number of sections to generate; `0` yields an empty string.
 * @returns The sections' lines joined with `\n`.
 */
const buildMessage = (sections: number): string => {
  const CODE_LINE_COUNT = 8;
  const TABLE_ROW_COUNT = 5;

  /** Produces the markdown lines for the section at zero-based `sectionIdx`. */
  const sectionLines = (sectionIdx: number): string[] => {
    const ordinal = sectionIdx + 1;
    const fenceLanguage = LANGS[sectionIdx % LANGS.length];
    const codeLines = Array.from(
      { length: CODE_LINE_COUNT },
      (_unused, n) =>
        `const value_${sectionIdx}_${n} = computeSomething(${n}, "arg_${n}"); // line ${n}`,
    );

    const lines = [
      `## Section ${ordinal}`,
      '',
      `This is paragraph ${ordinal} explaining the code below with some **bold** and ` +
        `\`inline\` text, intentionally a bit long to add realistic reflow cost during ` +
        `streaming, repeated across every section of the message.`,
      '',
      '```' + fenceLanguage,
      ...codeLines,
      '```',
      '',
    ];

    // Only every third section carries a table.
    if (sectionIdx % 3 !== 0) {
      return lines;
    }

    const tableRows = Array.from(
      { length: TABLE_ROW_COUNT },
      (_unused, row) => `| item_${sectionIdx}_${row} | number | ${row * sectionIdx} |`,
    );
    return [...lines, '| Name | Type | Value |', '| --- | --- | --- |', ...tableRows, ''];
  };

  const allLines: string[] = [];
  for (let sectionIdx = 0; sectionIdx < sections; sectionIdx += 1) {
    allLines.push(...sectionLines(sectionIdx));
  }
  return allLines.join('\n');
};
|
||||
|
||||
/**
 * Simulates a token stream by cutting `content` into `steps` growing prefixes.
 *
 * The prefix for step `s` (1-based) ends at `ceil(content.length * s / steps)`,
 * so the last prefix is the full content.
 *
 * @param content Full message text.
 * @param steps Number of prefixes to produce; values below 1 yield `[]`.
 * @returns Prefixes in increasing-length order.
 */
const makePrefixes = (content: string, steps: number): string[] => {
  const totalChars = content.length;
  const cutAt = (step: number): number => Math.ceil((totalChars * step) / steps);

  const result: string[] = [];
  let step = 1;
  while (step <= steps) {
    result.push(content.slice(0, cutAt(step)));
    step += 1;
  }
  return result;
};
|
||||
|
||||
/**
 * Baseline for the benchmark: the whole message goes through one ReactMarkdown
 * instance under a single ArtifactProvider/CodeBlockProvider pair.
 */
const OldMarkdown = ({ content }: { content: string }) => {
  const wholeMessage = (
    <ReactMarkdown
      /** @ts-ignore */
      remarkPlugins={getRemarkPlugins()}
      /** @ts-ignore */
      rehypePlugins={getRehypePlugins()}
      components={getMarkdownComponents()}
    >
      {content}
    </ReactMarkdown>
  );

  return (
    <ArtifactProvider>
      <CodeBlockProvider>{wholeMessage}</CodeBlockProvider>
    </ArtifactProvider>
  );
};
|
||||
|
||||
/** Candidate for the benchmark: the `Markdown` component, rendered as the latest message. */
const NewMarkdown = ({ content }: { content: string }) => {
  return <Markdown content={content} isLatestMessage={true} />;
};
|
||||
|
||||
/**
 * Renders `Component` once per prefix (mount, then one rerender per later
 * prefix) and reports the cost of the whole simulated stream.
 *
 * @param Component Renderer under test; receives the current prefix as `content`.
 * @param prefixes Growing slices of the message, in stream order.
 * @returns `totalMs` — the sum of every React Profiler `actualDuration` reported
 *   while rendering — and `codeBlockRenders` — how many times the mocked
 *   CodeBlock was called since the mock was cleared at the start of this run.
 */
const measure = (
  Component: React.ComponentType<{ content: string }>,
  prefixes: string[],
): { totalMs: number; codeBlockRenders: number } => {
  // Start from a clean call log so the count only reflects this run.
  codeBlockMock.mockClear();

  let totalMs = 0;
  const onRender = (_id: string, _phase: string, actualDuration: number) => {
    totalMs += actualDuration;
  };

  const tree = (content: string) => (
    <Profiler id="bench" onRender={onRender}>
      <RecoilRoot>
        <Component content={content} />
      </RecoilRoot>
    </Profiler>
  );

  const { rerender, unmount } = render(tree(prefixes[0]));
  try {
    for (let i = 1; i < prefixes.length; i += 1) {
      rerender(tree(prefixes[i]));
    }
    // The result is captured before the `finally` unmount runs, so teardown
    // work is never included in the reported numbers.
    return { totalMs, codeBlockRenders: codeBlockMock.mock.calls.length };
  } finally {
    // Always tear the tree down, even when a rerender throws, so a failed run
    // cannot leave a mounted tree behind for the next measurement.
    unmount();
  }
};
|
||||
|
||||
describe('Markdown streaming benchmark (OLD whole-message vs NEW per-block)', () => {
  it('reports render cost across a simulated stream', () => {
    type Sample = { totalMs: number; codeBlockRenders: number };

    const steps = 80;
    const iterations = 3;
    const content = buildMessage(12);
    const prefixes = makePrefixes(content, steps);

    // Throwaway runs: warm module/highlight caches so the first measured run isn't skewed.
    measure(OldMarkdown, prefixes);
    measure(NewMarkdown, prefixes);

    // Alternate OLD/NEW within each iteration.
    const old: Sample[] = [];
    const neu: Sample[] = [];
    let remaining = iterations;
    while (remaining > 0) {
      old.push(measure(OldMarkdown, prefixes));
      neu.push(measure(NewMarkdown, prefixes));
      remaining -= 1;
    }

    // Timing uses the fastest run; the render count is read from the first run.
    const fastestMs = (samples: Sample[]) =>
      samples.reduce((best, sample) => Math.min(best, sample.totalMs), Infinity);
    const oldMs = fastestMs(old);
    const newMs = fastestMs(neu);
    const oldRenders = old[0].codeBlockRenders;
    const newRenders = neu[0].codeBlockRenders;

    const reductionPct = (100 * (1 - newRenders / oldRenders)).toFixed(1);
    const speedup = (oldMs / newMs).toFixed(2);

    const report = [
      '',
      '================ Markdown streaming benchmark ================',
      `message size: ${content.length} chars, stream steps: ${steps}, iterations: ${iterations}`,
      '',
      `code-block renders over the stream (structural, noise-free):`,
      ` OLD (whole-message): ${oldRenders}`,
      ` NEW (per-block) : ${newRenders}`,
      ` reduction : ${reductionPct}%`,
      '',
      `total render time (min of ${iterations}, summed Profiler actualDuration; jsdom):`,
      ` OLD: ${oldMs.toFixed(1)} ms`,
      ` NEW: ${newMs.toFixed(1)} ms`,
      ` speedup: ${speedup}x`,
      '=============================================================',
      '',
    ];
    console.log(report.join('\n'));

    // Sanity: the per-block renderer must never render code blocks MORE often
    // than the whole-message renderer.
    expect(newRenders).toBeLessThanOrEqual(oldRenders);
    // The actual win: memoization should at least halve total code-block renders.
    expect(newRenders).toBeLessThan(oldRenders * 0.5);
  });
});
|
||||
110
client/src/components/Chat/Messages/Content/MarkdownBlocks.tsx
Normal file
110
client/src/components/Chat/Messages/Content/MarkdownBlocks.tsx
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
import React, { memo, useMemo } from 'react';
|
||||
import ReactMarkdown from 'react-markdown';
|
||||
import type { PluggableList } from 'unified';
|
||||
import type { ElementType } from 'react';
|
||||
import { ArtifactProvider, CodeBlockProvider } from '~/Providers';
|
||||
import { splitMarkdownIntoBlocks } from './splitMarkdown';
|
||||
|
||||
type SharedProps = {
|
||||
remarkPlugins: PluggableList;
|
||||
rehypePlugins: PluggableList;
|
||||
components: { [nodeType: string]: ElementType };
|
||||
};
|
||||
|
||||
type MarkdownBlockProps = SharedProps & {
|
||||
content: string;
|
||||
codeBaseIndex: number;
|
||||
artifactBaseIndex: number;
|
||||
};
|
||||
|
||||
/**
 * Renders a single top-level markdown block through its own ReactMarkdown
 * instance, wrapped in a dedicated ArtifactProvider and CodeBlockProvider whose
 * `baseIndex` props are set from `artifactBaseIndex` and `codeBaseIndex`.
 *
 * The memo comparator looks only at `content`, `codeBaseIndex` and
 * `artifactBaseIndex`: when all three are unchanged the block is not
 * re-rendered, regardless of the `remarkPlugins`, `rehypePlugins` or
 * `components` values passed in. During streaming this means a completed block
 * is skipped while the still-growing last block keeps re-rendering.
 */
const MarkdownBlock = memo(
  function MarkdownBlock(props: MarkdownBlockProps) {
    const { content, codeBaseIndex, artifactBaseIndex } = props;
    const { remarkPlugins, rehypePlugins, components } = props;

    return (
      <ArtifactProvider baseIndex={artifactBaseIndex}>
        <CodeBlockProvider baseIndex={codeBaseIndex}>
          <ReactMarkdown
            /** @ts-ignore */
            remarkPlugins={remarkPlugins}
            /** @ts-ignore */
            rehypePlugins={rehypePlugins}
            components={components}
          >
            {content}
          </ReactMarkdown>
        </CodeBlockProvider>
      </ArtifactProvider>
    );
  },
  (prev, next) => {
    // Props are "equal" (skip re-render) only if the slice and both bases match.
    if (prev.content !== next.content) {
      return false;
    }
    if (prev.codeBaseIndex !== next.codeBaseIndex) {
      return false;
    }
    return prev.artifactBaseIndex === next.artifactBaseIndex;
  },
);
MarkdownBlock.displayName = 'MarkdownBlock';
|
||||
|
||||
type MarkdownBlocksProps = SharedProps & {
|
||||
content: string;
|
||||
};
|
||||
|
||||
/**
 * Splits `content` with `splitMarkdownIntoBlocks` and renders every resulting
 * block as its own memoized `MarkdownBlock`.
 *
 * Each block is handed the number of executable code blocks and artifacts
 * counted in the blocks before it (a running prefix sum of `codeBlockCount` and
 * `artifactCount`), which its providers use as base indices so indices continue
 * in document order across blocks. The split is recomputed only when `content`
 * changes.
 */
const MarkdownBlocks = memo(function MarkdownBlocks({
  content,
  remarkPlugins,
  rehypePlugins,
  components,
}: MarkdownBlocksProps) {
  const blocks = useMemo(() => {
    const entries: Array<{ raw: string; codeBaseIndex: number; artifactBaseIndex: number }> = [];
    let codeSeen = 0;
    let artifactsSeen = 0;

    for (const { raw, codeBlockCount, artifactCount } of splitMarkdownIntoBlocks(content)) {
      // Record the totals seen so far *before* adding this block's own counts.
      entries.push({ raw, codeBaseIndex: codeSeen, artifactBaseIndex: artifactsSeen });
      codeSeen += codeBlockCount;
      artifactsSeen += artifactCount;
    }
    return entries;
  }, [content]);

  return (
    <>
      {blocks.map(({ raw, codeBaseIndex, artifactBaseIndex }, position) => (
        // The key embeds both base indices: if an in-place edit inserts a block
        // ahead of existing code/artifact blocks, their bases shift, the key
        // changes, and the block remounts — refreshing the index each
        // code/artifact block captured in a ref. Under append-only streaming the
        // bases of completed blocks never change, so their keys stay stable and
        // they are not remounted.
        <MarkdownBlock
          key={`${position}-${codeBaseIndex}-${artifactBaseIndex}`}
          content={raw}
          codeBaseIndex={codeBaseIndex}
          artifactBaseIndex={artifactBaseIndex}
          remarkPlugins={remarkPlugins}
          rehypePlugins={rehypePlugins}
          components={components}
        />
      ))}
    </>
  );
});
MarkdownBlocks.displayName = 'MarkdownBlocks';
|
||||
|
||||
export default MarkdownBlocks;
|
||||
|
|
@ -0,0 +1,117 @@
|
|||
import React from 'react';
|
||||
import { RecoilRoot } from 'recoil';
|
||||
import ReactMarkdown from 'react-markdown';
|
||||
import { MemoryRouter } from 'react-router-dom';
|
||||
import { render, screen } from '@testing-library/react';
|
||||
import { getRemarkPlugins, getRehypePlugins, getMarkdownComponents } from '../markdownConfig';
|
||||
import { MessageContext, ArtifactProvider, CodeBlockProvider } from '~/Providers';
|
||||
import Markdown from '../Markdown';
|
||||
|
||||
/**
|
||||
* End-to-end artifact-index regression tests. The real `Artifact` component
|
||||
* computes its document-order index via the (per-block) ArtifactProvider and
|
||||
* stores it on the artifact passed to `ArtifactButton`; we stub only
|
||||
* `ArtifactButton` to read that index back. This verifies the per-block base
|
||||
* indexing assigns artifacts the same indices the whole-message renderer did —
|
||||
* the index used by artifact edit/update calls.
|
||||
*/
|
||||
jest.mock('~/components/Artifacts/ArtifactButton', () => ({
|
||||
__esModule: true,
|
||||
default: ({ artifact }: { artifact?: { index?: number; identifier?: string } }) => (
|
||||
<div
|
||||
data-testid="art"
|
||||
data-index={String(artifact?.index)}
|
||||
data-id={String(artifact?.identifier)}
|
||||
/>
|
||||
),
|
||||
}));
|
||||
|
||||
/**
 * Builds the markdown source of an `:::artifact` container directive of type
 * `text/markdown` whose body is the single line `hello <id>`.
 */
const artifact = (id: string, title: string) => {
  const opening = `:::artifact{identifier="${id}" type="text/markdown" title="${title}"}`;
  const body = `hello ${id}`;
  return [opening, body, ':::'].join('\n');
};
|
||||
|
||||
/**
 * Wraps `ui` in a MemoryRouter, a RecoilRoot and a MessageContext provider
 * (message id `m1`, expanded), in that nesting order.
 */
const wrap = (ui: React.ReactNode) => {
  const withMessageContext = (
    <MessageContext.Provider value={{ messageId: 'm1', isExpanded: true }}>
      {ui}
    </MessageContext.Provider>
  );

  return (
    <MemoryRouter>
      <RecoilRoot>{withMessageContext}</RecoilRoot>
    </MemoryRouter>
  );
};
|
||||
|
||||
/**
 * Reference implementation of the previous whole-message renderer: a single
 * ReactMarkdown under one ArtifactProvider (and one CodeBlockProvider).
 */
const OldMarkdown = ({ content }: { content: string }) => {
  const wholeMessage = (
    <ReactMarkdown
      /** @ts-ignore */
      remarkPlugins={getRemarkPlugins()}
      /** @ts-ignore */
      rehypePlugins={getRehypePlugins()}
      components={getMarkdownComponents()}
    >
      {content}
    </ReactMarkdown>
  );

  return (
    <ArtifactProvider>
      <CodeBlockProvider>{wholeMessage}</CodeBlockProvider>
    </ArtifactProvider>
  );
};
|
||||
|
||||
/**
 * Waits for the stubbed artifact buttons (`data-testid="art"`) to appear and
 * returns the `data-index` / `data-id` attribute values of each, in DOM order.
 */
const readArtifacts = async () => {
  const stubs = await screen.findAllByTestId('art');
  return stubs.map((stub) => {
    const idx = stub.getAttribute('data-index');
    const id = stub.getAttribute('data-id');
    return { idx, id };
  });
};
|
||||
|
||||
describe('MarkdownBlocks artifact-index parity (e2e)', () => {
  /** The per-block renderer for `content`, wrapped in the providers the test needs. */
  const perBlock = (content: string) =>
    wrap(<Markdown content={content} isLatestMessage={false} />);

  /** Artifacts 'a' then 'b' with consecutive document-order indices. */
  const A_THEN_B = [
    { idx: '0', id: 'a' },
    { idx: '1', id: 'b' },
  ];

  it('assigns document-order indices to multiple artifacts', async () => {
    const content = [artifact('a', 'A'), artifact('b', 'B')].join('\n\n');
    render(perBlock(content));

    expect(await readArtifacts()).toEqual(A_THEN_B);
  });

  it('matches the whole-message renderer indices (text + artifacts interleaved)', async () => {
    const content = ['Intro.', artifact('a', 'A'), 'Middle.', artifact('b', 'B'), 'End.'].join(
      '\n\n',
    );

    // Read the reference indices first, then tear down before mounting the new renderer.
    const legacy = render(wrap(<OldMarkdown content={content} />));
    const oldIdx = await readArtifacts();
    legacy.unmount();

    render(perBlock(content));
    const newIdx = await readArtifacts();

    expect(newIdx).toEqual(oldIdx);
    expect(newIdx).toEqual(A_THEN_B);
  });

  it('refreshes artifact indices when an in-place edit inserts an artifact before another', async () => {
    const before = ['Intro.', artifact('b', 'B')].join('\n\n');
    const after = [artifact('a', 'A'), artifact('b', 'B')].join('\n\n');

    const { rerender } = render(perBlock(before));
    expect(await readArtifacts()).toEqual([{ idx: '0', id: 'b' }]);

    rerender(perBlock(after));
    // 'b' started at index 0; putting 'a' in front moves its base to 1. If the
    // block key ignored the base, 'b' would keep its ref-cached 0 and collide
    // with 'a'.
    expect(await readArtifacts()).toEqual(A_THEN_B);
  });

  it('does not consume an index for inline text artifact directives', async () => {
    // The inline `:artifact{}` text directive is rendered as literal text rather
    // than as an Artifact, so the real artifact after it must still get index 0.
    const content = ['See :artifact{identifier="x"} inline.', artifact('a', 'A')].join('\n\n');
    render(perBlock(content));

    expect(await readArtifacts()).toEqual([{ idx: '0', id: 'a' }]);
  });
});
|
||||
|
|
@ -0,0 +1,240 @@
|
|||
import React from 'react';
|
||||
import { RecoilRoot } from 'recoil';
|
||||
import ReactMarkdown from 'react-markdown';
|
||||
import { render, screen } from '@testing-library/react';
|
||||
import { QueryClient, QueryClientProvider } from '@tanstack/react-query';
|
||||
import { getRemarkPlugins, getRehypePlugins, getMarkdownComponents } from '../markdownConfig';
|
||||
import { ArtifactProvider, CodeBlockProvider } from '~/Providers';
|
||||
import Markdown from '../Markdown';
|
||||
|
||||
/**
|
||||
* Stub CodeBlock so we can read the blockIndex each executable code block
|
||||
* receives, while still exercising the real `code` override's skip/single-line
|
||||
* decision (which decides whether a CodeBlock renders at all).
|
||||
*/
|
||||
jest.mock('~/components/Messages/Content/CodeBlock', () => ({
|
||||
__esModule: true,
|
||||
default: ({ lang, blockIndex }: { lang?: string; blockIndex?: number }) => (
|
||||
<div data-testid="cb" data-block-index={String(blockIndex)} data-lang={String(lang)} />
|
||||
),
|
||||
}));
|
||||
|
||||
/**
 * Reference implementation of the previous whole-message renderer: one
 * ReactMarkdown instance under a single set of providers.
 */
const OldMarkdown = ({ content }: { content: string }) => {
  const wholeMessage = (
    <ReactMarkdown
      /** @ts-ignore */
      remarkPlugins={getRemarkPlugins()}
      /** @ts-ignore */
      rehypePlugins={getRehypePlugins()}
      components={getMarkdownComponents()}
    >
      {content}
    </ReactMarkdown>
  );

  return (
    <ArtifactProvider>
      <CodeBlockProvider>{wholeMessage}</CodeBlockProvider>
    </ArtifactProvider>
  );
};
|
||||
|
||||
/**
 * Strips whitespace that sits directly between two tags (`>   <` becomes `><`)
 * and trims the ends of the string.
 *
 * Why: the whole-message renderer leaves whitespace-only text nodes ("\n")
 * between top-level block elements, whereas the per-block renderer parses each
 * block on its own and produces none. That inter-block whitespace has no visual
 * or functional effect, so it is removed before the two outputs are compared.
 */
const normalizeHtml = (html: string) => {
  const whitespaceBetweenTags = />\s+</g;
  const collapsed = html.replace(whitespaceBetweenTags, '><');
  return collapsed.trim();
};
|
||||
|
||||
/**
 * Collects, in DOM order, the `data-block-index` and `data-lang` attribute
 * values of every stubbed code block (`data-testid="cb"`) inside `container`.
 */
const indicesIn = (container: HTMLElement) => {
  const stubs = Array.from(container.querySelectorAll('[data-testid="cb"]'));
  return stubs.map((stub) => {
    const idx = stub.getAttribute('data-block-index');
    const lang = stub.getAttribute('data-lang');
    return { idx, lang };
  });
};
|
||||
|
||||
/**
 * Simulates line-by-line streaming: mounts `Component` with the first line of
 * `content`, then rerenders once per additional line with the text received so
 * far (lines joined by `\n`).
 *
 * @returns The render container after the final line has been applied.
 */
const streamThrough = (
  Component: React.ComponentType<{ content: string }>,
  content: string,
): HTMLElement => {
  const inRecoil = (partial: string) => (
    <RecoilRoot>
      <Component content={partial} />
    </RecoilRoot>
  );

  const [firstLine, ...laterLines] = content.split('\n');
  const { container, rerender } = render(inRecoil(firstLine));

  // Grow the received text one line at a time, rerendering after each line.
  let received = firstLine;
  for (const line of laterLines) {
    received = `${received}\n${line}`;
    rerender(inRecoil(received));
  }
  return container;
};
|
||||
|
||||
const MIXED = [
|
||||
'Intro paragraph.',
|
||||
'',
|
||||
'```python',
|
||||
'print("one")',
|
||||
'x = 1',
|
||||
'```',
|
||||
'',
|
||||
'Some `inline` code here.',
|
||||
'',
|
||||
'```js',
|
||||
'console.log("two");',
|
||||
'const y = 2;',
|
||||
'```',
|
||||
'',
|
||||
'```math',
|
||||
'E = mc^2',
|
||||
'```',
|
||||
'',
|
||||
'```bash',
|
||||
'echo single',
|
||||
'```',
|
||||
'',
|
||||
'```mermaid',
|
||||
'graph TD; A-->B;',
|
||||
'```',
|
||||
'',
|
||||
'```ts',
|
||||
'const z: number = 3;',
|
||||
'export {};',
|
||||
'```',
|
||||
].join('\n');
|
||||
|
||||
const EXPECTED = [
|
||||
{ idx: '0', lang: 'python' },
|
||||
{ idx: '1', lang: 'js' },
|
||||
{ idx: '2', lang: 'bash' },
|
||||
{ idx: '3', lang: 'ts' },
|
||||
];
|
||||
|
||||
/** The `Markdown` component under test, rendered as a non-latest message. */
const NewMarkdown = ({ content }: { content: string }) => {
  return <Markdown content={content} isLatestMessage={false} />;
};
|
||||
|
||||
describe('MarkdownBlocks code-block index parity', () => {
  /** Puts `node` under a fresh RecoilRoot. */
  const inRecoil = (node: React.ReactElement) => <RecoilRoot>{node}</RecoilRoot>;

  /** The per-block renderer for `content`, as a non-latest message. */
  const perBlock = (content: string) => <Markdown content={content} isLatestMessage={false} />;

  it('assigns document-order indices on a direct render (matches whole-message renderer)', () => {
    const legacy = render(inRecoil(<OldMarkdown content={MIXED} />));
    const current = render(inRecoil(perBlock(MIXED)));

    expect(indicesIn(legacy.container)).toEqual(EXPECTED);
    expect(indicesIn(current.container)).toEqual(EXPECTED);
  });

  it('keeps indices correct across a simulated stream (no drift under memoization)', () => {
    const legacyContainer = streamThrough(OldMarkdown, MIXED);
    const currentContainer = streamThrough(NewMarkdown, MIXED);

    expect(indicesIn(legacyContainer)).toEqual(EXPECTED);
    expect(indicesIn(currentContainer)).toEqual(EXPECTED);
  });

  it('streamed indices match a fresh direct render (stable for stored execution results)', () => {
    const streamed = streamThrough(NewMarkdown, MIXED);
    const fresh = render(inRecoil(perBlock(MIXED))).container;

    expect(indicesIn(streamed)).toEqual(indicesIn(fresh));
  });

  it('refreshes indices when an in-place edit inserts a code block before existing ones', () => {
    const jsFence = '```js\na\n```';
    const before = `intro\n\n${jsFence}`;
    const after = `\`\`\`py\nx\n\`\`\`\n\n${jsFence}`;

    const { container, rerender } = render(inRecoil(perBlock(before)));
    expect(indicesIn(container)).toEqual([{ idx: '0', lang: 'js' }]);

    rerender(inRecoil(perBlock(after)));
    // The js block's base moved from 0 to 1. Unless the block is remounted, its
    // ref-cached index would remain 0 and duplicate the py block's index.
    expect(indicesIn(container)).toEqual([
      { idx: '0', lang: 'py' },
      { idx: '1', lang: 'js' },
    ]);
  });
});
|
||||
|
||||
describe('MarkdownBlocks DOM equivalence (non-code blocks)', () => {
  const tableSource = ['| a | b |', '| - | - |', '| 1 | 2 |', '| 3 | 4 |'].join('\n');

  // Each entry is [label, markdown source].
  const cases: Array<[string, string]> = [
    ['paragraphs', 'First paragraph.\n\nSecond paragraph.'],
    ['gfm table', tableSource],
    ['unordered list', '- one\n- two\n- three'],
    ['ordered list', '1. one\n2. two'],
    ['headings + text', '# Title\n\nBody text with **bold** and _italics_.'],
    ['blockquote', '> quoted line one\n> quoted line two'],
    ['inline code', 'Use the `useMemo` hook for memoization.'],
    ['mixed', '# H\n\nPara with `code`.\n\n| x | y |\n| - | - |\n| 1 | 2 |\n\n- a\n- b'],
  ];

  /** Mounts `node` under a fresh RecoilRoot. */
  const mount = (node: React.ReactElement) => render(<RecoilRoot>{node}</RecoilRoot>);

  it.each(cases)('renders identical DOM to the whole-message renderer: %s', (_label, content) => {
    const legacy = mount(<OldMarkdown content={content} />);
    const current = mount(<Markdown content={content} isLatestMessage={false} />);

    const legacyHtml = normalizeHtml(legacy.container.innerHTML);
    const currentHtml = normalizeHtml(current.container.innerHTML);
    expect(currentHtml).toBe(legacyHtml);
  });
});
|
||||
|
||||
describe('MarkdownBlocks rendering smoke', () => {
  /** Mounts `Markdown` inside a RecoilRoot with the given props. */
  const mount = (content: string, isLatestMessage: boolean) =>
    render(
      <RecoilRoot>
        <Markdown content={content} isLatestMessage={isLatestMessage} />
      </RecoilRoot>,
    );

  it('renders an empty cursor placeholder while initializing', () => {
    // Empty content on the latest message is expected to show the placeholder element.
    const { container } = mount('', true);
    const placeholder = container.querySelector('.result-thinking');
    expect(placeholder).not.toBeNull();
  });

  it('renders executable code blocks for a multi-code message', () => {
    mount(MIXED, false);
    const codeBlocks = screen.getAllByTestId('cb');
    expect(codeBlocks).toHaveLength(4);
  });
});
|
||||
|
||||
describe('MarkdownBlocks document-level definitions', () => {
  it('resolves a reference-style link whose definition is in a separate block', () => {
    // The reference and its definition sit in different top-level blocks of the source.
    const referencingParagraph = 'See [docs][d] for details.';
    const linkDefinition = '[d]: https://example.com/docs';
    const content = `${referencingParagraph}\n\n${linkDefinition}`;

    const queryClient = new QueryClient();
    render(
      <QueryClientProvider client={queryClient}>
        <RecoilRoot>
          <Markdown content={content} isLatestMessage={false} />
        </RecoilRoot>
      </QueryClientProvider>,
    );

    const docsLink = screen.getByRole('link', { name: 'docs' });
    expect(docsLink).toHaveAttribute('href', 'https://example.com/docs');
  });
});
|
||||
|
|
@ -0,0 +1,168 @@
|
|||
import { splitMarkdownIntoBlocks } from '../splitMarkdown';
|
||||
|
||||
/** Splits `content` and returns just the raw source slice of each block, in order. */
const raws = (content: string): string[] => {
  const slices: string[] = [];
  for (const { raw } of splitMarkdownIntoBlocks(content)) {
    slices.push(raw);
  }
  return slices;
};
|
||||
|
||||
describe('splitMarkdownIntoBlocks', () => {
  it('returns [] for empty content', () => {
    expect(splitMarkdownIntoBlocks('')).toEqual([]);
  });

  it('returns a single block for one paragraph', () => {
    expect(raws('Hello world.')).toEqual(['Hello world.']);
  });

  it('splits consecutive paragraphs into separate blocks', () => {
    expect(raws('First.\n\nSecond.')).toEqual(['First.', 'Second.']);
  });

  it('keeps a GFM table as one atomic block', () => {
    const table = '| a | b |\n| - | - |\n| 1 | 2 |';
    const content = `Intro.\n\n${table}\n\nOutro.`;
    expect(raws(content)).toEqual(['Intro.', table, 'Outro.']);
  });

  it('keeps a fenced code block (with internal blank lines) as one block', () => {
    const code = '```js\nconst x = 1;\n\nconst y = 2;\n```';
    const content = `Before.\n\n${code}\n\nAfter.`;
    expect(raws(content)).toEqual(['Before.', code, 'After.']);
  });

  it('keeps a $$ math block as one block', () => {
    const mathBlock = '$$\nE = mc^2\n$$';
    const content = `Text.\n\n${mathBlock}\n\nMore.`;
    expect(raws(content)).toEqual(['Text.', mathBlock, 'More.']);
  });

  it('keeps a :::artifact::: container (with inner blank lines) intact', () => {
    const artifact = ':::artifact{title="t"}\nline one\n\nline two\n:::';
    const content = `Lead.\n\n${artifact}\n\nTail.`;
    expect(raws(content)).toEqual(['Lead.', artifact, 'Tail.']);
  });

  it('keeps a list as one block (does not split items)', () => {
    const list = '- one\n- two\n- three';
    expect(raws(list)).toEqual([list]);
  });

  it('does not split inline constructs across blocks (inline code/markers stay in their paragraph)', () => {
    const para = 'Use `inline` code and \\ui{abc} markers here.';
    expect(raws(para)).toEqual([para]);
  });

  it('each block re-parses to the same single block (idempotent boundaries)', () => {
    const content = ['# Heading', '', 'A paragraph.', '', '```py\nprint(1)\n```'].join('\n');
    const blocks = raws(content);
    expect(blocks).toHaveLength(3);
    // Splitting an already-split slice must not split it any further.
    for (const block of blocks) {
      expect(raws(block)).toEqual([block]);
    }
  });

  describe('index counts (mirror the code/artifact render decision)', () => {
    it('counts a multi-line fenced code block as executable', () => {
      const [block] = splitMarkdownIntoBlocks('```js\nconst x = 1;\nconst y = 2;\n```');
      expect(block.codeBlockCount).toBe(1);
    });

    it('counts a single-line block with a known language as executable', () => {
      const [block] = splitMarkdownIntoBlocks('```bash\necho hi\n```');
      expect(block.codeBlockCount).toBe(1);
    });

    it('counts a no-language single-line block as executable', () => {
      const [block] = splitMarkdownIntoBlocks('```\necho hi\n```');
      expect(block.codeBlockCount).toBe(1);
    });

    it('counts a single-line block with an unsupported language (renderer still renders a CodeBlock)', () => {
      const [block] = splitMarkdownIntoBlocks('```madeuplang\nhi\n```');
      expect(block.codeBlockCount).toBe(1);
    });

    it('does NOT count math or mermaid fences', () => {
      expect(splitMarkdownIntoBlocks('```math\nE=mc^2\n```')[0].codeBlockCount).toBe(0);
      expect(splitMarkdownIntoBlocks('```mermaid\ngraph TD; A-->B;\n```')[0].codeBlockCount).toBe(
        0,
      );
    });

    it('does NOT count hyphenated math/mermaid languages (renderer normalizes them)', () => {
      expect(
        splitMarkdownIntoBlocks('```mermaid-js\ngraph TD; A-->B;\n```')[0].codeBlockCount,
      ).toBe(0);
      expect(splitMarkdownIntoBlocks('```math-tex\nE=mc^2\n```')[0].codeBlockCount).toBe(0);
    });

    it('does count a fence that merely starts with math/mermaid letters', () => {
      expect(splitMarkdownIntoBlocks('```mathematica\nx\n```')[0].codeBlockCount).toBe(1);
    });

    it('counts an artifact container once and does not count code inside it', () => {
      const artifact = ':::artifact{title="t"}\n```js\nconst x = 1;\nconst y = 2;\n```\n:::';
      const [block] = splitMarkdownIntoBlocks(artifact);
      expect(block.artifactCount).toBe(1);
      expect(block.codeBlockCount).toBe(0);
    });

    it('counts leaf artifact directives, not just containers', () => {
      expect(splitMarkdownIntoBlocks('::artifact{identifier="a" type="x"}')[0].artifactCount).toBe(
        1,
      );
    });

    it('does NOT count inline text artifact directives (the plugin rewrites them to text)', () => {
      expect(splitMarkdownIntoBlocks('see :artifact{identifier="b"} inline')[0].artifactCount).toBe(
        0,
      );
    });
  });

  describe('document-level definitions force single-block rendering', () => {
    it('keeps a reference-style link and its definition in one block', () => {
      const content = 'See [docs][d] for details.\n\n[d]: https://example.com/docs';
      const blocks = splitMarkdownIntoBlocks(content);
      expect(blocks).toHaveLength(1);
      expect(blocks[0].raw).toBe(content);
    });

    it('keeps a footnote reference and its definition in one block', () => {
      const content = 'Claim with a note.[^1]\n\n[^1]: the footnote text';
      const blocks = splitMarkdownIntoBlocks(content);
      expect(blocks).toHaveLength(1);
      expect(blocks[0].raw).toBe(content);
    });

    it('forces single-block when a definition is nested in a blockquote', () => {
      const content = 'See [docs][d].\n\n> [d]: https://example.com/docs';
      const blocks = splitMarkdownIntoBlocks(content);
      expect(blocks).toHaveLength(1);
      expect(blocks[0].raw).toBe(content);
    });

    it('forces single-block when a definition is nested in a list item', () => {
      // The definition is indented two spaces so that, after the blank line, it
      // still belongs to the `- item` list item (its content starts at column 3).
      // With a single space it would parse as a top-level definition and this
      // test would not cover the nested case.
      const content = 'See [a][d].\n\n- item\n\n  [d]: https://example.com';
      const blocks = splitMarkdownIntoBlocks(content);
      expect(blocks).toHaveLength(1);
      expect(blocks[0].raw).toBe(content);
    });

    it('still splits when no document-level definition is present', () => {
      const blocks = splitMarkdownIntoBlocks('First paragraph.\n\nSecond paragraph.');
      expect(blocks).toHaveLength(2);
    });
  });

  describe('raw HTML blocks force single-block rendering', () => {
    it('renders the whole message as one block for multiple top-level HTML blocks', () => {
      const content = '<div>a</div>\n\n<div>b</div>';
      const blocks = splitMarkdownIntoBlocks(content);
      expect(blocks).toHaveLength(1);
      expect(blocks[0].raw).toBe(content);
    });

    it('still splits when HTML is only inline within a paragraph', () => {
      const blocks = splitMarkdownIntoBlocks('para with <br> inline.\n\nsecond para');
      expect(blocks).toHaveLength(2);
    });
  });
});
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
import remarkGfm from 'remark-gfm';
|
||||
import remarkMath from 'remark-math';
|
||||
import supersub from 'remark-supersub';
|
||||
import rehypeKatex from 'rehype-katex';
|
||||
import rehypeHighlight from 'rehype-highlight';
|
||||
import remarkDirective from 'remark-directive';
|
||||
import type { PluggableList } from 'unified';
|
||||
import type { ElementType } from 'react';
|
||||
import {
|
||||
mcpUIResourcePlugin,
|
||||
MCPUIResource,
|
||||
MCPUIResourceCarousel,
|
||||
} from '~/components/MCPUIResource';
|
||||
import { Citation, CompositeCitation, HighlightedText } from '~/components/Web/Citation';
|
||||
import { Artifact, artifactPlugin } from '~/components/Artifacts/Artifact';
|
||||
import { code, a, p, img, table } from './MarkdownComponents';
|
||||
import { unicodeCitation } from '~/components/Web';
|
||||
import { langSubset } from '~/utils';
|
||||
|
||||
/**
|
||||
* Single source of truth for the markdown rendering pipeline, shared by the
|
||||
* whole-message renderer and the per-block memoized renderer so both produce
|
||||
* identical output.
|
||||
*
|
||||
* These are exposed as lazily-initialized, cached getters rather than top-level
|
||||
* consts on purpose: `MarkdownComponents` participates in a circular import
|
||||
* (`MarkdownComponents` → `CodeBlock` → `Parts` → `Markdown` → here →
|
||||
* `MarkdownComponents`). Reading `code`/`a`/… at module-evaluation time throws
|
||||
* `Cannot access 'code' before initialization` under native ESM. Deferring the
|
||||
* read to first call (render time) sidesteps the temporal dead zone, and caching
|
||||
* keeps a stable reference so react-markdown does not rebuild its processor.
|
||||
*/
|
||||
let remarkPluginsCache: PluggableList | null = null;
|
||||
let rehypePluginsCache: PluggableList | null = null;
|
||||
let markdownComponentsCache: { [nodeType: string]: ElementType } | null = null;
|
||||
|
||||
/**
 * Returns the shared remark plugin list. The array is built on the first call
 * and the same instance is returned on every later call.
 */
export const getRemarkPlugins = (): PluggableList => {
  if (remarkPluginsCache !== null) {
    return remarkPluginsCache;
  }

  const plugins: PluggableList = [
    supersub,
    remarkGfm,
    remarkDirective,
    artifactPlugin,
    [remarkMath, { singleDollarTextMath: false }],
    unicodeCitation,
    mcpUIResourcePlugin,
  ];
  remarkPluginsCache = plugins;
  return plugins;
};
|
||||
|
||||
/**
 * Returns the shared rehype plugin list (KaTeX, then syntax highlighting
 * restricted to `langSubset`). Built on the first call; later calls return the
 * same array instance.
 */
export const getRehypePlugins = (): PluggableList => {
  if (rehypePluginsCache !== null) {
    return rehypePluginsCache;
  }

  const highlightOptions = { detect: true, ignoreMissing: true, subset: langSubset };
  const plugins: PluggableList = [[rehypeKatex], [rehypeHighlight, highlightOptions]];
  rehypePluginsCache = plugins;
  return plugins;
};
|
||||
|
||||
/**
 * Returns the shared map from node/tag name to the React component that
 * renders it. Built on the first call; later calls return the same object.
 */
export const getMarkdownComponents = (): { [nodeType: string]: ElementType } => {
  if (markdownComponentsCache !== null) {
    return markdownComponentsCache;
  }

  // The imported components are read here, at call time, not at module load.
  const components: { [nodeType: string]: ElementType } = {
    code,
    a,
    p,
    img,
    table,
    artifact: Artifact,
    citation: Citation,
    'highlighted-text': HighlightedText,
    'composite-citation': CompositeCitation,
    'mcp-ui-resource': MCPUIResource,
    'mcp-ui-carousel': MCPUIResourceCarousel,
  };
  markdownComponentsCache = components;
  return components;
};
|
||||
157
client/src/components/Chat/Messages/Content/splitMarkdown.ts
Normal file
157
client/src/components/Chat/Messages/Content/splitMarkdown.ts
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
import { gfm } from 'micromark-extension-gfm';
|
||||
import { math } from 'micromark-extension-math';
|
||||
import { gfmFromMarkdown } from 'mdast-util-gfm';
|
||||
import { mathFromMarkdown } from 'mdast-util-math';
|
||||
import { fromMarkdown } from 'mdast-util-from-markdown';
|
||||
import { directive } from 'micromark-extension-directive';
|
||||
import { directiveFromMarkdown } from 'mdast-util-directive';
|
||||
|
||||
/** One top-level markdown block together with the render indices it consumes. */
export type MarkdownBlock = {
  /** Exact source slice for this top-level block. */
  raw: string;
  /**
   * Executable code blocks within this block (those that render a runnable
   * CodeBlock); `math` and `mermaid` fences are not counted.
   */
  codeBlockCount: number;
  /**
   * Artifact directives within this block: both container (`:::artifact:::`)
   * and leaf (`::artifact{}`) forms. Inline text directives are not counted.
   */
  artifactCount: number;
};
|
||||
|
||||
/** One end of a node's source span; `offset` is an index into the parsed string. */
type MdastPoint = { offset?: number };

/**
 * Loose structural view of an mdast node. Every field except `type` is
 * optional because different node kinds carry different fields.
 */
type MdastNode = {
  /** Node kind, e.g. `code`, `html`, `definition`, `containerDirective`. */
  type: string;
  /** Directive name (compared against `artifact` when counting). */
  name?: string;
  /** Fence language of a `code` node; may be `null`. */
  lang?: string | null;
  value?: string;
  children?: MdastNode[];
  /** Source span of the node, used to slice the original string. */
  position?: { start?: MdastPoint; end?: MdastPoint };
};
|
||||
|
||||
/**
 * Same pattern the `code` component applies to `className` to read a fence's
 * language. Hoisted so it is compiled once rather than on every call.
 */
const LANGUAGE_CLASS_PATTERN = /language-(\w+)/;

/**
 * Normalize the fence info string the same way the `code` component does — it
 * reads the language from `className` via `/language-(\w+)/`, so only the leading
 * word characters survive (`mermaid-js` → `mermaid`, `math-tex` → `math`).
 *
 * @param lang Fence language as reported by mdast (`''` when the fence has none).
 * @returns The normalized language, or `''` when no word characters follow the
 *   `language-` prefix.
 */
const renderedCodeLang = (lang: string): string =>
  LANGUAGE_CLASS_PATTERN.exec(`language-${lang}`)?.[1] ?? '';

/**
 * Mirror the `code` component's decision for whether a fenced block renders as a
 * runnable CodeBlock (and therefore consumes a block index). Every fenced code
 * block does, except fences that normalize to `math` or `mermaid`, which have
 * dedicated renderers. mdast strips a fenced block's trailing newline, but
 * react-markdown/remark-rehype re-add it, so the `code` component never treats a
 * fenced block as single-line inline code regardless of its language — only true
 * inline code (an `inlineCode` node, not counted here) is.
 *
 * @param lang Fence language as reported by mdast (`''` when the fence has none).
 * @returns `true` when the fence consumes a CodeBlock index.
 */
const isExecutableCode = (lang: string): boolean => {
  const normalized = renderedCodeLang(lang);
  return normalized !== 'math' && normalized !== 'mermaid';
};
|
||||
|
||||
/**
 * Reports whether `node` is, or contains at any depth, a link-reference
 * `definition` or a `footnoteDefinition` node.
 */
const containsDefinition = (node: MdastNode): boolean => {
  switch (node.type) {
    case 'definition':
    case 'footnoteDefinition':
      return true;
    default:
      break;
  }

  // Depth-first search through the descendants; stop at the first hit.
  for (const child of node.children ?? []) {
    if (containsDefinition(child)) {
      return true;
    }
  }
  return false;
};
|
||||
|
||||
/** Directive node types that render as an Artifact when named `artifact`. */
const ARTIFACT_DIRECTIVE_TYPES = new Set(['containerDirective', 'leafDirective']);

/**
 * Walks `node` and its descendants, adding to `counts` one `artifact` per
 * artifact directive and one `code` per executable fenced code block.
 * `counts` is mutated in place; nothing is returned.
 */
const countWithin = (node: MdastNode, counts: { code: number; artifact: number }): void => {
  const isArtifactDirective = ARTIFACT_DIRECTIVE_TYPES.has(node.type) && node.name === 'artifact';
  if (isArtifactDirective) {
    // artifactPlugin renders container (`:::artifact:::`) and leaf
    // (`::artifact{}`) artifact directives as an Artifact, each consuming one
    // index. Their children never render as executable code blocks, so the walk
    // stops here. Inline text directives (`:artifact{}`) are deliberately left
    // out: the plugin rewrites every textDirective to literal text, so no
    // Artifact renders and no index is consumed.
    counts.artifact += 1;
    return;
  }

  if (node.type === 'code') {
    const lang = node.lang ?? '';
    if (isExecutableCode(lang)) {
      counts.code += 1;
    }
  }

  node.children?.forEach((child) => countWithin(child, counts));
};
|
||||
|
||||
/**
 * Parses `content` into an mdast tree with the GFM, directive and math syntax
 * enabled — the structural constructs the render pipeline relies on (GFM
 * tables, container directives such as `:::artifact:::`, `$$` math) — so the
 * top-level block boundaries match what react-markdown produces. Inline-only
 * transforms (citations, MCP-UI markers, supersub) never cross a top-level
 * block and are intentionally left out.
 */
const parseToMdast = (content: string): MdastNode => {
  const root = fromMarkdown(content, {
    // Tokenizer-level syntax extensions.
    extensions: [gfm(), directive(), math()],
    // Matching tree-building extensions, in the same order.
    mdastExtensions: [gfmFromMarkdown(), directiveFromMarkdown(), mathFromMarkdown()],
  });
  return root as MdastNode;
};
|
||||
|
||||
/**
 * Splits a markdown string into its top-level blocks. Each entry carries the
 * exact source slice of the block and the code-block / artifact index counts
 * it consumes. Completed blocks yield byte-identical slices (and stable counts)
 * across streamed updates, which is what makes per-block memoization pay off:
 * only the final, still-growing block changes from one token to the next.
 *
 * Blank lines between blocks belong to no node's span and are dropped;
 * block-level elements carry their own margins, so rendering each slice
 * independently is visually equivalent to rendering the whole string.
 *
 * @param content Full markdown source of the message.
 * @returns `[]` for empty content; one block spanning the whole message when
 *   the parse has no top-level nodes, when document-global context is needed,
 *   or when a node lacks source offsets; otherwise one block per top-level node.
 */
export function splitMarkdownIntoBlocks(content: string): MarkdownBlock[] {
  if (!content) {
    return [];
  }

  const topLevelNodes = parseToMdast(content).children ?? [];
  if (topLevelNodes.length === 0) {
    return [{ raw: content, codeBlockCount: 0, artifactCount: 0 }];
  }

  /** Fallback: the entire message as a single block, with document-wide counts. */
  const asWholeMessage = (): MarkdownBlock[] => [
    { raw: content, ...blockCounts(topLevelNodes) },
  ];

  // Per-block rendering loses document-global context, so the whole message is
  // rendered as one block when it uses a construct that needs that context:
  //   - reference/footnote definitions are document-scoped (and may be nested
  //     in a blockquote or list item); severed from its definition, a reference
  //     would render as literal text;
  //   - top-level raw HTML blocks are escaped to text (rehypeRaw is not
  //     enabled), so the separator between adjacent HTML blocks would be lost.
  const needsDocumentContext = topLevelNodes.some(
    (node) => node.type === 'html' || containsDefinition(node),
  );
  if (needsDocumentContext) {
    return asWholeMessage();
  }

  const result: MarkdownBlock[] = [];
  for (const node of topLevelNodes) {
    const from = node.position?.start?.offset;
    const to = node.position?.end?.offset;
    if (from == null || to == null) {
      // Without offsets the block cannot be sliced; give up on splitting.
      return asWholeMessage();
    }
    const { codeBlockCount, artifactCount } = blockCounts([node]);
    result.push({ raw: content.slice(from, to), codeBlockCount, artifactCount });
  }
  return result;
}
|
||||
|
||||
/**
 * Totals the executable code blocks and artifact directives found in
 * `children` and their descendants.
 */
const blockCounts = (children: MdastNode[]): { codeBlockCount: number; artifactCount: number } => {
  const tally = { code: 0, artifact: 0 };
  children.forEach((child) => countWithin(child, tally));
  const { code: codeBlockCount, artifact: artifactCount } = tally;
  return { codeBlockCount, artifactCount };
};
|
||||
7
package-lock.json
generated
7
package-lock.json
generated
|
|
@ -464,8 +464,15 @@
|
|||
"lodash": "^4.17.23",
|
||||
"lucide-react": "^0.394.0",
|
||||
"match-sorter": "^8.1.0",
|
||||
"mdast-util-directive": "^3.0.0",
|
||||
"mdast-util-from-markdown": "^2.0.1",
|
||||
"mdast-util-gfm": "^3.0.0",
|
||||
"mdast-util-math": "^3.0.0",
|
||||
"mermaid": "^11.15.0",
|
||||
"micromark-extension-directive": "^3.0.1",
|
||||
"micromark-extension-gfm": "^3.0.0",
|
||||
"micromark-extension-llm-math": "^3.1.0",
|
||||
"micromark-extension-math": "^3.1.0",
|
||||
"qrcode.react": "^4.2.0",
|
||||
"rc-input-number": "^7.4.2",
|
||||
"react": "^18.2.0",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue