🧊 perf: Memoize Completed Markdown Blocks During Streaming (#13576)

Render assistant markdown as independently memoized top-level blocks instead of a single ReactMarkdown that re-parses and re-highlights the entire message on every streamed token. Once a block's source slice is stable it skips re-parse/re-render; only the final, still-growing block re-parses.

- splitMarkdown: split a message into top-level blocks via mdast-util-from-markdown (+ gfm/directive/math extensions) using node source offsets; also report per-block executable-code and artifact index counts.
- MarkdownBlocks: render each block memoized on its raw slice, each wrapped in its own CodeBlock/Artifact providers seeded with prefix-summed base indices, so the document-order indices used to match code-execution results stay stable under memoization (verified by OLD-vs-NEW parity tests across direct + streamed renders).
- CodeBlockContext/ArtifactContext: add optional baseIndex (default 0, fully backward compatible) so per-block providers continue the running index.
- markdownConfig: extract the shared remark/rehype plugins + components map.
- deps: declare mdast-util-from-markdown, mdast-util-gfm/math/directive and the micromark gfm/math/directive extensions as direct client dependencies (previously resolved transitively via react-markdown).
- Tests: splitter unit tests; index parity + DOM equivalence vs the whole-message renderer; rendering smoke tests.
- Bench (MarkdownBlocks.bench.tsx, outside __tests__ so the default jest run skips it): ~88% fewer code-block renders and ~2.3x faster cumulative render across a simulated stream.
2026-06-09 17:31:19 +00:00 · 2026-06-07 20:31:56 -04:00 · 2026-06-07 20:31:56 -04:00 · 15ea03624d
commit 15ea03624d
parent 90ebecb254
12 changed files with 1107 additions and 91 deletions
--- a/client/package.json
+++ b/client/package.json
@ -82,8 +82,15 @@
    "lodash": "^4.17.23",
    "lucide-react": "^0.394.0",
    "match-sorter": "^8.1.0",
+    "mdast-util-directive": "^3.0.0",
+    "mdast-util-from-markdown": "^2.0.1",
+    "mdast-util-gfm": "^3.0.0",
+    "mdast-util-math": "^3.0.0",
    "mermaid": "^11.15.0",
+    "micromark-extension-directive": "^3.0.1",
+    "micromark-extension-gfm": "^3.0.0",
    "micromark-extension-llm-math": "^3.1.0",
+    "micromark-extension-math": "^3.1.0",
    "qrcode.react": "^4.2.0",
    "rc-input-number": "^7.4.2",
    "react": "^18.2.0",
--- a/client/src/Providers/ArtifactContext.tsx
+++ b/client/src/Providers/ArtifactContext.tsx
@ -8,17 +8,31 @@ type TArtifactContext = {
 export const ArtifactContext = createContext<TArtifactContext>({} as TArtifactContext);
 export const useArtifactContext = () => useContext(ArtifactContext);

-export function ArtifactProvider({ children }: { children: ReactNode }) {
+export function ArtifactProvider({
+  children,
+  baseIndex = 0,
+}: {
+  children: ReactNode;
+  /**
+   * Offset added to every assigned index, so per-block memoized rendering can
+   * seed each block's provider with the count of artifacts in earlier blocks
+   * and keep document-order indices stable.
+   */
+  baseIndex?: number;
+}) {
  const counterRef = useRef(0);

-  const getNextIndex = useCallback((skip: boolean) => {
-    if (skip) {
-      return counterRef.current;
-    }
-    const nextIndex = counterRef.current;
-    counterRef.current += 1;
-    return nextIndex;
-  }, []);
+  const getNextIndex = useCallback(
+    (skip: boolean) => {
+      if (skip) {
+        return baseIndex + counterRef.current;
+      }
+      const nextIndex = counterRef.current;
+      counterRef.current += 1;
+      return baseIndex + nextIndex;
+    },
+    [baseIndex],
+  );

  const resetCounter = useCallback(() => {
    counterRef.current = 0;
--- a/client/src/Providers/CodeBlockContext.tsx
+++ b/client/src/Providers/CodeBlockContext.tsx
@ -8,17 +8,33 @@ type TCodeBlockContext = {
 export const CodeBlockContext = createContext<TCodeBlockContext>({} as TCodeBlockContext);
 export const useCodeBlockContext = () => useContext(CodeBlockContext);

-export function CodeBlockProvider({ children }: { children: ReactNode }) {
+export function CodeBlockProvider({
+  children,
+  baseIndex = 0,
+}: {
+  children: ReactNode;
+  /**
+   * Offset added to every assigned index. When rendering a message as
+   * independently memoized blocks, each block gets its own provider seeded with
+   * the running count of executable code blocks in earlier blocks, so document-
+   * order indices are preserved without a single shared (memoization-fragile)
+   * counter.
+   */
+  baseIndex?: number;
+}) {
  const counterRef = useRef(0);

-  const getNextIndex = useCallback((skip: boolean) => {
-    if (skip) {
-      return counterRef.current;
-    }
-    const nextIndex = counterRef.current;
-    counterRef.current += 1;
-    return nextIndex;
-  }, []);
+  const getNextIndex = useCallback(
+    (skip: boolean) => {
+      if (skip) {
+        return baseIndex + counterRef.current;
+      }
+      const nextIndex = counterRef.current;
+      counterRef.current += 1;
+      return baseIndex + nextIndex;
+    },
+    [baseIndex],
+  );

  const resetCounter = useCallback(() => {
    counterRef.current = 0;
--- a/client/src/components/Chat/Messages/Content/Markdown.tsx
+++ b/client/src/components/Chat/Messages/Content/Markdown.tsx
@ -1,25 +1,9 @@
 import React, { memo, useMemo } from 'react';
-import remarkGfm from 'remark-gfm';
-import remarkMath from 'remark-math';
-import supersub from 'remark-supersub';
-import rehypeKatex from 'rehype-katex';
 import { useRecoilValue } from 'recoil';
-import ReactMarkdown from 'react-markdown';
-import rehypeHighlight from 'rehype-highlight';
-import remarkDirective from 'remark-directive';
-import type { Pluggable } from 'unified';
-import { Citation, CompositeCitation, HighlightedText } from '~/components/Web/Citation';
-import {
-  mcpUIResourcePlugin,
-  MCPUIResource,
-  MCPUIResourceCarousel,
-} from '~/components/MCPUIResource';
-import { Artifact, artifactPlugin } from '~/components/Artifacts/Artifact';
-import { ArtifactProvider, CodeBlockProvider } from '~/Providers';
+import { getRemarkPlugins, getRehypePlugins, getMarkdownComponents } from './markdownConfig';
 import MarkdownErrorBoundary from './MarkdownErrorBoundary';
-import { langSubset, preprocessLaTeX } from '~/utils';
-import { unicodeCitation } from '~/components/Web';
-import { code, a, p, img, table } from './MarkdownComponents';
+import MarkdownBlocks from './MarkdownBlocks';
+import { preprocessLaTeX } from '~/utils';
 import store from '~/store';

 type TContentProps = {
@ -38,31 +22,6 @@ const Markdown = memo(function Markdown({ content = '', isLatestMessage }: TCont
    return LaTeXParsing ? preprocessLaTeX(content) : content;
  }, [content, LaTeXParsing, isInitializing]);

-  const rehypePlugins = useMemo(
-    () => [
-      [rehypeKatex],
-      [
-        rehypeHighlight,
-        {
-          detect: true,
-          ignoreMissing: true,
-          subset: langSubset,
-        },
-      ],
-    ],
-    [],
-  );
-
-  const remarkPlugins: Pluggable[] = [
-    supersub,
-    remarkGfm,
-    remarkDirective,
-    artifactPlugin,
-    [remarkMath, { singleDollarTextMath: false }],
-    unicodeCitation,
-    mcpUIResourcePlugin,
-  ];
-
  if (isInitializing) {
    return (
      <div className="absolute">
@ -75,35 +34,12 @@ const Markdown = memo(function Markdown({ content = '', isLatestMessage }: TCont

  return (
    <MarkdownErrorBoundary content={content} codeExecution={true}>
-      <ArtifactProvider>
-        <CodeBlockProvider>
-          <ReactMarkdown
-            /** @ts-ignore */
-            remarkPlugins={remarkPlugins}
-            /* @ts-ignore */
-            rehypePlugins={rehypePlugins}
-            components={
-              {
-                code,
-                a,
-                p,
-                img,
-                table,
-                artifact: Artifact,
-                citation: Citation,
-                'highlighted-text': HighlightedText,
-                'composite-citation': CompositeCitation,
-                'mcp-ui-resource': MCPUIResource,
-                'mcp-ui-carousel': MCPUIResourceCarousel,
-              } as {
-                [nodeType: string]: React.ElementType;
-              }
-            }
-          >
-            {currentContent}
-          </ReactMarkdown>
-        </CodeBlockProvider>
-      </ArtifactProvider>
+      <MarkdownBlocks
+        content={currentContent}
+        remarkPlugins={getRemarkPlugins()}
+        rehypePlugins={getRehypePlugins()}
+        components={getMarkdownComponents()}
+      />
    </MarkdownErrorBoundary>
  );
 });
--- a/client/src/components/Chat/Messages/Content/MarkdownBlocks.bench.tsx
+++ b/client/src/components/Chat/Messages/Content/MarkdownBlocks.bench.tsx
@ -0,0 +1,165 @@
+import React, { Profiler } from 'react';
+import { RecoilRoot } from 'recoil';
+import ReactMarkdown from 'react-markdown';
+import { render } from '@testing-library/react';
+import { getRemarkPlugins, getRehypePlugins, getMarkdownComponents } from './markdownConfig';
+import { ArtifactProvider, CodeBlockProvider } from '~/Providers';
+import CodeBlock from '~/components/Messages/Content/CodeBlock';
+import Markdown from './Markdown';
+
+/**
+ * Streaming render benchmark comparing the previous whole-message renderer
+ * (one ReactMarkdown re-parsing everything per token) against the per-block
+ * memoized renderer. This file lives outside `__tests__/` and is named
+ * `.bench.tsx` so the default jest run skips it; execute it explicitly with:
+ *
+ *   node node_modules/jest/bin/jest.js --runInBand --coverage=false \
+ *     --testMatch '**\/MarkdownBlocks.bench.tsx'
+ *
+ * Two metrics are reported:
+ *  - codeBlockRenders: deterministic structural metric — how many times code
+ *    blocks render across the whole stream (the memoization win, noise-free).
+ *  - totalMs: summed React Profiler actualDuration (wall-clock; jsdom absolute
+ *    numbers are not browser-accurate, but the OLD/NEW ratio is indicative).
+ */
+
+jest.mock('~/components/Messages/Content/CodeBlock', () => ({
+  __esModule: true,
+  default: jest.fn(() => null),
+}));
+
+const codeBlockMock = CodeBlock as unknown as jest.Mock;
+
+const LANGS = ['python', 'javascript', 'typescript', 'bash', 'json', 'sql', 'go', 'rust'];
+
+const buildMessage = (sections: number): string => { // synthetic markdown: each section is a heading, a paragraph and an 8-line fenced code block
+  const parts: string[] = [];
+  for (let i = 0; i < sections; i += 1) {
+    parts.push(`## Section ${i + 1}`, '');
+    parts.push(
+      `This is paragraph ${i + 1} explaining the code below with some **bold** and ` +
+        `\`inline\` text, intentionally a bit long to add realistic reflow cost during ` +
+        `streaming, repeated across every section of the message.`,
+      '',
+    );
+    const lang = LANGS[i % LANGS.length]; // cycle through LANGS so fences vary by language
+    parts.push('```' + lang);
+    for (let l = 0; l < 8; l += 1) {
+      parts.push(`const value_${i}_${l} = computeSomething(${l}, "arg_${l}"); // line ${l}`);
+    }
+    parts.push('```', '');
+    if (i % 3 === 0) { // sections 0, 3, 6, ... additionally get a 5-row GFM table
+      parts.push('| Name | Type | Value |', '| --- | --- | --- |');
+      for (let r = 0; r < 5; r += 1) {
+        parts.push(`| item_${i}_${r} | number | ${r * i} |`);
+      }
+      parts.push('');
+    }
+  }
+  return parts.join('\n'); // the '' entries above become the blank lines separating blocks
+};
+
+const makePrefixes = (content: string, steps: number): string[] => { // simulated stream: `steps` growing prefixes of `content`
+  const prefixes: string[] = [];
+  for (let s = 1; s <= steps; s += 1) {
+    prefixes.push(content.slice(0, Math.ceil((content.length * s) / steps))); // at s === steps this is the whole string
+  }
+  return prefixes; // empty when steps < 1
+};
+
+const OldMarkdown = ({ content }: { content: string }) => (
+  <ArtifactProvider>
+    <CodeBlockProvider>
+      <ReactMarkdown
+        /** @ts-ignore */
+        remarkPlugins={getRemarkPlugins()}
+        /** @ts-ignore */
+        rehypePlugins={getRehypePlugins()}
+        components={getMarkdownComponents()}
+      >
+        {content}
+      </ReactMarkdown>
+    </CodeBlockProvider>
+  </ArtifactProvider>
+);
+
+const NewMarkdown = ({ content }: { content: string }) => (
+  <Markdown content={content} isLatestMessage={true} />
+);
+
+const measure = ( // mounts `Component` with the first prefix, re-renders through the rest, returns the accumulated cost
+  Component: React.ComponentType<{ content: string }>,
+  prefixes: string[],
+): { totalMs: number; codeBlockRenders: number } => {
+  codeBlockMock.mockClear(); // so mock.calls below counts CodeBlock renders from this run only
+  let totalMs = 0;
+  const onRender = (_id: string, _phase: string, actualDuration: number) => {
+    totalMs += actualDuration; // summed over every Profiler callback of the run
+  };
+  const tree = (content: string) => (
+    <Profiler id="bench" onRender={onRender}>
+      <RecoilRoot>
+        <Component content={content} />
+      </RecoilRoot>
+    </Profiler>
+  );
+  const { rerender, unmount } = render(tree(prefixes[0])); // NOTE(review): assumes prefixes is non-empty
+  for (let i = 1; i < prefixes.length; i += 1) {
+    rerender(tree(prefixes[i]));
+  }
+  const result = { totalMs, codeBlockRenders: codeBlockMock.mock.calls.length }; // captured before unmount()
+  unmount();
+  return result;
+};
+
+describe('Markdown streaming benchmark (OLD whole-message vs NEW per-block)', () => {
+  it('reports render cost across a simulated stream', () => {
+    const content = buildMessage(12);
+    const steps = 80;
+    const prefixes = makePrefixes(content, steps);
+    const iterations = 3;
+
+    // Warm up module/highlight caches so the first measured run isn't skewed.
+    measure(OldMarkdown, prefixes);
+    measure(NewMarkdown, prefixes);
+
+    const old: Array<{ totalMs: number; codeBlockRenders: number }> = [];
+    const neu: Array<{ totalMs: number; codeBlockRenders: number }> = [];
+    for (let i = 0; i < iterations; i += 1) {
+      old.push(measure(OldMarkdown, prefixes));
+      neu.push(measure(NewMarkdown, prefixes));
+    }
+
+    const minMs = (rs: Array<{ totalMs: number }>) => Math.min(...rs.map((r) => r.totalMs));
+    const oldMs = minMs(old);
+    const newMs = minMs(neu);
+    const oldRenders = old[0].codeBlockRenders;
+    const newRenders = neu[0].codeBlockRenders;
+
+    console.log(
+      [
+        '',
+        '================ Markdown streaming benchmark ================',
+        `message size: ${content.length} chars, stream steps: ${steps}, iterations: ${iterations}`,
+        '',
+        `code-block renders over the stream (structural, noise-free):`,
+        `  OLD (whole-message): ${oldRenders}`,
+        `  NEW (per-block)    : ${newRenders}`,
+        `  reduction          : ${(100 * (1 - newRenders / oldRenders)).toFixed(1)}%`,
+        '',
+        `total render time (min of ${iterations}, summed Profiler actualDuration; jsdom):`,
+        `  OLD: ${oldMs.toFixed(1)} ms`,
+        `  NEW: ${newMs.toFixed(1)} ms`,
+        `  speedup: ${(oldMs / newMs).toFixed(2)}x`,
+        '=============================================================',
+        '',
+      ].join('\n'),
+    );
+
+    // Sanity: the per-block renderer must not render code blocks MORE than the
+    // whole-message renderer. The real win is asserted separately below.
+    expect(newRenders).toBeLessThanOrEqual(oldRenders);
+    // Memoization should cut total code-block renders by a wide margin.
+    expect(newRenders).toBeLessThan(oldRenders * 0.5);
+  });
+});
--- a/client/src/components/Chat/Messages/Content/MarkdownBlocks.tsx
+++ b/client/src/components/Chat/Messages/Content/MarkdownBlocks.tsx
@ -0,0 +1,110 @@
+import React, { memo, useMemo } from 'react';
+import ReactMarkdown from 'react-markdown';
+import type { PluggableList } from 'unified';
+import type { ElementType } from 'react';
+import { ArtifactProvider, CodeBlockProvider } from '~/Providers';
+import { splitMarkdownIntoBlocks } from './splitMarkdown';
+
+type SharedProps = { // props handed through unchanged to each block's ReactMarkdown
+  remarkPlugins: PluggableList;
+  rehypePlugins: PluggableList;
+  components: { [nodeType: string]: ElementType }; // node/tag name -> component used to render it
+};
+
+type MarkdownBlockProps = SharedProps & {
+  content: string; // raw markdown source of this one block
+  codeBaseIndex: number; // passed to CodeBlockProvider as baseIndex
+  artifactBaseIndex: number; // passed to ArtifactProvider as baseIndex
+};
+
+/**
+ * Renders one top-level markdown block inside its own code/artifact providers,
+ * seeded with the running index of executable code blocks and artifacts in
+ * earlier blocks. Memoized on `content` and the base indices: a completed block
+ * whose source slice and bases are unchanged across streamed tokens skips both
+ * re-parsing and re-rendering, so only the final, still-growing block re-parses.
+ *
+ * The custom comparator passed to `memo` looks at `content`, `codeBaseIndex`
+ * and `artifactBaseIndex` only. `remarkPlugins`, `rehypePlugins` and
+ * `components` are not compared, so a change to those props alone does not
+ * re-render an already-mounted block; new identities for them on each parent
+ * render therefore do not defeat the memoization.
+ * NOTE(review): presumably the plugin/component config is constant for the
+ * lifetime of a message; confirm against markdownConfig.
+ */
+const MarkdownBlock = memo(
+  function MarkdownBlock({
+    content,
+    codeBaseIndex,
+    artifactBaseIndex,
+    remarkPlugins,
+    rehypePlugins,
+    components,
+  }: MarkdownBlockProps) {
+    return (
+      <ArtifactProvider baseIndex={artifactBaseIndex}>
+        <CodeBlockProvider baseIndex={codeBaseIndex}>
+          <ReactMarkdown
+            /** @ts-ignore */
+            remarkPlugins={remarkPlugins}
+            /** @ts-ignore */
+            rehypePlugins={rehypePlugins}
+            components={components}
+          >
+            {content}
+          </ReactMarkdown>
+        </CodeBlockProvider>
+      </ArtifactProvider>
+    );
+  },
+  (prev, next) =>
+    prev.content === next.content &&
+    prev.codeBaseIndex === next.codeBaseIndex &&
+    prev.artifactBaseIndex === next.artifactBaseIndex,
+);
+MarkdownBlock.displayName = 'MarkdownBlock';
+
+type MarkdownBlocksProps = SharedProps & {
+  content: string;
+};
+
+/**
+ * Splits a message into top-level blocks and renders each independently so
+ * that, during streaming, only the last block re-parses while earlier blocks
+ * (tables, code, etc.) stay memoized. Each block's executable code and artifact
+ * indices are preserved in document order via per-block providers seeded with
+ * prefix-summed base indices.
+ *
+ * The block list is recomputed only when `content` changes (`useMemo`). Each
+ * entry stores the running totals of `codeBlockCount` / `artifactCount` taken
+ * from the blocks before it, i.e. the index its own providers start from.
+ * When the splitter returns no blocks an empty fragment is rendered.
+ */
+const MarkdownBlocks = memo(function MarkdownBlocks({
+  content,
+  remarkPlugins,
+  rehypePlugins,
+  components,
+}: MarkdownBlocksProps) {
+  const blocks = useMemo(() => {
+    let codeBaseIndex = 0;
+    let artifactBaseIndex = 0;
+    return splitMarkdownIntoBlocks(content).map((block) => {
+      const entry = { raw: block.raw, codeBaseIndex, artifactBaseIndex }; // bases captured BEFORE this block's own counts are added
+      codeBaseIndex += block.codeBlockCount;
+      artifactBaseIndex += block.artifactCount;
+      return entry;
+    });
+  }, [content]);
+
+  return (
+    <>
+      {blocks.map((block, index) => (
+        // Key includes the base indices so that an in-place edit which inserts a
+        // block before existing code/artifact blocks (shifting their base) forces
+        // a remount, refreshing the index each code/artifact block captures in a
+        // ref. During append-only streaming these stay constant, so completed
+        // blocks keep a stable key and are not remounted.
+        <MarkdownBlock
+          key={`${index}-${block.codeBaseIndex}-${block.artifactBaseIndex}`}
+          content={block.raw}
+          codeBaseIndex={block.codeBaseIndex}
+          artifactBaseIndex={block.artifactBaseIndex}
+          remarkPlugins={remarkPlugins}
+          rehypePlugins={rehypePlugins}
+          components={components}
+        />
+      ))}
+    </>
+  );
+});
+MarkdownBlocks.displayName = 'MarkdownBlocks';
+
+export default MarkdownBlocks;
--- a/client/src/components/Chat/Messages/Content/__tests__/MarkdownBlocks.artifacts.test.tsx
+++ b/client/src/components/Chat/Messages/Content/__tests__/MarkdownBlocks.artifacts.test.tsx
@ -0,0 +1,117 @@
+import React from 'react';
+import { RecoilRoot } from 'recoil';
+import ReactMarkdown from 'react-markdown';
+import { MemoryRouter } from 'react-router-dom';
+import { render, screen } from '@testing-library/react';
+import { getRemarkPlugins, getRehypePlugins, getMarkdownComponents } from '../markdownConfig';
+import { MessageContext, ArtifactProvider, CodeBlockProvider } from '~/Providers';
+import Markdown from '../Markdown';
+
+/**
+ * End-to-end artifact-index regression tests. The real `Artifact` component
+ * computes its document-order index via the (per-block) ArtifactProvider and
+ * stores it on the artifact passed to `ArtifactButton`; we stub only
+ * `ArtifactButton` to read that index back. This verifies the per-block base
+ * indexing assigns artifacts the same indices the whole-message renderer did —
+ * the index used by artifact edit/update calls.
+ */
+jest.mock('~/components/Artifacts/ArtifactButton', () => ({
+  __esModule: true,
+  default: ({ artifact }: { artifact?: { index?: number; identifier?: string } }) => (
+    <div
+      data-testid="art"
+      data-index={String(artifact?.index)}
+      data-id={String(artifact?.identifier)}
+    />
+  ),
+}));
+
+const artifact = (id: string, title: string) =>
+  `:::artifact{identifier="${id}" type="text/markdown" title="${title}"}\nhello ${id}\n:::`;
+
+const wrap = (ui: React.ReactNode) => (
+  <MemoryRouter>
+    <RecoilRoot>
+      <MessageContext.Provider value={{ messageId: 'm1', isExpanded: true }}>
+        {ui}
+      </MessageContext.Provider>
+    </RecoilRoot>
+  </MemoryRouter>
+);
+
+/** The previous whole-message renderer: one ReactMarkdown under one ArtifactProvider. */
+const OldMarkdown = ({ content }: { content: string }) => (
+  <ArtifactProvider>
+    <CodeBlockProvider>
+      <ReactMarkdown
+        /** @ts-ignore */
+        remarkPlugins={getRemarkPlugins()}
+        /** @ts-ignore */
+        rehypePlugins={getRehypePlugins()}
+        components={getMarkdownComponents()}
+      >
+        {content}
+      </ReactMarkdown>
+    </CodeBlockProvider>
+  </ArtifactProvider>
+);
+
+const readArtifacts = async () =>
+  (await screen.findAllByTestId('art')).map((el) => ({
+    idx: el.getAttribute('data-index'),
+    id: el.getAttribute('data-id'),
+  }));
+
+describe('MarkdownBlocks artifact-index parity (e2e)', () => {
+  it('assigns document-order indices to multiple artifacts', async () => {
+    const content = `${artifact('a', 'A')}\n\n${artifact('b', 'B')}`;
+    render(wrap(<Markdown content={content} isLatestMessage={false} />));
+
+    expect(await readArtifacts()).toEqual([
+      { idx: '0', id: 'a' },
+      { idx: '1', id: 'b' },
+    ]);
+  });
+
+  it('matches the whole-message renderer indices (text + artifacts interleaved)', async () => {
+    const content = `Intro.\n\n${artifact('a', 'A')}\n\nMiddle.\n\n${artifact('b', 'B')}\n\nEnd.`;
+
+    const { unmount } = render(wrap(<OldMarkdown content={content} />));
+    const oldIdx = await readArtifacts();
+    unmount();
+
+    render(wrap(<Markdown content={content} isLatestMessage={false} />));
+    const newIdx = await readArtifacts();
+
+    expect(newIdx).toEqual(oldIdx);
+    expect(newIdx).toEqual([
+      { idx: '0', id: 'a' },
+      { idx: '1', id: 'b' },
+    ]);
+  });
+
+  it('refreshes artifact indices when an in-place edit inserts an artifact before another', async () => {
+    const before = `Intro.\n\n${artifact('b', 'B')}`;
+    const after = `${artifact('a', 'A')}\n\n${artifact('b', 'B')}`;
+
+    const { rerender } = render(wrap(<Markdown content={before} isLatestMessage={false} />));
+    expect(await readArtifacts()).toEqual([{ idx: '0', id: 'b' }]);
+
+    rerender(wrap(<Markdown content={after} isLatestMessage={false} />));
+    // 'b' was index 0; inserting 'a' before it shifts its base to 1. Without the
+    // base-aware block key its ref-cached index would stay 0 (duplicating 'a').
+    expect(await readArtifacts()).toEqual([
+      { idx: '0', id: 'a' },
+      { idx: '1', id: 'b' },
+    ]);
+  });
+
+  it('does not consume an index for inline text artifact directives', async () => {
+    // `:artifact{}` (text directive) renders as literal text, not an Artifact, so
+    // the following real artifact must still be index 0.
+    const content = `See :artifact{identifier="x"} inline.\n\n${artifact('a', 'A')}`;
+    render(wrap(<Markdown content={content} isLatestMessage={false} />));
+
+    expect(await readArtifacts()).toEqual([{ idx: '0', id: 'a' }]);
+  });
+});
--- a/client/src/components/Chat/Messages/Content/__tests__/MarkdownBlocks.test.tsx
+++ b/client/src/components/Chat/Messages/Content/__tests__/MarkdownBlocks.test.tsx
@ -0,0 +1,240 @@
+import React from 'react';
+import { RecoilRoot } from 'recoil';
+import ReactMarkdown from 'react-markdown';
+import { render, screen } from '@testing-library/react';
+import { QueryClient, QueryClientProvider } from '@tanstack/react-query';
+import { getRemarkPlugins, getRehypePlugins, getMarkdownComponents } from '../markdownConfig';
+import { ArtifactProvider, CodeBlockProvider } from '~/Providers';
+import Markdown from '../Markdown';
+
+/**
+ * Stub CodeBlock so we can read the blockIndex each executable code block
+ * receives, while still exercising the real `code` override's skip/single-line
+ * decision (which decides whether a CodeBlock renders at all).
+ */
+jest.mock('~/components/Messages/Content/CodeBlock', () => ({
+  __esModule: true,
+  default: ({ lang, blockIndex }: { lang?: string; blockIndex?: number }) => (
+    <div data-testid="cb" data-block-index={String(blockIndex)} data-lang={String(lang)} />
+  ),
+}));
+
+/** The previous whole-message renderer: a single ReactMarkdown under one set of providers. */
+const OldMarkdown = ({ content }: { content: string }) => (
+  <ArtifactProvider>
+    <CodeBlockProvider>
+      <ReactMarkdown
+        /** @ts-ignore */
+        remarkPlugins={getRemarkPlugins()}
+        /** @ts-ignore */
+        rehypePlugins={getRehypePlugins()}
+        components={getMarkdownComponents()}
+      >
+        {content}
+      </ReactMarkdown>
+    </CodeBlockProvider>
+  </ArtifactProvider>
+);
+
+/**
+ * The whole-message renderer emits whitespace-only text nodes ("\n") between
+ * top-level block elements; the per-block renderer parses each block in
+ * isolation and omits them. That whitespace is collapsed between block-level
+ * elements, so it is visually and functionally irrelevant — normalize it away
+ * before comparing structure.
+ */
+const normalizeHtml = (html: string) => html.replace(/>\s+</g, '><').trim();
+
+const indicesIn = (container: HTMLElement) =>
+  Array.from(container.querySelectorAll('[data-testid="cb"]')).map((el) => ({
+    idx: el.getAttribute('data-block-index'),
+    lang: el.getAttribute('data-lang'),
+  }));
+
+const streamThrough = ( // replays `content` one source line at a time through `Component`; returns the render container
+  Component: React.ComponentType<{ content: string }>,
+  content: string,
+): HTMLElement => {
+  const lines = content.split('\n'); // each line acts as one streamed chunk
+  const { container, rerender } = render(
+    <RecoilRoot>
+      <Component content={lines[0]} />
+    </RecoilRoot>,
+  );
+  for (let i = 2; i <= lines.length; i += 1) { // the one-line prefix was the initial render above, so start at two lines
+    rerender(
+      <RecoilRoot>
+        <Component content={lines.slice(0, i).join('\n')} />
+      </RecoilRoot>,
+    );
+  }
+  return container; // the last re-render used the full content (i === lines.length)
+};
+
+const MIXED = [
+  'Intro paragraph.',
+  '',
+  '```python',
+  'print("one")',
+  'x = 1',
+  '```',
+  '',
+  'Some `inline` code here.',
+  '',
+  '```js',
+  'console.log("two");',
+  'const y = 2;',
+  '```',
+  '',
+  '```math',
+  'E = mc^2',
+  '```',
+  '',
+  '```bash',
+  'echo single',
+  '```',
+  '',
+  '```mermaid',
+  'graph TD; A-->B;',
+  '```',
+  '',
+  '```ts',
+  'const z: number = 3;',
+  'export {};',
+  '```',
+].join('\n');
+
+const EXPECTED = [
+  { idx: '0', lang: 'python' },
+  { idx: '1', lang: 'js' },
+  { idx: '2', lang: 'bash' },
+  { idx: '3', lang: 'ts' },
+];
+
+const NewMarkdown = ({ content }: { content: string }) => (
+  <Markdown content={content} isLatestMessage={false} />
+);
+
+describe('MarkdownBlocks code-block index parity', () => {
+  it('assigns document-order indices on a direct render (matches whole-message renderer)', () => {
+    const { container: oldC } = render(
+      <RecoilRoot>
+        <OldMarkdown content={MIXED} />
+      </RecoilRoot>,
+    );
+    const { container: newC } = render(
+      <RecoilRoot>
+        <Markdown content={MIXED} isLatestMessage={false} />
+      </RecoilRoot>,
+    );
+
+    expect(indicesIn(oldC)).toEqual(EXPECTED);
+    expect(indicesIn(newC)).toEqual(EXPECTED);
+  });
+
+  it('keeps indices correct across a simulated stream (no drift under memoization)', () => {
+    const oldC = streamThrough(OldMarkdown, MIXED);
+    const newC = streamThrough(NewMarkdown, MIXED);
+
+    expect(indicesIn(oldC)).toEqual(EXPECTED);
+    expect(indicesIn(newC)).toEqual(EXPECTED);
+  });
+
+  it('streamed indices match a fresh direct render (stable for stored execution results)', () => {
+    const streamed = streamThrough(NewMarkdown, MIXED);
+    const { container: fresh } = render(
+      <RecoilRoot>
+        <Markdown content={MIXED} isLatestMessage={false} />
+      </RecoilRoot>,
+    );
+    expect(indicesIn(streamed)).toEqual(indicesIn(fresh));
+  });
+
+  it('refreshes indices when an in-place edit inserts a code block before existing ones', () => {
+    const before = 'intro\n\n```js\na\n```';
+    const after = '```py\nx\n```\n\n```js\na\n```';
+    const { container, rerender } = render(
+      <RecoilRoot>
+        <Markdown content={before} isLatestMessage={false} />
+      </RecoilRoot>,
+    );
+    expect(indicesIn(container)).toEqual([{ idx: '0', lang: 'js' }]);
+
+    rerender(
+      <RecoilRoot>
+        <Markdown content={after} isLatestMessage={false} />
+      </RecoilRoot>,
+    );
+    // The js block's base shifted 0 -> 1; without forcing a remount its ref-cached
+    // index would stay 0 (duplicating py). It must become 1.
+    expect(indicesIn(container)).toEqual([
+      { idx: '0', lang: 'py' },
+      { idx: '1', lang: 'js' },
+    ]);
+  });
+});
+
+describe('MarkdownBlocks DOM equivalence (non-code blocks)', () => {
+  const cases: Array<[string, string]> = [
+    ['paragraphs', 'First paragraph.\n\nSecond paragraph.'],
+    ['gfm table', ['| a | b |', '| - | - |', '| 1 | 2 |', '| 3 | 4 |'].join('\n')],
+    ['unordered list', '- one\n- two\n- three'],
+    ['ordered list', '1. one\n2. two'],
+    ['headings + text', '# Title\n\nBody text with **bold** and _italics_.'],
+    ['blockquote', '> quoted line one\n> quoted line two'],
+    ['inline code', 'Use the `useMemo` hook for memoization.'],
+    ['mixed', '# H\n\nPara with `code`.\n\n| x | y |\n| - | - |\n| 1 | 2 |\n\n- a\n- b'],
+  ];
+
+  it.each(cases)('renders identical DOM to the whole-message renderer: %s', (_label, content) => {
+    const { container: oldC } = render(
+      <RecoilRoot>
+        <OldMarkdown content={content} />
+      </RecoilRoot>,
+    );
+    const { container: newC } = render(
+      <RecoilRoot>
+        <Markdown content={content} isLatestMessage={false} />
+      </RecoilRoot>,
+    );
+    expect(normalizeHtml(newC.innerHTML)).toBe(normalizeHtml(oldC.innerHTML));
+  });
+});
+
+describe('MarkdownBlocks rendering smoke', () => {
+  it('renders an empty cursor placeholder while initializing', () => {
+    const { container } = render(
+      <RecoilRoot>
+        <Markdown content="" isLatestMessage={true} />
+      </RecoilRoot>,
+    );
+    expect(container.querySelector('.result-thinking')).not.toBeNull();
+  });
+
+  it('renders executable code blocks for a multi-code message', () => {
+    render(
+      <RecoilRoot>
+        <Markdown content={MIXED} isLatestMessage={false} />
+      </RecoilRoot>,
+    );
+    expect(screen.getAllByTestId('cb')).toHaveLength(4);
+  });
+});
+
+describe('MarkdownBlocks document-level definitions', () => {
+  it('resolves a reference-style link whose definition is in a separate block', () => {
+    const queryClient = new QueryClient();
+    const content = 'See [docs][d] for details.\n\n[d]: https://example.com/docs';
+    render(
+      <QueryClientProvider client={queryClient}>
+        <RecoilRoot>
+          <Markdown content={content} isLatestMessage={false} />
+        </RecoilRoot>
+      </QueryClientProvider>,
+    );
+    expect(screen.getByRole('link', { name: 'docs' })).toHaveAttribute(
+      'href',
+      'https://example.com/docs',
+    );
+  });
+});
--- a/client/src/components/Chat/Messages/Content/__tests__/splitMarkdown.test.ts
+++ b/client/src/components/Chat/Messages/Content/__tests__/splitMarkdown.test.ts
@ -0,0 +1,168 @@
+import { splitMarkdownIntoBlocks } from '../splitMarkdown';
+
+/** Split `content` and keep only each block's `raw` source slice, in order. */
+const raws = (content: string) => {
+  const blocks = splitMarkdownIntoBlocks(content);
+  return blocks.map(({ raw }) => raw);
+};
+
+describe('splitMarkdownIntoBlocks', () => {
+  // Boundary cases: each expectation lists the exact `raw` slices, in document
+  // order, that the splitter must return.
+  it('returns [] for empty content', () => {
+    expect(splitMarkdownIntoBlocks('')).toEqual([]);
+  });
+
+  it('returns a single block for one paragraph', () => {
+    expect(raws('Hello world.')).toEqual(['Hello world.']);
+  });
+
+  it('splits consecutive paragraphs into separate blocks', () => {
+    // The blank line between the paragraphs belongs to neither slice.
+    expect(raws('First.\n\nSecond.')).toEqual(['First.', 'Second.']);
+  });
+
+  it('keeps a GFM table as one atomic block', () => {
+    const table = '| a | b |\n| - | - |\n| 1 | 2 |';
+    const content = `Intro.\n\n${table}\n\nOutro.`;
+    expect(raws(content)).toEqual(['Intro.', table, 'Outro.']);
+  });
+
+  it('keeps a fenced code block (with internal blank lines) as one block', () => {
+    // The blank line inside the fence must not end the block.
+    const code = '```js\nconst x = 1;\n\nconst y = 2;\n```';
+    const content = `Before.\n\n${code}\n\nAfter.`;
+    expect(raws(content)).toEqual(['Before.', code, 'After.']);
+  });
+
+  it('keeps a $$ math block as one block', () => {
+    const mathBlock = '$$\nE = mc^2\n$$';
+    const content = `Text.\n\n${mathBlock}\n\nMore.`;
+    expect(raws(content)).toEqual(['Text.', mathBlock, 'More.']);
+  });
+
+  it('keeps a :::artifact::: container (with inner blank lines) intact', () => {
+    // The container body contains a blank line; the slice must run through the
+    // closing `:::`.
+    const artifact = ':::artifact{title="t"}\nline one\n\nline two\n:::';
+    const content = `Lead.\n\n${artifact}\n\nTail.`;
+    expect(raws(content)).toEqual(['Lead.', artifact, 'Tail.']);
+  });
+
+  it('keeps a list as one block (does not split items)', () => {
+    const list = '- one\n- two\n- three';
+    expect(raws(list)).toEqual([list]);
+  });
+
+  it('does not split inline constructs across blocks (inline code/markers stay in their paragraph)', () => {
+    const para = 'Use `inline` code and \\ui{abc} markers here.';
+    expect(raws(para)).toEqual([para]);
+  });
+
+  it('each block re-parses to the same single block (idempotent boundaries)', () => {
+    const content = ['# Heading', '', 'A paragraph.', '', '```py\nprint(1)\n```'].join('\n');
+    const blocks = raws(content);
+    expect(blocks).toHaveLength(3);
+    // Feeding a slice back into the splitter must return that slice unchanged
+    // as the only block.
+    for (const block of blocks) {
+      expect(raws(block)).toEqual([block]);
+    }
+  });
+
+  // Per-block `codeBlockCount` / `artifactCount` expectations. Each input is a
+  // single top-level node, so only the first returned block is inspected.
+  describe('index counts (mirror the code/artifact render decision)', () => {
+    it('counts a multi-line fenced code block as executable', () => {
+      const [block] = splitMarkdownIntoBlocks('```js\nconst x = 1;\nconst y = 2;\n```');
+      expect(block.codeBlockCount).toBe(1);
+    });
+
+    it('counts a single-line block with a known language as executable', () => {
+      const [block] = splitMarkdownIntoBlocks('```bash\necho hi\n```');
+      expect(block.codeBlockCount).toBe(1);
+    });
+
+    it('counts a no-language single-line block as executable', () => {
+      const [block] = splitMarkdownIntoBlocks('```\necho hi\n```');
+      expect(block.codeBlockCount).toBe(1);
+    });
+
+    it('counts a single-line block with an unsupported language (renderer still renders a CodeBlock)', () => {
+      const [block] = splitMarkdownIntoBlocks('```madeuplang\nhi\n```');
+      expect(block.codeBlockCount).toBe(1);
+    });
+
+    it('does NOT count math or mermaid fences', () => {
+      expect(splitMarkdownIntoBlocks('```math\nE=mc^2\n```')[0].codeBlockCount).toBe(0);
+      expect(splitMarkdownIntoBlocks('```mermaid\ngraph TD; A-->B;\n```')[0].codeBlockCount).toBe(
+        0,
+      );
+    });
+
+    it('does NOT count hyphenated math/mermaid languages (renderer normalizes them)', () => {
+      // Only the word characters before the hyphen are considered, so these
+      // are treated as `mermaid` and `math`.
+      expect(
+        splitMarkdownIntoBlocks('```mermaid-js\ngraph TD; A-->B;\n```')[0].codeBlockCount,
+      ).toBe(0);
+      expect(splitMarkdownIntoBlocks('```math-tex\nE=mc^2\n```')[0].codeBlockCount).toBe(0);
+    });
+
+    it('does count a fence that merely starts with math/mermaid letters', () => {
+      // `mathematica` shares a prefix with `math` but is a different language.
+      expect(splitMarkdownIntoBlocks('```mathematica\nx\n```')[0].codeBlockCount).toBe(1);
+    });
+
+    it('counts an artifact container once and does not count code inside it', () => {
+      const artifact = ':::artifact{title="t"}\n```js\nconst x = 1;\nconst y = 2;\n```\n:::';
+      const [block] = splitMarkdownIntoBlocks(artifact);
+      expect(block.artifactCount).toBe(1);
+      expect(block.codeBlockCount).toBe(0);
+    });
+
+    it('counts leaf artifact directives, not just containers', () => {
+      expect(splitMarkdownIntoBlocks('::artifact{identifier="a" type="x"}')[0].artifactCount).toBe(
+        1,
+      );
+    });
+
+    it('does NOT count inline text artifact directives (the plugin rewrites them to text)', () => {
+      expect(splitMarkdownIntoBlocks('see :artifact{identifier="b"} inline')[0].artifactCount).toBe(
+        0,
+      );
+    });
+  });
+
+  // When a link or footnote definition appears anywhere in the message, the
+  // splitter must return the entire input as one block with `raw === content`.
+  describe('document-level definitions force single-block rendering', () => {
+    it('keeps a reference-style link and its definition in one block', () => {
+      const content = 'See [docs][d] for details.\n\n[d]: https://example.com/docs';
+      const blocks = splitMarkdownIntoBlocks(content);
+      expect(blocks).toHaveLength(1);
+      expect(blocks[0].raw).toBe(content);
+    });
+
+    it('keeps a footnote reference and its definition in one block', () => {
+      const content = 'Claim with a note.[^1]\n\n[^1]: the footnote text';
+      const blocks = splitMarkdownIntoBlocks(content);
+      expect(blocks).toHaveLength(1);
+      expect(blocks[0].raw).toBe(content);
+    });
+
+    it('forces single-block when a definition is nested in a blockquote', () => {
+      // The definition is a descendant of a top-level blockquote, not a
+      // top-level node itself.
+      const content = 'See [docs][d].\n\n> [d]: https://example.com/docs';
+      const blocks = splitMarkdownIntoBlocks(content);
+      expect(blocks).toHaveLength(1);
+      expect(blocks[0].raw).toBe(content);
+    });
+
+    it('forces single-block when a definition is nested in a list item', () => {
+      const content = 'See [a][d].\n\n- item\n\n  [d]: https://example.com';
+      const blocks = splitMarkdownIntoBlocks(content);
+      expect(blocks).toHaveLength(1);
+      expect(blocks[0].raw).toBe(content);
+    });
+
+    it('still splits when no document-level definition is present', () => {
+      // Control case: without a definition the two paragraphs split as usual.
+      const blocks = splitMarkdownIntoBlocks('First paragraph.\n\nSecond paragraph.');
+      expect(blocks).toHaveLength(2);
+    });
+  });
+
+  // A top-level raw HTML block forces whole-message output; HTML that is only
+  // inline inside a paragraph does not.
+  describe('raw HTML blocks force single-block rendering', () => {
+    it('renders the whole message as one block for multiple top-level HTML blocks', () => {
+      const content = '<div>a</div>\n\n<div>b</div>';
+      const blocks = splitMarkdownIntoBlocks(content);
+      expect(blocks).toHaveLength(1);
+      expect(blocks[0].raw).toBe(content);
+    });
+
+    it('still splits when HTML is only inline within a paragraph', () => {
+      const blocks = splitMarkdownIntoBlocks('para with <br> inline.\n\nsecond para');
+      expect(blocks).toHaveLength(2);
+    });
+  });
+});
--- a/client/src/components/Chat/Messages/Content/markdownConfig.ts
+++ b/client/src/components/Chat/Messages/Content/markdownConfig.ts
@ -0,0 +1,79 @@
+import remarkGfm from 'remark-gfm';
+import remarkMath from 'remark-math';
+import supersub from 'remark-supersub';
+import rehypeKatex from 'rehype-katex';
+import rehypeHighlight from 'rehype-highlight';
+import remarkDirective from 'remark-directive';
+import type { PluggableList } from 'unified';
+import type { ElementType } from 'react';
+import {
+  mcpUIResourcePlugin,
+  MCPUIResource,
+  MCPUIResourceCarousel,
+} from '~/components/MCPUIResource';
+import { Citation, CompositeCitation, HighlightedText } from '~/components/Web/Citation';
+import { Artifact, artifactPlugin } from '~/components/Artifacts/Artifact';
+import { code, a, p, img, table } from './MarkdownComponents';
+import { unicodeCitation } from '~/components/Web';
+import { langSubset } from '~/utils';
+
+/**
+ * Single source of truth for the markdown rendering pipeline, shared by the
+ * whole-message renderer and the per-block memoized renderer so both produce
+ * identical output.
+ *
+ * These are exposed as lazily-initialized, cached getters rather than top-level
+ * consts on purpose: `MarkdownComponents` participates in a circular import
+ * (`MarkdownComponents` → `CodeBlock` → `Parts` → `Markdown` → here →
+ * `MarkdownComponents`). Reading `code`/`a`/… at module-evaluation time throws
+ * `Cannot access 'code' before initialization` under native ESM. Deferring the
+ * read to first call (render time) sidesteps the temporal dead zone, and caching
+ * keeps a stable reference so react-markdown does not rebuild its processor.
+ *
+ * Each cache below starts as `null`, meaning "not built yet", and is populated
+ * by its getter on the first call; it is never reset afterwards in this module.
+ */
+let remarkPluginsCache: PluggableList | null = null;
+let rehypePluginsCache: PluggableList | null = null;
+let markdownComponentsCache: { [nodeType: string]: ElementType } | null = null;
+
+/**
+ * Remark plugin list for the markdown pipeline.
+ *
+ * The array is built on the first call and stored in `remarkPluginsCache`;
+ * every later call returns that same array instance.
+ *
+ * @returns The cached remark plugin list.
+ */
+export const getRemarkPlugins = (): PluggableList => {
+  if (remarkPluginsCache !== null) {
+    return remarkPluginsCache;
+  }
+  const plugins: PluggableList = [
+    supersub,
+    remarkGfm,
+    remarkDirective,
+    artifactPlugin,
+    [remarkMath, { singleDollarTextMath: false }],
+    unicodeCitation,
+    mcpUIResourcePlugin,
+  ];
+  remarkPluginsCache = plugins;
+  return plugins;
+};
+
+/**
+ * Rehype plugin list for the markdown pipeline: KaTeX first, then syntax
+ * highlighting restricted to `langSubset`.
+ *
+ * The array is built on the first call and stored in `rehypePluginsCache`;
+ * every later call returns that same array instance.
+ *
+ * @returns The cached rehype plugin list.
+ */
+export const getRehypePlugins = (): PluggableList => {
+  if (rehypePluginsCache !== null) {
+    return rehypePluginsCache;
+  }
+  const highlightOptions = { detect: true, ignoreMissing: true, subset: langSubset };
+  const plugins: PluggableList = [[rehypeKatex], [rehypeHighlight, highlightOptions]];
+  rehypePluginsCache = plugins;
+  return plugins;
+};
+
+/**
+ * Map from rendered node/tag name to the React component that renders it.
+ *
+ * The imported components are read inside the function body, i.e. at call
+ * time, not at module evaluation. The map is built on the first call and stored
+ * in `markdownComponentsCache`; later calls return that same object.
+ *
+ * @returns The cached components map.
+ */
+export const getMarkdownComponents = (): { [nodeType: string]: ElementType } => {
+  if (markdownComponentsCache !== null) {
+    return markdownComponentsCache;
+  }
+  const components: { [nodeType: string]: ElementType } = {
+    code,
+    a,
+    p,
+    img,
+    table,
+    artifact: Artifact,
+    citation: Citation,
+    'highlighted-text': HighlightedText,
+    'composite-citation': CompositeCitation,
+    'mcp-ui-resource': MCPUIResource,
+    'mcp-ui-carousel': MCPUIResourceCarousel,
+  };
+  markdownComponentsCache = components;
+  return components;
+};
--- a/client/src/components/Chat/Messages/Content/splitMarkdown.ts
+++ b/client/src/components/Chat/Messages/Content/splitMarkdown.ts
@ -0,0 +1,157 @@
+import { gfm } from 'micromark-extension-gfm';
+import { math } from 'micromark-extension-math';
+import { gfmFromMarkdown } from 'mdast-util-gfm';
+import { mathFromMarkdown } from 'mdast-util-math';
+import { fromMarkdown } from 'mdast-util-from-markdown';
+import { directive } from 'micromark-extension-directive';
+import { directiveFromMarkdown } from 'mdast-util-directive';
+
+export type MarkdownBlock = {
+  /** Exact source slice for this top-level block. */
+  raw: string;
+  /** Executable code blocks within this block (those that render a runnable CodeBlock). */
+  codeBlockCount: number;
+  /**
+   * Artifact directives within this block: container (`:::artifact`) and leaf
+   * (`::artifact{}`) forms each count once; inline `:artifact{}` does not count.
+   */
+  artifactCount: number;
+};
+
+/**
+ * Minimal structural view of an mdast node, used to walk the parsed tree
+ * without depending on the full mdast typings.
+ *
+ * NOTE(review): `value` is not read anywhere in this module — candidate for
+ * removal.
+ */
+type MdastNode = {
+  type: string;
+  name?: string;
+  lang?: string | null;
+  value?: string;
+  children?: MdastNode[];
+  position?: { start?: { offset?: number }; end?: { offset?: number } };
+};
+
+/**
+ * Normalize the fence info string the same way the `code` component does — it
+ * reads the language from `className` via `/language-(\w+)/`, so only the leading
+ * word characters survive (`mermaid-js` → `mermaid`, `math-tex` → `math`).
+ * Returns `''` when the language has no leading word characters (including the
+ * empty string passed for a fence without a language).
+ */
+const renderedCodeLang = (lang: string): string =>
+  /language-(\w+)/.exec(`language-${lang}`)?.[1] ?? '';
+
+/**
+ * Mirror the `code` component's decision for whether a fenced block renders as a
+ * runnable CodeBlock (and therefore consumes a block index). Every fenced code
+ * block does, except fences whose normalized language is `math` or `mermaid`,
+ * which have dedicated renderers. mdast strips a fenced block's trailing
+ * newline, but react-markdown/remark-rehype re-add it, so the `code` component
+ * never treats a fenced block as single-line inline code regardless of its
+ * language — only true inline code (an `inlineCode` node, not counted here) is.
+ */
+const isExecutableCode = (lang: string): boolean => {
+  const normalized = renderedCodeLang(lang);
+  return normalized !== 'math' && normalized !== 'mermaid';
+};
+
+/**
+ * Report whether `node` itself, or any node beneath it, is a `definition` or a
+ * `footnoteDefinition`.
+ *
+ * @param node Root of the subtree to search.
+ * @returns `true` as soon as one such node is found, otherwise `false`.
+ */
+const containsDefinition = (node: MdastNode): boolean => {
+  const isDefinition = node.type === 'definition' || node.type === 'footnoteDefinition';
+  if (isDefinition) {
+    return true;
+  }
+  for (const child of node.children ?? []) {
+    if (containsDefinition(child)) {
+      return true;
+    }
+  }
+  return false;
+};
+
+/**
+ * Directive node types that count as an artifact when named `artifact`:
+ * container and leaf directives. `textDirective` is deliberately absent.
+ */
+const ARTIFACT_DIRECTIVE_TYPES = new Set(['containerDirective', 'leafDirective']);
+
+/**
+ * Walk `node` depth-first and add what it contains to `counts` (mutated in
+ * place): one `artifact` per artifact directive, one `code` per executable
+ * `code` node.
+ *
+ * @param node Subtree root to tally.
+ * @param counts Running totals, updated in place.
+ */
+const countWithin = (node: MdastNode, counts: { code: number; artifact: number }): void => {
+  const isArtifactDirective = node.name === 'artifact' && ARTIFACT_DIRECTIVE_TYPES.has(node.type);
+  if (isArtifactDirective) {
+    // Container (`:::artifact:::`) and leaf (`::artifact{}`) directives are
+    // rendered by artifactPlugin as an Artifact and consume one index each.
+    // Nothing beneath them renders as an executable code block, so the walk
+    // stops here. Inline `:artifact{}` text directives are left out on purpose:
+    // the plugin turns every textDirective into literal text, so no Artifact
+    // renders and no index is used.
+    counts.artifact += 1;
+    return;
+  }
+  if (node.type === 'code' && isExecutableCode(node.lang ?? '')) {
+    counts.code += 1;
+  }
+  node.children?.forEach((child) => countWithin(child, counts));
+};
+
+/**
+ * Parse `content` into an mdast tree with the GFM, directive and math syntax
+ * extensions enabled, so that top-level block boundaries line up with what the
+ * render pipeline produces (GFM tables, `:::artifact:::` containers, `$$`
+ * math). Inline-only transforms (citations, MCP-UI markers, supersub) never
+ * cross a top-level block and are therefore left out.
+ *
+ * NOTE(review): `math()` runs with its default options here, whereas the render
+ * pipeline configures remarkMath with `singleDollarTextMath: false`. That
+ * option presumably affects inline math only and so cannot move a top-level
+ * boundary — confirm.
+ *
+ * @param content Markdown source.
+ * @returns The parsed root node.
+ */
+const parseToMdast = (content: string): MdastNode => {
+  const syntaxExtensions = [gfm(), directive(), math()];
+  const treeExtensions = [gfmFromMarkdown(), directiveFromMarkdown(), mathFromMarkdown()];
+  return fromMarkdown(content, {
+    extensions: syntaxExtensions,
+    mdastExtensions: treeExtensions,
+  }) as MdastNode;
+};
+
+/**
+ * Split markdown into its top-level blocks.
+ *
+ * Each returned block carries the exact source slice of one top-level node plus
+ * the number of executable code blocks and artifacts inside it. A finished
+ * block keeps the same slice and counts as more text streams in, which is what
+ * lets a per-block memo skip it; only the last, still-growing block changes.
+ *
+ * Blank lines between blocks belong to no node's span and are dropped.
+ * Block-level elements carry their own margins, so rendering the slices
+ * separately looks the same as rendering the whole string.
+ *
+ * The whole message is returned as a single block when:
+ *  - the parse yields no top-level nodes (counts are zero),
+ *  - any top-level node is raw HTML or contains a definition, or
+ *  - any top-level node lacks start/end offsets.
+ *
+ * @param content Markdown source; an empty string yields `[]`.
+ * @returns The blocks in document order.
+ */
+export function splitMarkdownIntoBlocks(content: string): MarkdownBlock[] {
+  if (!content) {
+    return [];
+  }
+
+  const topLevel = parseToMdast(content).children ?? [];
+  if (topLevel.length === 0) {
+    return [{ raw: content, codeBlockCount: 0, artifactCount: 0 }];
+  }
+
+  // Fallback result: the entire message as one block with summed counts.
+  const asSingleBlock = (): MarkdownBlock[] => [{ raw: content, ...blockCounts(topLevel) }];
+
+  // Splitting discards document-wide context, so keep the message whole when a
+  // construct depends on it:
+  //  - reference/footnote definitions are document-scoped (and may sit inside a
+  //    blockquote or list item); a reference cut off from its definition would
+  //    render as literal text; and
+  //  - top-level raw HTML blocks are escaped to text (rehypeRaw is not enabled),
+  //    so the separator between adjacent HTML blocks would otherwise vanish.
+  const needsDocumentContext = (node: MdastNode): boolean =>
+    node.type === 'html' || containsDefinition(node);
+  if (topLevel.some(needsDocumentContext)) {
+    return asSingleBlock();
+  }
+
+  const result: MarkdownBlock[] = [];
+  for (const node of topLevel) {
+    const from = node.position?.start?.offset;
+    const to = node.position?.end?.offset;
+    if (from == null || to == null) {
+      // Without offsets the slice cannot be cut; give up on splitting entirely.
+      return asSingleBlock();
+    }
+    const tally = { code: 0, artifact: 0 };
+    countWithin(node, tally);
+    result.push({
+      raw: content.slice(from, to),
+      codeBlockCount: tally.code,
+      artifactCount: tally.artifact,
+    });
+  }
+
+  return result;
+}
+
+/**
+ * Sum the executable-code and artifact counts over a list of nodes, in the
+ * field names used by `MarkdownBlock`.
+ *
+ * @param children Nodes to tally (each is walked recursively).
+ * @returns Combined `codeBlockCount` and `artifactCount`.
+ */
+const blockCounts = (children: MdastNode[]): { codeBlockCount: number; artifactCount: number } => {
+  const tally = { code: 0, artifact: 0 };
+  children.forEach((child) => countWithin(child, tally));
+  const { code: codeBlockCount, artifact: artifactCount } = tally;
+  return { codeBlockCount, artifactCount };
+};
--- a/package-lock.json
+++ b/package-lock.json
@ -464,8 +464,15 @@
        "lodash": "^4.17.23",
        "lucide-react": "^0.394.0",
        "match-sorter": "^8.1.0",
+        "mdast-util-directive": "^3.0.0",
+        "mdast-util-from-markdown": "^2.0.1",
+        "mdast-util-gfm": "^3.0.0",
+        "mdast-util-math": "^3.0.0",
        "mermaid": "^11.15.0",
+        "micromark-extension-directive": "^3.0.1",
+        "micromark-extension-gfm": "^3.0.0",
        "micromark-extension-llm-math": "^3.1.0",
+        "micromark-extension-math": "^3.1.0",
        "qrcode.react": "^4.2.0",
        "rc-input-number": "^7.4.2",
        "react": "^18.2.0",