import { normalizeMermaidMarkdown } from "./mermaid"; const MERMAID_BLOCK_HINT_RE = /mermaid/i; // marked's blockquote tokenizer (used by Streamdown to split content into // memoizable blocks) recurses once per nesting level and overflows the call // stack at roughly 2,000 levels, replacing the whole chat route with an error // page. 100 levels is far beyond any legitimate content while keeping a wide // margin below the crash threshold. const MAX_BLOCKQUOTE_DEPTH = 100; const DEEP_BLOCKQUOTE_HINT_RE = new RegExp( `^(?:[ \\t]*>){${MAX_BLOCKQUOTE_DEPTH + 1}}`, "m", ); // Only up to 3 leading spaces can start a blockquote; 4+ (or a tab) is an // indented code block, where ">" runs are literal content. const BLOCKQUOTE_PREFIX_RE = /^ {0,3}(?:[ \t]*>)+/; const CODE_FENCE_RE = /^ {0,3}(?:```|~~~)/; const INDENTED_CODE_RE = /^(?: {4}|\t)/; export function capBlockquoteNesting(markdown: string): string { if (!DEEP_BLOCKQUOTE_HINT_RE.test(markdown)) { return markdown; } let insideFence = false; return markdown .split("\n") .map((line) => { if (CODE_FENCE_RE.test(line)) { insideFence = !insideFence; return line; } // ">" runs inside fenced or indented code blocks are literal text, not // nesting — rewriting them would silently corrupt code content. if (insideFence || INDENTED_CODE_RE.test(line)) { return line; } const match = BLOCKQUOTE_PREFIX_RE.exec(line); if (!match) { return line; } const prefix = match[0]; let depth = 0; for (let i = 0; i < prefix.length; i++) { if (prefix[i] === ">") { depth += 1; if (depth > MAX_BLOCKQUOTE_DEPTH) { return line.slice(0, i) + line.slice(prefix.length); } } } return line; }) .join("\n"); } export function preprocessStreamdownMarkdown(markdown: string): string { if (!MERMAID_BLOCK_HINT_RE.test(markdown) || !markdown.includes("-.->")) { return markdown; } return normalizeMermaidMarkdown(markdown); }