// Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
// SPDX-License-Identifier: MIT

// NOTE(review): several expressions below (`return ( ... )` bodies, the `img`
// renderer, and the generic arguments on `useMemo` / `NonNullable`) look as if
// their JSX element markup and `<...>` type arguments were stripped from this
// copy of the file. Those tokens are kept exactly as found; restore them from
// version control before building.

import { Check, Copy } from "lucide-react";
import { useMemo, useState } from "react";
import ReactMarkdown, {
  type Options as ReactMarkdownOptions,
} from "react-markdown";
import rehypeKatex from "rehype-katex";
import remarkGfm from "remark-gfm";
import remarkMath from "remark-math";
import "katex/dist/katex.min.css";

import { Button } from "~/components/ui/button";
import { rehypeSplitWordsIntoSpans } from "~/core/rehype";
import { katexOptions } from "~/core/markdown/katex";
import { autoFixMarkdown, normalizeMathForDisplay } from "~/core/utils/markdown";
import { cn } from "~/lib/utils";

import Image from "./image";
import { Tooltip } from "./tooltip";
import { Link } from "./link";
import { CitationLink, type CitationData } from "./citation";

/**
 * Shared default for the `citations` prop.
 *
 * A literal `[]` default would allocate a new array on every render; because
 * `citations` is a `useMemo` dependency, that would rebuild the citation map
 * and the `components` object each time the prop is omitted.
 */
const EMPTY_CITATIONS: CitationData[] = [];

/**
 * Renders markdown content with math support, citation-aware links and an
 * optional copy button.
 *
 * The `children` string is passed through `normalizeMathForDisplay`,
 * `dropMarkdownQuote` and `autoFixMarkdown` (in that order) before rendering.
 *
 * @param className - Extra class name for the rendered output.
 * @param children - Markdown source.
 * @param style - Inline style for the rendered output.
 * @param enableCopy - When truthy and `children` is a string, the copy-button
 *   region is rendered.
 * @param animated - When true, `rehypeSplitWordsIntoSpans` is appended to the
 *   rehype plugin list.
 * @param checkLinkCredibility - Listed as a dependency of the link renderer;
 *   presumably forwarded to the link component — TODO confirm once the
 *   stripped markup is restored.
 * @param citations - Citation records; links whose `href` matches a citation
 *   URL (exact, URI-decoded or URI-encoded form) take the citation branch.
 */
export function Markdown({
  className,
  children,
  style,
  enableCopy,
  animated = false,
  checkLinkCredibility = false,
  citations = EMPTY_CITATIONS,
  ...props
}: ReactMarkdownOptions & {
  className?: string;
  enableCopy?: boolean;
  style?: React.CSSProperties;
  animated?: boolean;
  checkLinkCredibility?: boolean;
  citations?: CitationData[];
}) {
  // Pre-compute normalized URL map for O(1) lookup
  const citationMap = useMemo(() => {
    const map = new Map();
    citations?.forEach((c, index) => {
      if (!c.url) return;
      // Add exact match
      map.set(c.url, index);
      // Add decoded match
      try {
        const decoded = decodeURIComponent(c.url);
        if (decoded !== c.url) map.set(decoded, index);
      } catch {}
      // Add encoded match
      try {
        const encoded = encodeURI(c.url);
        if (encoded !== c.url) map.set(encoded, index);
      } catch {}
    });
    return map;
  }, [citations]);

  const components: ReactMarkdownOptions["components"] = useMemo(() => {
    return {
      a: ({ href, children }) => {
        const hrefStr = href ?? "";

        // Handle citation anchor targets (rendered in Reference list)
        // Format: [[n]](#citation-target-n)
        const targetMatch = hrefStr.match(/^#citation-target-(\d+)$/);
        if (targetMatch) {
          const index = targetMatch[1];
          return (
            [{index}]
          );
        }

        // Handle inline citation links (rendered in text)
        // Format: [[n]](#ref-n), [n](#ref1), [n](#1)
        const linkMatch = hrefStr.match(/^#(?:ref-?)?(\d+)$/);
        if (linkMatch) {
          // The click handler cancels the default navigation and smoothly
          // scrolls the element with id `ref-<n>` into view, if it exists.
          return (
            {
              e.preventDefault();
              const targetId = `ref-${linkMatch[1]}`;
              const element = document.getElementById(targetId);
              if (element) {
                element.scrollIntoView({ behavior: "smooth", block: "start" });
              }
            }}
            >
            {children}
          );
        }

        // If we have citation data, use CitationLink for enhanced display
        if (citations && citations.length > 0) {
          // Find if this URL is one of our citations
          const citationIndex = citationMap.get(hrefStr) ?? -1;
          if (citationIndex !== -1) {
            // Heuristic to determine if this is a citation target (in Reference list)
            // vs a citation link (in text).
            // Targets are usually the full title, while links are numbers like [1].
            const childrenText = Array.isArray(children)
              ? children.join("")
              : String(children);
            // Heuristic: inline citation text usually looks like a numeric marker
            // rather than a full title. We treat the following as "inline"
            // (with optional surrounding whitespace):
            // - a bracketed number: "[1]"
            // - a caret-style number: "1", "^1", "1^", "^1^"
            // Footnote-style "[^1]" is NOT matched by this pattern.
            const inlineCitationPattern = /^\s*(?:\[\d+\]|\^?\d+\^?)\s*$/;
            const isInline = inlineCitationPattern.test(childrenText);
            return (
              {children}
            );
          }
          return (
            {children}
          );
        }

        // Otherwise fall back to regular Link
        return (
          {children}
        );
      },
      img: ({ src, alt }) => (
        {alt
      ),
    };
  }, [checkLinkCredibility, citations, citationMap]);

  // KaTeX rendering is always enabled; word-splitting is added only for the
  // animated mode.
  const rehypePlugins = useMemo>(() => {
    const plugins: NonNullable = [[
      rehypeKatex,
      katexOptions,
    ]];
    if (animated) {
      plugins.push(rehypeSplitWordsIntoSpans);
    }
    return plugins;
  }, [animated]);

  return (
    {autoFixMarkdown(
      dropMarkdownQuote(normalizeMathForDisplay(children ?? "")) ?? "",
    )}
    {enableCopy && typeof children === "string" && (
    )}
  );
}

/**
 * Copy-to-clipboard button for the given markdown content.
 *
 * Only the `copied` state is visible in this copy of the file; the button
 * markup and click handler are missing — TODO restore.
 */
function CopyButton({ content }: { content: string }) {
  const [copied, setCopied] = useState(false);
  return (
  );
}

/** A fenced block that has been opened and not yet closed during the scan. */
interface OpenFence {
  /** True when the fence is a wrapper whose delimiter lines are removed. */
  wrapper: boolean;
  /** Number of backticks in the opening fence. */
  ticks: number;
  /** Index of the opening line in the output buffer, or -1 if already dropped. */
  openedAt: number;
}

/** Info strings of fences treated as wrappers around markdown content. */
const WRAPPER_FENCE_INFO: ReadonlySet<string> = new Set(["", "markdown", "text"]);

/** A backtick fence line: up to 3 spaces of indent, 3+ backticks, info string. */
const FENCE_LINE = /^ {0,3}(`{3,})([^`]*)$/;

/**
 * Unwraps content that was wrapped in a markdown, text or unlabeled code
 * fence, leaving every other fenced code block untouched.
 *
 * The input is scanned line by line while tracking open fences:
 * - Inside a real code block (any other info string, e.g. `python`) lines are
 *   opaque; only a bare closing fence ends the block. Its closing fence is
 *   therefore never mistaken for the start of a bare wrapper, and its opening
 *   fence is never mistaken for the end of a wrapper.
 * - A wrapper's delimiter lines are removed once the wrapper is closed.
 * - A wrapper opened on the very first line is removed immediately, so a
 *   still-streaming document does not render as one big code block.
 * - A wrapper opened later that is never closed is left as-is.
 *
 * @param markdown - Markdown source; may be empty, `null` or `undefined`.
 * @returns The unwrapped markdown, or `null` when the input is empty/nullish.
 */
function dropMarkdownQuote(markdown?: string | null): string | null {
  if (!markdown) return null;

  // Output buffer; `null` marks a line that has been dropped.
  const kept: (string | null)[] = [];
  const openFences: OpenFence[] = [];

  markdown.split("\n").forEach((line, lineNo) => {
    const fence = FENCE_LINE.exec(line);
    if (!fence) {
      kept.push(line);
      return;
    }

    const [, tickRun = "", rawInfo = ""] = fence;
    const ticks = tickRun.length;
    // trim() also discards a trailing "\r" from CRLF input.
    const info = rawInfo.trim();
    const top = openFences[openFences.length - 1];
    const closesTop = top !== undefined && info === "" && ticks >= top.ticks;

    if (top !== undefined && !top.wrapper) {
      // Inside a real code block: content is opaque, fences are kept verbatim.
      if (closesTop) openFences.pop();
      kept.push(line);
      return;
    }

    if (top !== undefined && closesTop) {
      // Closing fence of a wrapper: drop it together with its opening line.
      openFences.pop();
      if (top.openedAt >= 0) kept[top.openedAt] = null;
      return;
    }

    if (WRAPPER_FENCE_INFO.has(info)) {
      if (lineNo === 0) {
        // Leading wrapper: drop right away, even if it is not closed yet.
        openFences.push({ wrapper: true, ticks, openedAt: -1 });
        return;
      }
      // Keep tentatively; removed only if a matching closing fence shows up.
      openFences.push({ wrapper: true, ticks, openedAt: kept.length });
      kept.push(line);
      return;
    }

    // Opening fence of a real code block.
    openFences.push({ wrapper: false, ticks, openedAt: kept.length });
    kept.push(line);
  });

  return kept.filter((l): l is string => l !== null).join("\n");
}