paper-burner/js/processing/markdown_processor_enhanced.js

// js/processing/markdown_processor_enhanced.js
// Enhanced markdown processor with improved robustness for formulas and complex content
(function MarkdownProcessorEnhanced(global) {
    // Enhanced cache with versioning and size limits
    const renderCache = new Map();
    const MAX_CACHE_SIZE = 1000;
    const CACHE_VERSION = '2.0';

    // Performance metrics tracking
    const metrics = {
        cacheHits: 0,
        cacheMisses: 0,
        totalRenders: 0,
        avgRenderTime: 0,
        formulaErrors: 0,
        formulaSuccesses: 0
    };

    const FORMULA_BLOCK_HINTS = [
        /\r|\n/, // explicit line breaks
        /\\\\/, // LaTeX newline command
        /\\tag\b/, // equation tags
        /\\label\b/,
        /\\eqref\b/,
        /\\display(?:style|limits)\b/,
        /\\begin\{(?:align\*?|aligned|flalign\*?|gather\*?|multline\*?|split|cases|array|pmatrix|bmatrix|vmatrix|Vmatrix|matrix|smallmatrix)\}/,
        /\\end\{(?:align\*?|aligned|flalign\*?|gather\*?|multline\*?|split|cases|array|pmatrix|bmatrix|vmatrix|Vmatrix|matrix|smallmatrix)\}/
    ];

    /**
     * Enhanced markdown preprocessing with robust formula and image handling
     * @param {string} md - Input markdown text
     * @param {Array<Object>} images - Image objects with name/id and data
     * @returns {string} Processed markdown text
     */
    function safeMarkdownEnhanced(md, images) {
        performance.mark('safeMarkdown-enhanced-start');

        if (!md || typeof md !== 'string') {
            performance.mark('safeMarkdown-enhanced-end');
            performance.measure('safeMarkdown-enhanced', 'safeMarkdown-enhanced-start', 'safeMarkdown-enhanced-end');
            return '';
        }

        // Build robust image mapping with multiple fallback keys
        const imgMap = new Map();
        if (Array.isArray(images)) {
            images.forEach((img, idx) => {
                if (!img || !img.data) return;

                const keys = new Set();

                // Add various possible keys
                if (img.name) keys.add(img.name);
                if (img.id) keys.add(img.id);
                keys.add(`img-${idx}.jpeg.png`);
                keys.add(`img-${idx + 1}.jpeg.png`);

                // Add with 'images/' prefix
                [...keys].forEach(k => keys.add('images/' + k));

                const src = img.data.startsWith('data:') ? img.data : `data:image/png;base64,${img.data}`;
                keys.forEach(k => imgMap.set(k, src));
            });
        }

        // Enhanced image replacement with better error handling
        // 支持多种格式：
        // - images/page3_img1.png (Local PDF)
        // - images/img-1.jpeg.png (旧格式)
        // - page3_img1 (不带扩展名)
        // - 任意相对路径
        md = md.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (match, alt, path) => {
            // 跳过外部链接和已经是 base64 的图片
            const p = String(path).trim();
            if (/^(https?:|data:|\/\/)/i.test(p)) {
                return match;
            }

            // 去除查询参数和锚点
            const clean = p.split('?')[0].split('#')[0];

            // 尝试多种可能的 key
            const candidates = [
                clean,                              // 原始路径
                clean.replace(/^images\//, ''),     // 去掉 images/ 前缀
                clean.replace(/\.png$/i, ''),       // 去掉 .png 后缀
                clean.replace(/^images\//, '').replace(/\.png$/i, ''), // 两者都去掉
                'images/' + clean,                  // 添加 images/ 前缀
                clean.split('/').pop(),             // 只取文件名
                'images/' + clean.split('/').pop()  // 文件名 + images/ 前缀
            ];

            // 尝试所有候选 key
            for (const key of candidates) {
                if (imgMap.has(key)) {
                    return `![${alt || ''}](${imgMap.get(key)})`;
                }
            }

            // 未找到图片，输出警告
            console.warn(`[MarkdownProcessorEnhanced] Image not found: ${path}, tried:`, candidates.slice(0, 5));
            return match; // 保持原样
        });

        // Enhanced custom syntax processing with better error handling
        md = processCustomSyntax(md);

        performance.mark('safeMarkdown-enhanced-end');
        performance.measure('safeMarkdown-enhanced', 'safeMarkdown-enhanced-start', 'safeMarkdown-enhanced-end');
        return md;
    }

    /**
     * Process custom syntax (subscripts, superscripts) with enhanced robustness
     * @param {string} md - Markdown text
     * @returns {string} Processed markdown
     */
    function processCustomSyntax(md) {
        // Enhanced regex patterns with better boundary detection
        const patterns = [
            // Base with superscript: ${base}^{sup}$
            {
                regex: /\$\{\s*([^}]*?)\s*\}\^\{([^}]*?)\}\$/g,
                replacement: (_, base, sup) => {
                    const cleanBase = (base || '').trim();
                    const cleanSup = (sup || '').trim();
                    return cleanBase ?
                        `<span>${escapeHtml(cleanBase)}<sup>${escapeHtml(cleanSup)}</sup></span>` :
                        `<sup>${escapeHtml(cleanSup)}</sup>`;
                }
            },
            // Base with subscript: ${base}_{sub}$
            {
                regex: /\$\{\s*([^}]*?)\s*\}_\{([^}]*?)\}\$/g,
                replacement: (_, base, sub) => {
                    const cleanBase = (base || '').trim();
                    const cleanSub = (sub || '').trim();
                    return cleanBase ?
                        `<span>${escapeHtml(cleanBase)}<sub>${escapeHtml(cleanSub)}</sub></span>` :
                        `<sub>${escapeHtml(cleanSub)}</sub>`;
                }
            },
            // Empty base superscript: ${}^{sup}$
            {
                regex: /\$\{\s*\}\^\{([^}]*?)\}\$/g,
                replacement: (_, sup) => `<sup>${escapeHtml((sup || '').trim())}</sup>`
            },
            // Empty base subscript: ${}_{sub}$
            {
                regex: /\$\{\s*\}_\{([^}]*?)\}\$/g,
                replacement: (_, sub) => `<sub>${escapeHtml((sub || '').trim())}</sub>`
            },
            // Simple superscript: ${content}$
            {
                regex: /\$\{\s*([^}]*?)\s*\}\$/g,
                replacement: (_, content) => `<sup>${escapeHtml((content || '').trim())}</sup>`
            }
        ];

        patterns.forEach(({ regex, replacement }) => {
            try {
                md = md.replace(regex, replacement);
            } catch (error) {
                console.warn(`[MarkdownProcessorEnhanced] Custom syntax processing error:`, error);
            }
        });

        return md;
    }

    /**
     * Enhanced KaTeX rendering with improved error handling and formula analysis
     * @param {string} md - Preprocessed markdown text
     * @param {Function} customRenderer - Custom marked renderer
     * @returns {string} Rendered HTML
     */
    function renderWithKatexEnhanced(md, customRenderer) {
        performance.mark('renderKatex-enhanced-start');
        metrics.totalRenders++;

        const cacheKey = `${CACHE_VERSION}:${md}`;

        // Enhanced cache check
        if (renderCache.has(cacheKey)) {
            metrics.cacheHits++;
            performance.mark('renderKatex-enhanced-end');
            performance.measure('renderWithKatex-enhanced (cache)', 'renderKatex-enhanced-start', 'renderKatex-enhanced-end');
            return renderCache.get(cacheKey);
        }

        metrics.cacheMisses++;

        // Protected content extraction (code blocks, existing HTML)
        const protectedContent = new Map();
        let protectedCounter = 0;

        // **IMPORTANT: Process formulas BEFORE protecting code blocks**
        // This prevents code protection from interfering with formula delimiters
        const formulaProtectedCounter = { value: 0 };
        md = processFormulasEnhanced(md, protectedContent, formulaProtectedCounter);

        // Now protect code blocks and HTML (using updated counter)
        protectedCounter = formulaProtectedCounter.value;
        md = protectContent(md, protectedContent, protectedCounter);

        // Render remaining markdown
        let result;
        try {
            const markedOptions = customRenderer ? { renderer: customRenderer } : {};
            result = marked.parse(md, markedOptions);
        } catch (error) {
            console.error(`[MarkdownProcessorEnhanced] Marked parsing error:`, error);
            result = `<div class="markdown-error">Markdown parsing failed: ${escapeHtml(error.message)}</div>`;
        }

        // Restore protected content
        result = restoreContent(result, protectedContent);

        // Cache management with size limit
        if (renderCache.size >= MAX_CACHE_SIZE) {
            const firstKey = renderCache.keys().next().value;
            renderCache.delete(firstKey);
        }
        renderCache.set(cacheKey, result);

        // Update performance metrics
        const renderTime = performance.now();
        metrics.avgRenderTime = (metrics.avgRenderTime * (metrics.totalRenders - 1) + renderTime) / metrics.totalRenders;

        performance.mark('renderKatex-enhanced-end');
        performance.measure('renderWithKatex-enhanced', 'renderKatex-enhanced-start', 'renderKatex-enhanced-end');
        return result;
    }

    /**
     * Protect content that should not be processed (code blocks, HTML)
     * @param {string} md - Markdown text
     * @param {Map} protectedContent - Map to store protected content
     * @param {number} counter - Starting counter value
     * @returns {string} Markdown with protected content replaced by placeholders
     */
    function protectContent(md, protectedContent, counter) {
        // Protect fenced code blocks (``` ... ```)
        md = md.replace(/```[\s\S]*?```/g, (match) => {
            const placeholder = `PBTOKEN${counter++}Z`;
            protectedContent.set(placeholder, match);
            return placeholder;
        });

        // Protect inline code (`...`)
        md = md.replace(/`[^`\n]+?`/g, (match) => {
            const placeholder = `PBTOKEN${counter++}Z`;
            protectedContent.set(placeholder, match);
            return placeholder;
        });

        // Protect only real HTML constructs to avoid eating math comparators like "<="
        const htmlPatterns = [
            /<!--[\s\S]*?-->/g,                                     // HTML comments
            /<!DOCTYPE[^>]*?>/gi,                                     // DOCTYPE
            /<\/?[A-Za-z][A-Za-z0-9-]*(\s+[^<>]*?)?>/g              // opening/closing/self-closing tags
        ];
        htmlPatterns.forEach((re) => {
            md = md.replace(re, (match) => {
                const placeholder = `PBTOKEN${counter++}Z`;
                protectedContent.set(placeholder, match);
                return placeholder;
            });
        });

        return md;
    }

    /**
     * Enhanced formula processing with better error handling and context analysis
     * @param {string} md - Markdown text
     * @param {Map} protectedContent - Map to store protected content
     * @param {Object} counterObj - Counter object with 'value' property
     * @returns {string} Processed markdown with formulas rendered and protected
     */
    function processFormulasEnhanced(md, protectedContent, counterObj) {
        // Normalize math delimiters to avoid regex mismatches and nested '$' leakage
        function normalizeMathDelimiters(text) {
            if (typeof text !== 'string' || !text) return text;
            let s = text;
            // Convert encoded dollars to literal '$'
            s = s.replace(/&(?:#0*36|dollar);/gi, '$');
            // Normalize fullwidth dollar to ASCII
            s = s.replace(/\uFF04/g, '$');
            // Remove zero-width and combining marks immediately around '$' so `$̲` → `$`
            s = s.replace(/\$[\u200B-\u200D\uFEFF\u0300-\u036F]+/g, '$');
            s = s.replace(/[\u200B-\u200D\uFEFF\u0300-\u036F]+\$/g, '$');

            // **NEW: 修复 OCR 错误转义的 $ 符号**
            // 1. $\$ ... \$ ，$ → $ ... $ ，（移除尾部的 ，$）
            s = s.replace(/\$\\\$\s*([^\$]+?)\s*\\\$\s*，\s*\$/g, '$$$1$$ ，');

            // 2. $\$ ... \$$ → $ ... $ (处理末尾多余的 $$，先处理这个避免被后面的规则误处理)
            s = s.replace(/\$\\\$\s*([^\$]+?)\s*\\\$\$/g, '$$$1$$');

            // 3. $\$ ... \$ → $ ... $
            s = s.replace(/\$\\\$\s*([^\$]+?)\s*\\\$/g, '$$$1$$');

            // 4. \$...\$ → $...$ (完全转义的内联公式)
            s = s.replace(/\\\$([^\$\n]+?)\\\$/g, '$$$1$$');

            return s;
        }

        md = normalizeMathDelimiters(md);

        // Helper function to protect rendered formulas
        function protectRenderedFormula(renderedHtml) {
            if (!renderedHtml || typeof renderedHtml !== 'string') return renderedHtml;
            // 如果渲染结果包含 HTML 标签，保护它
            if (renderedHtml.includes('<')) {
                const placeholder = `PBTOKEN${counterObj.value++}Z`;
                protectedContent.set(placeholder, renderedHtml);
                return placeholder;
            }
            return renderedHtml;
        }

        // Process block formulas first ($$...$$)
        md = md.replace(/\$\$([\s\S]*?)\$\$/g, (match, content) => {
            const rendered = renderFormula(content.trim(), true, match);
            return protectRenderedFormula(rendered);
        });

        // Process LaTeX-style block formulas (\[...\])
        md = md.replace(/\\\[([\s\S]*?)\\\]/g, (match, content) => {
            const rendered = renderFormula(content.trim(), true, match);
            return protectRenderedFormula(rendered);
        });

        // Process inline formulas ($...$)
        // 支持多行公式，但限制长度防止误匹配
        md = md.replace(/\$([^\$]{1,2000}?)\$/g, (match, content) => {
            // 快速检查：如果内容是纯中文（没有任何数学符号），直接跳过
            const trimmed = content.trim();
            if (trimmed && /^[\u4e00-\u9fa5，、。；：！？""''（）【】《》\s]+$/.test(trimmed)) {
                console.log(`[MarkdownProcessorEnhanced] Skipping pure Chinese inline: "${trimmed}"`);
                return match; // 保留原始 $...$
            }
            // 如果包含多个段落（连续两个换行），可能是误匹配，跳过
            if (/\n\s*\n/.test(content)) {
                console.log(`[MarkdownProcessorEnhanced] Skipping multi-paragraph match: "${trimmed.substring(0, 50)}..."`);
                return match;
            }
            const rendered = renderFormula(content.trim(), false, match);
            return protectRenderedFormula(rendered);
        });

        // Process LaTeX-style inline formulas (\(...\))
        md = md.replace(/\\\(([^)]*?)\\\)/g, (match, content) => {
            const rendered = renderFormula(content.trim(), false, match);
            return protectRenderedFormula(rendered);
        });

        return md;
    }

    /**
     * Analyze formula structure to determine appropriate display mode.
     * @param {string} content - Raw formula content.
     * @param {boolean} displayHint - Preferred display mode from the matcher.
     * @returns {{ text: string, displayMode: boolean, forcedByHint: boolean, forcedByStructure: boolean }}
     */
    function analyzeFormulaLayout(content, displayHint) {
        const normalized = typeof content === 'string' ? content.trim() : '';
        if (!normalized) {
            return {
                text: '',
                displayMode: !!displayHint,
                forcedByHint: !!displayHint,
                forcedByStructure: false
            };
        }

        let displayMode = !!displayHint;
        let forcedByStructure = false;

        if (!displayMode) {
            forcedByStructure = FORMULA_BLOCK_HINTS.some(pattern => pattern.test(normalized));
            if (forcedByStructure) {
                displayMode = true;
            }
        }

        return {
            text: normalized,
            displayMode,
            forcedByHint: !!displayHint,
            forcedByStructure
        };
    }

    /**
     * Build an accessible fallback block when KaTeX rendering fails.
     * @param {string} content - Formula content.
     * @param {boolean} displayMode - Final display mode.
     * @param {Error|string} error - Rendering error.
     * @returns {string} HTML fallback snippet.
     */
    function buildKatexFallback(content, displayMode, error) {
        const sanitized = escapeHtml(content || '');
        const message = error && error.message ? error.message : (typeof error === 'string' ? error : '');
        const errorInfo = message
            ? ` data-katex-error="${escapeHtml(message)}" title="Formula rendering failed: ${escapeHtml(message)}"`
            : '';

        if (displayMode) {
            return `
<div class="katex-fallback katex-block"${errorInfo}><pre class="katex-fallback-source">${sanitized}</pre></div>
`;
        }

        return `<span class="katex-fallback katex-inline"${errorInfo}><span class="katex-fallback-source">${sanitized}</span></span>`;
    }

    /**
     * Render individual formula with enhanced error handling
     * @param {string} content - Formula content
     * @param {boolean} displayModeHint - Whether to use display mode
     * @param {string} originalMatch - Original matched text for fallback
     * @returns {string} Rendered formula or fallback
     */
    function renderFormula(content, displayModeHint, originalMatch) {
        // Decode a limited set of HTML entities that may leak into TeX inputs
        function htmlUnescape(text) {
            if (typeof text !== 'string' || text.length === 0) return '';
            let s = text;
            // Fix corrupted entities like "&̲#39;" (ampersand followed by combining marks)
            s = s.replace(/&[\u0300-\u036F]+#/g, '&#');
            // Named entities
            s = s.replace(/&amp;/g, '&')
                 .replace(/&lt;/g, '<')
                 .replace(/&gt;/g, '>')
                 .replace(/&quot;/g, '"')
                 .replace(/&apos;/g, "'")
                 .replace(/&#39;/g, "'")
                 .replace(/&nbsp;/g, ' ');
            // Numeric entities (decimal and hex)
            s = s.replace(/&#(\d+);/g, (_, dec) => {
                const code = parseInt(dec, 10);
                return Number.isFinite(code) ? String.fromCharCode(code) : _;
            });
            s = s.replace(/&#x([0-9A-Fa-f]+);/g, (_, hex) => {
                const code = parseInt(hex, 16);
                return Number.isFinite(code) ? String.fromCharCode(code) : _;
            });
            return s;
        }

        // Sanitize TeX: remove stray punctuation at edges, zero-width/combining chars, normalize common unicode symbols
        function sanitizeTeX(src) {
            let s = typeof src === 'string' ? src : '';
            if (!s) return '';
            // Decode HTML entities first, e.g. &#39; → ', &amp; → &
            s = htmlUnescape(s);
            // remove zero-width, BOM and combining marks anywhere
            s = s.replace(/[\u200B-\u200D\uFEFF\u0300-\u036F]/g, '');
            // strip private-use glyphs sometimes appearing as unknown symbols (e.g. '')
            s = s.replace(/[\uE000-\uF8FF]/g, '');
            // also normalize stray combining marks immediately after '&' that break entities
            s = s.replace(/&[\u0300-\u036F]+/g, '&');
            // trim leading/trailing CJK punctuation and quotes that accidentally wrapped TeX
            s = s.replace(/^[\s\u3000。，、；：：""\(（\)）\[\]【】《》'''"–—-]+/, '');
            s = s.replace(/[\s\u3000。，、；：：""\(（\)）\[\]【】《》'''"–—-]+$/, '');

            // **NEW: Remove trailing orphaned backslashes** (孤立的尾部反斜杠)
            // 移除末尾的单个反斜杠，除非它是有效的 LaTeX 命令的一部分
            // 注意：\backslash 后面跟的反斜杠也要清理
            s = s.replace(/\\backslash\s+\\\s*$/, '\\backslash'); // \backslash \ → \backslash
            s = s.replace(/\\\s*$/, ''); // 其他孤立的尾部反斜杠

            // **NEW: Remove standalone backslashes not part of commands**
            // 如果整个字符串就是一个反斜杠，清空它
            if (s === '\\' || /^\\+$/.test(s)) {
                return '';
            }

            // **NEW: Clean up invalid patterns that can't be LaTeX**
            // 移除纯中文后跟反斜杠的无效模式（中文不应该出现在数学公式中，除非在 \text{} 里）
            // 注意：只清理纯中文的，不要清理包含有效 LaTeX 命令的
            if (/^[\u4e00-\u9fa5，、。；：\s]+$/.test(s)) {
                return ''; // 纯中文，不是有效的数学公式
            }

            // **NEW: 修复常见的 OCR 错误**
            // \backslash \operatorname{vec} → \vec
            s = s.replace(/\\backslash\s+\\operatorname\{vec\}/g, '\\vec');
            // \backslash \operatorname{sum} → \sum
            s = s.replace(/\\backslash\s+\\operatorname\{sum\}/g, '\\sum');
            // \backslash \operatorname{prod} → \prod
            s = s.replace(/\\backslash\s+\\operatorname\{prod\}/g, '\\prod');

            // 修复下标中的空格: x \_1 → x_1, x \_n → x_n
            s = s.replace(/\s+\\_/g, '_');
            // 修复下标中的 {-} 错误: x_{-} i → x_i
            s = s.replace(/\{-\}\s*/g, '');
            s = s.replace(/_\{-\s+([^\}]+)\}/g, '_{$1}');

            // collapse excessive inner spaces
            s = s.replace(/\s{2,}/g, ' ');
            // If trailing delimiter for \right was stripped by cleanup, add default ')'
            if (/\\right\s*$/.test(s)) {
                let close = ')';
                try {
                    const re = /\\left\s*([\(\[\{])/g;
                    let m;
                    while ((m = re.exec(s)) !== null) {
                        const ch = m[1];
                        close = ch === '(' ? ')' : ch === '[' ? ']' : '}';
                    }
                } catch (_) { /* ignore */ }
                s = s.replace(/\\right\s*$/, `\\right${close}`);
            }
            // Normalize degree with unit inside \mathrm{...}: \mathrm{ ^\circ C } → ^{\circ}\mathrm{C}
            s = s.replace(/\\mathrm\{\s*(?:\\;|\s)*\^\s*\{?\s*\\?circ\s*\}?\s*([A-Za-z])\s*\}/g, '^{\\circ}\\mathrm{$1}');
            // Replace unsupported Unicode triangles with math macros
            s = s.replace(/▲/g, '\\blacktriangle').replace(/△/g, '\\triangle');
            // Normalize some common unicode math symbols to TeX
            s = s.replace(/≠/g, '\\ne');
            s = s.replace(/±/g, '\\pm');
            s = s.replace(/∞/g, '\\infty');
            return s.trim();
        }

        const cleaned = sanitizeTeX(content);
        const analysis = analyzeFormulaLayout(cleaned, displayModeHint);
        let tex = analysis.text;

        // 如果清理后内容为空，说明这不是有效的数学公式
        // 返回原始匹配文本（不渲染），避免吞掉内容
        if (!tex) {
            console.log(`[MarkdownProcessorEnhanced] Skipping invalid formula: "${content}" (cleaned to empty)`);
            return originalMatch || '';
        }

        // Guard against obviously incomplete or non-TeX inputs
        try {
            // **NEW: 检测只包含 \begin{...} 或只包含 \end{...} 的空环境**
            if (/^\s*\\begin\{[a-zA-Z*]+\}\s*$/.test(tex) || /^\s*\\end\{[a-zA-Z*]+\}\s*$/.test(tex)) {
                console.log(`[MarkdownProcessorEnhanced] Skipping empty environment: "${tex}"`);
                return ''; // 返回空字符串，不显示
            }

            // Incomplete \begin{...} without matching \end{...}
            const beginMatch = tex.match(/\\begin\{([a-zA-Z*]+)\}/);
            if (beginMatch) {
                const env = beginMatch[1];
                const endRe = new RegExp('\\\\end\\{' + env.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&') + '\\}');
                if (!endRe.test(tex)) {
                    // Do not attempt to render; return accessible fallback
                    return buildKatexFallback(tex, true, 'Incomplete environment: \\begin{' + env + '} ...');
                }
            }
            // Lone \end{...} without a preceding \begin{...}
            if (/^\s*\\end\{[a-zA-Z*]+\}\s*$/.test(tex)) {
                return buildKatexFallback(tex, analysis.displayMode, 'Orphaned \\end{...}');
            }
            // Strings that look like prior KaTeX error messages or HTML entities only
            if (/^&#?\w/.test(tex) && / in math mode /.test(tex)) {
                return buildKatexFallback(tex, analysis.displayMode, 'Skipped non-TeX error text');
            }
            // If the supposed TeX contains HTML tags, skip rendering (likely mis-detected)
            // 但允许数学比较符号 < 和 > (如 <0.001, x>5)
            // 只检测明显的 HTML 模式：<tag、</、class=、style=
            if (/<[a-zA-Z]|<\/|class=|style=/.test(tex)) {
                return buildKatexFallback(tex, analysis.displayMode, 'HTML detected in TeX input');
            }
        } catch (_) { /* ignore guard errors */ }

        try {
            const options = {
                displayMode: analysis.displayMode,
                throwOnError: true,
                strict: 'ignore', // Allow some non-standard LaTeX
                output: 'html', // Avoid duplicate MathML branch
                macros: {
                    // Common macros for robustness
                    "\\RR": "\\mathbb{R}",
                    "\\NN": "\\mathbb{N}",
                    "\\ZZ": "\\mathbb{Z}",
                    "\\QQ": "\\mathbb{Q}",
                    "\\CC": "\\mathbb{C}"
                }
            };

            const rendered = katex.renderToString(tex, options);
            metrics.formulaSuccesses++;

            const className = analysis.displayMode ? 'katex-block' : 'katex-inline';
            const original = escapeHtml(tex);
            const wrapper = analysis.displayMode
                ? `
<div class="${className}" data-formula-display="block" data-original-text="${original}">${rendered}</div>
`
                : `<span class="${className}" data-formula-display="inline" data-original-text="${original}">${rendered}</span>`;

            return wrapper;

        } catch (error) {
            metrics.formulaErrors++;
            console.warn(`[MarkdownProcessorEnhanced] KaTeX rendering failed for: "${tex}"`, error);
            return buildKatexFallback(tex, analysis.displayMode, error);
        }
    }

    /**
     * Restore protected content
     * @param {string} html - HTML with placeholders
     * @param {Map} protectedContent - Map of protected content
     * @returns {string} HTML with content restored
     */
    function restoreContent(html, protectedContent) {
        protectedContent.forEach((content, placeholder) => {
            html = html.replace(placeholder, content);
        });
        return html;
    }

    /**
     * Escape HTML special characters
     * @param {string} text - Text to escape
     * @returns {string} Escaped text
     */
    function escapeHtml(text) {
        if (typeof text !== 'string') return '';

        const htmlEscapes = {
            '&': '&amp;',
            '<': '&lt;',
            '>': '&gt;',
            '"': '&quot;',
            "'": '&#39;'
        };

        return text.replace(/[&<>"']/g, (match) => htmlEscapes[match]);
    }

    /**
     * Get performance metrics
     * @returns {Object} Performance and error metrics
     */
    function getMetrics() {
        return {
            ...metrics,
            cacheSize: renderCache.size,
            cacheHitRate: metrics.totalRenders > 0 ? (metrics.cacheHits / metrics.totalRenders * 100).toFixed(2) + '%' : '0%',
            formulaErrorRate: (metrics.formulaErrors + metrics.formulaSuccesses) > 0 ?
                (metrics.formulaErrors / (metrics.formulaErrors + metrics.formulaSuccesses) * 100).toFixed(2) + '%' : '0%'
        };
    }

    /**
     * Clear cache and reset metrics
     */
    function clearCache() {
        renderCache.clear();
        Object.keys(metrics).forEach(key => {
            if (typeof metrics[key] === 'number') {
                metrics[key] = 0;
            }
        });
    }

    /**
     * Test formula rendering capability
     * @param {string} formula - Formula to test
     * @param {boolean} displayMode - Display mode
     * @returns {Object} Test result
     */
    function testFormula(formula, displayMode = false) {
        const startTime = performance.now();
        try {
            const result = katex.renderToString(formula, {
                displayMode: displayMode,
                throwOnError: true
            });
            return {
                success: true,
                result: result,
                renderTime: performance.now() - startTime,
                error: null
            };
        } catch (error) {
            return {
                success: false,
                result: null,
                renderTime: performance.now() - startTime,
                error: error.message
            };
        }
    }

    // Enhanced public interface
    global.MarkdownProcessorEnhanced = {
        // Core functions
        safeMarkdown: safeMarkdownEnhanced,
        renderWithKatexFailback: renderWithKatexEnhanced,

        // Utility functions
        processCustomSyntax: processCustomSyntax,
        renderFormula: renderFormula,
        escapeHtml: escapeHtml,

        // Management functions
        getMetrics: getMetrics,
        clearCache: clearCache,
        testFormula: testFormula,

        // Version info
        version: '2.0.0',
        compatibility: 'Backward compatible with MarkdownProcessor'
    };

    // Backward compatibility
    if (!global.MarkdownProcessor) {
        global.MarkdownProcessor = {
            safeMarkdown: safeMarkdownEnhanced,
            renderWithKatexFailback: renderWithKatexEnhanced
        };
    }

})(window);