// js/processing/markdown_processor.js
(function MarkdownProcessor(global) {
// Cache shared by the legacy fallback code paths.
const renderCache = new Map();

// Patterns that, when found in a formula's source, mark it as display (block)
// math even without an explicit display hint: line breaks, `\\` row
// separators, equation tags/labels/references, \displaystyle / \displaylimits,
// and the opening or closing of a multi-line or matrix-like environment.
const LEGACY_FORMULA_BLOCK_HINTS = (function buildLegacyBlockHints() {
  // Environment names shared by the \begin{...} and \end{...} patterns
  // (regex source fragments, joined into a single alternation).
  const blockEnvironments = [
    'align\\*?',
    'aligned',
    'flalign\\*?',
    'gather\\*?',
    'multline\\*?',
    'split',
    'cases',
    'array',
    'pmatrix',
    'bmatrix',
    'vmatrix',
    'Vmatrix',
    'matrix',
    'smallmatrix'
  ].join('|');

  return [
    /\r|\n/,
    /\\\\/,
    /\\tag\b/,
    /\\label\b/,
    /\\eqref\b/,
    /\\display(?:style|limits)\b/,
    new RegExp('\\\\begin\\{(?:' + blockEnvironments + ')\\}'),
    new RegExp('\\\\end\\{(?:' + blockEnvironments + ')\\}')
  ];
})();
/**
 * Replace Markdown code segments (backtick-delimited runs such as `code`,
 * ``code`` or ```code```) with unique placeholder tokens so that later
 * formula parsing cannot alter the code content.
 *
 * An opening backtick run is ignored when its first backtick is preceded by a
 * backslash. A run without a usable closing fence is left in the text as-is.
 *
 * @param {string} md - Markdown text
 * @returns {{ text: string, placeholders: Array<{ placeholder: string, segment: string }> }}
 *   The text with code segments swapped for placeholders, plus the list of
 *   placeholder/original-segment pairs in order of appearance. Non-string or
 *   empty input is returned unchanged with an empty list.
 */
function protectMarkdownCodeSegments(md) {
  if (typeof md !== 'string' || md === '') {
    return { text: md, placeholders: [] };
  }

  // Per-call token prefix; the time/random parts make a clash with real text unlikely.
  const tokenPrefix = 'PBTOKEN' + Date.now().toString(36) + Math.random().toString(36).slice(2) + 'Z';
  // The suffix keeps token N from being a prefix of token N0, N1, ...
  const tokenSuffix = 'X';
  const total = md.length;
  const placeholders = [];
  const pieces = [];

  // Find the index of the closing fence at or after `from`, or -1 if none.
  function locateClosingFence(fence, from) {
    let cursor = from;
    while (cursor < total) {
      const hit = md.indexOf(fence, cursor);
      if (hit === -1) {
        return -1;
      }
      if (md[hit - 1] === '\\') {
        // Backslash-escaped candidate: skip past it.
        cursor = hit + fence.length;
      } else if (md[hit + fence.length] === '`') {
        // More backticks follow; slide forward one character and retry.
        cursor = hit + 1;
      } else {
        return hit;
      }
    }
    return -1;
  }

  let pos = 0;
  while (pos < total) {
    const opensFence = md[pos] === '`' && (pos === 0 || md[pos - 1] !== '\\');
    if (opensFence) {
      // Measure the opening backtick run.
      let runEnd = pos + 1;
      while (runEnd < total && md[runEnd] === '`') {
        runEnd += 1;
      }
      const fence = '`'.repeat(runEnd - pos);
      const closeAt = locateClosingFence(fence, runEnd);
      if (closeAt !== -1) {
        const stop = closeAt + fence.length;
        const placeholder = tokenPrefix + placeholders.length + tokenSuffix;
        placeholders.push({ placeholder: placeholder, segment: md.slice(pos, stop) });
        pieces.push(placeholder);
        pos = stop;
        continue;
      }
    }
    // Not a (closed) code segment: copy a single character and move on.
    pieces.push(md[pos]);
    pos += 1;
  }

  return { text: pieces.join(''), placeholders: placeholders };
}
/**
 * Put previously extracted Markdown code segments back in place of their
 * placeholder tokens.
 *
 * Each placeholder is substituted once (its first occurrence), in list order.
 *
 * @param {string} md - Markdown text containing placeholders
 * @param {Array<{ placeholder: string, segment: string }>} placeholders - Original code segments
 * @returns {string} Markdown text with the code segments restored; `md` is
 *   returned untouched when it is not a string or there is nothing to restore
 */
function restoreMarkdownCodeSegments(md, placeholders) {
  const nothingToRestore = !placeholders || placeholders.length === 0;
  if (nothingToRestore || typeof md !== 'string') {
    return md;
  }
  // A replacer function (not a replacement string) keeps `$` sequences in the
  // code segment from being interpreted as substitution patterns.
  return placeholders.reduce(
    (text, entry) => text.replace(entry.placeholder, () => entry.segment),
    md
  );
}
/**
 * Escape HTML-significant characters for safe fallback rendering.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with their entity equivalents so the
 * result can be placed in element content or a quoted attribute value.
 *
 * @param {string} text - Raw text
 * @returns {string} The escaped text, or an empty string when `text` is not a string
 */
function escapeHtml(text) {
  if (typeof text !== 'string') {
    return '';
  }
  const htmlEscapes = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
  };
  return text.replace(/[&<>"']/g, function(match) {
    return htmlEscapes[match];
  });
}
/**
 * Decide whether a formula should be laid out as display (block) or inline math.
 *
 * The content is trimmed first. A truthy `displayHint` always selects display
 * mode; otherwise display mode is selected when any pattern in
 * LEGACY_FORMULA_BLOCK_HINTS matches the trimmed text.
 *
 * @param {string} content - Formula source; non-strings are treated as empty
 * @param {*} displayHint - Truthy when the caller already knows the formula is display math
 * @returns {{ text: string, displayMode: boolean }} Trimmed source and the chosen mode
 */
function analyzeFormulaLayoutLegacy(content, displayHint) {
  const hinted = Boolean(displayHint);
  const text = typeof content === 'string' ? content.trim() : '';

  // Nothing to inspect: fall back to the caller's hint.
  if (text === '') {
    return { text: '', displayMode: hinted };
  }

  const displayMode = hinted || LEGACY_FORMULA_BLOCK_HINTS.some((pattern) => pattern.test(text));
  return { text: text, displayMode: displayMode };
}
/**
 * Build the HTML shown in place of a formula that KaTeX could not render.
 *
 * The escaped formula source is wrapped in an element that also carries the
 * failure message (when there is one) as a `data-katex-error` attribute and a
 * `title` tooltip.
 *
 * @param {string} content - Formula source; falsy values are treated as empty
 * @param {boolean} displayMode - Truthy for a block-level wrapper, falsy for an inline one
 * @param {Error|string} [error] - The rendering failure; its message (or the string itself) is exposed
 * @returns {string} HTML markup for the fallback
 */
function buildKatexFallbackMarkup(content, displayMode, error) {
  const sanitized = escapeHtml(content || '');
  const message = error && error.message ? error.message : (typeof error === 'string' ? error : '');
  // Leading space included so the attributes can be appended directly after
  // the wrapper's class attribute.
  const errorInfo = message
    ? ` data-katex-error="${escapeHtml(message)}" title="Formula rendering failed: ${escapeHtml(message)}"`
    : '';
  // NOTE(review): the wrapper elements and class names below are assumptions —
  // confirm them against the stylesheet and any code that queries for
  // fallback formulas.
  if (displayMode) {
    return `<div class="katex-fallback katex-block"${errorInfo}>${sanitized}</div>`;
  }
  return `<span class="katex-fallback katex-inline"${errorInfo}>${sanitized}</span>`;
}
/**
 * Render a TeX formula to HTML with KaTeX (legacy code path).
 *
 * The source is cleaned up first, its layout (display vs. inline) is decided
 * by analyzeFormulaLayoutLegacy, and the result is rendered with KaTeX. When
 * KaTeX throws, a warning is logged and the markup from
 * buildKatexFallbackMarkup is returned instead.
 *
 * @param {string} content - Raw formula source
 * @param {*} displayHint - Truthy when the formula is known to be display math
 * @returns {string} Rendered HTML (surrounded by newlines in display mode),
 *   fallback markup on a rendering failure, or '' when nothing is left to render
 */
function renderFormulaLegacy(content, displayHint) {
  // Clean common noise out of a formula's source before it reaches KaTeX.
  function sanitizeTeX(src) {
    let s = typeof src === 'string' ? src : '';
    if (!s) return '';
    // Remove zero-width characters (U+200B–U+200D, U+FEFF).
    s = s.replace(/[\u200B-\u200D\uFEFF]/g, '');
    // Remove combining diacritical marks stuck to either end.
    s = s.replace(/^[\u0300-\u036F]+|[\u0300-\u036F]+$/g, '');
    // Strip surrounding whitespace and punctuation. The leading set
    // deliberately omits the ASCII hyphen-minus: at the start of a formula it
    // is a sign, and removing it would turn "-x" into "x".
    s = s.replace(/^[\s\u3000。,、;::“”\((\))\[\]【】《》‘’'"–—]+/, '');
    s = s.replace(/[\s\u3000。,、;::“”\((\))\[\]【】《》‘’'"–—-]+$/, '');
    s = s.replace(/\s{2,}/g, ' ');
    // A bare trailing \right gets the closing delimiter that matches the last
    // \left( / \left[ / \left{ in the source; ")" when there is none.
    if (/\\right\s*$/.test(s)) {
      let close = ')';
      const leftDelimiter = /\\left\s*([\(\[\{])/g;
      let m;
      while ((m = leftDelimiter.exec(s)) !== null) {
        const ch = m[1];
        close = ch === '(' ? ')' : ch === '[' ? ']' : '}';
      }
      s = s.replace(/\\right\s*$/, `\\right${close}`);
    }
    // Normalize degree unit: \mathrm{ ^\circ C } → ^{\circ}\mathrm{C}
    s = s.replace(/\\mathrm\{\s*(?:\\;|\s)*\^\s*\{?\s*\\?circ\s*\}?\s*([A-Za-z])\s*\}/g, '^{\\circ}\\mathrm{$1}');
    // Replace Unicode triangles with their TeX commands. The trailing space
    // terminates the control word, so "△ABC" becomes "\triangle ABC" rather
    // than the undefined command "\triangleABC".
    s = s.replace(/▲\s*/g, '\\blacktriangle ').replace(/△\s*/g, '\\triangle ');
    return s.trim();
  }

  const cleaned = sanitizeTeX(content);
  const analysis = analyzeFormulaLayoutLegacy(cleaned, displayHint);
  if (!analysis.text) {
    return '';
  }
  try {
    const rendered = katex.renderToString(analysis.text, {
      displayMode: analysis.displayMode,
      throwOnError: true,
      strict: 'ignore',
      output: 'html'
    });
    // NOTE(review): the KaTeX output is returned without a wrapper element.
    // If callers need the original TeX alongside the rendering (for example
    // as a data attribute), confirm the expected markup and add it here.
    if (analysis.displayMode) {
      return `\n${rendered}\n`;
    }
    return `${rendered}`;
  } catch (error) {
    console.warn('[MarkdownProcessor] KaTeX rendering failed (legacy):', error);
    return buildKatexFallbackMarkup(analysis.text, analysis.displayMode, error);
  }
}
/**
* Legacy safeMarkdown 实现,在未加载增强版处理器时使用。
* @param {string} md
* @param {Array