<!--
Source: paper-burner/tests/test-compressed-table-fix.html (205 lines, 8.5 KiB, HTML)

Repository viewer notice: this file contains ambiguous Unicode characters, i.e. characters that might be confused with other characters. If this is intentional, the warning can safely be ignored; the viewer's Escape button reveals them.
-->

<!DOCTYPE html>
<!-- Manual browser test page: renders one single-line ("compressed") Markdown table
     with plain markdown-it and with MarkdownProcessorAST, and shows both results. -->
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>压缩表格修复测试</title>
<!-- markdown-it 13.0.1 from the jsDelivr CDN; the inline script calls the global `markdownit` factory. -->
<script src="https://cdn.jsdelivr.net/npm/markdown-it@13.0.1/dist/markdown-it.min.js"></script>
<!-- Presumably defines the global `MarkdownProcessorAST` checked by the inline script.
     NOTE(review): this URL is resolved relative to the page's own location; confirm the
     file is reachable from where the page is served, otherwise step 3 reports
     "MarkdownProcessorAST 未加载". -->
<script src="js/processing/markdown_processor_ast.js"></script>
<style>
  /* ---------- Page frame ---------- */
  body {
    max-width: 1400px;
    margin: 20px auto;
    padding: 20px;
    font-family: 'Microsoft YaHei', Arial, sans-serif;
    background: #f5f5f5;
  }

  h1 {
    color: #2c3e50;
  }

  /* ---------- Panels ---------- */
  /* White card; the inline script gives this class to every panel it creates. */
  .section {
    margin: 20px 0;
    padding: 20px;
    border-radius: 8px;
    background: white;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
  }

  /* Underlined blue caption at the top of a panel. */
  .section-title {
    margin-bottom: 15px;
    padding-bottom: 8px;
    border-bottom: 2px solid #3498db;
    font-size: 18px;
    font-weight: bold;
    color: #3498db;
  }

  /* ---------- Raw text display ---------- */
  /* Dark monospace box used for the raw Markdown, error stacks and metrics JSON;
     long single-line input wraps instead of stretching the page. */
  .code-block {
    margin: 10px 0;
    padding: 15px;
    border-radius: 4px;
    background: #2c3e50;
    color: #ecf0f1;
    font-family: 'Courier New', monospace;
    font-size: 12px;
    white-space: pre-wrap;
    word-break: break-all;
    overflow-x: auto;
  }

  /* ---------- Rendered output ---------- */
  /* Light box that receives the HTML produced by a renderer. */
  .result {
    margin: 10px 0;
    padding: 15px;
    border: 1px solid #ddd;
    border-radius: 4px;
    background: #f8f9fa;
  }

  .result table {
    width: 100%;
    border-collapse: collapse;
  }

  .result th,
  .result td {
    padding: 8px;
    border: 1px solid #ddd;
    text-align: left;
  }

  .result th {
    background: #3498db;
    color: white;
  }

  /* Zebra striping for rendered tables. */
  .result tr:nth-child(even) {
    background: #f2f2f2;
  }

  /* ---------- Pass / fail badge ---------- */
  .status {
    display: inline-block;
    margin: 10px 0;
    padding: 6px 15px;
    border-radius: 4px;
    font-weight: bold;
  }

  .success {
    background: #27ae60;
    color: white;
  }

  .fail {
    background: #e74c3c;
    color: white;
  }
</style>
</head>
<body>
  <!-- Static heading and tagline; every test panel below them is generated by the inline script. -->
  <h1>📊 压缩表格自动修复</h1>
  <p style="color: #7f8c8d;">自动检测并修复所有内容在一行的表格</p>

  <!-- Mount point: the inline script looks this element up by id and appends its panels to it. -->
  <div id="output"></div>
<script>
// Test fixture: a Markdown table whose header, separator and data rows all sit on one
// physical line. The separator row contains seven `---` groups, and the header's last
// pipe is followed directly by the first `---` (no space, no second pipe).
// Keep this literal byte-for-byte: it is the input every step below renders.
const compressedTable = `| | | | 五分位数 (Quintiles) | | | |---|---|---|---|---|---|---| | | 1 | 2 | 3 | 4 | 5 | 5-1 | | 市场 (Market) | 0.145*** | 0.200** | 0.061* | 0.136* | 0.106*** | -0.039 | | | (2.774) | (2.441) | (1.938) | (1.689) | (2.703) | (-0.930) | | 规模 (Size) | 0.107** | 0.329* | 0.147** | 0.206** | 0.059*** | 0.047 | | | (2.163) | (1.800) | (2.223) | (2.452) | (3.150) | (1.090) |`;
// The #output container; each result panel created below is appended to it.
const output = document.getElementById('output');
// Step 1: show the raw compressed table exactly as it was received.
const section1 = document.createElement('div');
section1.className = 'section';
// The code block is left empty in the markup and filled through textContent below,
// so every character of the fixture (including `&`, `<` and `>`) is shown literally
// instead of relying on hand-written entity replacement.
section1.innerHTML = `
<div class="section-title">步骤 1: 原始压缩表格(所有内容在一行)</div>
<div class="code-block"></div>
<div style="color: #e74c3c; font-weight: bold; margin-top: 10px;">
⚠️ 问题:所有 | 都在同一行markdown-it 无法识别为表格
</div>
`;
section1.querySelector('.code-block').textContent = compressedTable;
output.appendChild(section1);
// Step 2: baseline. Hand the untouched one-line input straight to markdown-it and
// report whether its output contains a <table> element.
const md = markdownit({ html: true, breaks: false });
const directHTML = md.render(compressedTable);
const baselineHasTable = directHTML.includes('<table');

// Pick the badge colour class and caption for the outcome.
let baselineBadgeClass;
let baselineBadgeText;
if (baselineHasTable) {
  baselineBadgeClass = 'success';
  baselineBadgeText = '✓ 成功生成表格';
} else {
  baselineBadgeClass = 'fail';
  baselineBadgeText = '✗ 失败:未生成表格';
}

const baselinePanel = document.createElement('div');
baselinePanel.className = 'section';
// directHTML is inserted as markup on purpose: the panel displays the rendered result.
baselinePanel.innerHTML = `
<div class="section-title">步骤 2: markdown-it 直接渲染</div>
<div class="status ${baselineBadgeClass}">${baselineBadgeText}</div>
<div class="result">${directHTML}</div>
`;
output.appendChild(baselinePanel);
// Step 3: render the same input through MarkdownProcessorAST and report the outcome.
// Three outcomes are handled: processor missing, processor threw, processor returned HTML.
const section3 = document.createElement('div');
section3.className = 'section';
// Explanation panel; only created when the processor's output contains a table.
let repairNotes = null;
if (typeof MarkdownProcessorAST !== 'undefined') {
  try {
    const astHTML = MarkdownProcessorAST.render(compressedTable);
    const hasTableAST = astHTML.includes('<table');
    // astHTML is inserted as markup on purpose: the panel displays the rendered result.
    section3.innerHTML = `
<div class="section-title">步骤 3: MarkdownProcessorAST 自动修复并渲染</div>
<div class="status ${hasTableAST ? 'success' : 'fail'}">${hasTableAST ? '✓ 自动修复成功!生成了表格' : '✗ 修复失败'}</div>
<div class="result">${astHTML}</div>
`;
    // Describe the repair procedure.
    if (hasTableAST) {
      // Count the pipes of the separator row itself (`|---|---|...|`) in the fixture,
      // which is what the explanation text refers to. Counting pipes in the step-2
      // HTML would count every pipe of the whole table.
      const separatorRow = compressedTable.match(/\|(?:\s*:?-+:?\s*\|)+/);
      const separatorPipeCount = separatorRow ? (separatorRow[0].match(/\|/g) || []).length : 0;
      repairNotes = document.createElement('div');
      repairNotes.className = 'section';
      repairNotes.innerHTML = `
<div class="section-title">✨ 修复过程说明</div>
<ol style="line-height: 1.8;">
<li><strong>检测</strong>:发现行中包含分隔符 <code>|---|---|</code> 且管道符数量 ≥ 6</li>
<li><strong>分析</strong>:通过分隔符确定列数(${separatorPipeCount} 个管道符)</li>
<li><strong>分割</strong>:将单行表格按列数拆分成多行
<ul>
<li>表头行:提取分隔符之前的内容</li>
<li>分隔符行:保留 <code>|---|---|...</code></li>
<li>数据行:每 N 个单元格作为一行</li>
</ul>
</li>
<li><strong>重建</strong>:组合成标准的多行 Markdown 表格</li>
<li><strong>渲染</strong>:交给 markdown-it 正常渲染</li>
</ol>
`;
    }
  } catch (error) {
    // The message and stack are arbitrary text (stack frames often contain
    // `<anonymous>`), so they are written through textContent rather than
    // interpolated into markup, where they would be parsed as tags.
    section3.innerHTML = `
<div class="section-title">步骤 3: MarkdownProcessorAST 渲染</div>
<div class="status fail">✗ 渲染出错</div>
<div class="error-message" style="color: #e74c3c; margin-top: 10px;"></div>
<div class="code-block"></div>
`;
    // Values thrown that are not Error instances have no message/stack; show their string form.
    const message = error instanceof Error ? error.message : String(error);
    section3.querySelector('.error-message').textContent = `错误: ${message}`;
    section3.querySelector('.code-block').textContent = (error && error.stack) || String(error);
  }
} else {
  section3.innerHTML = `
<div class="section-title">步骤 3: MarkdownProcessorAST 渲染</div>
<div class="status fail">✗ MarkdownProcessorAST 未加载</div>
`;
}
// Append the result first so the explanation panel appears below the output it explains.
output.appendChild(section3);
if (repairNotes) {
  output.appendChild(repairNotes);
}
// Summary: a static green panel listing what the processor is meant to handle.
// The text is fixed; it does not depend on the outcome of the steps above.
const summaryChecklist = [
  '检测单行表格(通过分隔符和管道符数量)',
  '自动拆分为多行标准格式',
  '保持表格结构完整',
  '对正常表格无影响'
].map((entry) => `<li>✅ ${entry}</li>`).join('\n');

const summaryPanel = document.createElement('div');
summaryPanel.className = 'section';
summaryPanel.innerHTML = `
<div class="section-title" style="color: #27ae60;">🎉 总结</div>
<p style="font-size: 16px; line-height: 1.8;">
<strong>MarkdownProcessorAST</strong> 现在可以自动检测并修复压缩的单行表格:
</p>
<ul style="line-height: 1.8;">
${summaryChecklist}
</ul>
<p style="margin-top: 15px; color: #27ae60; font-weight: bold;">
所有表格现在都能正确渲染!
</p>
`;
// Light green tint replaces the default white card background.
summaryPanel.style.background = '#d5f4e6';
output.appendChild(summaryPanel);
// Metrics: after a 100 ms delay, append a panel with whatever
// MarkdownProcessorAST.getMetrics() returns, pretty-printed as JSON.
// NOTE(review): the delay presumably lets the processor finish recording its
// metrics; confirm whether it is actually needed.
setTimeout(() => {
  // Nothing to show when the processor is missing or exposes no getMetrics().
  if (typeof MarkdownProcessorAST === 'undefined' || !MarkdownProcessorAST.getMetrics) {
    return;
  }
  const metrics = MarkdownProcessorAST.getMetrics();
  const metricsDiv = document.createElement('div');
  metricsDiv.className = 'section';
  metricsDiv.innerHTML = `
<div class="section-title">📊 性能指标</div>
<div class="code-block"></div>
`;
  // The JSON is written through textContent: string values inside the metrics may
  // contain `<` or `&`, which must be displayed literally rather than parsed as markup.
  metricsDiv.querySelector('.code-block').textContent = JSON.stringify(metrics, null, 2);
  output.appendChild(metricsDiv);
}, 100);
</script>
</body>
</html>