deerflow2/frontend/src/core/utils/markdown-download/converter.ts

import {
  Document as DocxDocument,
  Packer,
  Paragraph,
  TextRun,
  HeadingLevel,
  ImageRun,
  type ParagraphChild,
} from "docx";
import { marked } from "marked";

// ============================================================================
// Types
// ============================================================================

/**
 * Markdown token type (simplified).
 *
 * Aliased to `any`, so token fields are read without static type checking
 * throughout this module.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type MarkdownToken = any;

/**
 * PDF conversion options.
 */
export interface PdfOptions {
  /**
   * Page margins as [top, right, bottom, left], in millimetres.
   * @default [15, 15, 15, 15]
   */
  margin?: [number, number, number, number];
  /**
   * Page format.
   * @default "a4"
   */
  format?: "a3" | "a4" | "a5" | "letter" | "legal";
  /**
   * Page orientation.
   * @default "portrait"
   */
  orientation?: "portrait" | "landscape";
  /**
   * Scale factor (forwarded as the html2canvas `scale` setting).
   * @default 2
   */
  scale?: number;
}

/**
 * DOCX conversion options.
 */
export interface DocxOptions {
  /**
   * Font used for code blocks and inline code.
   * @default "Courier New"
   */
  codeFont?: string;
  /**
   * Font size for code blocks, in half-points.
   * @default 22 (11pt)
   */
  codeFontSize?: number;
  /**
   * Resolves asset paths found in the Markdown (for example relative image
   * paths). May return the resolved URL synchronously or as a promise;
   * `null` means "no resolution available".
   */
  resolveAssetUrl?: (rawPath: string) => string | null | Promise<string | null>;
}

// ============================================================================
// DOCX Converter
// ============================================================================

/**
 * Convert Markdown text into a DOCX file and trigger a browser download.
 *
 * The Markdown is tokenised with `marked.lexer`, the tokens are mapped to
 * DOCX paragraphs, and the packed document is handed to the browser as a
 * Blob download.
 *
 * @param markdown - Markdown source text.
 * @param filename - Target file name, either without an extension or with a
 *   `.md` extension.
 * @param options - Conversion options.
 *
 * @example
 * ```ts
 * await downloadMarkdownAsDocx("# Hello World", "document");
 * ```
 */
export async function downloadMarkdownAsDocx(
  markdown: string,
  filename: string,
  options: DocxOptions = {},
): Promise<void> {
  // Defaults only apply when the caller left the field undefined.
  const {
    codeFont = "Courier New",
    codeFontSize = 22,
    resolveAssetUrl,
  } = options;
  const renderOptions = { codeFont, codeFontSize, resolveAssetUrl };

  const body = await parseTokensToDocx(marked.lexer(markdown), renderOptions);
  const packed = await Packer.toBlob(
    new DocxDocument({ sections: [{ children: body }] }),
  );

  downloadBlob(packed, normalizeFilename(filename, ".docx"));
}

// ============================================================================
// PDF Converter
// ============================================================================

/**
 * Convert Markdown text into a PDF file and trigger a browser download.
 *
 * Image sources are first rewritten through `resolveAssetUrl` (when given),
 * the Markdown is rendered to HTML with `marked.parse`, wrapped in a styled
 * container, and passed to html2pdf.js for rendering and saving.
 *
 * @param markdown - Markdown source text.
 * @param filename - Target file name, either without an extension or with a
 *   `.md` extension.
 * @param options - Page options plus an optional asset resolver.
 *
 * @example
 * ```ts
 * await downloadMarkdownAsPdf("# Hello World", "document");
 * ```
 */
export async function downloadMarkdownAsPdf(
  markdown: string,
  filename: string,
  options: PdfOptions & {
    resolveAssetUrl?: (
      rawPath: string,
    ) => string | null | Promise<string | null>;
  } = {},
): Promise<void> {
  // Load the renderer before doing any other work.
  const createPdfWorker = await loadHtml2Pdf();

  const {
    margin: pageMargin = [15, 15, 15, 15],
    format: pageFormat = "a4",
    orientation: pageOrientation = "portrait",
    scale: canvasScale = 2,
    resolveAssetUrl,
  } = options;

  // Markdown -> HTML, with image sources resolved beforehand.
  const resolvedMarkdown = await rewriteMarkdownImageSources(
    markdown,
    resolveAssetUrl,
  );
  const renderedHtml = await marked.parse(resolvedMarkdown);

  // Wrap the HTML in a container carrying the export styles.
  const printable = createStyledContainer(renderedHtml);

  // html2pdf configuration.
  const pdfSettings = {
    margin: pageMargin,
    filename: normalizeFilename(filename, ".pdf"),
    image: { type: "jpeg" as const, quality: 0.98 },
    html2canvas: {
      scale: canvasScale,
      useCORS: true,
      logging: false,
      onclone: fixColorsForHtml2Canvas,
    },
    jsPDF: {
      unit: "mm" as const,
      format: pageFormat,
      orientation: pageOrientation,
    },
  };

  await createPdfWorker().set(pdfSettings).from(printable).save();
}

// ============================================================================
// Internal Utilities
// ============================================================================

/**
 * Load html2pdf.js on demand through a dynamic import.
 *
 * @returns The module's default export.
 */
// eslint-disable-next-line @typescript-eslint/no-unsafe-function-type
async function loadHtml2Pdf(): Promise<Function> {
  const { default: html2pdfFactory } = await import("html2pdf.js");
  return html2pdfFactory;
}

/**
 * Build a detached `<div>` holding the rendered HTML together with the
 * inline styles used for PDF export.
 *
 * @param htmlContent - HTML markup to place inside the container.
 * @returns The styled container element (not attached to the document).
 */
function createStyledContainer(htmlContent: string): HTMLDivElement {
  const wrapper = document.createElement("div");

  // Base typography and colours for the whole export.
  wrapper.style.cssText = `
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    font-size: 14px;
    line-height: 1.6;
    padding: 20px;
    max-width: 800px;
    color: #333333;
    background-color: #ffffff;
  `;

  // NOTE(review): the markup is assigned to innerHTML without sanitisation —
  // confirm the Markdown source is trusted or sanitised upstream.
  wrapper.innerHTML = htmlContent;

  // Per-element styles (headings, code, tables, ...).
  applyElementStyles(wrapper);

  return wrapper;
}

/**
 * Apply the fixed export styles to the elements inside `container` by
 * writing inline style properties on every matching descendant.
 *
 * @param container - Root element whose descendants are styled.
 */
function applyElementStyles(container: HTMLElement): void {
  // Assigns every given style property on each descendant matching `selector`.
  const styleAll = (
    selector: string,
    styles: Partial<CSSStyleDeclaration>,
  ): void => {
    container.querySelectorAll<HTMLElement>(selector).forEach((node) => {
      Object.assign(node.style, styles);
    });
  };

  // Headings
  styleAll("h1, h2, h3, h4, h5, h6", {
    marginTop: "1.5em",
    marginBottom: "0.5em",
    fontWeight: "600",
    color: "#1a1a1a",
  });

  // Paragraphs
  styleAll("p", { marginBottom: "1em" });

  // Code: shared look first, then block (`pre`) vs. inline (`code`) spacing.
  container.querySelectorAll<HTMLElement>("pre, code").forEach((node) => {
    Object.assign(node.style, {
      fontFamily: "'SF Mono', 'Fira Code', Consolas, monospace",
      backgroundColor: "#f5f5f5",
      color: "#333333",
      fontSize: "13px",
    });
    const spacing =
      node.tagName === "PRE"
        ? { padding: "12px", borderRadius: "6px", overflow: "auto" }
        : { padding: "2px 4px", borderRadius: "3px" };
    Object.assign(node.style, spacing);
  });

  // Lists
  styleAll("ul, ol", { marginBottom: "1em", paddingLeft: "2em" });

  // Block quotes
  styleAll("blockquote", {
    borderLeft: "4px solid #dddddd",
    marginLeft: "0",
    paddingLeft: "16px",
    color: "#666666",
  });

  // Tables
  styleAll("table", {
    borderCollapse: "collapse",
    width: "100%",
    marginBottom: "1em",
  });
  styleAll("th, td", { border: "1px solid #dddddd", padding: "8px" });

  // Links
  styleAll("a", { color: "#0066cc", textDecoration: "underline" });

  // Horizontal rules (`border` is cleared before `borderTop` is set).
  styleAll("hr", {
    border: "none",
    borderTop: "1px solid #dddddd",
    margin: "2em 0",
  });
}

/**
 * Prepare the cloned document for html2canvas, which cannot parse some
 * colour functions (for example `lab()` / `oklab()`).
 *
 * All stylesheets are removed from the clone. Inline colour declarations are
 * then inspected one by one: values in a plain form (hex, `rgb()`/`hsl()`
 * without nested functions, or a bare keyword) are kept, so the explicit
 * export styles set on the container survive; anything else (unsupported
 * colour functions, `var(...)`, ...) is removed. A removed `color` or
 * `background-color` is replaced by a safe default.
 *
 * @param clonedDoc - The document clone passed to the html2canvas `onclone`
 *   hook.
 */
function fixColorsForHtml2Canvas(clonedDoc: Document): void {
  // Drop stylesheets: they may contain lab/oklab and similar colours.
  clonedDoc
    .querySelectorAll<
      HTMLStyleElement | HTMLLinkElement
    >('link[rel="stylesheet"], style')
    .forEach((sheet) => sheet.remove());

  // Longhand colour properties only: checking the `border-color` shorthand
  // would remove all four sides even when only one side is unsafe.
  const colorProps = [
    "color",
    "background-color",
    "border-top-color",
    "border-bottom-color",
    "border-left-color",
    "border-right-color",
    "outline-color",
    "text-decoration-color",
    "caret-color",
    "column-rule-color",
    "accent-color",
    "fill",
    "stroke",
  ];

  // Hex, rgb()/rgba()/hsl()/hsla() without nested functions or relative
  // colour syntax, or a single keyword (named colour, transparent, none, ...).
  const safeColor =
    /^(?:#[0-9a-f]{3,8}|(?:rgb|hsl)a?\((?!\s*from\b)[^()]*\)|[a-z]+)$/i;

  clonedDoc.querySelectorAll<HTMLElement>("*").forEach((el) => {
    for (const prop of colorProps) {
      const value = el.style.getPropertyValue(prop).trim();
      if (value === "" || safeColor.test(value)) continue;

      el.style.removeProperty(prop);
      // Same fallbacks the blanket reset used to apply to every element.
      if (prop === "color") {
        el.style.color = "#333333";
      } else if (prop === "background-color") {
        el.style.backgroundColor = "transparent";
      }
    }
  });

  // Page-level defaults, inherited by elements without their own colours.
  const body = clonedDoc.body;
  body.style.color = "#333333";
  body.style.backgroundColor = "#ffffff";
}

/**
 * Convert block-level Markdown tokens into DOCX paragraphs.
 *
 * Handled token types: `heading`, `paragraph`, `code`, `list`, `blockquote`,
 * `hr`, `space` and `image`. Any other token type produces no output.
 *
 * List items keep all of their content: nested lists become deeper bullet
 * levels and further blocks inside an item are emitted after its bullet.
 * Block quotes emit one indented paragraph per quoted block rather than only
 * the first one.
 *
 * @param tokens - Block tokens (this module passes the `marked.lexer` output).
 * @param options - Code font settings and the optional asset resolver.
 * @returns The paragraphs in document order.
 */
async function parseTokensToDocx(
  tokens: MarkdownToken[],
  options: Required<Pick<DocxOptions, "codeFont" | "codeFontSize">> &
    Pick<DocxOptions, "resolveAssetUrl">,
): Promise<Paragraph[]> {
  const paragraphs: Paragraph[] = [];

  for (const token of tokens) {
    switch (token.type) {
      case "heading": {
        const runs = await parseInlineTokens(token.tokens ?? [], options);
        paragraphs.push(
          new Paragraph({
            children: runs,
            heading: getHeadingLevel(token.depth),
            spacing: { before: 240, after: 120 },
          }),
        );
        break;
      }

      case "paragraph": {
        const runs = await parseInlineTokens(token.tokens ?? [], options);
        paragraphs.push(
          new Paragraph({
            children: runs.length > 0 ? runs : [new TextRun("")],
            spacing: { after: 200 },
          }),
        );
        break;
      }

      case "code": {
        // One shaded paragraph per source line; blank lines get a single
        // space as their text.
        const lines: string[] = token.text.split("\n");
        for (const line of lines) {
          paragraphs.push(
            new Paragraph({
              children: [
                new TextRun({
                  text: line.length > 0 ? line : " ",
                  font: options.codeFont,
                  size: options.codeFontSize,
                }),
              ],
              shading: { fill: "F5F5F5" },
            }),
          );
        }
        // Empty paragraph separating the code block from what follows.
        paragraphs.push(new Paragraph({ children: [] }));
        break;
      }

      case "list": {
        paragraphs.push(...(await listToDocxParagraphs(token, 0, options)));
        break;
      }

      case "blockquote": {
        paragraphs.push(...(await blockquoteToDocxParagraphs(token, options)));
        break;
      }

      case "hr": {
        // Rendered as a row of box-drawing characters.
        paragraphs.push(
          new Paragraph({
            children: [new TextRun({ text: "─".repeat(50), color: "CCCCCC" })],
            spacing: { before: 200, after: 200 },
          }),
        );
        break;
      }

      case "space": {
        paragraphs.push(new Paragraph({ children: [] }));
        break;
      }

      case "image": {
        // Images that cannot be resolved or loaded are skipped.
        const imageRun = await createImageRunFromToken(token, options);
        if (imageRun) {
          paragraphs.push(
            new Paragraph({
              children: [imageRun],
              spacing: { after: 200 },
            }),
          );
        }
        break;
      }
    }
  }

  return paragraphs;
}

/** Options shared by the block-level DOCX helpers below. */
type DocxBlockOptions = Required<
  Pick<DocxOptions, "codeFont" | "codeFontSize">
> &
  Pick<DocxOptions, "resolveAssetUrl">;

/**
 * Render a `list` token as bullet paragraphs, recursing into nested lists
 * with a deeper bullet level.
 */
async function listToDocxParagraphs(
  list: MarkdownToken,
  level: number,
  options: DocxBlockOptions,
): Promise<Paragraph[]> {
  // NOTE(review): assumes the docx built-in bullet numbering defines levels
  // 0-8; deeper nesting is clamped — confirm against the docx version in use.
  const bulletLevel = Math.min(level, 8);
  const out: Paragraph[] = [];

  for (const item of list.items ?? []) {
    const blocks: MarkdownToken[] = item.tokens ?? [];
    const first = blocks[0];

    // The first block supplies the bullet text when it carries child tokens;
    // otherwise the bullet stays empty and the block is rendered after it.
    const firstIsLead =
      first != null && first.type !== "list" && Array.isArray(first.tokens);
    const leadRuns = firstIsLead
      ? await parseInlineTokens(first.tokens, options)
      : [];
    out.push(
      new Paragraph({
        children: leadRuns.length > 0 ? leadRuns : [new TextRun("")],
        bullet: { level: bulletLevel },
        spacing: { after: 80 },
      }),
    );

    for (const block of firstIsLead ? blocks.slice(1) : blocks) {
      if (block.type === "list") {
        out.push(...(await listToDocxParagraphs(block, level + 1, options)));
      } else if (block.type === "text" || block.type === "paragraph") {
        // Continuation text of the same item: indented, without a bullet.
        const runs = await parseInlineTokens(block.tokens ?? [], options);
        out.push(
          new Paragraph({
            children: runs.length > 0 ? runs : [new TextRun("")],
            indent: { left: 720 * (bulletLevel + 1) },
            spacing: { after: 80 },
          }),
        );
      } else if (block.type !== "space") {
        // Any other block (code, quote, ...) uses the regular block rules.
        out.push(...(await parseTokensToDocx([block], options)));
      }
    }
  }

  return out;
}

/**
 * Render a `blockquote` token: one indented, left-bordered paragraph per
 * quoted block, or a single empty one when the quote has no content.
 */
async function blockquoteToDocxParagraphs(
  quote: MarkdownToken,
  options: DocxBlockOptions,
): Promise<Paragraph[]> {
  const quoteParagraph = async (inline: MarkdownToken[]): Promise<Paragraph> => {
    const runs = await parseInlineTokens(inline, options);
    return new Paragraph({
      children: runs.length > 0 ? runs : [new TextRun("")],
      indent: { left: 720 },
      border: { left: { style: "single", size: 12, color: "CCCCCC" } },
      spacing: { after: 200 },
    });
  };

  const out: Paragraph[] = [];
  for (const block of quote.tokens ?? []) {
    if (block.type === "space") continue;

    if (block.type === "blockquote") {
      // Nested quotes are flattened into the same quote styling.
      out.push(...(await blockquoteToDocxParagraphs(block, options)));
    } else if (Array.isArray(block.tokens)) {
      out.push(await quoteParagraph(block.tokens));
    } else {
      // Blocks without child tokens (code, lists, ...) use the regular rules.
      out.push(...(await parseTokensToDocx([block], options)));
    }
  }

  if (out.length === 0) {
    out.push(await quoteParagraph([]));
  }
  return out;
}

/**
 * Convert inline Markdown tokens into DOCX runs.
 *
 * Formatting containers (`strong`, `em`, `del`, `link`) are rendered from
 * their child tokens, so nested formatting such as bold-inside-italic is
 * preserved instead of being printed as raw Markdown. `escape` tokens yield
 * the escaped character without its backslash. Token types without a
 * dedicated rule fall back to their raw source text.
 *
 * @param tokens - Inline tokens of a block.
 * @param options - Code font settings and the optional asset resolver.
 * @returns Runs (text and images) in source order.
 */
async function parseInlineTokens(
  tokens: MarkdownToken[],
  options: Required<Pick<DocxOptions, "codeFont" | "codeFontSize">> &
    Pick<DocxOptions, "resolveAssetUrl">,
): Promise<ParagraphChild[]> {
  return collectInlineRuns(tokens, options, {});
}

/**
 * Run-level formatting inherited by nested inline tokens; this is the
 * options-object form accepted by the `TextRun` constructor.
 */
type InlineRunStyle = Exclude<ConstructorParameters<typeof TextRun>[0], string>;

/**
 * Recursive worker for `parseInlineTokens`: renders `tokens` with `style`
 * applied to every text run it creates.
 */
async function collectInlineRuns(
  tokens: MarkdownToken[],
  options: Required<Pick<DocxOptions, "codeFont" | "codeFontSize">> &
    Pick<DocxOptions, "resolveAssetUrl">,
  style: InlineRunStyle,
): Promise<ParagraphChild[]> {
  const runs: ParagraphChild[] = [];

  // Renders a formatting container: children inherit the merged style; a
  // container without child tokens falls back to its plain `text`.
  const pushStyled = async (
    token: MarkdownToken,
    extra: InlineRunStyle,
  ): Promise<void> => {
    const merged: InlineRunStyle = { ...style, ...extra };
    if (Array.isArray(token.tokens) && token.tokens.length > 0) {
      runs.push(...(await collectInlineRuns(token.tokens, options, merged)));
    } else {
      runs.push(new TextRun({ ...merged, text: token.text ?? "" }));
    }
  };

  for (const token of tokens) {
    switch (token.type) {
      case "text":
        runs.push(
          new TextRun({ ...style, text: token.raw ?? token.text ?? "" }),
        );
        break;

      case "escape": {
        // Raw source is a backslash plus the escaped character; keep only
        // the character.
        const raw: string = token.raw ?? "";
        const literal =
          raw.length > 1 && raw.startsWith("\\")
            ? raw.slice(1)
            : (token.text ?? raw);
        runs.push(new TextRun({ ...style, text: literal }));
        break;
      }

      case "strong":
        await pushStyled(token, { bold: true });
        break;

      case "em":
        await pushStyled(token, { italics: true });
        break;

      case "del":
        await pushStyled(token, { strike: true });
        break;

      case "codespan":
        runs.push(
          new TextRun({
            ...style,
            text: token.text,
            font: options.codeFont,
            shading: { fill: "F0F0F0" },
          }),
        );
        break;

      case "link":
        // Only the link text is rendered (coloured and underlined); the
        // target URL is not written to the document.
        await pushStyled(token, { color: "0066CC", underline: {} });
        break;

      case "br":
        runs.push(new TextRun({ text: "", break: 1 }));
        break;

      case "image": {
        // Images that cannot be resolved or loaded are skipped.
        const imageRun = await createImageRunFromToken(token, options);
        if (imageRun) {
          runs.push(imageRun);
        }
        break;
      }

      default:
        runs.push(new TextRun({ ...style, text: token.raw ?? "" }));
    }
  }

  return runs;
}

/**
 * Build a DOCX image run for an image token.
 *
 * The token's `href` (or `text` as a fallback) is resolved to a URL, fetched,
 * and embedded. Images wider than 560 px are scaled down proportionally.
 *
 * @param token - Image token providing `href` or `text`.
 * @param options - Carries the optional asset resolver.
 * @returns The image run, or `null` when the reference is empty, not
 *   renderable, cannot be fetched, or any step of loading throws.
 */
async function createImageRunFromToken(
  token: MarkdownToken,
  options: Pick<DocxOptions, "resolveAssetUrl">,
): Promise<ImageRun | null> {
  const href = String(token?.href ?? token?.text ?? "").trim();
  if (href.length === 0) return null;

  const url = await resolveAssetReference(href, options.resolveAssetUrl);
  if (!url || !isRenderableImageUrl(url)) {
    return null;
  }

  const maxWidth = 560;

  try {
    const response = await fetch(url);
    if (!response.ok) {
      return null;
    }

    const payload = await response.blob();
    const kind = getDocxImageType(payload.type, url);
    if (!kind) {
      return null;
    }

    const data = new Uint8Array(await payload.arrayBuffer());
    const natural = await getImageDimensions(payload);

    // Shrink (never enlarge) so the image fits the maximum width.
    let factor = 1;
    if (natural.width > maxWidth) {
      factor = maxWidth / natural.width;
    }
    const fit = (pixels: number): number =>
      Math.max(1, Math.round(pixels * factor));

    return new ImageRun({
      data,
      type: kind,
      transformation: {
        width: fit(natural.width),
        height: fit(natural.height),
      },
    });
  } catch {
    // Best effort: a failing image is left out of the document.
    return null;
  }
}

/**
 * Measure an image blob by loading it into an `Image` element through a
 * temporary object URL.
 *
 * @param blob - Image data.
 * @returns The natural size (each side at least 1), or 600x400 when the
 *   image fails to load. The promise never rejects from a load error.
 */
async function getImageDimensions(
  blob: Blob,
): Promise<{ width: number; height: number }> {
  return await new Promise((resolve) => {
    const objectUrl = URL.createObjectURL(blob);
    const probe = new Image();

    // Release the object URL before reporting the result.
    const settle = (size: { width: number; height: number }): void => {
      URL.revokeObjectURL(objectUrl);
      resolve(size);
    };

    probe.onload = () =>
      settle({
        width: probe.naturalWidth || 1,
        height: probe.naturalHeight || 1,
      });
    probe.onerror = () => settle({ width: 600, height: 400 });

    probe.src = objectUrl;
  });
}

/**
 * Rewrite image sources in Markdown text through `resolveAssetUrl`.
 *
 * Two forms are handled, in this order: Markdown images `![alt](target)` and
 * HTML `<img ... src="target">` attributes. A match is left untouched when
 * its target resolves to nothing or to itself.
 *
 * Replacements are spliced in at the position of each match, so resolved URLs
 * are inserted verbatim (a `$` in a URL is not treated as a replacement
 * pattern) and repeated images are each rewritten in place.
 *
 * @param markdown - Markdown source text.
 * @param resolveAssetUrl - Optional resolver; when absent the text is
 *   returned unchanged.
 * @returns The Markdown with resolved image sources.
 */
async function rewriteMarkdownImageSources(
  markdown: string,
  resolveAssetUrl?: (rawPath: string) => string | null | Promise<string | null>,
): Promise<string> {
  if (!resolveAssetUrl) {
    return markdown;
  }

  // Rebuilds `text`, replacing each match whose target (capture group
  // `targetGroup`) resolves to a different URL with `render(match, url)`.
  const rewriteMatches = async (
    text: string,
    pattern: RegExp,
    targetGroup: number,
    render: (match: RegExpMatchArray, resolved: string) => string,
  ): Promise<string> => {
    let output = "";
    let cursor = 0;

    for (const match of text.matchAll(pattern)) {
      const rawTarget = match[targetGroup]?.trim() ?? "";
      const resolved = await resolveAssetReference(rawTarget, resolveAssetUrl);
      if (!resolved || resolved === rawTarget) continue;

      const start = match.index ?? 0;
      output += text.slice(cursor, start) + render(match, resolved);
      cursor = start + match[0].length;
    }

    return output + text.slice(cursor);
  };

  const withMarkdownImages = await rewriteMatches(
    markdown,
    /!\[([^\]]*)\]\(([^)]+)\)/g,
    2,
    (match, resolved) => `![${match[1] ?? ""}](${resolved})`,
  );

  return rewriteMatches(
    withMarkdownImages,
    /(<img\b[^>]*\bsrc\s*=\s*)(["'])([^"']+)\2/gi,
    3,
    (match, resolved) => `${match[1]}${match[2]}${resolved}${match[2]}`,
  );
}

/**
 * Resolve a raw asset reference to the URL that should be used.
 *
 * @param rawPath - Reference as written in the Markdown.
 * @param resolveAssetUrl - Optional resolver for non-external references.
 * @returns `null` for an empty reference; the normalised reference when it is
 *   external, when no resolver is given, or when the resolver yields
 *   `null`/`undefined`; otherwise the resolver's result.
 */
async function resolveAssetReference(
  rawPath: string,
  resolveAssetUrl?: (rawPath: string) => string | null | Promise<string | null>,
): Promise<string | null> {
  const reference = normalizeReference(rawPath);
  if (!reference) {
    return null;
  }

  // External references are used as-is; so is everything without a resolver.
  if (isExternalReference(reference) || !resolveAssetUrl) {
    return reference;
  }

  const resolved = await resolveAssetUrl(reference);
  return resolved ?? reference;
}

/**
 * Extract the bare destination from a raw link/image target.
 *
 * - `<...>` destinations are returned whole (trimmed), so paths containing
 *   spaces survive and anything after the closing `>` (such as a title) is
 *   dropped.
 * - Otherwise a stray leading `<` or trailing `>` is removed and the text is
 *   cut at the first space or tab, which drops a trailing title.
 *
 * @param ref - Raw target text.
 * @returns The destination, or an empty string when there is none.
 */
function normalizeReference(ref: string): string {
  const trimmed = ref.trim();

  if (trimmed.startsWith("<")) {
    const closing = trimmed.indexOf(">", 1);
    if (closing !== -1) {
      return trimmed.slice(1, closing).trim();
    }
  }

  return trimmed.replace(/^<|>$/g, "").split(/[ \t]/)[0] ?? "";
}

/**
 * Tell whether a reference should bypass the asset resolver.
 *
 * @param ref - Normalised reference.
 * @returns `true` for an empty string, a fragment (`#...`), a
 *   protocol-relative URL (`//...`), a `data:` or `blob:` URL, or anything
 *   starting with a URI scheme (`scheme:`); `false` otherwise.
 */
function isExternalReference(ref: string): boolean {
  if (!ref) {
    return true;
  }

  const passThroughPrefixes = ["#", "//", "data:", "blob:"];
  if (passThroughPrefixes.some((prefix) => ref.startsWith(prefix))) {
    return true;
  }

  // Generic "scheme:" prefix, e.g. https:, mailto:, file:
  return /^[a-zA-Z][a-zA-Z0-9+.-]*:/.test(ref);
}

/**
 * Decide whether a resolved URL is worth fetching as an image.
 *
 * @param url - Resolved URL.
 * @returns `true` for `data:image/...` URLs, URLs ending in a known image
 *   extension (optionally followed by a query or fragment), any `http(s)://`
 *   URL, and any path starting with `/`; `false` otherwise.
 */
function isRenderableImageUrl(url: string): boolean {
  const hasImageExtension =
    /\.(png|jpe?g|gif|webp|bmp|ico|avif|tiff?)([?#].*)?$/i.test(url);
  const isHttpUrl = /^https?:\/\//i.test(url);

  return (
    url.startsWith("data:image/") ||
    hasImageExtension ||
    isHttpUrl ||
    url.startsWith("/")
  );
}

/**
 * Map an image's MIME type (or, failing that, its URL) to one of the image
 * types this module embeds in DOCX files.
 *
 * The MIME type is checked first. An `image/*` MIME type or a file extension
 * that identifies a different format (webp, svg, avif, tiff, ico) yields
 * `null`, so the caller can skip the image instead of embedding bytes under
 * the wrong type. When nothing identifies the format, `"png"` is assumed.
 *
 * @param mimeType - MIME type reported for the image data; may be empty.
 * @param src - URL the image was loaded from.
 * @returns The DOCX image type, or `null` for a recognised but unsupported
 *   format.
 */
function getDocxImageType(
  mimeType: string,
  src: string,
): "png" | "jpg" | "gif" | "bmp" | null {
  const mime = mimeType.toLowerCase();
  if (mime.includes("png")) return "png";
  if (mime.includes("jpeg") || mime.includes("jpg")) return "jpg";
  if (mime.includes("gif")) return "gif";
  if (mime.includes("bmp")) return "bmp";
  // A concrete image MIME type that matched none of the above.
  if (mime.startsWith("image/")) return null;

  const lower = src.toLowerCase();

  // Exact extension at the end of the URL, ignoring any query or fragment.
  const extension = /\.([a-z0-9]+)(?:[?#].*)?$/.exec(lower)?.[1];
  switch (extension) {
    case "png":
      return "png";
    case "jpg":
    case "jpeg":
      return "jpg";
    case "gif":
      return "gif";
    case "bmp":
      return "bmp";
    case "webp":
    case "svg":
    case "avif":
    case "tif":
    case "tiff":
    case "ico":
      return null;
    default:
      break;
  }

  // Loose fallback: the extension may sit in the middle of the URL.
  if (lower.includes(".png")) return "png";
  if (lower.includes(".jpg") || lower.includes(".jpeg")) return "jpg";
  if (lower.includes(".gif")) return "gif";
  if (lower.includes(".bmp")) return "bmp";

  return "png";
}

/**
 * Map a Markdown heading depth to the matching DOCX heading level.
 *
 * @param depth - Heading depth, where 1 is the top level.
 * @returns The heading level for depths 1-6, otherwise `undefined`.
 */
function getHeadingLevel(
  depth: number,
): (typeof HeadingLevel)[keyof typeof HeadingLevel] | undefined {
  const {
    HEADING_1,
    HEADING_2,
    HEADING_3,
    HEADING_4,
    HEADING_5,
    HEADING_6,
  } = HeadingLevel;

  // Depth is 1-based, the lookup table is 0-based.
  const byDepth = [
    HEADING_1,
    HEADING_2,
    HEADING_3,
    HEADING_4,
    HEADING_5,
    HEADING_6,
  ];
  return byDepth[depth - 1];
}

/**
 * Build the download file name for a given target extension.
 *
 * Surrounding whitespace is trimmed, an already-present target extension is
 * removed (so it is never doubled), and a trailing `.md` / `.markdown`
 * extension is stripped, all case-insensitively. If nothing remains, the
 * base name falls back to `document`.
 *
 * @param filename - Name supplied by the caller.
 * @param extension - Target extension including the leading dot, e.g.
 *   `".docx"`.
 * @returns The base name followed by `extension`.
 */
function normalizeFilename(filename: string, extension: string): string {
  let base = filename.trim();

  // Guard against an empty extension: `slice(0, -0)` would empty the name.
  if (
    extension !== "" &&
    base.toLowerCase().endsWith(extension.toLowerCase())
  ) {
    base = base.slice(0, -extension.length);
  }

  base = base.replace(/\.(?:md|markdown)$/i, "");

  if (base === "") {
    base = "document";
  }

  return base + extension;
}

/**
 * Trigger a browser download of `blob` under the given file name.
 *
 * A temporary `<a download>` element pointing at an object URL is added to
 * the page, clicked, and removed again. The object URL is revoked after a
 * delay rather than immediately, because revoking it right after `click()`
 * can cancel the download in browsers that start reading the blob
 * asynchronously.
 *
 * @param blob - Data to download.
 * @param filename - File name offered to the user.
 */
function downloadBlob(blob: Blob, filename: string): void {
  // Grace period before the object URL is released.
  const revokeDelayMs = 10000;

  const url = URL.createObjectURL(blob);
  const link = document.createElement("a");
  link.href = url;
  link.download = filename;
  document.body.appendChild(link);

  try {
    link.click();
  } finally {
    // Clean up even when the click handler throws.
    document.body.removeChild(link);
    setTimeout(() => URL.revokeObjectURL(url), revokeDelayMs);
  }
}