feat(skills): rar格式按照原样结构解析调整完成

This commit is contained in:
wangzhiwei 2026-04-08 15:30:21 +08:00
parent bd252efd20
commit 713c28a534
5 changed files with 337 additions and 84 deletions

View File

@ -1,5 +1,12 @@
package com.kexue.skills.common.util;
import com.kexue.skills.utils.EscapeCharacterUtils;
import net.sf.sevenzipjbinding.IInArchive;
import net.sf.sevenzipjbinding.SevenZip;
import net.sf.sevenzipjbinding.SevenZipException;
import net.sf.sevenzipjbinding.impl.RandomAccessFileInStream;
import net.sf.sevenzipjbinding.simple.ISimpleInArchive;
import net.sf.sevenzipjbinding.simple.ISimpleInArchiveItem;
import org.yaml.snakeyaml.DumperOptions;
import org.yaml.snakeyaml.Yaml;
@ -12,10 +19,6 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import net.sf.sevenzipjbinding.*;
import net.sf.sevenzipjbinding.impl.RandomAccessFileInStream;
import net.sf.sevenzipjbinding.simple.ISimpleInArchive;
import net.sf.sevenzipjbinding.simple.ISimpleInArchiveItem;
/**
* 技能包解析工具类
@ -23,6 +26,11 @@ import net.sf.sevenzipjbinding.simple.ISimpleInArchiveItem;
*/
public class SkillZipParser {
/**
 * Matches a front-matter block at the very start of the input: a line
 * consisting of {@code ---}, followed by arbitrary content, followed by
 * another {@code ---} line and its line terminator (LF or CRLF).
 * <p>
 * Group 1 captures the text between the two delimiter lines. The capture is
 * lazy, so it stops at the first closing {@code ---} line, and
 * {@link Pattern#DOTALL} lets it span multiple lines. Because MULTILINE is
 * not set, {@code ^} anchors to the beginning of the whole input only. A
 * closing {@code ---} that is not followed by a line terminator does not match.
 */
private static final Pattern FRONT_MATTER_PATTERN = Pattern.compile(
"^---\\r?\\n(.*?)\\r?\\n---\\r?\\n",
Pattern.DOTALL
);
/**
* 提取压缩包中的skillMdText
* @param filePath 压缩文件路径
@ -80,8 +88,12 @@ public class SkillZipParser {
}
}
Map<String, Object> packageMap = new HashMap<>();
packageMap.put("package", skillStructure);
// 生成yaml
return generateYaml(skillStructure);
return generateYaml(packageMap);
}
@ -106,61 +118,95 @@ public class SkillZipParser {
throw new IOException("Cannot read RAR file: " + rarFilePath);
}
// 打开RAR文件并解析归档内容
try (RandomAccessFile randomAccessFile = new RandomAccessFile(rarFile, "r");
IInArchive archive = SevenZip.openInArchive(null, new RandomAccessFileInStream(randomAccessFile))) {
// 使用简单接口
ISimpleInArchive simpleInArchive = archive.getSimpleInterface();
// 首先尝试在根目录查找md文件
// 首先尝试在根目录查找md文件用于调用deepseek生成package下得第一个节点
for (ISimpleInArchiveItem item : simpleInArchive.getArchiveItems()) {
// 检查是否是根目录下的md文件
String path = item.getPath();
if (!item.isFolder() && path.endsWith(".md") && !path.contains("/") && !path.contains("\\")) {
// 读取文件内容
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
item.extractSlow(data -> {
try {
outputStream.write(data);
} catch (IOException e) {
throw new RuntimeException(e);
}
return data.length;
});
String content = outputStream.toString(StandardCharsets.UTF_8);
if (path.toLowerCase().contains("skill.md") || path.toLowerCase().contains("readme.md")){
// 读取文件内容
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
item.extractSlow(data -> {
try {
outputStream.write(data);
} catch (IOException e) {
throw new RuntimeException(e);
}
return data.length;
});
// 存储md文件的完整内容
skillInfo.put("skillMdText", content);
String content = outputStream.toString(StandardCharsets.UTF_8);
// 解析md内容
parseSkillsMd(content, skillInfo);
foundMdFile = true;
break; // 找到一个md文件后就停止
// 存储md文件的完整内容
skillInfo.put("skillMdText", content);
// 解析md内容
parseSkillsMd(content, skillInfo);
foundMdFile = true;
break; // 找到一个md文件后就停止
}
}
}
// 如果根目录没有找到md文件检查根目录下的文件夹
// 如果根目录没有找到md文件递归进入子目录查找
if (!foundMdFile) {
// 收集根目录下的文件夹
List<String> rootFolders = new ArrayList<>();
// 收集所有条目用于后续处理
List<ISimpleInArchiveItem> allItems = new ArrayList<>();
for (ISimpleInArchiveItem item : simpleInArchive.getArchiveItems()) {
if (item.isFolder()) {
String path = item.getPath();
// 确保是根目录下的文件夹路径中不包含/
if (!path.contains("/") && !path.contains("\\")) {
rootFolders.add(path);
}
}
allItems.add(item);
}
// 检查每个根目录文件夹下的md文件
for (String folder : rootFolders) {
for (ISimpleInArchiveItem item : simpleInArchive.getArchiveItems()) {
// 递归查找md文件
String mdRootDir = findMdFileRecursively(allItems, "", skillInfo);
// 如果找到了md文件以该目录为根目录解析所有文件和文件夹
if (foundMdFile && mdRootDir != null && !mdRootDir.isEmpty()) {
// 清空之前可能添加的children如果有的话
if (skillInfo.containsKey("structure")) {
Map<String, Object> structure = (Map<String, Object>) skillInfo.get("structure");
if (structure.containsKey("children")) {
((List<?>) structure.get("children")).clear();
}
}
// 以找到的md文件所在目录为根目录构建目录树结构
List<Map<String, Object>> children = new ArrayList<>();
Map<String, Map<String, Object>> directoryMap = new HashMap<>();
// 首先处理所有目录只处理mdRootDir及其子目录下的
for (ISimpleInArchiveItem item : allItems) {
if (item.isFolder()) {
String path = item.getPath();
// 确保路径以/结尾
if (!path.endsWith("/")) {
path = path + "/";
}
// 只处理mdRootDir目录下的文件夹
if (path.startsWith(mdRootDir + "/") || path.equals(mdRootDir + "/")) {
String relativePath = path.substring(mdRootDir.length() + 1);
if (!relativePath.isEmpty()) {
String[] pathParts = relativePath.split("/");
createDirectoryTree(children, directoryMap, pathParts, path);
}
}
}
}
// 然后处理所有文件只处理mdRootDir目录下的
for (ISimpleInArchiveItem item : allItems) {
if (!item.isFolder()) {
String path = item.getPath();
// 检查是否是该文件夹下的md文件
if (path.endsWith(".md") && (path.equals(folder + "/skill.md") || path.equals(folder + "/SKILL.md") ||
path.equals(folder + "/readme.md") || path.equals(folder + "/README.md"))) {
// 只处理mdRootDir目录下的文件
if (path.startsWith(mdRootDir + "/")) {
String relativePath = path.substring(mdRootDir.length() + 1);
String[] pathParts = relativePath.split("/");
String fileName = pathParts[pathParts.length - 1];
// 读取文件内容
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
item.extractSlow(data -> {
@ -172,19 +218,21 @@ public class SkillZipParser {
return data.length;
});
String content = outputStream.toString(StandardCharsets.UTF_8);
// 存储md文件的完整内容
skillInfo.put("skillMdText", content);
// 解析md内容
parseSkillsMd(content, skillInfo);
foundMdFile = true;
break; // 找到一个md文件后就停止
String fileContent = outputStream.toString(StandardCharsets.UTF_8);
addFileToTreeFromRar(children, directoryMap, pathParts, fileName, fileContent, path);
}
}
}
if (foundMdFile) break; // 找到一个md文件后就停止
// 将构建好的目录树存储到skillInfo中
Map<String, Object> structure = new LinkedHashMap<>();
structure.put("name", skillInfo.getOrDefault("name", defaultSkillName));
structure.put("type", "directory");
structure.put("path", ".");
structure.put("format", "directory");
structure.put("description", skillInfo.getOrDefault("description", "Skill package"));
structure.put("children", children);
skillInfo.put("structure", structure);
}
}
}
@ -214,6 +262,7 @@ public class SkillZipParser {
private static Map<String, Object> extractSkillInfo(ZipFile zipFile, String defaultSkillName) throws IOException {
Map<String, Object> skillInfo = new LinkedHashMap<>();
boolean foundMdFile = false;
String mdRootDir = "";
// 尝试从zip根目录的md文件中提取信息
Enumeration<? extends ZipEntry> entries = zipFile.entries();
@ -235,6 +284,7 @@ public class SkillZipParser {
// 解析md内容
parseSkillsMd(content.toString(), skillInfo);
foundMdFile = true;
mdRootDir = ".";
break; // 找到一个md文件后就停止
}
}
@ -280,6 +330,7 @@ public class SkillZipParser {
// 解析md内容
parseSkillsMd(content.toString(), skillInfo);
foundMdFile = true;
mdRootDir = folder;
break; // 找到一个md文件后就停止
}
}
@ -289,9 +340,81 @@ public class SkillZipParser {
}
}
// 如果找到了md文件构建目录树结构
if (foundMdFile && mdRootDir != null && !mdRootDir.isEmpty()) {
// 清空之前可能添加的children如果有的话
if (skillInfo.containsKey("structure")) {
Map<String, Object> structure = (Map<String, Object>) skillInfo.get("structure");
if (structure.containsKey("children")) {
((List<?>) structure.get("children")).clear();
}
}
// 以找到的md文件所在目录为根目录构建目录树结构
List<Map<String, Object>> children = new ArrayList<>();
Map<String, Map<String, Object>> directoryMap = new HashMap<>();
// 首先处理所有目录只处理mdRootDir及其子目录下的
entries = zipFile.entries();
while (entries.hasMoreElements()) {
ZipEntry entry = entries.nextElement();
if (entry.isDirectory()) {
String path = entry.getName();
// 确保路径以/结尾
if (!path.endsWith("/")) {
path = path + "/";
}
// 只处理mdRootDir目录下的文件夹
if (path.startsWith(mdRootDir + "/") || path.equals(mdRootDir + "/")) {
String relativePath = path.substring(mdRootDir.length() + 1);
if (!relativePath.isEmpty()) {
String[] pathParts = relativePath.split("/");
createDirectoryTree(children, directoryMap, pathParts, path);
}
}
}
}
// 然后处理所有文件只处理mdRootDir目录下的
entries = zipFile.entries();
while (entries.hasMoreElements()) {
ZipEntry entry = entries.nextElement();
if (!entry.isDirectory()) {
String path = entry.getName();
// 只处理mdRootDir目录下的文件
if (path.startsWith(mdRootDir + "/")) {
String relativePath = path.substring(mdRootDir.length() + 1);
String[] pathParts = relativePath.split("/");
addFileToTree(children, directoryMap, pathParts, entry, zipFile);
} else if (".".equals(mdRootDir) && !path.contains("/")) {
// 处理根目录下的文件
String[] pathParts = {path};
addFileToTree(children, directoryMap, pathParts, entry, zipFile);
}
}
}
// 将构建好的目录树存储到skillInfo中
Map<String, Object> structure = new LinkedHashMap<>();
// structure.put("name", skillInfo.getOrDefault("name", defaultSkillName));
// structure.put("type", "directory");
// structure.put("path", ".");
// structure.put("format", "directory");
// structure.put("description", skillInfo.getOrDefault("description", "Skill package"));
// structure.put("children", children);
skillInfo.put("structure", structure);
}
// 如果没有找到 md 文件使用默认值
if (!foundMdFile) {
skillInfo.put("name", defaultSkillName);
skillInfo.put("description", "Skill uploaded via uploadSkillV2");
skillInfo.put("tags", Arrays.asList("10001", "10002"));
} else {
// 确保 tags 不为 null
if (!skillInfo.containsKey("tags")) {
skillInfo.put("tags", Arrays.asList("10001"));
}
}
return skillInfo;
@ -304,14 +427,14 @@ public class SkillZipParser {
*/
private static void parseSkillsMd(String content, Map<String, Object> skillInfo) {
// 解析技能名称
Pattern namePattern = Pattern.compile("#\s+(.*)");
Pattern namePattern = Pattern.compile("#\\s+(.*)");
Matcher nameMatcher = namePattern.matcher(content);
if (nameMatcher.find()) {
skillInfo.put("name", nameMatcher.group(1).trim());
}
// 解析技能描述
Pattern descPattern = Pattern.compile("##\s+Description\s+(.*?)(?=##|$)", Pattern.DOTALL);
Pattern descPattern = Pattern.compile("##\\s+Description\\s+(.*?)(?=##|$)", Pattern.DOTALL);
Matcher descMatcher = descPattern.matcher(content);
if (descMatcher.find()) {
String description = descMatcher.group(1).trim();
@ -319,11 +442,11 @@ public class SkillZipParser {
}
// 解析技能标签
Pattern tagPattern = Pattern.compile("##\s+Tags\s+(.*?)(?=##|$)", Pattern.DOTALL);
Pattern tagPattern = Pattern.compile("##\\s+Tags\\s+(.*?)(?=##|$)", Pattern.DOTALL);
Matcher tagMatcher = tagPattern.matcher(content);
if (tagMatcher.find()) {
String tagsSection = tagMatcher.group(1);
Pattern tagItemPattern = Pattern.compile("-\s+(.*)");
Pattern tagItemPattern = Pattern.compile("-\\s+(.*)");
Matcher tagItemMatcher = tagItemPattern.matcher(tagsSection);
List<String> tags = new ArrayList<>();
while (tagItemMatcher.find()) {
@ -335,6 +458,96 @@ public class SkillZipParser {
}
}
/**
 * Searches the archive depth-first for a Markdown ({@code .md}) file and
 * reports the directory that contains it.
 * <p>
 * The directory {@code currentDir} is inspected first: every non-folder item
 * located directly inside it whose name ends with {@code .md} is a candidate.
 * The first candidate that can be extracted is decoded as UTF-8, stored in
 * {@code skillInfo} under the key {@code "skillMdText"} and handed to
 * {@code parseSkillsMd}. If no candidate is found, each direct sub-directory
 * (taken from the archive's folder entries, in listing order) is searched
 * recursively.
 * <p>
 * Path separators are normalised to {@code /} before any comparison, so both
 * {@code a\b} and {@code a/b} style entries are handled.
 *
 * @param allItems   all entries of the archive
 * @param currentDir directory to inspect, using {@code /} separators and no
 *                   trailing slash; the empty string denotes the archive root
 * @param skillInfo  map that receives the Markdown text and the parsed fields
 * @return {@code "."} when the file was found in the archive root, the
 *         directory path when it was found in a sub-directory, or
 *         {@code null} when no Markdown file could be read anywhere below
 *         {@code currentDir}
 * @throws SevenZipException if reading an entry's metadata fails
 */
private static String findMdFileRecursively(List<ISimpleInArchiveItem> allItems, String currentDir, Map<String, Object> skillInfo) throws SevenZipException {
    // Prefix every entry below currentDir must start with ("" for the archive root).
    String dirPrefix = currentDir.isEmpty() ? "" : currentDir + "/";

    // Pass 1: look for an .md file located directly in currentDir.
    for (ISimpleInArchiveItem item : allItems) {
        if (item.isFolder()) {
            continue;
        }
        String path = item.getPath().replace("\\", "/");
        if (!path.startsWith(dirPrefix)) {
            continue;
        }
        String relativePath = path.substring(dirPrefix.length());
        // A remaining "/" means the file lives in a deeper directory.
        if (relativePath.contains("/") || !relativePath.endsWith(".md")) {
            continue;
        }
        try {
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            item.extractSlow(data -> {
                try {
                    outputStream.write(data);
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
                return data.length;
            });
            String content = outputStream.toString(StandardCharsets.UTF_8);

            // Keep the complete Markdown text, then derive the structured fields from it.
            skillInfo.put("skillMdText", content);
            parseSkillsMd(content, skillInfo);

            return currentDir.isEmpty() ? "." : currentDir;
        } catch (Exception e) {
            // Deliberate best effort: an unreadable or unparsable candidate is
            // skipped so that the remaining entries can still be tried.
            continue;
        }
    }

    // Pass 2: collect the direct sub-directories of currentDir (insertion-ordered, de-duplicated).
    Set<String> subDirs = new LinkedHashSet<>();
    for (ISimpleInArchiveItem item : allItems) {
        if (!item.isFolder()) {
            continue;
        }
        String path = item.getPath().replace("\\", "/");
        // Drop a trailing slash BEFORE computing the relative path; otherwise every
        // folder would appear to contain a "/" and nested directories would never
        // be recognised as direct children.
        if (path.endsWith("/")) {
            path = path.substring(0, path.length() - 1);
        }
        if (!path.startsWith(dirPrefix)) {
            // Also filters out the entry of currentDir itself, which lacks the "/" suffix.
            continue;
        }
        String relativePath = path.substring(dirPrefix.length());
        // An empty name would re-enter the same directory; a "/" marks a deeper level.
        if (!relativePath.isEmpty() && !relativePath.contains("/")) {
            subDirs.add(path);
        }
    }

    // Descend into each sub-directory and stop at the first hit.
    for (String subDir : subDirs) {
        String result = findMdFileRecursively(allItems, subDir, skillInfo);
        if (result != null) {
            return result;
        }
    }

    // Nothing found in this directory or below it.
    return null;
}
/**
* 从rar文件生成技能包结构
* @param rarFilePath rar文件路径
@ -390,10 +603,10 @@ public class SkillZipParser {
if (item.isFolder()) {
String path = item.getPath();
// 确保路径以/结尾 zip 处理一致
if (!path.endsWith("/")) {
path = path + "/";
if (!path.endsWith("\\")) {
path = path + "\\";
}
String[] pathParts = path.split("/");
String[] pathParts = path.split("\\\\");
createDirectoryTree(children, directoryMap, pathParts, path);
}
}
@ -402,7 +615,7 @@ public class SkillZipParser {
for (ISimpleInArchiveItem item : archiveItems) {
if (!item.isFolder()) {
String path = item.getPath();
String[] pathParts = path.split("/");
String[] pathParts = path.split("\\\\");
String fileName = pathParts[pathParts.length - 1];
// 读取文件内容
@ -422,7 +635,7 @@ public class SkillZipParser {
}
}
// 确保包含skills.md文件
/* // 确保包含skills.md文件
boolean hasSkillsMd = false;
for (Map<String, Object> child : children) {
if ("skills.md".equals(child.get("name")) && "file".equals(child.get("type"))) {
@ -446,7 +659,7 @@ public class SkillZipParser {
if (!hasScriptsDir) {
Map<String, Object> scriptsDirNode = createScriptsDirNode();
children.add(scriptsDirNode);
}
}*/
structure.put("children", children);
skillStructure.put("structure", structure);
@ -459,7 +672,6 @@ public class SkillZipParser {
* @param children 子节点列表
* @param directoryMap 目录映射
* @param pathParts 路径部分
* @param item rar 文件项
* @param fileName 文件名
* @param fileContent 文件内容
* @param filePath 文件路径
@ -504,14 +716,52 @@ public class SkillZipParser {
fileNode.put("name", fileName);
fileNode.put("type", "file");
fileNode.put("path", filePath);
fileNode.put("path", "/"+filePath.toString().replaceAll("\\\\","/"));
fileNode.put("format", getFileFormat(fileName));
fileNode.put("description", fileName + " file");
fileNode.put("content", fileContent);
if (fileName.endsWith(".md")){
fileNode.put("content", EscapeCharacterUtils.removeEscapeCharacters(fileContent,true));
}else {
fileNode.put("content", fileContent);
}
return fileNode;
}
/**
 * Re-packages a Markdown document as a YAML document whose body is carried
 * in a literal block scalar named {@code content}.
 * <p>
 * When the input starts with a front-matter block (see
 * {@code FRONT_MATTER_PATTERN}), the trimmed text between the delimiters is
 * emitted verbatim after the opening {@code ---} line, and the trimmed
 * remainder of the input becomes the body. Otherwise the front-matter part
 * is empty and the whole input is used as the body. The result ends with a
 * closing {@code ---} and no trailing newline.
 *
 * @param mdContent Markdown text, optionally starting with front matter
 * @return the assembled YAML text
 * @throws IOException declared by the signature; no statement in this
 *                     method throws it
 */
private static String convertMdToCleanYaml(String mdContent) throws IOException {
    String header = "";
    String body = mdContent;

    // Split off the leading front matter, if there is one.
    Matcher frontMatterMatcher = FRONT_MATTER_PATTERN.matcher(mdContent);
    if (frontMatterMatcher.find()) {
        header = frontMatterMatcher.group(1).trim();
        body = mdContent.substring(frontMatterMatcher.end()).trim();
    }

    // Opening delimiter, front matter, then the block-scalar key.
    StringBuilder out = new StringBuilder();
    out.append("---\n")
            .append(header)
            .append("\n")
            .append("content: |\n");

    // Each body line is indented so it belongs to the block scalar.
    // NOTE(review): the prefix here is a single space, while the original
    // comment spoke of a 2-space indent — confirm which one is intended.
    for (String bodyLine : body.split("\\r?\\n")) {
        out.append(" ").append(bodyLine).append("\n");
    }

    return out.append("---").toString();
}
/**
* 生成技能包结构
* @param zipFile zip文件对象
@ -624,7 +874,7 @@ public class SkillZipParser {
directoryNode = new LinkedHashMap<>();
directoryNode.put("name", part);
directoryNode.put("type", "directory");
directoryNode.put("path", pathKey);
directoryNode.put("path", "/"+pathKey);
directoryNode.put("format", "directory");
directoryNode.put("description", part + " directory");
directoryNode.put("children", new ArrayList<>());
@ -655,7 +905,7 @@ public class SkillZipParser {
String fileName = pathParts[pathParts.length - 1];
// 构建目录路径
StringBuilder directoryPath = new StringBuilder();
StringBuilder directoryPath = new StringBuilder("/");
for (int i = 0; i < pathParts.length - 1; i++) {
String part = pathParts[i];
if (!part.isEmpty()) {
@ -711,7 +961,7 @@ public class SkillZipParser {
fileNode.put("name", name);
fileNode.put("type", "file");
fileNode.put("path", entry.getName());
fileNode.put("path", "/"+entry.getName().replaceAll("\\\\","/"));
fileNode.put("format", getFileFormat(name));
fileNode.put("description", name + " file");

View File

@ -125,7 +125,6 @@ public class AccountController {
*/
@Operation(summary = "减少账户余额token消费转换", description = "减少账户余额token消费转换")
@PostMapping("/reduceBalanceWithToken")
@RequireAuth
public CommonResult<BigDecimal> reduceBalanceWithToken(@RequestBody TokenConsumptionDto tokenConsumptionDto) {
    // Delegate to the account service, then wrap the value it returns in a success response.
    BigDecimal serviceResult = this.accountService.reduceBalanceWithToken(tokenConsumptionDto);
    return CommonResult.success(serviceResult);
}

View File

@ -103,4 +103,5 @@ public class SkillGenController {
return CommonResult.failed("上传失败:" + e.getMessage());
}
}
}

View File

@ -60,4 +60,5 @@ public interface SkillGenService {
* @return 生成的技能内容
*/
CmsContent uploadSkillV2(byte[] fileBytes, String fileName);
}

View File

@ -592,6 +592,8 @@ public class SkillGenServiceImpl implements SkillGenService {
cmsContent.setIcon(list.get(0).getIcon());
}
}
// 保存到数据库
cmsContentMapper.insert(cmsContent);
// 删除临时文件
tempFile.delete();