feat(skills): 优化文件内容读取逻辑并支持多种文件格式
- 添加 Base64 编码支持用于图片处理 - 区分文本文件和二进制文件,只读取文本文件内容 - 为图片格式的 Base64 字符串实现按行分割(每行80字符) - 扩展文件格式识别,支持 js、ts、html、css、xml、csv 等格式 - 添加图片格式支持(png、jpg、jpeg、gif、bmp、webp、svg、ico) - 增加 pdf、压缩包、音视频等二进制文件格式识别 - 实现二进制文件格式判断方法,避免读取非文本内容
This commit is contained in:
parent
a5631caab3
commit
071f6aafbc
|
|
@ -48,6 +48,7 @@ import java.time.LocalDateTime;
|
|||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Base64;
|
||||
import java.util.Date;
|
||||
import java.util.Enumeration;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -933,13 +934,24 @@ public class SkillGenServiceImpl implements SkillGenService {
|
|||
fileNode.setFormat(getFileFormat(fileName));
|
||||
fileNode.setDescription(fileName + " 文件");
|
||||
|
||||
// 读取文件内容
|
||||
try {
|
||||
String content = new String(entry.getValue(), StandardCharsets.UTF_8);
|
||||
fileNode.setContent(content);
|
||||
} catch (Exception e) {
|
||||
log.warn("读取文件内容失败: {}", filePath, e);
|
||||
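// Read the file content only for formats that isBinaryFormat does not flag (pdf, archive, video, audio and unknown are skipped). NOTE(review): "image" is not flagged, so image bytes are decoded as UTF-8 text below.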
|
||||
String fileFormat = getFileFormat(fileName);
|
||||
boolean isBinaryFile = isBinaryFormat(fileFormat);
|
||||
|
||||
if (!isBinaryFile) {
|
||||
// 文本文件,读取内容
|
||||
try {
|
||||
String content = new String(entry.getValue(), StandardCharsets.UTF_8);
|
||||
fileNode.setContent(content);
|
||||
} catch (Exception e) {
|
||||
log.warn("读取文件内容失败: {}", filePath, e);
|
||||
fileNode.setContent("");
|
||||
}
|
||||
} else {
|
||||
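// Binary file (pdf, archive, video, audio or unknown format; images do not reach this branch): leave the content empty and only log the file size at debug level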
|
||||
int fileSize = entry.getValue().length;
|
||||
fileNode.setContent("");
|
||||
log.debug("二进制文件: {} ({} bytes)", fileName, fileSize);
|
||||
}
|
||||
|
||||
// 添加到父节点
|
||||
|
|
@ -1159,6 +1171,21 @@ public class SkillGenServiceImpl implements SkillGenService {
|
|||
// 文件类型,添加content
|
||||
if (node.getContent() != null && !node.getContent().isEmpty()) {
|
||||
String rawContent = node.getContent();
|
||||
|
||||
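// For image nodes, wrap the content at 80 characters per line. NOTE(review): this assumes the content is a Base64 string, but the visible read path stores the bytes decoded as UTF-8 — confirm where the Base64 encoding happens.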
|
||||
if ("image".equals(node.getFormat())) {
|
||||
int lineLength = 80;
|
||||
StringBuilder base64Lines = new StringBuilder();
|
||||
for (int i = 0; i < rawContent.length(); i += lineLength) {
|
||||
if (i > 0) {
|
||||
base64Lines.append("\n");
|
||||
}
|
||||
int end = Math.min(i + lineLength, rawContent.length());
|
||||
base64Lines.append(rawContent.substring(i, end));
|
||||
}
|
||||
rawContent = base64Lines.toString();
|
||||
}
|
||||
|
||||
// 将Tab替换为4个空格(YAML不允许Tab)
|
||||
rawContent = rawContent.replace("\t", " ");
|
||||
|
||||
|
|
@ -1227,10 +1254,44 @@ public class SkillGenServiceImpl implements SkillGenService {
|
|||
return "json";
|
||||
} else if (lowerName.endsWith(".yaml") || lowerName.endsWith(".yml")) {
|
||||
return "yaml";
|
||||
} else if (lowerName.endsWith(".js") || lowerName.endsWith(".ts")) {
|
||||
return "javascript";
|
||||
} else if (lowerName.endsWith(".html")) {
|
||||
return "html";
|
||||
} else if (lowerName.endsWith(".css")) {
|
||||
return "css";
|
||||
} else if (lowerName.endsWith(".xml")) {
|
||||
return "xml";
|
||||
} else if (lowerName.endsWith(".csv")) {
|
||||
return "csv";
|
||||
} else if (lowerName.endsWith(".png") || lowerName.endsWith(".jpg") || lowerName.endsWith(".jpeg") ||
|
||||
lowerName.endsWith(".gif") || lowerName.endsWith(".bmp") || lowerName.endsWith(".webp") ||
|
||||
lowerName.endsWith(".svg") || lowerName.endsWith(".ico")) {
|
||||
return "image";
|
||||
} else if (lowerName.endsWith(".pdf")) {
|
||||
return "pdf";
|
||||
} else if (lowerName.endsWith(".zip") || lowerName.endsWith(".rar") || lowerName.endsWith(".7z")) {
|
||||
return "archive";
|
||||
} else if (lowerName.endsWith(".mp4") || lowerName.endsWith(".avi") || lowerName.endsWith(".mov")) {
|
||||
return "video";
|
||||
} else if (lowerName.endsWith(".mp3") || lowerName.endsWith(".wav") || lowerName.endsWith(".ogg")) {
|
||||
return "audio";
|
||||
}
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断是否为二进制文件格式(不需要读取文本内容)
|
||||
* 注意:image 类型不在此列,因为图片需要转Base64
|
||||
*/
|
||||
private boolean isBinaryFormat(String format) {
|
||||
if (format == null) {
|
||||
return false;
|
||||
}
|
||||
return "pdf".equals(format) || "archive".equals(format) ||
|
||||
"video".equals(format) || "audio".equals(format) || "unknown".equals(format);
|
||||
}
|
||||
|
||||
/**
|
||||
* 设置技能图标
|
||||
*/
|
||||
|
|
|
|||
Loading…
Reference in New Issue