目前测试“任务队列 => message-dispatcher => backend”的流程正常,但任务完成后 ComfyUI 实例无法保存生成结果,而通过 ComfyUI 界面提交的任务却能保存,所以还差“获取结果并上传”的步骤;
即使没有获取到结果,回传也没有问题,能够正常处理错误消息。
1083
AI实现指导提示词.md
|
|
@ -6,5 +6,11 @@ JWT_SECRET=comfyui-cluster-bridge-secret-key-2024
|
|||
JWT_EXPIRES_IN=24h
|
||||
ADMIN_USERNAME=admin
|
||||
ADMIN_PASSWORD=2233..2233
|
||||
MESSAGE_DISPATCHER_URL=ws://localhost:4000/ws
|
||||
|
||||
MESSAGE_DISPATCHER_URL=wss://www.whjbjm.com/message-dispatcher
|
||||
INTERNAL_UPLOAD_URL=http://43.134.182.189:9000/api/internal/uploadGeneratedFile
|
||||
INTERNAL_API_TOKEN=123456/message-dispatcher
|
||||
BRIDGE_ID=bridge-1
|
||||
WORKFLOW_RESOURCES_URL=http://117.72.204.159/AIGC/static/public/workflows
|
||||
|
||||
COMFYUI_OUTPUT_DIR=/root/ComfyUI/output
|
||||
|
|
@ -24,6 +24,10 @@
|
|||
"taskQueue": {
|
||||
"websocketUrl": "ws://localhost:8080/ws"
|
||||
},
|
||||
"messageDispatcher": {
|
||||
"websocketUrl": "wss://www.whjbjm.com/message-dispatcher",
|
||||
"bridgeId": "bridge-1"
|
||||
},
|
||||
"upload": {
|
||||
"url": "https://shuzhiren.xueai.art/upload/file"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ class ConfigManager {
|
|||
timeout: 3000
|
||||
},
|
||||
messageDispatcher: {
|
||||
websocketUrl: process.env.MESSAGE_DISPATCHER_URL || 'ws://localhost:4000/ws',
|
||||
websocketUrl: process.env.MESSAGE_DISPATCHER_URL || 'wss://www.whjbjm.com/message-dispatcher',
|
||||
bridgeId: process.env.BRIDGE_ID || 'bridge-1'
|
||||
},
|
||||
upload: {
|
||||
|
|
@ -72,6 +72,13 @@ class ConfigManager {
|
|||
* @returns {*} 配置值
|
||||
*/
|
||||
get(key, defaultValue = null) {
|
||||
if (key === 'messageDispatcher.websocketUrl') {
|
||||
return process.env.MESSAGE_DISPATCHER_URL || defaultValue;
|
||||
}
|
||||
if (key === 'messageDispatcher.bridgeId') {
|
||||
return process.env.BRIDGE_ID || defaultValue;
|
||||
}
|
||||
|
||||
const keys = key.split('.');
|
||||
let value = this.config;
|
||||
for (const k of keys) {
|
||||
|
|
|
|||
|
|
@ -1,148 +1,61 @@
|
|||
|
||||
/**
|
||||
* file-uploader模块 - 文件上传处理
|
||||
*/
|
||||
|
||||
import multer from 'multer';
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
import path from 'path';
|
||||
import fs from 'fs';
|
||||
import logger from '../logger/index.js';
|
||||
import config from '../config/index.js';
|
||||
import axios from 'axios';
|
||||
import FormData from 'form-data';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import logger from '../logger/index.js';
|
||||
|
||||
const uploadDir = path.resolve(process.cwd(), 'uploads');
|
||||
|
||||
const INTERNAL_UPLOAD_URL = process.env.INTERNAL_UPLOAD_URL || 'http://43.134.182.189:9000/api/internal/uploadGeneratedFile';
|
||||
const INTERNAL_API_TOKEN = process.env.INTERNAL_API_TOKEN || '';
|
||||
|
||||
if (!fs.existsSync(uploadDir)) {
|
||||
fs.mkdirSync(uploadDir, { recursive: true });
|
||||
}
|
||||
|
||||
const storage = multer.diskStorage({
|
||||
destination: (req, file, cb) => {
|
||||
cb(null, uploadDir);
|
||||
},
|
||||
filename: (req, file, cb) => {
|
||||
const ext = path.extname(file.originalname);
|
||||
cb(null, `${uuidv4()}${ext}`);
|
||||
}
|
||||
});
|
||||
|
||||
const upload = multer({
|
||||
storage,
|
||||
limits: {
|
||||
fileSize: 100 * 1024 * 1024
|
||||
}
|
||||
});
|
||||
|
||||
class FileUploader {
|
||||
constructor() {
|
||||
this.files = new Map();
|
||||
}
|
||||
async uploadToExternalServer(filePath, originalFilename) {
|
||||
const formData = new FormData();
|
||||
formData.append('file', fs.createReadStream(filePath), originalFilename);
|
||||
|
||||
/**
|
||||
* 获取multer上传中间件
|
||||
*/
|
||||
getUploadMiddleware() {
|
||||
return upload.single('file');
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理文件上传
|
||||
* @param {object} file - 文件对象
|
||||
* @returns {object} 文件信息
|
||||
*/
|
||||
async uploadFile(file) {
|
||||
const fileId = uuidv4();
|
||||
const fileInfo = {
|
||||
id: fileId,
|
||||
filename: file.originalname,
|
||||
path: file.path,
|
||||
size: file.size,
|
||||
mimetype: file.mimetype,
|
||||
uploadedAt: new Date().toISOString()
|
||||
const headers = {
|
||||
'Content-Type': `multipart/form-data; boundary=${formData.getBoundary()}`
|
||||
};
|
||||
|
||||
this.files.set(fileId, fileInfo);
|
||||
logger.info(`文件已上传: ${fileId} - ${file.originalname}`);
|
||||
return fileInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* 上传文件到外部服务器
|
||||
* @param {string} filePath - 文件路径
|
||||
* @param {string} originalName - 原始文件名
|
||||
* @returns {object} 上传结果
|
||||
*/
|
||||
async uploadToExternalServer(filePath, originalName) {
|
||||
const uploadUrl = config.get('upload.url', 'https://shuzhiren.xueai.art/upload/file');
|
||||
|
||||
const formData = new FormData();
|
||||
formData.append('file', fs.createReadStream(filePath), {
|
||||
filename: originalName
|
||||
});
|
||||
|
||||
const response = await axios.post(uploadUrl, formData, {
|
||||
headers: formData.getHeaders(),
|
||||
maxContentLength: Infinity,
|
||||
maxBodyLength: Infinity
|
||||
});
|
||||
|
||||
logger.info(`文件已上传到外部服务器: ${originalName}`);
|
||||
return response.data;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取文件信息
|
||||
* @param {string} fileId - 文件ID
|
||||
* @returns {object|null} 文件信息
|
||||
*/
|
||||
getFile(fileId) {
|
||||
return this.files.get(fileId) || null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取文件列表
|
||||
* @returns {Array} 文件列表
|
||||
*/
|
||||
getFiles() {
|
||||
return Array.from(this.files.values());
|
||||
}
|
||||
|
||||
/**
|
||||
* 删除文件
|
||||
* @param {string} fileId - 文件ID
|
||||
* @returns {boolean} 是否成功
|
||||
*/
|
||||
deleteFile(fileId) {
|
||||
const fileInfo = this.files.get(fileId);
|
||||
if (!fileInfo) {
|
||||
return false;
|
||||
if (INTERNAL_API_TOKEN) {
|
||||
headers['Authorization'] = `Bearer ${INTERNAL_API_TOKEN}`;
|
||||
}
|
||||
|
||||
if (fs.existsSync(fileInfo.path)) {
|
||||
fs.unlinkSync(fileInfo.path);
|
||||
}
|
||||
try {
|
||||
logger.info(`正在上传文件到外部服务器: ${INTERNAL_UPLOAD_URL}, 文件名: ${originalFilename}`);
|
||||
|
||||
this.files.delete(fileId);
|
||||
logger.info(`文件已删除: ${fileId}`);
|
||||
return true;
|
||||
}
|
||||
const response = await axios.post(INTERNAL_UPLOAD_URL, formData, {
|
||||
headers,
|
||||
timeout: 60000
|
||||
});
|
||||
|
||||
/**
|
||||
* 清理过期文件
|
||||
* @param {number} maxAgeHours - 最大保留时间(小时)
|
||||
*/
|
||||
cleanupOldFiles(maxAgeHours = 24) {
|
||||
const now = Date.now();
|
||||
const maxAge = maxAgeHours * 60 * 60 * 1000;
|
||||
|
||||
for (const [fileId, fileInfo] of this.files) {
|
||||
const age = now - new Date(fileInfo.uploadedAt).getTime();
|
||||
if (age > maxAge) {
|
||||
this.deleteFile(fileId);
|
||||
if (response.data && response.data.code === '0' && response.data.data && response.data.data.url) {
|
||||
logger.info(`文件上传成功: ${response.data.data.url}`);
|
||||
return response.data.data;
|
||||
} else {
|
||||
logger.error(`文件上传失败: ${JSON.stringify(response.data)}`);
|
||||
throw new Error(response.data?.msg || '文件上传失败');
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error('文件上传出错:', error.message);
|
||||
throw error;
|
||||
} finally {
|
||||
if (fs.existsSync(filePath)) {
|
||||
fs.unlinkSync(filePath);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export default new FileUploader();
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,13 @@
|
|||
/**
|
||||
* TaskForwarder - 任务转发器
|
||||
*
|
||||
* 设计说明:
|
||||
* - clientId 使用实例 ID(固定不变),实现 WebSocket 连接复用
|
||||
* - prompt_id 使用 taskId,便于任务追踪和查询
|
||||
* - 同一实例的所有任务共享同一个 WebSocket 连接
|
||||
* - 通过 prompt_id 区分不同任务的消息
|
||||
*/
|
||||
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
import logger from '../logger/index.js';
|
||||
import clusterManager from '../cluster-manager/index.js';
|
||||
|
|
@ -16,7 +26,10 @@ class TaskForwarder {
|
|||
}
|
||||
|
||||
setupEventListeners() {
|
||||
logger.info('[TaskForwarder] 设置事件监听器');
|
||||
|
||||
webSocketClient.on('execution_start', ({ instanceId, promptId }) => {
|
||||
logger.info(`[TaskForwarder] 收到 execution_start 事件: instanceId=${instanceId}, promptId=${promptId}`);
|
||||
this.handleExecutionStart(instanceId, promptId).catch(err => {
|
||||
logger.error('处理 execution_start 事件失败:', err);
|
||||
});
|
||||
|
|
@ -29,12 +42,21 @@ class TaskForwarder {
|
|||
});
|
||||
|
||||
webSocketClient.on('executed', ({ instanceId, data }) => {
|
||||
logger.info(`[TaskForwarder] 收到 executed 事件: instanceId=${instanceId}, promptId=${data?.prompt_id}`);
|
||||
this.handleExecuted(instanceId, data).catch(err => {
|
||||
logger.error('处理 executed 事件失败:', err);
|
||||
});
|
||||
});
|
||||
|
||||
webSocketClient.on('execution_success', ({ instanceId, data }) => {
|
||||
logger.info(`[TaskForwarder] 收到 execution_success 事件: instanceId=${instanceId}, promptId=${data?.prompt_id}`);
|
||||
this.handleExecutionSuccess(instanceId, data).catch(err => {
|
||||
logger.error('处理 execution_success 事件失败:', err);
|
||||
});
|
||||
});
|
||||
|
||||
webSocketClient.on('execution_error', ({ instanceId, data }) => {
|
||||
logger.error(`[TaskForwarder] 收到 execution_error 事件: instanceId=${instanceId}, promptId=${data?.prompt_id}`);
|
||||
this.handleExecutionError(instanceId, data).catch(err => {
|
||||
logger.error('处理 execution_error 事件失败:', err);
|
||||
});
|
||||
|
|
@ -59,7 +81,7 @@ class TaskForwarder {
|
|||
|
||||
const task = {
|
||||
id: taskId,
|
||||
promptId: null,
|
||||
promptId: taskId,
|
||||
workflow,
|
||||
nodeInfoList,
|
||||
workflowId,
|
||||
|
|
@ -96,115 +118,276 @@ class TaskForwarder {
|
|||
}
|
||||
|
||||
async sendTaskToInstance(task, instance) {
|
||||
await webSocketClient.connect(instance.id, instance.wsUrl);
|
||||
logger.info(`[TaskForwarder] 准备发送任务 ${task.id} 到实例 ${instance.id}`);
|
||||
logger.info(`[TaskForwarder] 实例信息: ${JSON.stringify({ id: instance.id, wsUrl: instance.wsUrl, apiUrl: instance.apiUrl })}`);
|
||||
|
||||
const promptMessage = {
|
||||
const wsClientId = instance.id;
|
||||
const wsUrlWithClientId = `${instance.wsUrl}?clientId=${wsClientId}`;
|
||||
logger.info(`[TaskForwarder] WebSocket URL (clientId=实例ID): ${wsUrlWithClientId}`);
|
||||
|
||||
await webSocketClient.connect(instance.id, wsUrlWithClientId);
|
||||
|
||||
const promptPayload = {
|
||||
prompt: task.workflow,
|
||||
client_id: task.id
|
||||
prompt_id: task.id,
|
||||
client_id: wsClientId,
|
||||
front_end: "comfy"
|
||||
};
|
||||
|
||||
webSocketClient.send(instance.id, promptMessage);
|
||||
logger.info(`[TaskForwarder] 发送的 prompt 消息结构: prompt_id=${task.id}, client_id=${wsClientId}, workflow节点数=${Object.keys(task.workflow || {}).length}`);
|
||||
logger.info(`[TaskForwarder] 通过 HTTP POST /prompt 提交任务到 ${instance.apiUrl}/prompt`);
|
||||
|
||||
try {
|
||||
const response = await axios.post(`${instance.apiUrl}/prompt`, promptPayload, {
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
timeout: 30000
|
||||
});
|
||||
|
||||
logger.info(`[TaskForwarder] HTTP POST /prompt 响应: ${JSON.stringify(response.data)}`);
|
||||
|
||||
if (response.data?.node_errors && Object.keys(response.data.node_errors).length > 0) {
|
||||
const nodeErrors = response.data.node_errors;
|
||||
const errorMessages = [];
|
||||
|
||||
for (const [nodeId, errorInfo] of Object.entries(nodeErrors)) {
|
||||
if (errorInfo.errors && errorInfo.errors.length > 0) {
|
||||
for (const err of errorInfo.errors) {
|
||||
errorMessages.push(`节点 ${nodeId}: ${err.message}${err.details ? ` (${err.details})` : ''}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const fullErrorMessage = `工作流节点错误: ${errorMessages.join('; ')}`;
|
||||
logger.error(`[TaskForwarder] ${fullErrorMessage}`);
|
||||
throw new Error(fullErrorMessage);
|
||||
}
|
||||
|
||||
const returnedPromptId = response.data?.prompt_id || response.data?.promptId;
|
||||
logger.info(`[TaskForwarder] 任务 ${task.id} 已提交,ComfyUI 返回 prompt_id: ${returnedPromptId}`);
|
||||
|
||||
} catch (error) {
|
||||
let errorMessage = error.message;
|
||||
|
||||
if (error.response && error.response.data) {
|
||||
const comfyError = error.response.data;
|
||||
logger.error(`[TaskForwarder] 错误响应: ${JSON.stringify(comfyError)}`);
|
||||
|
||||
if (comfyError.error && comfyError.error.message) {
|
||||
errorMessage = comfyError.error.message;
|
||||
} else if (comfyError.message) {
|
||||
errorMessage = comfyError.message;
|
||||
} else if (typeof comfyError === 'string') {
|
||||
errorMessage = comfyError;
|
||||
} else {
|
||||
errorMessage = JSON.stringify(comfyError);
|
||||
}
|
||||
logger.error(`[TaskForwarder] 提取的错误信息: ${errorMessage}`);
|
||||
}
|
||||
|
||||
throw new Error(errorMessage);
|
||||
}
|
||||
|
||||
task.status = 'submitted';
|
||||
logger.info(`任务 ${task.id} 已发送到实例 ${instance.id}`);
|
||||
}
|
||||
|
||||
async handleExecutionStart(instanceId, promptId) {
|
||||
for (const [taskId, task] of this.tasks) {
|
||||
if (task.instanceId === instanceId && !task.promptId && task.status === 'submitted') {
|
||||
task.promptId = promptId;
|
||||
task.status = 'running';
|
||||
task.startedAt = new Date().toISOString();
|
||||
this.tasks.set(taskId, task);
|
||||
clusterManager.updateInstanceStatus(instanceId, 'busy');
|
||||
logger.info(`任务 ${task.id} 开始执行, promptId: ${promptId}`);
|
||||
break;
|
||||
}
|
||||
logger.info(`[TaskForwarder] handleExecutionStart: instanceId=${instanceId}, promptId=${promptId}`);
|
||||
|
||||
const task = this.tasks.get(promptId);
|
||||
if (!task) {
|
||||
logger.warn(`[TaskForwarder] 未找到任务: promptId=${promptId}`);
|
||||
return;
|
||||
}
|
||||
|
||||
if (task.instanceId !== instanceId) {
|
||||
logger.warn(`[TaskForwarder] 任务实例不匹配: taskId=${promptId}, task.instanceId=${task.instanceId}, event.instanceId=${instanceId}`);
|
||||
return;
|
||||
}
|
||||
|
||||
if (task.status !== 'submitted') {
|
||||
logger.warn(`[TaskForwarder] 任务状态不正确: taskId=${promptId}, status=${task.status}`);
|
||||
return;
|
||||
}
|
||||
|
||||
task.status = 'running';
|
||||
task.startedAt = new Date().toISOString();
|
||||
this.tasks.set(promptId, task);
|
||||
clusterManager.updateInstanceStatus(instanceId, 'busy');
|
||||
logger.info(`任务 ${task.id} 开始执行`);
|
||||
}
|
||||
|
||||
async handleProgress(instanceId, data) {
|
||||
for (const [taskId, task] of this.tasks) {
|
||||
if (task.instanceId === instanceId && task.status === 'running') {
|
||||
if (data.max && data.max > 0) {
|
||||
task.progress = Math.round((data.value / data.max) * 100);
|
||||
this.tasks.set(taskId, task);
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (!data?.prompt_id) {
|
||||
return;
|
||||
}
|
||||
|
||||
const task = this.tasks.get(data.prompt_id);
|
||||
if (!task || task.instanceId !== instanceId) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (data.max && data.max > 0) {
|
||||
task.progress = Math.round((data.value / data.max) * 100);
|
||||
this.tasks.set(data.prompt_id, task);
|
||||
logger.info(`[TaskForwarder] 任务 ${data.prompt_id} 进度: ${task.progress}%`);
|
||||
}
|
||||
}
|
||||
|
||||
async handleExecuted(instanceId, data) {
|
||||
for (const [taskId, task] of this.tasks) {
|
||||
if (task.promptId === data.prompt_id && task.status === 'running') {
|
||||
task.status = 'completed';
|
||||
task.completedAt = new Date().toISOString();
|
||||
task.result = data;
|
||||
this.tasks.set(taskId, task);
|
||||
clusterManager.updateInstanceStatus(instanceId, 'online');
|
||||
logger.info(`任务 ${task.id} 执行完成`);
|
||||
logger.info(`[TaskForwarder] handleExecuted: instanceId=${instanceId}, promptId=${data?.prompt_id}, node=${data?.node}`);
|
||||
|
||||
if (task.webhookUrl) {
|
||||
await this.sendWebhookCallback(task, data, null);
|
||||
}
|
||||
|
||||
if (task.queueTaskId) {
|
||||
const resultData = await this.processResultData(data, instanceId);
|
||||
taskQueueClient.notifyTaskComplete(task.queueTaskId, resultData);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
if (!data?.output) {
|
||||
logger.info(`[TaskForwarder] executed 消息无输出,跳过处理`);
|
||||
return;
|
||||
}
|
||||
|
||||
const promptId = data.prompt_id;
|
||||
const task = this.tasks.get(promptId);
|
||||
if (!task || task.instanceId !== instanceId) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!task.partialResults) {
|
||||
task.partialResults = [];
|
||||
}
|
||||
task.partialResults.push(data);
|
||||
logger.info(`[TaskForwarder] 收集节点 ${data.node} 的输出结果`);
|
||||
}
|
||||
|
||||
async handleExecutionError(instanceId, data) {
|
||||
for (const [taskId, task] of this.tasks) {
|
||||
if (task.promptId === data.prompt_id && task.status === 'running') {
|
||||
task.status = 'failed';
|
||||
logger.error(`[TaskForwarder] handleExecutionError: instanceId=${instanceId}, promptId=${data?.prompt_id}`);
|
||||
|
||||
const promptId = data.prompt_id;
|
||||
const task = this.tasks.get(promptId);
|
||||
if (!task || task.instanceId !== instanceId) {
|
||||
return;
|
||||
}
|
||||
|
||||
task.status = 'failed';
|
||||
task.completedAt = new Date().toISOString();
|
||||
task.error = data.exception_message || data.error || JSON.stringify(data);
|
||||
this.tasks.set(promptId, task);
|
||||
clusterManager.updateInstanceStatus(instanceId, 'online');
|
||||
logger.error(`任务 ${task.id} 执行失败: ${task.error}`);
|
||||
|
||||
if (task.webhookUrl) {
|
||||
await this.sendWebhookCallback(task, null, task.error);
|
||||
}
|
||||
|
||||
if (task.queueTaskId) {
|
||||
taskQueueClient.notifyTaskComplete(task.queueTaskId, null, task.error);
|
||||
}
|
||||
}
|
||||
|
||||
async handleExecutionSuccess(instanceId, data) {
|
||||
logger.info(`[TaskForwarder] handleExecutionSuccess: instanceId=${instanceId}, promptId=${data?.prompt_id}`);
|
||||
|
||||
const promptId = data.prompt_id;
|
||||
const task = this.tasks.get(promptId);
|
||||
if (!task) {
|
||||
logger.warn(`[TaskForwarder] 未找到任务: promptId=${promptId}`);
|
||||
return;
|
||||
}
|
||||
|
||||
if (task.instanceId !== instanceId) {
|
||||
logger.warn(`[TaskForwarder] 任务实例不匹配: taskId=${promptId}`);
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info(`[TaskForwarder] 找到匹配的任务: ${promptId}, 准备获取结果`);
|
||||
|
||||
const instance = clusterManager.getInstance(instanceId);
|
||||
if (!instance) {
|
||||
logger.error(`[TaskForwarder] 无法找到实例: ${instanceId}`);
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const historyResponse = await axios.get(`${instance.apiUrl}/history/${promptId}`, {
|
||||
timeout: 10000
|
||||
});
|
||||
|
||||
const historyData = historyResponse.data;
|
||||
logger.info(`[TaskForwarder] 获取到历史记录`);
|
||||
|
||||
let outputs = null;
|
||||
if (historyData && historyData[promptId] && historyData[promptId].outputs) {
|
||||
outputs = historyData[promptId].outputs;
|
||||
} else if (historyData && historyData.outputs) {
|
||||
outputs = historyData.outputs;
|
||||
}
|
||||
|
||||
if (outputs) {
|
||||
const resultData = await this.processHistoryOutputs(outputs, instanceId);
|
||||
|
||||
task.status = 'completed';
|
||||
task.completedAt = new Date().toISOString();
|
||||
task.error = data.exception_message;
|
||||
this.tasks.set(taskId, task);
|
||||
task.result = outputs;
|
||||
this.tasks.set(promptId, task);
|
||||
clusterManager.updateInstanceStatus(instanceId, 'online');
|
||||
logger.error(`任务 ${task.id} 执行失败: ${data.exception_message}`);
|
||||
logger.info(`任务 ${task.id} 执行完成,结果数量: ${resultData.length}`);
|
||||
|
||||
if (task.webhookUrl) {
|
||||
await this.sendWebhookCallback(task, null, data.exception_message);
|
||||
await this.sendWebhookCallback(task, { output: outputs }, null);
|
||||
}
|
||||
|
||||
if (task.queueTaskId) {
|
||||
taskQueueClient.notifyTaskComplete(task.queueTaskId, null, data.exception_message);
|
||||
taskQueueClient.notifyTaskComplete(task.queueTaskId, resultData);
|
||||
}
|
||||
} else {
|
||||
logger.warn(`[TaskForwarder] 历史记录中没有 outputs`);
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error(`[TaskForwarder] 获取历史记录失败: ${error.message}`);
|
||||
task.status = 'failed';
|
||||
task.error = `获取结果失败: ${error.message}`;
|
||||
this.tasks.set(promptId, task);
|
||||
|
||||
break;
|
||||
if (task.webhookUrl) {
|
||||
await this.sendWebhookCallback(task, null, task.error);
|
||||
}
|
||||
|
||||
if (task.queueTaskId) {
|
||||
taskQueueClient.notifyTaskComplete(task.queueTaskId, null, task.error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async processResultData(data, instanceId) {
|
||||
async processHistoryOutputs(outputs, instanceId) {
|
||||
const resultData = [];
|
||||
|
||||
if (data.output) {
|
||||
const instance = clusterManager.getInstance(instanceId);
|
||||
if (!instance) {
|
||||
return resultData;
|
||||
}
|
||||
if (!outputs) {
|
||||
return resultData;
|
||||
}
|
||||
|
||||
for (const [nodeId, output] of Object.entries(data.output)) {
|
||||
if (output.images) {
|
||||
for (const image of output.images) {
|
||||
try {
|
||||
const fileUrl = await this.uploadImage(image, instance);
|
||||
resultData.push({
|
||||
fileUrl,
|
||||
fileType: image.type || 'png',
|
||||
taskCostTime: 0,
|
||||
nodeId
|
||||
});
|
||||
} catch (error) {
|
||||
logger.error('上传图片失败:', error);
|
||||
}
|
||||
}
|
||||
const instance = clusterManager.getInstance(instanceId);
|
||||
if (!instance) {
|
||||
logger.error(`[processHistoryOutputs] 无法找到实例: ${instanceId}`);
|
||||
return resultData;
|
||||
}
|
||||
|
||||
for (const [nodeId, output] of Object.entries(outputs)) {
|
||||
if (!output) continue;
|
||||
|
||||
const mediaFiles = output.images || output.gifs || [];
|
||||
logger.info(`[processHistoryOutputs] 节点 ${nodeId} 找到 ${mediaFiles.length} 个媒体文件`);
|
||||
|
||||
for (const media of mediaFiles) {
|
||||
try {
|
||||
logger.info(`[processHistoryOutputs] 正在上传文件: ${media.filename}`);
|
||||
const fileUrl = await this.uploadImage(media, instance);
|
||||
logger.info(`[processHistoryOutputs] 文件上传成功: ${fileUrl}`);
|
||||
resultData.push({
|
||||
fileUrl,
|
||||
fileType: media.type || 'png',
|
||||
taskCostTime: 0,
|
||||
nodeId
|
||||
});
|
||||
} catch (error) {
|
||||
logger.error('[processHistoryOutputs] 上传文件失败:', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -213,6 +396,23 @@ class TaskForwarder {
|
|||
}
|
||||
|
||||
async uploadImage(image, instance) {
|
||||
const comfyuiOutputDir = process.env.COMFYUI_OUTPUT_DIR;
|
||||
|
||||
if (comfyuiOutputDir) {
|
||||
let localFilePath = path.join(comfyuiOutputDir, image.filename);
|
||||
if (image.subfolder) {
|
||||
localFilePath = path.join(comfyuiOutputDir, image.subfolder, image.filename);
|
||||
}
|
||||
|
||||
if (fs.existsSync(localFilePath)) {
|
||||
logger.info(`从本地目录读取文件: ${localFilePath}`);
|
||||
const uploadResult = await fileUploader.uploadToExternalServer(localFilePath, image.filename);
|
||||
return uploadResult.url || uploadResult.data?.url;
|
||||
} else {
|
||||
logger.warn(`本地文件不存在: ${localFilePath}, 回退到 HTTP API`);
|
||||
}
|
||||
}
|
||||
|
||||
const imageUrl = `${instance.apiUrl}/view?filename=${image.filename}&subfolder=${image.subfolder || ''}&type=${image.type}`;
|
||||
|
||||
const response = await axios.get(imageUrl, {
|
||||
|
|
@ -247,7 +447,7 @@ class TaskForwarder {
|
|||
data: []
|
||||
});
|
||||
} else {
|
||||
const processedData = await this.processResultData(resultData, task.instanceId);
|
||||
const processedData = await this.processHistoryOutputs(resultData?.output, task.instanceId);
|
||||
eventData = JSON.stringify({
|
||||
code: 0,
|
||||
msg: 'success',
|
||||
|
|
@ -311,6 +511,25 @@ class TaskForwarder {
|
|||
logger.info(`任务 ${taskId} 已取消`);
|
||||
return true;
|
||||
}
|
||||
|
||||
async getTaskStatus(taskId) {
|
||||
const task = this.tasks.get(taskId);
|
||||
if (!task) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return {
|
||||
id: task.id,
|
||||
promptId: task.promptId,
|
||||
status: task.status,
|
||||
progress: task.progress,
|
||||
instanceId: task.instanceId,
|
||||
createdAt: task.createdAt,
|
||||
startedAt: task.startedAt,
|
||||
completedAt: task.completedAt,
|
||||
error: task.error
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
export default new TaskForwarder();
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ import logger from '../logger/index.js';
|
|||
import config from '../config/index.js';
|
||||
import clusterManager from '../cluster-manager/index.js';
|
||||
import taskForwarder from '../task-forwarder/index.js';
|
||||
import comfyUIMonitor from '../comfyui-monitor/index.js';
|
||||
import workflowConverter from '../workflow-converter/index.js';
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
import axios from 'axios';
|
||||
import EventEmitter from 'events';
|
||||
|
|
@ -23,6 +25,12 @@ class MessageDispatcherClient extends EventEmitter {
|
|||
this.handleConfigChange();
|
||||
});
|
||||
this.startHeartbeatInterval();
|
||||
|
||||
comfyUIMonitor.on('connectionStateChange', () => {
|
||||
if (this.isConnected) {
|
||||
this.sendRegisterMessage();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
startHeartbeatInterval() {
|
||||
|
|
@ -32,7 +40,7 @@ class MessageDispatcherClient extends EventEmitter {
|
|||
}
|
||||
|
||||
getWebSocketUrl() {
|
||||
return config.get('messageDispatcher.websocketUrl', 'ws://localhost:4000/ws');
|
||||
return config.get('messageDispatcher.websocketUrl', 'wss://www.whjbjm.com/message-dispatcher');
|
||||
}
|
||||
|
||||
connect() {
|
||||
|
|
@ -165,6 +173,7 @@ class MessageDispatcherClient extends EventEmitter {
|
|||
async handleMessage(data) {
|
||||
try {
|
||||
const message = JSON.parse(data.toString());
|
||||
console.log('[后端] 收到消息:', JSON.stringify(message, null, 2));
|
||||
logger.debug('[MessageDispatcher] 收到消息:', message.type);
|
||||
|
||||
switch (message.type) {
|
||||
|
|
@ -236,15 +245,16 @@ class MessageDispatcherClient extends EventEmitter {
|
|||
}
|
||||
|
||||
async handleTaskAssign(taskData) {
|
||||
const { workflowId, nodeInfoList, webhookUrl, requestId } = taskData;
|
||||
const taskId = uuidv4();
|
||||
const { workflowId, nodeInfoList, webhookUrl, requestId, instanceId } = taskData;
|
||||
const taskId = requestId;
|
||||
|
||||
logger.info(`[MessageDispatcher] 收到任务: ${workflowId}, 生成taskId: ${taskId}`);
|
||||
logger.info(`[MessageDispatcher] 收到任务: ${workflowId}, 使用requestId作为taskId: ${taskId}, 指定实例: ${instanceId}`);
|
||||
|
||||
const ackResponse = {
|
||||
type: 'TASK_ACK',
|
||||
data: {
|
||||
requestId,
|
||||
instanceId,
|
||||
code: 0,
|
||||
msg: 'success',
|
||||
data: {
|
||||
|
|
@ -267,10 +277,15 @@ class MessageDispatcherClient extends EventEmitter {
|
|||
this.pendingTasks.set(taskId, taskRecord);
|
||||
|
||||
try {
|
||||
const workflow = await workflowConverter.convert(workflowId, nodeInfoList);
|
||||
|
||||
const actualTaskId = await taskForwarder.submitTask(
|
||||
{},
|
||||
workflow,
|
||||
nodeInfoList,
|
||||
workflowId
|
||||
workflowId,
|
||||
instanceId,
|
||||
webhookUrl,
|
||||
taskId
|
||||
);
|
||||
|
||||
taskRecord.status = 'running';
|
||||
|
|
|
|||
|
|
@ -1,5 +1,10 @@
|
|||
/**
|
||||
* websocket-client模块 - 与ComfyUI实例的WebSocket通信
|
||||
*
|
||||
* 设计说明:
|
||||
* - clientId 使用实例 ID(固定不变),实现连接复用
|
||||
* - 同一实例的所有任务共享同一个 WebSocket 连接
|
||||
* - 通过 prompt_id 区分不同任务的消息
|
||||
*/
|
||||
|
||||
import WebSocket from 'ws';
|
||||
|
|
@ -13,29 +18,30 @@ class WebSocketClient extends EventEmitter {
|
|||
this.connections = new Map();
|
||||
}
|
||||
|
||||
/**
|
||||
* 连接到指定实例
|
||||
* @param {string} instanceId - 实例ID
|
||||
* @param {string} wsUrl - WebSocket地址
|
||||
* @returns {Promise<WebSocket>} WebSocket连接
|
||||
*/
|
||||
connect(instanceId, wsUrl) {
|
||||
return new Promise((resolve, reject) => {
|
||||
if (this.connections.has(instanceId)) {
|
||||
const conn = this.connections.get(instanceId);
|
||||
if (conn.readyState === WebSocket.OPEN) {
|
||||
resolve(conn);
|
||||
return;
|
||||
}
|
||||
const existingConn = this.connections.get(instanceId);
|
||||
|
||||
if (existingConn && existingConn.ws && existingConn.ws.readyState === WebSocket.OPEN) {
|
||||
logger.info(`[WebSocketClient] 实例 ${instanceId} 已有连接,直接复用`);
|
||||
resolve(existingConn.ws);
|
||||
return;
|
||||
}
|
||||
|
||||
if (existingConn && existingConn.ws) {
|
||||
logger.info(`[WebSocketClient] 关闭旧连接,重新连接`);
|
||||
existingConn.ws.close();
|
||||
this.connections.delete(instanceId);
|
||||
}
|
||||
|
||||
logger.info(`正在连接到实例 ${instanceId}: ${wsUrl}`);
|
||||
logger.info(`[WebSocketClient] 连接详情: instanceId=${instanceId}, wsUrl=${wsUrl}`);
|
||||
|
||||
const ws = new WebSocket(wsUrl);
|
||||
|
||||
ws.on('open', () => {
|
||||
logger.info(`成功连接到实例 ${instanceId}`);
|
||||
this.connections.set(instanceId, ws);
|
||||
this.connections.set(instanceId, { ws, wsUrl });
|
||||
|
||||
const stateChange = comfyUIMonitor.setInstanceState(instanceId, 'connected');
|
||||
if (stateChange) {
|
||||
|
|
@ -92,79 +98,82 @@ class WebSocketClient extends EventEmitter {
|
|||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理收到的消息
|
||||
* @param {string} instanceId - 实例ID
|
||||
* @param {object} message - 消息对象
|
||||
*/
|
||||
handleMessage(instanceId, message) {
|
||||
if (message.type !== 'progress_state' && message.type !== 'progress') {
|
||||
logger.info(`[WebSocketClient] 收到消息 from ${instanceId}: type=${message.type}, data=${JSON.stringify(message.data || {}).substring(0, 200)}`);
|
||||
}
|
||||
this.emit('message', { instanceId, message });
|
||||
|
||||
switch (message.type) {
|
||||
case 'status':
|
||||
logger.info(`[WebSocketClient] status 消息: ${JSON.stringify(message.data)}`);
|
||||
this.emit('status', { instanceId, status: message.data });
|
||||
break;
|
||||
case 'progress':
|
||||
this.emit('progress', { instanceId, data: message.data });
|
||||
break;
|
||||
case 'execution_start':
|
||||
logger.info(`[WebSocketClient] execution_start 消息: prompt_id=${message.data?.prompt_id}`);
|
||||
this.emit('execution_start', { instanceId, promptId: message.data.prompt_id });
|
||||
break;
|
||||
case 'execution_cached':
|
||||
logger.info(`[WebSocketClient] execution_cached 消息: ${JSON.stringify(message.data)}`);
|
||||
this.emit('execution_cached', { instanceId, data: message.data });
|
||||
break;
|
||||
case 'executing':
|
||||
logger.info(`[WebSocketClient] executing 消息: node=${message.data?.node}, prompt_id=${message.data?.prompt_id}`);
|
||||
this.emit('executing', { instanceId, data: message.data });
|
||||
break;
|
||||
case 'executed':
|
||||
logger.info(`[WebSocketClient] executed 消息: prompt_id=${message.data?.prompt_id}`);
|
||||
this.emit('executed', { instanceId, data: message.data });
|
||||
break;
|
||||
case 'execution_success':
|
||||
logger.info(`[WebSocketClient] execution_success 消息: prompt_id=${message.data?.prompt_id}`);
|
||||
this.emit('execution_success', { instanceId, data: message.data });
|
||||
break;
|
||||
case 'execution_error':
|
||||
logger.error(`[WebSocketClient] execution_error 消息: ${JSON.stringify(message.data)}`);
|
||||
this.emit('execution_error', { instanceId, data: message.data });
|
||||
break;
|
||||
case 'progress_state':
|
||||
break;
|
||||
default:
|
||||
logger.info(`[WebSocketClient] 未处理的消息类型: ${message.type}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 发送消息到指定实例
|
||||
* @param {string} instanceId - 实例ID
|
||||
* @param {object} message - 消息对象
|
||||
*/
|
||||
send(instanceId, message) {
|
||||
const ws = this.connections.get(instanceId);
|
||||
const conn = this.connections.get(instanceId);
|
||||
const ws = conn?.ws;
|
||||
if (!ws || ws.readyState !== WebSocket.OPEN) {
|
||||
logger.error(`[WebSocketClient] 实例 ${instanceId} 未连接,无法发送消息`);
|
||||
throw new Error(`实例 ${instanceId} 未连接`);
|
||||
}
|
||||
ws.send(JSON.stringify(message));
|
||||
const messageStr = JSON.stringify(message);
|
||||
logger.info(`[WebSocketClient] 发送消息到实例 ${instanceId}: ${messageStr.substring(0, 500)}${messageStr.length > 500 ? '...' : ''}`);
|
||||
ws.send(messageStr);
|
||||
}
|
||||
|
||||
/**
|
||||
* 断开指定实例的连接
|
||||
* @param {string} instanceId - 实例ID
|
||||
*/
|
||||
disconnect(instanceId) {
|
||||
const ws = this.connections.get(instanceId);
|
||||
if (ws) {
|
||||
ws.close();
|
||||
const conn = this.connections.get(instanceId);
|
||||
if (conn && conn.ws) {
|
||||
conn.ws.close();
|
||||
this.connections.delete(instanceId);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 断开所有连接
|
||||
*/
|
||||
disconnectAll() {
|
||||
for (const [instanceId, ws] of this.connections) {
|
||||
ws.close();
|
||||
for (const [instanceId, conn] of this.connections) {
|
||||
if (conn.ws) {
|
||||
conn.ws.close();
|
||||
}
|
||||
}
|
||||
this.connections.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查实例是否已连接
|
||||
* @param {string} instanceId - 实例ID
|
||||
* @returns {boolean} 连接状态
|
||||
*/
|
||||
isConnected(instanceId) {
|
||||
const ws = this.connections.get(instanceId);
|
||||
return ws && ws.readyState === WebSocket.OPEN;
|
||||
const conn = this.connections.get(instanceId);
|
||||
return conn?.ws && conn.ws.readyState === WebSocket.OPEN;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,201 @@
|
|||
import axios from 'axios';
|
||||
import logger from '../logger/index.js';
|
||||
|
||||
/**
 * Downloads workflow templates (JSON objects keyed by node id), applies
 * per-node input overrides, and assigns seed values before submission.
 */
class WorkflowConverter {
  constructor() {
    // Base URL that `${workflowId}.json` is appended to.
    this.baseUrl = process.env.WORKFLOW_RESOURCES_URL || 'http://117.72.204.159/AIGC/static/public/workflows';
    // workflowId -> { data, timestamp }
    this.cache = new Map();
    // Cache entries older than this many milliseconds are re-downloaded.
    this.cacheTimeout = 5 * 60 * 1000;
  }

  /**
   * Fetch a workflow template, using the in-memory cache when still fresh.
   *
   * @param {string} workflowId - Template identifier (file name without `.json`).
   * @returns {Promise<object>} The template as returned by the server. The cached
   *   object itself is returned, so callers must not mutate it.
   * @throws {Error} If workflowId is empty, the download fails, or the response
   *   body is not a JSON object.
   */
  async getWorkflowTemplate(workflowId) {
    if (!workflowId) {
      throw new Error('workflowId is required');
    }

    const cached = this.cache.get(workflowId);
    if (cached && Date.now() - cached.timestamp < this.cacheTimeout) {
      logger.debug(`使用缓存的 workflow 模板: ${workflowId}`);
      return cached.data;
    }

    try {
      const url = `${this.baseUrl}/${workflowId}.json`;
      logger.info(`正在下载 workflow 模板: ${url}`);

      const response = await axios.get(url, { timeout: 10000 });
      const workflowData = response.data;

      // A non-object body (e.g. an HTML error page served with status 200)
      // must not be cached and handed out as a template.
      if (workflowData === null || typeof workflowData !== 'object') {
        throw new Error('模板内容不是 JSON 对象');
      }

      this.cache.set(workflowId, {
        data: workflowData,
        timestamp: Date.now()
      });

      logger.info(`workflow 模板下载成功: ${workflowId}`);
      return workflowData;
    } catch (error) {
      logger.error(`下载 workflow 模板失败: ${workflowId}`, error.message);
      // Keep the original error reachable for debugging via `cause`.
      throw new Error(`无法获取 workflow 模板: ${workflowId}, 错误: ${error.message}`, { cause: error });
    }
  }

  /**
   * @returns {number} A random integer in the range 1..2147483647.
   */
  generateRandomSeed() {
    return Math.floor(Math.random() * 2147483647) + 1;
  }

  /**
   * @param {Array<object>} nodeInfoList - Node override entries.
   * @returns {object|undefined} The first entry whose fieldName is
   *   'control_after_generate', or undefined when there is none.
   */
  findControlAfterGenerateNode(nodeInfoList) {
    return (nodeInfoList ?? []).find(node => node?.fieldName === 'control_after_generate');
  }

  /**
   * @param {Array<object>} nodeInfoList - Node override entries.
   * @param {string|number} nodeId - Node id to match (strict equality).
   * @returns {object|undefined} The first 'seed' entry for that node id.
   */
  findSeedNodeByNodeId(nodeInfoList, nodeId) {
    return (nodeInfoList ?? []).find(node => node?.nodeId === nodeId && node?.fieldName === 'seed');
  }

  /**
   * Return a deep copy of `workflow` with seed inputs assigned.
   *
   * - No control_after_generate entry: every `seed` input gets a random value.
   * - Control value other than 'fixed': the controlled node gets a random seed.
   * - Control value 'fixed': the controlled node gets the integer parsed from
   *   its 'seed' entry, when one is supplied and parses.
   * In the last two cases every other node that has a `seed` input is randomized.
   *
   * NOTE(review): with 'fixed' and no usable 'seed' entry the controlled node is
   * still randomized together with the others — confirm this is intended.
   *
   * @param {object} workflow - Workflow keyed by node id; not mutated.
   * @param {Array<object>} [nodeInfoList] - Node override entries; null,
   *   undefined or a non-array is treated as an empty list.
   * @returns {object} The updated copy.
   */
  processSeedsInWorkflow(workflow, nodeInfoList) {
    const infoList = Array.isArray(nodeInfoList) ? nodeInfoList : [];
    const controlAfterGenerateNode = this.findControlAfterGenerateNode(infoList);
    const updatedWorkflow = JSON.parse(JSON.stringify(workflow));

    if (!controlAfterGenerateNode) {
      logger.info('[WorkflowConverter] 未找到 control_after_generate 节点,为所有 seed 生成随机值');
      this.setRandomSeedsForAll(updatedWorkflow);
      return updatedWorkflow;
    }

    const nodeId = controlAfterGenerateNode.nodeId;
    const controlValue = controlAfterGenerateNode.fieldValue;
    logger.info(`[WorkflowConverter] 找到 control_after_generate 节点: nodeId=${nodeId}, value=${controlValue}`);

    // Workflow keys are strings, so ids are stored as strings; otherwise a
    // numeric nodeId would never match and its seed would be re-randomized.
    const nodesWithExplicitSeed = new Set();
    const targetInputs = updatedWorkflow[nodeId]?.inputs;

    if (controlValue !== 'fixed') {
      if (targetInputs) {
        const randomSeed = this.generateRandomSeed();
        targetInputs.seed = randomSeed;
        nodesWithExplicitSeed.add(String(nodeId));
        logger.info(`[WorkflowConverter] 为节点 ${nodeId} 的 seed 设置随机值: ${randomSeed}`);
      }
    } else {
      const seedNode = this.findSeedNodeByNodeId(infoList, nodeId);
      if (seedNode && seedNode.fieldValue !== undefined) {
        const seedValue = Number.parseInt(seedNode.fieldValue, 10);
        if (!Number.isNaN(seedValue) && targetInputs) {
          targetInputs.seed = seedValue;
          nodesWithExplicitSeed.add(String(nodeId));
          logger.info(`[WorkflowConverter] 为节点 ${nodeId} 的 seed 设置固定值: ${seedValue}`);
        }
      }
    }

    this.setRandomSeedsForOthers(updatedWorkflow, nodesWithExplicitSeed);
    return updatedWorkflow;
  }

  /**
   * Shared implementation: assign a random seed to every node that has a
   * `seed` input and whose id is not in `skipNodeIds`. Mutates `workflow`.
   *
   * @param {object} workflow - Workflow keyed by node id.
   * @param {Set<string>} skipNodeIds - Node ids (as strings) to leave alone.
   * @param {string} nodeLabel - Label placed in the log line.
   */
  randomizeSeeds(workflow, skipNodeIds, nodeLabel) {
    for (const [nodeId, node] of Object.entries(workflow)) {
      if (skipNodeIds.has(nodeId)) {
        continue;
      }
      const inputs = node?.inputs;
      if (inputs && typeof inputs === 'object' && 'seed' in inputs) {
        const randomSeed = this.generateRandomSeed();
        inputs.seed = randomSeed;
        logger.info(`[WorkflowConverter] 为${nodeLabel} ${nodeId} 的 seed 设置随机值: ${randomSeed}`);
      }
    }
  }

  /**
   * Assign a random seed to every node that has a `seed` input (in place).
   *
   * @param {object} workflow - Workflow keyed by node id.
   */
  setRandomSeedsForAll(workflow) {
    this.randomizeSeeds(workflow, new Set(), '节点');
  }

  /**
   * Assign a random seed to every node with a `seed` input except those
   * listed in `nodesWithExplicitSeed` (in place).
   *
   * @param {object} workflow - Workflow keyed by node id.
   * @param {Set<string>} nodesWithExplicitSeed - Node ids to skip.
   */
  setRandomSeedsForOthers(workflow, nodesWithExplicitSeed) {
    this.randomizeSeeds(workflow, nodesWithExplicitSeed, '其他节点');
  }

  /**
   * Return a deep copy of `workflow` with the given input overrides applied.
   *
   * Two entry shapes are accepted:
   *   1. { nodeId, fieldName, fieldValue } — a single input override
   *   2. { nodeId, inputs }                — several inputs at once
   * Entries in any other shape, or that name a node absent from the workflow,
   * are logged and skipped.
   *
   * @param {object} workflow - Workflow keyed by node id; not mutated.
   * @param {Array<object>} [nodeInfoList=[]] - Override entries.
   * @returns {object} The updated copy.
   * @throws {Error} If workflow is falsy.
   */
  applyNodeUpdates(workflow, nodeInfoList = []) {
    if (!workflow) {
      throw new Error('workflow is required');
    }

    const updatedWorkflow = JSON.parse(JSON.stringify(workflow));

    if (!nodeInfoList || !Array.isArray(nodeInfoList) || nodeInfoList.length === 0) {
      return updatedWorkflow;
    }

    logger.info(`应用 ${nodeInfoList.length} 个节点更新`);

    for (const nodeInfo of nodeInfoList) {
      let nodeId;
      let inputs;

      if (nodeInfo?.nodeId && nodeInfo.fieldName && nodeInfo.fieldValue !== undefined) {
        nodeId = nodeInfo.nodeId;
        inputs = { [nodeInfo.fieldName]: nodeInfo.fieldValue };
        logger.info(`[WorkflowConverter] 检测到格式1: nodeId=${nodeId}, fieldName=${nodeInfo.fieldName}`);
      } else if (nodeInfo?.nodeId && nodeInfo.inputs) {
        nodeId = nodeInfo.nodeId;
        inputs = nodeInfo.inputs;
        logger.info(`[WorkflowConverter] 检测到格式2: nodeId=${nodeId}, inputs=${Object.keys(inputs).join(',')}`);
      } else {
        logger.warn(`无效的节点信息,跳过: ${JSON.stringify(nodeInfo)}`);
        continue;
      }

      const target = updatedWorkflow[nodeId];
      if (!target) {
        logger.warn(`节点 ${nodeId} 在 workflow 中不存在,跳过`);
        continue;
      }

      if (!target.inputs) {
        target.inputs = {};
      }
      Object.assign(target.inputs, inputs);
      logger.debug(`节点 ${nodeId} 已更新: ${JSON.stringify(inputs)}`);
    }

    return updatedWorkflow;
  }

  /**
   * Build the final workflow: fetch the template, apply the overrides,
   * then assign seeds.
   *
   * @param {string} workflowId - Template identifier.
   * @param {Array<object>} [nodeInfoList=[]] - Override entries.
   * @returns {Promise<object>} The workflow to submit.
   * @throws {Error} Propagated from getWorkflowTemplate / applyNodeUpdates.
   */
  async convert(workflowId, nodeInfoList = []) {
    logger.info(`[WorkflowConverter] 开始转换 workflow: workflowId=${workflowId}, nodeInfoList长度=${nodeInfoList?.length || 0}`);

    const workflowTemplate = await this.getWorkflowTemplate(workflowId);
    logger.info(`[WorkflowConverter] 获取到的 workflow 模板节点数: ${Object.keys(workflowTemplate || {}).length}`);

    let finalWorkflow = this.applyNodeUpdates(workflowTemplate, nodeInfoList);
    finalWorkflow = this.processSeedsInWorkflow(finalWorkflow, nodeInfoList);

    logger.info(`[WorkflowConverter] 应用节点更新后的 workflow 节点数: ${Object.keys(finalWorkflow || {}).length}`);
    logger.info(`workflow 转换完成: ${workflowId}`);
    return finalWorkflow;
  }

  /** Drop every cached template. */
  clearCache() {
    this.cache.clear();
    logger.info('workflow 缓存已清空');
  }

  /**
   * Drop the cached template for one workflow id.
   *
   * @param {string} workflowId - Template identifier.
   */
  clearCacheByWorkflowId(workflowId) {
    this.cache.delete(workflowId);
    logger.info(`workflow 缓存已清空: ${workflowId}`);
  }
}
|
||||
|
||||
export default new WorkflowConverter();
|
||||
|
|
@ -1,3 +1,3 @@
|
|||
# 默认环境配置
|
||||
VITE_API_BASE_URL=https://a6848e23804d4315b56a48b456ee83ab.pvt.hz.smartml.cn/api
|
||||
VITE_MESSAGE_DISPATCHER_BASE_URL=http://localhost:4000
|
||||
VITE_API_BASE_URL=http://localhost:8079
|
||||
VITE_MESSAGE_DISPATCHER_BASE_URL=http://localhost:8078
|
||||
|
|
|
|||
|
|
@ -1,3 +1,3 @@
|
|||
# 默认环境配置
|
||||
VITE_API_BASE_URL=https://a6848e23804d4315b56a48b456ee83ab.pvt.hz.smartml.cn/api
|
||||
VITE_MESSAGE_DISPATCHER_BASE_URL=http://localhost:4000
|
||||
VITE_API_BASE_URL=http://localhost:8079
|
||||
VITE_MESSAGE_DISPATCHER_BASE_URL=http://localhost:8078
|
||||
|
|
|
|||
|
|
@ -1,2 +1,3 @@
|
|||
# 生产环境配置
|
||||
VITE_API_BASE_URL=https://a6848e23804d4315b56a48b456ee83ab.pvt.hz.smartml.cn/api
|
||||
VITE_API_BASE_URL=http://localhost:8079
|
||||
VITE_MESSAGE_DISPATCHER_BASE_URL=http://localhost:8078
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ const handleCommand = async (command) => {
|
|||
</script>
|
||||
|
||||
<style scoped lang="scss">
|
||||
@import '@/styles/design-system.scss';
|
||||
@use '@/styles/design-system.scss' as *;
|
||||
|
||||
.main-layout {
|
||||
height: 100%;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
@import './design-system.scss';
|
||||
@use './design-system.scss' as *;
|
||||
|
||||
* {
|
||||
margin: 0;
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ const handleLogin = async () => {
|
|||
</script>
|
||||
|
||||
<style scoped lang="scss">
|
||||
@import '@/styles/design-system.scss';
|
||||
@use '@/styles/design-system.scss' as *;
|
||||
|
||||
.login-container {
|
||||
min-height: 100vh;
|
||||
|
|
|
|||
|
|
@ -409,7 +409,7 @@ onUnmounted(() => {
|
|||
</script>
|
||||
|
||||
<style scoped lang="scss">
|
||||
@import '@/styles/design-system.scss';
|
||||
@use '@/styles/design-system.scss' as *;
|
||||
|
||||
.page-container {
|
||||
height: 100%;
|
||||
|
|
|
|||
|
|
@ -11,12 +11,19 @@ export default defineConfig(({ mode }) => {
|
|||
'@': resolve(__dirname, 'src')
|
||||
}
|
||||
},
|
||||
css: {
|
||||
preprocessorOptions: {
|
||||
scss: {
|
||||
api: 'modern-compiler'
|
||||
}
|
||||
}
|
||||
},
|
||||
server: {
|
||||
port: 5173,
|
||||
allowedHosts: ['dbc94f5824804eb9b41c3e7d3586baa2.pvt.hz.smartml.cn'],
|
||||
port: 8079,
|
||||
allowedHosts: ['www.whjbjm.com'],
|
||||
proxy: {
|
||||
'/api': {
|
||||
target: env.VITE_MESSAGE_DISPATCHER_BASE_URL || 'http://localhost:4000',
|
||||
target: env.VITE_MESSAGE_DISPATCHER_BASE_URL || 'http://localhost:8078',
|
||||
changeOrigin: true
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,11 +1,14 @@
|
|||
PORT=4000
|
||||
PORT=8078
|
||||
REDIS_HOST=localhost
|
||||
REDIS_PORT=6379
|
||||
REDIS_DB=0
|
||||
REDIS_PORT=16379
|
||||
REDIS_DB=5
|
||||
REDIS_PASSWORD=654321
|
||||
JWT_SECRET=comfyui-cluster-bridge-secret-key-2024
|
||||
JWT_EXPIRES_IN=24h
|
||||
ADMIN_USERNAME=admin
|
||||
ADMIN_PASSWORD=2233..2233
|
||||
|
||||
# 任务队列后端 WebSocket 地址
|
||||
TASK_QUEUE_WS_URL=ws://localhost:8087
|
||||
TASK_QUEUE_WS_URL=ws://localhost:8088
|
||||
# 任务队列后端 token (与任务队列后端的 TOKEN_SECRET 保持一致)
|
||||
TASK_QUEUE_TOKEN=1Ag9BJJn0rXDnidCyXqu
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "nodemon src/index.js",
|
||||
"dev": "node src/index.js",
|
||||
"start": "node src/index.js"
|
||||
},
|
||||
"dependencies": {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,48 @@
|
|||
module.exports = {
|
||||
apps: [{
|
||||
name: 'comfyui消息分发',
|
||||
script: './src/index.js',
|
||||
cwd: './',
|
||||
args: '',
|
||||
interpreter: 'node',
|
||||
interpreter_args: '',
|
||||
|
||||
// 监听文件修改
|
||||
watch: true,
|
||||
ignore_watch: ['logs', 'node_modules',
|
||||
'package.json', 'package-lock.json', 'pnpm-lock.yaml',
|
||||
'pm2Index.config.cjs'],
|
||||
|
||||
// 实例数
|
||||
instances: 1,
|
||||
exec_mode: 'fork',
|
||||
|
||||
// 自动重启设置
|
||||
autorestart: true,
|
||||
max_restarts: 30,
|
||||
min_uptime: '10s',
|
||||
|
||||
// 内存限制重启
|
||||
// max_memory_restart: '1G',
|
||||
|
||||
// 日志配置
|
||||
out_file: './logs/out/out.log',
|
||||
error_file: './logs/error/error.log',
|
||||
// log_file: './logs/combined.log',
|
||||
log_type: 'raw', // 或 'json'
|
||||
log_date_format: 'YYYY-MM-DD HH:mm:ss Z',
|
||||
logrotate: {
|
||||
max_size: '5M', // 日志文件最大大小
|
||||
retain: 30, // keep the 30 most recent rotated logs (the earlier comment said 7 days, which contradicted this value)
|
||||
compress: true, // 压缩旧日志
|
||||
date_format: 'YYYY-MM-DD' // 日期格式
|
||||
},
|
||||
|
||||
// 合并日志
|
||||
// combine_logs: true,
|
||||
|
||||
// 监控和重启设置
|
||||
kill_timeout: 1600,
|
||||
restart_delay: 4000,
|
||||
}],
|
||||
};
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
import express from 'express';
|
||||
import bridgeManager from '../bridge-manager/index.js';
|
||||
import websocketServer from '../websocket-server/index.js';
|
||||
import taskScheduler from '../task-scheduler/index.js';
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
import logger from '../logger/index.js';
|
||||
import { authMiddleware } from '../auth/middleware.js';
|
||||
|
|
@ -46,36 +47,85 @@ router.get('/bridges/:bridgeId', authMiddleware, (req, res) => {
|
|||
|
||||
router.post('/task', authMiddleware, async (req, res) => {
|
||||
try {
|
||||
const { bridgeId, workflowId, nodeInfoList, webhookUrl } = req.body;
|
||||
|
||||
if (!bridgeId) {
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
error: 'bridgeId不能为空'
|
||||
});
|
||||
}
|
||||
|
||||
const bridge = bridgeManager.getBridge(bridgeId);
|
||||
if (!bridge) {
|
||||
return res.status(404).json({
|
||||
success: false,
|
||||
error: '桥接器不存在'
|
||||
});
|
||||
}
|
||||
|
||||
const { workflowId, nodeInfoList, webhookUrl, taskType } = req.body;
|
||||
const requestId = uuidv4();
|
||||
logger.info(`收到任务请求, bridgeId: ${bridgeId}, requestId: ${requestId}`);
|
||||
|
||||
const result = await websocketServer.sendTaskToBridge(
|
||||
bridgeId,
|
||||
{ workflowId, nodeInfoList, webhookUrl },
|
||||
requestId
|
||||
);
|
||||
const capacity = bridgeManager.getAvailableCapacity();
|
||||
taskScheduler.setCurrentCapacity(capacity.available);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: result
|
||||
const assignResult = taskScheduler.tryDirectAssign({
|
||||
requestId,
|
||||
workflowId,
|
||||
nodeInfoList,
|
||||
webhookUrl,
|
||||
taskType
|
||||
});
|
||||
|
||||
if (assignResult.success) {
|
||||
logger.info(`收到任务请求, 直接分配实例: ${assignResult.instanceId}, bridgeId: ${assignResult.bridgeId}, requestId: ${requestId}`);
|
||||
|
||||
try {
|
||||
const result = await websocketServer.sendTaskToInstance(
|
||||
assignResult.bridgeId,
|
||||
assignResult.instanceId,
|
||||
{ workflowId, nodeInfoList, webhookUrl },
|
||||
requestId
|
||||
);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
requestId,
|
||||
instanceId: assignResult.instanceId,
|
||||
bridgeId: assignResult.bridgeId,
|
||||
status: 'processing',
|
||||
...result.data
|
||||
}
|
||||
});
|
||||
} catch (sendError) {
|
||||
logger.error('发送任务失败:', sendError);
|
||||
taskScheduler.handleTaskFailure(requestId, sendError.message);
|
||||
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: sendError.message,
|
||||
requestId
|
||||
});
|
||||
}
|
||||
} else {
|
||||
const queueResult = taskScheduler.addTask({
|
||||
requestId,
|
||||
workflowId,
|
||||
nodeInfoList,
|
||||
webhookUrl,
|
||||
taskType
|
||||
});
|
||||
|
||||
if (queueResult.success) {
|
||||
logger.info(`收到任务请求, 无可用实例, 加入队列: ${requestId}, 队列位置: ${queueResult.queuePosition}`);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
requestId,
|
||||
status: 'queued',
|
||||
queuePosition: queueResult.queuePosition,
|
||||
capacity: capacity
|
||||
}
|
||||
});
|
||||
} else {
|
||||
logger.warn(`任务队列已满: ${requestId}`);
|
||||
|
||||
res.status(503).json({
|
||||
success: false,
|
||||
error: queueResult.error,
|
||||
data: {
|
||||
requestId,
|
||||
capacity
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error('处理任务请求失败:', error);
|
||||
res.status(500).json({
|
||||
|
|
@ -85,29 +135,36 @@ router.post('/task', authMiddleware, async (req, res) => {
|
|||
}
|
||||
});
|
||||
|
||||
/**
 * GET /task/:taskId — look up one task in the task scheduler.
 *
 * Responds 404 with `{ success: false, error }` when the scheduler returns
 * nothing for that id, otherwise `{ success: true, data: <task status> }`.
 */
router.get('/task/:taskId', authMiddleware, (req, res) => {
  const taskStatus = taskScheduler.getTaskStatus(req.params.taskId);

  if (!taskStatus) {
    return res.status(404).json({
      success: false,
      error: '任务不存在'
    });
  }

  res.json({
    success: true,
    data: taskStatus
  });
});
|
||||
|
||||
/**
 * GET /instances — list every instance known to the bridge manager.
 */
router.get('/instances', authMiddleware, (req, res) => {
  const payload = {
    success: true,
    data: bridgeManager.getAllInstances()
  };
  res.json(payload);
});
|
||||
|
||||
router.get('/overview', authMiddleware, (req, res) => {
|
||||
const bridges = bridgeManager.getAllBridges();
|
||||
const capacity = bridgeManager.getAvailableCapacity();
|
||||
const stats = taskScheduler.getStats();
|
||||
|
||||
let totalInstances = 0;
|
||||
let onlineInstances = 0;
|
||||
let busyInstances = 0;
|
||||
|
|
@ -133,21 +190,20 @@ router.get('/overview', authMiddleware, (req, res) => {
|
|||
total: totalInstances,
|
||||
online: onlineInstances,
|
||||
busy: busyInstances,
|
||||
offline: offlineInstances
|
||||
offline: offlineInstances,
|
||||
locked: capacity.locked,
|
||||
available: capacity.available
|
||||
},
|
||||
tasks: {
|
||||
total: 0,
|
||||
pending: 0,
|
||||
running: 0,
|
||||
completed: 0,
|
||||
failed: 0
|
||||
}
|
||||
tasks: stats
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
router.get('/monitor/overview', authMiddleware, (req, res) => {
|
||||
const bridges = bridgeManager.getAllBridges();
|
||||
const capacity = bridgeManager.getAvailableCapacity();
|
||||
const stats = taskScheduler.getStats();
|
||||
|
||||
let totalInstances = 0;
|
||||
let onlineInstances = 0;
|
||||
let busyInstances = 0;
|
||||
|
|
@ -169,23 +225,26 @@ router.get('/monitor/overview', authMiddleware, (req, res) => {
|
|||
total: totalInstances,
|
||||
online: onlineInstances,
|
||||
busy: busyInstances,
|
||||
offline: offlineInstances
|
||||
offline: offlineInstances,
|
||||
locked: capacity.locked,
|
||||
available: capacity.available
|
||||
},
|
||||
tasks: {
|
||||
total: 0,
|
||||
pending: 0,
|
||||
running: 0,
|
||||
completed: 0,
|
||||
failed: 0
|
||||
}
|
||||
tasks: stats
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * GET /tasks — snapshot of the scheduler's queues.
 *
 * `pending` is limited to the first 20 queued tasks; `processing` and
 * `recovering` are the full contents of the scheduler's maps.
 */
router.get('/tasks', authMiddleware, (req, res) => {
  const stats = taskScheduler.getStats();

  res.json({
    success: true,
    data: {
      stats,
      pending: taskScheduler.pendingTaskQueue.slice(0, 20),
      processing: Array.from(taskScheduler.processingTasks.values()),
      recovering: Array.from(taskScheduler.recoveringTasks.values())
    }
  });
});
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,8 @@ const BLACKLIST_KEY_PREFIX = 'jwt:blacklist:';
|
|||
// Redis client configured from the environment. Unset (or empty) variables
// fall back to localhost:6379, database 0, and no password.
const redis = new Redis({
  host: process.env.REDIS_HOST || 'localhost',
  port: Number.parseInt(process.env.REDIS_PORT || '6379', 10),
  db: Number.parseInt(process.env.REDIS_DB || '0', 10),
  password: process.env.REDIS_PASSWORD || null
});
|
||||
|
||||
redis.on('error', (err) => {
|
||||
|
|
|
|||
|
|
@ -3,7 +3,8 @@ import Redis from 'ioredis';
|
|||
// Redis client configured from the environment. Unset (or empty) variables
// fall back to localhost:6379, database 0, and no password.
const redis = new Redis({
  host: process.env.REDIS_HOST || 'localhost',
  port: Number.parseInt(process.env.REDIS_PORT || '6379', 10),
  db: Number.parseInt(process.env.REDIS_DB || '0', 10),
  password: process.env.REDIS_PASSWORD || null
});
|
||||
|
||||
redis.on('error', (err) => {
|
||||
|
|
|
|||
|
|
@ -3,6 +3,19 @@ import logger from '../logger/index.js';
|
|||
class BridgeManager {
|
||||
constructor() {
|
||||
this.bridges = new Map();
|
||||
this.listeners = [];
|
||||
this.instanceLocks = new Map();
|
||||
this.lockTimeouts = new Map();
|
||||
this.LOCK_TIMEOUT = 30000;
|
||||
this.roundRobinIndex = 0;
|
||||
}
|
||||
|
||||
onBridgeChange(callback) {
|
||||
this.listeners.push(callback);
|
||||
}
|
||||
|
||||
notifyBridgeChange() {
|
||||
this.listeners.forEach(callback => callback());
|
||||
}
|
||||
|
||||
registerBridge(bridgeId, ws, bridgeInfo) {
|
||||
|
|
@ -15,13 +28,21 @@ class BridgeManager {
|
|||
};
|
||||
this.bridges.set(bridgeId, bridge);
|
||||
logger.info(`桥接器已注册: ${bridgeId}`);
|
||||
this.notifyBridgeChange();
|
||||
return bridge;
|
||||
}
|
||||
|
||||
unregisterBridge(bridgeId) {
|
||||
if (this.bridges.has(bridgeId)) {
|
||||
const bridge = this.bridges.get(bridgeId);
|
||||
if (bridge.info?.instances) {
|
||||
for (const instance of bridge.info.instances) {
|
||||
this.releaseInstanceLock(instance.id);
|
||||
}
|
||||
}
|
||||
this.bridges.delete(bridgeId);
|
||||
logger.info(`桥接器已注销: ${bridgeId}`);
|
||||
this.notifyBridgeChange();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -49,6 +70,155 @@ class BridgeManager {
|
|||
return this.getAllBridges();
|
||||
}
|
||||
|
||||
lockInstance(instanceId, taskId) {
|
||||
if (this.instanceLocks.has(instanceId)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
this.instanceLocks.set(instanceId, {
|
||||
taskId,
|
||||
lockedAt: new Date().toISOString()
|
||||
});
|
||||
|
||||
if (this.lockTimeouts.has(instanceId)) {
|
||||
clearTimeout(this.lockTimeouts.get(instanceId));
|
||||
}
|
||||
|
||||
const timeoutId = setTimeout(() => {
|
||||
logger.warn(`[BridgeManager] 实例锁超时自动释放: ${instanceId}`);
|
||||
this.releaseInstanceLock(instanceId);
|
||||
}, this.LOCK_TIMEOUT);
|
||||
|
||||
this.lockTimeouts.set(instanceId, timeoutId);
|
||||
|
||||
logger.info(`[BridgeManager] 实例已锁定: ${instanceId}, taskId: ${taskId}`);
|
||||
return true;
|
||||
}
|
||||
|
||||
releaseInstanceLock(instanceId) {
|
||||
if (this.instanceLocks.has(instanceId)) {
|
||||
this.instanceLocks.delete(instanceId);
|
||||
|
||||
if (this.lockTimeouts.has(instanceId)) {
|
||||
clearTimeout(this.lockTimeouts.get(instanceId));
|
||||
this.lockTimeouts.delete(instanceId);
|
||||
}
|
||||
|
||||
logger.info(`[BridgeManager] 实例锁已释放: ${instanceId}`);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
confirmInstanceLock(instanceId) {
|
||||
if (this.lockTimeouts.has(instanceId)) {
|
||||
clearTimeout(this.lockTimeouts.get(instanceId));
|
||||
this.lockTimeouts.delete(instanceId);
|
||||
logger.info(`[BridgeManager] 实例锁已确认,取消超时定时器: ${instanceId}`);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
isInstanceLocked(instanceId) {
|
||||
return this.instanceLocks.has(instanceId);
|
||||
}
|
||||
|
||||
getInstanceLockInfo(instanceId) {
|
||||
return this.instanceLocks.get(instanceId) || null;
|
||||
}
|
||||
|
||||
getAvailableInstance() {
|
||||
let bestInstance = null;
|
||||
let bestBridgeId = null;
|
||||
|
||||
const onlineInstances = [];
|
||||
|
||||
for (const [bridgeId, bridge] of this.bridges) {
|
||||
if (bridge.info?.instances) {
|
||||
for (const instance of bridge.info.instances) {
|
||||
if (instance.status === 'online' && !this.isInstanceLocked(instance.id)) {
|
||||
onlineInstances.push({ instance, bridgeId });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (onlineInstances.length === 0) {
|
||||
console.log('[BridgeManager] 没有找到空闲实例');
|
||||
return { instance: null, bridgeId: null };
|
||||
}
|
||||
|
||||
const selectedIndex = this.roundRobinIndex % onlineInstances.length;
|
||||
this.roundRobinIndex++;
|
||||
|
||||
const selected = onlineInstances[selectedIndex];
|
||||
bestInstance = selected.instance;
|
||||
bestBridgeId = selected.bridgeId;
|
||||
|
||||
console.log(`[BridgeManager] 找到空闲实例: ${bestInstance.id}, 所属 bridge: ${bestBridgeId}`);
|
||||
|
||||
return { instance: bestInstance, bridgeId: bestBridgeId };
|
||||
}
|
||||
|
||||
getAvailableInstanceAndLock(taskId) {
|
||||
const { instance, bridgeId } = this.getAvailableInstance();
|
||||
|
||||
if (instance) {
|
||||
const locked = this.lockInstance(instance.id, taskId);
|
||||
if (!locked) {
|
||||
logger.warn(`[BridgeManager] 实例锁定失败,可能已被其他任务占用: ${instance.id}`);
|
||||
return { instance: null, bridgeId: null };
|
||||
}
|
||||
}
|
||||
|
||||
return { instance, bridgeId };
|
||||
}
|
||||
|
||||
getAllInstances() {
|
||||
const instances = [];
|
||||
for (const [bridgeId, bridge] of this.bridges) {
|
||||
if (bridge.info?.instances) {
|
||||
for (const instance of bridge.info.instances) {
|
||||
instances.push({
|
||||
...instance,
|
||||
bridgeId,
|
||||
locked: this.isInstanceLocked(instance.id)
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
return instances;
|
||||
}
|
||||
|
||||
getAvailableCapacity() {
|
||||
let onlineCount = 0;
|
||||
let busyCount = 0;
|
||||
let lockedCount = 0;
|
||||
|
||||
for (const [, bridge] of this.bridges) {
|
||||
if (bridge.info?.instances) {
|
||||
for (const instance of bridge.info.instances) {
|
||||
if (instance.status === 'online') {
|
||||
onlineCount++;
|
||||
if (this.isInstanceLocked(instance.id)) {
|
||||
lockedCount++;
|
||||
}
|
||||
} else if (instance.status === 'busy') {
|
||||
busyCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
online: onlineCount,
|
||||
busy: busyCount,
|
||||
locked: lockedCount,
|
||||
available: Math.max(0, onlineCount - lockedCount)
|
||||
};
|
||||
}
|
||||
|
||||
sendToBridge(bridgeId, message) {
|
||||
const bridge = this.bridges.get(bridgeId);
|
||||
if (!bridge) {
|
||||
|
|
@ -60,6 +230,7 @@ class BridgeManager {
|
|||
return false;
|
||||
}
|
||||
try {
|
||||
console.log(`[分发] BridgeManager 发送到桥接器 ${bridgeId}:`, JSON.stringify(message, null, 2));
|
||||
bridge.ws.send(JSON.stringify(message));
|
||||
return true;
|
||||
} catch (error) {
|
||||
|
|
@ -77,6 +248,16 @@ class BridgeManager {
|
|||
}
|
||||
return successCount;
|
||||
}
|
||||
|
||||
handleInstanceOffline(instanceId) {
|
||||
const lockInfo = this.getInstanceLockInfo(instanceId);
|
||||
if (lockInfo) {
|
||||
logger.warn(`[BridgeManager] 实例离线,释放锁并标记任务需回收: ${instanceId}, taskId: ${lockInfo.taskId}`);
|
||||
this.releaseInstanceLock(instanceId);
|
||||
return lockInfo.taskId;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
export default new BridgeManager();
|
||||
|
|
|
|||
|
|
@ -51,10 +51,21 @@ taskScheduler.init().then(() => {
|
|||
|
||||
// Graceful shutdown on Ctrl+C: stop the scheduler, drop the upstream
// WebSocket, then close the HTTP server. The steps run exactly once.
process.on('SIGINT', async () => {
  console.log('正在关闭服务...');
  try {
    await taskScheduler.shutdown();
    mdWebSocketClient.disconnect();

    server.close(() => {
      console.log('服务已关闭');
      process.exit(0);
    });

    // Fallback: exit with a failure code if server.close() has not finished
    // within 5 seconds.
    setTimeout(() => {
      console.log('强制关闭...');
      process.exit(1);
    }, 5000);
  } catch (error) {
    console.error('关闭服务时出错:', error);
    process.exit(1);
  }
});
|
||||
|
|
|
|||
|
|
@ -11,22 +11,28 @@ class MDWebSocketClient {
|
|||
this.connected = false;
|
||||
this.reconnectAttempts = 0;
|
||||
this.tokenPushInterval = null;
|
||||
this.capacityPushInterval = null;
|
||||
this.heartbeatInterval = null;
|
||||
this.serverUrl = process.env.TASK_QUEUE_WS_URL || 'ws://localhost:8087';
|
||||
this.jwtSecret = process.env.JWT_SECRET || 'comfyui-cluster-bridge-secret-key-2024';
|
||||
this.taskQueueToken = process.env.TASK_QUEUE_TOKEN || '1Ag9BJJn0rXDnidCyXqu';
|
||||
}
|
||||
|
||||
async init() {
|
||||
console.log('[MDWebSocketClient] 初始化 WebSocket 客户端');
|
||||
bridgeManager.onBridgeChange(() => {
|
||||
if (this.connected) {
|
||||
this.pushCapacityState();
|
||||
}
|
||||
});
|
||||
await this.connect();
|
||||
}
|
||||
|
||||
async connect() {
|
||||
return new Promise((resolve, reject) => {
|
||||
console.log(`[MDWebSocketClient] 正在连接到 ${this.serverUrl}`);
|
||||
const urlWithParams = `${this.serverUrl}?token=${this.taskQueueToken}&id=message-dispatcher`;
|
||||
console.log(`[MDWebSocketClient] 正在连接到 ${urlWithParams}`);
|
||||
|
||||
this.ws = new WebSocket(this.serverUrl);
|
||||
this.ws = new WebSocket(urlWithParams);
|
||||
|
||||
this.ws.on('open', () => {
|
||||
console.log('[MDWebSocketClient] WebSocket 连接已建立');
|
||||
|
|
@ -38,7 +44,6 @@ class MDWebSocketClient {
|
|||
|
||||
this.startHeartbeat();
|
||||
this.startTokenPushTimer();
|
||||
this.startCapacityPushTimer();
|
||||
|
||||
resolve();
|
||||
});
|
||||
|
|
@ -55,7 +60,7 @@ class MDWebSocketClient {
|
|||
});
|
||||
|
||||
this.ws.on('error', (error) => {
|
||||
console.error('[MDWebSocketClient] WebSocket 连接错误:', error);
|
||||
// console.error('[MDWebSocketClient] WebSocket 连接错误:', error);
|
||||
this.connected = false;
|
||||
});
|
||||
});
|
||||
|
|
@ -76,10 +81,6 @@ class MDWebSocketClient {
|
|||
clearInterval(this.tokenPushInterval);
|
||||
this.tokenPushInterval = null;
|
||||
}
|
||||
if (this.capacityPushInterval) {
|
||||
clearInterval(this.capacityPushInterval);
|
||||
this.capacityPushInterval = null;
|
||||
}
|
||||
if (this.heartbeatInterval) {
|
||||
clearInterval(this.heartbeatInterval);
|
||||
this.heartbeatInterval = null;
|
||||
|
|
@ -209,9 +210,25 @@ class MDWebSocketClient {
|
|||
}
|
||||
|
||||
handleMessage(data) {
|
||||
const messageStr = data.toString();
|
||||
|
||||
if (messageStr === 'please give me tasks') {
|
||||
console.log('[MDWebSocketClient] 收到任务请求');
|
||||
return;
|
||||
}
|
||||
|
||||
if (messageStr === 'ping') {
|
||||
this.ws.send('pong');
|
||||
return;
|
||||
}
|
||||
|
||||
if (messageStr === 'pong') {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const message = JSON.parse(data.toString());
|
||||
console.log(`[MDWebSocketClient] 收到消息: ${message.type}`);
|
||||
const message = JSON.parse(messageStr);
|
||||
// console.log(`[MDWebSocketClient] 收到消息: ${message.type}`);
|
||||
|
||||
switch (message.type) {
|
||||
case 'HEARTBEAT_ACK':
|
||||
|
|
@ -241,12 +258,6 @@ class MDWebSocketClient {
|
|||
this.pushJwtToken();
|
||||
}, 20 * 60 * 60 * 1000);
|
||||
}
|
||||
|
||||
startCapacityPushTimer() {
|
||||
this.capacityPushInterval = setInterval(() => {
|
||||
this.pushCapacityState();
|
||||
}, 10000);
|
||||
}
|
||||
}
|
||||
|
||||
export default new MDWebSocketClient();
|
||||
|
|
|
|||
|
|
@ -1,9 +1,13 @@
|
|||
import bridgeManager from '../bridge-manager/index.js';
|
||||
import websocketServer from '../websocket-server/index.js';
|
||||
import logger from '../logger/index.js';
|
||||
|
||||
// Task lifecycle state names. RETRYING is kept next to the newer RECOVERING
// so existing references to either key keep resolving.
const TASK_STATES = {
  PENDING: 'pending',
  PROCESSING: 'processing',
  COMPLETED: 'completed',
  FAILED: 'failed',
  RETRYING: 'retrying',
  RECOVERING: 'recovering'
};
|
||||
|
||||
class TaskScheduler {
|
||||
|
|
@ -12,14 +16,22 @@ class TaskScheduler {
|
|||
this.processingTasks = new Map();
|
||||
this.completedTasks = [];
|
||||
this.failedTasks = [];
|
||||
this.recoveringTasks = new Map();
|
||||
this.currentCapacity = 0;
|
||||
this.maxCapacity = 0;
|
||||
this.schedulerLoopInterval = null;
|
||||
this.MAX_PENDING_QUEUE = 100;
|
||||
this.MAX_COMPLETED_HISTORY = 1000;
|
||||
this.MAX_FAILED_HISTORY = 100;
|
||||
this.TASK_TIMEOUT = 5 * 60 * 1000;
|
||||
this.RECOVERY_TIMEOUT = 60000;
|
||||
this.taskCallbacks = new Map();
|
||||
}
|
||||
|
||||
async init() {
|
||||
console.log('[TaskScheduler] 初始化任务调度器');
|
||||
this.startSchedulerLoop();
|
||||
this.startTimeoutCheck();
|
||||
}
|
||||
|
||||
setCurrentCapacity(capacity) {
|
||||
|
|
@ -29,81 +41,13 @@ class TaskScheduler {
|
|||
|
||||
console.log(`[TaskScheduler] 容量更新: ${oldCapacity} -> ${capacity}`);
|
||||
|
||||
if (capacity < oldCapacity) {
|
||||
this.handleCapacityReduction(capacity);
|
||||
} else if (capacity > oldCapacity) {
|
||||
this.handleCapacityIncrease(capacity);
|
||||
}
|
||||
}
|
||||
|
||||
addTaskToPending(task) {
|
||||
const taskWithState = {
|
||||
...task,
|
||||
state: TASK_STATES.PENDING,
|
||||
addedAt: new Date().toISOString()
|
||||
};
|
||||
this.pendingTaskQueue.push(taskWithState);
|
||||
console.log(`[TaskScheduler] 任务已加入等待队列: ${task.taskId}, 当前等待数: ${this.pendingTaskQueue.length}`);
|
||||
}
|
||||
|
||||
getTaskFromPending() {
|
||||
if (this.pendingTaskQueue.length === 0) {
|
||||
return null;
|
||||
}
|
||||
return this.pendingTaskQueue.shift();
|
||||
}
|
||||
|
||||
markTaskAsProcessing(taskId, instanceId) {
|
||||
const task = this.processingTasks.get(taskId) || this.pendingTaskQueue.find(t => t.taskId === taskId);
|
||||
if (task) {
|
||||
task.state = TASK_STATES.PROCESSING;
|
||||
task.instanceId = instanceId;
|
||||
task.startedAt = new Date().toISOString();
|
||||
this.processingTasks.set(taskId, task);
|
||||
|
||||
const index = this.pendingTaskQueue.findIndex(t => t.taskId === taskId);
|
||||
if (index !== -1) {
|
||||
this.pendingTaskQueue.splice(index, 1);
|
||||
}
|
||||
|
||||
console.log(`[TaskScheduler] 任务开始处理: ${taskId}, 实例: ${instanceId}`);
|
||||
}
|
||||
}
|
||||
|
||||
markTaskAsCompleted(taskId, result) {
|
||||
const task = this.processingTasks.get(taskId);
|
||||
if (task) {
|
||||
task.state = TASK_STATES.COMPLETED;
|
||||
task.result = result;
|
||||
task.completedAt = new Date().toISOString();
|
||||
this.processingTasks.delete(taskId);
|
||||
|
||||
this.completedTasks.push(task);
|
||||
if (this.completedTasks.length > 1000) {
|
||||
this.completedTasks.shift();
|
||||
}
|
||||
|
||||
console.log(`[TaskScheduler] 任务完成: ${taskId}`);
|
||||
if (capacity > oldCapacity) {
|
||||
this.schedulePendingTasks();
|
||||
}
|
||||
}
|
||||
|
||||
markTaskAsFailed(taskId, error) {
|
||||
const task = this.processingTasks.get(taskId);
|
||||
if (task) {
|
||||
task.state = TASK_STATES.FAILED;
|
||||
task.error = error;
|
||||
task.failedAt = new Date().toISOString();
|
||||
this.processingTasks.delete(taskId);
|
||||
|
||||
this.failedTasks.push(task);
|
||||
if (this.failedTasks.length > 100) {
|
||||
this.failedTasks.shift();
|
||||
}
|
||||
|
||||
console.error(`[TaskScheduler] 任务失败: ${taskId}`, error);
|
||||
this.schedulePendingTasks();
|
||||
}
|
||||
getAvailableCapacity() {
|
||||
return Math.max(0, this.currentCapacity - this.processingTasks.size);
|
||||
}
|
||||
|
||||
hasAvailableCapacity() {
|
||||
|
|
@ -114,39 +58,86 @@ class TaskScheduler {
|
|||
return Math.max(0, this.currentCapacity - this.processingTasks.size);
|
||||
}
|
||||
|
||||
handleCapacityReduction(newCapacity) {
|
||||
const currentProcessingCount = this.processingTasks.size;
|
||||
addTask(taskData) {
|
||||
const taskId = taskData.requestId || taskData.taskId;
|
||||
|
||||
if (currentProcessingCount > newCapacity) {
|
||||
const excessCount = currentProcessingCount - newCapacity;
|
||||
|
||||
const tasksToMoveBack = Array.from(this.processingTasks.values())
|
||||
.sort((a, b) => new Date(a.startedAt) - new Date(b.startedAt))
|
||||
.slice(0, excessCount);
|
||||
|
||||
for (const task of tasksToMoveBack.reverse()) {
|
||||
this.processingTasks.delete(task.taskId);
|
||||
this.pendingTaskQueue.unshift({
|
||||
...task,
|
||||
state: TASK_STATES.PENDING,
|
||||
movedBackAt: new Date().toISOString()
|
||||
});
|
||||
}
|
||||
|
||||
console.log(`[TaskScheduler] 算力降低: ${this.currentCapacity} -> ${newCapacity}, 已将 ${excessCount} 个任务移回缓存队列`);
|
||||
if (this.pendingTaskQueue.length >= this.MAX_PENDING_QUEUE) {
|
||||
return {
|
||||
success: false,
|
||||
error: '任务队列已满',
|
||||
queueSize: this.pendingTaskQueue.length
|
||||
};
|
||||
}
|
||||
|
||||
this.currentCapacity = newCapacity;
|
||||
const taskWithState = {
|
||||
...taskData,
|
||||
taskId,
|
||||
state: TASK_STATES.PENDING,
|
||||
addedAt: new Date().toISOString(),
|
||||
retryCount: 0
|
||||
};
|
||||
|
||||
this.pendingTaskQueue.push(taskWithState);
|
||||
console.log(`[TaskScheduler] 任务已加入等待队列: ${taskId}, 当前等待数: ${this.pendingTaskQueue.length}`);
|
||||
|
||||
this.schedulePendingTasks();
|
||||
|
||||
return {
|
||||
success: true,
|
||||
taskId,
|
||||
status: 'queued',
|
||||
queuePosition: this.pendingTaskQueue.length
|
||||
};
|
||||
}
|
||||
|
||||
handleCapacityIncrease(newCapacity) {
|
||||
console.log(`[TaskScheduler] 算力增加: ${this.currentCapacity} -> ${newCapacity}`);
|
||||
this.currentCapacity = newCapacity;
|
||||
this.schedulePendingTasks();
|
||||
tryDirectAssign(taskData) {
|
||||
const taskId = taskData.requestId || taskData.taskId;
|
||||
const capacity = bridgeManager.getAvailableCapacity();
|
||||
|
||||
if (capacity.available <= 0) {
|
||||
return {
|
||||
success: false,
|
||||
reason: 'no_capacity',
|
||||
capacity
|
||||
};
|
||||
}
|
||||
|
||||
const { instance, bridgeId } = bridgeManager.getAvailableInstanceAndLock(taskId);
|
||||
|
||||
if (!instance || !bridgeId) {
|
||||
return {
|
||||
success: false,
|
||||
reason: 'no_instance',
|
||||
capacity
|
||||
};
|
||||
}
|
||||
|
||||
const taskWithState = {
|
||||
...taskData,
|
||||
taskId,
|
||||
instanceId: instance.id,
|
||||
bridgeId,
|
||||
state: TASK_STATES.PROCESSING,
|
||||
startedAt: new Date().toISOString(),
|
||||
retryCount: 0
|
||||
};
|
||||
|
||||
this.processingTasks.set(taskId, taskWithState);
|
||||
|
||||
console.log(`[TaskScheduler] 任务直接分配: ${taskId} -> 实例 ${instance.id}`);
|
||||
|
||||
return {
|
||||
success: true,
|
||||
taskId,
|
||||
instanceId: instance.id,
|
||||
bridgeId,
|
||||
status: 'processing'
|
||||
};
|
||||
}
|
||||
|
||||
async schedulePendingTasks() {
|
||||
const availableSlots = this.getAvailableSlots();
|
||||
const capacity = bridgeManager.getAvailableCapacity();
|
||||
const availableSlots = capacity.available;
|
||||
|
||||
if (availableSlots <= 0 || this.pendingTaskQueue.length === 0) {
|
||||
return;
|
||||
|
|
@ -154,17 +145,243 @@ class TaskScheduler {
|
|||
|
||||
const tasksToSchedule = this.pendingTaskQueue.splice(0, availableSlots);
|
||||
|
||||
console.log(`[TaskScheduler] 调度 ${tasksToSchedule.length} 个任务`);
|
||||
console.log(`[TaskScheduler] 调度 ${tasksToSchedule.length} 个任务, 可用槽位: ${availableSlots}`);
|
||||
|
||||
for (const task of tasksToSchedule) {
|
||||
this.markTaskAsProcessing(task.taskId, 'auto-assigned');
|
||||
const { instance, bridgeId } = bridgeManager.getAvailableInstanceAndLock(task.taskId);
|
||||
|
||||
if (!instance || !bridgeId) {
|
||||
console.warn(`[TaskScheduler] 无可用实例,任务 ${task.taskId} 返回队列`);
|
||||
this.pendingTaskQueue.unshift(task);
|
||||
break;
|
||||
}
|
||||
|
||||
task.instanceId = instance.id;
|
||||
task.bridgeId = bridgeId;
|
||||
task.state = TASK_STATES.PROCESSING;
|
||||
task.startedAt = new Date().toISOString();
|
||||
|
||||
this.processingTasks.set(task.taskId, task);
|
||||
|
||||
console.log(`[TaskScheduler] 任务调度: ${task.taskId} -> 实例 ${instance.id}`);
|
||||
|
||||
try {
|
||||
await websocketServer.sendTaskToInstance(
|
||||
bridgeId,
|
||||
instance.id,
|
||||
{
|
||||
workflowId: task.workflowId,
|
||||
nodeInfoList: task.nodeInfoList,
|
||||
webhookUrl: task.webhookUrl
|
||||
},
|
||||
task.taskId
|
||||
);
|
||||
} catch (error) {
|
||||
console.error(`[TaskScheduler] 任务发送失败: ${task.taskId}`, error);
|
||||
this.handleTaskFailure(task.taskId, error.message);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
handleTaskAck(taskId, instanceId, bridgeId) {
|
||||
const task = this.processingTasks.get(taskId);
|
||||
if (task) {
|
||||
task.ackReceived = true;
|
||||
task.ackAt = new Date().toISOString();
|
||||
console.log(`[TaskScheduler] 任务确认: ${taskId}`);
|
||||
}
|
||||
}
|
||||
|
||||
handleTaskComplete(taskId, result) {
|
||||
const task = this.processingTasks.get(taskId);
|
||||
if (task) {
|
||||
task.state = TASK_STATES.COMPLETED;
|
||||
task.result = result;
|
||||
task.completedAt = new Date().toISOString();
|
||||
|
||||
bridgeManager.releaseInstanceLock(task.instanceId);
|
||||
|
||||
this.processingTasks.delete(taskId);
|
||||
|
||||
this.completedTasks.push(task);
|
||||
if (this.completedTasks.length > this.MAX_COMPLETED_HISTORY) {
|
||||
this.completedTasks.shift();
|
||||
}
|
||||
|
||||
console.log(`[TaskScheduler] 任务完成: ${taskId}`);
|
||||
|
||||
this.schedulePendingTasks();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Finalises a processing task as failed: records the error, releases the
 * instance lock, moves the task into the bounded failure history and then
 * triggers another scheduling pass. Unknown task ids are ignored.
 *
 * @param {string} taskId - Id of the failed task.
 * @param {*} error - Error description stored on the task and logged.
 */
handleTaskFailure(taskId, error) {
  const { processingTasks, failedTasks } = this;
  const broken = processingTasks.get(taskId);
  if (!broken) {
    return;
  }

  broken.state = TASK_STATES.FAILED;
  broken.error = error;
  broken.failedAt = new Date().toISOString();

  bridgeManager.releaseInstanceLock(broken.instanceId);
  processingTasks.delete(taskId);

  // Keep at most MAX_FAILED_HISTORY records, dropping the oldest.
  failedTasks.push(broken);
  if (failedTasks.length > this.MAX_FAILED_HISTORY) {
    failedTasks.shift();
  }

  console.error(`[TaskScheduler] 任务失败: ${taskId}`, error);

  this.schedulePendingTasks();
}
|
||||
|
||||
handleInstanceOffline(instanceId) {
|
||||
const affectedTaskId = bridgeManager.handleInstanceOffline(instanceId);
|
||||
|
||||
if (affectedTaskId) {
|
||||
const task = this.processingTasks.get(affectedTaskId);
|
||||
if (task) {
|
||||
console.warn(`[TaskScheduler] 实例离线,任务需要回收: ${affectedTaskId}`);
|
||||
this.recoverTask(affectedTaskId, 'instance_offline');
|
||||
}
|
||||
}
|
||||
|
||||
this.schedulePendingTasks();
|
||||
}
|
||||
|
||||
recoverTask(taskId, reason) {
|
||||
const task = this.processingTasks.get(taskId);
|
||||
if (!task) {
|
||||
return false;
|
||||
}
|
||||
|
||||
task.state = TASK_STATES.RECOVERING;
|
||||
task.recoveryReason = reason;
|
||||
task.recoveryStartedAt = new Date().toISOString();
|
||||
|
||||
this.processingTasks.delete(taskId);
|
||||
this.recoveringTasks.set(taskId, task);
|
||||
|
||||
setTimeout(() => {
|
||||
this.executeRecovery(taskId);
|
||||
}, 1000);
|
||||
|
||||
console.log(`[TaskScheduler] 任务进入恢复流程: ${taskId}, 原因: ${reason}`);
|
||||
return true;
|
||||
}
|
||||
|
||||
executeRecovery(taskId) {
|
||||
const task = this.recoveringTasks.get(taskId);
|
||||
if (!task) {
|
||||
return;
|
||||
}
|
||||
|
||||
task.retryCount = (task.retryCount || 0) + 1;
|
||||
|
||||
if (task.retryCount > 3) {
|
||||
console.error(`[TaskScheduler] 任务恢复失败次数过多: ${taskId}`);
|
||||
this.recoveringTasks.delete(taskId);
|
||||
|
||||
task.state = TASK_STATES.FAILED;
|
||||
task.error = `任务恢复失败: 重试次数超过限制`;
|
||||
this.failedTasks.push(task);
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`[TaskScheduler] 尝试恢复任务: ${taskId}, 重试次数: ${task.retryCount}`);
|
||||
|
||||
this.recoveringTasks.delete(taskId);
|
||||
|
||||
const retryTask = {
|
||||
...task,
|
||||
state: TASK_STATES.PENDING,
|
||||
instanceId: null,
|
||||
bridgeId: null,
|
||||
startedAt: null
|
||||
};
|
||||
|
||||
this.pendingTaskQueue.unshift(retryTask);
|
||||
|
||||
this.schedulePendingTasks();
|
||||
}
|
||||
|
||||
startSchedulerLoop() {
|
||||
this.schedulerLoopInterval = setInterval(() => {
|
||||
this.schedulePendingTasks();
|
||||
}, 1000);
|
||||
}, 5000);
|
||||
}
|
||||
|
||||
startTimeoutCheck() {
|
||||
setInterval(() => {
|
||||
this.checkTaskTimeouts();
|
||||
}, 30000);
|
||||
}
|
||||
|
||||
checkTaskTimeouts() {
|
||||
const now = Date.now();
|
||||
|
||||
for (const [taskId, task] of this.processingTasks) {
|
||||
if (task.startedAt) {
|
||||
const elapsed = now - new Date(task.startedAt).getTime();
|
||||
if (elapsed > this.TASK_TIMEOUT) {
|
||||
console.warn(`[TaskScheduler] 任务超时: ${taskId}, 已运行 ${Math.round(elapsed / 1000)}秒`);
|
||||
this.handleTaskFailure(taskId, '任务执行超时');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const [taskId, task] of this.recoveringTasks) {
|
||||
if (task.recoveryStartedAt) {
|
||||
const elapsed = now - new Date(task.recoveryStartedAt).getTime();
|
||||
if (elapsed > this.RECOVERY_TIMEOUT) {
|
||||
console.warn(`[TaskScheduler] 任务恢复超时: ${taskId}`);
|
||||
this.recoveringTasks.delete(taskId);
|
||||
task.state = TASK_STATES.FAILED;
|
||||
task.error = '任务恢复超时';
|
||||
this.failedTasks.push(task);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
getTaskStatus(taskId) {
|
||||
if (this.processingTasks.has(taskId)) {
|
||||
return { ...this.processingTasks.get(taskId) };
|
||||
}
|
||||
|
||||
const pendingTask = this.pendingTaskQueue.find(t => t.taskId === taskId);
|
||||
if (pendingTask) {
|
||||
return { ...pendingTask };
|
||||
}
|
||||
|
||||
if (this.recoveringTasks.has(taskId)) {
|
||||
return { ...this.recoveringTasks.get(taskId) };
|
||||
}
|
||||
|
||||
const completedTask = this.completedTasks.find(t => t.taskId === taskId);
|
||||
if (completedTask) {
|
||||
return { ...completedTask };
|
||||
}
|
||||
|
||||
const failedTask = this.failedTasks.find(t => t.taskId === taskId);
|
||||
if (failedTask) {
|
||||
return { ...failedTask };
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
getStats() {
|
||||
return {
|
||||
pending: this.pendingTaskQueue.length,
|
||||
processing: this.processingTasks.size,
|
||||
recovering: this.recoveringTasks.size,
|
||||
completed: this.completedTasks.length,
|
||||
failed: this.failedTasks.length,
|
||||
capacity: this.currentCapacity,
|
||||
availableSlots: this.getAvailableSlots()
|
||||
};
|
||||
}
|
||||
|
||||
stopSchedulerLoop() {
|
||||
|
|
@ -177,6 +394,13 @@ class TaskScheduler {
|
|||
async shutdown() {
|
||||
console.log('[TaskScheduler] 正在关闭调度器');
|
||||
this.stopSchedulerLoop();
|
||||
|
||||
for (const [taskId, task] of this.processingTasks) {
|
||||
bridgeManager.releaseInstanceLock(task.instanceId);
|
||||
}
|
||||
|
||||
this.processingTasks.clear();
|
||||
this.pendingTaskQueue.length = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,15 @@
|
|||
import { WebSocketServer as WSServer } from 'ws';
|
||||
import logger from '../logger/index.js';
|
||||
import bridgeManager from '../bridge-manager/index.js';
|
||||
import taskScheduler from '../task-scheduler/index.js';
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
|
||||
class WebSocketServer {
|
||||
constructor() {
|
||||
this.wss = null;
|
||||
this.pendingRequests = new Map();
|
||||
this.instanceTaskMap = new Map();
|
||||
this.TASK_TIMEOUT = 5 * 60 * 1000;
|
||||
}
|
||||
|
||||
start(server) {
|
||||
|
|
@ -62,6 +65,7 @@ class WebSocketServer {
|
|||
clearTimeout(pongTimeout);
|
||||
}
|
||||
if (bridgeId) {
|
||||
this.handleBridgeDisconnect(bridgeId);
|
||||
bridgeManager.unregisterBridge(bridgeId);
|
||||
this.cleanupPendingRequests(bridgeId);
|
||||
}
|
||||
|
|
@ -77,6 +81,18 @@ class WebSocketServer {
|
|||
});
|
||||
}
|
||||
|
||||
handleBridgeDisconnect(bridgeId) {
|
||||
const bridge = bridgeManager.getBridge(bridgeId);
|
||||
if (bridge?.info?.instances) {
|
||||
for (const instance of bridge.info.instances) {
|
||||
const affectedTaskId = bridgeManager.handleInstanceOffline(instance.id);
|
||||
if (affectedTaskId) {
|
||||
taskScheduler.recoverTask(affectedTaskId, 'bridge_disconnect');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
handleMessage(ws, data, setBridgeId) {
|
||||
try {
|
||||
const message = JSON.parse(data.toString());
|
||||
|
|
@ -90,10 +106,17 @@ class WebSocketServer {
|
|||
this.handleHeartbeat(message);
|
||||
break;
|
||||
case 'TASK_ACK':
|
||||
this.handleTaskAck(message);
|
||||
break;
|
||||
case 'TASK_END':
|
||||
this.handleTaskEnd(message);
|
||||
break;
|
||||
case 'INSTANCE_CHECK_ACK':
|
||||
this.handleBridgeResponse(message);
|
||||
break;
|
||||
case 'INSTANCE_STATUS_UPDATE':
|
||||
this.handleInstanceStatusUpdate(message);
|
||||
break;
|
||||
case 'PONG':
|
||||
break;
|
||||
default:
|
||||
|
|
@ -126,6 +149,68 @@ class WebSocketServer {
|
|||
}
|
||||
}
|
||||
|
||||
handleTaskAck(message) {
|
||||
const requestId = message.data?.requestId;
|
||||
const instanceId = message.data?.instanceId;
|
||||
const bridgeId = message.data?.bridgeId;
|
||||
|
||||
if (requestId) {
|
||||
taskScheduler.handleTaskAck(requestId, instanceId, bridgeId);
|
||||
|
||||
if (instanceId) {
|
||||
bridgeManager.confirmInstanceLock(instanceId);
|
||||
}
|
||||
|
||||
if (this.pendingRequests.has(requestId)) {
|
||||
const pending = this.pendingRequests.get(requestId);
|
||||
pending.ackReceived = true;
|
||||
pending.ackAt = new Date().toISOString();
|
||||
}
|
||||
|
||||
this.instanceTaskMap.set(instanceId, requestId);
|
||||
}
|
||||
|
||||
this.handleBridgeResponse(message);
|
||||
}
|
||||
|
||||
handleTaskEnd(message) {
|
||||
const requestId = message.data?.requestId;
|
||||
const instanceId = message.data?.instanceId;
|
||||
const result = message.data?.result;
|
||||
const error = message.data?.error;
|
||||
|
||||
if (instanceId) {
|
||||
this.instanceTaskMap.delete(instanceId);
|
||||
bridgeManager.releaseInstanceLock(instanceId);
|
||||
}
|
||||
|
||||
if (requestId) {
|
||||
if (error) {
|
||||
taskScheduler.handleTaskFailure(requestId, error);
|
||||
} else {
|
||||
taskScheduler.handleTaskComplete(requestId, result);
|
||||
}
|
||||
}
|
||||
|
||||
this.handleBridgeResponse(message);
|
||||
}
|
||||
|
||||
handleInstanceStatusUpdate(message) {
|
||||
const bridgeId = message.data?.bridgeId;
|
||||
const instances = message.data?.instances;
|
||||
|
||||
if (instances && Array.isArray(instances)) {
|
||||
for (const instance of instances) {
|
||||
if (instance.status === 'offline') {
|
||||
const affectedTaskId = bridgeManager.handleInstanceOffline(instance.id);
|
||||
if (affectedTaskId) {
|
||||
taskScheduler.recoverTask(affectedTaskId, 'instance_offline');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
handleBridgeResponse(message) {
|
||||
const requestId = message.data?.requestId;
|
||||
if (requestId && this.pendingRequests.has(requestId)) {
|
||||
|
|
@ -145,8 +230,11 @@ class WebSocketServer {
|
|||
}
|
||||
};
|
||||
|
||||
console.log('[分发] WebSocketServer 准备发送消息:', JSON.stringify(message, null, 2));
|
||||
|
||||
const success = bridgeManager.sendToBridge(bridgeId, message);
|
||||
if (!success) {
|
||||
bridgeManager.releaseInstanceLock(taskData.instanceId);
|
||||
reject(new Error('发送任务失败'));
|
||||
return;
|
||||
}
|
||||
|
|
@ -154,11 +242,63 @@ class WebSocketServer {
|
|||
const timeout = setTimeout(() => {
|
||||
if (this.pendingRequests.has(requestId)) {
|
||||
this.pendingRequests.delete(requestId);
|
||||
bridgeManager.releaseInstanceLock(taskData.instanceId);
|
||||
reject(new Error('任务执行超时'));
|
||||
}
|
||||
}, 5 * 60 * 1000);
|
||||
}, this.TASK_TIMEOUT);
|
||||
|
||||
this.pendingRequests.set(requestId, { resolve, reject, timeout, bridgeId });
|
||||
this.pendingRequests.set(requestId, {
|
||||
resolve,
|
||||
reject,
|
||||
timeout,
|
||||
bridgeId,
|
||||
instanceId: taskData.instanceId,
|
||||
sentAt: new Date().toISOString(),
|
||||
ackReceived: false
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
sendTaskToInstance(bridgeId, instanceId, taskData, requestId) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const message = {
|
||||
type: 'TASK_ASSIGN',
|
||||
data: {
|
||||
...taskData,
|
||||
requestId,
|
||||
instanceId
|
||||
}
|
||||
};
|
||||
|
||||
console.log(`[分发] WebSocketServer 发送任务到实例: bridgeId=${bridgeId}, instanceId=${instanceId}, requestId=${requestId}`);
|
||||
|
||||
const success = bridgeManager.sendToBridge(bridgeId, message);
|
||||
if (!success) {
|
||||
bridgeManager.releaseInstanceLock(instanceId);
|
||||
reject(new Error('发送任务到实例失败'));
|
||||
return;
|
||||
}
|
||||
|
||||
const timeout = setTimeout(() => {
|
||||
if (this.pendingRequests.has(requestId)) {
|
||||
this.pendingRequests.delete(requestId);
|
||||
bridgeManager.releaseInstanceLock(instanceId);
|
||||
taskScheduler.handleTaskFailure(requestId, '任务发送超时');
|
||||
reject(new Error('任务执行超时'));
|
||||
}
|
||||
}, this.TASK_TIMEOUT);
|
||||
|
||||
this.pendingRequests.set(requestId, {
|
||||
resolve,
|
||||
reject,
|
||||
timeout,
|
||||
bridgeId,
|
||||
instanceId,
|
||||
sentAt: new Date().toISOString(),
|
||||
ackReceived: false
|
||||
});
|
||||
|
||||
this.instanceTaskMap.set(instanceId, requestId);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -173,6 +313,8 @@ class WebSocketServer {
|
|||
}
|
||||
};
|
||||
|
||||
console.log('[分发] WebSocketServer 准备发送实例检查消息:', JSON.stringify(message, null, 2));
|
||||
|
||||
const success = bridgeManager.sendToBridge(bridgeId, message);
|
||||
if (!success) {
|
||||
reject(new Error('发送实例检查请求失败'));
|
||||
|
|
@ -194,11 +336,27 @@ class WebSocketServer {
|
|||
for (const [requestId, pending] of this.pendingRequests) {
|
||||
if (pending.bridgeId === bridgeId) {
|
||||
clearTimeout(pending.timeout);
|
||||
|
||||
if (pending.instanceId) {
|
||||
bridgeManager.releaseInstanceLock(pending.instanceId);
|
||||
this.instanceTaskMap.delete(pending.instanceId);
|
||||
|
||||
taskScheduler.recoverTask(requestId, 'bridge_disconnect');
|
||||
}
|
||||
|
||||
pending.reject(new Error('桥接器连接已断开'));
|
||||
this.pendingRequests.delete(requestId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
getStats() {
|
||||
return {
|
||||
pendingRequests: this.pendingRequests.size,
|
||||
instanceTaskMap: this.instanceTaskMap.size,
|
||||
pendingRequestIds: Array.from(this.pendingRequests.keys())
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
export default new WebSocketServer();
|
||||
|
|
|
|||
17
任务队列后端/.env
|
|
@ -1,14 +1,14 @@
|
|||
# 项目前缀
|
||||
PROJECT_PREFIX='digitalHuman'
|
||||
PROJECT_PREFIX='digitalHuman-test'
|
||||
|
||||
# token 密钥
|
||||
TOKEN_SECRET='1Ag9BJJn0rXDnidCyXqu'
|
||||
|
||||
# WebSocket 端口
|
||||
WS_PORT=8086
|
||||
WS_PORT=8087
|
||||
|
||||
# 回调端口
|
||||
CALLBACK_PORT=8066
|
||||
CALLBACK_PORT=8089
|
||||
|
||||
# runninghub API
|
||||
RunningHub_URL='https://www.runninghub.cn/task/openapi/create'
|
||||
|
|
@ -17,18 +17,15 @@ RunningHub_URL='https://www.runninghub.cn/task/openapi/create'
|
|||
BACKEND_API_URL='http://localhost:8787' # http://www.whjbjm.com/api
|
||||
|
||||
# 回调接口地址
|
||||
CALLBACK_URL='http://43.248.131.153:8066/callback/all'
|
||||
CALLBACK_URL='http://43.248.131.153:8087/callback/all'
|
||||
# fNkecvcLonpHtFimE4G1BOjcB82yy4PqiQv9caknQqtQAwT1ZAJeWkG7YjY2YVBP
|
||||
# http://www.whjbjm.com/taskCallback/callback/all
|
||||
|
||||
# redis 地址
|
||||
REDIS_URL = ''
|
||||
REDIS_URL = 'redis://:654321@localhost:16379/5'
|
||||
|
||||
# Message Dispatcher 配置
|
||||
MESSAGE_DISPATCHER_URL=http://localhost:4000/api/task
|
||||
MESSAGE_DISPATCHER_WS_PORT=8087
|
||||
MESSAGE_DISPATCHER_URL=http://localhost:8078/api/task
|
||||
MESSAGE_DISPATCHER_WS_PORT=8088
|
||||
MESSAGE_DISPATCHER_ENABLED=true
|
||||
MESSAGE_DISPATCHER_TIMEOUT=30000
|
||||
|
||||
# 外部容量配置
|
||||
EXTERNAL_CAPACITY_MAX=10
|
||||
|
|
|
|||
|
|
@ -0,0 +1,367 @@
|
|||
import express, { json, urlencoded } from 'express';
|
||||
import cors from 'cors';
|
||||
import dotenv from 'dotenv';
|
||||
import WebSocket, { WebSocketServer } from 'ws';
|
||||
import { Worker } from 'worker_threads';
|
||||
import { checkUsertoken } from './school/api.js';
|
||||
import redis from './redis/index.js';
|
||||
import initQueue from './redis/initQueue.js';
|
||||
import messagePersistence from './redis/messagePersistence.js';
|
||||
import code from './config/code.json' with { type: 'json' };
|
||||
import fileRouter from './upload/index.js';
|
||||
import recordRouter from './outside/callback.js';
|
||||
import mdWebSocketServer from './utils/mdWebSocketServer.js';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
/**
 * Minimal console logger. Every line is prefixed with an ISO-8601 timestamp
 * in square brackets and the level name.
 */
const logger = (() => {
  const stamp = () => new Date().toISOString();

  return {
    /** Logs an informational message via console.log. */
    info(message) {
      console.log(`[${stamp()}] INFO: ${message}`);
    },

    /** Logs an error via console.error; a missing error is passed as ''. */
    error(message, error) {
      console.error(`[${stamp()}] ERROR: ${message}`, error || '');
    },

    /** Logs a debug message via console.debug. */
    debug(message) {
      console.debug(`[${stamp()}] DEBUG: ${message}`);
    }
  };
})();
|
||||
|
||||
let wss = null;
|
||||
const workers = [];
|
||||
const socketMap = new Map();
|
||||
|
||||
/**
 * Spawns a worker thread for the given script, attaches error/exit logging,
 * registers it in the module-level `workers` list and returns it.
 *
 * @param {string} scriptPath - Path of the worker script.
 * @returns {Worker} The newly created worker.
 */
function createWorker(scriptPath) {
  const reportError = (err) => {
    logger.error(`Worker ${scriptPath} 错误:`, err);
  };
  const reportExit = (exitCode) => {
    // A zero exit code is a normal shutdown and is not logged.
    if (exitCode === 0) {
      return;
    }
    logger.error(`Worker ${scriptPath} 异常退出,退出码: ${exitCode}`);
  };

  const spawned = new Worker(scriptPath);
  spawned.setMaxListeners(20);
  spawned.on('error', reportError);
  spawned.on('exit', reportExit);

  workers.push(spawned);
  return spawned;
}
|
||||
|
||||
// One worker thread per pipeline stage, created in this fixed order.
const [assessment, wait, polling, result, callback_result, error] = [
  './worker_threads/assessment/assessment.js',
  './worker_threads/wait/waiting.js',
  './worker_threads/process/process.js',
  './worker_threads/result/result.js',
  './worker_threads/callback_result/result.js',
  './worker_threads/error/error.js'
].map((scriptPath) => createWorker(scriptPath));
|
||||
|
||||
/**
 * Delivers a message to the backend connection registered under `id`.
 *
 * If the connection is open the message is sent (and the socket optionally
 * closed afterwards). If there is no open connection, a non-empty message is
 * saved to the pending-message store instead.
 *
 * @param {string} id - Backend id used as the key into `socketMap`.
 * @param {*} message - Payload to send; falsy values are never sent or saved.
 * @param {boolean} [close=false] - Close the socket after a successful send.
 * @param {number} [closeCode=1000] - Close code used when `close` is true.
 * @param {string} [closeReason=''] - Close reason used when `close` is true.
 * @returns {Promise<boolean>} `true` only when the message was sent on an
 *   open socket; `false` in every other case, including when it was queued.
 */
async function sendMessageToClient(id, message, close = false, closeCode = 1000, closeReason = '') {
  const socket = typeof id === 'string' && id ? socketMap.get(id) : undefined;
  const canSendNow = socket && socket.readyState === WebSocket.OPEN && message;

  if (canSendNow) {
    try {
      socket.send(message);
      const serialized = typeof message === 'string' ? message : JSON.stringify(message);
      const messagePreview = serialized.slice(0, 50);
      logger.debug(`成功发送消息到客户端,id: ${id}, 消息: ${messagePreview}...`);
      if (close) {
        socket.close(closeCode, closeReason);
      }
      return true;
    } catch (sendError) {
      logger.error(`发送消息给客户端失败,id: ${id}`, sendError);
      return false;
    }
  }

  if (!message) {
    logger.debug(`消息为空,无法发送,id: ${id}`);
    return false;
  }

  // No open connection: park the message so it can be retried later.
  logger.debug(`未找到目标客户端或连接已关闭,保存消息到待发送队列,id: ${id}`);
  try {
    await messagePersistence.savePendingMessage(id, message);
    logger.info(`消息已保存到待发送队列,等待重试: backendId=${id}`);
  } catch (saveError) {
    logger.error(`保存待发送消息失败: backendId=${id}`, saveError);
  }
  return false;
}
|
||||
|
||||
/**
 * Re-sends the messages that were persisted for a backend while it was
 * disconnected. Each message is removed from the store after it is sent; a
 * failed send increments that message's retry count instead.
 *
 * @param {string} id - Backend id the pending messages are stored under.
 * @param {WebSocket} socket - The backend's freshly opened connection.
 * @returns {Promise<void>}
 */
async function replayPendingMessages(id, socket) {
  try {
    const pendingMessages = await messagePersistence.getPendingMessages(id);
    if (pendingMessages.length === 0) {
      return;
    }

    logger.info(`${id}号后端 发现 ${pendingMessages.length} 条待发送消息,开始重试发送`);

    for (const pendingMsg of pendingMessages) {
      try {
        socket.send(pendingMsg.message);
        await messagePersistence.removePendingMessage(pendingMsg.key);
        logger.debug(`成功重试发送消息: backendId=${id}, messageKey=${pendingMsg.key}`);
      } catch (error) {
        logger.error(`重试发送消息失败: backendId=${id}, messageKey=${pendingMsg.key}`, error);
        await messagePersistence.incrementRetryCount(pendingMsg.key);
      }
    }

    logger.info(`${id}号后端 待发送消息重试完成`);
  } catch (error) {
    logger.error(`获取或发送待发送消息失败: backendId=${id}`, error);
  }
}

/**
 * Handles one frame received from a backend connection.
 *
 * Plain-text keep-alives ('ping', 'pong', 'please give me tasks') are handled
 * first. Anything else must be JSON: informational message types are only
 * logged, HEARTBEAT is answered with HEARTBEAT_ACK, and "generate" requests
 * are forwarded to the assessment worker.
 *
 * @param {WebSocket} socket - Connection the frame arrived on.
 * @param {string|Buffer} message - Raw frame payload.
 */
function handleBackendMessage(socket, message) {
  const messageStr = typeof message === 'string' ? message : message.toString();

  if (messageStr === 'ping') {
    socket.send('pong');
    return;
  }
  if (messageStr === 'pong' || messageStr === 'please give me tasks') {
    return;
  }

  try {
    const msg = JSON.parse(messageStr);

    switch (msg.type) {
      case 'JWT_UPDATE':
        logger.info('收到 JWT_UPDATE 消息');
        return;
      case 'CAPACITY_UPDATE':
        logger.debug(`收到算力状态更新: 可用容量 = ${msg.data?.summary?.availableCapacity || 0}`);
        return;
      case 'INSTANCE_ONLINE':
        logger.debug(`收到实例上线: ${msg.data?.instanceId}`);
        return;
      case 'INSTANCE_OFFLINE':
        logger.debug(`收到实例下线: ${msg.data?.instanceId}`);
        return;
      case 'HEARTBEAT':
        socket.send(JSON.stringify({
          type: 'HEARTBEAT_ACK',
          data: { timestamp: new Date().toISOString() }
        }));
        return;
      default:
        break;
    }

    // Generate requests are recognised by their leading bytes, as before.
    const prefix = messageStr.slice(0, 50);
    if (prefix.includes('"type":"generate"') || prefix.includes("'type':'generate'")) {
      assessment.postMessage({
        type: 'submit',
        data: messageStr
      });
    } else {
      logger.debug(`收到未知消息类型: ${prefix}`);
    }
  } catch (e) {
    logger.error('处理消息出错:', e);
    socket.send(JSON.stringify({
      error: '处理消息出错',
      details: e.message
    }));
  }
}

/**
 * Creates the WebSocket server on top of the given HTTP server and wires the
 * worker threads' results back to the connected backends.
 *
 * Connections are authenticated through the `token` query parameter, which
 * must equal TOKEN_SECRET; the `id` query parameter identifies the backend.
 *
 * Fixes:
 * - The `message`, `close` and `error` listeners and the heartbeat are now
 *   registered before the awaited replay of persisted messages. Previously
 *   they were attached only after the await, so frames arriving during the
 *   replay were dropped and an early `error` event had no handler.
 * - A closed socket is now removed from `socketMap`, unless a newer
 *   connection has already replaced it under the same id. Closed sockets used
 *   to accumulate in the map.
 *
 * @param {import('http').Server} server - HTTP server to attach to.
 */
function createWebSocketServer(server) {
  wss = new WebSocketServer({
    server,
    verifyClient: async (info, callback) => {
      try {
        const urlParams = new URLSearchParams(info.req.url.split('?')[1]);
        const token = urlParams.get('token');
        const id = urlParams.get('id');

        if (!token) {
          logger.info('缺少令牌');
          callback(false, 401, '缺少令牌');
          return;
        } else if (token !== process.env.TOKEN_SECRET) {
          logger.info('验证后端失败');
          callback(false, 401, 'Token is invalid');
          return;
        }

        // Stash the backend id on the request for the connection handler.
        info.req.id = id;
        logger.info('用户ID: token 验证成功');
        callback(true);
      } catch (error) {
        logger.error('验证后端失败:', error);
        callback(false, 401, 'Token is invalid');
      }
    }
  });

  logger.info('WebSocket server is running on the same port as HTTP server');

  wss.on('error', (error) => {
    logger.error('WebSocket服务器错误:', error);
  });

  wss.on('connection', async (socket, req) => {
    const id = req.id;
    logger.info(`${id}号后端 连接成功`);
    socketMap.set(id, socket);

    // Attach every listener before any await so no event is missed.
    socket.on('message', (message) => {
      handleBackendMessage(socket, message);
    });

    const heartbeatInterval = setInterval(() => {
      if (socket.readyState === WebSocket.OPEN) {
        socket.send('ping');
        logger.debug(`向 ${id} 号后端发送心跳`);
      }
    }, 30000);

    socket.on('close', (closeCode, reason) => {
      clearInterval(heartbeatInterval);
      // Do not evict a newer connection that reused the same id.
      if (socketMap.get(id) === socket) {
        socketMap.delete(id);
      }
      logger.info(`${id}号后端 连接关闭,关闭码: ${closeCode},原因: ${reason}`);
    });

    socket.on('error', (error) => {
      logger.error(`${id}号后端 连接错误:`, error);
    });

    socket.send('please give me tasks');

    await replayPendingMessages(id, socket);
  });

  assessment.on('message', async (message) => {
    logger.debug(`收到assessment worker消息: ${JSON.stringify(message)}`);
    if (message.type === 'AssessmentSuccess') {
      await sendMessageToClient(message.backendId, code.SUCCESS[message.type]);
    } else {
      await sendMessageToClient(message.backendId, code.ERROR[message.type], false, 4401, code.ERROR[message.type]);
    }
  });

  result.on('message', async (message) => {
    logger.debug(`收到result worker消息: ${JSON.stringify(message)}`);
    if (message.type === 'success') {
      await sendMessageToClient(message.backendId, message.message, false, 1000, 'success');
    } else {
      await sendMessageToClient(message.backendId, '获取结果失败,可在历史记录区刷新查看结果', false, 4401, code.ERROR[message.type]);
    }
  });

  callback_result.on('message', async (message) => {
    logger.debug(`收到callback_result worker消息: ${JSON.stringify(message)}`);
    if (message.type === 'success') {
      await sendMessageToClient(message.backendId, message.message, false, 1000, 'success');
    } else {
      await sendMessageToClient(message.backendId, '获取结果失败,可在历史记录区刷新查看结果', false, 4401);
    }
  });

  error.on('message', async (message) => {
    logger.debug(`收到error worker消息: ${JSON.stringify(message)}`);
    await sendMessageToClient(message.backendId, message.message, false, 4402, 'false');
  });
}
|
||||
|
||||
/**
 * Signal handler that tears the service down.
 *
 * Closes the WebSocket server and every connected client, terminates all
 * worker threads, then disconnects Redis and exits (0 on success, 1 when the
 * disconnect fails). When Redis is not open the process exits immediately.
 * A 5 second timer forces exit code 1 if the disconnect never settles.
 */
function gracefulShutdown() {
  logger.info('开始优雅关闭...');

  if (wss) {
    const onServerClosed = () => {
      logger.info('WebSocket服务器已关闭');
    };
    wss.close(onServerClosed);

    // 1001 = "going away": tells clients the server itself is shutting down.
    for (const client of wss.clients) {
      client.close(1001, '服务器正在关闭');
    }
  }

  const terminateWorker = (worker, index) => {
    logger.info(`终止worker线程 ${index}`);
    worker.terminate();
  };
  workers.forEach(terminateWorker);

  const onRedisClosed = () => {
    logger.info('Redis连接已关闭');
    process.exit(0);
  };
  const onRedisCloseFailed = (error) => {
    logger.error('关闭Redis连接失败:', error);
    process.exit(1);
  };

  if (!redis.isOpen) {
    process.exit(0);
  } else {
    redis.disconnect().then(onRedisClosed).catch(onRedisCloseFailed);
  }

  // Safety net: never hang longer than 5 seconds waiting for the disconnect.
  const forceExit = () => {
    logger.info('强制关闭...');
    process.exit(1);
  };
  setTimeout(forceExit, 5000);
}
|
||||
|
||||
/**
 * Prepares the service before it starts listening.
 *
 * Connects Redis when it is not already open, initialises the queues and
 * schedules a daily job that removes persisted messages older than two days.
 * Any failure is logged and terminates the process with exit code 1.
 */
async function initialize() {
  const ONE_DAY_MS = 24 * 60 * 60 * 1000;
  const MESSAGE_RETENTION_MS = 2 * ONE_DAY_MS;

  // Failures of the periodic cleanup are logged only; they must not kill the service.
  const cleanupExpiredMessages = async () => {
    try {
      await messagePersistence.cleanupOldMessages(MESSAGE_RETENTION_MS);
    } catch (error) {
      logger.error('定期清理过期消息失败:', error);
    }
  };

  logger.info('***************初始化队列开始***************');

  try {
    const needsConnect = !redis.isOpen;
    if (needsConnect) {
      await redis.connect();
      logger.info('Redis 连接成功');
    }

    await initQueue.init();
    logger.info('***************初始化队列完成***************');

    setInterval(cleanupExpiredMessages, ONE_DAY_MS);
  } catch (err) {
    logger.error('初始化失败:', err);
    process.exit(1);
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
// HTTP application: middleware, routers, health endpoint, process signals and
// start-up. The WebSocket server is attached to the HTTP server once it is
// listening.
// ---------------------------------------------------------------------------
const app = express();
const hostname = '0.0.0.0';
const port = process.env.WS_PORT || 8087;

// Middleware and routers are registered in this exact order.
app.use(cors());
app.use('/workflow/uploads', express.static('uploads'));
app.use(json());
app.use(urlencoded({ extended: true }));
app.use('/workflow/file', fileRouter);
app.use('/callback', recordRouter);

// Root endpoint: a small status document.
app.get('/', (req, res) => {
  const serviceInfo = {
    name: 'ComfyUI Task Queue Backend',
    version: '1.0.0',
    status: 'running',
    timestamp: new Date().toISOString()
  };
  res.json(serviceInfo);
});

// Both termination signals share the same shutdown routine.
for (const signal of ['SIGINT', 'SIGTERM']) {
  process.on(signal, gracefulShutdown);
}

// Start listening only after initialisation has finished.
initialize().then(() => {
  const server = app.listen(port, hostname, () => {
    const bannerRule = `========================================`;
    console.log(bannerRule);
    console.log(`HTTP & WebSocket 服务器已启动`);
    console.log(`服务地址: http://${hostname}:${port}/`);
    console.log(bannerRule);

    createWebSocketServer(server);
  });
});
|
||||
|
|
@ -1,103 +0,0 @@
|
|||
// clearDigitalHumanData.js
|
||||
import redis from './redis/index.js';
|
||||
import initQueue from './redis/initQueue.js';
|
||||
|
||||
/**
 * Deletes every Redis key that matches `pattern`, walking the keyspace with
 * SCAN in batches of 100.
 *
 * @param {string} pattern - SCAN MATCH pattern.
 * @param {string} foundLabel - Label inserted into the "keys found" log line.
 * @param {string} clearedLabel - Label inserted into the "keys deleted" log line.
 * @returns {Promise<void>}
 */
async function deleteKeysByPattern(pattern, foundLabel, clearedLabel) {
  let cursor = '0';
  do {
    const result = await redis.scan(cursor, { MATCH: pattern, COUNT: 100 });
    const keys = result.keys || [];
    cursor = result.cursor;

    console.log(`当前游标: ${cursor}, 找到${foundLabel}数量: ${keys.length}`);

    // Keep only usable, non-blank string keys.
    const validKeys = keys.filter((key) => typeof key === 'string' && key.trim() !== '');
    if (validKeys.length > 0) {
      // Pass the keys as ONE array argument: spreading them would hand extra
      // positional arguments to a command that only reads its first one.
      await redis.del(validKeys);
      console.log(`已清除 ${validKeys.length} 个${clearedLabel}`);
    }
    // Compare as string so both numeric and string cursors end the loop.
  } while (String(cursor) !== '0');
}

/**
 * Maintenance script: wipes all digital-human related data from Redis.
 *
 * Steps: wait queues, per-task keys, polling keys, result / error / callback
 * queues, then resets the per-platform task counters. On failure the error is
 * logged and the process exit code is set to 1; the Redis connection is closed
 * in every case.
 *
 * @returns {Promise<void>}
 */
async function clearDigitalHumanData() {
  try {
    console.log('开始清除数字人相关数据...');

    // Same guard as the service start-up code: connect only when not already open.
    if (!redis.isOpen) {
      await redis.connect();
    }

    // 1. Wait queues
    await redis.del('digitalHuman:runninghub:wait');
    await redis.del('digitalHuman:coze:wait');
    console.log('已清除等待队列');

    // 2. Per-task data
    await deleteKeysByPattern(`${initQueue.prefix}:task:*`, 'keys', '任务数据');

    // 3. Polling queue data
    await deleteKeysByPattern(`${initQueue.prefix}:processPolling:*`, '轮询keys', '轮询队列数据');

    // 4. Result queues
    await redis.del(initQueue.resultName);
    await redis.del(initQueue.resultList);
    console.log('已清除结果队列数据');

    // 5. Error queues
    await redis.del(initQueue.errorName);
    await redis.del(initQueue.errorList);
    console.log('已清除错误队列数据');

    // 6. Callback queue
    await redis.del(initQueue.callback);
    console.log('已清除回调队列数据');

    // 7. Reset the waiting / processing counters of both digital-human platforms
    const counterPaths = [
      '$.platforms.digitalHuman:runninghub.WQtasks',
      '$.platforms.digitalHuman:runninghub.PQtasks',
      '$.platforms.digitalHuman:coze.WQtasks',
      '$.platforms.digitalHuman:coze.PQtasks',
    ];
    for (const path of counterPaths) {
      await redis.json.set(initQueue.initInfoKey, path, '0');
    }
    console.log('已重置平台计数');

    console.log('数字人相关数据清除完成!');
  } catch (error) {
    console.error('清除数据时出错:', error);
    // Record the failure without calling process.exit(): exiting here would
    // skip the finally block and leave the Redis connection open.
    process.exitCode = 1;
  } finally {
    if (redis.isOpen) {
      await redis.disconnect();
      console.log('Redis 连接已正常关闭');
    }
  }
}
|
||||
|
||||
clearDigitalHumanData();
|
||||
|
|
@ -3,11 +3,9 @@ import path from 'path';
|
|||
import { fileURLToPath } from 'url';
|
||||
import { dirname } from 'path';
|
||||
|
||||
// 获取当前模块文件的目录
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
|
||||
// 使用相对于当前文件的路径(注意:文件在同一个目录下,不需要 .. )
|
||||
const modelPath = path.join(__dirname, 'model.json');
|
||||
const modelData = JSON.parse(fs.readFileSync(modelPath, 'utf8'));
|
||||
|
||||
|
|
@ -17,7 +15,16 @@ const platformData = JSON.parse(fs.readFileSync(platformPath, 'utf8'));
|
|||
const CostPath = path.join(__dirname, 'cost.json');
|
||||
const CostData = JSON.parse(fs.readFileSync(CostPath, 'utf8'));
|
||||
|
||||
// const errorPath = path.join(__dirname, 'error.json');
|
||||
// const errorData = JSON.parse(fs.readFileSync(errorPath, 'utf8'));
|
||||
/**
 * Sums the configured `concurrency` of the `comfyui` platform across every
 * model entry in `modelData`.
 *
 * Entries that are missing, have no `comfyui` platform, or carry a
 * non-numeric / non-positive concurrency contribute 0, so the result is
 * always a finite, non-negative number.
 *
 * @returns {number} Total external comfyui concurrency.
 */
function getExternalCapacityMax() {
  let total = 0;
  for (const platforms of Object.values(modelData)) {
    // Number() also accepts numeric strings such as "13", which would
    // otherwise turn the running sum into string concatenation.
    const concurrency = Number(platforms?.comfyui?.concurrency);
    if (Number.isFinite(concurrency) && concurrency > 0) {
      total += concurrency;
    }
  }
  return total;
}
|
||||
|
||||
export { modelData, platformData, CostData };
|
||||
export { modelData, platformData, CostData, getExternalCapacityMax };
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"callback": [
|
||||
"runninghub"
|
||||
"comfyui"
|
||||
],
|
||||
"polling": [
|
||||
"coze"
|
||||
|
|
|
|||
|
|
@ -1,3 +1,3 @@
|
|||
{
|
||||
"runninghub": 0.0012
|
||||
"comfyui": 0.0012
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"digitalHuman":{
|
||||
"runninghub":{
|
||||
"digitalHuman-test":{
|
||||
"comfyui":{
|
||||
"apikey":"3c20cd6c85514d1c86d55a5d3bcd53b7",
|
||||
"concurrency":13
|
||||
},
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ import cors from 'cors';
|
|||
import dotenv from 'dotenv';
|
||||
import fileRouter from './upload/index.js';
|
||||
import recordRouter from './outside/callback.js';
|
||||
import mdWebSocketServer from './utils/mdWebSocketServer.js';
|
||||
|
||||
// 配置 dotenv 加载环境变量
|
||||
dotenv.config();
|
||||
|
|
@ -26,11 +25,4 @@ server.use('/callback', recordRouter);
|
|||
// 启动服务器
|
||||
server.listen(port, hostname, () => {
|
||||
console.log(`Server running at http://${hostname}:${port}/`);
|
||||
|
||||
// 初始化 WebSocket 服务
|
||||
mdWebSocketServer.init().then(() => {
|
||||
console.log('[MDWebSocketServer] 初始化完成');
|
||||
}).catch((error) => {
|
||||
console.error('[MDWebSocketServer] 初始化失败:', error);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -0,0 +1,27 @@
|
|||
import dotenv from 'dotenv';
|
||||
import mdWebSocketServer from './utils/mdWebSocketServer.js';
|
||||
|
||||
// Load environment variables before anything reads them.
dotenv.config();

const BANNER_RULE = '========================================';
console.log(BANNER_RULE);
console.log('Message Dispatcher WebSocket Server');
console.log(BANNER_RULE);

/**
 * Starts the message-dispatcher WebSocket server.
 * A start-up failure is logged and ends the process with exit code 1.
 */
async function startServer() {
  try {
    await mdWebSocketServer.init();
    console.log('[MDServer] 启动成功');
  } catch (error) {
    console.error('[MDServer] 启动失败:', error);
    process.exit(1);
  }
}

startServer();

// SIGINT and SIGTERM are handled identically: log and exit cleanly.
for (const signal of ['SIGINT', 'SIGTERM']) {
  process.on(signal, () => {
    console.log('[MDServer] 正在关闭...');
    process.exit(0);
  });
}
|
||||
|
|
@ -1,41 +1,28 @@
|
|||
import {modelData} from '../config/Config.js';
|
||||
import outside from './outPlatforms/outside.js'
|
||||
import mdWebSocketServer from '../utils/mdWebSocketServer.js';
|
||||
import dotenv from 'dotenv'
|
||||
dotenv.config()
|
||||
|
||||
// 发送请求
|
||||
export async function externalPostRequest(task) { // { aigc, tasksData }
|
||||
export async function externalPostRequest(task, jwtToken = null) {
|
||||
const platform = task.platformName
|
||||
const AIGC = process.env.PROJECT_PREFIX
|
||||
|
||||
let actualPlatform = platform;
|
||||
let usedInternal = false;
|
||||
// 获取分发标识,默认为 'runninghub'
|
||||
const dispatchType = task.dispatchType || 'runninghub';
|
||||
|
||||
// 决策逻辑:如果是 runninghub 且内部有算力,尝试使用内部平台
|
||||
if (platform === 'runninghub') {
|
||||
const internalCapacity = mdWebSocketServer.getInternalCapacity();
|
||||
const hasJwtToken = !!mdWebSocketServer.getJwtToken();
|
||||
const hasConnectedClients = mdWebSocketServer.hasConnectedClients();
|
||||
console.log(`[externalPostRequest] 任务分发 - 平台: ${platform}, 分发标识: ${dispatchType}`);
|
||||
|
||||
if (internalCapacity > 0 && hasJwtToken && hasConnectedClients) {
|
||||
console.log('[externalPostRequest] 尝试使用内部 messageDispatcher 平台');
|
||||
actualPlatform = 'messageDispatcher';
|
||||
usedInternal = true;
|
||||
}
|
||||
}
|
||||
|
||||
const apikey = modelData[AIGC][platform].apikey
|
||||
const apikey = modelData[AIGC]?.[platform]?.apikey || '';
|
||||
|
||||
let response;
|
||||
let success = false;
|
||||
|
||||
try {
|
||||
const headers = await outside[actualPlatform].getGenerateHeader(apikey)
|
||||
const url = outside[actualPlatform].getGenerateUrl()
|
||||
const body = outside[actualPlatform].getGenerateBody({payload:task.taskData, apikey})
|
||||
// 对于 comfyui 平台,使用分发标识来调用相应的接口
|
||||
const headers = outside[platform].getGenerateHeader(apikey, dispatchType, jwtToken);
|
||||
const url = outside[platform].getGenerateUrl(dispatchType);
|
||||
const body = outside[platform].getGenerateBody({payload: task.taskData, apikey}, dispatchType, jwtToken);
|
||||
|
||||
console.log(`[externalPostRequest] 发送请求到 ${actualPlatform}: ${url}`);
|
||||
console.log(`[externalPostRequest] 发送请求到 ${platform} (${dispatchType}): ${url}`);
|
||||
|
||||
response = await fetch(url, { method: 'POST', headers, body: body });
|
||||
|
||||
|
|
@ -46,18 +33,18 @@ export async function externalPostRequest(task) { // { aigc, tasksData }
|
|||
|
||||
success = true;
|
||||
} catch (error) {
|
||||
if (usedInternal) {
|
||||
console.warn('[externalPostRequest] 内部平台失败,降级使用 runninghub:', error.message);
|
||||
// 降级到 runninghub
|
||||
actualPlatform = 'runninghub';
|
||||
usedInternal = false;
|
||||
// 如果是 comfyui 平台且使用了 messageDispatcher 分发,尝试降级到 runninghub
|
||||
if (platform === 'comfyui' && dispatchType === 'messageDispatcher') {
|
||||
console.warn('[externalPostRequest] messageDispatcher 分发失败,降级使用 runninghub:', error.message);
|
||||
|
||||
const fallbackDispatchType = 'runninghub';
|
||||
|
||||
try {
|
||||
const headers = await outside[actualPlatform].getGenerateHeader(apikey)
|
||||
const url = outside[actualPlatform].getGenerateUrl()
|
||||
const body = outside[actualPlatform].getGenerateBody({payload:task.taskData, apikey})
|
||||
const headers = outside[platform].getGenerateHeader(apikey, fallbackDispatchType, jwtToken);
|
||||
const url = outside[platform].getGenerateUrl(fallbackDispatchType);
|
||||
const body = outside[platform].getGenerateBody({payload: task.taskData, apikey}, fallbackDispatchType, jwtToken);
|
||||
|
||||
console.log(`[externalPostRequest] 降级发送请求到 ${actualPlatform}: ${url}`);
|
||||
console.log(`[externalPostRequest] 降级发送请求到 ${platform} (${fallbackDispatchType}): ${url}`);
|
||||
|
||||
response = await fetch(url, { method: 'POST', headers, body: body });
|
||||
|
||||
|
|
@ -88,8 +75,8 @@ export async function externalPostRequest(task) { // { aigc, tasksData }
|
|||
|
||||
// 处理成功响应
|
||||
try {
|
||||
const successResult = await outside[actualPlatform].getSuccessTasks(response);
|
||||
console.log(`[externalPostRequest] ${actualPlatform} 响应:`, successResult);
|
||||
const successResult = await outside[platform].getSuccessTasks(response, dispatchType);
|
||||
console.log(`[externalPostRequest] ${platform} 响应:`, successResult);
|
||||
|
||||
let remoteTaskId;
|
||||
if (successResult.type === 2) {
|
||||
|
|
@ -98,13 +85,13 @@ export async function externalPostRequest(task) { // { aigc, tasksData }
|
|||
remoteTaskId = { type: 1, data: successResult };
|
||||
}
|
||||
|
||||
return { taskId: task.taskId, remoteTaskId, platform: actualPlatform, AIGC, workflowId: task.workflowId };
|
||||
return { taskId: task.taskId, remoteTaskId, platform, AIGC, workflowId: task.workflowId };
|
||||
} catch (parseError) {
|
||||
console.error('[externalPostRequest] 解析响应失败:', parseError);
|
||||
return {
|
||||
taskId: task.taskId,
|
||||
remoteTaskId: { type: 2, message: `解析响应失败: ${parseError.message}` },
|
||||
platform: actualPlatform,
|
||||
platform,
|
||||
AIGC
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,87 @@
|
|||
import dotenv from 'dotenv';
|
||||
dotenv.config();
|
||||
|
||||
const DISPATCH_TYPES = {
|
||||
RUNNINGHUB: 'runninghub',
|
||||
MESSAGEDISPATCHER: 'messageDispatcher'
|
||||
};
|
||||
|
||||
/**
 * Returns the endpoint a generation request is sent to.
 *
 * @param {string} [dispatchType] - One of DISPATCH_TYPES; defaults to runninghub.
 * @returns {string|undefined} `MESSAGE_DISPATCHER_URL` for the message
 *   dispatcher, otherwise `RunningHub_URL` (undefined when the variable is unset).
 */
export function getGenerateUrl(dispatchType = DISPATCH_TYPES.RUNNINGHUB) {
  const { MESSAGE_DISPATCHER_URL, RunningHub_URL } = process.env;
  const useDispatcher = dispatchType === DISPATCH_TYPES.MESSAGEDISPATCHER;
  return useDispatcher ? MESSAGE_DISPATCHER_URL : RunningHub_URL;
}
|
||||
|
||||
/**
 * Builds the HTTP headers for a generation request.
 *
 * @param {string} apikey - Platform API key (not used for the headers).
 * @param {string} [dispatchType] - One of DISPATCH_TYPES; defaults to runninghub.
 * @param {string|null} [jwtToken] - Bearer token used for the message dispatcher.
 * @returns {Object} JSON content type plus either a Bearer `Authorization`
 *   header (message dispatcher) or the RunningHub `Host` header.
 */
export function getGenerateHeader(apikey, dispatchType = DISPATCH_TYPES.RUNNINGHUB, jwtToken = null) {
  const headers = { 'Content-Type': 'application/json' };

  switch (dispatchType) {
    case DISPATCH_TYPES.MESSAGEDISPATCHER:
      // A missing token still yields a header, with an empty credential.
      headers['Authorization'] = `Bearer ${jwtToken || ''}`;
      break;
    default:
      headers['Host'] = 'www.runninghub.cn';
  }

  return headers;
}
|
||||
|
||||
/**
 * Builds the JSON request body for a generation request.
 *
 * The task payload (a JSON string) is parsed and extended with the credential
 * for the chosen dispatch target and the callback URL from `CALLBACK_URL`.
 *
 * @param {{payload: string, apikey: string}} task - Serialized task data and platform API key.
 * @param {string} [dispatchType] - One of DISPATCH_TYPES; defaults to runninghub.
 * @param {string|null} [jwtToken] - Token sent as `apiKey` to the message dispatcher.
 * @returns {string} The serialized request body.
 * @throws {SyntaxError} When `task.payload` is not valid JSON.
 */
export function getGenerateBody(task, dispatchType = DISPATCH_TYPES.RUNNINGHUB, jwtToken = null) {
  const taskData = JSON.parse(task.payload);
  const viaDispatcher = dispatchType === DISPATCH_TYPES.MESSAGEDISPATCHER;

  const payload = {
    ...taskData,
    apiKey: viaDispatcher ? jwtToken : task.apikey,
    webhookUrl: process.env.CALLBACK_URL
  };

  if (viaDispatcher) {
    // Log a redacted copy: the real payload carries the JWT in `apiKey`,
    // which must not end up in log files.
    console.log('[comfyui - messageDispatcher] 请求体:', { ...payload, apiKey: '[REDACTED]' });
  }

  return JSON.stringify(payload);
}
|
||||
|
||||
/**
 * Returns the URL used to query task results, read from `CALLBACK_URL`.
 *
 * @returns {string|undefined} The configured URL, or undefined when unset.
 */
export function getQueryUrl() {
  const { CALLBACK_URL } = process.env;
  return CALLBACK_URL;
}
|
||||
|
||||
/**
 * Tells whether a task-status response reports success.
 *
 * The status object is now taken as a parameter; the previous body read an
 * undeclared `response` variable and threw a ReferenceError on every call.
 *
 * @param {{task_status?: string}} [response] - Status payload of the task.
 * @returns {boolean} True only when `task_status` is exactly 'SUCCESS'.
 */
export function getTaskStatus(response) {
  return response?.task_status === 'SUCCESS';
}
|
||||
|
||||
/**
 * Interprets the response of a generation request.
 *
 * Message dispatcher: a body with `success === true` and `data.requestId`
 * becomes `{ msg: 'success', code: 0, data: { taskId } }`; any other body, or
 * a failure while reading the body, becomes `{ message, type: 2 }`.
 *
 * RunningHub (default): a body with `msg === 'success'` and `code === 0`
 * yields `data.taskId`; any other body becomes `{ message, type: 2 }`.
 * Errors while reading the body are not caught on this path.
 *
 * @param {{json: Function}} response - Fetch-style response exposing `json()`.
 * @param {string} [dispatchType] - One of DISPATCH_TYPES; defaults to runninghub.
 * @returns {Promise<*>} Task id / normalized success object, or a `type: 2` error object.
 */
export async function getSuccessTasks(response, dispatchType = DISPATCH_TYPES.RUNNINGHUB) {
  if (dispatchType !== DISPATCH_TYPES.MESSAGEDISPATCHER) {
    const reply = await response.json();
    console.log('[comfyui - runninghub]:\n', reply);

    const accepted = reply.msg === 'success' && reply.code === 0;
    return accepted ? reply.data.taskId : { message: reply, type: 2 };
  }

  try {
    const reply = await response.json();
    console.log('[comfyui - messageDispatcher] 响应:\n', reply);

    const hasRequestId = reply.success === true && reply.data && reply.data.requestId;
    if (!hasRequestId) {
      return { message: reply, type: 2 };
    }
    // Normalize to the RunningHub-like success shape.
    return { msg: 'success', code: 0, data: { taskId: reply.data.requestId } };
  } catch (error) {
    console.error('[comfyui - messageDispatcher] 解析响应失败:', error);
    return { message: error.message, type: 2 };
  }
}
|
||||
|
||||
/**
 * Extracts the generated file URL from a task-result response.
 *
 * @param {string} response - JSON text of the result response.
 * @returns {Promise<{files: string, type: 1}|{message: string, type: 2}>}
 *   `type: 1` with the first file's URL on success; `type: 2` with a message
 *   when the platform reports a failure or the result contains no usable file.
 * @throws {SyntaxError} When `response` is not valid JSON.
 */
export async function getTaskResult(response) {
  // JSON.parse is synchronous; there is nothing to await here.
  const res = JSON.parse(response);

  if (res.msg !== 'success' || res.code !== 0) {
    return { message: res.msg, type: 2 };
  }

  // Only the first file is reported. A missing or empty list used to throw a
  // TypeError or produce a "success" without any file.
  const firstUrl = Array.isArray(res.data) ? res.data[0]?.fileUrl : undefined;
  if (!firstUrl) {
    return { message: '任务结果中没有可用的文件', type: 2 };
  }

  return { files: firstUrl, type: 1 };
}
|
||||
|
||||
export { DISPATCH_TYPES };
|
||||
|
|
@ -1,6 +1,5 @@
|
|||
import * as runninghub from './runninghub.js';
|
||||
import * as comfyui from './comfyui.js';
|
||||
import * as jimuai from './JimuAI.js';
|
||||
import coze from './coze/coze.js';
|
||||
import * as messageDispatcher from './messageDispatcher.js';
|
||||
|
||||
export default { runninghub, jimuai, coze, messageDispatcher };
|
||||
export default { comfyui, jimuai, coze };
|
||||
|
|
@ -2,7 +2,7 @@ import { addConsumptionHistory } from '../school/api.js'
|
|||
import outside from './outPlatforms/outside.js'
|
||||
import { modelData,CostData } from '../config/Config.js';
|
||||
|
||||
async function getTaskResult(task) { // 创建一个函数,用于获取runninghub的任务结果得到其费用
|
||||
async function getTaskResult(task) { // 创建一个函数,用于获取comfyui(runninghub)的任务结果得到其费用
|
||||
const body = JSON.stringify({
|
||||
"apiKey": modelData[task.info.AIGC][task.info.platform].apikey,
|
||||
"taskId": task.remoteTaskId
|
||||
|
|
@ -56,7 +56,7 @@ export async function record(task) { // 创建一个函数,用于记录任务
|
|||
errorMessage = file.message
|
||||
return false
|
||||
}
|
||||
if (task.info.platform === 'runninghub' && task.info.type === 2){
|
||||
if (task.info.platform === 'comfyui' && task.info.type === 2){
|
||||
let res = null
|
||||
res = await getTaskResult(task)
|
||||
// console.log('生成时长:',res.data.taskCostTime,'三方平台费用:',res.data.thirdPartyConsumeMoney)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,34 @@
|
|||
module.exports = {
|
||||
apps: [{
|
||||
name: 'digitalHuman-md-server-v2',
|
||||
script: './md-server.js',
|
||||
cwd: './',
|
||||
args: '',
|
||||
interpreter: 'node',
|
||||
interpreter_args: '',
|
||||
|
||||
watch: true,
|
||||
ignore_watch: ['config', 'logs', 'node_modules', 'redis', 'school', 'static', 'worker_threads',
|
||||
'.env', 'package.json', 'package-lock.json', 'pnpm-lock.yaml',
|
||||
'pm2Index.config.cjs', 'pm2Websocket.config.cjs', 'pm2MdServer.config.cjs'],
|
||||
|
||||
instances: 1,
|
||||
exec_mode: 'fork',
|
||||
|
||||
autorestart: true,
|
||||
max_restarts: 30,
|
||||
min_uptime: '10s',
|
||||
|
||||
out_file: './logs/md-server/out/out.log',
|
||||
error_file: './logs/md-server/error/error.log',
|
||||
log_type: 'raw',
|
||||
log_date_format: 'YYYY-MM-DD HH:mm:ss Z',
|
||||
|
||||
kill_timeout: 1600,
|
||||
restart_delay: 4000,
|
||||
|
||||
env_production: {
|
||||
NODE_ENV: 'production'
|
||||
}
|
||||
}]
|
||||
};
|
||||
|
|
@ -1,10 +1,12 @@
|
|||
import { createClient } from "redis";
|
||||
import dotenv from 'dotenv';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
// 创建 Redis 客户端,优化配置
|
||||
const redis = createClient({
|
||||
RESP: 3,
|
||||
url: process.env.REDIS_URL || 'redis://localhost:16379',
|
||||
password: process.env.REDIS_PASSWORD || '654321',
|
||||
url: process.env.REDIS_URL || 'redis://localhost:6379',
|
||||
// 优化连接配置
|
||||
socket: {
|
||||
// 连接超时时间
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import redis from './redis/index.js';
|
||||
import dotenv from 'dotenv';
|
||||
import { createClient } from 'redis';
|
||||
import initQueue from './redis/initQueue.js';
|
||||
|
||||
dotenv.config();
|
||||
|
|
@ -7,15 +7,17 @@ dotenv.config();
|
|||
const prefix = process.env.PROJECT_PREFIX || 'default';
|
||||
const initInfoKey = `${prefix}:InitInfo`;
|
||||
|
||||
const redis = createClient({
|
||||
url: process.env.REDIS_URL || 'redis://localhost:6379'
|
||||
});
|
||||
|
||||
async function clearAllProjectData() {
|
||||
try {
|
||||
console.log('正在连接Redis...');
|
||||
console.log('REDIS_URL:', process.env.REDIS_URL);
|
||||
|
||||
// 连接Redis
|
||||
if (!redis.isOpen) {
|
||||
await redis.connect();
|
||||
console.log('Redis连接成功');
|
||||
}
|
||||
await redis.connect();
|
||||
console.log('Redis连接成功');
|
||||
|
||||
console.log(`\n开始清除项目 "${prefix}" 的所有Redis数据...`);
|
||||
|
||||
|
|
|
|||
|
Before Width: | Height: | Size: 360 KiB |
|
Before Width: | Height: | Size: 319 KiB |
|
Before Width: | Height: | Size: 360 KiB |
|
Before Width: | Height: | Size: 309 KiB |
|
Before Width: | Height: | Size: 360 KiB |
|
Before Width: | Height: | Size: 384 KiB |
|
Before Width: | Height: | Size: 360 KiB |
|
Before Width: | Height: | Size: 320 KiB |
|
Before Width: | Height: | Size: 1.9 MiB |
|
Before Width: | Height: | Size: 1.9 MiB |
|
Before Width: | Height: | Size: 1.9 MiB |
|
Before Width: | Height: | Size: 1.9 MiB |
|
Before Width: | Height: | Size: 1.9 MiB |
|
Before Width: | Height: | Size: 3.6 MiB |
|
|
@ -1,7 +1,15 @@
|
|||
import { WebSocketServer as WSServer } from 'ws';
|
||||
import dotenv from 'dotenv';
|
||||
|
||||
dotenv.config();
|
||||
import { WebSocketServer as WSServer } from 'ws';
|
||||
import { getExternalCapacityMax } from '../config/Config.js';
|
||||
import redis from '../redis/index.js';
|
||||
|
||||
// Redis key 定义
|
||||
const REDIS_KEYS = {
|
||||
CAPACITY: `${process.env.PROJECT_PREFIX}:md:capacity`,
|
||||
JWT: `${process.env.PROJECT_PREFIX}:md:jwt`
|
||||
};
|
||||
|
||||
class MDWebSocketServer {
|
||||
constructor() {
|
||||
|
|
@ -10,7 +18,7 @@ class MDWebSocketServer {
|
|||
this.currentJwtToken = null;
|
||||
this.currentCapacity = { internal: 0, external: 0 };
|
||||
this.instances = new Map();
|
||||
this.port = process.env.MESSAGE_DISPATCHER_WS_PORT || 8087;
|
||||
this.port = process.env.MESSAGE_DISPATCHER_WS_PORT || 8088;
|
||||
}
|
||||
|
||||
async init() {
|
||||
|
|
@ -79,15 +87,31 @@ class MDWebSocketServer {
|
|||
}
|
||||
}
|
||||
|
||||
handleJwtUpdate(data) {
|
||||
async handleJwtUpdate(data) {
|
||||
this.currentJwtToken = data.token;
|
||||
console.log('[MDWebSocketServer] JWT Token 已更新');
|
||||
|
||||
// 同时存储到 Redis,供 worker 线程读取
|
||||
try {
|
||||
await redis.set(REDIS_KEYS.JWT, data.token);
|
||||
console.log('[MDWebSocketServer] JWT Token 已存储到 Redis');
|
||||
} catch (error) {
|
||||
console.error('[MDWebSocketServer] JWT Token 存储到 Redis 失败:', error);
|
||||
}
|
||||
}
|
||||
|
||||
handleCapacityUpdate(data) {
|
||||
async handleCapacityUpdate(data) {
|
||||
if (data.summary) {
|
||||
this.currentCapacity.internal = data.summary.onlineInstances - data.summary.busyInstances;
|
||||
console.log(`[MDWebSocketServer] 算力状态已更新: 内部可用 = ${this.currentCapacity.internal}`);
|
||||
|
||||
// 同时存储到 Redis,供 worker 线程读取
|
||||
try {
|
||||
await redis.set(REDIS_KEYS.CAPACITY, this.currentCapacity.internal.toString());
|
||||
console.log(`[MDWebSocketServer] 算力信息已存储到 Redis: ${this.currentCapacity.internal}`);
|
||||
} catch (error) {
|
||||
console.error('[MDWebSocketServer] 算力信息存储到 Redis 失败:', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -117,7 +141,7 @@ class MDWebSocketServer {
|
|||
}
|
||||
|
||||
getExternalCapacity() {
|
||||
return parseInt(process.env.EXTERNAL_CAPACITY_MAX) || 10;
|
||||
return getExternalCapacityMax() || 10;
|
||||
}
|
||||
|
||||
getInstances() {
|
||||
|
|
@ -127,6 +151,37 @@ class MDWebSocketServer {
|
|||
hasConnectedClients() {
|
||||
return this.connectedClients.size > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* 从 Redis 读取内部算力信息
|
||||
* @returns {Promise<number>} 内部可用算力
|
||||
*/
|
||||
async getInternalCapacityFromRedis() {
|
||||
try {
|
||||
const capacity = await redis.get(REDIS_KEYS.CAPACITY);
|
||||
const result = capacity ? parseInt(capacity, 10) : 0;
|
||||
console.log(`[MDWebSocketServer] 从 Redis 读取算力信息: ${result}`);
|
||||
return result;
|
||||
} catch (error) {
|
||||
console.error('[MDWebSocketServer] 从 Redis 读取算力信息失败:', error);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 从 Redis 读取 JWT Token
|
||||
* @returns {Promise<string|null>} JWT Token
|
||||
*/
|
||||
async getJwtTokenFromRedis() {
|
||||
try {
|
||||
const token = await redis.get(REDIS_KEYS.JWT);
|
||||
console.log(`[MDWebSocketServer] 从 Redis 读取 JWT Token: ${token ? '存在' : '不存在'}`);
|
||||
return token;
|
||||
} catch (error) {
|
||||
console.error('[MDWebSocketServer] 从 Redis 读取 JWT Token 失败:', error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export default new MDWebSocketServer();
|
||||
|
|
|
|||
|
|
@ -130,7 +130,7 @@ async function sendMessageToClient(id, message, close = false, closeCode = 1000,
|
|||
// 创建 WebSocket 服务器函数
|
||||
function createWebSocketServer() {
|
||||
wss = new WebSocketServer({
|
||||
port: process.env.WS_PORT || 8086,
|
||||
port: process.env.WS_PORT || 8087,
|
||||
verifyClient: async (info, callback) => {
|
||||
try {
|
||||
const urlParams = new URLSearchParams(info.req.url.split('?')[1]);
|
||||
|
|
@ -158,7 +158,7 @@ function createWebSocketServer() {
|
|||
});
|
||||
|
||||
// 日志显示WebSocket服务器端口
|
||||
logger.info(`WebSocket server is running on port: ${process.env.WS_PORT || 8082}`);
|
||||
logger.info(`WebSocket server is running on port: ${process.env.WS_PORT || 8087}`);
|
||||
|
||||
// 添加服务器错误处理
|
||||
wss.on('error', (error) => {
|
||||
|
|
|
|||
|
|
@ -4,13 +4,32 @@ import initQueue from '../../redis/initQueue.js';
|
|||
import { externalPostRequest } from '../../outside/generat.js';
|
||||
import { platformData } from '../../config/Config.js';
|
||||
|
||||
// Redis key 定义
|
||||
const REDIS_KEYS = {
|
||||
JWT: `${process.env.PROJECT_PREFIX}:md:jwt`
|
||||
};
|
||||
|
||||
/**
 * Fetches the JWT token from Redis.
 *
 * @returns {Promise<string|null>} The value stored under the JWT key, or null
 *   when the read fails (the error is logged).
 */
async function getJwtTokenFromRedis() {
  try {
    return await redis.get(REDIS_KEYS.JWT);
  } catch (error) {
    console.error('[generatTask] 从 Redis 获取 JWT Token 失败:', error);
    return null;
  }
}
|
||||
|
||||
// 批量转发待处理任务到各外部平台
|
||||
async function generatTask(tasksData) {
|
||||
// console.log('开始转发任务');
|
||||
// 预先获取 JWT Token,避免每个任务都读取一次 Redis
|
||||
const jwtToken = await getJwtTokenFromRedis();
|
||||
console.log(`[generatTask] 预获取 JWT Token: ${jwtToken ? '存在' : '不存在'}`);
|
||||
|
||||
const generatTasks = []
|
||||
for (const task of tasksData) {
|
||||
// 2. 获取任务所属平台的生成接口地址
|
||||
const generatTaskPromise = externalPostRequest(task) // { aigc, tasksData }
|
||||
// 将 jwtToken 传递给 externalPostRequest
|
||||
const generatTaskPromise = externalPostRequest(task, jwtToken)
|
||||
generatTasks.push(generatTaskPromise)
|
||||
}
|
||||
|
||||
|
|
@ -36,7 +55,6 @@ async function storeGeneratTasks(tasks) {
|
|||
const taskErrorCountMap = new Map();
|
||||
const taskCountMap = new Map();
|
||||
for (const task of tasks) {
|
||||
// console.log('\n***************',task)
|
||||
//错误任务
|
||||
if(task.remoteTaskId?.type === 2){
|
||||
console.log('储存在错误队列', task);
|
||||
|
|
@ -67,7 +85,6 @@ async function storeGeneratTasks(tasks) {
|
|||
// 使用解析后的响应数据提取外部平台任务ID
|
||||
try {
|
||||
const responseData = task.remoteTaskId.data;
|
||||
// console.log('处理成功任务,响应数据:', responseData);
|
||||
|
||||
// 直接处理响应数据,提取任务ID
|
||||
const platform = task.platform || task.platformName;
|
||||
|
|
@ -206,8 +223,6 @@ async function storeGeneratTasks(tasks) {
|
|||
if(errorCount > 0){
|
||||
initQueue.addEQtaskALL(errorCount) // 添加错误队列任务数量
|
||||
}
|
||||
// 注意:这里不再调用addPlatformsProcess,因为PQtasks计数已经在updateTaskCounts函数中处理过了
|
||||
// 避免同一个任务被两次增加PQtasks计数
|
||||
await multi.exec();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,12 @@ import { parentPort, Worker } from 'worker_threads';
|
|||
import redis from '../../redis/index.js';
|
||||
import initQueue from '../../redis/initQueue.js';
|
||||
|
||||
// Redis key 定义(与 mdWebSocketServer 保持一致)
|
||||
const REDIS_KEYS = {
|
||||
CAPACITY: `${process.env.PROJECT_PREFIX}:md:capacity`,
|
||||
JWT: `${process.env.PROJECT_PREFIX}:md:jwt`
|
||||
};
|
||||
|
||||
// 日志工具函数
|
||||
const logger = {
|
||||
info: (message) => {
|
||||
|
|
@ -18,6 +24,73 @@ const logger = {
|
|||
}
|
||||
};
|
||||
|
||||
/**
 * Reads the internally available capacity from Redis.
 *
 * A missing key, a value that is not a base-10 integer, or a Redis error all
 * yield 0, so callers never receive NaN.
 *
 * @returns {Promise<number>} Internally available capacity.
 */
async function getInternalCapacityFromRedis() {
  try {
    const capacity = await redis.get(REDIS_KEYS.CAPACITY);
    console.log(`[MDWebSocketServer] 从 Redis 读取算力信息: ${capacity}`);
    const parsed = Number.parseInt(capacity ?? '', 10);
    // parseInt yields NaN for a missing or corrupted value; report "no capacity".
    return Number.isNaN(parsed) ? 0 : parsed;
  } catch (error) {
    logger.error('从 Redis 读取算力信息失败:', error);
    return 0;
  }
}
|
||||
|
||||
/**
 * Reads the JWT token from Redis.
 *
 * @returns {Promise<string|null>} The value stored under the JWT key, or null
 *   when the read fails.
 */
async function getJwtTokenFromRedis() {
  let token;
  try {
    token = await redis.get(REDIS_KEYS.JWT);
    // Log presence only, never the token itself.
    const presence = token ? '存在' : '不存在';
    console.log(`[MDWebSocketServer] 从 Redis 读取 JWT Token: ${presence}`);
  } catch (error) {
    logger.error('从 Redis 读取 JWT Token 失败:', error);
    return null;
  }
  return token;
}
|
||||
|
||||
/**
 * Dispatch state manager.
 *
 * Tracks, for one batch, how many comfyui tasks have been handed to the
 * internal message dispatcher so that no more than the internally available
 * capacity is assigned there; everything else goes to runninghub.
 */
class DispatchStateManager {
  constructor() {
    Object.assign(this, {
      internalCapacity: 0,
      assignedToInternal: 0,
      hasJwtToken: false,
    });
  }

  /**
   * Refreshes capacity and token availability from Redis and resets the
   * per-batch assignment counter.
   *
   * @returns {Promise<void>}
   */
  async init() {
    this.internalCapacity = await getInternalCapacityFromRedis();
    const jwtToken = await getJwtTokenFromRedis();
    this.hasJwtToken = Boolean(jwtToken);
    this.assignedToInternal = 0;

    const jwtState = this.hasJwtToken ? '存在' : '不存在';
    logger.info(`[DispatchStateManager] 初始化: 内部可用算力=${this.internalCapacity}, JWT=${jwtState}`);
  }

  /**
   * Chooses the dispatch target for one task.
   *
   * @param {string} platformName - Platform the task belongs to.
   * @returns {string|null} null for non-comfyui platforms; 'messageDispatcher'
   *   while a token exists and internal slots remain (consuming one slot);
   *   otherwise 'runninghub'.
   */
  getDispatchType(platformName) {
    if (platformName !== 'comfyui') {
      return null;
    }

    const internalSlotFree = this.hasJwtToken && this.assignedToInternal < this.internalCapacity;
    if (!internalSlotFree) {
      logger.debug(`[DispatchStateManager] 分配到 runninghub (内部已满或无JWT)`);
      return 'runninghub';
    }

    this.assignedToInternal += 1;
    logger.debug(`[DispatchStateManager] 分配到 messageDispatcher (已分配 ${this.assignedToInternal}/${this.internalCapacity})`);
    return 'messageDispatcher';
  }
}
|
||||
|
||||
const dispatchStateManager = new DispatchStateManager();
|
||||
|
||||
// 创建专门的线程池管理 Worker
|
||||
const generateWorker = new Worker(new URL('./GenerateWorkerManager.js', import.meta.url));
|
||||
|
||||
|
|
@ -31,16 +104,35 @@ async function judgConcurrency() {
|
|||
|
||||
logger.debug('获取到的平台信息:', platforms);
|
||||
|
||||
// 获取内部可用算力(用于 comfyui 平台)
|
||||
const internalCapacity = await getInternalCapacityFromRedis();
|
||||
const jwtToken = await getJwtTokenFromRedis();
|
||||
const hasInternalCapacity = internalCapacity > 0 && !!jwtToken;
|
||||
|
||||
if (hasInternalCapacity) {
|
||||
logger.info(`[judgConcurrency] 内部可用算力: ${internalCapacity}, JWT: 存在`);
|
||||
}
|
||||
|
||||
// 检查每个平台的实际队列长度
|
||||
for(const [aigcPfName, info] of Object.entries(platforms)) {
|
||||
try {
|
||||
// 直接检查 Redis 队列的实际长度
|
||||
const actualQueueLength = await redis.lLen(info.waitQueue);
|
||||
logger.debug(`平台 ${aigcPfName} 信息:PQtasks=${info.PQtasks}, MAX_CONCURRENT=${info.MAX_CONCURRENT}, 实际队列长度=${actualQueueLength}`);
|
||||
// logger.debug(`平台 ${aigcPfName} 信息:PQtasks=${info.PQtasks}, MAX_CONCURRENT=${info.MAX_CONCURRENT}, 实际队列长度=${actualQueueLength}`);
|
||||
|
||||
// 计算总并发能力
|
||||
let totalCapacity = info.MAX_CONCURRENT;
|
||||
|
||||
// 对于 comfyui 平台,如果有内部算力,增加可处理任务数
|
||||
if (info.platformName === 'comfyui' && hasInternalCapacity) {
|
||||
totalCapacity = info.MAX_CONCURRENT + internalCapacity;
|
||||
logger.debug(`[judgConcurrency] comfyui 平台总并发: ${totalCapacity} (外部${info.MAX_CONCURRENT} + 内部${internalCapacity})`);
|
||||
}
|
||||
|
||||
// 判断是否可以处理任务:总并发未满且队列中有任务
|
||||
if (info.PQtasks < totalCapacity && actualQueueLength > 0) {
|
||||
let count = totalCapacity - info.PQtasks;
|
||||
|
||||
// 判断是否可以处理任务:并发数未满且队列中有任务
|
||||
if (info.PQtasks < info.MAX_CONCURRENT && actualQueueLength > 0) {
|
||||
let count = info.MAX_CONCURRENT - info.PQtasks;
|
||||
// 可处理的任务数不能大于队列实际长度
|
||||
if(count > actualQueueLength) {
|
||||
count = actualQueueLength;
|
||||
|
|
@ -48,7 +140,7 @@ async function judgConcurrency() {
|
|||
wDeficiency.push({ aigcPfName, info, count }); // 储存可进行任务处理的等待队列
|
||||
logger.debug(`平台 ${aigcPfName} 满足处理条件,可处理 ${count} 个任务`);
|
||||
} else {
|
||||
logger.debug(`平台 ${aigcPfName} 不满足处理条件:PQtasks < MAX_CONCURRENT = ${info.PQtasks < info.MAX_CONCURRENT}, 队列长度 > 0 = ${actualQueueLength > 0}`);
|
||||
// logger.debug(`平台 ${aigcPfName} 不满足处理条件:PQtasks < MAX_CONCURRENT = ${info.PQtasks < info.MAX_CONCURRENT}, 队列长度 > 0 = ${actualQueueLength > 0}`);
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error(`检查平台 ${aigcPfName} 队列长度失败:`, error);
|
||||
|
|
@ -95,6 +187,9 @@ async function getBatchWaitTasks(aigcPfTasks) {
|
|||
const tasksData = [];
|
||||
|
||||
try {
|
||||
// 在处理任务前,初始化分发状态管理器(获取最新的内部算力信息)
|
||||
await dispatchStateManager.init();
|
||||
|
||||
// 收集所有需要获取的任务ID
|
||||
const allTaskIds = [];
|
||||
const taskIdMap = new Map(); // 用于映射任务ID到平台信息
|
||||
|
|
@ -132,6 +227,8 @@ async function getBatchWaitTasks(aigcPfTasks) {
|
|||
|
||||
if (taskInfo) {
|
||||
try {
|
||||
// 使用分发状态管理器获取分发类型(前 N 个用 messageDispatcher,剩余用 runninghub)
|
||||
const dispatchType = dispatchStateManager.getDispatchType(platformInfo.platformName);
|
||||
tasksData.push({
|
||||
backendId: taskInfo.backendId,
|
||||
taskId: taskInfo.taskId, // 单个任务ID
|
||||
|
|
@ -140,6 +237,7 @@ async function getBatchWaitTasks(aigcPfTasks) {
|
|||
aigcPfName: platformInfo.aigcPfName,
|
||||
taskData: taskInfo.payload,
|
||||
workflowId: taskInfo.workflowId || '',
|
||||
dispatchType: dispatchType,
|
||||
});
|
||||
// logger.debug(`已获取任务 ${taskId} 数据:platform=${platformInfo.platformName}, aigc=${platformInfo.aigc}`);
|
||||
} catch (error) {
|
||||
|
|
@ -228,7 +326,7 @@ async function updateTaskCounts(wDeficiency) {
|
|||
}
|
||||
} else {
|
||||
// 没有可处理的队列,等待15秒后重试
|
||||
await new Promise(resolve => setTimeout(resolve, 10000));
|
||||
await new Promise(resolve => setTimeout(resolve, 15000));
|
||||
logger.debug('没有可处理的队列');
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,639 @@
|
|||
# 任务队列后端 - 业务流程分析报告
|
||||
|
||||
## 一、项目概述
|
||||
|
||||
**项目名称**: ComfyUI 任务队列后端
|
||||
|
||||
**核心功能**: 一个分布式任务处理系统,用于管理和分发 AI 图像生成任务到多个外部平台(如 comfyui、coze 等),支持任务队列、并发控制、状态跟踪和结果返回。
|
||||
|
||||
**技术栈**:
|
||||
|
||||
- Node.js + Express
|
||||
- Redis(任务队列和数据存储)
|
||||
- WebSocket(实时通信)
|
||||
- Worker Threads(多线程处理)
|
||||
|
||||
***
|
||||
|
||||
## 二、系统架构
|
||||
|
||||
### 2.1 核心组件
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ 前端 / 后端客户端 │
|
||||
└────────────────────────────┬────────────────────────────────────┘
|
||||
│ WebSocket (8087)
|
||||
│ HTTP (8089 - 回调)
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ index.js (主入口) │
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ WebSocket │ │ HTTP Server │ │ Worker │ │
|
||||
│ │ Server │ │ │ │ Manager │ │
|
||||
│ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │
|
||||
└─────────┼───────────────────┼───────────────────┼─────────────────┘
|
||||
│ │ │
|
||||
└───────────────────┴───────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Redis │
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ 等待队列 │ │ 处理队列 │ │ 结果队列 │ │
|
||||
│ │ (Wait) │ │ (Process) │ │ (Result) │ │
|
||||
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ 错误队列 │ │ 任务数据 │ │ 初始化配置 │ │
|
||||
│ │ (Error) │ │ (Hash) │ │ (InitInfo) │ │
|
||||
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### 2.2 Worker Threads 架构
|
||||
|
||||
系统使用 6 个独立的 Worker Threads 处理不同阶段的任务:
|
||||
|
||||
| Worker 名称 | 功能描述 | 文件路径 |
|
||||
| ---------------- | ------------- | ------------------------------------------ |
|
||||
| assessment | 任务预处理、参数校验、入队 | `worker_threads/assessment/assessment.js` |
|
||||
| wait | 等待队列监控、任务分发 | `worker_threads/wait/waiting.js` |
|
||||
| polling | 轮询外部平台获取任务结果 | `worker_threads/process/process.js` |
|
||||
| result | 结果队列处理、返回给客户端 | `worker_threads/result/result.js` |
|
||||
| callback\_result | 回调结果处理 | `worker_threads/callback_result/result.js` |
|
||||
| error | 错误队列处理 | `worker_threads/error/error.js` |
|
||||
|
||||
***
|
||||
|
||||
## 三、完整业务流程
|
||||
|
||||
### 3.1 流程总览图
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start([系统启动]) --> Init[初始化 Redis 队列]
|
||||
Init --> WS[WebSocket 服务启动]
|
||||
Init --> HTTP[HTTP 服务启动]
|
||||
WS --> ReceiveTask[接收任务: type=generate]
|
||||
|
||||
ReceiveTask --> Assessment[Assessment Worker 处理]
|
||||
Assessment --> Validate{参数校验}
|
||||
Validate -->|失败| ErrorReturn[返回错误给客户端]
|
||||
Validate -->|成功| StoreTask[存储任务到 Redis Hash]
|
||||
StoreTask --> PushWait[推入等待队列]
|
||||
PushWait --> NotifySuccess[通知客户端: 任务提交成功]
|
||||
|
||||
PushWait --> WaitWorker[Wait Worker 监控]
|
||||
WaitWorker --> CheckConcurrency{检查并发数}
|
||||
CheckConcurrency -->|并发未满| GetTasks[批量获取任务]
|
||||
GetTasks --> GenerateWorker[Generate Worker 分发]
|
||||
GenerateWorker --> ExternalPost[提交任务到外部平台]
|
||||
|
||||
ExternalPost --> PostResult{提交结果}
|
||||
PostResult -->|失败| PushError[推入错误队列]
|
||||
PostResult -->|成功| CheckPlatform{平台类型}
|
||||
|
||||
CheckPlatform -->|回调型| StoreCallback[存储 remoteTaskId 映射]
|
||||
CheckPlatform -->|轮询型| PushPolling[推入轮询队列]
|
||||
|
||||
StoreCallback --> WaitCallback[等待外部回调]
|
||||
PushPolling --> PollingWorker[Polling Worker 轮询]
|
||||
|
||||
WaitCallback --> CallbackReceived[收到回调]
|
||||
PollingWorker --> PollingResult{轮询结果}
|
||||
|
||||
CallbackReceived --> ProcessCallback[处理回调数据]
|
||||
PollingResult -->|完成| StoreResult[存储结果]
|
||||
PollingResult -->|未完成| ContinuePolling[继续轮询]
|
||||
|
||||
ProcessCallback --> StoreResult
|
||||
StoreResult --> PushResultQueue[推入结果队列]
|
||||
|
||||
PushResultQueue --> ResultWorker[Result Worker 处理]
|
||||
ResultWorker --> SendResult[发送结果给客户端]
|
||||
|
||||
PushError --> ErrorWorker[Error Worker 处理]
|
||||
ErrorWorker --> SendError[发送错误给客户端]
|
||||
|
||||
ContinuePolling --> PollingWorker
|
||||
```
|
||||
|
||||
### 3.2 详细流程步骤
|
||||
|
||||
#### 阶段 1: 系统启动与初始化
|
||||
|
||||
**步骤 1.1: 启动主程序**
|
||||
|
||||
- 入口文件: [index.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\index.js)
|
||||
- 加载环境变量 (.env)
|
||||
- 创建 6 个 Worker Threads
|
||||
|
||||
**步骤 1.2: Redis 初始化**
|
||||
|
||||
- 连接 Redis
|
||||
- 初始化队列配置 ([initQueue.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\redis\initQueue.js))
|
||||
- 创建 `InitInfo` 配置对象,包含:
|
||||
- 等待队列列表
|
||||
- 各平台并发配置
|
||||
- 任务计数器
|
||||
|
||||
**步骤 1.3: 启动服务**
|
||||
|
||||
- HTTP 服务器 (端口: 8087)
|
||||
- WebSocket 服务器 (同端口)
|
||||
- 回调服务器 (端口: 8089, [app.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\app.js))
|
||||
|
||||
***
|
||||
|
||||
#### 阶段 2: 任务接收与预处理
|
||||
|
||||
**步骤 2.1: 接收 WebSocket 任务**
|
||||
|
||||
- 客户端通过 WebSocket 连接,携带 token 和 id
|
||||
- 验证 token (TOKEN\_SECRET)
|
||||
- 接收 `type: "generate"` 消息
|
||||
|
||||
**步骤 2.2: Assessment Worker 处理**
|
||||
|
||||
- 文件: [assessment.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\worker_threads\assessment\assessment.js)
|
||||
- 内部线程池: 3 个 [PreproTask.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\worker_threads\assessment\PreproTask.js)
|
||||
|
||||
**步骤 2.3: 参数校验**
|
||||
|
||||
- 校验必填字段: `taskId`, `platform`, `payload`
|
||||
- 失败返回: `JSONError`, `OpcodeError`
|
||||
|
||||
**步骤 2.4: 存储任务**
|
||||
|
||||
- 创建任务 Hash: `{prefix}:task:{taskId}`
|
||||
- 字段: `taskId`, `payload`, `backendId`, `AIGC`, `platform`, `status`, `workflowId`
|
||||
- 设置过期时间: 2 小时 (7200秒)
|
||||
|
||||
**步骤 2.5: 推入等待队列**
|
||||
|
||||
- 队列命名: `{AIGC}:{platform}:wait`
|
||||
- 例如: `digitalHuman-test:comfyui:wait`
|
||||
- 更新 `InitInfo.platforms.{key}.WQtasks` +1
|
||||
|
||||
**步骤 2.6: 通知客户端**
|
||||
|
||||
- 返回成功消息: `"任务提交成功,正在排队中..."`
|
||||
|
||||
***
|
||||
|
||||
#### 阶段 3: 等待队列监控与任务分发
|
||||
|
||||
**步骤 3.1: Wait Worker 监控**
|
||||
|
||||
- 文件: [waiting.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\worker_threads\wait\waiting.js)
|
||||
- 循环检查各平台等待队列 (间隔: 15秒)
|
||||
|
||||
**步骤 3.2: 判断并发容量**
|
||||
|
||||
- 获取 `InitInfo.platforms.{key}.PQtasks` (正在处理数)
|
||||
- 获取 `InitInfo.platforms.{key}.MAX_CONCURRENT` (最大并发数)
|
||||
- 对于 comfyui 平台,还会检查内部算力 (messageDispatcher):当内部可用算力 > 0 且能获取到 JWT 时,总并发 = `MAX_CONCURRENT` + 内部可用算力
|
||||
|
||||
**步骤 3.3: 批量获取任务**
|
||||
|
||||
- 从等待队列获取任务 ID (不超过剩余并发数)
|
||||
- 批量获取任务数据 (Hash)
|
||||
|
||||
**步骤 3.4: 更新计数器**
|
||||
|
||||
- `WQtasks` -N (减少等待数)
|
||||
- `PQtasks` +N (增加处理数)
|
||||
- `PQtasksALL` +N (总处理数)
|
||||
|
||||
**步骤 3.5: 分发到 Generate Worker**
|
||||
|
||||
- 文件: [GenerateWorkerManager.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\worker_threads\wait\GenerateWorkerManager.js)
|
||||
- 内部线程池: [GenerateThreadPool.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\worker_threads\wait\GenerateThreadPool.js)
|
||||
|
||||
***
|
||||
|
||||
#### 阶段 4: 提交任务到外部平台
|
||||
|
||||
**步骤 4.1: Generate Worker 处理**
|
||||
|
||||
- 文件: [generatTask.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\worker_threads\wait\generatTask.js)
|
||||
- 批量调用 `externalPostRequest()`
|
||||
|
||||
**步骤 4.2: 外部平台接口调用**
|
||||
|
||||
- 文件: [generat.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\outside\generat.js)
|
||||
- 平台适配器: [outside.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\outside\outPlatforms\outside.js)
|
||||
|
||||
**支持的平台**:
|
||||
|
||||
| 平台 | 类型 | 适配器文件 |
|
||||
| ------- | --- | -------------------------------------------------------------------------------------------------------------- |
|
||||
| comfyui | 回调型 | [comfyui.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\outside\outPlatforms\comfyui.js) |
|
||||
| coze | 轮询型 | [coze/coze.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\outside\outPlatforms\coze\coze.js) |
|
||||
| jimuai | - | [JimuAI.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\outside\outPlatforms\JimuAI.js) |
|
||||
|
||||
**comfyui 分发策略**:
|
||||
|
||||
- 优先使用 `messageDispatcher` (内部算力)
|
||||
- 内部算力已分配满或没有 JWT 时,任务分配到 `runninghub` (外部平台);`messageDispatcher` 失败时同样降级到 `runninghub`
|
||||
|
||||
**步骤 4.3: 处理提交结果**
|
||||
|
||||
**情况 A: 提交失败**
|
||||
|
||||
- 存储错误信息到任务 Hash
|
||||
- 推入错误队列: `{prefix}:error:list`
|
||||
- `EQtaskALL` +1
|
||||
|
||||
**情况 B: 提交成功 (回调型平台)**
|
||||
|
||||
- 存储映射: `{prefix}:callback:{remoteTaskId}` → `taskId`
|
||||
- 更新任务 Hash: `remoteTaskId`
|
||||
|
||||
**情况 C: 提交成功 (轮询型平台)**
|
||||
|
||||
- 推入轮询队列: `{prefix}:processPolling:{AIGC}:{platform}`
|
||||
- Hash 结构: `{remoteTaskId}` → `{taskId, platform, AIGC, workflowId}`
|
||||
- 更新任务 Hash: `remoteTaskId`
|
||||
|
||||
***
|
||||
|
||||
#### 阶段 5: 任务结果获取
|
||||
|
||||
**分支 A: 回调型平台 (comfyui)**
|
||||
|
||||
**步骤 5A.1: 接收回调**
|
||||
|
||||
- 回调接口: `POST /callback/all` ([callback.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\outside\callback.js))
|
||||
- 立即返回 200 响应
|
||||
|
||||
**步骤 5A.2: 处理回调数据**
|
||||
|
||||
- 通过 `remoteTaskId` 查询 `taskId`
|
||||
- 存储 `eventData` 到任务 Hash 的 `resultData`
|
||||
|
||||
- 推入回调队列: `{prefix}:callback`
|
||||
- `CQtasksALL` +1
|
||||
|
||||
**步骤 5A.3: Callback Result Worker 处理**
|
||||
|
||||
- 文件: [callback\_result/result.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\worker_threads\callback_result\result.js)
|
||||
- 发送结果给客户端
|
||||
|
||||
***
|
||||
|
||||
**分支 B: 轮询型平台 (coze)**
|
||||
|
||||
**步骤 5B.1: Polling Worker 监控**
|
||||
|
||||
- 文件: [process.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\worker_threads\process\process.js)
|
||||
- 为每个平台启动独立轮询循环
|
||||
- 动态轮询间隔: 5-30秒 (根据任务数调整)
|
||||
|
||||
**步骤 5B.2: 查询外部平台**
|
||||
|
||||
- 文件: [polling.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\outside\polling.js)
|
||||
- 调用 `externalGetRequest()`
|
||||
- 批量查询 (每批最多 100 个任务)
|
||||
|
||||
**步骤 5B.3: 判断任务状态**
|
||||
|
||||
**情况 1: 任务未完成**
|
||||
|
||||
- 继续轮询 (不做任何操作)
|
||||
|
||||
**情况 2: 任务成功**
|
||||
|
||||
- 存储结果到任务 Hash 的 `resultData`
|
||||
- 更新状态: `status = "success"`
|
||||
- 推入结果队列: `{prefix}:result:list`
|
||||
- 从轮询队列删除
|
||||
- `PQtasks` -1, `RQtasksALL` +1
|
||||
|
||||
**情况 3: 任务失败**
|
||||
|
||||
- 存储错误信息到 `resultData`
|
||||
- 更新状态: `status = "failed"`
|
||||
- 推入错误队列: `{prefix}:error:list`
|
||||
- 从轮询队列删除
|
||||
- `PQtasks` -1, `EQtaskALL` +1
|
||||
|
||||
***
|
||||
|
||||
#### 阶段 6: 结果返回给客户端
|
||||
|
||||
**分支 A: 结果队列处理**
|
||||
|
||||
- 文件: [result/result.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\worker_threads\result\result.js)
|
||||
- 从结果队列获取任务
|
||||
- 通过 WebSocket 发送给对应 `backendId` 的客户端
|
||||
- 从结果队列删除
|
||||
- `RQtasksALL` -1
|
||||
|
||||
**分支 B: 错误队列处理**
|
||||
|
||||
- 文件: [error/error.js](file:///d:\WebUI\Kexue\comfyui\comfyui-cluster-bridge\任务队列后端\worker_threads\error\error.js)
|
||||
- 从错误队列获取任务
|
||||
- 通过 WebSocket 发送错误给客户端
|
||||
- 从错误队列和任务 Hash 删除
|
||||
- `EQtaskALL` -1, `WQtasks` -1
|
||||
|
||||
***
|
||||
|
||||
## 四、任务状态转换图
|
||||
|
||||
```mermaid
|
||||
stateDiagram-v2
|
||||
[*] --> pending: 任务创建
|
||||
pending --> processing: 从等待队列取出
|
||||
processing --> success: 任务成功完成
|
||||
processing --> failed: 任务失败
|
||||
success --> [*]: 结果已发送
|
||||
failed --> [*]: 错误已发送
|
||||
|
||||
note right of pending
|
||||
存储在等待队列
|
||||
WQtasks +1
|
||||
end note
|
||||
|
||||
note right of processing
|
||||
存储在处理队列/轮询队列
|
||||
PQtasks +1
|
||||
end note
|
||||
|
||||
note right of success
|
||||
存储在结果队列
|
||||
RQtasksALL +1
|
||||
end note
|
||||
|
||||
note right of failed
|
||||
存储在错误队列
|
||||
EQtaskALL +1
|
||||
end note
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
## 五、Redis 数据结构
|
||||
|
||||
### 5.1 队列列表
|
||||
|
||||
| Key 模式 | 类型 | 描述 |
|
||||
| ------------------------------------------- | ---- | ------------------------ |
|
||||
| `{prefix}:{AIGC}:{platform}:wait` | List | 等待队列,存储 taskId |
|
||||
| `{prefix}:process:Polling` | - | (已弃用,见下方) |
|
||||
| `{prefix}:processPolling:{AIGC}:{platform}` | Hash | 轮询队列,remoteTaskId → 任务信息 |
|
||||
| `{prefix}:result:queue` | - | (已弃用) |
|
||||
| `{prefix}:result:list` | List | 结果队列,存储 taskId |
|
||||
| `{prefix}:callback` | List | 回调队列,存储 taskId |
|
||||
| `{prefix}:error:queue` | - | (已弃用) |
|
||||
| `{prefix}:error:list` | List | 错误队列,存储 taskId |
|
||||
|
||||
### 5.2 任务数据 (Hash)
|
||||
|
||||
| Key | 类型 | 字段 |
|
||||
| ------------------------ | ------ | ---------------------------------------------------------------------------------------------------------- |
|
||||
| `{prefix}:task:{taskId}` | Hash | `taskId`, `payload`, `backendId`, `AIGC`, `platform`, `status`, `resultData`, `remoteTaskId`, `workflowId` |
|
||||
| <br /> | <br /> | **过期时间**: 7200秒 (2小时) |
|
||||
|
||||
### 5.3 映射关系
|
||||
|
||||
| Key | 类型 | 描述 |
|
||||
| --------------------------------------------------- | ------ | --------------------- |
|
||||
| `{prefix}:callback:{remoteTaskId}` | String | remoteTaskId → taskId |
|
||||
| `{prefix}:pending:messages` | List | 待发送消息键列表 |
|
||||
| `{prefix}:pending:messages:{backendId}:{timestamp}` | Hash | 待发送消息数据 |
|
||||
|
||||
### 5.4 初始化配置 (JSON)
|
||||
|
||||
| Key | 路径 | 描述 |
|
||||
| ------------------- | ---------------------------------- | -------- |
|
||||
| `{prefix}:InitInfo` | `$.waitQueues` | 等待队列名称数组 |
|
||||
| <br /> | `$.processPolling` | 轮询队列名称 |
|
||||
| <br /> | `$.processCallback` | 回调队列名称 |
|
||||
| <br /> | `$.resultName` | 结果队列名称 |
|
||||
| <br /> | `$.PQtasksALL` | 总处理任务数 |
|
||||
| <br /> | `$.RQtasksALL` | 总结果任务数 |
|
||||
| <br /> | `$.CQtasksALL` | 总回调任务数 |
|
||||
| <br /> | `$.EQtaskALL` | 总错误任务数 |
|
||||
| <br /> | `$.platforms.{key}.WQtasks` | 平台等待任务数 |
|
||||
| <br /> | `$.platforms.{key}.PQtasks` | 平台处理任务数 |
|
||||
| <br /> | `$.platforms.{key}.MAX_CONCURRENT` | 平台最大并发数 |
|
||||
| <br /> | `$.platforms.{key}.waitQueue` | 平台等待队列名 |
|
||||
|
||||
***
|
||||
|
||||
## 六、任务唯一标识与类型
|
||||
|
||||
### 6.1 任务唯一标识
|
||||
|
||||
| 标识 | 生成位置 | 用途 |
|
||||
| -------------- | ------ | -------- |
|
||||
| `taskId` | 前端/客户端 | 内部任务唯一标识 |
|
||||
| `remoteTaskId` | 外部平台 | 外部平台任务标识 |
|
||||
| `backendId` | 客户端连接时 | 客户端连接标识 |
|
||||
|
||||
### 6.2 任务类型
|
||||
|
||||
| 类型 | 说明 | 处理方式 |
|
||||
| -------------- | ------- | ---------- |
|
||||
| 回调型 (callback) | comfyui | 等待外部回调通知 |
|
||||
| 轮询型 (polling) | coze | 主动轮询外部平台状态 |
|
||||
|
||||
***
|
||||
|
||||
## 七、关键配置文件
|
||||
|
||||
### 7.1 model.json
|
||||
|
||||
```json
|
||||
{
|
||||
"digitalHuman-test": {
|
||||
"comfyui": {
|
||||
"apikey": "...",
|
||||
"concurrency": 13
|
||||
},
|
||||
"coze": {
|
||||
"apikey": "...",
|
||||
"concurrency": 20
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 7.2 Platform.json
|
||||
|
||||
```json
|
||||
{
|
||||
"callback": ["comfyui"],
|
||||
"polling": ["coze"]
|
||||
}
|
||||
```
|
||||
|
||||
### 7.3 .env 环境变量
|
||||
|
||||
```env
|
||||
PROJECT_PREFIX='digitalHuman-test'
|
||||
TOKEN_SECRET='...'
|
||||
WS_PORT=8087
|
||||
CALLBACK_PORT=8089
|
||||
RunningHub_URL='...'
|
||||
CALLBACK_URL='...'
|
||||
REDIS_URL='...'
|
||||
MESSAGE_DISPATCHER_URL='...'
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
## 八、错误码与消息
|
||||
|
||||
### 8.1 code.json
|
||||
|
||||
```json
|
||||
{
|
||||
"ERROR": {
|
||||
"JSONError": "消息格式错误,请联系服务商。",
|
||||
"OpcodeError": "错误提交,请稍后再试。",
|
||||
"BalanceError": "余额不足,请充值后继续使用。",
|
||||
"AssessmentError": "任务提交失败,请稍后再试。"
|
||||
},
|
||||
"SUCCESS": {
|
||||
"AssessmentSuccess": "任务提交成功,正在排队中..."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
## 九、消息持久化机制
|
||||
|
||||
当客户端断开连接时,待发送消息会被保存到 Redis,待客户端重连后重试发送:
|
||||
|
||||
1. **保存待发送消息**: `messagePersistence.savePendingMessage()`
|
||||
2. **客户端重连时获取**: `messagePersistence.getPendingMessages()`
|
||||
3. **发送成功后删除**: `messagePersistence.removePendingMessage()`
|
||||
4. **定期清理过期消息**: 超过 2 天的消息自动清理
|
||||
|
||||
***
|
||||
|
||||
## 十、流程图
|
||||
|
||||
### 10.1 完整数据流时序图
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as 客户端
|
||||
participant WS as WebSocket Server
|
||||
participant A as Assessment Worker
|
||||
participant Redis as Redis
|
||||
participant W as Wait Worker
|
||||
participant G as Generate Worker
|
||||
participant Ext as 外部平台
|
||||
participant P as Polling Worker
|
||||
participant R as Result Worker
|
||||
|
||||
Client->>WS: WebSocket 连接 (token, id)
|
||||
WS-->>Client: 连接成功
|
||||
|
||||
Client->>WS: {type: "generate", taskId, platform, payload}
|
||||
WS->>A: 转发任务
|
||||
A->>A: 参数校验
|
||||
alt 校验失败
|
||||
A-->>WS: 返回错误
|
||||
WS-->>Client: 错误消息
|
||||
else 校验成功
|
||||
A->>Redis: HSET {prefix}:task:{taskId}
|
||||
A->>Redis: RPUSH {AIGC}:{platform}:wait
|
||||
A->>Redis: INCR WQtasks
|
||||
A-->>WS: 成功
|
||||
WS-->>Client: "任务提交成功,正在排队中..."
|
||||
end
|
||||
|
||||
loop 每15秒检查
|
||||
W->>Redis: GET InitInfo.platforms
|
||||
W->>Redis: LLEN wait queue
|
||||
alt 有可处理任务
|
||||
W->>Redis: LRANGE wait queue (批量获取)
|
||||
W->>Redis: HGETALL task data
|
||||
W->>Redis: LTRIM wait queue
|
||||
W->>Redis: DECR WQtasks, INCR PQtasks
|
||||
W->>G: 发送任务数据
|
||||
G->>Ext: POST /task/create
|
||||
Ext-->>G: {remoteTaskId}
|
||||
|
||||
alt 提交失败
|
||||
G->>Redis: HSET resultData (error)
|
||||
G->>Redis: LPUSH error:list
|
||||
G->>Redis: INCR EQtaskALL
|
||||
else 回调型平台
|
||||
G->>Redis: SET callback:{remoteTaskId} = taskId
|
||||
G->>Redis: HSET remoteTaskId
|
||||
else 轮询型平台
|
||||
G->>Redis: HSET processPolling:{remoteTaskId}
|
||||
G->>Redis: HSET remoteTaskId
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
alt 回调型平台
|
||||
Ext->>WS: POST /callback/all
|
||||
WS->>Redis: GET callback:{remoteTaskId}
|
||||
WS->>Redis: HSET resultData
|
||||
WS->>Redis: LPUSH callback
|
||||
WS->>Redis: INCR CQtasksALL
|
||||
else 轮询型平台
|
||||
loop 每5-30秒轮询
|
||||
P->>Redis: HGETALL processPolling
|
||||
P->>Ext: GET /task/status
|
||||
alt 任务未完成
|
||||
P->>P: 继续轮询
|
||||
else 任务成功
|
||||
P->>Redis: HSET resultData
|
||||
P->>Redis: LPUSH result:list
|
||||
P->>Redis: HDEL processPolling
|
||||
P->>Redis: DECR PQtasks, INCR RQtasksALL
|
||||
else 任务失败
|
||||
P->>Redis: HSET resultData (error)
|
||||
P->>Redis: LPUSH error:list
|
||||
P->>Redis: HDEL processPolling
|
||||
P->>Redis: DECR PQtasks, INCR EQtaskALL
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
loop 每15秒检查
|
||||
R->>Redis: LLEN result:list / error:list
|
||||
alt 有结果
|
||||
R->>Redis: LRANGE result:list
|
||||
R->>Redis: HGETALL task data
|
||||
R->>WS: 发送结果
|
||||
WS->>Client: {taskId, result}
|
||||
R->>Redis: LREM result:list
|
||||
R->>Redis: DECR RQtasksALL
|
||||
else 有错误
|
||||
R->>Redis: LRANGE error:list
|
||||
R->>Redis: HGETALL task data
|
||||
R->>WS: 发送错误
|
||||
WS->>Client: {taskId, error}
|
||||
R->>Redis: LREM error:list
|
||||
R->>Redis: DEL task:{taskId}
|
||||
R->>Redis: DECR EQtaskALL, DECR WQtasks
|
||||
end
|
||||
end
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
## 十一、总结
|
||||
|
||||
该任务队列后端系统是一个设计完善的分布式任务处理系统,具有以下特点:
|
||||
|
||||
1. **多线程架构**: 使用 Worker Threads 实现各阶段解耦,提高并发处理能力
|
||||
2. **Redis 作为核心**: 利用 Redis 的 List、Hash、JSON 等数据结构实现任务队列和状态管理
|
||||
3. **支持多平台**: 可灵活接入不同类型的外部平台(回调型、轮询型)
|
||||
4. **任务状态追踪**: 完整的任务生命周期管理,从接收、处理到完成/失败
|
||||
5. **消息持久化**: 支持客户端断线重连后的消息补发
|
||||
6. **并发控制**: 各平台独立的并发数配置,防止过载
|
||||
7. **优雅降级**: comfyui 平台支持内部算力和外部平台的自动切换
|
||||
|
||||
系统的核心流程清晰,各组件职责明确,是一个生产级别的任务队列管理系统。
|
||||