feat(skills): add remote YAML bootstrap and materialization APIs

This commit is contained in:
Titan 2026-04-02 15:51:05 +08:00
parent 1ed736dbea
commit 867bb6de46
4 changed files with 632 additions and 0 deletions

View File

@ -9,6 +9,10 @@ class GatewayConfig(BaseModel):
host: str = Field(default="0.0.0.0", description="Host to bind the gateway server") host: str = Field(default="0.0.0.0", description="Host to bind the gateway server")
port: int = Field(default=8001, description="Port to bind the gateway server") port: int = Field(default=8001, description="Port to bind the gateway server")
cors_origins: list[str] = Field(default_factory=lambda: ["http://localhost:3000"], description="Allowed CORS origins") cors_origins: list[str] = Field(default_factory=lambda: ["http://localhost:3000"], description="Allowed CORS origins")
skill_content_api_url: str = Field(
default="https://skills.xueai.art/api/cmsContent/getContent",
description="Remote API URL used to fetch skill YAML content by content ID",
)
_gateway_config: GatewayConfig | None = None _gateway_config: GatewayConfig | None = None
@ -23,5 +27,9 @@ def get_gateway_config() -> GatewayConfig:
host=os.getenv("GATEWAY_HOST", "0.0.0.0"), host=os.getenv("GATEWAY_HOST", "0.0.0.0"),
port=int(os.getenv("GATEWAY_PORT", "8001")), port=int(os.getenv("GATEWAY_PORT", "8001")),
cors_origins=cors_origins_str.split(","), cors_origins=cors_origins_str.split(","),
skill_content_api_url=os.getenv(
"SKILL_CONTENT_API_URL",
"https://skills.xueai.art/api/cmsContent/getContent",
),
) )
return _gateway_config return _gateway_config

View File

@ -2,10 +2,13 @@ import json
import logging import logging
from pathlib import Path from pathlib import Path
import httpx
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from app.gateway.config import get_gateway_config
from app.gateway.path_utils import resolve_thread_virtual_path from app.gateway.path_utils import resolve_thread_virtual_path
from app.gateway.skill_yaml_importer import materialize_skill_tree, parse_skill_yaml_spec
from deerflow.config.extensions_config import ExtensionsConfig, SkillStateConfig, get_extensions_config, reload_extensions_config from deerflow.config.extensions_config import ExtensionsConfig, SkillStateConfig, get_extensions_config, reload_extensions_config
from deerflow.skills import Skill, load_skills from deerflow.skills import Skill, load_skills
from deerflow.skills.installer import SkillAlreadyExistsError, install_skill_from_archive from deerflow.skills.installer import SkillAlreadyExistsError, install_skill_from_archive
@ -52,6 +55,33 @@ class SkillInstallResponse(BaseModel):
message: str = Field(..., description="Installation result message") message: str = Field(..., description="Installation result message")
class RemoteSkillBootstrapRequest(BaseModel):
    """Request model for bootstrapping skill files from remote content API."""

    # Thread whose virtual user-data path receives the materialized files.
    thread_id: str = Field(..., description="Thread ID used for user-data path binding")
    # Forwarded to the remote API request body as `contentId`.
    content_id: int = Field(..., description="Remote content ID (maps from frontend query param skill_id)")
    # Forwarded to the remote API request body as `languageType`.
    language_type: int = Field(default=0, description="Language type for remote API request body")
    target_dir: str = Field(
        default="/mnt/user-data/uploads/skill",
        description="Virtual target directory where parsed files/directories are created",
    )
    clear_target: bool = Field(
        default=True,
        description="Whether to clear target directory before writing parsed files",
    )
class RemoteSkillBootstrapResponse(BaseModel):
    """Response model for remote bootstrap endpoint."""

    # Always True on the success path; failures surface as HTTPException instead.
    success: bool = Field(..., description="Whether bootstrap succeeded")
    target_dir: str = Field(..., description="Virtual target directory")
    created_directories: int = Field(..., description="Number of created directories")
    created_files: int = Field(..., description="Number of created files")
    # NOTE(review): the endpoint currently always returns None here — confirm sandbox wiring.
    sandbox_id: str | None = Field(default=None, description="Acquired sandbox ID (null when sandbox is not acquired)")
    message: str = Field(..., description="Operation result message")
def _skill_to_response(skill: Skill) -> SkillResponse: def _skill_to_response(skill: Skill) -> SkillResponse:
"""Convert a Skill object to a SkillResponse.""" """Convert a Skill object to a SkillResponse."""
return SkillResponse( return SkillResponse(
@ -171,3 +201,81 @@ async def install_skill(request: SkillInstallRequest) -> SkillInstallResponse:
except Exception as e: except Exception as e:
logger.error(f"Failed to install skill: {e}", exc_info=True) logger.error(f"Failed to install skill: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Failed to install skill: {str(e)}") raise HTTPException(status_code=500, detail=f"Failed to install skill: {str(e)}")
@router.post(
    "/skills/bootstrap-remote",
    response_model=RemoteSkillBootstrapResponse,
    summary="Bootstrap Skill Files From Remote API",
    description=(
        "Fetch YAML text from configured remote API by content_id/language_type and "
        "materialize files into /mnt/user-data/uploads/skill before first thread submit."
    ),
)
async def bootstrap_skill_from_remote(request: RemoteSkillBootstrapRequest) -> RemoteSkillBootstrapResponse:
    """Initialize thread skill directory from remote YAML content service.

    Flow:
        1. POST ``{contentId, languageType}`` to the configured content API.
        2. Validate the envelope (HTTP status, JSON body, business
           ``status == 1000``, non-empty ``data`` string).
        3. Parse the YAML spec and materialize it under the thread's
           virtual target directory.

    Raises:
        HTTPException: 502 when the remote API fails or returns a bad
            envelope, 400 for invalid YAML/path input, 500 for unexpected
            errors.
    """
    try:
        cfg = get_gateway_config()
        api_url = cfg.skill_content_api_url
        payload = {
            "contentId": request.content_id,
            "languageType": request.language_type,
        }
        async with httpx.AsyncClient(timeout=20.0) as client:
            response = await client.post(api_url, json=payload)
        if response.status_code >= 400:
            raise HTTPException(
                status_code=502,
                detail=f"Remote skill content API failed with HTTP {response.status_code}",
            )
        try:
            response_json = response.json()
        except ValueError as e:
            raise HTTPException(status_code=502, detail=f"Remote API did not return valid JSON: {e}") from e
        # Remote API convention: business status 1000 means success.
        status = response_json.get("status")
        if status != 1000:
            raise HTTPException(
                status_code=502,
                detail=f"Remote API returned non-success status: {status}, message: {response_json.get('message')}",
            )
        yaml_text = response_json.get("data")
        if not isinstance(yaml_text, str) or not yaml_text.strip():
            raise HTTPException(status_code=502, detail="Remote API returned empty or invalid YAML content")
        # Resolve the thread-scoped real path, then parse and write the tree.
        target_path = resolve_thread_virtual_path(request.thread_id, request.target_dir)
        parsed = parse_skill_yaml_spec(yaml_text)
        materialize_skill_tree(parsed, target_path, clear_target=request.clear_target)
        logger.info(
            "Bootstrapped remote skill YAML for thread %s (content_id=%s, language_type=%s) to %s: dirs=%d files=%d",
            request.thread_id,
            request.content_id,
            request.language_type,
            request.target_dir,
            len(parsed.directories),
            len(parsed.files),
        )
        return RemoteSkillBootstrapResponse(
            success=True,
            target_dir=request.target_dir,
            created_directories=len(parsed.directories),
            created_files=len(parsed.files),
            sandbox_id=None,
            message=(
                f"Bootstrapped {len(parsed.files)} files and {len(parsed.directories)} directories "
                f"under '{request.target_dir}'"
            ),
        )
    except HTTPException:
        raise
    except ValueError as e:
        # Path/YAML validation problems are client errors; keep the cause chain (B904).
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Lazy %-args avoid f-string work when this level is disabled.
        logger.error("Failed to bootstrap skill from remote API: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to bootstrap skill from remote API: {str(e)}") from e

View File

@ -0,0 +1,475 @@
"""Utilities for parsing YAML-defined skill package structures.
This module supports turning a YAML document describing files/directories into
real filesystem content under a thread's virtual path (for example,
``/mnt/user-data/uploads/skill``).
"""
from __future__ import annotations
import argparse
import json
import sys
from dataclasses import dataclass
from pathlib import Path
import yaml # type: ignore[import-not-found]
@dataclass(frozen=True)
class ParsedSkillTree:
    """Normalized parsed structure from YAML spec."""

    # Relative directory paths (POSIX-style, no leading slash) to create.
    directories: set[str]
    # Mapping of relative file path -> text content to write.
    files: dict[str, str]
def _pick_first_existing(data: dict, keys: tuple[str, ...]):
for key in keys:
if key in data:
return data[key]
return None
def _extract_spec_root(data: dict) -> dict:
"""Extract the effective spec root.
Supports nested wrappers like:
- skill: { ... }
- package: { ... }
- spec: { ... }
"""
if not isinstance(data, dict):
raise ValueError("YAML root must be an object")
known_keys = {
"entries",
"files",
"directories",
"dirs",
"tree",
"structure",
"file_tree",
"fileTree",
"file_structure",
"paths",
}
if any(k in data for k in known_keys):
return data
wrapper_candidates = ("skill", "package", "spec", "data", "content", "payload")
for wrapper in wrapper_candidates:
candidate = data.get(wrapper)
if isinstance(candidate, dict) and any(k in candidate for k in known_keys):
return candidate
# Fallback: if exactly one nested object exists, try it as spec root.
nested_dicts = [v for v in data.values() if isinstance(v, dict)]
if len(nested_dicts) == 1:
return nested_dicts[0]
return data
def _normalize_relative_path(path: str) -> str:
"""Normalize and validate a relative path.
Raises:
ValueError: If path is unsafe or invalid.
"""
if not isinstance(path, str):
raise ValueError("Path must be a string")
normalized = path.strip().replace("\\", "/")
if normalized in {"/", ".", "./"}:
return ""
if not normalized:
raise ValueError("Path cannot be empty")
if normalized.startswith("/"):
raise ValueError(f"Path must be relative, got absolute path: {path}")
if ":" in normalized:
raise ValueError(f"Path cannot contain ':' (possible drive path): {path}")
parts = [part for part in normalized.split("/") if part]
if not parts:
raise ValueError("Path cannot be empty")
if any(part in {".", ".."} for part in parts):
raise ValueError(f"Path traversal is not allowed: {path}")
return "/".join(parts)
def _add_directory(path: str, directories: set[str]) -> None:
    """Record *path* (normalized) in *directories*; root sentinels are ignored."""
    rel = _normalize_relative_path(path)
    if rel:
        directories.add(rel)
def _add_file(path: str, content: str, files: dict[str, str], directories: set[str]) -> None:
    """Record a file and make sure its parent directory is tracked.

    Raises:
        ValueError: If the path normalizes to root or content is not a string.
    """
    rel = _normalize_relative_path(path)
    if not rel:
        raise ValueError("File path cannot be root ('/')")
    if not isinstance(content, str):
        raise ValueError(f"File content must be a string for '{rel}'")
    parent = str(Path(rel).parent)
    if parent != ".":
        directories.add(parent.replace("\\", "/"))
    files[rel] = content
def _walk_tree_dict(tree: dict, base: str, files: dict[str, str], directories: set[str]) -> None:
    """Recursively flatten a nested tree (dict = directory, str = file content)."""
    for key, node in tree.items():
        if not isinstance(key, str):
            raise ValueError("Tree keys must be strings")
        if key.strip() in {"/", ".", "./"}:
            # Root sentinel: merge its children directly into the current base.
            if not isinstance(node, dict):
                raise ValueError("Root sentinel '/' can only be used for directory/object nodes")
            _walk_tree_dict(node, base, files, directories)
            continue
        child_path = f"{base}/{key}" if base else key
        if isinstance(node, str):
            _add_file(child_path, node, files, directories)
        elif isinstance(node, dict):
            _add_directory(child_path, directories)
            _walk_tree_dict(node, _normalize_relative_path(child_path), files, directories)
        else:
            raise ValueError(
                f"Unsupported tree node type for '{child_path}': {type(node).__name__}. "
                "Use object (directory) or string (file content)."
            )
def _parse_entries_node(
    node: dict,
    base: str,
    files: dict[str, str],
    directories: set[str],
) -> None:
    """Parse one entry object (and its children) into files/directories.

    An entry may carry `path`, `name`, or both; its type comes from `type`
    or is inferred from `children` (directory) / `content` (file).

    Raises:
        ValueError: On missing/invalid fields or when the type cannot be inferred.
    """
    raw_path = node.get("path")
    raw_name = node.get("name")
    if raw_path is None and raw_name is None:
        raise ValueError("Each entry must have at least one of: 'path' or 'name'")
    if raw_path is not None and not isinstance(raw_path, str):
        raise ValueError("Entry 'path' must be a string")
    if raw_name is not None and not isinstance(raw_name, str):
        raise ValueError("Entry 'name' must be a string")
    # Common schema compatibility:
    # - `path` is parent directory (e.g. "/")
    # - `name` is current node name (e.g. "README.md")
    # Build parent then append name when both are present.
    parent = base
    if isinstance(raw_path, str) and raw_path.strip():
        rp = raw_path.strip()
        if rp not in {"/", ".", "./"}:
            parent = _normalize_relative_path(f"{base}/{rp}" if base else rp)
    if isinstance(raw_name, str) and raw_name.strip():
        if parent:
            node_path = _normalize_relative_path(f"{parent}/{raw_name.strip()}")
        else:
            node_path = _normalize_relative_path(raw_name.strip())
    else:
        # Fallback: only path provided
        if not isinstance(raw_path, str) or not raw_path.strip():
            raise ValueError("Each entry must have a non-empty 'path' or 'name'")
        rp = raw_path.strip()
        if rp in {"/", ".", "./"}:
            node_path = base
        else:
            node_path = _normalize_relative_path(f"{base}/{rp}" if base else rp)
    node_type = node.get("type")
    content = node.get("content")
    children = node.get("children")
    # Inference: a children list implies directory; any content implies file.
    inferred_type = "directory" if isinstance(children, list) else "file" if content is not None else None
    final_type = node_type or inferred_type
    if final_type == "directory":
        _add_directory(node_path, directories)
        if children is None:
            return
        if not isinstance(children, list):
            raise ValueError(f"Entry '{node_path}' children must be a list")
        for child in children:
            if not isinstance(child, dict):
                raise ValueError(f"Entry '{node_path}' children must be objects")
            # Recurse with this directory as the new base.
            _parse_entries_node(child, node_path, files, directories)
        return
    if final_type == "file":
        if content is None:
            raise ValueError(f"File entry '{node_path}' is missing 'content'")
        _add_file(node_path, content, files, directories)
        return
    raise ValueError(
        f"Unable to infer entry type for '{node_path}'. Set 'type' to 'file' or 'directory'."
    )
def parse_skill_yaml_spec(yaml_text: str) -> ParsedSkillTree:
    """Parse YAML text into normalized directories and files.

    Supported forms:
      - entries: [{type,path/content/children}, ...]
      - files: {"path/to/file": "text"} + optional directories/dirs
      - tree/structure: nested dict where dict=directory and string=file content

    Raises:
        ValueError: If the YAML is invalid, empty, not an object, or yields
            no files/directories.
    """
    try:
        data = yaml.safe_load(yaml_text)
    except yaml.YAMLError as e:
        raise ValueError(f"Invalid YAML: {e}") from e
    if data is None:
        raise ValueError("YAML is empty")
    if not isinstance(data, dict):
        raise ValueError("YAML root must be an object")
    # Unwrap optional wrappers (skill:/package:/spec:/...) around the spec body.
    data = _extract_spec_root(data)
    directories: set[str] = set()
    files: dict[str, str] = {}
    # Form 1: explicit entries list
    entries = _pick_first_existing(data, ("entries", "nodes", "items"))
    if entries is not None:
        if not isinstance(entries, list):
            raise ValueError("'entries' must be a list")
        for entry in entries:
            if not isinstance(entry, dict):
                raise ValueError("Each item in 'entries' must be an object")
            _parse_entries_node(entry, "", files, directories)
    # Form 2: files + directories
    file_map = _pick_first_existing(data, ("files", "paths", "file_map", "fileMap", "documents"))
    if file_map is not None:
        if isinstance(file_map, dict):
            for path, content in file_map.items():
                _add_file(path, content, files, directories)
        elif isinstance(file_map, list):
            for item in file_map:
                if not isinstance(item, dict):
                    raise ValueError("Each item in 'files' list must be an object")
                # Accept several aliases for the path and content fields.
                path = item.get("path") or item.get("name") or item.get("file")
                content = item.get("content")
                if content is None:
                    content = item.get("text")
                if content is None:
                    content = item.get("body")
                if path is None or content is None:
                    raise ValueError("Each file item needs 'path' and 'content'")
                _add_file(path, content, files, directories)
        else:
            raise ValueError("'files' must be a map or list")
    directory_list = _pick_first_existing(data, ("directories", "dirs", "folders", "folder_paths"))
    if directory_list is not None:
        if not isinstance(directory_list, list):
            raise ValueError("'directories'/'dirs' must be a list")
        for path in directory_list:
            _add_directory(path, directories)
    # Form 3: nested tree
    tree = _pick_first_existing(data, ("tree", "structure", "file_tree", "fileTree", "file_structure"))
    if tree is not None:
        if isinstance(tree, dict):
            _walk_tree_dict(tree, "", files, directories)
        elif isinstance(tree, list):
            # A list-shaped tree is treated like entries.
            for item in tree:
                if not isinstance(item, dict):
                    raise ValueError("Items in 'tree' list must be objects")
                _parse_entries_node(item, "", files, directories)
        else:
            raise ValueError("'tree'/'structure' must be an object or list")
    # Heuristic fallback: treat root as path->content map when possible.
    if not files and not directories:
        candidate_keys = [k for k in data.keys() if isinstance(k, str)]
        if candidate_keys and all(isinstance(data[k], str) for k in candidate_keys):
            for path, content in data.items():
                _add_file(path, content, files, directories)
    if not files and not directories:
        raise ValueError(
            "No content found. Provide at least one of: entries, files, directories/dirs, tree/structure"
        )
    # Ensure parent directories exist for every file
    for rel_file in files:
        parent = Path(rel_file).parent
        if str(parent) != ".":
            directories.add(str(parent).replace("\\", "/"))
    return ParsedSkillTree(directories=directories, files=files)
def materialize_skill_tree(parsed: ParsedSkillTree, target_root: Path, clear_target: bool = True) -> None:
    """Write the parsed tree (directories then files) below *target_root*.

    When *clear_target* is true, any existing tree at *target_root* is removed
    first so the result mirrors the parsed spec exactly.
    """
    if clear_target and target_root.exists():
        import shutil

        shutil.rmtree(target_root)
    target_root.mkdir(parents=True, exist_ok=True)
    # Shallow directories first; mkdir(parents=True) makes ordering non-critical.
    shallow_first = sorted(parsed.directories, key=lambda rel: (rel.count("/"), rel))
    for rel_dir in shallow_first:
        (target_root / rel_dir).mkdir(parents=True, exist_ok=True)
    for rel_path, text in parsed.files.items():
        destination = target_root / rel_path
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(text, encoding="utf-8")
def _build_cli_parser() -> argparse.ArgumentParser:
"""Build command-line argument parser.
CLI usage:
python skill_yaml_importer.py <input_path> [options]
"""
parser = argparse.ArgumentParser(description="Parse and validate a skill YAML spec file")
parser.add_argument("input_path", help="Path to a YAML file or a directory containing YAML files")
parser.add_argument(
"--show-files",
action="store_true",
help="Print sorted parsed file paths",
)
parser.add_argument(
"--show-directories",
action="store_true",
help="Print sorted parsed directory paths",
)
parser.add_argument(
"--json",
action="store_true",
help="Print parsed summary as JSON",
)
parser.add_argument(
"--recursive",
action="store_true",
help="When input path is a directory, scan YAML files recursively",
)
parser.add_argument(
"--log-file",
default=None,
help="Optional path to save full execution results and summary as JSON",
)
return parser
def _collect_yaml_files(input_path: Path, recursive: bool) -> list[Path]:
if input_path.is_file():
return [input_path]
if not input_path.is_dir():
return []
patterns = ("*.yaml", "*.yml")
files: list[Path] = []
for pattern in patterns:
iterator = input_path.rglob(pattern) if recursive else input_path.glob(pattern)
files.extend(iterator)
# Stable order for deterministic output
return sorted({p.resolve() for p in files})
def _parse_one_yaml_file(yaml_path: Path, show_files: bool, show_directories: bool) -> dict:
    """Parse a single YAML spec file and return its summary record."""
    parsed = parse_skill_yaml_spec(yaml_path.read_text(encoding="utf-8"))
    dir_list = sorted(parsed.directories)
    file_list = sorted(parsed.files)
    return {
        "yaml_file": str(yaml_path),
        "directories_count": len(dir_list),
        "files_count": len(file_list),
        # Detail lists are included only when the caller asked for them.
        "directories": dir_list if show_directories else None,
        "files": file_list if show_files else None,
    }
def _main() -> int:
    """CLI entrypoint for parsing one YAML file or a batch of YAML files.

    Exit codes:
        0: all files parsed successfully
        1: invalid input path or no YAML files found
        2: run completed with one or more parse failures
    """
    args = _build_cli_parser().parse_args()
    input_path = Path(args.input_path)
    if not input_path.exists():
        print(f"Input path not found: {input_path}", file=sys.stderr)
        return 1
    yaml_files = _collect_yaml_files(input_path, recursive=args.recursive)
    if not yaml_files:
        print(f"No YAML files found under: {input_path}", file=sys.stderr)
        return 1
    successes: list[dict] = []
    failures: list[dict[str, str]] = []
    for yaml_path in yaml_files:
        try:
            result = _parse_one_yaml_file(
                yaml_path,
                show_files=args.show_files,
                show_directories=args.show_directories,
            )
            successes.append(result)
            # Per-file progress lines are suppressed in --json mode.
            if not args.json:
                print(f"OK: {yaml_path}")
                print(f" Directories: {result['directories_count']}")
                print(f" Files: {result['files_count']}")
        except Exception as e:  # noqa: BLE001
            # Keep going on parse failure; everything is reported at the end.
            failures.append({"yaml_file": str(yaml_path), "error": str(e)})
            print(f"ERROR: {yaml_path}: {e}", file=sys.stderr)
    summary = {
        "input_path": str(input_path),
        "total": len(yaml_files),
        "success": len(successes),
        "failed": len(failures),
    }
    report = {"summary": summary, "successes": successes, "failures": failures}
    if args.log_file:
        try:
            log_path = Path(args.log_file)
            log_path.parent.mkdir(parents=True, exist_ok=True)
            log_path.write_text(json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8")
            print(f"Log saved: {log_path}")
        except Exception as e:  # noqa: BLE001
            # A failed log write must not change the exit status.
            print(f"Failed to write log file '{args.log_file}': {e}", file=sys.stderr)
    if args.json:
        print(json.dumps(report, ensure_ascii=False, indent=2))
    else:
        print("\n[Summary]")
        print(f"Input: {summary['input_path']}")
        print(f"Total: {summary['total']}")
        print(f"Success: {summary['success']}")
        print(f"Failed: {summary['failed']}")
    return 0 if not failures else 2
if __name__ == "__main__":
    # Exit with _main()'s status code (0 ok, 1 bad input, 2 parse failures).
    raise SystemExit(_main())

View File

@ -35,6 +35,23 @@ export interface InstallSkillResponse {
message: string; message: string;
} }
/** Request body for POST /api/skills/bootstrap-remote. */
export interface BootstrapRemoteSkillRequest {
  /** Thread ID used for user-data path binding. */
  thread_id: string;
  /** Remote content ID (maps from the frontend query param skill_id). */
  content_id: number;
  /** Language type for the remote API request body (backend default: 0). */
  language_type?: number;
  /** Virtual target directory (backend default: /mnt/user-data/uploads/skill). */
  target_dir?: string;
  /** Clear the target directory before writing (backend default: true). */
  clear_target?: boolean;
}
/** Response body of POST /api/skills/bootstrap-remote. */
export interface BootstrapRemoteSkillResponse {
  /** Whether bootstrap succeeded (failures arrive as HTTP errors instead). */
  success: boolean;
  /** Virtual target directory that was populated. */
  target_dir: string;
  /** Number of directories created. */
  created_directories: number;
  /** Number of files created. */
  created_files: number;
  /** Acquired sandbox ID; null when no sandbox is acquired. */
  sandbox_id: string | null;
  /** Human-readable operation result message. */
  message: string;
}
export async function installSkill( export async function installSkill(
request: InstallSkillRequest, request: InstallSkillRequest,
): Promise<InstallSkillResponse> { ): Promise<InstallSkillResponse> {
@ -60,3 +77,27 @@ export async function installSkill(
return response.json(); return response.json();
} }
/** Ask the gateway to bootstrap skill files from the remote content API. */
export async function bootstrapRemoteSkill(
  request: BootstrapRemoteSkillRequest,
): Promise<BootstrapRemoteSkillResponse> {
  const url = `${getBackendBaseURL()}/api/skills/bootstrap-remote`;
  const response = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(request),
  });
  if (response.ok) {
    return response.json();
  }
  // Prefer the FastAPI `detail` field; fall back to the HTTP status line.
  const errorData = await response.json().catch(() => ({}));
  throw new Error(
    errorData.detail ?? `HTTP ${response.status}: ${response.statusText}`,
  );
}