deerflow2/backend/packages/harness/deerflow/skills/loader.py

157 lines
5.6 KiB
Python

import logging
import os
from pathlib import Path
from .parser import parse_skill_file
from .types import Skill
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Default root scanned for uploaded skills. get_uploads_skills_path() returns this
# when no thread ID is given or when the per-thread lookup fails.
UPLOADS_SKILLS_PATH = Path("/mnt/user-data/uploads")
def get_uploads_skills_path(thread_id: str | None = None) -> Path:
    """Return the directory that holds uploaded skills for this execution context.

    Inside the LangGraph process, uploaded skills are stored in a host-side,
    per-thread data directory instead of the sandbox mount path, so a thread ID
    is needed to locate them.

    Args:
        thread_id: Thread whose uploads directory should be resolved. When it is
            missing or empty, the default ``UPLOADS_SKILLS_PATH`` is returned.

    Returns:
        The per-thread uploads directory, or ``UPLOADS_SKILLS_PATH`` when no
        thread ID was supplied or the per-thread lookup raised.
    """
    if thread_id:
        try:
            # Imported lazily, inside the try, so an import failure also falls back.
            from deerflow.config.paths import get_paths

            paths = get_paths()
            return paths.sandbox_uploads_dir(thread_id)
        except Exception as err:
            # Best effort: report the problem and use the default location below.
            logger.warning("Failed to resolve uploads skills path for thread %s: %s", thread_id, err)
    return UPLOADS_SKILLS_PATH
def get_skills_root_path() -> Path:
    """Return the default skills directory, derived from this file's location.

    Returns:
        Path to the ``skills`` directory that sits next to ``backend/``
        (deer-flow/skills).
    """
    location = Path(__file__).resolve()
    # loader.py lives at packages/harness/deerflow/skills/loader.py, so climbing
    # five levels from the file itself lands on backend/.
    for _ in range(5):
        location = location.parent
    # The skills directory is a sibling of backend/.
    return location.parent / "skills"
def _find_skill_dirs(category_path: Path) -> list[Path]:
    """Return every directory under *category_path* that directly contains a SKILL.md.

    Symlinked directories are followed. Hidden directories (names starting with
    ".") are not descended into, and sub-directories are visited in sorted order
    so the result is deterministic. A symlink that leads back to a directory the
    walk is already inside of is not descended into, because ``os.walk`` does not
    detect such cycles itself.

    Args:
        category_path: Root directory to scan.

    Returns:
        Skill directories in traversal order.
    """
    skill_dirs: list[Path] = []
    # For each walked directory: the resolved paths of itself and of every
    # directory walked on the way down to it.
    lineage_by_dir: dict[str, frozenset[str]] = {}
    for current_root, dir_names, file_names in os.walk(category_path, followlinks=True):
        real_root = os.path.realpath(current_root)
        lineage = lineage_by_dir.get(os.path.dirname(current_root), frozenset())
        if real_root in lineage:
            # This directory is an alias of one of its own ancestors: stop here,
            # otherwise the same tree would be traversed over and over.
            dir_names[:] = []
            continue
        lineage_by_dir[current_root] = lineage | {real_root}
        # Keep traversal deterministic and skip hidden directories.
        dir_names[:] = sorted(name for name in dir_names if not name.startswith("."))
        if "SKILL.md" in file_names:
            skill_dirs.append(Path(current_root))
    return skill_dirs


def load_skills(
    skills_path: Path | None = None,
    use_config: bool = True,
    enabled_only: bool = False,
    thread_id: str | None = None,
) -> list[Skill]:
    """
    Load all skills from the skills directory and the uploads directory.

    Scans the ``public`` and ``custom`` directories under the skills root, and
    also scans uploaded skills under the path returned by
    ``get_uploads_skills_path(thread_id)``. Each SKILL.md found is parsed with
    ``parse_skill_file``; files for which it returns a falsy value are dropped.
    The enabled state of public/custom skills is read from the extensions
    configuration on disk; uploaded skills are always enabled.

    Uploaded skills live outside the skills root, so they are still loaded when
    the skills root itself does not exist.

    Args:
        skills_path: Optional custom path to skills directory.
            If not provided and use_config is True, uses path from config.
            Otherwise defaults to deer-flow/skills
        use_config: Whether to load skills path from config (default: True)
        enabled_only: If True, only return enabled skills (default: False)
        thread_id: Optional thread ID used to resolve per-thread uploads skills
            from the LangGraph host process

    Returns:
        List of Skill objects, sorted by name
    """
    if skills_path is None:
        if use_config:
            try:
                from deerflow.config import get_app_config

                config = get_app_config()
                skills_path = config.skills.get_skills_path()
            except Exception as exc:
                # Fall back to the default, but say so: a silent fallback makes
                # "why is the wrong directory being scanned" hard to diagnose.
                logger.warning("Failed to resolve skills path from config, using default: %s", exc)
                skills_path = get_skills_root_path()
        else:
            skills_path = get_skills_root_path()

    if not skills_path.exists():
        # Not fatal: public/custom are skipped by the per-directory check below,
        # while uploads are located independently of the skills root.
        logger.debug("Skills root %s does not exist; only uploads can be scanned", skills_path)

    skills = []
    # Scan built-in roots and uploaded skills mounted in personal workspace.
    scan_targets: list[tuple[str, Path]] = [
        ("public", skills_path / "public"),
        ("custom", skills_path / "custom"),
        ("uploads", get_uploads_skills_path(thread_id)),
    ]
    for category, category_path in scan_targets:
        logger.debug("Scanning %s skills under %s", category, category_path)
        if not category_path.exists() or not category_path.is_dir():
            logger.debug("Skip %s scan: directory not found or not a directory (%s)", category, category_path)
            continue

        scanned_skill_dirs: list[str] = []
        for skill_dir in _find_skill_dirs(category_path):
            relative_path = skill_dir.relative_to(category_path)
            scanned_skill_dirs.append(relative_path.as_posix())
            skill = parse_skill_file(skill_dir / "SKILL.md", category=category, relative_path=relative_path)
            if skill:
                skills.append(skill)

        if scanned_skill_dirs:
            logger.debug(
                "%s scan found %d skill directories: %s",
                category,
                len(scanned_skill_dirs),
                ", ".join(sorted(scanned_skill_dirs)),
            )
        else:
            logger.debug("%s scan found no skill directories", category)

    if not skills:
        # Nothing to enable, filter or sort, so skip reading the config from disk.
        return []

    # Load skills state configuration and update enabled status
    # NOTE: We use ExtensionsConfig.from_file() instead of get_extensions_config()
    # to always read the latest configuration from disk. This ensures that changes
    # made through the Gateway API (which runs in a separate process) are immediately
    # reflected in the LangGraph Server when loading skills.
    try:
        from deerflow.config.extensions_config import ExtensionsConfig

        extensions_config = ExtensionsConfig.from_file()
        for skill in skills:
            if skill.category == "uploads":
                # Uploaded skills should be available by default for the current thread.
                skill.enabled = True
                continue
            skill.enabled = extensions_config.is_skill_enabled(skill.name, skill.category)
    except Exception as e:
        # Skills not updated above keep whatever enabled value parse_skill_file
        # gave them (presumably enabled; verify against the parser).
        logger.warning("Failed to load extensions config: %s", e)

    # Filter by enabled status if requested
    if enabled_only:
        skills = [skill for skill in skills if skill.enabled]

    # Sort by name for consistent ordering
    skills.sort(key=lambda s: s.name)
    return skills