# deerflow2/backend/packages/harness/deerflow/skills/loader.py

import logging
import os
from pathlib import Path

from .parser import parse_skill_file
from .types import Skill

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Default root for uploaded skills. get_uploads_skills_path() returns this path
# when no thread ID is given or when per-thread resolution fails.
UPLOADS_SKILLS_PATH = Path("/mnt/user-data/uploads")


def get_uploads_skills_path(thread_id: str | None = None) -> Path:
    """Return the directory that holds uploaded skills.

    Without a thread ID the module-level default ``UPLOADS_SKILLS_PATH`` is
    returned. With a thread ID, the per-thread uploads directory is looked up
    through ``deerflow.config.paths``; per the original note, this is the
    host-side location used when running in the LangGraph process rather than
    inside the sandbox mount.

    Args:
        thread_id: Thread whose uploads directory should be resolved. ``None``
            or an empty string selects the default path.

    Returns:
        The resolved uploads directory, or ``UPLOADS_SKILLS_PATH`` when no
        thread ID was given or the lookup raised.
    """
    if thread_id:
        try:
            # Imported lazily, so a failing import is handled by the fallback too.
            from deerflow.config.paths import get_paths

            paths = get_paths()
            return paths.sandbox_uploads_dir(thread_id)
        except Exception as err:
            # Best-effort: report the problem and fall through to the default.
            logger.warning("Failed to resolve uploads skills path for thread %s: %s", thread_id, err)

    return UPLOADS_SKILLS_PATH


def get_skills_root_path() -> Path:
    """
    Compute the default skills directory from this file's location.

    Returns:
        Path to the ``skills`` directory that sits next to ``backend/``
        (deer-flow/skills)
    """
    # This file lives at backend/packages/harness/deerflow/skills/loader.py, so
    # stepping up five directories from the resolved file path lands on backend/.
    backend_dir = Path(__file__).resolve()
    for _ in range(5):
        backend_dir = backend_dir.parent

    # The skills directory is a sibling of backend/.
    repo_root = backend_dir.parent
    return repo_root / "skills"


def load_skills(
    skills_path: Path | None = None,
    use_config: bool = True,
    enabled_only: bool = False,
    thread_id: str | None = None,
) -> list[Skill]:
    """
    Load all skills from the skills directory and the uploads directory.

    Scans the ``public`` and ``custom`` directories under the skills root and
    the uploads directory returned by ``get_uploads_skills_path(thread_id)``.
    Every directory containing a ``SKILL.md`` is parsed with
    ``parse_skill_file``. The enabled state of public/custom skills is then
    read from the extensions config; uploaded skills are always enabled.

    The uploads directory is scanned even when the skills root does not exist,
    because it lives under an unrelated path.

    Args:
        skills_path: Optional custom path to skills directory.
                     If not provided and use_config is True, uses path from config.
                     Otherwise defaults to deer-flow/skills
        use_config: Whether to load skills path from config (default: True)
        enabled_only: If True, only return enabled skills (default: False)
        thread_id: Optional thread ID used to resolve per-thread uploads skills
               from the LangGraph host process

    Returns:
        List of Skill objects, sorted by name
    """
    if skills_path is None:
        skills_path = _resolve_skills_root(use_config)

    # Missing directories are skipped per target, so a missing skills root
    # does not prevent uploaded skills from being discovered.
    scan_targets: list[tuple[str, Path]] = [
        ("public", skills_path / "public"),
        ("custom", skills_path / "custom"),
        ("uploads", get_uploads_skills_path(thread_id)),
    ]

    skills: list[Skill] = []
    for category, category_path in scan_targets:
        skills.extend(_scan_category(category, category_path))

    if not skills:
        # Nothing was found, so there is no enabled state to look up.
        return skills

    _apply_enabled_state(skills)

    # Filter by enabled status if requested
    if enabled_only:
        skills = [skill for skill in skills if skill.enabled]

    # Sort by name for consistent ordering
    skills.sort(key=lambda s: s.name)

    return skills


def _resolve_skills_root(use_config: bool) -> Path:
    """Return the skills root from the app config, or the repository default."""
    if use_config:
        try:
            from deerflow.config import get_app_config

            return get_app_config().skills.get_skills_path()
        except Exception as exc:
            # Best-effort: fall back to the default location, but leave a trace
            # so that a broken config is not silently ignored.
            logger.warning("Failed to resolve skills path from config, using default: %s", exc)

    return get_skills_root_path()


def _scan_category(category: str, category_path: Path) -> list[Skill]:
    """Parse every SKILL.md found below ``category_path`` for one category."""
    logger.debug("Scanning %s skills under %s", category, category_path)

    # is_dir() is already False for a path that does not exist.
    if not category_path.is_dir():
        logger.debug("Skip %s scan: directory not found or not a directory (%s)", category, category_path)
        return []

    found: list[Skill] = []
    scanned_skill_dirs: list[str] = []
    # Symlinks are followed, so remember each real directory already scanned.
    # Without this, a link pointing back at an ancestor makes os.walk descend
    # into the same tree over and over.
    visited_real_dirs: set[str] = set()

    for current_root, dir_names, file_names in os.walk(category_path, followlinks=True):
        real_root = os.path.realpath(current_root)
        if real_root in visited_real_dirs:
            # Already scanned through another path: do not descend again.
            dir_names[:] = []
            continue
        visited_real_dirs.add(real_root)

        # Keep traversal deterministic and skip hidden directories.
        dir_names[:] = sorted(name for name in dir_names if not name.startswith("."))
        if "SKILL.md" not in file_names:
            continue

        skill_file = Path(current_root) / "SKILL.md"
        relative_path = skill_file.parent.relative_to(category_path)
        scanned_skill_dirs.append(relative_path.as_posix())

        skill = parse_skill_file(skill_file, category=category, relative_path=relative_path)
        if skill:
            found.append(skill)

    if scanned_skill_dirs:
        logger.debug(
            "%s scan found %d skill directories: %s",
            category,
            len(scanned_skill_dirs),
            ", ".join(sorted(scanned_skill_dirs)),
        )
    else:
        logger.debug("%s scan found no skill directories", category)

    return found


def _apply_enabled_state(skills: list[Skill]) -> None:
    """Set ``enabled`` on each skill from the extensions config, in place."""
    # NOTE: We use ExtensionsConfig.from_file() instead of get_extensions_config()
    # to always read the latest configuration from disk. This ensures that changes
    # made through the Gateway API (which runs in a separate process) are immediately
    # reflected in the LangGraph Server when loading skills.
    try:
        from deerflow.config.extensions_config import ExtensionsConfig

        extensions_config = ExtensionsConfig.from_file()
        for skill in skills:
            if skill.category == "uploads":
                # Uploaded skills should be available by default for the current thread.
                skill.enabled = True
                continue
            skill.enabled = extensions_config.is_skill_enabled(skill.name, skill.category)
    except Exception as e:
        # Best-effort: on failure, skills not yet updated keep the enabled
        # value they were parsed with (presumably enabled; verify in parser).
        logger.warning("Failed to load extensions config: %s", e)