diff --git a/backend/src/community/aio_sandbox/aio_sandbox_provider.py b/backend/src/community/aio_sandbox/aio_sandbox_provider.py
index af62633d..aa29ada5 100644
--- a/backend/src/community/aio_sandbox/aio_sandbox_provider.py
+++ b/backend/src/community/aio_sandbox/aio_sandbox_provider.py
@@ -16,6 +16,7 @@ import atexit
 import hashlib
 import logging
 import os
+import re
 import signal
 import threading
 import time
@@ -197,28 +198,60 @@ class AioSandboxProvider(SandboxProvider):
 
         return mounts
 
-    @staticmethod
-    def _get_thread_mounts(thread_id: str) -> list[tuple[str, str, bool]]:
-        """Get volume mounts for a thread's data directories.
+    @classmethod
+    def _ensure_thread_mount_dirs(cls, thread_id: str) -> list[tuple[str, str, bool]]:
+        """Ensure thread data mount directories exist and are writable.
+
+        Directories are created and chmod-ed through the path visible to this
+        process; the returned bind sources use the resolved host path.
+        """
+        base_dir = Path(os.getcwd())
+        thread_dir = base_dir / THREAD_DATA_BASE_DIR / thread_id / "user-data"
+        host_thread_dir = cls._resolve_host_bind_path(thread_dir)
 
-        Creates directories if they don't exist (lazy initialization).
-        """
-        base_dir = os.getcwd()
-        thread_dir = Path(base_dir) / THREAD_DATA_BASE_DIR / thread_id / "user-data"
+        if str(host_thread_dir) != str(thread_dir):
+            logger.info(
+                "Resolved thread mount source from %s to host path %s",
+                thread_dir,
+                host_thread_dir,
+            )
+
+        # Ensure the root user-data directory exists and is writable for
+        # sandbox runtimes that run as non-root users.
+        os.makedirs(thread_dir, exist_ok=True)
+        try:
+            os.chmod(thread_dir, 0o777)
+        except OSError as e:
+            logger.warning(f"Could not chmod thread user-data dir {thread_dir}: {e}")
 
         mounts = [
-            (str(thread_dir / "workspace"), f"{VIRTUAL_PATH_PREFIX}/workspace", False),
-            (str(thread_dir / "uploads"), f"{VIRTUAL_PATH_PREFIX}/uploads", False),
-            (str(thread_dir / "outputs"), f"{VIRTUAL_PATH_PREFIX}/outputs", False),
+            (str(host_thread_dir / "workspace"), f"{VIRTUAL_PATH_PREFIX}/workspace", False),
+            (str(host_thread_dir / "uploads"), f"{VIRTUAL_PATH_PREFIX}/uploads", False),
+            (str(host_thread_dir / "outputs"), f"{VIRTUAL_PATH_PREFIX}/outputs", False),
         ]
 
-        for host_path, _, _ in mounts:
-            os.makedirs(host_path, exist_ok=True)
+        # Create and chmod through the container-visible path: the resolved
+        # host path need not exist inside this process's own filesystem.
+        for name in ("workspace", "uploads", "outputs"):
+            local_path = thread_dir / name
+            os.makedirs(local_path, exist_ok=True)
+            try:
+                os.chmod(local_path, 0o777)
+            except OSError as e:
+                logger.warning(f"Could not chmod thread mount dir {local_path}: {e}")
 
         return mounts
 
-    @staticmethod
-    def _get_skills_mount() -> tuple[str, str, bool] | None:
+    @classmethod
+    def _get_thread_mounts(cls, thread_id: str) -> list[tuple[str, str, bool]]:
+        """Get volume mounts for a thread's data directories.
+
+        Creates directories if they don't exist (lazy initialization).
+        """
+        return cls._ensure_thread_mount_dirs(thread_id)
+
+    @classmethod
+    def _get_skills_mount(cls) -> tuple[str, str, bool] | None:
         """Get the skills directory mount configuration."""
         try:
             config = get_app_config()
@@ -226,11 +259,73 @@ class AioSandboxProvider(SandboxProvider):
             container_path = config.skills.container_path
 
             if skills_path.exists():
-                return (str(skills_path), container_path, True)  # Read-only for security
+                host_skills_path = cls._resolve_host_bind_path(skills_path)
+                if str(host_skills_path) != str(skills_path):
+                    logger.info(
+                        "Resolved skills bind source from %s to host path %s",
+                        skills_path,
+                        host_skills_path,
+                    )
+                return (str(host_skills_path), container_path, True)  # Read-only for security
         except Exception as e:
             logger.warning(f"Could not setup skills mount: {e}")
         return None
 
+    @staticmethod
+    def _decode_mountinfo_path(path: str) -> str:
+        r"""Decode escaped mountinfo paths (e.g. ``\040`` -> space)."""
+
+        return re.sub(r"\\([0-7]{3})", lambda m: chr(int(m.group(1), 8)), path)
+
+    @classmethod
+    def _resolve_host_bind_path(cls, path: Path) -> Path:
+        """Resolve a container-visible bind path to its host source path.
+
+        This is needed when running gateway/langgraph inside Docker while using
+        the host Docker socket to start sandbox containers. In that scenario,
+        bind sources passed to Docker must be host paths, not paths inside the
+        current container.
+
+        If resolution fails, returns the original path.
+        """
+
+        try:
+            target = str(path.resolve())
+        except Exception:
+            target = str(path)
+
+        try:
+            with open("/proc/self/mountinfo") as f:
+                lines = f.readlines()
+        except Exception:
+            return path
+
+        best_mount_point: str | None = None
+        best_root: str | None = None
+
+        for line in lines:
+            pre, _, _ = line.partition(" - ")
+            fields = pre.split()
+            if len(fields) < 5:
+                continue
+
+            # Fields: ... root mount_point ...
+            root = cls._decode_mountinfo_path(fields[3])
+            mount_point = cls._decode_mountinfo_path(fields[4])
+
+            if target == mount_point or target.startswith(f"{mount_point.rstrip('/')}/"):
+                if best_mount_point is None or len(mount_point) > len(best_mount_point):
+                    best_mount_point = mount_point
+                    best_root = root
+
+        if best_mount_point is None or best_root is None:
+            return path
+
+        rel = target[len(best_mount_point) :].lstrip("/")
+        if rel:
+            return Path(best_root) / rel
+        return Path(best_root)
+
     # ── Idle timeout management ──────────────────────────────────────────
 
     def _start_idle_checker(self) -> None:
@@ -331,6 +426,14 @@ class AioSandboxProvider(SandboxProvider):
         Layer 2: Cross-process state store + file lock (covers multi-process)
         Layer 3: Backend discovery (covers containers started by other processes)
         """
+        if thread_id:
+            # Best-effort self-heal for existing threads/sandboxes: make sure
+            # mounted directories are writable by non-root users inside sandbox.
+            try:
+                self._ensure_thread_mount_dirs(thread_id)
+            except OSError as e:
+                logger.warning(f"Could not prepare mount dirs for thread {thread_id}: {e}")
+
         # ── Layer 1: In-process cache (fast path) ──
         if thread_id:
             with self._lock:
diff --git a/backend/src/community/aio_sandbox/local_backend.py b/backend/src/community/aio_sandbox/local_backend.py
index bd3bdb2b..fbddfa5d 100644
--- a/backend/src/community/aio_sandbox/local_backend.py
+++ b/backend/src/community/aio_sandbox/local_backend.py
@@ -8,6 +8,7 @@ from __future__ import annotations
 
 import logging
 import subprocess
+import time
 
 from src.utils.network import get_free_port, release_port
 
@@ -107,6 +108,7 @@ class LocalContainerBackend(SandboxBackend):
         port = get_free_port(start_port=self._base_port)
         try:
             container_id = self._start_container(container_name, port, extra_mounts)
+            self._ensure_user_data_permissions(container_name)
         except Exception:
             release_port(port)
             raise
@@ -121,6 +123,41 @@ class LocalContainerBackend(SandboxBackend):
             container_id=container_id,
         )
 
+    def _ensure_user_data_permissions(self, container_name: str) -> None:
+        """Ensure /mnt/user-data subdirectories are writable in sandbox container.
+
+        Some sandbox services run as non-root users (e.g. ``gem``). If mounted
+        host directories are created as ``755 root:root``, uploads may fail with
+        permission denied. This best-effort fix normalizes permissions.
+        """
+
+        fix_cmd = (
+            "mkdir -p /mnt/user-data/uploads /mnt/user-data/workspace /mnt/user-data/outputs "
+            "&& chmod 777 /mnt/user-data/uploads /mnt/user-data/workspace /mnt/user-data/outputs"
+        )
+
+        # Retry briefly because the init process may still be setting up paths
+        # right after container startup.
+        for _ in range(5):
+            try:
+                subprocess.run(
+                    [self._runtime, "exec", container_name, "sh", "-lc", fix_cmd],
+                    capture_output=True,
+                    text=True,
+                    check=True,
+                    timeout=5,
+                )
+                return
+            except (subprocess.SubprocessError, OSError) as e:
+                # Best-effort: never abort create() after the container has started.
+                logger.debug(f"Retrying user-data permission fix for {container_name}: {e}")
+                time.sleep(0.3)
+
+        logger.warning(
+            "Failed to ensure user-data permissions for %s; uploads may fail until permissions are fixed",
+            container_name,
+        )
+
     def destroy(self, info: SandboxInfo) -> None:
         """Stop the container and release its port."""
         if info.container_id:
diff --git a/docker/provisioner/app.py b/docker/provisioner/app.py
index f9cdfa36..dcbbb0b0 100644
--- a/docker/provisioner/app.py
+++ b/docker/provisioner/app.py
@@ -225,6 +225,31 @@ def _build_pod(sandbox_id: str, thread_id: str) -> k8s_client.V1Pod:
             },
         ),
         spec=k8s_client.V1PodSpec(
+            init_containers=[
+                k8s_client.V1Container(
+                    name="init-user-data-permissions",
+                    image=SANDBOX_IMAGE,
+                    image_pull_policy="IfNotPresent",
+                    command=[
+                        "/bin/sh",
+                        "-c",
+                        "mkdir -p /mnt/user-data/workspace /mnt/user-data/uploads /mnt/user-data/outputs && chmod -R 0777 /mnt/user-data",
+                    ],
+                    volume_mounts=[
+                        k8s_client.V1VolumeMount(
+                            name="user-data",
+                            mount_path="/mnt/user-data",
+                            read_only=False,
+                        ),
+                    ],
+                    security_context=k8s_client.V1SecurityContext(
+                        run_as_user=0,
+                        run_as_group=0,
+                        privileged=False,
+                        allow_privilege_escalation=False,
+                    ),
+                )
+            ],
             containers=[
                 k8s_client.V1Container(
                     name="sandbox",