修复docker下目录权限问题
This commit is contained in:
parent
8f356cdf51
commit
28bb208469
|
|
@ -16,6 +16,7 @@ import atexit
|
|||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
import threading
|
||||
import time
|
||||
|
|
@ -197,28 +198,53 @@ class AioSandboxProvider(SandboxProvider):
|
|||
|
||||
return mounts
|
||||
|
||||
@staticmethod
|
||||
def _get_thread_mounts(thread_id: str) -> list[tuple[str, str, bool]]:
|
||||
"""Get volume mounts for a thread's data directories.
|
||||
@classmethod
def _ensure_thread_mount_dirs(cls, thread_id: str) -> list[tuple[str, str, bool]]:
    """Create a thread's data directories and return their bind mounts.

    The directories are created and chmod-ed through the path that is
    visible to *this* process (under the current working directory). The
    returned mount sources use the path produced by
    ``_resolve_host_bind_path`` instead. The two differ when this process
    itself runs inside a container: the host path then does not exist in
    this process's filesystem, so creating it here would leave the real
    bind source untouched.

    Args:
        thread_id: Identifier of the thread whose ``user-data`` tree is
            prepared.

    Returns:
        One ``(source_path, container_path, read_only)`` tuple for each of
        the ``workspace``, ``uploads`` and ``outputs`` directories.
    """
    subdir_names = ("workspace", "uploads", "outputs")

    local_root = Path(os.getcwd()) / THREAD_DATA_BASE_DIR / thread_id / "user-data"
    host_root = cls._resolve_host_bind_path(local_root)

    if str(host_root) != str(local_root):
        logger.info(
            "Resolved thread mount source from %s to host path %s",
            local_root,
            host_root,
        )

    # Ensure the root user-data directory exists and is writable for
    # sandbox runtimes that run as non-root users. This must go through the
    # locally visible path; the host path is only meaningful to the
    # container runtime that performs the bind mount.
    os.makedirs(local_root, exist_ok=True)
    try:
        os.chmod(local_root, 0o777)
    except OSError as e:
        logger.warning(f"Could not chmod thread user-data dir {local_root}: {e}")

    for name in subdir_names:
        local_dir = local_root / name
        os.makedirs(local_dir, exist_ok=True)
        try:
            os.chmod(local_dir, 0o777)
        except OSError as e:
            # Best effort: a failed chmod must not prevent sandbox startup.
            logger.warning(f"Could not chmod thread mount dir {local_dir}: {e}")

    # Bind sources handed to the container runtime are host paths.
    return [
        (str(host_root / name), f"{VIRTUAL_PATH_PREFIX}/{name}", False)
        for name in subdir_names
    ]
|
||||
|
||||
@staticmethod
|
||||
def _get_skills_mount() -> tuple[str, str, bool] | None:
|
||||
@classmethod
def _get_thread_mounts(cls, thread_id: str) -> list[tuple[str, str, bool]]:
    """Return the volume mounts for one thread's data directories.

    The work is delegated to ``_ensure_thread_mount_dirs``, so missing
    directories are created on first use (lazy initialization).
    """
    thread_mounts = cls._ensure_thread_mount_dirs(thread_id)
    return thread_mounts
|
||||
|
||||
@classmethod
|
||||
def _get_skills_mount(cls) -> tuple[str, str, bool] | None:
|
||||
"""Get the skills directory mount configuration."""
|
||||
try:
|
||||
config = get_app_config()
|
||||
|
|
@ -226,11 +252,73 @@ class AioSandboxProvider(SandboxProvider):
|
|||
container_path = config.skills.container_path
|
||||
|
||||
if skills_path.exists():
|
||||
return (str(skills_path), container_path, True) # Read-only for security
|
||||
host_skills_path = cls._resolve_host_bind_path(skills_path)
|
||||
if str(host_skills_path) != str(skills_path):
|
||||
logger.info(
|
||||
"Resolved skills bind source from %s to host path %s",
|
||||
skills_path,
|
||||
host_skills_path,
|
||||
)
|
||||
return (str(host_skills_path), container_path, True) # Read-only for security
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not setup skills mount: {e}")
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _decode_mountinfo_path(path: str) -> str:
|
||||
"""Decode escaped mountinfo paths (e.g. ``\040`` -> space)."""
|
||||
|
||||
return re.sub(r"\\([0-7]{3})", lambda m: chr(int(m.group(1), 8)), path)
|
||||
|
||||
@classmethod
def _resolve_host_bind_path(cls, path: Path) -> Path:
    """Resolve a container-visible bind path to its host source path.

    This is needed when running gateway/langgraph inside Docker while using
    the host Docker socket to start sandbox containers. In that scenario,
    bind sources passed to Docker must be host paths, not paths inside the
    current container.

    The method reads ``/proc/self/mountinfo``, picks the mount whose mount
    point is the longest prefix of ``path`` and replaces that prefix with
    the mount's ``root`` field.

    Args:
        path: Path as seen by the current process.

    Returns:
        The rewritten path, or ``path`` unchanged when mountinfo cannot be
        read or no mount point matches.
    """
    try:
        target = str(path.resolve())
    except (OSError, RuntimeError, ValueError):
        # Best effort: fall back to the unresolved spelling.
        target = str(path)

    try:
        # surrogateescape keeps undecodable bytes round-trippable, so a mount
        # with an unusual name cannot make the whole lookup fail.
        with open("/proc/self/mountinfo", encoding="utf-8", errors="surrogateescape") as f:
            lines = f.readlines()
    except OSError:
        return path

    best_mount_point: str | None = None
    best_root: str | None = None

    for line in lines:
        # Only the fields before the " - " separator are needed.
        pre, _, _ = line.partition(" - ")
        fields = pre.split()
        if len(fields) < 5:
            continue

        # Fields: ... root mount_point ...
        root = cls._decode_mountinfo_path(fields[3])
        mount_point = cls._decode_mountinfo_path(fields[4])

        is_match = target == mount_point or target.startswith(f"{mount_point.rstrip('/')}/")
        if not is_match:
            continue

        # Longest mount point wins. On a tie the later entry wins, because a
        # mount made on top of an existing mount point shadows the earlier one.
        if best_mount_point is None or len(mount_point) >= len(best_mount_point):
            best_mount_point = mount_point
            best_root = root

    if best_mount_point is None or best_root is None:
        return path

    # NOTE(review): ``root`` is taken as an absolute host path. Presumably that
    # only holds when the bind source lives on the host's root filesystem;
    # confirm for hosts with separate partitions.
    rel = target[len(best_mount_point):].lstrip("/")
    return Path(best_root) / rel if rel else Path(best_root)
|
||||
|
||||
# ── Idle timeout management ──────────────────────────────────────────
|
||||
|
||||
def _start_idle_checker(self) -> None:
|
||||
|
|
@ -331,6 +419,11 @@ class AioSandboxProvider(SandboxProvider):
|
|||
Layer 2: Cross-process state store + file lock (covers multi-process)
|
||||
Layer 3: Backend discovery (covers containers started by other processes)
|
||||
"""
|
||||
if thread_id:
|
||||
# Best-effort self-heal for existing threads/sandboxes: make sure
|
||||
# mounted directories are writable by non-root users inside sandbox.
|
||||
self._ensure_thread_mount_dirs(thread_id)
|
||||
|
||||
# ── Layer 1: In-process cache (fast path) ──
|
||||
if thread_id:
|
||||
with self._lock:
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from __future__ import annotations
|
|||
|
||||
import logging
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
from src.utils.network import get_free_port, release_port
|
||||
|
||||
|
|
@ -107,6 +108,7 @@ class LocalContainerBackend(SandboxBackend):
|
|||
port = get_free_port(start_port=self._base_port)
|
||||
try:
|
||||
container_id = self._start_container(container_name, port, extra_mounts)
|
||||
self._ensure_user_data_permissions(container_name)
|
||||
except Exception:
|
||||
release_port(port)
|
||||
raise
|
||||
|
|
@ -121,6 +123,40 @@ class LocalContainerBackend(SandboxBackend):
|
|||
container_id=container_id,
|
||||
)
|
||||
|
||||
def _ensure_user_data_permissions(self, container_name: str) -> None:
|
||||
"""Ensure /mnt/user-data subdirectories are writable in sandbox container.
|
||||
|
||||
Some sandbox services run as non-root users (e.g. ``gem``). If mounted
|
||||
host directories are created as ``755 root:root``, uploads may fail with
|
||||
permission denied. This best-effort fix normalizes permissions.
|
||||
"""
|
||||
|
||||
fix_cmd = (
|
||||
"mkdir -p /mnt/user-data/uploads /mnt/user-data/workspace /mnt/user-data/outputs "
|
||||
"&& chmod 777 /mnt/user-data/uploads /mnt/user-data/workspace /mnt/user-data/outputs"
|
||||
)
|
||||
|
||||
# Retry briefly because the init process may still be setting up paths
|
||||
# right after container startup.
|
||||
for _ in range(5):
|
||||
try:
|
||||
subprocess.run(
|
||||
[self._runtime, "exec", container_name, "sh", "-lc", fix_cmd],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True,
|
||||
timeout=5,
|
||||
)
|
||||
return
|
||||
except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
|
||||
logger.debug(f"Retrying user-data permission fix for {container_name}: {e}")
|
||||
time.sleep(0.3)
|
||||
|
||||
logger.warning(
|
||||
"Failed to ensure user-data permissions for %s; uploads may fail until permissions are fixed",
|
||||
container_name,
|
||||
)
|
||||
|
||||
def destroy(self, info: SandboxInfo) -> None:
|
||||
"""Stop the container and release its port."""
|
||||
if info.container_id:
|
||||
|
|
|
|||
|
|
@ -225,6 +225,31 @@ def _build_pod(sandbox_id: str, thread_id: str) -> k8s_client.V1Pod:
|
|||
},
|
||||
),
|
||||
spec=k8s_client.V1PodSpec(
|
||||
init_containers=[
|
||||
k8s_client.V1Container(
|
||||
name="init-user-data-permissions",
|
||||
image=SANDBOX_IMAGE,
|
||||
image_pull_policy="IfNotPresent",
|
||||
command=[
|
||||
"/bin/sh",
|
||||
"-c",
|
||||
"mkdir -p /mnt/user-data/workspace /mnt/user-data/uploads /mnt/user-data/outputs && chmod -R 0777 /mnt/user-data",
|
||||
],
|
||||
volume_mounts=[
|
||||
k8s_client.V1VolumeMount(
|
||||
name="user-data",
|
||||
mount_path="/mnt/user-data",
|
||||
read_only=False,
|
||||
),
|
||||
],
|
||||
security_context=k8s_client.V1SecurityContext(
|
||||
run_as_user=0,
|
||||
run_as_group=0,
|
||||
privileged=False,
|
||||
allow_privilege_escalation=False,
|
||||
),
|
||||
)
|
||||
],
|
||||
containers=[
|
||||
k8s_client.V1Container(
|
||||
name="sandbox",
|
||||
|
|
|
|||
Loading…
Reference in New Issue