修复docker下目录权限问题
This commit is contained in:
parent
8f356cdf51
commit
28bb208469
|
|
@ -16,6 +16,7 @@ import atexit
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import signal
|
import signal
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
|
|
@ -197,28 +198,53 @@ class AioSandboxProvider(SandboxProvider):
|
||||||
|
|
||||||
return mounts
|
return mounts
|
||||||
|
|
||||||
@classmethod
def _ensure_thread_mount_dirs(cls, thread_id: str) -> list[tuple[str, str, bool]]:
    """Ensure thread data mount directories exist and are writable.

    Directories are created and chmodded at the path visible to *this*
    process. The returned bind sources use the path translated by
    ``_resolve_host_bind_path`` so that a Docker daemon reached through the
    host socket receives host-side paths.

    Args:
        thread_id: Identifier of the thread whose data directories are mounted.

    Returns:
        One ``(bind_source, container_path, read_only)`` tuple for each of
        the ``workspace``, ``uploads`` and ``outputs`` directories.

    Raises:
        OSError: If a directory cannot be created. Failures to chmod are
            only logged.
    """
    base_dir = Path(os.getcwd())
    thread_dir = base_dir / THREAD_DATA_BASE_DIR / thread_id / "user-data"
    host_thread_dir = cls._resolve_host_bind_path(thread_dir)

    if str(host_thread_dir) != str(thread_dir):
        logger.info(
            "Resolved thread mount source from %s to host path %s",
            thread_dir,
            host_thread_dir,
        )

    # Filesystem operations must use the locally visible path: the translated
    # host path does not exist in this process's filesystem when running in a
    # container, so creating it there would not touch the bind-mounted data.
    # 0o777 is deliberate: sandbox runtimes may run as non-root users.
    os.makedirs(thread_dir, exist_ok=True)
    try:
        os.chmod(thread_dir, 0o777)
    except OSError as e:
        logger.warning(f"Could not chmod thread user-data dir {thread_dir}: {e}")

    mounts: list[tuple[str, str, bool]] = []
    for subdir in ("workspace", "uploads", "outputs"):
        local_path = thread_dir / subdir
        os.makedirs(local_path, exist_ok=True)
        try:
            os.chmod(local_path, 0o777)
        except OSError as e:
            logger.warning(f"Could not chmod thread mount dir {local_path}: {e}")
        # Only the bind source handed to the container runtime is translated.
        mounts.append((str(host_thread_dir / subdir), f"{VIRTUAL_PATH_PREFIX}/{subdir}", False))

    return mounts
|
||||||
|
|
||||||
@classmethod
def _get_thread_mounts(cls, thread_id: str) -> list[tuple[str, str, bool]]:
    """Return the volume mounts for a thread's data directories.

    Thin wrapper around ``_ensure_thread_mount_dirs``, which also creates
    any missing directories (lazy initialization).
    """
    thread_mounts = cls._ensure_thread_mount_dirs(thread_id)
    return thread_mounts
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _get_skills_mount(cls) -> tuple[str, str, bool] | None:
|
||||||
"""Get the skills directory mount configuration."""
|
"""Get the skills directory mount configuration."""
|
||||||
try:
|
try:
|
||||||
config = get_app_config()
|
config = get_app_config()
|
||||||
|
|
@ -226,11 +252,73 @@ class AioSandboxProvider(SandboxProvider):
|
||||||
container_path = config.skills.container_path
|
container_path = config.skills.container_path
|
||||||
|
|
||||||
if skills_path.exists():
|
if skills_path.exists():
|
||||||
return (str(skills_path), container_path, True) # Read-only for security
|
host_skills_path = cls._resolve_host_bind_path(skills_path)
|
||||||
|
if str(host_skills_path) != str(skills_path):
|
||||||
|
logger.info(
|
||||||
|
"Resolved skills bind source from %s to host path %s",
|
||||||
|
skills_path,
|
||||||
|
host_skills_path,
|
||||||
|
)
|
||||||
|
return (str(host_skills_path), container_path, True) # Read-only for security
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Could not setup skills mount: {e}")
|
logger.warning(f"Could not setup skills mount: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@staticmethod
def _decode_mountinfo_path(path: str) -> str:
    """Decode escaped mountinfo paths (e.g. ``\\040`` -> space).

    Every backslash followed by exactly three octal digits is replaced by
    the character with that code point; all other text is left untouched.
    """

    def _octal_to_char(match: re.Match) -> str:
        code_point = int(match.group(1), 8)
        return chr(code_point)

    return re.sub(r"\\([0-7]{3})", _octal_to_char, path)
|
||||||
|
|
||||||
|
@classmethod
def _resolve_host_bind_path(cls, path: Path, mountinfo_path: str = "/proc/self/mountinfo") -> Path:
    """Resolve a container-visible bind path to its host source path.

    This is needed when running gateway/langgraph inside Docker while using
    the host Docker socket to start sandbox containers. In that scenario,
    bind sources passed to Docker must be host paths, not paths inside the
    current container.

    The mount whose mount point is the longest prefix of ``path`` is looked
    up in the mountinfo table, and the part of ``path`` below that mount
    point is re-rooted onto the mount's ``root`` field.

    NOTE: the ``root`` field is relative to the mounted filesystem, so the
    result is a true host path only when the bind source lives on the
    host's root filesystem.

    Args:
        path: Path as seen by the current process.
        mountinfo_path: mountinfo-format file to consult; defaults to the
            current process's own table.

    Returns:
        The translated path. If resolution fails, returns the original
        path. If the best match mounts a whole filesystem (``root`` is
        ``/``), returns the resolved ``path`` without translation.
    """
    # Deliberately broad: translation is best-effort and must never raise.
    try:
        target = str(path.resolve())
    except Exception:
        target = str(path)

    try:
        with open(mountinfo_path) as f:
            lines = f.readlines()
    except Exception:
        return path

    best_mount_point: str | None = None
    best_root: str | None = None

    for line in lines:
        # Everything after " - " is filesystem type/source/options.
        pre, _, _ = line.partition(" - ")
        fields = pre.split()
        if len(fields) < 5:
            continue

        # Fields: mount_id parent_id major:minor root mount_point ...
        root = cls._decode_mountinfo_path(fields[3])
        mount_point = cls._decode_mountinfo_path(fields[4])

        # Match on whole path components so "/a/data" does not claim "/a/database".
        if target == mount_point or target.startswith(f"{mount_point.rstrip('/')}/"):
            if best_mount_point is None or len(mount_point) > len(best_mount_point):
                best_mount_point = mount_point
                best_root = root

    if best_mount_point is None or best_root is None:
        return path

    if best_root == "/":
        # A whole-filesystem mount is not a bind of a sub-directory, so there
        # is nothing to translate. Re-rooting would strip the mount point
        # (e.g. turn "/home/u/x" into "/u/x" when "/home" is its own filesystem).
        return Path(target)

    rel = target[len(best_mount_point):].lstrip("/")
    return Path(best_root) / rel if rel else Path(best_root)
|
||||||
|
|
||||||
# ── Idle timeout management ──────────────────────────────────────────
|
# ── Idle timeout management ──────────────────────────────────────────
|
||||||
|
|
||||||
def _start_idle_checker(self) -> None:
|
def _start_idle_checker(self) -> None:
|
||||||
|
|
@ -331,6 +419,11 @@ class AioSandboxProvider(SandboxProvider):
|
||||||
Layer 2: Cross-process state store + file lock (covers multi-process)
|
Layer 2: Cross-process state store + file lock (covers multi-process)
|
||||||
Layer 3: Backend discovery (covers containers started by other processes)
|
Layer 3: Backend discovery (covers containers started by other processes)
|
||||||
"""
|
"""
|
||||||
|
if thread_id:
|
||||||
|
# Best-effort self-heal for existing threads/sandboxes: make sure
|
||||||
|
# mounted directories are writable by non-root users inside sandbox.
|
||||||
|
self._ensure_thread_mount_dirs(thread_id)
|
||||||
|
|
||||||
# ── Layer 1: In-process cache (fast path) ──
|
# ── Layer 1: In-process cache (fast path) ──
|
||||||
if thread_id:
|
if thread_id:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import time
|
||||||
|
|
||||||
from src.utils.network import get_free_port, release_port
|
from src.utils.network import get_free_port, release_port
|
||||||
|
|
||||||
|
|
@ -107,6 +108,7 @@ class LocalContainerBackend(SandboxBackend):
|
||||||
port = get_free_port(start_port=self._base_port)
|
port = get_free_port(start_port=self._base_port)
|
||||||
try:
|
try:
|
||||||
container_id = self._start_container(container_name, port, extra_mounts)
|
container_id = self._start_container(container_name, port, extra_mounts)
|
||||||
|
self._ensure_user_data_permissions(container_name)
|
||||||
except Exception:
|
except Exception:
|
||||||
release_port(port)
|
release_port(port)
|
||||||
raise
|
raise
|
||||||
|
|
@ -121,6 +123,40 @@ class LocalContainerBackend(SandboxBackend):
|
||||||
container_id=container_id,
|
container_id=container_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _ensure_user_data_permissions(self, container_name: str) -> None:
    """Ensure /mnt/user-data subdirectories are writable in sandbox container.

    Some sandbox services run as non-root users (e.g. ``gem``). If mounted
    host directories are created as ``755 root:root``, uploads may fail with
    permission denied. This best-effort fix normalizes permissions by running
    ``mkdir -p`` and ``chmod 777`` inside the container through the
    configured runtime's ``exec`` command.

    Args:
        container_name: Name of the running container to fix up.

    Returns:
        None. Failures are only logged, so this method cannot abort sandbox
        creation after the container has already started.
    """
    fix_cmd = (
        "mkdir -p /mnt/user-data/uploads /mnt/user-data/workspace /mnt/user-data/outputs "
        "&& chmod 777 /mnt/user-data/uploads /mnt/user-data/workspace /mnt/user-data/outputs"
    )
    max_attempts = 5
    retry_delay = 0.3

    # Retry briefly because the init process may still be setting up paths
    # right after container startup.
    for attempt in range(1, max_attempts + 1):
        try:
            subprocess.run(
                [self._runtime, "exec", container_name, "sh", "-lc", fix_cmd],
                capture_output=True,
                text=True,
                check=True,
                timeout=5,
            )
            return
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
            logger.debug(f"Retrying user-data permission fix for {container_name}: {e}")
        except OSError as e:
            # The runtime binary itself could not be executed; retrying will
            # not help, and a best-effort fix must not raise.
            logger.debug(
                "Cannot run %s to fix user-data permissions for %s: %s",
                self._runtime,
                container_name,
                e,
            )
            break

        # No point sleeping after the final attempt.
        if attempt < max_attempts:
            time.sleep(retry_delay)

    logger.warning(
        "Failed to ensure user-data permissions for %s; uploads may fail until permissions are fixed",
        container_name,
    )
|
||||||
|
|
||||||
def destroy(self, info: SandboxInfo) -> None:
|
def destroy(self, info: SandboxInfo) -> None:
|
||||||
"""Stop the container and release its port."""
|
"""Stop the container and release its port."""
|
||||||
if info.container_id:
|
if info.container_id:
|
||||||
|
|
|
||||||
|
|
@ -225,6 +225,31 @@ def _build_pod(sandbox_id: str, thread_id: str) -> k8s_client.V1Pod:
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
spec=k8s_client.V1PodSpec(
|
spec=k8s_client.V1PodSpec(
|
||||||
|
init_containers=[
|
||||||
|
k8s_client.V1Container(
|
||||||
|
name="init-user-data-permissions",
|
||||||
|
image=SANDBOX_IMAGE,
|
||||||
|
image_pull_policy="IfNotPresent",
|
||||||
|
command=[
|
||||||
|
"/bin/sh",
|
||||||
|
"-c",
|
||||||
|
"mkdir -p /mnt/user-data/workspace /mnt/user-data/uploads /mnt/user-data/outputs && chmod -R 0777 /mnt/user-data",
|
||||||
|
],
|
||||||
|
volume_mounts=[
|
||||||
|
k8s_client.V1VolumeMount(
|
||||||
|
name="user-data",
|
||||||
|
mount_path="/mnt/user-data",
|
||||||
|
read_only=False,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
security_context=k8s_client.V1SecurityContext(
|
||||||
|
run_as_user=0,
|
||||||
|
run_as_group=0,
|
||||||
|
privileged=False,
|
||||||
|
allow_privilege_escalation=False,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
],
|
||||||
containers=[
|
containers=[
|
||||||
k8s_client.V1Container(
|
k8s_client.V1Container(
|
||||||
name="sandbox",
|
name="sandbox",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue