fix(sandbox): exclude URL paths from absolute path validation (#1385) (#1419)

* fix(sandbox): URL路径被误判为不安全绝对路径 (#1385)

在本地沙箱模式下,bash工具对命令做绝对路径安全校验时,会把curl命令中的
HTTPS URL(如 https://example.com/api/v1/check)误识别为本地绝对路径并拦截。

根因:_ABSOLUTE_PATH_PATTERN 正则的负向后行断言 (?<![:\w]) 只排除了冒号和
单词字符,但 :// 中第二个斜杠前面是第一个斜杠(/),不在排除列表中,导致
//example.com/api/... 被匹配为绝对路径 /example.com/api/...。

修复:在负向后行断言中增加斜杠字符,改为 (?<![:\w/]),使得 :// 中的连续
斜杠不会触发绝对路径匹配。同时补充了URL相关的单元测试用例。

Signed-off-by: moose-lab <moose-lab@users.noreply.github.com>

* fix(sandbox): refine absolute path regex to preserve file:// defense-in-depth

Change lookbehind from (?<![:\w/]) to (?<![:\w])(?<!:/) so that a slash
preceded by another slash is excluded only when it is the second slash of
a :// sequence. This still keeps URL paths from being flagged as absolute
paths, while letting the regex detect /etc/passwd in file:///etc/passwd.
Also add explicit file:// URL blocking and tests.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Signed-off-by: moose-lab <moose-lab@users.noreply.github.com>
Co-authored-by: moose-lab <moose-lab@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
moose-lab 2026-04-02 16:09:14 +08:00 committed by GitHub
parent 784265f742
commit 292ba596d2
2 changed files with 57 additions and 1 deletions

View File

@ -18,7 +18,8 @@ from deerflow.sandbox.sandbox import Sandbox
from deerflow.sandbox.sandbox_provider import get_sandbox_provider
from deerflow.sandbox.security import LOCAL_HOST_BASH_DISABLED_MESSAGE, is_host_bash_allowed
# Earlier form of the pattern: a "/" that is not immediately preceded by ":"
# or a word character starts a candidate path. This binding is replaced by
# the assignment directly below.
_ABSOLUTE_PATH_PATTERN = re.compile(r"(?<![:\w])/(?:[^\s\"'`;&|<>()]+)")
# Candidate absolute path: a "/" that is not immediately preceded by ":" or a
# word character, and not immediately preceded by ":/" (i.e. not the second
# slash of "://"), followed by one or more characters that are not
# whitespace, a quote or backtick, or any of ; & | < > ( ).
_ABSOLUTE_PATH_PATTERN = re.compile(r"(?<![:\w])(?<!:/)/(?:[^\s\"'`;&|<>()]+)")
# A file:// URL (scheme matched case-insensitively) extending to the next
# whitespace character.
_FILE_URL_PATTERN = re.compile(r"\bfile://\S+", re.IGNORECASE)
_LOCAL_BASH_SYSTEM_PATH_PREFIXES = (
"/bin/",
"/usr/bin/",
@ -516,6 +517,11 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState
if thread_data is None:
raise SandboxRuntimeError("Thread data not available for local sandbox")
# Block file:// URLs which bypass the absolute-path regex but allow local file exfiltration
file_url_match = _FILE_URL_PATTERN.search(command)
if file_url_match:
raise PermissionError(f"Unsafe file:// URL in command: {file_url_match.group()}. Use paths under {VIRTUAL_PATH_PREFIX}")
unsafe_paths: list[str] = []
allowed_paths = _get_mcp_allowed_paths()

View File

@ -324,6 +324,56 @@ def test_validate_local_bash_command_paths_allows_skills_path() -> None:
)
def test_validate_local_bash_command_paths_allows_urls() -> None:
    """Regression test for issue #1385: URL paths are not absolute file paths.

    Every command below contains a network URL; validation must complete
    without raising for each of them.
    """
    url_commands = (
        # HTTPS URL
        "curl -X POST https://example.com/api/v1/risk/check",
        # HTTP URL
        "curl http://localhost:8080/health",
        # URL with a query string
        "curl https://api.example.com/v2/search?q=test",
        # FTP URL
        "curl ftp://ftp.example.com/pub/file.tar.gz",
        # URL mixed with a valid virtual path
        "curl https://example.com/data -o /mnt/user-data/workspace/data.json",
    )
    for bash_command in url_commands:
        validate_local_bash_command_paths(bash_command, _THREAD_DATA)
def test_validate_local_bash_command_paths_blocks_file_urls() -> None:
    """A command containing a file:// URL must be rejected with PermissionError."""
    file_url_command = "curl file:///etc/passwd"
    with pytest.raises(PermissionError):
        validate_local_bash_command_paths(file_url_command, _THREAD_DATA)
def test_validate_local_bash_command_paths_blocks_file_urls_case_insensitive() -> None:
    """An upper-case FILE:// scheme must be rejected just like file://."""
    upper_case_command = "curl FILE:///etc/shadow"
    with pytest.raises(PermissionError):
        validate_local_bash_command_paths(upper_case_command, _THREAD_DATA)
def test_validate_local_bash_command_paths_blocks_file_urls_mixed_with_valid() -> None:
    """A file:// URL is rejected even when the command also uses a valid path."""
    mixed_command = "curl file:///etc/passwd -o /mnt/user-data/workspace/out.txt"
    with pytest.raises(PermissionError):
        validate_local_bash_command_paths(mixed_command, _THREAD_DATA)
def test_validate_local_bash_command_paths_still_blocks_other_paths() -> None:
"""Paths outside virtual and system prefixes must still be blocked."""
with patch("deerflow.sandbox.tools._get_skills_container_path", return_value="/mnt/skills"):