ai-chat-ui/server/adapters/dashscope_adapter.py

207 lines
6.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
阿里云百炼 DashScope 适配器
使用 OpenAI SDK 调用阿里云 OpenAI 兼容 API
"""
import json
import os
from typing import Any, Dict, List, Optional
from .base import ChatCompletionRequest, ModelInfo
from .unified_adapter import UnifiedOpenAIAdapter
from .plugins import get_web_search_mode
from core import get_logger
logger = get_logger()
# Static catalogue of models exposed through the DashScope (Model Studio) adapter.
# Capability flags (thinking / web search / vision / files) drive feature gating
# in the adapter below; `description` and `name` are user-facing strings.
DASHSCOPE_MODELS = [
    ModelInfo(
        id="qwen3-max",
        name="Qwen3-Max",
        description="千问系列效果最好的模型,适合复杂、多步骤的任务。",
        max_tokens=8192,
        provider="Aliyun",
        supports_thinking=True,
        supports_web_search=True,
        supports_vision=False,
        supports_files=False,
    ),
    ModelInfo(
        id="qwen3.5-plus",
        name="Qwen3.5-Plus",
        description="能力均衡推理效果、成本和速度介于千问Max和千问Flash之间适合中等复杂任务。",
        max_tokens=8192,
        provider="Aliyun",
        supports_thinking=True,
        supports_web_search=True,
        supports_vision=True,
        supports_files=False,
    ),
    ModelInfo(
        id="qwen3.5-flash",
        name="Qwen3.5-Flash",
        description="千问系列速度最快、成本极低的模型,适合简单任务。",
        max_tokens=8192,
        provider="Aliyun",
        supports_thinking=False,
        supports_web_search=True,
        supports_vision=False,
        supports_files=False,
    ),
    ModelInfo(
        id="qwen-turbo",
        name="Qwen-Turbo",
        description="快速响应的通用模型",
        max_tokens=8192,
        provider="Aliyun",
        supports_thinking=False,
        supports_web_search=True,
        supports_vision=False,
        supports_files=False,
    ),
    # NOTE(review): the display names below say "通义万相" (Wanx, the image
    # *generation* family) but the ids are qwen-vl-* (Qwen-VL, vision
    # *understanding*) — confirm the intended display names.
    ModelInfo(
        id="qwen-vl-max",
        name="通义万相 VL-Max",
        description="支持视觉理解的多模态模型",
        max_tokens=8192,
        provider="Aliyun",
        supports_thinking=False,
        supports_web_search=False,
        supports_vision=True,
        supports_files=False,
    ),
    ModelInfo(
        id="qwen-vl-plus",
        name="通义万相 VL-Plus",
        description="支持视觉理解的多模态模型",
        max_tokens=8192,
        provider="Aliyun",
        supports_thinking=False,
        supports_web_search=False,
        supports_vision=True,
        supports_files=False,
    ),
]
# Capability lookup sets, derived automatically from DASHSCOPE_MODELS.
# Ids are lower-cased so membership tests are case-insensitive.
THINKING_MODELS = {m.id.lower() for m in DASHSCOPE_MODELS if m.supports_thinking}
VISION_MODELS = {m.id.lower() for m in DASHSCOPE_MODELS if m.supports_vision}
class DashScopeAdapter(UnifiedOpenAIAdapter):
    """Adapter for Alibaba Cloud's DashScope (Model Studio) platform.

    Uses the OpenAI-compatible endpoint via the shared UnifiedOpenAIAdapter;
    DashScope-specific switches (deep thinking, web search) are forwarded
    through ``extra_body``.
    """

    _provider_type = "dashscope"

    @property
    def provider_name(self) -> str:
        """Provider identifier used for routing/registration."""
        return "dashscope"

    def list_models(self) -> List[ModelInfo]:
        """Return the static catalogue of supported DashScope models."""
        return DASHSCOPE_MODELS

    def _supports_thinking(self, model: str) -> bool:
        """Return True if *model* supports the deep-thinking mode (case-insensitive)."""
        return model.lower() in THINKING_MODELS

    def _is_vision_model(self, model: str) -> bool:
        """Return True if *model* accepts multimodal (image) input (case-insensitive)."""
        return model.lower() in VISION_MODELS

    def _build_messages(self, request: ChatCompletionRequest) -> List[Dict]:
        """Build DashScope-format messages from the request.

        Plain-string contents are forwarded as-is (blank/whitespace-only
        messages are dropped); list contents are normalized to OpenAI-style
        multimodal parts (text + image_url). Messages whose multimodal
        content normalizes to nothing are dropped as well.
        """
        messages = []
        for msg in request.messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")
            if isinstance(content, str):
                # Drop empty / whitespace-only text messages.
                if content.strip():
                    messages.append({"role": role, "content": content})
            elif isinstance(content, list):
                # Multimodal content — OpenAI-format compatible parts.
                ds_content = []
                for item in content:
                    if not isinstance(item, dict):
                        continue
                    item_type = item.get("type")
                    if item_type == "text":
                        ds_content.append({"type": "text", "text": item.get("text", "")})
                    elif item_type == "image_url":
                        img_url = self._extract_image_url(item)
                        if img_url:
                            ds_content.append({
                                "type": "image_url",
                                "image_url": {"url": img_url},
                            })
                if ds_content:
                    messages.append({"role": role, "content": ds_content})
        return messages

    def _extract_image_url(self, item: Dict) -> Optional[str]:
        """Extract and normalize the image URL from a multimodal part.

        Accepts either ``{"image_url": "..."}`` or the OpenAI form
        ``{"image_url": {"url": "..."}}``. Localhost HTTP(S) URLs pointing
        into an ``uploads`` directory are rewritten to ``file://`` URLs so
        the local file is read directly; bare local paths get a ``file://``
        prefix. Returns None when no usable URL is present.
        """
        img_val = item.get("image_url", "")
        if isinstance(img_val, str):
            img_url = img_val
        elif isinstance(img_val, dict):
            img_url = img_val.get("url", "")
        else:
            return None
        # Bug fix: previously an empty URL fell through the final branch and
        # was returned as the bogus literal "file://".
        if not img_url:
            return None
        # Log the URL before any rewriting for traceability.
        logger.info("[DashScope] 图片URL: %s", img_url)
        if img_url.startswith(("http://", "https://")):
            from urllib.parse import urlparse
            parsed = urlparse(img_url)
            if "localhost" in parsed.netloc or "127.0.0.1" in parsed.netloc:
                # Rewrite a local-server upload URL to a file:// URL.
                path_parts = parsed.path.split("/")
                try:
                    uploads_idx = path_parts.index("uploads")
                    # NOTE(review): this produces a *relative* path, e.g.
                    # "file://uploads/x.png" — confirm the process working
                    # directory makes this resolvable.
                    img_url = f"file://{'/'.join(path_parts[uploads_idx:])}"
                except ValueError:
                    # No "uploads" segment: leave the URL unchanged.
                    pass
        elif not img_url.startswith("file://"):
            # Bare local path → file:// URL. (The redundant http/https
            # re-check in the original elif was removed; this branch is only
            # reached for non-http(s) values.)
            img_url = f"file://{img_url}"
        return img_url

    def _get_extra_params(self, request: ChatCompletionRequest) -> Dict[str, Any]:
        """Collect DashScope-specific request parameters.

        - Deep thinking: ``extra_body={"enable_thinking": True/False}``
          (always sent explicitly, never omitted).
        - Web search:    ``extra_body={"enable_search": True}``, plus
          ``search_options`` when the "deep" mode is requested.
        """
        extra_body: Dict[str, Any] = {}
        model = request.model
        logger.info("[DashScope] 深度思考请求: deep_thinking=%s, model=%s",
                    request.deep_thinking, model)
        supports_thinking = self._supports_thinking(model)
        logger.info("[DashScope] 模型 %s 支持深度思考: %s", model, supports_thinking)
        # bool() guards against a None deep_thinking leaking into the payload;
        # the API expects an explicit boolean either way.
        thinking_enabled = bool(request.deep_thinking and supports_thinking)
        extra_body["enable_thinking"] = thinking_enabled
        logger.info("[DashScope] 深度思考最终状态: %s", thinking_enabled)
        # Web search.
        web_search_mode = get_web_search_mode(request)
        if web_search_mode:
            extra_body["enable_search"] = True
            if web_search_mode == "deep":
                # "deep" mode additionally enables the search extension.
                extra_body["search_options"] = {"enable_search_extension": True}
            logger.info("[DashScope] 联网搜索已启用: mode=%s", web_search_mode)
        return {"extra_body": extra_body}