# ai-chat-ui/server/adapters/dashscope_adapter.py
"""
阿里云百炼 DashScope 适配器
基于 api/chat_routes.py 重构
"""
import json
import os
from typing import Dict, List
from fastapi.responses import JSONResponse, StreamingResponse
from .base import BaseAdapter, ChatCompletionRequest, ModelInfo
from utils.logger import get_logger
logger = get_logger()
# 百炼模型配置
DASHSCOPE_MODELS = [
ModelInfo(
id="qwen-max",
name="通义千问 Max",
description="最强大的模型",
max_tokens=8192,
provider="Aliyun",
supports_thinking=True,
supports_web_search=False,
supports_vision=False,
supports_files=False,
),
ModelInfo(
id="qwen-plus",
name="通义千问 Plus",
description="能力均衡",
max_tokens=8192,
provider="Aliyun",
supports_thinking=False,
supports_web_search=False,
supports_vision=False,
supports_files=False,
),
ModelInfo(
id="qwen-turbo",
name="通义千问 Turbo",
description="速度更快、成本更低",
max_tokens=8192,
provider="Aliyun",
supports_thinking=False,
supports_web_search=False,
supports_vision=False,
supports_files=False,
),
ModelInfo(
id="qwen-vl-max",
name="通义万相 VL-Max",
description="支持视觉理解的多模态模型",
max_tokens=8192,
provider="Aliyun",
supports_thinking=False,
supports_web_search=False,
supports_vision=True,
supports_files=False,
),
ModelInfo(
id="qwen-vl-plus",
name="通义万相 VL-Plus",
description="支持视觉理解的多模态模型",
max_tokens=8192,
provider="Aliyun",
supports_thinking=False,
supports_web_search=False,
supports_vision=True,
supports_files=False,
),
]
class DashScopeAdapter(BaseAdapter):
    """Alibaba Cloud DashScope (Bailian) platform adapter.

    Converts OpenAI-style chat requests into DashScope SDK calls and
    converts the results back into OpenAI-compatible responses. Handles
    plain-text and multimodal (vision) conversations, each in streaming
    (SSE) and non-streaming modes.
    """

    @property
    def provider_name(self) -> str:
        """Stable identifier used to select this adapter."""
        return "dashscope"

    def is_available(self) -> bool:
        """Return True when a DashScope API key is configured in the environment."""
        return bool(os.getenv("ALIYUN_API_KEY") or os.getenv("DASHSCOPE_API_KEY"))

    def _get_api_key(self) -> str:
        """Return the API key; ALIYUN_API_KEY takes precedence, "" when unset."""
        return os.getenv("ALIYUN_API_KEY") or os.getenv("DASHSCOPE_API_KEY", "")

    def list_models(self) -> List[ModelInfo]:
        """Return the static catalogue of supported DashScope models."""
        return DASHSCOPE_MODELS

    async def chat(self, request: ChatCompletionRequest):
        """Handle a DashScope chat completion request.

        Dispatches to the multimodal pipeline when the request carries
        image content or file attachments, otherwise to the plain-text
        pipeline. Streaming and non-streaming modes are both supported.
        """
        # Dump the incoming request for traceability.
        logger.info("[DashScope] 请求参数:")
        logger.info(f" - model: {request.model}")
        logger.info(f" - stream: {request.stream}")
        logger.info(f" - temperature: {request.temperature}")
        logger.info(f" - max_tokens: {request.max_tokens}")
        logger.info(f" - files: {request.files}")
        logger.info(
            f" - messages: {json.dumps(request.messages, ensure_ascii=False, indent=2)}"
        )
        has_multimodal = self._has_multimodal_content(request)
        logger.info(f" - has_multimodal: {has_multimodal}")
        if has_multimodal:
            return await self._multimodal_chat(request)
        return await self._text_chat(request)

    def _has_multimodal_content(self, request: ChatCompletionRequest) -> bool:
        """Return True if any message embeds an image_url part or files are attached."""
        for msg in request.messages:
            content = msg.get("content", "")
            if isinstance(content, list):
                for item in content:
                    if isinstance(item, dict) and item.get("type") == "image_url":
                        return True
        return bool(request.files)

    async def _text_chat(self, request: ChatCompletionRequest):
        """Run a plain-text conversation through the DashScope Generation API."""
        import dashscope

        # The SDK reads credentials from this module-level attribute.
        dashscope.api_key = self._get_api_key()
        messages = self._build_text_messages(request)
        if request.stream:
            return self._stream_text_chat(messages, request)
        return self._sync_text_chat(messages, request)

    def _build_text_messages(self, request: ChatCompletionRequest) -> List[Dict]:
        """Flatten request messages into DashScope's text-only format.

        String contents pass through unchanged; list contents keep only
        their "text" parts, concatenated in order. Messages whose text is
        empty or whitespace-only are dropped.
        """
        messages: List[Dict] = []
        for msg in request.messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")
            if isinstance(content, str) and content.strip():
                messages.append({"role": role, "content": content})
            elif isinstance(content, list):
                text = "".join(
                    part.get("text", "")
                    for part in content
                    if isinstance(part, dict) and part.get("type") == "text"
                )
                if text.strip():
                    messages.append({"role": role, "content": text})
        return messages

    def _stream_text_chat(self, messages: List[Dict], request: ChatCompletionRequest):
        """Stream a text chat as OpenAI-style SSE chunks.

        DashScope streaming returns the *cumulative* content on every
        event, so each event is diffed against what has already been sent
        and only the delta is forwarded to the client.
        """
        logger.info("[DashScope] 开始流式文本响应...")

        def generator():
            from utils.helpers import generate_unique_id, get_current_timestamp
            from dashscope import Generation

            full_content = ""
            chunk_count = 0
            responses = Generation.call(
                model=request.model,
                messages=messages,
                stream=True,
                temperature=request.temperature,
                max_tokens=request.max_tokens,
                result_format="message",
            )
            for resp in responses:
                if resp.status_code != 200:
                    # Log failed events instead of silently dropping them.
                    logger.error(
                        f"[DashScope] stream error: {resp.code} - {resp.message}"
                    )
                    continue
                chunk_count += 1
                content = resp.output.choices[0].message.content
                if content and len(content) > len(full_content):
                    delta = content[len(full_content):]
                    full_content = content
                    data = {
                        "id": f"chatcmpl-{generate_unique_id()}",
                        "object": "chat.completion.chunk",
                        "created": get_current_timestamp(),
                        "model": request.model,
                        "choices": [
                            {
                                "index": 0,
                                "delta": {"content": delta},
                                "finish_reason": None,
                            }
                        ],
                    }
                    yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
            # Terminal chunk with finish_reason, then the OpenAI sentinel.
            finish = {
                "id": f"chatcmpl-{generate_unique_id()}",
                "object": "chat.completion.chunk",
                "created": get_current_timestamp(),
                "model": request.model,
                "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
            }
            yield f"data: {json.dumps(finish, ensure_ascii=False)}\n\n"
            yield "data: [DONE]\n\n"
            logger.info("[DashScope] 流式文本响应完成:")
            logger.info(f" - chunks: {chunk_count}")
            logger.info(f" - content_length: {len(full_content)} 字符")
            logger.info(
                f" - content_preview: {full_content[:200]}..."
                if len(full_content) > 200
                else f" - content: {full_content}"
            )

        return StreamingResponse(generator(), media_type="text/event-stream")

    def _sync_text_chat(self, messages: List[Dict], request: ChatCompletionRequest):
        """Non-streaming text chat; returns an OpenAI-style completion payload.

        On API failure, returns a 500 JSON response with the DashScope
        error code and message.
        """
        from utils.helpers import generate_unique_id, get_current_timestamp
        from dashscope import Generation

        resp = Generation.call(
            model=request.model,
            messages=messages,
            stream=False,
            temperature=request.temperature,
            max_tokens=request.max_tokens,
            result_format="message",
        )
        if resp.status_code != 200:
            logger.error(f"[DashScope] 请求失败: {resp.code} - {resp.message}")
            return JSONResponse(
                status_code=500,
                content={"error": f"DashScope Error: {resp.code} - {resp.message}"},
            )
        content = resp.output.choices[0].message.content
        response = {
            "id": f"chatcmpl-{generate_unique_id()}",
            "object": "chat.completion",
            "created": get_current_timestamp(),
            "model": request.model,
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": content},
                    "finish_reason": "stop",
                }
            ],
        }
        # Token accounting is optional in the SDK response; check it once.
        has_usage = bool(getattr(resp, "usage", None))
        if has_usage:
            response["usage"] = {
                "prompt_tokens": resp.usage.input_tokens,
                "completion_tokens": resp.usage.output_tokens,
                "total_tokens": resp.usage.total_tokens,
            }
        logger.info("[DashScope] 响应结果:")
        logger.info(f" - content_length: {len(content)} 字符")
        logger.info(
            f" - content_preview: {content[:200]}..."
            if len(content) > 200
            else f" - content: {content}"
        )
        if has_usage:
            logger.info(f" - usage: {response['usage']}")
        return JSONResponse(content=response)

    async def _multimodal_chat(self, request: ChatCompletionRequest):
        """Run a multimodal (vision) conversation through DashScope."""
        import dashscope

        dashscope.api_key = self._get_api_key()
        messages = self._build_multimodal_messages(request)
        # Text-only model ids cannot process images; map e.g. "qwen-max"
        # to its vision counterpart "qwen-vl-max".
        model = request.model
        if "qwen-" in model and "vl" not in model:
            model = model.replace("qwen-", "qwen-vl-")
        if request.stream:
            return self._stream_multimodal_chat(messages, model, request)
        return self._sync_multimodal_chat(messages, model, request)

    def _build_multimodal_messages(self, request: ChatCompletionRequest) -> List[Dict]:
        """Convert OpenAI-style messages into DashScope multimodal format.

        Each message's content becomes a list of {"text": ...} and/or
        {"image": ...} parts; messages with no usable parts are dropped.
        """
        messages: List[Dict] = []
        for msg in request.messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")
            if isinstance(content, str):
                if content.strip():
                    messages.append({"role": role, "content": [{"text": content}]})
            elif isinstance(content, list):
                ds_content = []
                for item in content:
                    if not isinstance(item, dict):
                        continue
                    if item.get("type") == "text":
                        ds_content.append({"text": item.get("text", "")})
                    elif item.get("type") == "image_url":
                        img_url = self._extract_image_url(item)
                        if img_url:
                            ds_content.append({"image": img_url})
                if ds_content:
                    messages.append({"role": role, "content": ds_content})
        return messages

    def _extract_image_url(self, item: Dict) -> str:
        """Extract an image URL from an OpenAI-style image_url part.

        Accepts both the plain-string and {"url": ...} shapes; returns ""
        when no URL is present. URLs that point back at this server
        (localhost/127.0.0.1) are rewritten to file:// paths so the SDK
        reads the upload directly; bare local paths get a file:// prefix;
        remote http(s) URLs pass through untouched.
        """
        img_val = item.get("image_url", "")
        if isinstance(img_val, str):
            img_url = img_val
        elif isinstance(img_val, dict):
            img_url = img_val.get("url", "")
        else:
            img_url = ""
        # Guard: without this, an empty value fell through below and
        # became the bogus (truthy) string "file://".
        if not img_url:
            return ""
        if img_url.startswith(("http://", "https://")):
            from urllib.parse import urlparse

            parsed = urlparse(img_url)
            if "localhost" in parsed.netloc or "127.0.0.1" in parsed.netloc:
                path_parts = parsed.path.split("/")
                try:
                    uploads_idx = path_parts.index("uploads")
                    # NOTE(review): this builds a *relative* file:// path
                    # ("file://uploads/..."); confirm the SDK resolves it
                    # against the server's working directory.
                    img_url = f"file://{'/'.join(path_parts[uploads_idx:])}"
                except ValueError:
                    # No "uploads" segment: keep the original URL as-is.
                    pass
        elif not img_url.startswith("file://"):
            img_url = f"file://{img_url}"
        return img_url

    def _stream_multimodal_chat(
        self, messages: List[Dict], model: str, request: ChatCompletionRequest
    ):
        """Stream a multimodal chat as OpenAI-style SSE chunks.

        Like the text stream, DashScope returns cumulative content per
        event; only the delta since the previous event is forwarded.
        """

        def generator():
            from utils.helpers import generate_unique_id, get_current_timestamp
            from dashscope import MultiModalConversation

            responses = MultiModalConversation.call(
                model=model,
                messages=messages,
                stream=True,
                max_tokens=request.max_tokens,
                temperature=request.temperature,
            )
            full_content = ""
            for resp in responses:
                if resp.status_code != 200:
                    # Log failed events instead of silently dropping them.
                    logger.error(
                        f"[DashScope] stream error: {resp.code} - {resp.message}"
                    )
                    continue
                try:
                    content_items = resp.output.choices[0]["message"]["content"]
                    # Concatenate every text part of this event into the
                    # cumulative text so far.
                    text = "".join(
                        part["text"]
                        for part in content_items
                        if isinstance(part, dict) and "text" in part
                    )
                    if len(text) > len(full_content):
                        delta = text[len(full_content):]
                        full_content = text
                        data = {
                            "id": f"chatcmpl-{generate_unique_id()}",
                            "object": "chat.completion.chunk",
                            "created": get_current_timestamp(),
                            "model": model,
                            "choices": [
                                {
                                    "index": 0,
                                    "delta": {"content": delta},
                                    "finish_reason": None,
                                }
                            ],
                        }
                        yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
                except (KeyError, IndexError, TypeError):
                    # Malformed event; skip it rather than kill the stream.
                    pass
            # Terminal chunk with finish_reason, then the OpenAI sentinel.
            finish = {
                "id": f"chatcmpl-{generate_unique_id()}",
                "object": "chat.completion.chunk",
                "created": get_current_timestamp(),
                "model": model,
                "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
            }
            yield f"data: {json.dumps(finish, ensure_ascii=False)}\n\n"
            yield "data: [DONE]\n\n"

        return StreamingResponse(generator(), media_type="text/event-stream")

    def _sync_multimodal_chat(
        self, messages: List[Dict], model: str, request: ChatCompletionRequest
    ):
        """Non-streaming multimodal chat; returns an OpenAI-style payload.

        Returns a 500 JSON response on API failure or when the response
        structure cannot be parsed.
        """
        from utils.helpers import generate_unique_id, get_current_timestamp
        from dashscope import MultiModalConversation

        resp = MultiModalConversation.call(
            model=model,
            messages=messages,
            stream=False,
            max_tokens=request.max_tokens,
            temperature=request.temperature,
        )
        if resp.status_code != 200:
            return JSONResponse(
                status_code=500,
                content={"error": f"DashScope Error: {resp.code} - {resp.message}"},
            )
        try:
            content_items = resp.output.choices[0]["message"]["content"]
            text = "".join(
                part["text"]
                for part in content_items
                if isinstance(part, dict) and "text" in part
            )
        except (KeyError, IndexError, TypeError) as e:
            return JSONResponse(
                status_code=500,
                content={"error": f"Parse error: {str(e)}"},
            )
        response = {
            "id": f"chatcmpl-{generate_unique_id()}",
            "object": "chat.completion",
            "created": get_current_timestamp(),
            "model": model,
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": text},
                    "finish_reason": "stop",
                }
            ],
        }
        return JSONResponse(content=response)