Compare commits

...

2 Commits

Author SHA1 Message Date
Titan c2fc5bcd96 feat: modify default skills 2026-03-27 14:20:47 +08:00
Titan 5242df028b feat: optimize the skill injection flow 2026-03-27 13:34:30 +08:00
18 changed files with 1055 additions and 264 deletions

View File

@@ -108,7 +108,7 @@ class RemoteSkillBootstrapResponse(BaseModel):
    target_dir: str = Field(..., description="Virtual target directory")
    created_directories: int = Field(..., description="Number of created directories")
    created_files: int = Field(..., description="Number of created files")
-    sandbox_id: str = Field(..., description="Acquired sandbox ID")
+    sandbox_id: str | None = Field(default=None, description="Acquired sandbox ID (null when sandbox is not acquired)")
    message: str = Field(..., description="Operation result message")
@@ -568,8 +568,8 @@ async def bootstrap_skill_from_remote(request: RemoteSkillBootstrapRequest) -> R
    """Initialize thread skill directory from remote YAML content service."""
    try:
        # 1) Ensure sandbox and thread personal dirs are initialized first.
-        sandbox_provider = get_sandbox_provider()
-        sandbox_id = sandbox_provider.acquire(request.thread_id)
+        # sandbox_provider = get_sandbox_provider()
+        # sandbox_id = sandbox_provider.acquire(request.thread_id)

        # 2) Fetch YAML content from configured remote endpoint.
        cfg = get_gateway_config()
@@ -624,7 +624,7 @@ async def bootstrap_skill_from_remote(request: RemoteSkillBootstrapRequest) -> R
            target_dir=request.target_dir,
            created_directories=len(parsed.directories),
            created_files=len(parsed.files),
-            sandbox_id=sandbox_id,
+            sandbox_id=None,
            message=(
                f"Bootstrapped {len(parsed.files)} files and {len(parsed.directories)} directories "
                f"under '{request.target_dir}'"

View File

@@ -7,6 +7,9 @@ real filesystem content under a thread's virtual path (for example,
from __future__ import annotations

+import argparse
+import json
+import sys
from dataclasses import dataclass
from pathlib import Path
@@ -329,3 +332,158 @@ def materialize_skill_tree(parsed: ParsedSkillTree, target_root: Path, clear_tar
        file_path = target_root / rel_file
        file_path.parent.mkdir(parents=True, exist_ok=True)
        file_path.write_text(content, encoding="utf-8")
+
+
+def _build_cli_parser() -> argparse.ArgumentParser:
+    """Build the command-line argument parser.
+
+    CLI usage:
+        python skill_yaml_importer.py <input_path> [options]
+
+    Positional arguments:
+        input_path           Path to a YAML file, or a directory containing YAML files.
+
+    Options:
+        --show-files         Include parsed file paths in output.
+        --show-directories   Include parsed directory paths in output.
+        --json               Print JSON output instead of plain text.
+        --recursive          Recursively scan subdirectories when input is a directory.
+        --log-file <path>    Save the full report (summary + successes + failures) to a JSON file.
+
+    Examples:
+        python skill_yaml_importer.py ./sample.yaml --json
+        python skill_yaml_importer.py ./generated_yaml --recursive --log-file ./parse_log.json
+    """
+    parser = argparse.ArgumentParser(description="Parse and validate a skill YAML spec file")
+    parser.add_argument("input_path", help="Path to a YAML file or a directory containing YAML files")
+    parser.add_argument(
+        "--show-files",
+        action="store_true",
+        help="Print sorted parsed file paths",
+    )
+    parser.add_argument(
+        "--show-directories",
+        action="store_true",
+        help="Print sorted parsed directory paths",
+    )
+    parser.add_argument(
+        "--json",
+        action="store_true",
+        help="Print parsed summary as JSON",
+    )
+    parser.add_argument(
+        "--recursive",
+        action="store_true",
+        help="When the input path is a directory, scan YAML files recursively",
+    )
+    parser.add_argument(
+        "--log-file",
+        default=None,
+        help="Optional path to save full execution results and summary as JSON",
+    )
+    return parser
+
+
+def _collect_yaml_files(input_path: Path, recursive: bool) -> list[Path]:
+    if input_path.is_file():
+        return [input_path]
+    if not input_path.is_dir():
+        return []
+    patterns = ("*.yaml", "*.yml")
+    files: list[Path] = []
+    for pattern in patterns:
+        iterator = input_path.rglob(pattern) if recursive else input_path.glob(pattern)
+        files.extend(iterator)
+    # Stable order for deterministic output
+    return sorted({p.resolve() for p in files})
+
+
+def _parse_one_yaml_file(yaml_path: Path, show_files: bool, show_directories: bool) -> dict:
+    yaml_text = yaml_path.read_text(encoding="utf-8")
+    parsed = parse_skill_yaml_spec(yaml_text)
+    directories = sorted(parsed.directories)
+    files = sorted(parsed.files.keys())
+    return {
+        "yaml_file": str(yaml_path),
+        "directories_count": len(directories),
+        "files_count": len(files),
+        "directories": directories if show_directories else None,
+        "files": files if show_files else None,
+    }
+
+
+def _main() -> int:
+    """CLI entrypoint for parsing one YAML file or a batch of YAML files.
+
+    Exit codes:
+        0: all files parsed successfully
+        1: invalid input path or no YAML files found
+        2: processing completed with one or more parse failures
+    """
+    args = _build_cli_parser().parse_args()
+    input_path = Path(args.input_path)
+
+    if not input_path.exists():
+        print(f"Input path not found: {input_path}", file=sys.stderr)
+        return 1
+
+    yaml_files = _collect_yaml_files(input_path, recursive=args.recursive)
+    if not yaml_files:
+        print(f"No YAML files found under: {input_path}", file=sys.stderr)
+        return 1
+
+    successes: list[dict] = []
+    failures: list[dict[str, str]] = []
+
+    for yaml_path in yaml_files:
+        try:
+            result = _parse_one_yaml_file(
+                yaml_path,
+                show_files=args.show_files,
+                show_directories=args.show_directories,
+            )
+            successes.append(result)
+            if not args.json:
+                print(f"OK: {yaml_path}")
+                print(f"  Directories: {result['directories_count']}")
+                print(f"  Files: {result['files_count']}")
+        except Exception as e:  # noqa: BLE001
+            failures.append({"yaml_file": str(yaml_path), "error": str(e)})
+            print(f"ERROR: {yaml_path}: {e}", file=sys.stderr)
+
+    summary = {
+        "input_path": str(input_path),
+        "total": len(yaml_files),
+        "success": len(successes),
+        "failed": len(failures),
+    }
+    report = {"summary": summary, "successes": successes, "failures": failures}
+
+    if args.log_file:
+        try:
+            log_path = Path(args.log_file)
+            log_path.parent.mkdir(parents=True, exist_ok=True)
+            log_path.write_text(json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8")
+            print(f"Log saved: {log_path}")
+        except Exception as e:  # noqa: BLE001
+            print(f"Failed to write log file '{args.log_file}': {e}", file=sys.stderr)
+
+    if args.json:
+        print(json.dumps(report, ensure_ascii=False, indent=2))
+    else:
+        print("\n[Summary]")
+        print(f"Input: {summary['input_path']}")
+        print(f"Total: {summary['total']}")
+        print(f"Success: {summary['success']}")
+        print(f"Failed: {summary['failed']}")

+    return 0 if not failures else 2
+
+
+if __name__ == "__main__":
+    raise SystemExit(_main())
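As a usage illustration of the new CLI and its documented exit codes (paths are hypothetical; a sketch, not part of the importer):

```python
# Drive the importer CLI and branch on its exit codes:
# 0 = all parsed, 1 = bad input / no YAML found, 2 = partial failures.
import subprocess
import sys

proc = subprocess.run(
    [sys.executable, "skill_yaml_importer.py", "./generated_yaml",
     "--recursive", "--log-file", "./parse_log.json"],
)
if proc.returncode == 2:
    print("Some YAML specs failed to parse; see parse_log.json", file=sys.stderr)
elif proc.returncode == 1:
    print("Input path invalid or no YAML files found", file=sys.stderr)
```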

View File

@@ -166,6 +166,10 @@ services:
      - CI=true
      # Docker environment for aio sandbox
      - DOCKER_HOST=unix:///var/run/docker.sock
+      - LOG_LEVEL=DEBUG
+      - LANGGRAPH_DEBUG=true
+      - LANGCHAIN_DEBUG=true
+      - PYTHONUNBUFFERED=1
    env_file:
      - ../.env
    extra_hosts:

View File

@@ -0,0 +1,6 @@
# RunningHub API Configuration
# Copy this file to .env and fill in your actual API key
# RunningHub API Key for image generation
# Get your API key from: https://www.runninghub.cn
RUNNINGHUB_API_KEY=your_api_key_here

skills/public/image-generation/.gitignore vendored Executable file
View File

@@ -0,0 +1,31 @@
# Environment variables
.env
.env.local
.env.*.local
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
venv/
env/
ENV/
# Output files
*.jpg
*.jpeg
*.png
*.webp
outputs/
# IDE
.vscode/
.idea/
*.swp
*.swo
# OS
.DS_Store
Thumbs.db

skills/public/image-generation/SKILL.md Normal file → Executable file
View File

@@ -7,14 +7,47 @@ description: Use this skill when the user requests to generate, create, imagine,
## Overview

-This skill generates high-quality images using structured prompts and a Python script. The workflow includes creating JSON-formatted prompts and executing image generation with optional reference images.
+This skill generates high-quality images using the RunningHub API with structured prompts and a Python script. The workflow includes creating JSON-formatted prompts and executing image generation through asynchronous task submission.

## Core Capabilities

- Create structured JSON prompts for AIGC image generation
-- Support multiple reference images for style/composition guidance
-- Generate images through automated Python script execution
+- Generate images through RunningHub's Z-Image Turbo LoRA API
+- Support asynchronous task submission and status polling
- Handle various image generation scenarios (character design, scenes, products, etc.)
+- Support multiple aspect ratios and output formats (PNG, JPEG, WEBP)
+
+## Configuration
+
+### API Key Setup
+
+This skill uses the RunningHub API for image generation. You need to configure your API key before using the skill.
+
+**Option 1: Environment Variable (Recommended)**
+```bash
+# Set the RUNNINGHUB_API_KEY environment variable
+export RUNNINGHUB_API_KEY=your_api_key_here
+
+# Or on Windows:
+set RUNNINGHUB_API_KEY=your_api_key_here
+```
+
+**Option 2: .env File**
+
+1. Copy `.env.example` to `.env`:
+   ```bash
+   cp .env.example .env
+   ```
+2. Edit `.env` and add your API key:
+   ```
+   RUNNINGHUB_API_KEY=your_api_key_here
+   ```
+3. The `.env` file is automatically excluded from version control via `.gitignore`
+
+**Security Notes:**
+- Never commit `.env` files to version control
+- Never hardcode API keys in source code
+- Rotate your API keys if they are accidentally exposed
+- Get your API key from: https://www.runninghub.cn
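At runtime this configuration boils down to a key lookup. A minimal sketch (assuming `python-dotenv`, which the script imports):

```python
# Resolve RUNNINGHUB_API_KEY the way the generation script does.
import os
from dotenv import load_dotenv

load_dotenv()  # reads a .env file from the working directory, if present

api_key = os.getenv("RUNNINGHUB_API_KEY")
if not api_key:
    raise RuntimeError("RUNNINGHUB_API_KEY is not set (export it or add it to .env)")
```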
## Workflow
@@ -38,20 +71,20 @@ Call the Python script:
```bash
python /mnt/skills/public/image-generation/scripts/generate.py \
    --prompt-file /mnt/user-data/workspace/prompt-file.json \
-    --reference-images /path/to/ref1.jpg /path/to/ref2.png \
-    --output-file /mnt/user-data/outputs/generated-image.jpg
+    --output-file /mnt/user-data/outputs/generated-image.jpg \
    --aspect-ratio 16:9
```

Parameters:
- `--prompt-file`: Absolute path to JSON prompt file (required)
-- `--reference-images`: Absolute paths to reference images (optional, space-separated)
- `--output-file`: Absolute path to output image file (required)
- `--aspect-ratio`: Aspect ratio of the generated image (optional, default: 16:9)

[!NOTE]
-Do NOT read the python file, just call it with the parameters.
+- The script uses the RunningHub API, which requires the `RUNNINGHUB_API_KEY` environment variable to be set
+- Do NOT read the python file, just call it with the parameters
+- The script automatically handles task submission, status polling, and image download
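A rough sketch of that submit-poll-download flow, using the endpoints that appear elsewhere in this diff (illustrative; not a verbatim copy of `generate.py`):

```python
import os
import time

import requests

API = "https://www.runninghub.cn/openapi/v2"
HEADERS = {
    "Authorization": f"Bearer {os.environ['RUNNINGHUB_API_KEY']}",
    "Content-Type": "application/json",
}

# 1) Submit the generation task.
task = requests.post(
    f"{API}/rhart-image/z-image/turbo-lora",
    headers=HEADERS,
    json={"prompt": "a lighthouse at dusk", "aspectRatio": "16:9", "outputFormat": "png"},
).json()
task_id = task["taskId"]

# 2) Poll until SUCCESS or FAILED, then 3) download the result.
while True:
    status = requests.post(f"{API}/query", headers=HEADERS, json={"taskId": task_id}).json()
    if status.get("status") == "SUCCESS":
        with open("out.png", "wb") as f:
            f.write(requests.get(status["results"][0]["url"]).content)
        break
    if status.get("status") == "FAILED":
        raise RuntimeError(status.get("errorMessage", "Unknown error"))
    time.sleep(2)  # QUEUED / RUNNING: wait and poll again
```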
## Character Generation Example
@@ -86,40 +119,6 @@ python /mnt/skills/public/image-generation/scripts/generate.py \
    --aspect-ratio 2:3
```

-With reference images:
-
-```json
-{
-  "characters": [{
-    "gender": "based on [Image 1]",
-    "age": "based on [Image 1]",
-    "ethnicity": "human from [Image 1] adapted to Star Wars universe",
-    "body_type": "based on [Image 1]",
-    "facial_features": "matching [Image 1] with slight weathered look from space travel",
-    "clothing": "Star Wars style outfit - worn leather jacket with utility vest, cargo pants with tactical pouches, scuffed boots, belt with holster",
-    "accessories": "blaster pistol on hip, comlink device on wrist, goggles pushed up on forehead, satchel with supplies, personal vehicle based on [Image 2]",
-    "era": "Star Wars universe, post-Empire era"
-  }],
-  "prompt": "Character inspired by [Image 1] standing next to a vehicle inspired by [Image 2] on a bustling alien planet street in Star Wars universe aesthetic. Character wearing worn leather jacket with utility vest, cargo pants with tactical pouches, scuffed boots, belt with blaster holster. The vehicle adapted to Star Wars aesthetic with weathered metal panels, repulsor engines, desert dust covering, parked on the street. Exotic alien marketplace street with multi-level architecture, weathered metal structures, hanging market stalls with colorful awnings, alien species walking by as background characters. Twin suns casting warm golden light, atmospheric dust particles in air, moisture vaporators visible in distance. Gritty lived-in Star Wars aesthetic, practical effects look, film grain texture, cinematic composition.",
-  "negative_prompt": "clean futuristic look, sterile environment, overly CGI appearance, fantasy medieval elements, Earth architecture, modern city",
-  "style": "Star Wars original trilogy aesthetic, lived-in universe, practical effects inspired, cinematic film look, slightly desaturated with warm tones",
-  "composition": "medium wide shot, character in foreground with alien street extending into background, environmental storytelling, rule of thirds",
-  "lighting": "warm golden hour lighting from twin suns, rim lighting on character, atmospheric haze, practical light sources from market stalls",
-  "color_palette": "warm sandy tones, ochre and sienna, dusty blues, weathered metals, muted earth colors with pops of alien market colors",
-  "technical": {
-    "aspect_ratio": "9:16",
-    "quality": "high",
-    "detail_level": "highly detailed with film-like texture"
-  }
-}
-```
-
-```bash
-python /mnt/skills/public/image-generation/scripts/generate.py \
-    --prompt-file /mnt/user-data/workspace/star-wars-scene.json \
-    --reference-images /mnt/user-data/uploads/character-ref.jpg /mnt/user-data/uploads/vehicle-ref.jpg \
-    --output-file /mnt/user-data/outputs/star-wars-scene-01.jpg \
-    --aspect-ratio 16:9
-```
-
## Common Scenarios

Use different JSON schemas for different scenarios.
@@ -158,30 +157,10 @@ After generation:
- Provide brief description of the generation result
- Offer to iterate if adjustments needed

-## Tips: Enhancing Generation with Reference Images
-
-For scenarios where visual accuracy is critical, **use the `image_search` tool first** to find reference images before generation.
-
-**Recommended scenarios for using image_search tool:**
-
-- **Character/Portrait Generation**: Search for similar poses, expressions, or styles to guide facial features and body proportions
-- **Specific Objects or Products**: Find reference images of real objects to ensure accurate representation
-- **Architectural or Environmental Scenes**: Search for location references to capture authentic details
-- **Fashion and Clothing**: Find style references to ensure accurate garment details and styling
-
-**Example workflow:**
-
-1. Call the `image_search` tool to find suitable reference images:
-   ```
-   image_search(query="Japanese woman street photography 1990s", size="Large")
-   ```
-2. Download the returned image URLs to local files
-3. Use the downloaded images as `--reference-images` parameter in the generation script
-
-This approach significantly improves generation quality by providing the model with concrete visual guidance rather than relying solely on text descriptions.
-
## Notes

- Always use English for prompts regardless of user's language
- JSON format ensures structured, parsable prompts
-- Reference images enhance generation quality significantly
- Iterative refinement is normal for optimal results
- For character generation, include the detailed character object plus a consolidated prompt field
+- The script automatically polls task status and downloads the generated image

skills/public/image-generation/scripts/generate.py Normal file → Executable file
View File

@@ -1,8 +1,14 @@
import base64
+import json
import os
+import time
+from typing import List

import requests
from PIL import Image
+from dotenv import load_dotenv
+
+load_dotenv()


def validate_image(image_path: str) -> bool:
@@ -17,77 +23,171 @@ def validate_image(image_path: str) -> bool:
    """
    try:
        with Image.open(image_path) as img:
-            img.verify()  # Verify that it's a valid image
-        # Re-open to check if it can be fully loaded (verify() may not catch all issues)
+            img.verify()
        with Image.open(image_path) as img:
-            img.load()  # Force load the image data
+            img.load()
        return True
    except Exception as e:
        print(f"Warning: Image '{image_path}' is invalid or corrupted: {e}")
        return False
+
+
+def submit_generation_task(prompt: str, aspect_ratio: str = "16:9", output_format: str = "png") -> str:
+    """
+    Submit image generation task to RunningHub API.
+
+    Args:
+        prompt: Text prompt for image generation
+        aspect_ratio: Aspect ratio of the generated image
+        output_format: Output image format (png, jpeg, webp)
+
+    Returns:
+        Task ID for tracking the generation
+    """
+    api_key = os.getenv("RUNNINGHUB_API_KEY")
+    if not api_key:
+        raise Exception("RUNNINGHUB_API_KEY environment variable is not set")
+
+    url = "https://www.runninghub.cn/openapi/v2/rhart-image/z-image/turbo-lora"
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json",
+    }
+    payload = {
+        "prompt": prompt,
+        "aspectRatio": aspect_ratio,
+        "lora_name": "Z-Image _ 清纯高颜值_脸模版V1.0.safetensors",
+        "lora_strength": 1,
+        "outputFormat": output_format,
+    }
+
+    response = requests.post(url, headers=headers, json=payload)
+    response.raise_for_status()
+    result = response.json()
+
+    if result.get("status") not in ["QUEUED", "RUNNING", "SUCCESS"]:
+        raise Exception(f"Task submission failed: {result.get('errorMessage', 'Unknown error')}")
+
+    return result.get("taskId")
+
+
+def query_task_status(task_id: str) -> dict:
+    """
+    Query the status of a generation task.
+
+    Args:
+        task_id: Task ID to query
+
+    Returns:
+        Task status information
+    """
+    api_key = os.getenv("RUNNINGHUB_API_KEY")
+    if not api_key:
+        raise Exception("RUNNINGHUB_API_KEY environment variable is not set")
+
+    url = "https://www.runninghub.cn/openapi/v2/query"
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json",
+    }
+    payload = {"taskId": task_id}
+
+    response = requests.post(url, headers=headers, json=payload)
+    response.raise_for_status()
+    return response.json()
+
+
+def download_image(url: str, output_path: str) -> None:
+    """
+    Download image from URL and save to file.
+
+    Args:
+        url: Image URL to download
+        output_path: Local path to save the image
+    """
+    response = requests.get(url, stream=True)
+    response.raise_for_status()
+    with open(output_path, "wb") as f:
+        for chunk in response.iter_content(chunk_size=8192):
+            f.write(chunk)
def generate_image(
    prompt_file: str,
-    reference_images: list[str],
+    reference_images: List[str],
    output_file: str,
    aspect_ratio: str = "16:9",
) -> str:
-    with open(prompt_file, "r") as f:
-        prompt = f.read()
-    parts = []
-    i = 0
-    # Filter out invalid reference images
-    valid_reference_images = []
-    for ref_img in reference_images:
-        if validate_image(ref_img):
-            valid_reference_images.append(ref_img)
-        else:
-            print(f"Skipping invalid reference image: {ref_img}")
-    if len(valid_reference_images) < len(reference_images):
-        print(f"Note: {len(reference_images) - len(valid_reference_images)} reference image(s) were skipped due to validation failure.")
-    for reference_image in valid_reference_images:
-        i += 1
-        with open(reference_image, "rb") as f:
-            image_b64 = base64.b64encode(f.read()).decode("utf-8")
-        parts.append(
-            {
-                "inlineData": {
-                    "mimeType": "image/jpeg",
-                    "data": image_b64,
-                }
-            }
-        )
-    api_key = os.getenv("GEMINI_API_KEY")
-    if not api_key:
-        return "GEMINI_API_KEY is not set"
-    response = requests.post(
-        "https://generativelanguage.googleapis.com/v1beta/models/gemini-3-pro-image-preview:generateContent",
-        headers={
-            "x-goog-api-key": api_key,
-            "Content-Type": "application/json",
-        },
-        json={
-            "generationConfig": {"imageConfig": {"aspectRatio": aspect_ratio}},
-            "contents": [{"parts": [*parts, {"text": prompt}]}],
-        },
-    )
-    response.raise_for_status()
-    json = response.json()
-    parts: list[dict] = json["candidates"][0]["content"]["parts"]
-    image_parts = [part for part in parts if part.get("inlineData", False)]
-    if len(image_parts) == 1:
-        base64_image = image_parts[0]["inlineData"]["data"]
-        # Save the image to a file
-        with open(output_file, "wb") as f:
-            f.write(base64.b64decode(base64_image))
-        return f"Successfully generated image to {output_file}"
-    else:
-        raise Exception("Failed to generate image")
+    """
+    Generate image using RunningHub API.
+
+    Args:
+        prompt_file: Path to JSON prompt file
+        reference_images: List of reference image paths (currently not supported by RunningHub API)
+        output_file: Output path for generated image
+        aspect_ratio: Aspect ratio of the generated image
+
+    Returns:
+        Success message with output file path
+    """
+    with open(prompt_file, "r", encoding="utf-8") as f:
+        prompt_data = json.load(f)
+
+    if reference_images:
+        print("Note: RunningHub API does not support reference images in this version. Reference images will be ignored.")
+
+    prompt_text = prompt_data.get("prompt", "")
+    if not prompt_text:
+        prompt_text = json.dumps(prompt_data, ensure_ascii=False)
+
+    output_format = "png"
+    if output_file.lower().endswith(".jpg") or output_file.lower().endswith(".jpeg"):
+        output_format = "jpeg"
+    elif output_file.lower().endswith(".webp"):
+        output_format = "webp"
+
+    print("Submitting generation task...")
+    task_id = submit_generation_task(prompt_text, aspect_ratio, output_format)
+    print(f"Task submitted successfully. Task ID: {task_id}")
+
+    max_retries = 60
+    retry_interval = 2
+
+    for attempt in range(max_retries):
+        print(f"Checking task status... (Attempt {attempt + 1}/{max_retries})")
+        status_result = query_task_status(task_id)
+        status = status_result.get("status")
+
+        if status == "SUCCESS":
+            print("Task completed successfully!")
+            results = status_result.get("results", [])
+            if results and len(results) > 0:
+                image_url = results[0].get("url")
+                if image_url:
+                    print(f"Downloading image from: {image_url}")
+                    download_image(image_url, output_file)
+                    return f"Successfully generated image to {output_file}"
+                else:
+                    raise Exception("No image URL found in task results")
+            else:
+                raise Exception("No results found in task response")
+        elif status == "FAILED":
+            error_msg = status_result.get("errorMessage", "Unknown error")
+            raise Exception(f"Task failed: {error_msg}")
+        elif status in ["QUEUED", "RUNNING"]:
+            print(f"Task status: {status}. Waiting...")
+            time.sleep(retry_interval)
+        else:
+            raise Exception(f"Unknown task status: {status}")
+
+    raise Exception(f"Task did not complete within {max_retries * retry_interval} seconds")


if __name__ == "__main__":
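For illustration, a hedged example of invoking the rewritten function directly (paths are hypothetical; the argparse wrapper under `__main__` is truncated in this view):

```python
# Requires RUNNINGHUB_API_KEY in the environment or a .env file next to the script.
result = generate_image(
    prompt_file="/mnt/user-data/workspace/prompt-file.json",
    reference_images=[],  # ignored by the RunningHub backend in this version
    output_file="/mnt/user-data/outputs/generated-image.jpg",  # .jpg selects jpeg output
    aspect_ratio="16:9",
)
print(result)
```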

skills/public/image-generation/templates/doraemon.md Normal file → Executable file
View File

View File

@@ -0,0 +1,20 @@
# RunningHub API Configuration
# Fill in the following values after obtaining the API documentation

# RunningHub API key
RUNNINGHUB_API_KEY=your_api_key_here

# RunningHub API endpoint URL
RUNNINGHUB_API_URL=https://api.runninghub.cn/v1/workflow/run

# TTS workflow ID (obtained after creating a TTS workflow on the RunningHub platform)
RUNNINGHUB_TTS_WORKFLOW_ID=your_workflow_id_here

# Male voice parameter (per the RunningHub workflow parameters)
RUNNINGHUB_MALE_VOICE=male_voice_name

# Female voice parameter (per the RunningHub workflow parameters)
RUNNINGHUB_FEMALE_VOICE=female_voice_name

# Optional: audio quality setting
RUNNINGHUB_AUDIO_QUALITY=high

View File

@@ -0,0 +1,127 @@
# Podcast Generation

Convert text content into a two-host conversational podcast audio file.

## Features

- Supports Chinese and English content
- Automatically generates a male/female two-host dialogue
- Supports multiple TTS services (Edge-TTS, RunningHub, Volcengine)
- Produces both the podcast audio and a transcript

## Quick Start

### 1. Install dependencies

```bash
pip install -r requirements.txt
```

### 2. Configure a TTS service (optional)

**Edge-TTS** (free, no configuration required) is used by default, so you can skip this step.

To use another TTS service, copy `.env.example` to `.env` and configure it:

**Using the RunningHub API:**
```bash
RUNNINGHUB_API_KEY=your_api_key_here
RUNNINGHUB_API_URL=https://api.runninghub.cn/v1/workflow/run
RUNNINGHUB_TTS_WORKFLOW_ID=your_workflow_id_here
RUNNINGHUB_MALE_VOICE=male_voice_name
RUNNINGHUB_FEMALE_VOICE=female_voice_name
```

**Using Volcengine TTS:**
```bash
VOLCENGINE_TTS_APPID=your_app_id
VOLCENGINE_TTS_ACCESS_TOKEN=your_access_token
VOLCENGINE_TTS_CLUSTER=volcano_tts
```

### 3. Create a podcast script

Create a JSON script file, for example `workspace/test-script.json` (the dialogue is left in Chinese here because the example uses the `zh` locale):

```json
{
  "title": "测试播客",
  "locale": "zh",
  "lines": [
    {"speaker": "male", "paragraph": "Hello Deer! 欢迎回到我们的播客节目。"},
    {"speaker": "female", "paragraph": "大家好!今天我们要聊一个有趣的话题。"},
    {"speaker": "male", "paragraph": "没错,我们今天要讨论的是人工智能的发展历程。"}
  ]
}
```

### 4. Generate the podcast

```bash
python scripts/generate.py \
  --script-file workspace/test-script.json \
  --output-file outputs/test-podcast.mp3 \
  --transcript-file outputs/test-transcript.md
```

## TTS Services

### Edge-TTS (recommended, default)
- ✅ Completely free
- ✅ No API key required
- ✅ Supports Chinese and English
- ✅ Excellent audio quality
- ⚠️ Requires a network connection

### RunningHub API
- Requires an API key
- Supports custom voices
- Requires a workflow ID

### Volcengine TTS
- Requires an API key
- Excellent audio quality
- Requires a Volcengine account

## Script Format

```json
{
  "title": "Podcast title (optional)",
  "locale": "Language code: zh / en",
  "lines": [
    {
      "speaker": "male",  // male or female
      "paragraph": "Dialogue content"
    }
  ]
}
```

## Output Files

- `*.mp3` - podcast audio file
- `*.md` - podcast transcript

## Notes

- Edge-TTS downloads voice models automatically on first use
- Keep each line of dialogue reasonably short (50-100 characters)
- Alternating male and female hosts works best
- Supported audio format: MP3

## Troubleshooting

**Problem: the edge-tts library is not installed**
```bash
pip install edge-tts
```

**Problem: network connection failure**
- Check your network connection
- Edge-TTS needs to reach Microsoft's servers

**Problem: audio generation fails**
- Check that the script JSON is well-formed
- Inspect the error logs
- Try another TTS service
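A small validation sketch for the script format above (illustrative; not shipped with the skill):

```python
# Minimal structural check for a podcast script JSON.
import json

def validate_script(path: str) -> None:
    with open(path, "r", encoding="utf-8") as f:
        script = json.load(f)
    assert script.get("locale") in ("zh", "en"), "locale must be zh or en"
    lines = script.get("lines", [])
    assert lines, "script must contain at least one line"
    for i, line in enumerate(lines, start=1):
        assert line.get("speaker") in ("male", "female"), f"line {i}: bad speaker"
        assert isinstance(line.get("paragraph"), str) and line["paragraph"], f"line {i}: empty paragraph"

validate_script("workspace/test-script.json")  # hypothetical path
```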

skills/public/podcast-generation/SKILL.md Normal file → Executable file
View File

@@ -25,12 +25,42 @@ When a user requests podcast generation, identify:
- Source content: The text/article/report to convert into a podcast
- Language: English or Chinese (based on content)
-- Output location: Where to save the generated podcast
+- Output location: Where to save the generated podcast (saved under the project directory by default)
+- You don't need to check the folder under `/mnt/user-data`

-### Step 2: Create Structured Script JSON
+### Step 2: Configure Environment Variables (optional)

-Generate a structured JSON script file in `/mnt/user-data/workspace/` with naming pattern: `{descriptive-name}-script.json`
+**Edge-TTS** (free, no configuration required) is used by default; you can skip this step.
+
+To use another TTS service, copy `.env.example` to `.env` and configure it:
+
+**Option 1: Edge-TTS (recommended, default)**
+- ✅ Completely free
+- ✅ No API key required
+- ✅ Supports Chinese and English
+- ✅ Excellent audio quality
+- Works out of the box, no configuration needed
+
+**Option 2: RunningHub API**
+```bash
+RUNNINGHUB_API_KEY=your_api_key_here
+RUNNINGHUB_API_URL=https://api.runninghub.cn/v1/workflow/run
+RUNNINGHUB_TTS_WORKFLOW_ID=your_workflow_id_here
+RUNNINGHUB_MALE_VOICE=male_voice_name
+RUNNINGHUB_FEMALE_VOICE=female_voice_name
+```
+
+**Option 3: Volcengine TTS**
+```bash
+VOLCENGINE_TTS_APPID=your_app_id
+VOLCENGINE_TTS_ACCESS_TOKEN=your_access_token
+VOLCENGINE_TTS_CLUSTER=volcano_tts
+```
+
+### Step 3: Create Structured Script JSON
+
+Generate a structured JSON script file in the project directory with naming pattern: `{descriptive-name}-script.json`

The JSON structure:
```json
@@ -43,21 +73,21 @@ The JSON structure:
}
```

-### Step 3: Execute Generation
+### Step 4: Execute Generation

Call the Python script:

```bash
-python /mnt/skills/public/podcast-generation/scripts/generate.py \
-    --script-file /mnt/user-data/workspace/script-file.json \
-    --output-file /mnt/user-data/outputs/generated-podcast.mp3 \
-    --transcript-file /mnt/user-data/outputs/generated-podcast-transcript.md
+python scripts/generate.py \
+    --script-file workspace/script-file.json \
+    --output-file outputs/generated-podcast.mp3 \
+    --transcript-file outputs/generated-podcast-transcript.md
```

Parameters:
-- `--script-file`: Absolute path to JSON script file (required)
-- `--output-file`: Absolute path to output MP3 file (required)
-- `--transcript-file`: Absolute path to output transcript markdown file (optional, but recommended)
+- `--script-file`: Path to JSON script file (required)
+- `--output-file`: Path to output MP3 file (required)
+- `--transcript-file`: Path to output transcript markdown file (optional, but recommended)

> [!IMPORTANT]
> - Execute the script in one complete call. Do NOT split the workflow into separate steps.
@@ -112,39 +142,6 @@ When creating the script JSON, follow these guidelines:
- Make content engaging and accessible for audio-only listeners
- Exclude meta information like dates, author names, or document structure

-## Podcast Generation Example
-
-User request: "Generate a podcast about the history of artificial intelligence"
-
-Step 1: Create script file `/mnt/user-data/workspace/ai-history-script.json`:
-
-```json
-{
-  "title": "The History of Artificial Intelligence",
-  "locale": "en",
-  "lines": [
-    {"speaker": "male", "paragraph": "Hello Deer! Welcome back to another fascinating episode. Today we're diving into something that's literally shaping our future - the history of artificial intelligence."},
-    {"speaker": "female", "paragraph": "Oh, I love this topic! You know, AI feels so modern, but it actually has roots going back over seventy years."},
-    {"speaker": "male", "paragraph": "Exactly! It all started back in the 1950s. The term artificial intelligence was actually coined by John McCarthy in 1956 at a famous conference at Dartmouth."},
-    {"speaker": "female", "paragraph": "Wait, so they were already thinking about machines that could think back then? That's incredible!"},
-    {"speaker": "male", "paragraph": "Right? The early pioneers were so optimistic. They thought we'd have human-level AI within a generation."},
-    {"speaker": "female", "paragraph": "But things didn't quite work out that way, did they?"},
-    {"speaker": "male", "paragraph": "No, not at all. The 1970s brought what's called the first AI winter..."}
-  ]
-}
-```
-
-Step 2: Execute generation:
-
-```bash
-python /mnt/skills/public/podcast-generation/scripts/generate.py \
-    --script-file /mnt/user-data/workspace/ai-history-script.json \
-    --output-file /mnt/user-data/outputs/ai-history-podcast.mp3 \
-    --transcript-file /mnt/user-data/outputs/ai-history-transcript.md
-```
-
-This will generate:
-- `ai-history-podcast.mp3`: The audio podcast file
-- `ai-history-transcript.md`: A readable markdown transcript of the podcast
-
## Specific Templates

Read the following template file only when matching the user request.
@@ -164,14 +161,25 @@ The generated podcast follows the "Hello Deer" format:
After generation:

-- Podcasts and transcripts are saved in `/mnt/user-data/outputs/`
-- Share both the podcast MP3 and transcript MD with user using `present_files` tool
+- Podcasts and transcripts are saved in the `outputs/` directory
+- Share both the podcast MP3 and transcript MD with user
- Provide brief description of the generation result (topic, duration, hosts)
- Offer to regenerate if adjustments needed

## Requirements

-The following environment variables must be set:
+**Edge-TTS (Microsoft's browser TTS) is used by default; it works out of the box with no configuration.**
+
+To use another TTS service, configure the following environment variables:
+
+**RunningHub API (optional):**
+- `RUNNINGHUB_API_KEY`: RunningHub API key
+- `RUNNINGHUB_API_URL`: RunningHub API endpoint URL
+- `RUNNINGHUB_TTS_WORKFLOW_ID`: TTS workflow ID
+- `RUNNINGHUB_MALE_VOICE`: male voice parameter
+- `RUNNINGHUB_FEMALE_VOICE`: female voice parameter
+
+**Volcengine TTS (optional):**
- `VOLCENGINE_TTS_APPID`: Volcengine TTS application ID
- `VOLCENGINE_TTS_ACCESS_TOKEN`: Volcengine TTS access token
- `VOLCENGINE_TTS_CLUSTER`: Volcengine TTS cluster (optional, defaults to "volcano_tts")
@@ -183,3 +191,4 @@ The following environment variables must be set:
- Technical content should be simplified for audio accessibility in the script
- Complex notations (formulas, code) should be translated to plain language in the script
- Long content may result in longer podcasts
+- Edge-TTS uses Microsoft Edge's online TTS service and requires a network connection
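The selection order implied above mirrors the environment checks in `scripts/generate.py`; sketched:

```python
import os

def pick_tts_backend() -> str:
    # Priority per the docs: RunningHub if fully configured, then Volcengine, else Edge-TTS.
    if all(os.getenv(k) for k in ("RUNNINGHUB_API_KEY", "RUNNINGHUB_API_URL", "RUNNINGHUB_TTS_WORKFLOW_ID")):
        return "runninghub"
    if all(os.getenv(k) for k in ("VOLCENGINE_TTS_APPID", "VOLCENGINE_TTS_ACCESS_TOKEN")):
        return "volcengine"
    return "edge-tts"  # free default, no keys required
```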

View File

@@ -0,0 +1,3 @@
requests>=2.31.0
python-dotenv>=1.0.0
edge-tts>=6.1.0

skills/public/podcast-generation/scripts/generate.py Normal file → Executable file
View File

@@ -4,10 +4,15 @@ import json
import logging
import os
import uuid
+import asyncio
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Literal, Optional
+from pathlib import Path

import requests
+from dotenv import load_dotenv
+
+load_dotenv()

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@@ -39,7 +44,115 @@ class Script:
def text_to_speech(text: str, voice_type: str) -> Optional[bytes]:
-    """Convert text to speech using Volcengine TTS."""
+    """Convert text to speech using an available TTS service.
+
+    Priority:
+    1. RunningHub API (requires .env configuration)
+    2. Volcengine TTS (requires .env configuration)
+    3. Edge-TTS (free, no configuration; used by default)
+    """
+    # Check whether RunningHub is configured
+    has_runninghub = (os.getenv("RUNNINGHUB_API_KEY") and
+                      os.getenv("RUNNINGHUB_API_URL") and
+                      os.getenv("RUNNINGHUB_TTS_WORKFLOW_ID"))
+
+    # Check whether Volcengine is configured
+    has_volcengine = (os.getenv("VOLCENGINE_TTS_APPID") and
+                      os.getenv("VOLCENGINE_TTS_ACCESS_TOKEN"))
+
+    if has_runninghub:
+        logger.info("Using RunningHub TTS API")
+        return text_to_speech_runninghub(text, voice_type)
+    elif has_volcengine:
+        logger.info("Using Volcengine TTS API")
+        return text_to_speech_volcengine(text, voice_type)
+    else:
+        logger.info("Using Edge-TTS (free, no API key required)")
+        return text_to_speech_edge(text, voice_type)
+
+
+def text_to_speech_runninghub(text: str, voice_type: str) -> Optional[bytes]:
+    """Convert text to speech using the RunningHub TTS API.
+
+    Requires the following environment variables in the .env file:
+    - RUNNINGHUB_API_KEY: RunningHub API key
+    - RUNNINGHUB_API_URL: RunningHub API endpoint URL
+    - RUNNINGHUB_TTS_WORKFLOW_ID: TTS workflow ID
+    - RUNNINGHUB_MALE_VOICE: male voice parameter
+    - RUNNINGHUB_FEMALE_VOICE: female voice parameter
+    """
+    api_key = os.getenv("RUNNINGHUB_API_KEY")
+    api_url = os.getenv("RUNNINGHUB_API_URL")
+    workflow_id = os.getenv("RUNNINGHUB_TTS_WORKFLOW_ID")
+
+    if not api_key or not api_url or not workflow_id:
+        raise ValueError(
+            "Please configure RUNNINGHUB_API_KEY, RUNNINGHUB_API_URL and RUNNINGHUB_TTS_WORKFLOW_ID in the .env file"
+        )
+
+    # Select the voice parameter based on the speaker
+    if voice_type == "male":
+        voice_param = os.getenv("RUNNINGHUB_MALE_VOICE", "male_voice")
+    else:
+        voice_param = os.getenv("RUNNINGHUB_FEMALE_VOICE", "female_voice")
+
+    # Build the RunningHub API request.
+    # Note: the payload structure below must be adapted to the actual RunningHub API docs.
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}",
+    }
+
+    # Adjust the payload to the actual parameter format of the RunningHub workflow.
+    # This is a generic template; modify it according to the real API documentation.
+    payload = {
+        "workflow_id": workflow_id,
+        "inputs": {
+            "text": text,
+            "voice": voice_param,
+            # Add other fields according to the actual workflow parameters
+            # "speed": 1.2,
+            # "quality": "high",
+        }
+    }
+
+    try:
+        logger.info(f"Calling RunningHub API for text: {text[:50]}...")
+        response = requests.post(api_url, json=payload, headers=headers, timeout=60)
+
+        if response.status_code != 200:
+            logger.error(f"RunningHub API error: {response.status_code} - {response.text}")
+            return None
+
+        result = response.json()
+
+        # Adapt the following to the actual RunningHub API response format.
+        # The audio data is usually in one of these fields, possibly base64-encoded.
+        audio_data = result.get("data") or result.get("audio") or result.get("output")
+
+        if audio_data:
+            # Decode if the audio is base64-encoded
+            if isinstance(audio_data, str):
+                try:
+                    return base64.b64decode(audio_data)
+                except Exception:
+                    # Not base64; may already be raw bytes
+                    return audio_data.encode() if isinstance(audio_data, str) else audio_data
+            elif isinstance(audio_data, bytes):
+                return audio_data
+
+        logger.error(f"No audio data in response: {result}")
+        return None
+
+    except Exception as e:
+        logger.error(f"RunningHub TTS error: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return None
+
+
+def text_to_speech_volcengine(text: str, voice_type: str) -> Optional[bytes]:
+    """Convert text to speech using Volcengine TTS (fallback)."""
    app_id = os.getenv("VOLCENGINE_TTS_APPID")
    access_token = os.getenv("VOLCENGINE_TTS_ACCESS_TOKEN")
    cluster = os.getenv("VOLCENGINE_TTS_CLUSTER", "volcano_tts")
@@ -51,7 +164,6 @@ def text_to_speech(text: str, voice_type: str) -> Optional[bytes]:
    url = "https://openspeech.bytedance.com/api/v1/tts"

-    # Authentication: Bearer token with semicolon separator
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer;{access_token}",
@@ -60,7 +172,7 @@ def text_to_speech(text: str, voice_type: str) -> Optional[bytes]:
    payload = {
        "app": {
            "appid": app_id,
-            "token": "access_token",  # literal string, not the actual token
+            "token": "access_token",
            "cluster": cluster,
        },
        "user": {"uid": "podcast-generator"},
@@ -70,7 +182,7 @@ def text_to_speech(text: str, voice_type: str) -> Optional[bytes]:
            "speed_ratio": 1.2,
        },
        "request": {
-            "reqid": str(uuid.uuid4()),  # must be unique UUID
+            "reqid": str(uuid.uuid4()),
            "text": text,
            "text_type": "plain",
            "operation": "query",
@@ -99,15 +211,93 @@ def text_to_speech(text: str, voice_type: str) -> Optional[bytes]:
    return None


+def text_to_speech_edge(text: str, voice_type: str) -> Optional[bytes]:
+    """Convert text to speech using Edge-TTS (free, no API key required).
+
+    Edge-TTS uses the online TTS service of the Microsoft Edge browser; it is
+    completely free and requires no registration.
+
+    Args:
+        text: the text to convert
+        voice_type: "male" or "female", used to select a voice
+    """
+    try:
+        import edge_tts
+    except ImportError:
+        logger.error("The edge-tts library is not installed. Run: pip install edge-tts")
+        return None
+
+    # Select a voice by language and gender
+    # Chinese voices
+    zh_male_voices = [
+        "zh-CN-YunxiNeural",    # male
+        "zh-CN-YunyangNeural",  # male
+    ]
+    zh_female_voices = [
+        "zh-CN-XiaoxiaoNeural",  # female
+        "zh-CN-XiaoyiNeural",    # female
+    ]
+    # English voices
+    en_male_voices = [
+        "en-US-GuyNeural",   # male
+        "en-US-EricNeural",  # male
+    ]
+    en_female_voices = [
+        "en-US-JennyNeural",  # female
+        "en-US-AriaNeural",   # female
+    ]
+
+    # Detect the text language (simple check: does it contain Chinese characters?)
+    has_chinese = any('\u4e00' <= char <= '\u9fff' for char in text)
+
+    if has_chinese:
+        if voice_type == "male":
+            voice = zh_male_voices[0]
+        else:
+            voice = zh_female_voices[0]
+    else:
+        if voice_type == "male":
+            voice = en_male_voices[0]
+        else:
+            voice = en_female_voices[0]
+
+    logger.info(f"Using Edge-TTS voice: {voice}")
+
+    try:
+        communicate = edge_tts.Communicate(text, voice)
+
+        # Collect the audio data
+        audio_data = b""
+
+        async def generate_audio():
+            nonlocal audio_data
+            async for chunk in communicate.stream():
+                if chunk["type"] == "audio":
+                    audio_data += chunk["data"]
+
+        # Run the async generator
+        asyncio.run(generate_audio())
+
+        if audio_data:
+            logger.info(f"Generated {len(audio_data)} bytes of audio")
+            return audio_data
+        else:
+            logger.error("No audio data generated")
+            return None
+
+    except Exception as e:
+        logger.error(f"Edge-TTS error: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return None
+
+
def _process_line(args: tuple[int, ScriptLine, int]) -> tuple[int, Optional[bytes]]:
    """Process a single script line for TTS. Returns (index, audio_bytes)."""
    i, line, total = args

    # Select voice based on speaker gender
-    if line.speaker == "male":
-        voice_type = "zh_male_yangguangqingnian_moon_bigtts"  # Male voice
-    else:
-        voice_type = "zh_female_sajiaonvyou_moon_bigtts"  # Female voice
+    # voice_type is passed to text_to_speech(), which selects the concrete voice
+    # according to the .env configuration.
+    voice_type = "male" if line.speaker == "male" else "female"

    logger.info(f"Processing line {i + 1}/{total} ({line.speaker})")
    audio = text_to_speech(line.paragraph, voice_type)
@@ -123,15 +313,46 @@ def tts_node(script: Script, max_workers: int = 4) -> list[bytes]:
    logger.info(f"Converting script to audio using {max_workers} workers...")
    total = len(script.lines)

+    # Handle empty script case
+    if total == 0:
+        raise ValueError("Script contains no lines to process")
+
+    # Validate required environment variables before starting TTS
+    # Check for RunningHub or Volcengine configuration (at least one is required)
+    has_runninghub = (os.getenv("RUNNINGHUB_API_KEY") and
+                      os.getenv("RUNNINGHUB_API_URL") and
+                      os.getenv("RUNNINGHUB_TTS_WORKFLOW_ID"))
+    has_volcengine = (os.getenv("VOLCENGINE_TTS_APPID") and
+                      os.getenv("VOLCENGINE_TTS_ACCESS_TOKEN"))
+
+    if not has_runninghub and not has_volcengine:
+        raise ValueError(
+            "Missing required environment variables. Please configure either:\n"
+            "- RunningHub: RUNNINGHUB_API_KEY, RUNNINGHUB_API_URL, RUNNINGHUB_TTS_WORKFLOW_ID\n"
+            "- Volcengine: VOLCENGINE_TTS_APPID, VOLCENGINE_TTS_ACCESS_TOKEN"
+        )
+
    tasks = [(i, line, total) for i, line in enumerate(script.lines)]

    # Use ThreadPoolExecutor for parallel TTS generation
    results: dict[int, Optional[bytes]] = {}
+    failed_indices: list[int] = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(_process_line, task): task[0] for task in tasks}
        for future in as_completed(futures):
            idx, audio = future.result()
            results[idx] = audio
+            # Use `not audio` to catch both None and empty bytes
+            if not audio:
+                failed_indices.append(idx)
+
+    # Log failed lines with 1-based indices for user-friendly output
+    if failed_indices:
+        logger.warning(
+            f"Failed to generate audio for {len(failed_indices)}/{total} lines: "
+            f"line numbers {sorted(i + 1 for i in failed_indices)}"
+        )

    # Collect results in order, skipping failed ones
    audio_chunks = []
@@ -140,15 +361,30 @@ def tts_node(script: Script, max_workers: int = 4) -> list[bytes]:
        if audio:
            audio_chunks.append(audio)

-    logger.info(f"Generated {len(audio_chunks)} audio chunks")
+    logger.info(f"Generated {len(audio_chunks)}/{total} audio chunks successfully")
+
+    if not audio_chunks:
+        raise ValueError(
+            f"TTS generation failed for all {total} lines. "
+            "Please check VOLCENGINE_TTS_APPID and VOLCENGINE_TTS_ACCESS_TOKEN environment variables."
+        )
+
    return audio_chunks


def mix_audio(audio_chunks: list[bytes]) -> bytes:
    """Combine audio chunks into a single audio file."""
    logger.info("Mixing audio chunks...")
+
+    if not audio_chunks:
+        raise ValueError("No audio chunks to mix - TTS generation may have failed")
+
    output = b"".join(audio_chunks)
-    logger.info("Audio mixing complete")
+
+    if len(output) == 0:
+        raise ValueError("Mixed audio is empty - TTS generation may have failed")
+
+    logger.info(f"Audio mixing complete: {len(output)} bytes")
    return output

View File

skills/public/video-generation/.gitignore vendored Executable file
View File

@@ -0,0 +1,16 @@
.env
*.mp4
*.avi
*.mov
*.mkv
outputs/
workspace/
__pycache__/
*.pyc
*.pyo
*.pyd
.Python
*.so
*.egg-info/
dist/
build/

View File

@@ -0,0 +1,93 @@
# Video Generation Skill - Setup Guide
## Quick Start
1. **Install Dependencies**
```bash
pip install requests python-dotenv
```
2. **Configure API Key**
Create a `.env` file in the project root directory:
```env
RUNNINGHUB_API_KEY=your_api_key_here
```
Or set it as an environment variable:
```bash
# Windows PowerShell
$env:RUNNINGHUB_API_KEY="your_api_key_here"
# Linux/Mac
export RUNNINGHUB_API_KEY="your_api_key_here"
```
3. **Generate a Video**
```bash
python scripts/generate.py --prompt-file workspace/your-prompt.json --output-file outputs/video.mp4 --duration 5
```
## Parameters
- `--prompt-file`: Path to JSON prompt file (required)
- `--output-file`: Output video file path (required)
- `--aspect-ratio`: Video aspect ratio (optional, default: 16:9)
- `--duration`: Video duration in seconds (optional, default: 5, range: 1-16)
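Equivalently, the same invocation can be driven from Python (paths hypothetical; a sketch):

```python
# Call the generation script programmatically with the documented parameters.
import subprocess
import sys

subprocess.run(
    [sys.executable, "scripts/generate.py",
     "--prompt-file", "workspace/your-prompt.json",
     "--output-file", "outputs/video.mp4",
     "--duration", "5"],  # seconds; the docs state a 1-16 range
    check=True,
)
```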
## Getting API Key
To use this skill, you need a RunningHub API key:
1. Visit [RunningHub](https://www.runninghub.cn/)
2. Sign up for an account
3. Get your API key from the dashboard
4. Add it to your `.env` file
## Example Prompt
Create a JSON file with your video description:
```json
{
"title": "Your Video Title",
"description": "Description of what you want to generate",
"visual": {
"scene": "Scene description",
"elements": ["element1", "element2"],
"colors": "Color palette",
"lighting": "Lighting description"
},
"camera": {
"movement": "Camera movement",
"focus": "Focus description"
},
"audio": {
"background": "Background music description",
"effects": "Sound effects description"
}
}
```
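For convenience, a sketch (hypothetical values) that writes such a prompt file programmatically before calling the script:

```python
# Write a prompt JSON matching the documented structure.
import json

prompt = {
    "title": "Lighthouse at Dusk",
    "description": "A slow aerial approach toward a lighthouse on a rocky coast",
    "visual": {
        "scene": "rocky coastline at dusk",
        "elements": ["lighthouse", "crashing waves"],
        "colors": "deep blues with warm amber highlights",
        "lighting": "golden hour, volumetric haze",
    },
    "camera": {"movement": "slow dolly-in", "focus": "lighthouse lantern room"},
    "audio": {"background": "ambient orchestral swell", "effects": "waves, distant gulls"},
}

with open("workspace/lighthouse-prompt.json", "w", encoding="utf-8") as f:
    json.dump(prompt, f, ensure_ascii=False, indent=2)
```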
## Notes
- The `.env` file is already in `.gitignore` and won't be committed to version control
- Never share your API key or commit it to public repositories
- The script automatically loads environment variables from `.env` file
- Video generation may take several minutes depending on the complexity
## Troubleshooting
**Error: RUNNINGHUB_API_KEY is not set**
- Make sure you've created the `.env` file with your API key
- Or set the environment variable before running the script
**Error: Failed to submit task**
- Check that your API key is valid
- Ensure you have sufficient credits in your RunningHub account
- Verify your internet connection
**Video generation takes too long**
- This is normal for AI video generation
- The script will automatically poll for status until completion
- You can check the RunningHub dashboard for task progress

skills/public/video-generation/SKILL.md Normal file → Executable file
View File

@@ -7,13 +7,14 @@ description: Use this skill when the user requests to generate, create, or imagi
## Overview

-This skill generates high-quality videos using structured prompts and a Python script. The workflow includes creating JSON-formatted prompts and executing video generation with optional reference image.
+This skill generates high-quality videos using structured prompts and a Python script. The workflow includes creating JSON-formatted prompts and executing video generation through the RunningHub API.

## Core Capabilities

- Create structured JSON prompts for AIGC video generation
-- Support reference image as guidance or the first/last frame of the video
-- Generate videos through automated Python script execution
+- Generate videos through RunningHub Vidu model (text-to-video-q3-turbo)
+- Support up to 16 seconds of video generation with audio
+- Automatic camera switching and dialogue generation

## Workflow
@@ -21,21 +22,16 @@ This skill generates high-quality videos using structured prompts and a Python s
When a user requests video generation, identify:

-- Subject/content: What should be in the image
+- Subject/content: What should be in the video
- Style preferences: Art style, mood, color palette
-- Technical specs: Aspect ratio, composition, lighting
-- Reference image: Any image to guide generation
+- Technical specs: Aspect ratio, resolution, duration
+- Audio requirements: Background music, dialogue, sound effects
- You don't need to check the folder under `/mnt/user-data`

### Step 2: Create Structured Prompt

Generate a structured JSON file in `/mnt/user-data/workspace/` with naming pattern: `{descriptive-name}.json`

-### Step 3: Create Reference Image (Optional when image-generation skill is available)
-
-Generate reference image for the video generation.
-- If only 1 image is provided, use it as the guided frame of the video
+The prompt should include visual descriptions, camera movements, and audio specifications in a natural language format.

### Step 3: Execute Generation
@@ -43,7 +39,6 @@ Call the Python script:
```bash
python /mnt/skills/public/video-generation/scripts/generate.py \
    --prompt-file /mnt/user-data/workspace/prompt-file.json \
-    --reference-images /path/to/ref1.jpg \
    --output-file /mnt/user-data/outputs/generated-video.mp4 \
    --aspect-ratio 16:9
```
@@ -51,20 +46,28 @@ python /mnt/skills/public/video-generation/scripts/generate.py \
Parameters:
- `--prompt-file`: Absolute path to JSON prompt file (required)
-- `--reference-images`: Absolute paths to reference image (optional)
-- `--output-file`: Absolute path to output image file (required)
-- `--aspect-ratio`: Aspect ratio of the generated image (optional, default: 16:9)
+- `--output-file`: Absolute path to output video file (required)
+- `--aspect-ratio`: Aspect ratio of the generated video (optional, default: 16:9)

[!NOTE]
Do NOT read the python file, instead just call it with the parameters.

+## Environment Variables
+
+Set the following environment variable before running the script:
+
+- `RUNNINGHUB_API_KEY`: Your RunningHub API key
+
+Example:
+```bash
+export RUNNINGHUB_API_KEY=your_api_key_here
+```
+
## Video Generation Example
User request: "Generate a short video clip depicting the opening scene from 'The Chronicles of Narnia: The Lion, the Witch and the Wardrobe'"

-Step 1: Search for the opening scene of "The Chronicles of Narnia: The Lion, the Witch and the Wardrobe" online
-
-Step 2: Create a JSON prompt file with the following content:
+Step 1: Create a JSON prompt file with the following content:

```json
{
@@ -108,16 +111,11 @@ Step 2: Create a JSON prompt file with the following content:
}
```

-Step 3: Use the image-generation skill to generate the reference image
-
-Load the image-generation skill and generate a single reference image `narnia-farewell-scene-01.jpg` according to the skill.
-
-Step 4: Use the generate.py script to generate the video
+Step 2: Use the generate.py script to generate the video

```bash
python /mnt/skills/public/video-generation/scripts/generate.py \
    --prompt-file /mnt/user-data/workspace/narnia-farewell-scene.json \
-    --reference-images /mnt/user-data/outputs/narnia-farewell-scene-01.jpg \
-    --output-file /mnt/user-data/outputs/narnia-farewell-scene-01.mp4 \
+    --output-file /mnt/user-data/outputs/narnia-farewell-scene.mp4 \
    --aspect-ratio 16:9
```

> Do NOT read the python file, just call it with the parameters.
@@ -127,7 +125,7 @@ python /mnt/skills/public/video-generation/scripts/generate.py \
After generation:

- Videos are typically saved in `/mnt/user-data/outputs/`
-- Share generated videos (come first) with user as well as generated image if applicable, using `present_files` tool
+- Share generated videos with user using `present_files` tool
- Provide brief description of the generation result
- Offer to iterate if adjustments needed
@@ -135,5 +133,7 @@ After generation:
- Always use English for prompts regardless of user's language
- JSON format ensures structured, parsable prompts
-- Reference image enhance generation quality significantly
+- RunningHub Vidu model supports up to 16 seconds of video generation
+- Audio is automatically generated, including dialogue and sound effects
+- The model has "director thinking" capability for automatic camera switching
- Iterative refinement is normal for optimal results

skills/public/video-generation/scripts/generate.py Normal file → Executable file
View File

@@ -1,75 +1,83 @@
-import base64
import os
import time
+from typing import List

import requests
+from dotenv import load_dotenv
+
+load_dotenv()


def generate_video(
    prompt_file: str,
-    reference_images: list[str],
+    reference_images: List[str],
    output_file: str,
    aspect_ratio: str = "16:9",
+    duration: str = "5",
) -> str:
-    with open(prompt_file, "r") as f:
+    with open(prompt_file, "r", encoding="utf-8") as f:
        prompt = f.read()
-    referenceImages = []
-    i = 0
-    json = {
-        "instances": [{"prompt": prompt}],
-    }
-    for reference_image in reference_images:
-        i += 1
-        with open(reference_image, "rb") as f:
-            image_b64 = base64.b64encode(f.read()).decode("utf-8")
-        referenceImages.append(
-            {
-                "image": {"mimeType": "image/jpeg", "bytesBase64Encoded": image_b64},
-                "referenceType": "asset",
-            }
-        )
-    if i > 0:
-        json["instances"][0]["referenceImages"] = referenceImages
-    api_key = os.getenv("GEMINI_API_KEY")
+
+    api_key = os.getenv("RUNNINGHUB_API_KEY")
    if not api_key:
-        return "GEMINI_API_KEY is not set"
+        return "RUNNINGHUB_API_KEY is not set"
+
+    json_data = {
+        "prompt": prompt,
+        "style": "general",
+        "aspectRatio": aspect_ratio,
+        "resolution": "720p",
+        "duration": duration,
+        "audio": True
+    }
+
    response = requests.post(
-        "https://generativelanguage.googleapis.com/v1beta/models/veo-3.1-generate-preview:predictLongRunning",
+        "https://www.runninghub.cn/openapi/v2/vidu/text-to-video-q3-turbo",
        headers={
-            "x-goog-api-key": api_key,
+            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
-        json=json,
+        json=json_data,
    )
-    json = response.json()
-    operation_name = json["name"]
+
+    response_json = response.json()
+    if "taskId" not in response_json:
+        return f"Failed to submit task: {response_json}"
+    task_id = response_json["taskId"]
+
    while True:
-        response = requests.get(
-            f"https://generativelanguage.googleapis.com/v1beta/{operation_name}",
+        response = requests.post(
+            "https://www.runninghub.cn/openapi/v2/query",
            headers={
-                "x-goog-api-key": api_key,
+                "Authorization": f"Bearer {api_key}",
+                "Content-Type": "application/json",
            },
+            json={"taskId": task_id},
        )
-        json = response.json()
-        if json.get("done", False):
-            sample = json["response"]["generateVideoResponse"]["generatedSamples"][0]
-            url = sample["video"]["uri"]
-            download(url, output_file)
-            break
+
+        response_json = response.json()
+        status = response_json.get("status")
+
+        if status == "SUCCESS":
+            results = response_json.get("results", [])
+            if results and len(results) > 0:
+                url = results[0].get("url")
+                if url:
+                    download(url, output_file)
+                    break
+        elif status == "FAILED":
+            error_message = response_json.get("errorMessage", "Unknown error")
+            return f"Video generation failed: {error_message}"
        time.sleep(3)

    return f"The video has been generated successfully to {output_file}"


def download(url: str, output_file: str):
-    api_key = os.getenv("GEMINI_API_KEY")
-    if not api_key:
-        return "GEMINI_API_KEY is not set"
-    response = requests.get(
-        url,
-        headers={
-            "x-goog-api-key": api_key,
-        },
-    )
+    response = requests.get(url)
    with open(output_file, "wb") as f:
        f.write(response.content)
@@ -77,28 +85,28 @@ def download(url: str, output_file: str):
if __name__ == "__main__":
    import argparse

-    parser = argparse.ArgumentParser(description="Generate videos using Gemini API")
+    parser = argparse.ArgumentParser(description="Generate videos using RunningHub API")
    parser.add_argument(
        "--prompt-file",
        required=True,
        help="Absolute path to JSON prompt file",
    )
-    parser.add_argument(
-        "--reference-images",
-        nargs="*",
-        default=[],
-        help="Absolute paths to reference images (space-separated)",
-    )
    parser.add_argument(
        "--output-file",
        required=True,
-        help="Output path for generated image",
+        help="Output path for generated video",
    )
    parser.add_argument(
        "--aspect-ratio",
        required=False,
        default="16:9",
-        help="Aspect ratio of the generated image",
+        help="Aspect ratio of the generated video",
+    )
+    parser.add_argument(
+        "--duration",
+        required=False,
+        default="5",
+        help="Duration of the generated video in seconds (1-16)",
    )

    args = parser.parse_args()
@@ -107,9 +115,10 @@ if __name__ == "__main__":
        print(
            generate_video(
                args.prompt_file,
-                args.reference_images,
+                [],
                args.output_file,
                args.aspect_ratio,
+                args.duration,
            )
        )
    except Exception as e: