From b3fc806076a418ecc95ebf8901c183491ef6ce5a Mon Sep 17 00:00:00 2001 From: MT-Mint <798521692@qq.com> Date: Wed, 8 Apr 2026 13:11:16 +0800 Subject: [PATCH] =?UTF-8?q?feat(skills):=20=E6=9B=B4=E6=96=B0=E5=9B=BE?= =?UTF-8?q?=E5=83=8F/=E8=A7=86=E9=A2=91=E7=94=9F=E6=88=90=E6=8A=80?= =?UTF-8?q?=E8=83=BD=E8=B5=84=E6=BA=90=E4=B8=8E=E8=84=9A=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- skills/public/image-generation/.env.example | 6 + skills/public/image-generation/.gitignore | 31 +++ skills/public/image-generation/SKILL.md | 103 ++++----- .../image-generation/scripts/generate.py | 210 +++++++++++++----- .../image-generation/templates/doraemon.md | 0 skills/public/video-generation/.gitignore | 16 ++ skills/public/video-generation/SETUP.md | 93 ++++++++ skills/public/video-generation/SKILL.md | 56 ++--- .../video-generation/scripts/generate.py | 117 +++++----- 9 files changed, 433 insertions(+), 199 deletions(-) create mode 100755 skills/public/image-generation/.env.example create mode 100755 skills/public/image-generation/.gitignore mode change 100644 => 100755 skills/public/image-generation/SKILL.md mode change 100644 => 100755 skills/public/image-generation/scripts/generate.py mode change 100644 => 100755 skills/public/image-generation/templates/doraemon.md create mode 100755 skills/public/video-generation/.gitignore create mode 100755 skills/public/video-generation/SETUP.md mode change 100644 => 100755 skills/public/video-generation/SKILL.md mode change 100644 => 100755 skills/public/video-generation/scripts/generate.py diff --git a/skills/public/image-generation/.env.example b/skills/public/image-generation/.env.example new file mode 100755 index 00000000..86474bd7 --- /dev/null +++ b/skills/public/image-generation/.env.example @@ -0,0 +1,6 @@ +# RunningHub API Configuration +# Copy this file to .env and fill in your actual API key + +# RunningHub API Key for image generation +# Get your API key from: 
https://www.runninghub.cn +RUNNINGHUB_API_KEY=your_api_key_here \ No newline at end of file diff --git a/skills/public/image-generation/.gitignore b/skills/public/image-generation/.gitignore new file mode 100755 index 00000000..cb609881 --- /dev/null +++ b/skills/public/image-generation/.gitignore @@ -0,0 +1,31 @@ +# Environment variables +.env +.env.local +.env.*.local + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +venv/ +env/ +ENV/ + +# Output files +*.jpg +*.jpeg +*.png +*.webp +outputs/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db \ No newline at end of file diff --git a/skills/public/image-generation/SKILL.md b/skills/public/image-generation/SKILL.md old mode 100644 new mode 100755 index d15cb63e..0e0dbed5 --- a/skills/public/image-generation/SKILL.md +++ b/skills/public/image-generation/SKILL.md @@ -7,14 +7,47 @@ description: Use this skill when the user requests to generate, create, imagine, ## Overview -This skill generates high-quality images using structured prompts and a Python script. The workflow includes creating JSON-formatted prompts and executing image generation with optional reference images. +This skill generates high-quality images using RunningHub API with structured prompts and a Python script. The workflow includes creating JSON-formatted prompts and executing image generation through asynchronous task submission. ## Core Capabilities - Create structured JSON prompts for AIGC image generation -- Support multiple reference images for style/composition guidance -- Generate images through automated Python script execution +- Generate images through RunningHub's Z-Image Turbo LoRA API +- Support asynchronous task submission and status polling - Handle various image generation scenarios (character design, scenes, products, etc.) +- Support multiple aspect ratios and output formats (PNG, JPEG, WEBP) + +## Configuration + +### API Key Setup + +This skill uses RunningHub API for image generation. 
You need to configure your API key before using the skill. + +**Option 1: Environment Variable (Recommended)** +```bash +# Set the RUNNINGHUB_API_KEY environment variable +export RUNNINGHUB_API_KEY=your_api_key_here + +# Or on Windows: +set RUNNINGHUB_API_KEY=your_api_key_here +``` + +**Option 2: .env File** +1. Copy `.env.example` to `.env`: + ```bash + cp .env.example .env + ``` +2. Edit `.env` and add your API key: + ``` + RUNNINGHUB_API_KEY=your_api_key_here + ``` +3. The `.env` file is automatically excluded from version control via `.gitignore` + +**Security Notes:** +- Never commit `.env` files to version control +- Never hardcode API keys in source code +- Rotate your API keys if they are accidentally exposed +- Get your API key from: https://www.runninghub.cn ## Workflow @@ -38,20 +71,20 @@ Call the Python script: ```bash python /mnt/skills/public/image-generation/scripts/generate.py \ --prompt-file /mnt/user-data/workspace/prompt-file.json \ - --reference-images /path/to/ref1.jpg /path/to/ref2.png \ - --output-file /mnt/user-data/outputs/generated-image.jpg + --output-file /mnt/user-data/outputs/generated-image.jpg \ --aspect-ratio 16:9 ``` Parameters: - `--prompt-file`: Absolute path to JSON prompt file (required) -- `--reference-images`: Absolute paths to reference images (optional, space-separated) - `--output-file`: Absolute path to output image file (required) - `--aspect-ratio`: Aspect ratio of the generated image (optional, default: 16:9) [!NOTE] -Do NOT read the python file, just call it with the parameters. 
+- The script uses RunningHub API which requires `RUNNINGHUB_API_KEY` environment variable to be set +- Do NOT read the python file, just call it with the parameters +- The script automatically handles task submission, status polling, and image download ## Character Generation Example @@ -86,40 +119,6 @@ python /mnt/skills/public/image-generation/scripts/generate.py \ --aspect-ratio 2:3 ``` -With reference images: -```json -{ - "characters": [{ - "gender": "based on [Image 1]", - "age": "based on [Image 1]", - "ethnicity": "human from [Image 1] adapted to Star Wars universe", - "body_type": "based on [Image 1]", - "facial_features": "matching [Image 1] with slight weathered look from space travel", - "clothing": "Star Wars style outfit - worn leather jacket with utility vest, cargo pants with tactical pouches, scuffed boots, belt with holster", - "accessories": "blaster pistol on hip, comlink device on wrist, goggles pushed up on forehead, satchel with supplies, personal vehicle based on [Image 2]", - "era": "Star Wars universe, post-Empire era" - }], - "prompt": "Character inspired by [Image 1] standing next to a vehicle inspired by [Image 2] on a bustling alien planet street in Star Wars universe aesthetic. Character wearing worn leather jacket with utility vest, cargo pants with tactical pouches, scuffed boots, belt with blaster holster. The vehicle adapted to Star Wars aesthetic with weathered metal panels, repulsor engines, desert dust covering, parked on the street. Exotic alien marketplace street with multi-level architecture, weathered metal structures, hanging market stalls with colorful awnings, alien species walking by as background characters. Twin suns casting warm golden light, atmospheric dust particles in air, moisture vaporators visible in distance. 
Gritty lived-in Star Wars aesthetic, practical effects look, film grain texture, cinematic composition.", - "negative_prompt": "clean futuristic look, sterile environment, overly CGI appearance, fantasy medieval elements, Earth architecture, modern city", - "style": "Star Wars original trilogy aesthetic, lived-in universe, practical effects inspired, cinematic film look, slightly desaturated with warm tones", - "composition": "medium wide shot, character in foreground with alien street extending into background, environmental storytelling, rule of thirds", - "lighting": "warm golden hour lighting from twin suns, rim lighting on character, atmospheric haze, practical light sources from market stalls", - "color_palette": "warm sandy tones, ochre and sienna, dusty blues, weathered metals, muted earth colors with pops of alien market colors", - "technical": { - "aspect_ratio": "9:16", - "quality": "high", - "detail_level": "highly detailed with film-like texture" - } -} -``` -```bash -python /mnt/skills/public/image-generation/scripts/generate.py \ - --prompt-file /mnt/user-data/workspace/star-wars-scene.json \ - --reference-images /mnt/user-data/uploads/character-ref.jpg /mnt/user-data/uploads/vehicle-ref.jpg \ - --output-file /mnt/user-data/outputs/star-wars-scene-01.jpg \ - --aspect-ratio 16:9 -``` - ## Common Scenarios Use different JSON schemas for different scenarios. @@ -158,30 +157,10 @@ After generation: - Provide brief description of the generation result - Offer to iterate if adjustments needed -## Tips: Enhancing Generation with Reference Images - -For scenarios where visual accuracy is critical, **use the `image_search` tool first** to find reference images before generation. 
- -**Recommended scenarios for using image_search tool:** -- **Character/Portrait Generation**: Search for similar poses, expressions, or styles to guide facial features and body proportions -- **Specific Objects or Products**: Find reference images of real objects to ensure accurate representation -- **Architectural or Environmental Scenes**: Search for location references to capture authentic details -- **Fashion and Clothing**: Find style references to ensure accurate garment details and styling - -**Example workflow:** -1. Call the `image_search` tool to find suitable reference images: - ``` - image_search(query="Japanese woman street photography 1990s", size="Large") - ``` -2. Download the returned image URLs to local files -3. Use the downloaded images as `--reference-images` parameter in the generation script - -This approach significantly improves generation quality by providing the model with concrete visual guidance rather than relying solely on text descriptions. - ## Notes - Always use English for prompts regardless of user's language - JSON format ensures structured, parsable prompts -- Reference images enhance generation quality significantly - Iterative refinement is normal for optimal results - For character generation, include the detailed character object plus a consolidated prompt field +- The script automatically polls task status and downloads the generated image diff --git a/skills/public/image-generation/scripts/generate.py b/skills/public/image-generation/scripts/generate.py old mode 100644 new mode 100755 index 7670176b..65b44185 --- a/skills/public/image-generation/scripts/generate.py +++ b/skills/public/image-generation/scripts/generate.py @@ -1,8 +1,14 @@ import base64 +import json import os +import time +from typing import List import requests from PIL import Image +from dotenv import load_dotenv + +load_dotenv() def validate_image(image_path: str) -> bool: @@ -17,77 +23,171 @@ def validate_image(image_path: str) -> bool: """ try: with 
Image.open(image_path) as img: - img.verify() # Verify that it's a valid image - # Re-open to check if it can be fully loaded (verify() may not catch all issues) + img.verify() with Image.open(image_path) as img: - img.load() # Force load the image data + img.load() return True except Exception as e: print(f"Warning: Image '{image_path}' is invalid or corrupted: {e}") return False +def submit_generation_task(prompt: str, aspect_ratio: str = "16:9", output_format: str = "png") -> str: + """ + Submit image generation task to RunningHub API. + + Args: + prompt: Text prompt for image generation + aspect_ratio: Aspect ratio of the generated image + output_format: Output image format (png, jpeg, webp) + + Returns: + Task ID for tracking the generation + """ + api_key = os.getenv("RUNNINGHUB_API_KEY") + if not api_key: + raise Exception("RUNNINGHUB_API_KEY environment variable is not set") + + url = "https://www.runninghub.cn/openapi/v2/rhart-image/z-image/turbo-lora" + + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + } + + payload = { + "prompt": prompt, + "aspectRatio": aspect_ratio, + "lora_name": "Z-Image _ 清纯高颜值_脸模版V1.0.safetensors", + "lora_strength": 1, + "outputFormat": output_format + } + + response = requests.post(url, headers=headers, json=payload) + response.raise_for_status() + result = response.json() + + if result.get("status") not in ["QUEUED", "RUNNING", "SUCCESS"]: + raise Exception(f"Task submission failed: {result.get('errorMessage', 'Unknown error')}") + + return result.get("taskId") + + +def query_task_status(task_id: str) -> dict: + """ + Query the status of a generation task. 
+ + Args: + task_id: Task ID to query + + Returns: + Task status information + """ + api_key = os.getenv("RUNNINGHUB_API_KEY") + if not api_key: + raise Exception("RUNNINGHUB_API_KEY environment variable is not set") + + url = "https://www.runninghub.cn/openapi/v2/query" + + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + } + + payload = { + "taskId": task_id + } + + response = requests.post(url, headers=headers, json=payload) + response.raise_for_status() + return response.json() + + +def download_image(url: str, output_path: str) -> None: + """ + Download image from URL and save to file. + + Args: + url: Image URL to download + output_path: Local path to save the image + """ + response = requests.get(url, stream=True) + response.raise_for_status() + + with open(output_path, "wb") as f: + for chunk in response.iter_content(chunk_size=8192): + f.write(chunk) + + def generate_image( prompt_file: str, - reference_images: list[str], + reference_images: List[str], output_file: str, aspect_ratio: str = "16:9", ) -> str: + """ + Generate image using RunningHub API. + + Args: + prompt_file: Path to JSON prompt file + reference_images: List of reference image paths (currently not supported by RunningHub API) + output_file: Output path for generated image + aspect_ratio: Aspect ratio of the generated image + + Returns: + Success message with output file path + """ with open(prompt_file, "r", encoding="utf-8") as f: - prompt = f.read() - parts = [] - i = 0 + prompt_data = json.load(f) - # Filter out invalid reference images - valid_reference_images = [] - for ref_img in reference_images: - if validate_image(ref_img): - valid_reference_images.append(ref_img) + if reference_images: + print("Note: RunningHub API does not support reference images in this version. 
Reference images will be ignored.") + + prompt_text = prompt_data.get("prompt", "") + if not prompt_text: + prompt_text = json.dumps(prompt_data, ensure_ascii=False) + + output_format = "png" + if output_file.lower().endswith(".jpg") or output_file.lower().endswith(".jpeg"): + output_format = "jpeg" + elif output_file.lower().endswith(".webp"): + output_format = "webp" + + print(f"Submitting generation task...") + task_id = submit_generation_task(prompt_text, aspect_ratio, output_format) + print(f"Task submitted successfully. Task ID: {task_id}") + + max_retries = 60 + retry_interval = 2 + + for attempt in range(max_retries): + print(f"Checking task status... (Attempt {attempt + 1}/{max_retries})") + status_result = query_task_status(task_id) + status = status_result.get("status") + + if status == "SUCCESS": + print("Task completed successfully!") + results = status_result.get("results", []) + if results and len(results) > 0: + image_url = results[0].get("url") + if image_url: + print(f"Downloading image from: {image_url}") + download_image(image_url, output_file) + return f"Successfully generated image to {output_file}" + else: + raise Exception("No image URL found in task results") + else: + raise Exception("No results found in task response") + elif status == "FAILED": + error_msg = status_result.get("errorMessage", "Unknown error") + raise Exception(f"Task failed: {error_msg}") + elif status in ["QUEUED", "RUNNING"]: + print(f"Task status: {status}. 
Waiting...") + time.sleep(retry_interval) else: - print(f"Skipping invalid reference image: {ref_img}") + raise Exception(f"Unknown task status: {status}") - if len(valid_reference_images) < len(reference_images): - print(f"Note: {len(reference_images) - len(valid_reference_images)} reference image(s) were skipped due to validation failure.") - - for reference_image in valid_reference_images: - i += 1 - with open(reference_image, "rb") as f: - image_b64 = base64.b64encode(f.read()).decode("utf-8") - parts.append( - { - "inlineData": { - "mimeType": "image/jpeg", - "data": image_b64, - } - } - ) - - api_key = os.getenv("GEMINI_API_KEY") - if not api_key: - return "GEMINI_API_KEY is not set" - response = requests.post( - "https://generativelanguage.googleapis.com/v1beta/models/gemini-3-pro-image-preview:generateContent", - headers={ - "x-goog-api-key": api_key, - "Content-Type": "application/json", - }, - json={ - "generationConfig": {"imageConfig": {"aspectRatio": aspect_ratio}}, - "contents": [{"parts": [*parts, {"text": prompt}]}], - }, - ) - response.raise_for_status() - json = response.json() - parts: list[dict] = json["candidates"][0]["content"]["parts"] - image_parts = [part for part in parts if part.get("inlineData", False)] - if len(image_parts) == 1: - base64_image = image_parts[0]["inlineData"]["data"] - # Save the image to a file - with open(output_file, "wb") as f: - f.write(base64.b64decode(base64_image)) - return f"Successfully generated image to {output_file}" - else: - raise Exception("Failed to generate image") + raise Exception(f"Task did not complete within {max_retries * retry_interval} seconds") if __name__ == "__main__": diff --git a/skills/public/image-generation/templates/doraemon.md b/skills/public/image-generation/templates/doraemon.md old mode 100644 new mode 100755 diff --git a/skills/public/video-generation/.gitignore b/skills/public/video-generation/.gitignore new file mode 100755 index 00000000..699067e2 --- /dev/null +++ 
b/skills/public/video-generation/.gitignore @@ -0,0 +1,16 @@ +.env +*.mp4 +*.avi +*.mov +*.mkv +outputs/ +workspace/ +__pycache__/ +*.pyc +*.pyo +*.pyd +.Python +*.so +*.egg-info/ +dist/ +build/ \ No newline at end of file diff --git a/skills/public/video-generation/SETUP.md b/skills/public/video-generation/SETUP.md new file mode 100755 index 00000000..d3c4f885 --- /dev/null +++ b/skills/public/video-generation/SETUP.md @@ -0,0 +1,93 @@ +# Video Generation Skill - Setup Guide + +## Quick Start + +1. **Install Dependencies** + ```bash + pip install requests python-dotenv + ``` + +2. **Configure API Key** + + Create a `.env` file in the project root directory: + ```env + RUNNINGHUB_API_KEY=your_api_key_here + ``` + + Or set it as an environment variable: + ```bash + # Windows PowerShell + $env:RUNNINGHUB_API_KEY="your_api_key_here" + + # Linux/Mac + export RUNNINGHUB_API_KEY="your_api_key_here" + ``` + +3. **Generate a Video** + ```bash + python scripts/generate.py --prompt-file workspace/your-prompt.json --output-file outputs/video.mp4 --duration 5 + ``` + +## Parameters + +- `--prompt-file`: Path to JSON prompt file (required) +- `--output-file`: Output video file path (required) +- `--aspect-ratio`: Video aspect ratio (optional, default: 16:9) +- `--duration`: Video duration in seconds (optional, default: 5, range: 1-16) + +## Getting API Key + +To use this skill, you need a RunningHub API key: + +1. Visit [RunningHub](https://www.runninghub.cn/) +2. Sign up for an account +3. Get your API key from the dashboard +4. 
Add it to your `.env` file + +## Example Prompt + +Create a JSON file with your video description: + +```json +{ + "title": "Your Video Title", + "description": "Description of what you want to generate", + "visual": { + "scene": "Scene description", + "elements": ["element1", "element2"], + "colors": "Color palette", + "lighting": "Lighting description" + }, + "camera": { + "movement": "Camera movement", + "focus": "Focus description" + }, + "audio": { + "background": "Background music description", + "effects": "Sound effects description" + } +} +``` + +## Notes + +- The `.env` file is already in `.gitignore` and won't be committed to version control +- Never share your API key or commit it to public repositories +- The script automatically loads environment variables from `.env` file +- Video generation may take several minutes depending on the complexity + +## Troubleshooting + +**Error: RUNNINGHUB_API_KEY is not set** +- Make sure you've created the `.env` file with your API key +- Or set the environment variable before running the script + +**Error: Failed to submit task** +- Check that your API key is valid +- Ensure you have sufficient credits in your RunningHub account +- Verify your internet connection + +**Video generation takes too long** +- This is normal for AI video generation +- The script will automatically poll for status until completion +- You can check the RunningHub dashboard for task progress \ No newline at end of file diff --git a/skills/public/video-generation/SKILL.md b/skills/public/video-generation/SKILL.md old mode 100644 new mode 100755 index e0c55b36..a1e1e52a --- a/skills/public/video-generation/SKILL.md +++ b/skills/public/video-generation/SKILL.md @@ -7,13 +7,14 @@ description: Use this skill when the user requests to generate, create, or imagi ## Overview -This skill generates high-quality videos using structured prompts and a Python script. 
The workflow includes creating JSON-formatted prompts and executing video generation with optional reference image. +This skill generates high-quality videos using structured prompts and a Python script. The workflow includes creating JSON-formatted prompts and executing video generation through RunningHub API. ## Core Capabilities - Create structured JSON prompts for AIGC video generation -- Support reference image as guidance or the first/last frame of the video -- Generate videos through automated Python script execution +- Generate videos through RunningHub Vidu model (text-to-video-q3-turbo) +- Support up to 16 seconds video generation with audio +- Automatic camera switching and dialogue generation ## Workflow @@ -21,21 +22,16 @@ This skill generates high-quality videos using structured prompts and a Python s When a user requests video generation, identify: -- Subject/content: What should be in the image +- Subject/content: What should be in the video - Style preferences: Art style, mood, color palette -- Technical specs: Aspect ratio, composition, lighting -- Reference image: Any image to guide generation -- You don't need to check the folder under `/mnt/user-data` +- Technical specs: Aspect ratio, resolution, duration +- Audio requirements: Background music, dialogue, sound effects ### Step 2: Create Structured Prompt Generate a structured JSON file in `/mnt/user-data/workspace/` with naming pattern: `{descriptive-name}.json` -### Step 3: Create Reference Image (Optional when image-generation skill is available) - -Generate reference image for the video generation. - -- If only 1 image is provided, use it as the guided frame of the video +The prompt should include visual descriptions, camera movements, and audio specifications in a natural language format. 
### Step 3: Execute Generation @@ -43,7 +39,6 @@ Call the Python script: ```bash python /mnt/skills/public/video-generation/scripts/generate.py \ --prompt-file /mnt/user-data/workspace/prompt-file.json \ - --reference-images /path/to/ref1.jpg \ --output-file /mnt/user-data/outputs/generated-video.mp4 \ --aspect-ratio 16:9 ``` @@ -51,20 +46,28 @@ python /mnt/skills/public/video-generation/scripts/generate.py \ Parameters: - `--prompt-file`: Absolute path to JSON prompt file (required) -- `--reference-images`: Absolute paths to reference image (optional) -- `--output-file`: Absolute path to output image file (required) -- `--aspect-ratio`: Aspect ratio of the generated image (optional, default: 16:9) +- `--output-file`: Absolute path to output video file (required) +- `--aspect-ratio`: Aspect ratio of the generated video (optional, default: 16:9) [!NOTE] Do NOT read the python file, instead just call it with the parameters. +## Environment Variables + +Set the following environment variable before running the script: + +- `RUNNINGHUB_API_KEY`: Your RunningHub API key + +Example: +```bash +export RUNNINGHUB_API_KEY=your_api_key_here +``` + ## Video Generation Example User request: "Generate a short video clip depicting the opening scene from "The Chronicles of Narnia: The Lion, the Witch and the Wardrobe" -Step 1: Search for the opening scene of "The Chronicles of Narnia: The Lion, the Witch and the Wardrobe" online - -Step 2: Create a JSON prompt file with the following content: +Step 1: Create a JSON prompt file with the following content: ```json { @@ -108,16 +111,11 @@ Step 2: Create a JSON prompt file with the following content: } ``` -Step 3: Use the image-generation skill to generate the reference image - -Load the image-generation skill and generate a single reference image `narnia-farewell-scene-01.jpg` according to the skill. 
- -Step 4: Use the generate.py script to generate the video +Step 2: Use the generate.py script to generate the video ```bash python /mnt/skills/public/video-generation/scripts/generate.py \ --prompt-file /mnt/user-data/workspace/narnia-farewell-scene.json \ - --reference-images /mnt/user-data/outputs/narnia-farewell-scene-01.jpg \ - --output-file /mnt/user-data/outputs/narnia-farewell-scene-01.mp4 \ + --output-file /mnt/user-data/outputs/narnia-farewell-scene.mp4 \ --aspect-ratio 16:9 ``` > Do NOT read the python file, just call it with the parameters. @@ -127,7 +125,7 @@ python /mnt/skills/public/video-generation/scripts/generate.py \ After generation: - Videos are typically saved in `/mnt/user-data/outputs/` -- Share generated videos (come first) with user as well as generated image if applicable, using `present_files` tool +- Share generated videos with user using `present_files` tool - Provide brief description of the generation result - Offer to iterate if adjustments needed @@ -135,5 +133,7 @@ After generation: - Always use English for prompts regardless of user's language - JSON format ensures structured, parsable prompts -- Reference image enhance generation quality significantly +- RunningHub Vidu model supports up to 16 seconds video generation +- Audio is automatically generated including dialogue and sound effects +- The model has "director thinking" capability for automatic camera switching - Iterative refinement is normal for optimal results diff --git a/skills/public/video-generation/scripts/generate.py b/skills/public/video-generation/scripts/generate.py old mode 100644 new mode 100755 index 6f28f57e..3b55b038 --- a/skills/public/video-generation/scripts/generate.py +++ b/skills/public/video-generation/scripts/generate.py @@ -1,75 +1,83 @@ -import base64 import os import time +from typing import List import requests +from dotenv import load_dotenv + +load_dotenv() def generate_video( prompt_file: str, - reference_images: list[str], + 
reference_images: List[str], output_file: str, aspect_ratio: str = "16:9", + duration: str = "5", ) -> str: with open(prompt_file, "r", encoding="utf-8") as f: prompt = f.read() - referenceImages = [] - i = 0 - json = { - "instances": [{"prompt": prompt}], - } - for reference_image in reference_images: - i += 1 - with open(reference_image, "rb") as f: - image_b64 = base64.b64encode(f.read()).decode("utf-8") - referenceImages.append( - { - "image": {"mimeType": "image/jpeg", "bytesBase64Encoded": image_b64}, - "referenceType": "asset", - } - ) - if i > 0: - json["instances"][0]["referenceImages"] = referenceImages - api_key = os.getenv("GEMINI_API_KEY") + + api_key = os.getenv("RUNNINGHUB_API_KEY") if not api_key: - return "GEMINI_API_KEY is not set" + return "RUNNINGHUB_API_KEY is not set" + + json_data = { + "prompt": prompt, + "style": "general", + "aspectRatio": aspect_ratio, + "resolution": "720p", + "duration": duration, + "audio": True + } + response = requests.post( - "https://generativelanguage.googleapis.com/v1beta/models/veo-3.1-generate-preview:predictLongRunning", + "https://www.runninghub.cn/openapi/v2/vidu/text-to-video-q3-turbo", headers={ - "x-goog-api-key": api_key, + "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", }, - json=json, + json=json_data, ) - json = response.json() - operation_name = json["name"] + + response_json = response.json() + + if "taskId" not in response_json: + return f"Failed to submit task: {response_json}" + + task_id = response_json["taskId"] + while True: - response = requests.get( - f"https://generativelanguage.googleapis.com/v1beta/{operation_name}", + response = requests.post( + "https://www.runninghub.cn/openapi/v2/query", headers={ - "x-goog-api-key": api_key, + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", }, + json={"taskId": task_id}, ) - json = response.json() - if json.get("done", False): - sample = 
json["response"]["generateVideoResponse"]["generatedSamples"][0] - url = sample["video"]["uri"] - download(url, output_file) - break + + response_json = response.json() + status = response_json.get("status") + + if status == "SUCCESS": + results = response_json.get("results", []) + if results and len(results) > 0: + url = results[0].get("url") + if url: + download(url, output_file) + break + elif status == "FAILED": + error_message = response_json.get("errorMessage", "Unknown error") + return f"Video generation failed: {error_message}" + time.sleep(3) + return f"The video has been generated successfully to {output_file}" def download(url: str, output_file: str): - api_key = os.getenv("GEMINI_API_KEY") - if not api_key: - return "GEMINI_API_KEY is not set" - response = requests.get( - url, - headers={ - "x-goog-api-key": api_key, - }, - ) + response = requests.get(url) with open(output_file, "wb") as f: f.write(response.content) @@ -77,28 +85,28 @@ def download(url: str, output_file: str): if __name__ == "__main__": import argparse - parser = argparse.ArgumentParser(description="Generate videos using Gemini API") + parser = argparse.ArgumentParser(description="Generate videos using RunningHub API") parser.add_argument( "--prompt-file", required=True, help="Absolute path to JSON prompt file", ) - parser.add_argument( - "--reference-images", - nargs="*", - default=[], - help="Absolute paths to reference images (space-separated)", - ) parser.add_argument( "--output-file", required=True, - help="Output path for generated image", + help="Output path for generated video", ) parser.add_argument( "--aspect-ratio", required=False, default="16:9", - help="Aspect ratio of the generated image", + help="Aspect ratio of the generated video", + ) + parser.add_argument( + "--duration", + required=False, + default="5", + help="Duration of the generated video in seconds (1-16)", ) args = parser.parse_args() @@ -107,9 +115,10 @@ if __name__ == "__main__": print( generate_video( 
args.prompt_file, - args.reference_images, + [], args.output_file, args.aspect_ratio, + args.duration, ) ) except Exception as e: