Compare commits

...

10 Commits

26 changed files with 1963 additions and 321 deletions

View File

@ -132,6 +132,7 @@ stop:
@-pkill -f "next start" 2>/dev/null || true
@-pkill -f "next-server" 2>/dev/null || true
@-pkill -f "next-server" 2>/dev/null || true
@-pkill -f "frontend/.next/standalone/server.js" 2>/dev/null || true
@-nginx -c $(PWD)/docker/nginx/nginx.local.conf -p $(PWD) -s quit 2>/dev/null || true
@sleep 1
@-pkill -9 nginx 2>/dev/null || true

View File

@ -75,6 +75,8 @@
"nanoid": "^5.1.6",
"next": "^16.1.7",
"next-themes": "^0.4.6",
"nextra": "^4.6.1",
"nextra-theme-docs": "^4.6.1",
"nuxt-og-image": "^5.1.13",
"ogl": "^1.0.11",
"react": "^19.0.0",

File diff suppressed because it is too large Load Diff

View File

@ -1,9 +1,7 @@
import { generateStaticParamsFor, importPage } from "nextra/pages";
import { importPage } from "nextra/pages";
import { useMDXComponents as getMDXComponents } from "../../../../mdx-components";
export const generateStaticParams = generateStaticParamsFor("mdxPath");
export async function generateMetadata(props) {
const params = await props.params;
const { metadata } = await importPage(params.mdxPath, params.lang);

View File

@ -3,7 +3,6 @@ import { getPageMap } from "nextra/page-map";
import { Footer, Layout } from "nextra-theme-docs";
import { Header } from "@/components/landing/header";
import { getLocaleByLang } from "@/core/i18n/locale";
import "nextra-theme-docs/style.css";
const footer = <Footer>MIT {new Date().getFullYear()} © Nextra.</Footer>;
@ -27,18 +26,11 @@ function formatPageRoute(base: string, items: PageMapItem[]): PageMapItem[] {
export default async function DocLayout({ children, params }) {
const { lang } = await params;
const locale = getLocaleByLang(lang);
const pages = await getPageMap(`/${lang}`);
return (
<Layout
navbar={
<Header
className="relative max-w-full px-10"
homeURL="/"
locale={locale}
/>
}
navbar={<Header />}
pageMap={formatPageRoute(`/${lang}/docs`, pages)}
docsRepositoryBase="https://github.com/bytedance/deerflow/tree/main/frontend/src/app/content"
footer={footer}

View File

@ -2,7 +2,7 @@
import { BotIcon, PlusSquare } from "lucide-react";
import { useParams, useRouter } from "next/navigation";
import { useCallback, useState } from "react";
import { useCallback } from "react";
import type { PromptInputMessage } from "@/components/ai-elements/prompt-input";
import { Button } from "@/components/ui/button";
@ -11,11 +11,7 @@ import { ArtifactTrigger } from "@/components/workspace/artifacts";
import { ChatBox, useThreadChat } from "@/components/workspace/chats";
import { ExportTrigger } from "@/components/workspace/export-trigger";
import { InputBox } from "@/components/workspace/input-box";
import {
MessageList,
MESSAGE_LIST_DEFAULT_PADDING_BOTTOM,
MESSAGE_LIST_FOLLOWUPS_EXTRA_PADDING_BOTTOM,
} from "@/components/workspace/messages";
import { MessageList } from "@/components/workspace/messages";
import { ThreadContext } from "@/components/workspace/messages/context";
import { ThreadTitle } from "@/components/workspace/thread-title";
import { TodoList } from "@/components/workspace/todo-list";
@ -24,15 +20,17 @@ import { Tooltip } from "@/components/workspace/tooltip";
import { useAgent } from "@/core/agents";
import { useI18n } from "@/core/i18n/hooks";
import { useNotification } from "@/core/notification/hooks";
import { useThreadSettings } from "@/core/settings";
import { useLocalSettings } from "@/core/settings";
import { useThreadStream } from "@/core/threads/hooks";
import { textOfMessage } from "@/core/threads/utils";
import { env } from "@/env";
import { cn } from "@/lib/utils";
const MESSAGE_LIST_DEFAULT_PADDING_BOTTOM = 160;
const MESSAGE_LIST_FOLLOWUPS_EXTRA_PADDING_BOTTOM = 120;
export default function AgentChatPage() {
const { t } = useI18n();
const [showFollowups, setShowFollowups] = useState(false);
const router = useRouter();
const { agent_name } = useParams<{
@ -42,7 +40,7 @@ export default function AgentChatPage() {
const { agent } = useAgent(agent_name);
const { threadId, isNewThread, setIsNewThread } = useThreadChat();
const [settings, setSettings] = useThreadSettings(threadId);
const [settings, setSettings] = useLocalSettings();
const { showNotification } = useNotification();
const [thread, sendMessage] = useThreadStream({
@ -86,13 +84,12 @@ export default function AgentChatPage() {
await thread.stop();
}, [thread]);
const messageListPaddingBottom = showFollowups
? MESSAGE_LIST_DEFAULT_PADDING_BOTTOM +
MESSAGE_LIST_FOLLOWUPS_EXTRA_PADDING_BOTTOM
: undefined;
const messageListPaddingBottom =
MESSAGE_LIST_DEFAULT_PADDING_BOTTOM +
MESSAGE_LIST_FOLLOWUPS_EXTRA_PADDING_BOTTOM;
return (
<ThreadContext.Provider value={{ thread }}>
<ThreadContext.Provider value={{ thread, threadId }}>
<ChatBox threadId={threadId}>
<div className="relative flex size-full min-h-0 justify-between">
<header
@ -166,9 +163,10 @@ export default function AgentChatPage() {
<InputBox
className={cn("bg-background/5 w-full -translate-y-4")}
isNewThread={isNewThread}
threadId={threadId}
autoFocus={isNewThread}
showWelcomeStyle={isNewThread}
hasSubmitted={!isNewThread}
status={
thread.error
? "error"
@ -184,7 +182,6 @@ export default function AgentChatPage() {
}
disabled={env.NEXT_PUBLIC_STATIC_WEBSITE_ONLY === "true"}
onContextChange={(context) => setSettings("context", context)}
onFollowupsVisibilityChange={setShowFollowups}
onSubmit={handleSubmit}
onStop={handleStop}
/>

View File

@ -186,7 +186,7 @@ export default function NewAgentPage() {
// ── Step 2: chat ───────────────────────────────────────────────────────────
return (
<ThreadContext.Provider value={{ thread }}>
<ThreadContext.Provider value={{ thread, threadId }}>
<ArtifactsProvider>
<div className="flex size-full flex-col">
{header}

View File

@ -67,10 +67,19 @@ export default function ChatPage() {
// 新逻辑:历史渲染和新会话仅由路由 /chats/new 控制,不再读取 isnew/is_chatting 参数。
const shouldRenderHistory = !showWelcomeStyle;
const createNewSession = useMemo(() => isNewThread, [isNewThread]);
const safeThreadId = useMemo(() => {
if (!threadId || threadId === "new") {
return undefined;
}
return threadId;
}, [threadId]);
const streamThreadId = useMemo(() => {
return isNewThread && createNewSession ? undefined : threadId;
}, [createNewSession, isNewThread, threadId]);
if (isNewThread && createNewSession) {
return undefined;
}
return safeThreadId;
}, [createNewSession, isNewThread, safeThreadId]);
const { showNotification } = useNotification();
@ -79,7 +88,7 @@ export default function ChatPage() {
skillError: selectedSkillError,
clearSkillError: clearSelectedSkillError,
isBootstrapping: isSelectedSkillBootstrapping,
} = useSelectedSkillListener({ threadId });
} = useSelectedSkillListener({ threadId: safeThreadId ?? null });
// 对话行为控制器
const [thread, sendMessage, isUploading] = useThreadStream({
threadId: streamThreadId,
@ -91,7 +100,7 @@ export default function ChatPage() {
setIsNewThread(false);
// if (!shouldStayOnNewRoute) {
// Keep /new in history so router.back() can return to it.
router.replace(`/workspace/chats/${currentThreadId}`);
router.replace(`/workspace/chats/${currentThreadId}?is_chatting=true`);
// }
// history.pushState(null, "", pathOfThread(currentThreadId));
},
@ -495,7 +504,7 @@ export default function ChatPage() {
if (threadId && threadId !== "new") {
nextQuery.set("thread_id", threadId);
}
router.replace(`/workspace/chats/new?${nextQuery.toString()}`);
router.replace(`/workspace/chats/${threadId}?is_chatting=false`);
}}
>

View File

@ -133,7 +133,7 @@ const ChatBox: React.FC<{
artifactPanelOpen ? "translate-x-0" : "translate-x-full",
)}
>
{selectedArtifact ? (
{selectedArtifact && threadId ? (
<ArtifactFileDetail
// className="size-full"
filepath={selectedArtifact}
@ -167,7 +167,7 @@ const ChatBox: React.FC<{
<ArtifactFileList
className="max-w-(--container-width-sm) p-4 pt-12"
files={thread.values.artifacts ?? []}
threadId={threadId}
threadId={threadId ?? ""}
/>
</main>
</div>

View File

@ -1,7 +1,7 @@
"use client";
import { useParams, usePathname, useSearchParams } from "next/navigation";
import { useCallback, useEffect, useState } from "react";
import { useEffect, useState } from "react";
export function useThreadChat() {
@ -26,53 +26,13 @@ export function useThreadChat() {
const rawPathThreadId = params?.thread_id ?? threadIdFromPathname;
const isNewRoute = rawPathThreadId === "new";
const threadIdFromPathOrParams:string = isNewRoute
? threadIdFromSearchParams?? params.thread_id
: params.thread_id;
const threadIdFromPathOrParams = isNewRoute
? normalizeThreadId(threadIdFromSearchParams)
: normalizeThreadId(rawPathThreadId);
// console.log("[useThreadChat] pathname", pathname);
// console.log("[useThreadChat] params.thread_id", params?.thread_id);
// console.log("[useThreadChat] threadIdFromPathname", threadIdFromPathname);
// console.log("[useThreadChat] threadIdFromPath", threadIdFromPath);
// 持久化兜底:用于处理首屏水合或 params 时序问题。
const readStoredThreadId = () => {
if (typeof window === "undefined") {
return undefined;
}
const stored = window.sessionStorage.getItem("workspace.thread_id");
return isValidThreadId(stored) ? stored : undefined;
};
// 读取 query 的 thread_id先用 hook必要时用 window 兜底)。
const readQueryThreadId = () => {
const fromHook = threadIdFromSearchParams;
if (isValidThreadId(fromHook)) {
return fromHook;
}
if (typeof window === "undefined") {
return undefined;
}
const fromLocation = new URLSearchParams(window.location.search).get(
"thread_id",
);
if (isValidThreadId(fromLocation)) {
return fromLocation.trim();
}
return undefined;
};
const queryThreadIdFromParams = readQueryThreadId();
// console.log("[useThreadChat] query.thread_id", queryThreadIdFromParams);
// 归一化:当值为 "new" 时,替换为 query 中的 thread_id如果存在
const normalizeThreadId = useCallback(
(value?: string | null) => {
if (!value) {
return undefined;
}
return value === "new" ? queryThreadIdFromParams : value;
},
[queryThreadIdFromParams],
);
// New session is only controlled by `/workspace/chats/new`.
const [isNewThread, setIsNewThread] = useState(() => isNewRoute);
@ -82,7 +42,7 @@ export function useThreadChat() {
// console.log("[useThreadChat] effectiveThreadIdFromPath", effectiveThreadIdFromPath);
const [threadId, setThreadId] = useState<string>(() => {
return threadIdFromPathOrParams;
return threadIdFromPathOrParams ?? "";
});
@ -93,11 +53,10 @@ export function useThreadChat() {
}
setIsNewThread(isNewRoute);
// Prefer path thread id, fall back to query thread_id when path is /new.
setThreadId(threadIdFromPathOrParams);
setThreadId(threadIdFromPathOrParams ?? "");
setShowWelcomeStyle(isNewRoute || !isChattingFromQuery);
}, [
isNewRoute,
normalizeThreadId,
pathname,
searchParams,
isChattingFromQuery,
@ -114,6 +73,11 @@ export function useThreadChat() {
};
}
function normalizeThreadId(value?: string | null): string | undefined {
if (!value) return undefined;
return isValidThreadId(value) ? value.trim() : undefined;
}
function isValidThreadId(value?: string | null): value is string {
if (!value) return false;
const normalized = value.trim().toLowerCase();

View File

@ -59,10 +59,10 @@ import {
import { useI18n } from "@/core/i18n/hooks";
import type {
SelectedSkillPayloadItem,
SuggestionSkillChildren,
} from "@/core/i18n/locales/types";
import { POST_MESSAGE_TYPES, sendToParent } from "@/core/iframe-messages";
import { useModels } from "@/core/models/hooks";
import { bootstrapRemoteSkill } from "@/core/skills/api";
import type { AgentThreadContext } from "@/core/threads";
import { useIframeSkill } from "@/hooks/use-iframe-skill";
import { cn } from "@/lib/utils";
@ -429,6 +429,7 @@ export function InputBox({
{showWelcomeStyle && !hasSubmitted && searchParams.get("mode") !== "skill" && (
<SuggestionListContainer
threadId={threadId}
sendSelectSkill={iframeSkill.sendSelectSkill}
/>
)}
@ -493,26 +494,31 @@ export function InputBox({
// SuggestionList 容器
function SuggestionListContainer({
threadId,
sendSelectSkill,
}: {
threadId: string;
sendSelectSkill: (selectedSkills: SelectedSkillPayloadItem[]) => void;
}) {
return (
<div className="absolute right-0 bottom-0 left-0 z-0 flex translate-y-full items-center justify-center pt-4">
<SuggestionList sendSelectSkill={sendSelectSkill} />
<SuggestionList threadId={threadId} sendSelectSkill={sendSelectSkill} />
</div>
);
}
// 快速选择skillbutton
function SuggestionList({
threadId,
sendSelectSkill,
}: {
threadId: string;
sendSelectSkill: (selectedSkills: SelectedSkillPayloadItem[]) => void;
}) {
const { t } = useI18n();
const searchParams = useSearchParams();
const { textInput } = usePromptInputController();
const suggestions =t.inputBox.suggestions
const suggestions = t.inputBox.suggestions;
const promptSuggestions = suggestions.filter(
(
suggestion,
@ -525,14 +531,35 @@ function SuggestionList({
suggestion: {
prompt: string;
skill_id?: string[];
children?: SuggestionSkillChildren[];
children?: SelectedSkillPayloadItem[];
suggestion: string;
},
) => {
const languageTypeRaw =
searchParams.get("languageType")?.trim() ??
searchParams.get("language_type")?.trim();
const languageType = languageTypeRaw ? Number(languageTypeRaw) : 0;
const bootstrapByIds = (ids: string[]) => {
const content_ids = Array.from(
new Set(
ids
.map((id) => Number(id))
.filter((id) => Number.isFinite(id) && id > 0),
),
);
if (!threadId || content_ids.length === 0) return;
void bootstrapRemoteSkill({
thread_id: threadId,
content_ids,
language_type: languageType,
target_dir: "/mnt/user-data/uploads/skill",
clear_target: true,
});
};
// 优先从 children 中提取 skill_id 数组,转换为 selectedSkills 发送给宿主页
const childSkillIds = (suggestion.children ?? [])
.flatMap((item) => item.skill_id)
.map((item) => item.trim())
.map((item) => String(item.id).trim())
.filter((id): id is string => Boolean(id));
if (childSkillIds.length > 0) {
sendSelectSkill(
@ -541,6 +568,7 @@ function SuggestionList({
name: suggestion.suggestion,
})),
);
bootstrapByIds(childSkillIds);
return;
}
if (suggestion.skill_id && suggestion.skill_id.length > 0) {
@ -550,6 +578,7 @@ function SuggestionList({
name: suggestion.suggestion,
})),
);
bootstrapByIds(suggestion.skill_id);
return;
}
// 原有逻辑
@ -569,7 +598,7 @@ function SuggestionList({
}
}, 500);
},
[textInput, sendSelectSkill],
[textInput, sendSelectSkill, threadId, searchParams],
);
return (
<Suggestions className="min-h-16 w-fit items-start" data-testid="welcome-suggestions">

View File

@ -58,7 +58,7 @@ export const zhCN: Translations = {
// Welcome
welcome: {
// TODO: 测试环境标识
greeting: "轻办公 · XClaw Tagv3.1.0 fix适配md图片的更多情况",
greeting: "轻办公 · XClaw Tagv3.2.0 --- Skill功能施工中 --- refactor(frontend): 将 SELECT_SKILL 重命名为 SELECT_SKILLS.",
description:
"欢迎使用 🦌 DeerFlow一个完全开源的超级智能体。通过内置和自定义的 Skills\nDeerFlow 可以帮你搜索网络、分析数据,还能为你生成幻灯片、\n图片、视频、播客及网页等几乎可以做任何事情。",
@ -117,31 +117,31 @@ export const zhCN: Translations = {
prompt:
"为[主题/产品]撰写吸引人的自媒体文案,包括标题、正文和话题标签。",
icon: PenLineIcon,
children: [{ id: "1245", name: "自媒体文案" }],
children: [{ id: "1245", name: "微信文章撰写" }],
},
{
suggestion: "需求文档",
prompt: "编写[项目/功能]的需求文档,包含功能描述、用户故事和验收标准。",
icon: CompassIcon,
children: [{ id: "520", name: "需求文档" }],
children: [{ id: "520", name: "分解功能产品需求文档" }],
},
{
suggestion: "使用指南",
prompt: "编写[产品/功能]的使用指南,包含操作步骤、注意事项和常见问题。",
icon: GraduationCapIcon,
children: [{ id: "409", name: "使用指南" }],
children: [{ id: "409", name: "指南编写" }],
},
{
suggestion: "Excel数据分析",
prompt: "对[Excel文件/数据]进行分析,生成数据洞察和可视化建议。",
icon: MicroscopeIcon,
children: [{ id: "5", name: "Excel数据分析" }],
children: [{ id: "5", name: "数据分析" }],
},
{
suggestion: "市场调研",
prompt: "针对[行业/产品]进行市场调研,分析市场规模、竞品和趋势。",
icon: ShapesIcon,
children: [{ id: "1216", name: "市场研" }],
children: [{ id: "1216", name: "市场究报告" }],
},
],
suggestionsCreate: [

View File

@ -12,7 +12,7 @@ export const POST_MESSAGE_TYPES = {
// 会话是否处于聊天态
IS_CHATTING: "isChatting",
// 选择预定义 skill
SELECT_SKILL: "selectedSkills",
SELECT_SKILLS: "selectedSkills",
// 打开 skill 选择对话框
OPEN_SKILL_DIALOG: "openSkillDialog",
} as const;
@ -41,7 +41,7 @@ export interface IsChattingMessage {
}
export interface SelectSkillMessage {
type: typeof POST_MESSAGE_TYPES.SELECT_SKILL;
type: typeof POST_MESSAGE_TYPES.SELECT_SKILLS;
selectedSkills: SelectedSkillPayloadItem[];
}

View File

@ -193,14 +193,17 @@ export function useThreadStream({
handleStreamStart(meta.thread_id);
setOnStreamThreadId(meta.thread_id);
},
onLangChainEvent(event) {
if (event.event === "on_tool_end") {
listeners.current.onToolEnd?.({
name: event.name,
data: event.data,
});
}
},
onLangChainEvent:
onToolEnd == null
? undefined
: (event) => {
if (event.event === "on_tool_end") {
listeners.current.onToolEnd?.({
name: event.name,
data: event.data,
});
}
},
onUpdateEvent(data) {
const updates: Array<Partial<AgentThreadState> | null> = Object.values(
data || {},
@ -325,15 +328,20 @@ export function useThreadStream({
}
setOptimisticMessages(newOptimistic);
if (resolvedThreadId) {
// For "new chat with prefilled thread_id" flows, calling onStart before
// submit can trigger route switch too early, which causes the new page to
// fetch history before the thread/run is actually created.
// Let useStream.onCreated -> handleStreamStart drive onStart instead.
if (resolvedThreadId && !createNewSession) {
_handleOnStart(resolvedThreadId);
}
let uploadedFileInfo: UploadedFileInfo[] = [];
try {
// 新会话模式下,删除旧线程并创建同名新线程
if (createNewSession && resolvedThreadId) {
// 新会话模式下,仅在本地已有历史消息时才重置旧线程。
// 对于全新 thread_id避免多发一次 DELETE /threads/{id}(通常会 404
if (createNewSession && resolvedThreadId && thread.messages.length > 0) {
await apiClient.threads.delete(resolvedThreadId).catch(() => undefined);
}

View File

@ -70,7 +70,7 @@ export function useIframeSkill(): UseIframeSkillReturn {
// 发送选择预定义 skill
const sendSelectSkill = useCallback((selectedSkills: SelectedSkillPayloadItem[]) => {
const message = { type: POST_MESSAGE_TYPES.SELECT_SKILL, selectedSkills };
const message = { type: POST_MESSAGE_TYPES.SELECT_SKILLS, selectedSkills };
console.log("[useIframeSkill] sendSelectSkill:", message);
sendToParent(message);
}, []);
@ -89,7 +89,7 @@ export function useIframeSkill(): UseIframeSkillReturn {
const clearSkill = useCallback(() => {
setSelectedSkill(null);
// 发送空数组给主页,通知取消选择
const message = { type: POST_MESSAGE_TYPES.SELECT_SKILL, selectedSkills: [] };
const message = { type: POST_MESSAGE_TYPES.SELECT_SKILLS, selectedSkills: [] };
console.log("[useIframeSkill] clearSkill, sending selectedSkills=[]:", message);
sendToParent(message);
}, []);

View File

@ -0,0 +1,72 @@
#!/usr/bin/env bash
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$REPO_ROOT"
# Default deploy target (can be overridden by positional args)
DEFAULT_REMOTE="root@111.228.39.147"
DEFAULT_REMOTE_APP_DIR="/root/deerflow2"
usage() {
cat <<'EOF'
Usage:
./scripts/deploy-frontend-standalone.sh [remote] [remote_app_dir]
Arguments:
remote SSH target, optional. Default: root@111.228.39.147
remote_app_dir Remote deerflow2 root dir, optional. Default: /root/deerflow2
Example:
./scripts/deploy-frontend-standalone.sh
./scripts/deploy-frontend-standalone.sh ubuntu@1.2.3.4 /opt/deerflow2
Notes:
- Run this script on your local machine (build machine), not on the server.
- Requires: pnpm, rsync, ssh
- Script will run:
sudo systemctl daemon-reload
sudo systemctl restart deerflow.service
EOF
}
if [ "${1:-}" = "-h" ] || [ "${1:-}" = "--help" ]; then
usage
exit 0
fi
if [ "$#" -gt 2 ]; then
usage
exit 1
fi
REMOTE="${1:-$DEFAULT_REMOTE}"
REMOTE_APP_DIR="${2:-$DEFAULT_REMOTE_APP_DIR}"
echo "==> Deploy target:"
echo " remote: $REMOTE"
echo " app dir: $REMOTE_APP_DIR"
echo "==> Building frontend (standalone)..."
pnpm -C frontend build
echo "==> Uploading standalone server..."
rsync -azP --delete --info=progress2 \
frontend/.next/standalone/ \
"$REMOTE:$REMOTE_APP_DIR/frontend/.next/standalone/"
echo "==> Uploading static assets..."
rsync -azP --info=progress2 \
frontend/.next/static/ \
"$REMOTE:$REMOTE_APP_DIR/frontend/.next/standalone/.next/static/"
echo "==> Uploading public assets..."
rsync -azP --info=progress2 \
frontend/public/ \
"$REMOTE:$REMOTE_APP_DIR/frontend/.next/standalone/public/"
echo "==> Reloading systemd and restarting deerflow.service on remote host..."
ssh "$REMOTE" "sudo systemctl daemon-reload && sudo systemctl restart deerflow.service"
echo "==> Done."

View File

@ -38,7 +38,43 @@ else
echo "Python is required to generate BETTER_AUTH_SECRET, but neither python3 nor python was found."
exit 1
fi
FRONTEND_CMD="env BETTER_AUTH_SECRET=$($PYTHON_BIN -c 'import secrets; print(secrets.token_hex(16))') pnpm run preview"
FRONTEND_STANDALONE_DIR="$REPO_ROOT/frontend/.next/standalone"
FRONTEND_STANDALONE_SERVER="$FRONTEND_STANDALONE_DIR/server.js"
FRONTEND_STANDALONE_STATIC_DIR="$FRONTEND_STANDALONE_DIR/.next/static"
FRONTEND_SOURCE_STATIC_DIR="$REPO_ROOT/frontend/.next/static"
FRONTEND_STANDALONE_PUBLIC_DIR="$FRONTEND_STANDALONE_DIR/public"
FRONTEND_SOURCE_PUBLIC_DIR="$REPO_ROOT/frontend/public"
if [ ! -f "$FRONTEND_STANDALONE_SERVER" ]; then
echo "✗ Frontend standalone server not found: $FRONTEND_STANDALONE_SERVER"
echo " make start (prod) now uses prebuilt standalone assets and will not run local build."
echo " Please prepare frontend artifacts first (example):"
echo " pnpm -C frontend build"
exit 1
fi
# Align local runtime layout with deploy-frontend-standalone.sh:
# standalone + .next/static + public under frontend/.next/standalone/
if [ ! -d "$FRONTEND_STANDALONE_STATIC_DIR" ] && [ -d "$FRONTEND_SOURCE_STATIC_DIR" ]; then
mkdir -p "$FRONTEND_STANDALONE_DIR/.next"
cp -R "$FRONTEND_SOURCE_STATIC_DIR" "$FRONTEND_STANDALONE_STATIC_DIR"
fi
if [ ! -d "$FRONTEND_STANDALONE_PUBLIC_DIR" ] && [ -d "$FRONTEND_SOURCE_PUBLIC_DIR" ]; then
cp -R "$FRONTEND_SOURCE_PUBLIC_DIR" "$FRONTEND_STANDALONE_PUBLIC_DIR"
fi
if [ ! -d "$FRONTEND_STANDALONE_STATIC_DIR" ]; then
echo "✗ Missing standalone static assets: $FRONTEND_STANDALONE_STATIC_DIR"
echo " Please ensure .next/static is available for standalone runtime."
exit 1
fi
if [ ! -d "$FRONTEND_STANDALONE_PUBLIC_DIR" ]; then
echo "✗ Missing standalone public assets: $FRONTEND_STANDALONE_PUBLIC_DIR"
echo " Please ensure public/ is available for standalone runtime."
exit 1
fi
FRONTEND_CMD="env BETTER_AUTH_SECRET=$($PYTHON_BIN -c 'import secrets; print(secrets.token_hex(16))') HOSTNAME=0.0.0.0 PORT=3000 node server.js"
fi
# ── Stop existing services ────────────────────────────────────────────────────
@ -48,6 +84,7 @@ pkill -f "langgraph dev" 2>/dev/null || true
pkill -f "uvicorn app.gateway.app:app" 2>/dev/null || true
pkill -f "next dev" 2>/dev/null || true
pkill -f "next-server" 2>/dev/null || true
pkill -f "frontend/.next/standalone/server.js" 2>/dev/null || true
nginx -c "$REPO_ROOT/docker/nginx/nginx.local.conf" -p "$REPO_ROOT" -s quit 2>/dev/null || true
sleep 1
pkill -9 nginx 2>/dev/null || true
@ -110,6 +147,7 @@ cleanup() {
pkill -f "next dev" 2>/dev/null || true
pkill -f "next start" 2>/dev/null || true
pkill -f "next-server" 2>/dev/null || true
pkill -f "frontend/.next/standalone/server.js" 2>/dev/null || true
# Kill nginx using the captured PID first (most reliable),
# then fall back to pkill/killall for any stray nginx workers.
if [ -n "${NGINX_PID:-}" ] && kill -0 "$NGINX_PID" 2>/dev/null; then
@ -181,7 +219,11 @@ echo "Starting Gateway API..."
echo "✓ Gateway API started on localhost:8001"
echo "Starting Frontend..."
(cd frontend && $FRONTEND_CMD > ../logs/frontend.log 2>&1) &
if $DEV_MODE; then
(cd frontend && $FRONTEND_CMD > ../logs/frontend.log 2>&1) &
else
(cd frontend/.next/standalone && $FRONTEND_CMD > ../../../logs/frontend.log 2>&1) &
fi
./scripts/wait-for-port.sh 3000 120 "Frontend" || {
echo " See logs/frontend.log for details"
tail -20 logs/frontend.log

View File

@ -0,0 +1,6 @@
# RunningHub API Configuration
# Copy this file to .env and fill in your actual API key
# RunningHub API Key for image generation
# Get your API key from: https://www.runninghub.cn
RUNNINGHUB_API_KEY=your_api_key_here

31
skills/public/image-generation/.gitignore vendored Executable file
View File

@ -0,0 +1,31 @@
# Environment variables
.env
.env.local
.env.*.local
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
venv/
env/
ENV/
# Output files
*.jpg
*.jpeg
*.png
*.webp
outputs/
# IDE
.vscode/
.idea/
*.swp
*.swo
# OS
.DS_Store
Thumbs.db

103
skills/public/image-generation/SKILL.md Normal file → Executable file
View File

@ -7,14 +7,47 @@ description: Use this skill when the user requests to generate, create, imagine,
## Overview
This skill generates high-quality images using structured prompts and a Python script. The workflow includes creating JSON-formatted prompts and executing image generation with optional reference images.
This skill generates high-quality images using RunningHub API with structured prompts and a Python script. The workflow includes creating JSON-formatted prompts and executing image generation through asynchronous task submission.
## Core Capabilities
- Create structured JSON prompts for AIGC image generation
- Support multiple reference images for style/composition guidance
- Generate images through automated Python script execution
- Generate images through RunningHub's Z-Image Turbo LoRA API
- Support asynchronous task submission and status polling
- Handle various image generation scenarios (character design, scenes, products, etc.)
- Support multiple aspect ratios and output formats (PNG, JPEG, WEBP)
## Configuration
### API Key Setup
This skill uses RunningHub API for image generation. You need to configure your API key before using the skill.
**Option 1: Environment Variable (Recommended)**
```bash
# Set the RUNNINGHUB_API_KEY environment variable
export RUNNINGHUB_API_KEY=your_api_key_here
# Or on Windows:
set RUNNINGHUB_API_KEY=your_api_key_here
```
**Option 2: .env File**
1. Copy `.env.example` to `.env`:
```bash
cp .env.example .env
```
2. Edit `.env` and add your API key:
```
RUNNINGHUB_API_KEY=your_api_key_here
```
3. The `.env` file is automatically excluded from version control via `.gitignore`
**Security Notes:**
- Never commit `.env` files to version control
- Never hardcode API keys in source code
- Rotate your API keys if they are accidentally exposed
- Get your API key from: https://www.runninghub.cn
## Workflow
@ -38,20 +71,20 @@ Call the Python script:
```bash
python /mnt/skills/public/image-generation/scripts/generate.py \
--prompt-file /mnt/user-data/workspace/prompt-file.json \
--reference-images /path/to/ref1.jpg /path/to/ref2.png \
--output-file /mnt/user-data/outputs/generated-image.jpg
--output-file /mnt/user-data/outputs/generated-image.jpg \
--aspect-ratio 16:9
```
Parameters:
- `--prompt-file`: Absolute path to JSON prompt file (required)
- `--reference-images`: Absolute paths to reference images (optional, space-separated)
- `--output-file`: Absolute path to output image file (required)
- `--aspect-ratio`: Aspect ratio of the generated image (optional, default: 16:9)
[!NOTE]
Do NOT read the python file, just call it with the parameters.
- The script uses RunningHub API which requires `RUNNINGHUB_API_KEY` environment variable to be set
- Do NOT read the python file, just call it with the parameters
- The script automatically handles task submission, status polling, and image download
## Character Generation Example
@ -86,40 +119,6 @@ python /mnt/skills/public/image-generation/scripts/generate.py \
--aspect-ratio 2:3
```
With reference images:
```json
{
"characters": [{
"gender": "based on [Image 1]",
"age": "based on [Image 1]",
"ethnicity": "human from [Image 1] adapted to Star Wars universe",
"body_type": "based on [Image 1]",
"facial_features": "matching [Image 1] with slight weathered look from space travel",
"clothing": "Star Wars style outfit - worn leather jacket with utility vest, cargo pants with tactical pouches, scuffed boots, belt with holster",
"accessories": "blaster pistol on hip, comlink device on wrist, goggles pushed up on forehead, satchel with supplies, personal vehicle based on [Image 2]",
"era": "Star Wars universe, post-Empire era"
}],
"prompt": "Character inspired by [Image 1] standing next to a vehicle inspired by [Image 2] on a bustling alien planet street in Star Wars universe aesthetic. Character wearing worn leather jacket with utility vest, cargo pants with tactical pouches, scuffed boots, belt with blaster holster. The vehicle adapted to Star Wars aesthetic with weathered metal panels, repulsor engines, desert dust covering, parked on the street. Exotic alien marketplace street with multi-level architecture, weathered metal structures, hanging market stalls with colorful awnings, alien species walking by as background characters. Twin suns casting warm golden light, atmospheric dust particles in air, moisture vaporators visible in distance. Gritty lived-in Star Wars aesthetic, practical effects look, film grain texture, cinematic composition.",
"negative_prompt": "clean futuristic look, sterile environment, overly CGI appearance, fantasy medieval elements, Earth architecture, modern city",
"style": "Star Wars original trilogy aesthetic, lived-in universe, practical effects inspired, cinematic film look, slightly desaturated with warm tones",
"composition": "medium wide shot, character in foreground with alien street extending into background, environmental storytelling, rule of thirds",
"lighting": "warm golden hour lighting from twin suns, rim lighting on character, atmospheric haze, practical light sources from market stalls",
"color_palette": "warm sandy tones, ochre and sienna, dusty blues, weathered metals, muted earth colors with pops of alien market colors",
"technical": {
"aspect_ratio": "9:16",
"quality": "high",
"detail_level": "highly detailed with film-like texture"
}
}
```
```bash
python /mnt/skills/public/image-generation/scripts/generate.py \
--prompt-file /mnt/user-data/workspace/star-wars-scene.json \
--reference-images /mnt/user-data/uploads/character-ref.jpg /mnt/user-data/uploads/vehicle-ref.jpg \
--output-file /mnt/user-data/outputs/star-wars-scene-01.jpg \
--aspect-ratio 16:9
```
## Common Scenarios
Use different JSON schemas for different scenarios.
@ -158,30 +157,10 @@ After generation:
- Provide brief description of the generation result
- Offer to iterate if adjustments needed
## Tips: Enhancing Generation with Reference Images
For scenarios where visual accuracy is critical, **use the `image_search` tool first** to find reference images before generation.
**Recommended scenarios for using image_search tool:**
- **Character/Portrait Generation**: Search for similar poses, expressions, or styles to guide facial features and body proportions
- **Specific Objects or Products**: Find reference images of real objects to ensure accurate representation
- **Architectural or Environmental Scenes**: Search for location references to capture authentic details
- **Fashion and Clothing**: Find style references to ensure accurate garment details and styling
**Example workflow:**
1. Call the `image_search` tool to find suitable reference images:
```
image_search(query="Japanese woman street photography 1990s", size="Large")
```
2. Download the returned image URLs to local files
3. Use the downloaded images as `--reference-images` parameter in the generation script
This approach significantly improves generation quality by providing the model with concrete visual guidance rather than relying solely on text descriptions.
## Notes
- Always use English for prompts regardless of user's language
- JSON format ensures structured, parsable prompts
- Reference images enhance generation quality significantly
- Iterative refinement is normal for optimal results
- For character generation, include the detailed character object plus a consolidated prompt field
- The script automatically polls task status and downloads the generated image

210
skills/public/image-generation/scripts/generate.py Normal file → Executable file
View File

@ -1,8 +1,14 @@
import base64
import json
import os
import time
from typing import List
import requests
from PIL import Image
from dotenv import load_dotenv
load_dotenv()
def validate_image(image_path: str) -> bool:
@ -17,77 +23,171 @@ def validate_image(image_path: str) -> bool:
"""
try:
with Image.open(image_path) as img:
img.verify() # Verify that it's a valid image
# Re-open to check if it can be fully loaded (verify() may not catch all issues)
img.verify()
with Image.open(image_path) as img:
img.load() # Force load the image data
img.load()
return True
except Exception as e:
print(f"Warning: Image '{image_path}' is invalid or corrupted: {e}")
return False
def submit_generation_task(prompt: str, aspect_ratio: str = "16:9", output_format: str = "png") -> str:
    """
    Submit an image generation task to the RunningHub API.

    Args:
        prompt: Text prompt for image generation.
        aspect_ratio: Aspect ratio of the generated image (e.g. "16:9").
        output_format: Output image format ("png", "jpeg", or "webp").

    Returns:
        Task ID for tracking the generation.

    Raises:
        Exception: If RUNNINGHUB_API_KEY is unset or the API rejects the task.
        requests.HTTPError: If the HTTP request itself fails.
    """
    api_key = os.getenv("RUNNINGHUB_API_KEY")
    if not api_key:
        raise Exception("RUNNINGHUB_API_KEY environment variable is not set")

    url = "https://www.runninghub.cn/openapi/v2/rhart-image/z-image/turbo-lora"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "prompt": prompt,
        "aspectRatio": aspect_ratio,
        # LoRA model name is fixed by this workflow; keep in sync with the
        # RunningHub workflow configuration.
        "lora_name": "Z-Image _ 清纯高颜值_脸模版V1.0.safetensors",
        "lora_strength": 1,
        "outputFormat": output_format,
    }

    # timeout prevents an unresponsive endpoint from hanging the script forever
    response = requests.post(url, headers=headers, json=payload, timeout=60)
    response.raise_for_status()
    result = response.json()

    # Any state other than queued/running/success means submission failed.
    if result.get("status") not in ["QUEUED", "RUNNING", "SUCCESS"]:
        raise Exception(f"Task submission failed: {result.get('errorMessage', 'Unknown error')}")

    return result.get("taskId")
def query_task_status(task_id: str) -> dict:
    """
    Query the status of a RunningHub generation task.

    Args:
        task_id: Task ID to query.

    Returns:
        Task status information as returned by the API (parsed JSON).

    Raises:
        Exception: If RUNNINGHUB_API_KEY is unset.
        requests.HTTPError: If the HTTP request fails.
    """
    api_key = os.getenv("RUNNINGHUB_API_KEY")
    if not api_key:
        raise Exception("RUNNINGHUB_API_KEY environment variable is not set")

    url = "https://www.runninghub.cn/openapi/v2/query"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "taskId": task_id,
    }

    # timeout prevents a hung connection from stalling the polling loop
    response = requests.post(url, headers=headers, json=payload, timeout=60)
    response.raise_for_status()
    return response.json()
def download_image(url: str, output_path: str) -> None:
    """
    Download an image from a URL and save it to a local file.

    Args:
        url: Image URL to download.
        output_path: Local path to save the image.

    Raises:
        requests.HTTPError: If the download request fails.
    """
    # Stream the body in chunks so large images are not held in memory; the
    # context manager releases the connection on all paths (the original
    # leaked the streamed response), and the timeout prevents a hang.
    with requests.get(url, stream=True, timeout=60) as response:
        response.raise_for_status()
        with open(output_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
def generate_image(
prompt_file: str,
reference_images: list[str],
reference_images: List[str],
output_file: str,
aspect_ratio: str = "16:9",
) -> str:
"""
Generate image using RunningHub API.
Args:
prompt_file: Path to JSON prompt file
reference_images: List of reference image paths (currently not supported by RunningHub API)
output_file: Output path for generated image
aspect_ratio: Aspect ratio of the generated image
Returns:
Success message with output file path
"""
with open(prompt_file, "r", encoding="utf-8") as f:
prompt = f.read()
parts = []
i = 0
prompt_data = json.load(f)
# Filter out invalid reference images
valid_reference_images = []
for ref_img in reference_images:
if validate_image(ref_img):
valid_reference_images.append(ref_img)
if reference_images:
print("Note: RunningHub API does not support reference images in this version. Reference images will be ignored.")
prompt_text = prompt_data.get("prompt", "")
if not prompt_text:
prompt_text = json.dumps(prompt_data, ensure_ascii=False)
output_format = "png"
if output_file.lower().endswith(".jpg") or output_file.lower().endswith(".jpeg"):
output_format = "jpeg"
elif output_file.lower().endswith(".webp"):
output_format = "webp"
print(f"Submitting generation task...")
task_id = submit_generation_task(prompt_text, aspect_ratio, output_format)
print(f"Task submitted successfully. Task ID: {task_id}")
max_retries = 60
retry_interval = 2
for attempt in range(max_retries):
print(f"Checking task status... (Attempt {attempt + 1}/{max_retries})")
status_result = query_task_status(task_id)
status = status_result.get("status")
if status == "SUCCESS":
print("Task completed successfully!")
results = status_result.get("results", [])
if results and len(results) > 0:
image_url = results[0].get("url")
if image_url:
print(f"Downloading image from: {image_url}")
download_image(image_url, output_file)
return f"Successfully generated image to {output_file}"
else:
raise Exception("No image URL found in task results")
else:
raise Exception("No results found in task response")
elif status == "FAILED":
error_msg = status_result.get("errorMessage", "Unknown error")
raise Exception(f"Task failed: {error_msg}")
elif status in ["QUEUED", "RUNNING"]:
print(f"Task status: {status}. Waiting...")
time.sleep(retry_interval)
else:
print(f"Skipping invalid reference image: {ref_img}")
raise Exception(f"Unknown task status: {status}")
if len(valid_reference_images) < len(reference_images):
print(f"Note: {len(reference_images) - len(valid_reference_images)} reference image(s) were skipped due to validation failure.")
for reference_image in valid_reference_images:
i += 1
with open(reference_image, "rb") as f:
image_b64 = base64.b64encode(f.read()).decode("utf-8")
parts.append(
{
"inlineData": {
"mimeType": "image/jpeg",
"data": image_b64,
}
}
)
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
return "GEMINI_API_KEY is not set"
response = requests.post(
"https://generativelanguage.googleapis.com/v1beta/models/gemini-3-pro-image-preview:generateContent",
headers={
"x-goog-api-key": api_key,
"Content-Type": "application/json",
},
json={
"generationConfig": {"imageConfig": {"aspectRatio": aspect_ratio}},
"contents": [{"parts": [*parts, {"text": prompt}]}],
},
)
response.raise_for_status()
json = response.json()
parts: list[dict] = json["candidates"][0]["content"]["parts"]
image_parts = [part for part in parts if part.get("inlineData", False)]
if len(image_parts) == 1:
base64_image = image_parts[0]["inlineData"]["data"]
# Save the image to a file
with open(output_file, "wb") as f:
f.write(base64.b64decode(base64_image))
return f"Successfully generated image to {output_file}"
else:
raise Exception("Failed to generate image")
raise Exception(f"Task did not complete within {max_retries * retry_interval} seconds")
if __name__ == "__main__":

0
skills/public/image-generation/templates/doraemon.md Normal file → Executable file
View File

16
skills/public/video-generation/.gitignore vendored Executable file
View File

@ -0,0 +1,16 @@
.env
*.mp4
*.avi
*.mov
*.mkv
outputs/
workspace/
__pycache__/
*.pyc
*.pyo
*.pyd
.Python
*.so
*.egg-info/
dist/
build/

View File

@ -0,0 +1,93 @@
# Video Generation Skill - Setup Guide
## Quick Start
1. **Install Dependencies**
```bash
pip install requests python-dotenv
```
2. **Configure API Key**
Create a `.env` file in the project root directory:
```env
RUNNINGHUB_API_KEY=your_api_key_here
```
Or set it as an environment variable:
```bash
# Windows PowerShell
$env:RUNNINGHUB_API_KEY="your_api_key_here"
# Linux/Mac
export RUNNINGHUB_API_KEY="your_api_key_here"
```
3. **Generate a Video**
```bash
python scripts/generate.py --prompt-file workspace/your-prompt.json --output-file outputs/video.mp4 --duration 5
```
## Parameters
- `--prompt-file`: Path to JSON prompt file (required)
- `--output-file`: Output video file path (required)
- `--aspect-ratio`: Video aspect ratio (optional, default: 16:9)
- `--duration`: Video duration in seconds (optional, default: 5, range: 1-16)
## Getting API Key
To use this skill, you need a RunningHub API key:
1. Visit [RunningHub](https://www.runninghub.cn/)
2. Sign up for an account
3. Get your API key from the dashboard
4. Add it to your `.env` file
## Example Prompt
Create a JSON file with your video description:
```json
{
"title": "Your Video Title",
"description": "Description of what you want to generate",
"visual": {
"scene": "Scene description",
"elements": ["element1", "element2"],
"colors": "Color palette",
"lighting": "Lighting description"
},
"camera": {
"movement": "Camera movement",
"focus": "Focus description"
},
"audio": {
"background": "Background music description",
"effects": "Sound effects description"
}
}
```
## Notes
- The `.env` file is already in `.gitignore` and won't be committed to version control
- Never share your API key or commit it to public repositories
- The script automatically loads environment variables from `.env` file
- Video generation may take several minutes depending on the complexity
## Troubleshooting
**Error: RUNNINGHUB_API_KEY is not set**
- Make sure you've created the `.env` file with your API key
- Or set the environment variable before running the script
**Error: Failed to submit task**
- Check that your API key is valid
- Ensure you have sufficient credits in your RunningHub account
- Verify your internet connection
**Video generation takes too long**
- This is normal for AI video generation
- The script will automatically poll for status until completion
- You can check the RunningHub dashboard for task progress

56
skills/public/video-generation/SKILL.md Normal file → Executable file
View File

@ -7,13 +7,14 @@ description: Use this skill when the user requests to generate, create, or imagi
## Overview
This skill generates high-quality videos using structured prompts and a Python script. The workflow includes creating JSON-formatted prompts and executing video generation with optional reference image.
This skill generates high-quality videos using structured prompts and a Python script. The workflow includes creating JSON-formatted prompts and executing video generation through RunningHub API.
## Core Capabilities
- Create structured JSON prompts for AIGC video generation
- Support reference image as guidance or the first/last frame of the video
- Generate videos through automated Python script execution
- Generate videos through RunningHub Vidu model (text-to-video-q3-turbo)
- Support up to 16 seconds video generation with audio
- Automatic camera switching and dialogue generation
## Workflow
@ -21,21 +22,16 @@ This skill generates high-quality videos using structured prompts and a Python s
When a user requests video generation, identify:
- Subject/content: What should be in the image
- Subject/content: What should be in the video
- Style preferences: Art style, mood, color palette
- Technical specs: Aspect ratio, composition, lighting
- Reference image: Any image to guide generation
- You don't need to check the folder under `/mnt/user-data`
- Technical specs: Aspect ratio, resolution, duration
- Audio requirements: Background music, dialogue, sound effects
### Step 2: Create Structured Prompt
Generate a structured JSON file in `/mnt/user-data/workspace/` with naming pattern: `{descriptive-name}.json`
### Step 3: Create Reference Image (Optional when image-generation skill is available)
Generate reference image for the video generation.
- If only 1 image is provided, use it as the guided frame of the video
The prompt should include visual descriptions, camera movements, and audio specifications in a natural language format.
### Step 3: Execute Generation
@ -43,7 +39,6 @@ Call the Python script:
```bash
python /mnt/skills/public/video-generation/scripts/generate.py \
--prompt-file /mnt/user-data/workspace/prompt-file.json \
--reference-images /path/to/ref1.jpg \
--output-file /mnt/user-data/outputs/generated-video.mp4 \
--aspect-ratio 16:9
```
@ -51,20 +46,28 @@ python /mnt/skills/public/video-generation/scripts/generate.py \
Parameters:
- `--prompt-file`: Absolute path to JSON prompt file (required)
- `--reference-images`: Absolute paths to reference image (optional)
- `--output-file`: Absolute path to output image file (required)
- `--aspect-ratio`: Aspect ratio of the generated image (optional, default: 16:9)
- `--output-file`: Absolute path to output video file (required)
- `--aspect-ratio`: Aspect ratio of the generated video (optional, default: 16:9)
[!NOTE]
Do NOT read the python file, instead just call it with the parameters.
## Environment Variables
Set the following environment variable before running the script:
- `RUNNINGHUB_API_KEY`: Your RunningHub API key
Example:
```bash
export RUNNINGHUB_API_KEY=your_api_key_here
```
## Video Generation Example
User request: "Generate a short video clip depicting the opening scene from "The Chronicles of Narnia: The Lion, the Witch and the Wardrobe"
Step 1: Search for the opening scene of "The Chronicles of Narnia: The Lion, the Witch and the Wardrobe" online
Step 2: Create a JSON prompt file with the following content:
Step 1: Create a JSON prompt file with the following content:
```json
{
@ -108,16 +111,11 @@ Step 2: Create a JSON prompt file with the following content:
}
```
Step 3: Use the image-generation skill to generate the reference image
Load the image-generation skill and generate a single reference image `narnia-farewell-scene-01.jpg` according to the skill.
Step 4: Use the generate.py script to generate the video
Step 2: Use the generate.py script to generate the video
```bash
python /mnt/skills/public/video-generation/scripts/generate.py \
--prompt-file /mnt/user-data/workspace/narnia-farewell-scene.json \
--reference-images /mnt/user-data/outputs/narnia-farewell-scene-01.jpg \
--output-file /mnt/user-data/outputs/narnia-farewell-scene-01.mp4 \
--output-file /mnt/user-data/outputs/narnia-farewell-scene.mp4 \
--aspect-ratio 16:9
```
> Do NOT read the python file, just call it with the parameters.
@ -127,7 +125,7 @@ python /mnt/skills/public/video-generation/scripts/generate.py \
After generation:
- Videos are typically saved in `/mnt/user-data/outputs/`
- Share generated videos (come first) with user as well as generated image if applicable, using `present_files` tool
- Share generated videos with user using `present_files` tool
- Provide brief description of the generation result
- Offer to iterate if adjustments needed
@ -135,5 +133,7 @@ After generation:
- Always use English for prompts regardless of user's language
- JSON format ensures structured, parsable prompts
- Reference images enhance generation quality significantly
- RunningHub Vidu model supports up to 16 seconds video generation
- Audio is automatically generated including dialogue and sound effects
- The model has "director thinking" capability for automatic camera switching
- Iterative refinement is normal for optimal results

117
skills/public/video-generation/scripts/generate.py Normal file → Executable file
View File

@ -1,75 +1,83 @@
import base64
import os
import time
from typing import List
import requests
from dotenv import load_dotenv
load_dotenv()
def generate_video(
prompt_file: str,
reference_images: list[str],
reference_images: List[str],
output_file: str,
aspect_ratio: str = "16:9",
duration: str = "5",
) -> str:
with open(prompt_file, "r", encoding="utf-8") as f:
prompt = f.read()
referenceImages = []
i = 0
json = {
"instances": [{"prompt": prompt}],
}
for reference_image in reference_images:
i += 1
with open(reference_image, "rb") as f:
image_b64 = base64.b64encode(f.read()).decode("utf-8")
referenceImages.append(
{
"image": {"mimeType": "image/jpeg", "bytesBase64Encoded": image_b64},
"referenceType": "asset",
}
)
if i > 0:
json["instances"][0]["referenceImages"] = referenceImages
api_key = os.getenv("GEMINI_API_KEY")
api_key = os.getenv("RUNNINGHUB_API_KEY")
if not api_key:
return "GEMINI_API_KEY is not set"
return "RUNNINGHUB_API_KEY is not set"
json_data = {
"prompt": prompt,
"style": "general",
"aspectRatio": aspect_ratio,
"resolution": "720p",
"duration": duration,
"audio": True
}
response = requests.post(
"https://generativelanguage.googleapis.com/v1beta/models/veo-3.1-generate-preview:predictLongRunning",
"https://www.runninghub.cn/openapi/v2/vidu/text-to-video-q3-turbo",
headers={
"x-goog-api-key": api_key,
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
},
json=json,
json=json_data,
)
json = response.json()
operation_name = json["name"]
response_json = response.json()
if "taskId" not in response_json:
return f"Failed to submit task: {response_json}"
task_id = response_json["taskId"]
while True:
response = requests.get(
f"https://generativelanguage.googleapis.com/v1beta/{operation_name}",
response = requests.post(
"https://www.runninghub.cn/openapi/v2/query",
headers={
"x-goog-api-key": api_key,
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
},
json={"taskId": task_id},
)
json = response.json()
if json.get("done", False):
sample = json["response"]["generateVideoResponse"]["generatedSamples"][0]
url = sample["video"]["uri"]
download(url, output_file)
break
response_json = response.json()
status = response_json.get("status")
if status == "SUCCESS":
results = response_json.get("results", [])
if results and len(results) > 0:
url = results[0].get("url")
if url:
download(url, output_file)
break
elif status == "FAILED":
error_message = response_json.get("errorMessage", "Unknown error")
return f"Video generation failed: {error_message}"
time.sleep(3)
return f"The video has been generated successfully to {output_file}"
def download(url: str, output_file: str):
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
return "GEMINI_API_KEY is not set"
response = requests.get(
url,
headers={
"x-goog-api-key": api_key,
},
)
response = requests.get(url)
with open(output_file, "wb") as f:
f.write(response.content)
@ -77,28 +85,28 @@ def download(url: str, output_file: str):
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Generate videos using Gemini API")
parser = argparse.ArgumentParser(description="Generate videos using RunningHub API")
parser.add_argument(
"--prompt-file",
required=True,
help="Absolute path to JSON prompt file",
)
parser.add_argument(
"--reference-images",
nargs="*",
default=[],
help="Absolute paths to reference images (space-separated)",
)
parser.add_argument(
"--output-file",
required=True,
help="Output path for generated image",
help="Output path for generated video",
)
parser.add_argument(
"--aspect-ratio",
required=False,
default="16:9",
help="Aspect ratio of the generated image",
help="Aspect ratio of the generated video",
)
parser.add_argument(
"--duration",
required=False,
default="5",
help="Duration of the generated video in seconds (1-16)",
)
args = parser.parse_args()
@ -107,9 +115,10 @@ if __name__ == "__main__":
print(
generate_video(
args.prompt_file,
args.reference_images,
[],
args.output_file,
args.aspect_ratio,
args.duration,
)
)
except Exception as e: