AI podcast project
This commit is contained in:
@@ -8,7 +8,6 @@ from fastapi import HTTPException
|
||||
from loguru import logger
|
||||
|
||||
from .client import WaveSpeedClient
|
||||
from .kling_animation import generate_animation_prompt
|
||||
|
||||
INFINITALK_MODEL_PATH = "wavespeed-ai/infinitetalk"
|
||||
INFINITALK_MODEL_NAME = "wavespeed-ai/infinitetalk"
|
||||
@@ -22,6 +21,67 @@ def _as_data_uri(content_bytes: bytes, mime_type: str) -> str:
|
||||
return f"data:{mime_type};base64,{encoded}"
|
||||
|
||||
|
||||
def _generate_simple_infinitetalk_prompt(
|
||||
scene_data: Dict[str, Any],
|
||||
story_context: Dict[str, Any],
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
Generate a balanced, concise prompt for InfiniteTalk.
|
||||
InfiniteTalk is audio-driven, so the prompt should describe the scene and suggest
|
||||
subtle motion, but avoid overly elaborate cinematic descriptions.
|
||||
|
||||
Returns None if no meaningful prompt can be generated.
|
||||
"""
|
||||
title = (scene_data.get("title") or "").strip()
|
||||
description = (scene_data.get("description") or "").strip()
|
||||
image_prompt = (scene_data.get("image_prompt") or "").strip()
|
||||
|
||||
# Build a balanced prompt: scene description + simple motion hint
|
||||
parts = []
|
||||
|
||||
# Start with the main subject/scene
|
||||
if title and len(title) > 5 and title.lower() not in ("scene", "podcast", "episode"):
|
||||
parts.append(title)
|
||||
elif description:
|
||||
# Take first sentence or first 60 chars
|
||||
desc_part = description.split('.')[0][:60].strip()
|
||||
if desc_part:
|
||||
parts.append(desc_part)
|
||||
elif image_prompt:
|
||||
# Take first sentence or first 60 chars
|
||||
img_part = image_prompt.split('.')[0][:60].strip()
|
||||
if img_part:
|
||||
parts.append(img_part)
|
||||
|
||||
if not parts:
|
||||
return None
|
||||
|
||||
# Add a simple, subtle motion suggestion (not elaborate camera movements)
|
||||
# Keep it natural and audio-driven
|
||||
motion_hints = [
|
||||
"with subtle movement",
|
||||
"with gentle motion",
|
||||
"with natural animation",
|
||||
]
|
||||
|
||||
# Combine scene description with subtle motion hint
|
||||
if len(parts[0]) < 80:
|
||||
# Room for a motion hint
|
||||
prompt = f"{parts[0]}, {motion_hints[0]}"
|
||||
else:
|
||||
# Just use the description if it's already long enough
|
||||
prompt = parts[0]
|
||||
|
||||
# Keep it concise - max 120 characters (allows for scene + motion hint)
|
||||
prompt = prompt[:120].strip()
|
||||
|
||||
# Clean up trailing commas or incomplete sentences
|
||||
if prompt.endswith(','):
|
||||
prompt = prompt[:-1].strip()
|
||||
|
||||
return prompt if len(prompt) >= 15 else None
|
||||
|
||||
|
||||
def animate_scene_with_voiceover(
|
||||
*,
|
||||
image_bytes: bytes,
|
||||
@@ -31,6 +91,8 @@ def animate_scene_with_voiceover(
|
||||
user_id: str,
|
||||
resolution: str = "720p",
|
||||
prompt_override: Optional[str] = None,
|
||||
mask_image_bytes: Optional[bytes] = None,
|
||||
seed: Optional[int] = -1,
|
||||
image_mime: str = "image/png",
|
||||
audio_mime: str = "audio/mpeg",
|
||||
client: Optional[WaveSpeedClient] = None,
|
||||
@@ -59,21 +121,28 @@ def animate_scene_with_voiceover(
|
||||
if resolution not in {"480p", "720p"}:
|
||||
raise HTTPException(status_code=400, detail="Resolution must be '480p' or '720p'.")
|
||||
|
||||
animation_prompt = prompt_override or generate_animation_prompt(scene_data, story_context, user_id)
|
||||
# Generate simple, concise prompt for InfiniteTalk (audio-driven, less need for elaborate descriptions)
|
||||
animation_prompt = prompt_override or _generate_simple_infinitetalk_prompt(scene_data, story_context)
|
||||
|
||||
payload = {
|
||||
payload: Dict[str, Any] = {
|
||||
"image": _as_data_uri(image_bytes, image_mime),
|
||||
"audio": _as_data_uri(audio_bytes, audio_mime),
|
||||
"resolution": resolution,
|
||||
}
|
||||
# Only include prompt if we have a meaningful one (InfiniteTalk works fine without it)
|
||||
if animation_prompt:
|
||||
payload["prompt"] = animation_prompt
|
||||
if mask_image_bytes:
|
||||
payload["mask_image"] = _as_data_uri(mask_image_bytes, image_mime)
|
||||
if seed is not None:
|
||||
payload["seed"] = seed
|
||||
|
||||
client = client or WaveSpeedClient()
|
||||
prediction_id = client.submit_image_to_video(INFINITALK_MODEL_PATH, payload, timeout=60)
|
||||
|
||||
try:
|
||||
result = client.poll_until_complete(prediction_id, timeout_seconds=600, interval_seconds=1.0)
|
||||
# Poll faster (0.5s) to mirror reference pattern; allow up to 10 minutes
|
||||
result = client.poll_until_complete(prediction_id, timeout_seconds=600, interval_seconds=0.5)
|
||||
except HTTPException as exc:
|
||||
detail = exc.detail or {}
|
||||
if isinstance(detail, dict):
|
||||
|
||||
Reference in New Issue
Block a user