Files
ALwrity/backend/services/wavespeed/client.py
ajaysi b134e9dc7e Added video studio router and endpoints. Added research router and endpoints. Added youtube router and endpoints. Added onboarding utils router and endpoints. Added onboarding utils service. Added onboarding utils models. Added onboarding utils routes. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils.
2026-01-01 17:56:25 +05:30

600 lines
22 KiB
Python

"""
WaveSpeed AI API Client
Thin HTTP client for the WaveSpeed AI API.
Handles authentication, submission, and delegates to specialized generators.
"""
from __future__ import annotations
from typing import Any, Dict, Optional, Callable
from fastapi import HTTPException
from services.onboarding.api_key_manager import APIKeyManager
from utils.logger_utils import get_service_logger
from .polling import WaveSpeedPolling
from .generators.prompt import PromptGenerator
from .generators.image import ImageGenerator
from .generators.video import VideoGenerator
from .generators.speech import SpeechGenerator
logger = get_service_logger("wavespeed.client")
class WaveSpeedClient:
"""
Thin HTTP client for the WaveSpeed AI API.
Handles authentication, submission, and polling helpers.
"""
BASE_URL = "https://api.wavespeed.ai/api/v3"
def __init__(self, api_key: Optional[str] = None):
manager = APIKeyManager()
self.api_key = api_key or manager.get_api_key("wavespeed")
if not self.api_key:
raise RuntimeError("WAVESPEED_API_KEY is not configured. Please add it to your environment.")
# Initialize polling utilities
self.polling = WaveSpeedPolling(self.api_key, self.BASE_URL)
# Initialize generators
self.prompt = PromptGenerator(self.api_key, self.BASE_URL, self.polling)
self.image = ImageGenerator(self.api_key, self.BASE_URL, self.polling)
self.video = VideoGenerator(self.api_key, self.BASE_URL, self.polling)
self.speech = SpeechGenerator(self.api_key, self.BASE_URL, self.polling)
def _headers(self) -> Dict[str, str]:
return {
"Content-Type": "application/json",
"Authorization": f"Bearer {self.api_key}",
}
# Core submission methods (delegated to video generator)
def submit_image_to_video(
self,
model_path: str,
payload: Dict[str, Any],
timeout: int = 30,
) -> str:
"""
Submit an image-to-video generation request.
Returns the prediction ID for polling.
"""
return self.video.submit_image_to_video(model_path, payload, timeout)
def submit_text_to_video(
self,
model_path: str,
payload: Dict[str, Any],
timeout: int = 60,
) -> str:
"""
Submit a text-to-video generation request to WaveSpeed.
Args:
model_path: Model path (e.g., "alibaba/wan-2.5/text-to-video")
payload: Request payload with prompt, resolution, duration, optional audio
timeout: Request timeout in seconds
Returns:
Prediction ID for polling
"""
return self.video.submit_text_to_video(model_path, payload, timeout)
# Polling methods (delegated to polling utilities)
def get_prediction_result(self, prediction_id: str, timeout: int = 30) -> Dict[str, Any]:
"""
Fetch the current status/result for a prediction.
Matches the example pattern: simple GET request, check status_code == 200, return data.
"""
return self.polling.get_prediction_result(prediction_id, timeout)
def poll_until_complete(
self,
prediction_id: str,
timeout_seconds: Optional[int] = None,
interval_seconds: float = 1.0,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> Dict[str, Any]:
"""
Poll WaveSpeed until the job completes or fails.
Matches the example pattern: simple polling loop until status is "completed" or "failed".
Args:
prediction_id: The prediction ID to poll for
timeout_seconds: Optional timeout in seconds. If None, polls indefinitely until completion/failure.
interval_seconds: Seconds to wait between polling attempts (default: 1.0, faster than 2.0)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
Dict containing the completed result
Raises:
HTTPException: If the task fails, polling fails, or times out (if timeout_seconds is set)
"""
return self.polling.poll_until_complete(
prediction_id,
timeout_seconds=timeout_seconds,
interval_seconds=interval_seconds,
progress_callback=progress_callback,
)
# Generator methods (delegated to specialized generators)
def optimize_prompt(
self,
text: str,
mode: str = "image",
style: str = "default",
image: Optional[str] = None,
enable_sync_mode: bool = True,
timeout: int = 30,
) -> str:
"""
Optimize a prompt using WaveSpeed prompt optimizer.
Args:
text: The prompt text to optimize
mode: "image" or "video" (default: "image")
style: "default", "artistic", "photographic", "technical", "anime", "realistic" (default: "default")
image: Base64-encoded image for context (optional)
enable_sync_mode: If True, wait for result and return it directly (default: True)
timeout: Request timeout in seconds (default: 30)
Returns:
Optimized prompt text
"""
return self.prompt.optimize_prompt(
text=text,
mode=mode,
style=style,
image=image,
enable_sync_mode=enable_sync_mode,
timeout=timeout,
)
def generate_image(
self,
model: str,
prompt: str,
width: int = 1024,
height: int = 1024,
num_inference_steps: Optional[int] = None,
guidance_scale: Optional[float] = None,
negative_prompt: Optional[str] = None,
seed: Optional[int] = None,
enable_sync_mode: bool = True,
timeout: int = 120,
**kwargs
) -> bytes:
"""
Generate image using WaveSpeed AI models (Ideogram V3 or Qwen Image).
Args:
model: Model to use ("ideogram-v3-turbo" or "qwen-image")
prompt: Text prompt for image generation
width: Image width (default: 1024)
height: Image height (default: 1024)
num_inference_steps: Number of inference steps
guidance_scale: Guidance scale for generation
negative_prompt: Negative prompt (what to avoid)
seed: Random seed for reproducibility
enable_sync_mode: If True, wait for result and return it directly (default: True)
timeout: Request timeout in seconds (default: 120)
**kwargs: Additional parameters
Returns:
bytes: Generated image bytes
"""
return self.image.generate_image(
model=model,
prompt=prompt,
width=width,
height=height,
num_inference_steps=num_inference_steps,
guidance_scale=guidance_scale,
negative_prompt=negative_prompt,
seed=seed,
enable_sync_mode=enable_sync_mode,
timeout=timeout,
**kwargs
)
def generate_character_image(
self,
prompt: str,
reference_image_bytes: bytes,
style: str = "Auto",
aspect_ratio: str = "16:9",
rendering_speed: str = "Default",
timeout: Optional[int] = None,
) -> bytes:
"""
Generate image using Ideogram Character API to maintain character consistency.
Creates variations of a reference character image while respecting the base appearance.
Note: This API is always async and requires polling for results.
Args:
prompt: Text prompt describing the scene/context for the character
reference_image_bytes: Reference image bytes (base avatar)
style: Character style type ("Auto", "Fiction", or "Realistic")
aspect_ratio: Aspect ratio ("1:1", "16:9", "9:16", "4:3", "3:4")
rendering_speed: Rendering speed ("Default", "Turbo", "Quality")
timeout: Total timeout in seconds for submission + polling (default: 180)
Returns:
bytes: Generated image bytes with consistent character
"""
return self.image.generate_character_image(
prompt=prompt,
reference_image_bytes=reference_image_bytes,
style=style,
aspect_ratio=aspect_ratio,
rendering_speed=rendering_speed,
timeout=timeout,
)
def generate_speech(
self,
text: str,
voice_id: str,
speed: float = 1.0,
volume: float = 1.0,
pitch: float = 0.0,
emotion: str = "happy",
enable_sync_mode: bool = True,
timeout: int = 120,
**kwargs
) -> bytes:
"""
Generate speech audio using Minimax Speech 02 HD via WaveSpeed.
Args:
text: Text to convert to speech (max 10000 characters)
voice_id: Voice ID (e.g., "Wise_Woman", "Friendly_Person", etc.)
speed: Speech speed (0.5-2.0, default: 1.0)
volume: Speech volume (0.1-10.0, default: 1.0)
pitch: Speech pitch (-12 to 12, default: 0.0)
emotion: Emotion ("happy", "sad", "angry", etc., default: "happy")
enable_sync_mode: If True, wait for result and return it directly (default: True)
timeout: Request timeout in seconds (default: 60)
**kwargs: Additional parameters (sample_rate, bitrate, format, etc.)
Returns:
bytes: Generated audio bytes
"""
return self.speech.generate_speech(
text=text,
voice_id=voice_id,
speed=speed,
volume=volume,
pitch=pitch,
emotion=emotion,
enable_sync_mode=enable_sync_mode,
timeout=timeout,
**kwargs
)
def generate_text_video(
self,
prompt: str,
resolution: str = "720p", # 480p, 720p, 1080p
duration: int = 5, # 5 or 10 seconds
audio_base64: Optional[str] = None, # Optional audio for lip-sync
negative_prompt: Optional[str] = None,
seed: Optional[int] = None,
enable_prompt_expansion: bool = True,
enable_sync_mode: bool = False,
timeout: int = 180,
) -> Dict[str, Any]:
"""
Generate video from text prompt using WAN 2.5 text-to-video.
Args:
prompt: Text prompt describing the video
resolution: Output resolution (480p, 720p, 1080p)
duration: Video duration in seconds (5 or 10)
audio_base64: Optional audio file (wav/mp3, 3-30s, ≤15MB) for lip-sync
negative_prompt: Optional negative prompt
seed: Optional random seed for reproducibility
enable_prompt_expansion: Enable prompt optimizer
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds
Returns:
Dictionary with video bytes, metadata, and cost
"""
return self.video.generate_text_video(
prompt=prompt,
resolution=resolution,
duration=duration,
audio_base64=audio_base64,
negative_prompt=negative_prompt,
seed=seed,
enable_prompt_expansion=enable_prompt_expansion,
enable_sync_mode=enable_sync_mode,
timeout=timeout,
)
def upscale_video(
self,
video: str,
target_resolution: str = "1080p",
enable_sync_mode: bool = False,
timeout: int = 300,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> bytes:
"""
Upscale video using FlashVSR.
Args:
video: Base64-encoded video data URI or public URL
target_resolution: Target resolution ("720p", "1080p", "2k", "4k")
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds (default: 300 for long videos)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
bytes: Upscaled video bytes
"""
return self.video.upscale_video(
video=video,
target_resolution=target_resolution,
enable_sync_mode=enable_sync_mode,
timeout=timeout,
progress_callback=progress_callback,
)
def extend_video(
self,
video: str,
prompt: str,
model: str = "wan-2.5",
audio: Optional[str] = None,
negative_prompt: Optional[str] = None,
resolution: str = "720p",
duration: int = 5,
enable_prompt_expansion: bool = False,
generate_audio: bool = True,
camera_fixed: bool = False,
seed: Optional[int] = None,
enable_sync_mode: bool = False,
timeout: int = 300,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> bytes:
"""
Extend video duration using WAN 2.5, WAN 2.2 Spicy, or Seedance 1.5 Pro video-extend.
Args:
video: Base64-encoded video data URI or public URL
prompt: Text prompt describing how to extend the video
model: Model to use ("wan-2.5", "wan-2.2-spicy", or "seedance-1.5-pro")
audio: Optional audio URL to guide generation (WAN 2.5 only)
negative_prompt: Optional negative prompt (WAN 2.5 only)
resolution: Output resolution (varies by model)
duration: Duration of extended video in seconds (varies by model)
enable_prompt_expansion: Enable prompt optimizer (WAN 2.5 only)
generate_audio: Generate audio for extended video (Seedance 1.5 Pro only)
camera_fixed: Fix camera position (Seedance 1.5 Pro only)
seed: Random seed for reproducibility (-1 for random)
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds (default: 300)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
bytes: Extended video bytes
"""
return self.video.extend_video(
video=video,
prompt=prompt,
model=model,
audio=audio,
negative_prompt=negative_prompt,
resolution=resolution,
duration=duration,
enable_prompt_expansion=enable_prompt_expansion,
generate_audio=generate_audio,
camera_fixed=camera_fixed,
seed=seed,
enable_sync_mode=enable_sync_mode,
timeout=timeout,
progress_callback=progress_callback,
)
def face_swap(
self,
image: str,
video: str,
prompt: Optional[str] = None,
resolution: str = "480p",
seed: Optional[int] = None,
enable_sync_mode: bool = False,
timeout: int = 300,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> bytes:
"""
Perform face/character swap using MoCha (wavespeed-ai/wan-2.1/mocha).
Args:
image: Base64-encoded image data URI or public URL (reference character)
video: Base64-encoded video data URI or public URL (source video)
prompt: Optional prompt to guide the swap
resolution: Output resolution ("480p" or "720p")
seed: Random seed for reproducibility (-1 for random)
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds (default: 300)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
bytes: Face-swapped video bytes
"""
return self.video.face_swap(
image=image,
video=video,
prompt=prompt,
resolution=resolution,
seed=seed,
enable_sync_mode=enable_sync_mode,
timeout=timeout,
progress_callback=progress_callback,
)
def video_face_swap(
self,
video: str,
face_image: str,
target_gender: str = "all",
target_index: int = 0,
enable_sync_mode: bool = False,
timeout: int = 300,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> bytes:
"""
Perform face swap using Video Face Swap (wavespeed-ai/video-face-swap).
Args:
video: Base64-encoded video data URI or public URL (source video)
face_image: Base64-encoded image data URI or public URL (reference face)
target_gender: Filter which faces to swap ("all", "female", "male")
target_index: Select which face to swap (0 = largest, 1 = second largest, etc.)
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds (default: 300)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
bytes: Face-swapped video bytes
"""
return self.video.video_face_swap(
video=video,
face_image=face_image,
target_gender=target_gender,
target_index=target_index,
enable_sync_mode=enable_sync_mode,
timeout=timeout,
progress_callback=progress_callback,
)
def video_translate(
self,
video: str,
output_language: str = "English",
enable_sync_mode: bool = False,
timeout: int = 600,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> bytes:
"""
Translate video to target language using HeyGen Video Translate.
Args:
video: Base64-encoded video data URI or public URL (source video)
output_language: Target language for translation (default: "English")
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds (default: 600)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
bytes: Translated video bytes
"""
return self.video.video_translate(
video=video,
output_language=output_language,
enable_sync_mode=enable_sync_mode,
timeout=timeout,
progress_callback=progress_callback,
)
def remove_background(
self,
video: str,
background_image: Optional[str] = None,
enable_sync_mode: bool = False,
timeout: int = 300,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> bytes:
"""
Remove or replace video background using Video Background Remover.
Args:
video: Base64-encoded video data URI or public URL (source video)
background_image: Optional base64-encoded image data URI or public URL (replacement background)
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds (default: 300)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
bytes: Video with background removed/replaced
"""
return self.video.remove_background(
video=video,
background_image=background_image,
enable_sync_mode=enable_sync_mode,
timeout=timeout,
progress_callback=progress_callback,
)
def hunyuan_video_foley(
self,
video: str,
prompt: Optional[str] = None,
seed: int = -1,
enable_sync_mode: bool = False,
timeout: int = 300,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> bytes:
"""
Generate realistic Foley and ambient audio from video using Hunyuan Video Foley.
Args:
video: Base64-encoded video data URI or public URL (source video)
prompt: Optional text prompt describing desired sounds (e.g., "ocean waves, seagulls")
seed: Random seed for reproducibility (-1 for random)
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds (default: 300)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
bytes: Video with generated audio
"""
return self.video.hunyuan_video_foley(
video=video,
prompt=prompt,
seed=seed,
enable_sync_mode=enable_sync_mode,
timeout=timeout,
progress_callback=progress_callback,
)
def think_sound(
self,
video: str,
prompt: Optional[str] = None,
seed: int = -1,
enable_sync_mode: bool = False,
timeout: int = 300,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> bytes:
"""
Generate realistic sound effects and audio tracks from video using Think Sound.
Args:
video: Base64-encoded video data URI or public URL (source video)
prompt: Optional text prompt describing desired sounds (e.g., "engine roaring, footsteps on gravel")
seed: Random seed for reproducibility (-1 for random)
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds (default: 300)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
bytes: Video with generated audio
"""
return self.video.think_sound(
video=video,
prompt=prompt,
seed=seed,
enable_sync_mode=enable_sync_mode,
timeout=timeout,
progress_callback=progress_callback,
)