Files
ALwrity/backend/services/wavespeed/generators/video.py
ajaysi b134e9dc7e Added video studio router and endpoints. Added research router and endpoints. Added youtube router and endpoints. Added onboarding utils router and endpoints. Added onboarding utils service. Added onboarding utils models. Added onboarding utils routes. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils.
2026-01-01 17:56:25 +05:30

1330 lines
53 KiB
Python

"""
Video generation generator for WaveSpeed API.
"""
import requests
from typing import Any, Dict, Optional, Callable
from fastapi import HTTPException
from utils.logger_utils import get_service_logger
logger = get_service_logger("wavespeed.generators.video")
class VideoGenerator:
"""Video generation generator."""
def __init__(self, api_key: str, base_url: str, polling):
"""Initialize video generator.
Args:
api_key: WaveSpeed API key
base_url: WaveSpeed API base URL
polling: WaveSpeedPolling instance for async operations
"""
self.api_key = api_key
self.base_url = base_url
self.polling = polling
def _get_headers(self) -> dict:
"""Get HTTP headers for API requests."""
return {
"Content-Type": "application/json",
"Authorization": f"Bearer {self.api_key}",
}
def submit_image_to_video(
self,
model_path: str,
payload: Dict[str, Any],
timeout: int = 30,
) -> str:
"""
Submit an image-to-video generation request.
Returns the prediction ID for polling.
"""
url = f"{self.base_url}/{model_path}"
logger.info(f"[WaveSpeed] Submitting request to {url}")
response = requests.post(url, headers=self._get_headers(), json=payload, timeout=timeout)
if response.status_code != 200:
logger.error(f"[WaveSpeed] Submission failed: {response.status_code} {response.text}")
raise HTTPException(
status_code=502,
detail={
"error": "WaveSpeed image-to-video submission failed",
"status_code": response.status_code,
"response": response.text,
},
)
data = response.json().get("data")
if not data or "id" not in data:
logger.error(f"[WaveSpeed] Unexpected submission response: {response.text}")
raise HTTPException(
status_code=502,
detail={"error": "WaveSpeed response missing prediction id"},
)
prediction_id = data["id"]
logger.info(f"[WaveSpeed] Submitted request: {prediction_id}")
return prediction_id
def submit_text_to_video(
self,
model_path: str,
payload: Dict[str, Any],
timeout: int = 60,
) -> str:
"""
Submit a text-to-video generation request to WaveSpeed.
Args:
model_path: Model path (e.g., "alibaba/wan-2.5/text-to-video")
payload: Request payload with prompt, resolution, duration, optional audio
timeout: Request timeout in seconds
Returns:
Prediction ID for polling
"""
url = f"{self.base_url}/{model_path}"
logger.info(f"[WaveSpeed] Submitting text-to-video request to {url}")
response = requests.post(url, headers=self._get_headers(), json=payload, timeout=timeout)
if response.status_code != 200:
logger.error(f"[WaveSpeed] Text-to-video submission failed: {response.status_code} {response.text}")
raise HTTPException(
status_code=502,
detail={
"error": "WaveSpeed text-to-video submission failed",
"status_code": response.status_code,
"response": response.text,
},
)
data = response.json().get("data")
if not data or "id" not in data:
logger.error(f"[WaveSpeed] Unexpected text-to-video response: {response.text}")
raise HTTPException(
status_code=502,
detail={"error": "WaveSpeed response missing prediction id"},
)
prediction_id = data["id"]
logger.info(f"[WaveSpeed] Submitted text-to-video request: {prediction_id}")
return prediction_id
def generate_text_video(
self,
prompt: str,
resolution: str = "720p", # 480p, 720p, 1080p
duration: int = 5, # 5 or 10 seconds
audio_base64: Optional[str] = None, # Optional audio for lip-sync
negative_prompt: Optional[str] = None,
seed: Optional[int] = None,
enable_prompt_expansion: bool = True,
enable_sync_mode: bool = False,
timeout: int = 180,
) -> Dict[str, Any]:
"""
Generate video from text prompt using WAN 2.5 text-to-video.
Args:
prompt: Text prompt describing the video
resolution: Output resolution (480p, 720p, 1080p)
duration: Video duration in seconds (5 or 10)
audio_base64: Optional audio file (wav/mp3, 3-30s, ≤15MB) for lip-sync
negative_prompt: Optional negative prompt
seed: Optional random seed for reproducibility
enable_prompt_expansion: Enable prompt optimizer
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds
Returns:
Dictionary with video bytes, metadata, and cost
"""
model_path = "alibaba/wan-2.5/text-to-video"
# Validate resolution
valid_resolutions = ["480p", "720p", "1080p"]
if resolution not in valid_resolutions:
raise HTTPException(
status_code=400,
detail=f"Invalid resolution: {resolution}. Must be one of: {valid_resolutions}"
)
# Validate duration
if duration not in [5, 10]:
raise HTTPException(
status_code=400,
detail="Duration must be 5 or 10 seconds"
)
# Build payload
payload = {
"prompt": prompt,
"resolution": resolution,
"duration": duration,
"enable_prompt_expansion": enable_prompt_expansion,
"enable_sync_mode": enable_sync_mode,
}
# Add optional audio
if audio_base64:
payload["audio"] = audio_base64
# Add optional parameters
if negative_prompt:
payload["negative_prompt"] = negative_prompt
if seed is not None:
payload["seed"] = seed
# Submit request
logger.info(
f"[WaveSpeed] Generating text-to-video: resolution={resolution}, "
f"duration={duration}s, prompt_length={len(prompt)}, sync_mode={enable_sync_mode}"
)
# For sync mode, submit and get result directly
if enable_sync_mode:
url = f"{self.base_url}/{model_path}"
response = requests.post(url, headers=self._get_headers(), json=payload, timeout=timeout)
if response.status_code != 200:
logger.error(f"[WaveSpeed] Text-to-video submission failed: {response.status_code} {response.text}")
raise HTTPException(
status_code=502,
detail={
"error": "WaveSpeed text-to-video submission failed",
"status_code": response.status_code,
"response": response.text[:500],
},
)
response_json = response.json()
data = response_json.get("data") or response_json
# Check status - if "created" or "processing", we need to poll even in sync mode
status = data.get("status", "").lower()
outputs = data.get("outputs") or []
prediction_id = data.get("id")
logger.debug(
f"[WaveSpeed] Sync mode response: status='{status}', outputs_count={len(outputs)}, "
f"prediction_id={prediction_id}"
)
# Handle sync mode - result should be directly in outputs
if status == "completed" and outputs:
# Sync mode returned completed result - use it directly
logger.info(f"[WaveSpeed] Got immediate video results from sync mode (status: {status})")
video_url = outputs[0]
if not isinstance(video_url, str) or not video_url.startswith("http"):
logger.error(f"[WaveSpeed] Invalid video URL format in sync mode: {video_url}")
raise HTTPException(
status_code=502,
detail=f"Invalid video URL format: {video_url}",
)
video_bytes = self._download_video(video_url)
metadata = data.get("metadata") or {}
# prediction_id is already set from data.get("id") above (line 210)
else:
# Sync mode returned "created", "processing", or incomplete status - need to poll
if not prediction_id:
logger.error(
f"[WaveSpeed] Sync mode returned status '{status}' but no prediction ID. "
f"Response: {response.text[:500]}"
)
raise HTTPException(
status_code=502,
detail="WaveSpeed text-to-video sync mode returned async response without prediction ID",
)
logger.info(
f"[WaveSpeed] Sync mode returned status '{status}' with {len(outputs)} output(s). "
f"Falling back to polling (prediction_id: {prediction_id})"
)
# Poll for completion
try:
result = self.polling.poll_until_complete(
prediction_id,
timeout_seconds=timeout,
interval_seconds=2.0,
)
except HTTPException as e:
detail = e.detail or {}
if isinstance(detail, dict):
detail.setdefault("prediction_id", prediction_id)
detail.setdefault("resume_available", True)
raise HTTPException(status_code=e.status_code, detail=detail)
outputs = result.get("outputs") or []
if not outputs:
logger.error(f"[WaveSpeed] Polling completed but no outputs: {result}")
raise HTTPException(
status_code=502,
detail="WaveSpeed text-to-video completed but returned no outputs",
)
video_url = outputs[0]
if not isinstance(video_url, str) or not video_url.startswith("http"):
logger.error(f"[WaveSpeed] Invalid video URL format after polling: {video_url}")
raise HTTPException(
status_code=502,
detail=f"Invalid video URL format: {video_url}",
)
video_bytes = self._download_video(video_url)
metadata = result.get("metadata") or {}
else:
# Async mode - submit and poll
prediction_id = self.submit_text_to_video(model_path, payload, timeout=timeout)
# Poll for completion
try:
result = self.polling.poll_until_complete(
prediction_id,
timeout_seconds=timeout,
interval_seconds=2.0
)
except HTTPException as e:
detail = e.detail or {}
if isinstance(detail, dict):
detail.setdefault("prediction_id", prediction_id)
detail.setdefault("resume_available", True)
raise HTTPException(status_code=e.status_code, detail=detail)
# Extract video URL
outputs = result.get("outputs") or []
if not outputs:
raise HTTPException(
status_code=502,
detail="WAN 2.5 text-to-video completed but returned no outputs"
)
video_url = outputs[0]
if not isinstance(video_url, str) or not video_url.startswith("http"):
raise HTTPException(
status_code=502,
detail=f"Invalid video URL format: {video_url}"
)
video_bytes = self._download_video(video_url)
metadata = result.get("metadata") or {}
# prediction_id is already set from earlier in the function
# Calculate cost (same pricing as image-to-video)
pricing = {
"480p": 0.05,
"720p": 0.10,
"1080p": 0.15,
}
cost = pricing.get(resolution, 0.10) * duration
# Get video dimensions
resolution_dims = {
"480p": (854, 480),
"720p": (1280, 720),
"1080p": (1920, 1080),
}
width, height = resolution_dims.get(resolution, (1280, 720))
logger.info(
f"[WaveSpeed] ✅ Generated text-to-video: {len(video_bytes)} bytes, "
f"resolution={resolution}, duration={duration}s, cost=${cost:.2f}"
)
return {
"video_bytes": video_bytes,
"prompt": prompt,
"duration": float(duration),
"model_name": "alibaba/wan-2.5/text-to-video",
"cost": cost,
"provider": "wavespeed",
"source_video_url": video_url,
"prediction_id": prediction_id,
"resolution": resolution,
"width": width,
"height": height,
"metadata": metadata,
}
def _download_video(self, video_url: str) -> bytes:
"""Download video from URL."""
logger.info(f"[WaveSpeed] Downloading video from: {video_url}")
video_response = requests.get(video_url, timeout=180)
if video_response.status_code != 200:
raise HTTPException(
status_code=502,
detail={
"error": "Failed to download WAN 2.5 video",
"status_code": video_response.status_code,
"response": video_response.text[:200],
}
)
return video_response.content
def upscale_video(
self,
video: str, # Base64-encoded video or URL
target_resolution: str = "1080p",
enable_sync_mode: bool = False,
timeout: int = 300,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> bytes:
"""
Upscale video using FlashVSR.
Args:
video: Base64-encoded video data URI or public URL
target_resolution: Target resolution ("720p", "1080p", "2k", "4k")
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds (default: 300 for long videos)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
bytes: Upscaled video bytes
Raises:
HTTPException: If the upscaling fails
"""
model_path = "wavespeed-ai/flashvsr"
url = f"{self.base_url}/{model_path}"
payload = {
"video": video,
"target_resolution": target_resolution,
}
logger.info(f"[WaveSpeed] Upscaling video via {url} (target={target_resolution})")
# Submit the task
response = requests.post(url, headers=self._get_headers(), json=payload, timeout=timeout)
if response.status_code != 200:
logger.error(f"[WaveSpeed] FlashVSR submission failed: {response.status_code} {response.text}")
raise HTTPException(
status_code=502,
detail={
"error": "WaveSpeed FlashVSR submission failed",
"status_code": response.status_code,
"response": response.text,
},
)
response_json = response.json()
data = response_json.get("data") or response_json
prediction_id = data.get("id")
if not prediction_id:
logger.error(f"[WaveSpeed] No prediction ID in FlashVSR response: {response.text}")
raise HTTPException(
status_code=502,
detail="WaveSpeed FlashVSR response missing prediction id",
)
logger.info(f"[WaveSpeed] FlashVSR task submitted: {prediction_id}")
# Poll for result
result = self.polling.poll_until_complete(
prediction_id,
timeout_seconds=timeout,
interval_seconds=2.0, # Longer interval for upscaling (slower process)
progress_callback=progress_callback,
)
outputs = result.get("outputs") or []
if not outputs:
raise HTTPException(status_code=502, detail="WaveSpeed FlashVSR returned no outputs")
video_url = outputs[0] if isinstance(outputs[0], str) else outputs[0].get("url")
if not video_url:
raise HTTPException(status_code=502, detail="WaveSpeed FlashVSR output format not recognized")
# Download the upscaled video
logger.info(f"[WaveSpeed] Downloading upscaled video from: {video_url}")
video_response = requests.get(video_url, timeout=timeout)
if video_response.status_code != 200:
logger.error(f"[WaveSpeed] Failed to download upscaled video: {video_response.status_code}")
raise HTTPException(
status_code=502,
detail="Failed to download upscaled video from WaveSpeed",
)
video_bytes = video_response.content
logger.info(f"[WaveSpeed] Video upscaling completed successfully (size: {len(video_bytes)} bytes)")
return video_bytes
def extend_video(
self,
video: str, # Base64-encoded video or URL
prompt: str,
model: str = "wan-2.5", # "wan-2.5", "wan-2.2-spicy", or "seedance-1.5-pro"
audio: Optional[str] = None, # Optional audio URL (WAN 2.5 only)
negative_prompt: Optional[str] = None, # WAN 2.5 only
resolution: str = "720p",
duration: int = 5,
enable_prompt_expansion: bool = False, # WAN 2.5 only
generate_audio: bool = True, # Seedance 1.5 Pro only
camera_fixed: bool = False, # Seedance 1.5 Pro only
seed: Optional[int] = None,
enable_sync_mode: bool = False,
timeout: int = 300,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> bytes:
"""
Extend video duration using WAN 2.5, WAN 2.2 Spicy, or Seedance 1.5 Pro video-extend.
Args:
video: Base64-encoded video data URI or public URL
prompt: Text prompt describing how to extend the video
model: Model to use ("wan-2.5", "wan-2.2-spicy", or "seedance-1.5-pro")
audio: Optional audio URL to guide generation (WAN 2.5 only)
negative_prompt: Optional negative prompt (WAN 2.5 only)
resolution: Output resolution (varies by model)
duration: Duration of extended video in seconds (varies by model)
enable_prompt_expansion: Enable prompt optimizer (WAN 2.5 only)
generate_audio: Generate audio for extended video (Seedance 1.5 Pro only)
camera_fixed: Fix camera position (Seedance 1.5 Pro only)
seed: Random seed for reproducibility (-1 for random)
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds (default: 300)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
bytes: Extended video bytes
Raises:
HTTPException: If the extension fails
"""
# Determine model path
if model in ("wan-2.2-spicy", "wavespeed-ai/wan-2.2-spicy/video-extend"):
model_path = "wavespeed-ai/wan-2.2-spicy/video-extend"
elif model in ("seedance-1.5-pro", "bytedance/seedance-v1.5-pro/video-extend"):
model_path = "bytedance/seedance-v1.5-pro/video-extend"
else:
# Default to WAN 2.5
model_path = "alibaba/wan-2.5/video-extend"
url = f"{self.base_url}/{model_path}"
# Base payload (common to all models)
payload = {
"video": video,
"prompt": prompt,
"resolution": resolution,
"duration": duration,
}
# Model-specific parameters
if model_path == "alibaba/wan-2.5/video-extend":
# WAN 2.5 specific
payload["enable_prompt_expansion"] = enable_prompt_expansion
if audio:
payload["audio"] = audio
if negative_prompt:
payload["negative_prompt"] = negative_prompt
elif model_path == "bytedance/seedance-v1.5-pro/video-extend":
# Seedance 1.5 Pro specific
payload["generate_audio"] = generate_audio
payload["camera_fixed"] = camera_fixed
# Seed (all models support it)
if seed is not None:
payload["seed"] = seed
logger.info(f"[WaveSpeed] Extending video via {url} (duration={duration}s, resolution={resolution})")
# Submit the task
response = requests.post(url, headers=self._get_headers(), json=payload, timeout=timeout)
if response.status_code != 200:
logger.error(f"[WaveSpeed] Video extend submission failed: {response.status_code} {response.text}")
raise HTTPException(
status_code=502,
detail={
"error": "WaveSpeed video extend submission failed",
"status_code": response.status_code,
"response": response.text,
},
)
response_json = response.json()
data = response_json.get("data") or response_json
prediction_id = data.get("id")
if not prediction_id:
logger.error(f"[WaveSpeed] No prediction ID in video extend response: {response.text}")
raise HTTPException(
status_code=502,
detail="WaveSpeed video extend response missing prediction id",
)
logger.info(f"[WaveSpeed] Video extend task submitted: {prediction_id}")
# Poll for result
result = self.polling.poll_until_complete(
prediction_id,
timeout_seconds=timeout,
interval_seconds=2.0,
progress_callback=progress_callback,
)
outputs = result.get("outputs") or []
if not outputs:
raise HTTPException(status_code=502, detail="WaveSpeed video extend returned no outputs")
# Handle outputs - can be array of strings or array of objects
video_url = None
if isinstance(outputs[0], str):
video_url = outputs[0]
elif isinstance(outputs[0], dict):
video_url = outputs[0].get("url") or outputs[0].get("video_url")
if not video_url:
raise HTTPException(status_code=502, detail="WaveSpeed video extend output format not recognized")
# Download the extended video
logger.info(f"[WaveSpeed] Downloading extended video from: {video_url}")
video_response = requests.get(video_url, timeout=timeout)
if video_response.status_code != 200:
logger.error(f"[WaveSpeed] Failed to download extended video: {video_response.status_code}")
raise HTTPException(
status_code=502,
detail="Failed to download extended video from WaveSpeed",
)
video_bytes = video_response.content
logger.info(f"[WaveSpeed] Video extension completed successfully (size: {len(video_bytes)} bytes)")
return video_bytes
def face_swap(
self,
image: str, # Base64-encoded image or URL
video: str, # Base64-encoded video or URL
prompt: Optional[str] = None,
resolution: str = "480p",
seed: Optional[int] = None,
enable_sync_mode: bool = False,
timeout: int = 300,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> bytes:
"""
Perform face/character swap using MoCha (wavespeed-ai/wan-2.1/mocha).
Args:
image: Base64-encoded image data URI or public URL (reference character)
video: Base64-encoded video data URI or public URL (source video)
prompt: Optional prompt to guide the swap
resolution: Output resolution ("480p" or "720p")
seed: Random seed for reproducibility (-1 for random)
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds (default: 300)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
bytes: Face-swapped video bytes
Raises:
HTTPException: If the face swap fails
"""
model_path = "wavespeed-ai/wan-2.1/mocha"
url = f"{self.base_url}/{model_path}"
# Build payload
payload = {
"image": image,
"video": video,
}
if prompt:
payload["prompt"] = prompt
if resolution in ("480p", "720p"):
payload["resolution"] = resolution
else:
payload["resolution"] = "480p" # Default
if seed is not None:
payload["seed"] = seed
else:
payload["seed"] = -1 # Random seed
logger.info(
f"[WaveSpeed] Face swap request via {url} "
f"(resolution={payload['resolution']}, seed={payload['seed']})"
)
# Submit the task
response = requests.post(url, headers=self._get_headers(), json=payload, timeout=timeout)
if response.status_code != 200:
logger.error(f"[WaveSpeed] Face swap submission failed: {response.status_code} {response.text}")
raise HTTPException(
status_code=502,
detail={
"error": "WaveSpeed face swap submission failed",
"status_code": response.status_code,
"response": response.text,
},
)
response_json = response.json()
data = response_json.get("data") or response_json
if not data or "id" not in data:
logger.error(f"[WaveSpeed] Unexpected face swap response: {response.text}")
raise HTTPException(
status_code=502,
detail={"error": "WaveSpeed response missing prediction id"},
)
prediction_id = data["id"]
logger.info(f"[WaveSpeed] Face swap submitted: {prediction_id}")
if enable_sync_mode:
# Poll until complete
result = self.polling.poll_until_complete(
prediction_id,
timeout_seconds=timeout,
interval_seconds=2.0,
progress_callback=progress_callback,
)
# Extract video URL from result
outputs = result.get("outputs", [])
if not outputs:
raise HTTPException(
status_code=502,
detail={"error": "Face swap completed but no output video found"},
)
# Handle outputs - can be array of strings or array of objects
video_url = None
if isinstance(outputs[0], str):
video_url = outputs[0]
elif isinstance(outputs[0], dict):
video_url = outputs[0].get("url") or outputs[0].get("video_url")
if not video_url:
raise HTTPException(
status_code=502,
detail={"error": "Face swap output format not recognized"},
)
# Download video
logger.info(f"[WaveSpeed] Downloading face-swapped video from: {video_url}")
video_response = requests.get(video_url, timeout=timeout)
if video_response.status_code != 200:
raise HTTPException(
status_code=502,
detail={"error": f"Failed to download face-swapped video: {video_response.status_code}"},
)
video_bytes = video_response.content
logger.info(f"[WaveSpeed] Face swap completed: {len(video_bytes)} bytes")
return video_bytes
else:
# Return prediction ID for async polling
raise HTTPException(
status_code=501,
detail={
"error": "Async mode not yet implemented for face swap",
"prediction_id": prediction_id,
},
)
def video_face_swap(
self,
video: str, # Base64-encoded video or URL
face_image: str, # Base64-encoded image or URL
target_gender: str = "all",
target_index: int = 0,
enable_sync_mode: bool = False,
timeout: int = 300,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> bytes:
"""
Perform face swap using Video Face Swap (wavespeed-ai/video-face-swap).
Args:
video: Base64-encoded video data URI or public URL (source video)
face_image: Base64-encoded image data URI or public URL (reference face)
target_gender: Filter which faces to swap ("all", "female", "male")
target_index: Select which face to swap (0 = largest, 1 = second largest, etc.)
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds (default: 300)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
bytes: Face-swapped video bytes
Raises:
HTTPException: If the face swap fails
"""
model_path = "wavespeed-ai/video-face-swap"
url = f"{self.base_url}/{model_path}"
# Build payload
payload = {
"video": video,
"face_image": face_image,
}
if target_gender in ("all", "female", "male"):
payload["target_gender"] = target_gender
else:
payload["target_gender"] = "all" # Default
if 0 <= target_index <= 10:
payload["target_index"] = target_index
else:
payload["target_index"] = 0 # Default
logger.info(
f"[WaveSpeed] Video face swap request via {url} "
f"(target_gender={payload['target_gender']}, target_index={payload['target_index']})"
)
# Submit the task
response = requests.post(url, headers=self._get_headers(), json=payload, timeout=timeout)
if response.status_code != 200:
logger.error(f"[WaveSpeed] Video face swap submission failed: {response.status_code} {response.text}")
raise HTTPException(
status_code=502,
detail={
"error": "WaveSpeed video face swap submission failed",
"status_code": response.status_code,
"response": response.text,
},
)
response_json = response.json()
data = response_json.get("data") or response_json
if not data or "id" not in data:
logger.error(f"[WaveSpeed] Unexpected video face swap response: {response.text}")
raise HTTPException(
status_code=502,
detail={"error": "WaveSpeed response missing prediction id"},
)
prediction_id = data["id"]
logger.info(f"[WaveSpeed] Video face swap submitted: {prediction_id}")
if enable_sync_mode:
# Poll until complete
result = self.polling.poll_until_complete(
prediction_id,
timeout_seconds=timeout,
interval_seconds=2.0,
progress_callback=progress_callback,
)
# Extract video URL from result
outputs = result.get("outputs", [])
if not outputs:
raise HTTPException(
status_code=502,
detail={"error": "Video face swap completed but no output video found"},
)
# Handle outputs - can be array of strings or array of objects
video_url = None
if isinstance(outputs[0], str):
video_url = outputs[0]
elif isinstance(outputs[0], dict):
video_url = outputs[0].get("url") or outputs[0].get("video_url")
if not video_url:
raise HTTPException(
status_code=502,
detail={"error": "Video face swap output format not recognized"},
)
# Download video
logger.info(f"[WaveSpeed] Downloading face-swapped video from: {video_url}")
video_response = requests.get(video_url, timeout=timeout)
if video_response.status_code != 200:
raise HTTPException(
status_code=502,
detail={"error": f"Failed to download face-swapped video: {video_response.status_code}"},
)
video_bytes = video_response.content
logger.info(f"[WaveSpeed] Video face swap completed: {len(video_bytes)} bytes")
return video_bytes
else:
# Return prediction ID for async polling
raise HTTPException(
status_code=501,
detail={
"error": "Async mode not yet implemented for video face swap",
"prediction_id": prediction_id,
},
)
def video_translate(
self,
video: str, # Base64-encoded video or URL
output_language: str = "English",
enable_sync_mode: bool = False,
timeout: int = 600,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> bytes:
"""
Translate video to target language using HeyGen Video Translate.
Args:
video: Base64-encoded video data URI or public URL (source video)
output_language: Target language for translation (default: "English")
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds (default: 600)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
bytes: Translated video bytes
Raises:
HTTPException: If the video translation fails
"""
model_path = "heygen/video-translate"
url = f"{self.base_url}/{model_path}"
# Build payload
payload = {
"video": video,
"output_language": output_language,
}
logger.info(
f"[WaveSpeed] Video translate request via {url} "
f"(output_language={output_language})"
)
# Submit the task
response = requests.post(url, headers=self._get_headers(), json=payload, timeout=timeout)
if response.status_code != 200:
logger.error(f"[WaveSpeed] Video translate submission failed: {response.status_code} {response.text}")
raise HTTPException(
status_code=502,
detail={
"error": "WaveSpeed video translate submission failed",
"status_code": response.status_code,
"response": response.text,
},
)
response_json = response.json()
data = response_json.get("data") or response_json
if not data or "id" not in data:
logger.error(f"[WaveSpeed] Unexpected video translate response: {response.text}")
raise HTTPException(
status_code=502,
detail={"error": "WaveSpeed response missing prediction id"},
)
prediction_id = data["id"]
logger.info(f"[WaveSpeed] Video translate submitted: {prediction_id}")
if enable_sync_mode:
# Poll until complete
result = self.polling.poll_until_complete(
prediction_id,
timeout_seconds=timeout,
interval_seconds=2.0,
progress_callback=progress_callback,
)
# Extract video URL from result
outputs = result.get("outputs", [])
if not outputs:
raise HTTPException(
status_code=502,
detail={"error": "Video translate completed but no output video found"},
)
# Handle outputs - can be array of strings or array of objects
video_url = None
if isinstance(outputs[0], str):
video_url = outputs[0]
elif isinstance(outputs[0], dict):
video_url = outputs[0].get("url") or outputs[0].get("video_url")
if not video_url:
raise HTTPException(
status_code=502,
detail={"error": "Video translate output format not recognized"},
)
# Download video
logger.info(f"[WaveSpeed] Downloading translated video from: {video_url}")
video_response = requests.get(video_url, timeout=timeout)
if video_response.status_code != 200:
raise HTTPException(
status_code=502,
detail={"error": f"Failed to download translated video: {video_response.status_code}"},
)
video_bytes = video_response.content
logger.info(f"[WaveSpeed] Video translate completed: {len(video_bytes)} bytes")
return video_bytes
else:
# Return prediction ID for async polling
raise HTTPException(
status_code=501,
detail={
"error": "Async mode not yet implemented for video translate",
"prediction_id": prediction_id,
},
)
def remove_background(
self,
video: str, # Base64-encoded video or URL
background_image: Optional[str] = None, # Base64-encoded image or URL (optional)
enable_sync_mode: bool = False,
timeout: int = 300,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> bytes:
"""
Remove or replace video background using Video Background Remover.
Args:
video: Base64-encoded video data URI or public URL (source video)
background_image: Optional base64-encoded image data URI or public URL (replacement background)
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds (default: 300)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
bytes: Video with background removed/replaced
Raises:
HTTPException: If the background removal fails
"""
model_path = "wavespeed-ai/video-background-remover"
url = f"{self.base_url}/{model_path}"
# Build payload
payload = {
"video": video,
}
if background_image:
payload["background_image"] = background_image
logger.info(
f"[WaveSpeed] Video background removal request via {url} "
f"(has_background={background_image is not None})"
)
# Submit the task
response = requests.post(url, headers=self._get_headers(), json=payload, timeout=timeout)
if response.status_code != 200:
logger.error(f"[WaveSpeed] Video background removal submission failed: {response.status_code} {response.text}")
raise HTTPException(
status_code=502,
detail={
"error": "WaveSpeed video background removal submission failed",
"status_code": response.status_code,
"response": response.text,
},
)
response_json = response.json()
data = response_json.get("data") or response_json
prediction_id = data.get("id")
if not prediction_id:
logger.error(f"[WaveSpeed] No prediction ID in video background removal response: {response.text}")
raise HTTPException(
status_code=502,
detail="WaveSpeed video background removal response missing prediction id",
)
logger.info(f"[WaveSpeed] Video background removal task submitted: {prediction_id}")
if enable_sync_mode:
result = self.polling.poll_until_complete(
prediction_id,
timeout_seconds=timeout,
interval_seconds=2.0,
progress_callback=progress_callback,
)
outputs = result.get("outputs") or []
if not outputs:
raise HTTPException(status_code=502, detail="WaveSpeed video background removal returned no outputs")
video_url = None
if isinstance(outputs[0], str):
video_url = outputs[0]
elif isinstance(outputs[0], dict):
video_url = outputs[0].get("url") or outputs[0].get("video_url")
if not video_url:
raise HTTPException(status_code=502, detail="WaveSpeed video background removal output format not recognized")
logger.info(f"[WaveSpeed] Downloading processed video from: {video_url}")
video_response = requests.get(video_url, timeout=timeout)
if video_response.status_code != 200:
logger.error(f"[WaveSpeed] Failed to download processed video: {video_response.status_code}")
raise HTTPException(
status_code=502,
detail="Failed to download processed video from WaveSpeed",
)
video_bytes = video_response.content
logger.info(f"[WaveSpeed] Video background removal completed successfully (size: {len(video_bytes)} bytes)")
return video_bytes
else:
raise HTTPException(
status_code=501,
detail={
"error": "Async mode not yet implemented for video background removal",
"prediction_id": prediction_id,
},
)
def hunyuan_video_foley(
self,
video: str, # Base64-encoded video or URL
prompt: Optional[str] = None, # Optional text prompt describing desired sounds
seed: int = -1, # Random seed (-1 for random)
enable_sync_mode: bool = False,
timeout: int = 300,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> bytes:
"""
Generate realistic Foley and ambient audio from video using Hunyuan Video Foley.
Args:
video: Base64-encoded video data URI or public URL (source video)
prompt: Optional text prompt describing desired sounds (e.g., "ocean waves, seagulls")
seed: Random seed for reproducibility (-1 for random)
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds (default: 300)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
bytes: Video with generated audio
Raises:
HTTPException: If the audio generation fails
"""
model_path = "wavespeed-ai/hunyuan-video-foley"
url = f"{self.base_url}/{model_path}"
# Build payload
payload = {
"video": video,
"seed": seed,
}
if prompt:
payload["prompt"] = prompt
logger.info(
f"[WaveSpeed] Hunyuan Video Foley request via {url} "
f"(has_prompt={prompt is not None}, seed={seed})"
)
# Submit the task
response = requests.post(url, headers=self._get_headers(), json=payload, timeout=timeout)
if response.status_code != 200:
logger.error(f"[WaveSpeed] Hunyuan Video Foley submission failed: {response.status_code} {response.text}")
raise HTTPException(
status_code=502,
detail={
"error": "WaveSpeed Hunyuan Video Foley submission failed",
"status_code": response.status_code,
"response": response.text,
},
)
response_json = response.json()
data = response_json.get("data") or response_json
prediction_id = data.get("id")
if not prediction_id:
logger.error(f"[WaveSpeed] No prediction ID in Hunyuan Video Foley response: {response.text}")
raise HTTPException(
status_code=502,
detail="WaveSpeed Hunyuan Video Foley response missing prediction id",
)
logger.info(f"[WaveSpeed] Hunyuan Video Foley task submitted: {prediction_id}")
if enable_sync_mode:
result = self.polling.poll_until_complete(
prediction_id,
timeout_seconds=timeout,
interval_seconds=2.0,
progress_callback=progress_callback,
)
outputs = result.get("outputs") or []
if not outputs:
raise HTTPException(status_code=502, detail="WaveSpeed Hunyuan Video Foley returned no outputs")
video_url = None
if isinstance(outputs[0], str):
video_url = outputs[0]
elif isinstance(outputs[0], dict):
video_url = outputs[0].get("url") or outputs[0].get("video_url")
if not video_url:
raise HTTPException(status_code=502, detail="WaveSpeed Hunyuan Video Foley output format not recognized")
logger.info(f"[WaveSpeed] Downloading video with audio from: {video_url}")
video_response = requests.get(video_url, timeout=timeout)
if video_response.status_code != 200:
logger.error(f"[WaveSpeed] Failed to download video with audio: {video_response.status_code}")
raise HTTPException(
status_code=502,
detail="Failed to download video with audio from WaveSpeed",
)
video_bytes = video_response.content
logger.info(f"[WaveSpeed] Hunyuan Video Foley completed successfully (size: {len(video_bytes)} bytes)")
return video_bytes
else:
raise HTTPException(
status_code=501,
detail={
"error": "Async mode not yet implemented for Hunyuan Video Foley",
"prediction_id": prediction_id,
},
)
def think_sound(
self,
video: str, # Base64-encoded video or URL
prompt: Optional[str] = None, # Optional text prompt describing desired sounds
seed: int = -1, # Random seed (-1 for random)
enable_sync_mode: bool = False,
timeout: int = 300,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> bytes:
"""
Generate realistic sound effects and audio tracks from video using Think Sound.
Args:
video: Base64-encoded video data URI or public URL (source video)
prompt: Optional text prompt describing desired sounds (e.g., "engine roaring, footsteps on gravel")
seed: Random seed for reproducibility (-1 for random)
enable_sync_mode: If True, wait for result and return it directly
timeout: Request timeout in seconds (default: 300)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
bytes: Video with generated audio
Raises:
HTTPException: If the audio generation fails
"""
model_path = "wavespeed-ai/think-sound"
url = f"{self.base_url}/{model_path}"
# Build payload
payload = {
"video": video,
"seed": seed,
}
if prompt:
payload["prompt"] = prompt
logger.info(
f"[WaveSpeed] Think Sound request via {url} "
f"(has_prompt={prompt is not None}, seed={seed})"
)
# Submit the task
response = requests.post(url, headers=self._get_headers(), json=payload, timeout=timeout)
if response.status_code != 200:
logger.error(f"[WaveSpeed] Think Sound submission failed: {response.status_code} {response.text}")
raise HTTPException(
status_code=502,
detail={
"error": "WaveSpeed Think Sound submission failed",
"status_code": response.status_code,
"response": response.text,
},
)
response_json = response.json()
data = response_json.get("data") or response_json
prediction_id = data.get("id")
if not prediction_id:
logger.error(f"[WaveSpeed] No prediction ID in Think Sound response: {response.text}")
raise HTTPException(
status_code=502,
detail="WaveSpeed Think Sound response missing prediction id",
)
logger.info(f"[WaveSpeed] Think Sound task submitted: {prediction_id}")
if enable_sync_mode:
result = self.polling.poll_until_complete(
prediction_id,
timeout_seconds=timeout,
interval_seconds=2.0,
progress_callback=progress_callback,
)
outputs = result.get("outputs") or []
if not outputs:
raise HTTPException(status_code=502, detail="WaveSpeed Think Sound returned no outputs")
video_url = None
if isinstance(outputs[0], str):
video_url = outputs[0]
elif isinstance(outputs[0], dict):
video_url = outputs[0].get("url") or outputs[0].get("video_url")
if not video_url:
raise HTTPException(status_code=502, detail="WaveSpeed Think Sound output format not recognized")
logger.info(f"[WaveSpeed] Downloading video with audio from: {video_url}")
video_response = requests.get(video_url, timeout=timeout)
if video_response.status_code != 200:
logger.error(f"[WaveSpeed] Failed to download video with audio: {video_response.status_code}")
raise HTTPException(
status_code=502,
detail="Failed to download video with audio from WaveSpeed",
)
video_bytes = video_response.content
logger.info(f"[WaveSpeed] Think Sound completed successfully (size: {len(video_bytes)} bytes)")
return video_bytes
else:
raise HTTPException(
status_code=501,
detail={
"error": "Async mode not yet implemented for Think Sound",
"prediction_id": prediction_id,
},
)