Base code

This commit is contained in:
Kunthawat Greethong
2026-01-08 22:39:53 +07:00
parent 697115c61a
commit c35fa52117
2169 changed files with 626670 additions and 0 deletions

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,600 @@
"""
WaveSpeed AI API Client
Thin HTTP client for the WaveSpeed AI API.
Handles authentication, submission, and delegates to specialized generators.
"""
from __future__ import annotations
from typing import Any, Dict, Optional, Callable
from fastapi import HTTPException
from services.onboarding.api_key_manager import APIKeyManager
from utils.logger_utils import get_service_logger
from .polling import WaveSpeedPolling
from .generators.prompt import PromptGenerator
from .generators.image import ImageGenerator
from .generators.video import VideoGenerator
from .generators.speech import SpeechGenerator
logger = get_service_logger("wavespeed.client")
class WaveSpeedClient:
    """
    Thin HTTP client for the WaveSpeed AI API.

    Handles authentication and request submission, then delegates the actual
    work to specialized generators (prompt, image, video, speech) and to a
    shared polling helper for asynchronous jobs.
    """

    # Base URL for all WaveSpeed v3 API endpoints.
    BASE_URL = "https://api.wavespeed.ai/api/v3"

    def __init__(self, api_key: Optional[str] = None):
        """Initialize the client and its generators.

        Args:
            api_key: Explicit WaveSpeed API key. When omitted, the key is
                resolved through APIKeyManager under the "wavespeed" provider.

        Raises:
            RuntimeError: If no API key could be resolved from either source.
        """
        manager = APIKeyManager()
        self.api_key = api_key or manager.get_api_key("wavespeed")
        if not self.api_key:
            raise RuntimeError("WAVESPEED_API_KEY is not configured. Please add it to your environment.")
        # Shared polling utility used by all generators for async jobs.
        self.polling = WaveSpeedPolling(self.api_key, self.BASE_URL)
        # Specialized generators; each receives the key, base URL, and poller.
        self.prompt = PromptGenerator(self.api_key, self.BASE_URL, self.polling)
        self.image = ImageGenerator(self.api_key, self.BASE_URL, self.polling)
        self.video = VideoGenerator(self.api_key, self.BASE_URL, self.polling)
        self.speech = SpeechGenerator(self.api_key, self.BASE_URL, self.polling)

    def _headers(self) -> Dict[str, str]:
        """Build the standard JSON + Bearer-auth headers for API requests."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }

    # Core submission methods (delegated to video generator)
    def submit_image_to_video(
        self,
        model_path: str,
        payload: Dict[str, Any],
        timeout: int = 30,
    ) -> str:
        """
        Submit an image-to-video generation request.

        Args:
            model_path: Model path on the WaveSpeed API.
            payload: Request payload forwarded verbatim to the API.
            timeout: Request timeout in seconds (default: 30).

        Returns:
            The prediction ID for polling.
        """
        return self.video.submit_image_to_video(model_path, payload, timeout)

    def submit_text_to_video(
        self,
        model_path: str,
        payload: Dict[str, Any],
        timeout: int = 60,
    ) -> str:
        """
        Submit a text-to-video generation request to WaveSpeed.

        Args:
            model_path: Model path (e.g., "alibaba/wan-2.5/text-to-video")
            payload: Request payload with prompt, resolution, duration, optional audio
            timeout: Request timeout in seconds (default: 60)

        Returns:
            Prediction ID for polling
        """
        return self.video.submit_text_to_video(model_path, payload, timeout)

    # Polling methods (delegated to polling utilities)
    def get_prediction_result(self, prediction_id: str, timeout: int = 30) -> Dict[str, Any]:
        """
        Fetch the current status/result for a prediction.

        Matches the example pattern: simple GET request, check
        status_code == 200, return data.
        """
        return self.polling.get_prediction_result(prediction_id, timeout)

    def poll_until_complete(
        self,
        prediction_id: str,
        timeout_seconds: Optional[int] = None,
        interval_seconds: float = 1.0,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> Dict[str, Any]:
        """
        Poll WaveSpeed until the job completes or fails.

        Matches the example pattern: simple polling loop until status is
        "completed" or "failed".

        Args:
            prediction_id: The prediction ID to poll for
            timeout_seconds: Optional timeout in seconds. If None, polls indefinitely until completion/failure.
            interval_seconds: Seconds to wait between polling attempts (default: 1.0)
            progress_callback: Optional callback function(progress: float, message: str) for progress updates

        Returns:
            Dict containing the completed result

        Raises:
            HTTPException: If the task fails, polling fails, or times out (if timeout_seconds is set)
        """
        return self.polling.poll_until_complete(
            prediction_id,
            timeout_seconds=timeout_seconds,
            interval_seconds=interval_seconds,
            progress_callback=progress_callback,
        )

    # Generator methods (delegated to specialized generators)
    def optimize_prompt(
        self,
        text: str,
        mode: str = "image",
        style: str = "default",
        image: Optional[str] = None,
        enable_sync_mode: bool = True,
        timeout: int = 30,
    ) -> str:
        """
        Optimize a prompt using WaveSpeed prompt optimizer.

        Args:
            text: The prompt text to optimize
            mode: "image" or "video" (default: "image")
            style: "default", "artistic", "photographic", "technical", "anime", "realistic" (default: "default")
            image: Base64-encoded image for context (optional)
            enable_sync_mode: If True, wait for result and return it directly (default: True)
            timeout: Request timeout in seconds (default: 30)

        Returns:
            Optimized prompt text
        """
        return self.prompt.optimize_prompt(
            text=text,
            mode=mode,
            style=style,
            image=image,
            enable_sync_mode=enable_sync_mode,
            timeout=timeout,
        )

    def generate_image(
        self,
        model: str,
        prompt: str,
        width: int = 1024,
        height: int = 1024,
        num_inference_steps: Optional[int] = None,
        guidance_scale: Optional[float] = None,
        negative_prompt: Optional[str] = None,
        seed: Optional[int] = None,
        enable_sync_mode: bool = True,
        timeout: int = 120,
        **kwargs
    ) -> bytes:
        """
        Generate image using WaveSpeed AI models (Ideogram V3 or Qwen Image).

        Args:
            model: Model to use ("ideogram-v3-turbo" or "qwen-image")
            prompt: Text prompt for image generation
            width: Image width (default: 1024)
            height: Image height (default: 1024)
            num_inference_steps: Number of inference steps
            guidance_scale: Guidance scale for generation
            negative_prompt: Negative prompt (what to avoid)
            seed: Random seed for reproducibility
            enable_sync_mode: If True, wait for result and return it directly (default: True)
            timeout: Request timeout in seconds (default: 120)
            **kwargs: Additional parameters forwarded to the API payload

        Returns:
            bytes: Generated image bytes
        """
        return self.image.generate_image(
            model=model,
            prompt=prompt,
            width=width,
            height=height,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            negative_prompt=negative_prompt,
            seed=seed,
            enable_sync_mode=enable_sync_mode,
            timeout=timeout,
            **kwargs
        )

    def generate_character_image(
        self,
        prompt: str,
        reference_image_bytes: bytes,
        style: str = "Auto",
        aspect_ratio: str = "16:9",
        rendering_speed: str = "Default",
        timeout: Optional[int] = None,
    ) -> bytes:
        """
        Generate image using Ideogram Character API to maintain character consistency.

        Creates variations of a reference character image while respecting the
        base appearance.

        Note: This API is always async and requires polling for results.

        Args:
            prompt: Text prompt describing the scene/context for the character
            reference_image_bytes: Reference image bytes (base avatar)
            style: Character style type ("Auto", "Fiction", or "Realistic")
            aspect_ratio: Aspect ratio ("1:1", "16:9", "9:16", "4:3", "3:4")
            rendering_speed: Rendering speed ("Default", "Turbo", "Quality")
            timeout: Total timeout in seconds for submission + polling
                (None uses the image generator's default behavior)

        Returns:
            bytes: Generated image bytes with consistent character
        """
        return self.image.generate_character_image(
            prompt=prompt,
            reference_image_bytes=reference_image_bytes,
            style=style,
            aspect_ratio=aspect_ratio,
            rendering_speed=rendering_speed,
            timeout=timeout,
        )

    def generate_speech(
        self,
        text: str,
        voice_id: str,
        speed: float = 1.0,
        volume: float = 1.0,
        pitch: float = 0.0,
        emotion: str = "happy",
        enable_sync_mode: bool = True,
        timeout: int = 120,
        **kwargs
    ) -> bytes:
        """
        Generate speech audio using Minimax Speech 02 HD via WaveSpeed.

        Args:
            text: Text to convert to speech (max 10000 characters)
            voice_id: Voice ID (e.g., "Wise_Woman", "Friendly_Person", etc.)
            speed: Speech speed (0.5-2.0, default: 1.0)
            volume: Speech volume (0.1-10.0, default: 1.0)
            pitch: Speech pitch (-12 to 12, default: 0.0)
            emotion: Emotion ("happy", "sad", "angry", etc., default: "happy")
            enable_sync_mode: If True, wait for result and return it directly (default: True)
            timeout: Request timeout in seconds (default: 120)
            **kwargs: Additional parameters (sample_rate, bitrate, format, etc.)

        Returns:
            bytes: Generated audio bytes
        """
        return self.speech.generate_speech(
            text=text,
            voice_id=voice_id,
            speed=speed,
            volume=volume,
            pitch=pitch,
            emotion=emotion,
            enable_sync_mode=enable_sync_mode,
            timeout=timeout,
            **kwargs
        )

    def generate_text_video(
        self,
        prompt: str,
        resolution: str = "720p",  # 480p, 720p, 1080p
        duration: int = 5,  # 5 or 10 seconds
        audio_base64: Optional[str] = None,  # Optional audio for lip-sync
        negative_prompt: Optional[str] = None,
        seed: Optional[int] = None,
        enable_prompt_expansion: bool = True,
        enable_sync_mode: bool = False,
        timeout: int = 180,
    ) -> Dict[str, Any]:
        """
        Generate video from text prompt using WAN 2.5 text-to-video.

        Args:
            prompt: Text prompt describing the video
            resolution: Output resolution (480p, 720p, 1080p)
            duration: Video duration in seconds (5 or 10)
            audio_base64: Optional audio file (wav/mp3, 3-30s, ≤15MB) for lip-sync
            negative_prompt: Optional negative prompt
            seed: Optional random seed for reproducibility
            enable_prompt_expansion: Enable prompt optimizer
            enable_sync_mode: If True, wait for result and return it directly
            timeout: Request timeout in seconds (default: 180)

        Returns:
            Dictionary with video bytes, metadata, and cost
        """
        return self.video.generate_text_video(
            prompt=prompt,
            resolution=resolution,
            duration=duration,
            audio_base64=audio_base64,
            negative_prompt=negative_prompt,
            seed=seed,
            enable_prompt_expansion=enable_prompt_expansion,
            enable_sync_mode=enable_sync_mode,
            timeout=timeout,
        )

    def upscale_video(
        self,
        video: str,
        target_resolution: str = "1080p",
        enable_sync_mode: bool = False,
        timeout: int = 300,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> bytes:
        """
        Upscale video using FlashVSR.

        Args:
            video: Base64-encoded video data URI or public URL
            target_resolution: Target resolution ("720p", "1080p", "2k", "4k")
            enable_sync_mode: If True, wait for result and return it directly
            timeout: Request timeout in seconds (default: 300 for long videos)
            progress_callback: Optional callback function(progress: float, message: str) for progress updates

        Returns:
            bytes: Upscaled video bytes
        """
        return self.video.upscale_video(
            video=video,
            target_resolution=target_resolution,
            enable_sync_mode=enable_sync_mode,
            timeout=timeout,
            progress_callback=progress_callback,
        )

    def extend_video(
        self,
        video: str,
        prompt: str,
        model: str = "wan-2.5",
        audio: Optional[str] = None,
        negative_prompt: Optional[str] = None,
        resolution: str = "720p",
        duration: int = 5,
        enable_prompt_expansion: bool = False,
        generate_audio: bool = True,
        camera_fixed: bool = False,
        seed: Optional[int] = None,
        enable_sync_mode: bool = False,
        timeout: int = 300,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> bytes:
        """
        Extend video duration using WAN 2.5, WAN 2.2 Spicy, or Seedance 1.5 Pro video-extend.

        Args:
            video: Base64-encoded video data URI or public URL
            prompt: Text prompt describing how to extend the video
            model: Model to use ("wan-2.5", "wan-2.2-spicy", or "seedance-1.5-pro")
            audio: Optional audio URL to guide generation (WAN 2.5 only)
            negative_prompt: Optional negative prompt (WAN 2.5 only)
            resolution: Output resolution (varies by model)
            duration: Duration of extended video in seconds (varies by model)
            enable_prompt_expansion: Enable prompt optimizer (WAN 2.5 only)
            generate_audio: Generate audio for extended video (Seedance 1.5 Pro only)
            camera_fixed: Fix camera position (Seedance 1.5 Pro only)
            seed: Random seed for reproducibility (-1 for random)
            enable_sync_mode: If True, wait for result and return it directly
            timeout: Request timeout in seconds (default: 300)
            progress_callback: Optional callback function(progress: float, message: str) for progress updates

        Returns:
            bytes: Extended video bytes
        """
        return self.video.extend_video(
            video=video,
            prompt=prompt,
            model=model,
            audio=audio,
            negative_prompt=negative_prompt,
            resolution=resolution,
            duration=duration,
            enable_prompt_expansion=enable_prompt_expansion,
            generate_audio=generate_audio,
            camera_fixed=camera_fixed,
            seed=seed,
            enable_sync_mode=enable_sync_mode,
            timeout=timeout,
            progress_callback=progress_callback,
        )

    def face_swap(
        self,
        image: str,
        video: str,
        prompt: Optional[str] = None,
        resolution: str = "480p",
        seed: Optional[int] = None,
        enable_sync_mode: bool = False,
        timeout: int = 300,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> bytes:
        """
        Perform face/character swap using MoCha (wavespeed-ai/wan-2.1/mocha).

        Args:
            image: Base64-encoded image data URI or public URL (reference character)
            video: Base64-encoded video data URI or public URL (source video)
            prompt: Optional prompt to guide the swap
            resolution: Output resolution ("480p" or "720p")
            seed: Random seed for reproducibility (-1 for random)
            enable_sync_mode: If True, wait for result and return it directly
            timeout: Request timeout in seconds (default: 300)
            progress_callback: Optional callback function(progress: float, message: str) for progress updates

        Returns:
            bytes: Face-swapped video bytes
        """
        return self.video.face_swap(
            image=image,
            video=video,
            prompt=prompt,
            resolution=resolution,
            seed=seed,
            enable_sync_mode=enable_sync_mode,
            timeout=timeout,
            progress_callback=progress_callback,
        )

    def video_face_swap(
        self,
        video: str,
        face_image: str,
        target_gender: str = "all",
        target_index: int = 0,
        enable_sync_mode: bool = False,
        timeout: int = 300,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> bytes:
        """
        Perform face swap using Video Face Swap (wavespeed-ai/video-face-swap).

        Args:
            video: Base64-encoded video data URI or public URL (source video)
            face_image: Base64-encoded image data URI or public URL (reference face)
            target_gender: Filter which faces to swap ("all", "female", "male")
            target_index: Select which face to swap (0 = largest, 1 = second largest, etc.)
            enable_sync_mode: If True, wait for result and return it directly
            timeout: Request timeout in seconds (default: 300)
            progress_callback: Optional callback function(progress: float, message: str) for progress updates

        Returns:
            bytes: Face-swapped video bytes
        """
        return self.video.video_face_swap(
            video=video,
            face_image=face_image,
            target_gender=target_gender,
            target_index=target_index,
            enable_sync_mode=enable_sync_mode,
            timeout=timeout,
            progress_callback=progress_callback,
        )

    def video_translate(
        self,
        video: str,
        output_language: str = "English",
        enable_sync_mode: bool = False,
        timeout: int = 600,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> bytes:
        """
        Translate video to target language using HeyGen Video Translate.

        Args:
            video: Base64-encoded video data URI or public URL (source video)
            output_language: Target language for translation (default: "English")
            enable_sync_mode: If True, wait for result and return it directly
            timeout: Request timeout in seconds (default: 600)
            progress_callback: Optional callback function(progress: float, message: str) for progress updates

        Returns:
            bytes: Translated video bytes
        """
        return self.video.video_translate(
            video=video,
            output_language=output_language,
            enable_sync_mode=enable_sync_mode,
            timeout=timeout,
            progress_callback=progress_callback,
        )

    def remove_background(
        self,
        video: str,
        background_image: Optional[str] = None,
        enable_sync_mode: bool = False,
        timeout: int = 300,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> bytes:
        """
        Remove or replace video background using Video Background Remover.

        Args:
            video: Base64-encoded video data URI or public URL (source video)
            background_image: Optional base64-encoded image data URI or public URL (replacement background)
            enable_sync_mode: If True, wait for result and return it directly
            timeout: Request timeout in seconds (default: 300)
            progress_callback: Optional callback function(progress: float, message: str) for progress updates

        Returns:
            bytes: Video with background removed/replaced
        """
        return self.video.remove_background(
            video=video,
            background_image=background_image,
            enable_sync_mode=enable_sync_mode,
            timeout=timeout,
            progress_callback=progress_callback,
        )

    def hunyuan_video_foley(
        self,
        video: str,
        prompt: Optional[str] = None,
        seed: int = -1,
        enable_sync_mode: bool = False,
        timeout: int = 300,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> bytes:
        """
        Generate realistic Foley and ambient audio from video using Hunyuan Video Foley.

        Args:
            video: Base64-encoded video data URI or public URL (source video)
            prompt: Optional text prompt describing desired sounds (e.g., "ocean waves, seagulls")
            seed: Random seed for reproducibility (-1 for random)
            enable_sync_mode: If True, wait for result and return it directly
            timeout: Request timeout in seconds (default: 300)
            progress_callback: Optional callback function(progress: float, message: str) for progress updates

        Returns:
            bytes: Video with generated audio
        """
        return self.video.hunyuan_video_foley(
            video=video,
            prompt=prompt,
            seed=seed,
            enable_sync_mode=enable_sync_mode,
            timeout=timeout,
            progress_callback=progress_callback,
        )

    def think_sound(
        self,
        video: str,
        prompt: Optional[str] = None,
        seed: int = -1,
        enable_sync_mode: bool = False,
        timeout: int = 300,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> bytes:
        """
        Generate realistic sound effects and audio tracks from video using Think Sound.

        Args:
            video: Base64-encoded video data URI or public URL (source video)
            prompt: Optional text prompt describing desired sounds (e.g., "engine roaring, footsteps on gravel")
            seed: Random seed for reproducibility (-1 for random)
            enable_sync_mode: If True, wait for result and return it directly
            timeout: Request timeout in seconds (default: 300)
            progress_callback: Optional callback function(progress: float, message: str) for progress updates

        Returns:
            bytes: Video with generated audio
        """
        return self.video.think_sound(
            video=video,
            prompt=prompt,
            seed=seed,
            enable_sync_mode=enable_sync_mode,
            timeout=timeout,
            progress_callback=progress_callback,
        )

View File

@@ -0,0 +1 @@
"""WaveSpeed API generators for different content types."""

View File

@@ -0,0 +1,374 @@
"""
Image generation generator for WaveSpeed API.
"""
import time
import requests
from typing import Optional
from requests import exceptions as requests_exceptions
from fastapi import HTTPException
from utils.logger_utils import get_service_logger
logger = get_service_logger("wavespeed.generators.image")
class ImageGenerator:
    """Image generation generator for the WaveSpeed API.

    Wraps the text-to-image endpoints (Ideogram V3 Turbo, Qwen Image) and the
    always-async Ideogram Character endpoint, downloading the resulting image
    bytes from the URL the API returns.
    """

    # Default total polling timeout (seconds) for the Ideogram Character API,
    # applied when the caller does not supply one. Matches the documented
    # default in the public docstring.
    DEFAULT_CHARACTER_TIMEOUT = 180

    def __init__(self, api_key: str, base_url: str, polling):
        """Initialize image generator.

        Args:
            api_key: WaveSpeed API key
            base_url: WaveSpeed API base URL
            polling: WaveSpeedPolling instance for async operations
        """
        self.api_key = api_key
        self.base_url = base_url
        self.polling = polling

    def _get_headers(self) -> dict:
        """Get HTTP headers for API requests."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }

    def generate_image(
        self,
        model: str,
        prompt: str,
        width: int = 1024,
        height: int = 1024,
        num_inference_steps: Optional[int] = None,
        guidance_scale: Optional[float] = None,
        negative_prompt: Optional[str] = None,
        seed: Optional[int] = None,
        enable_sync_mode: bool = True,
        timeout: int = 120,
        **kwargs
    ) -> bytes:
        """
        Generate image using WaveSpeed AI models (Ideogram V3 or Qwen Image).

        Args:
            model: Model to use ("ideogram-v3-turbo" or "qwen-image")
            prompt: Text prompt for image generation
            width: Image width (default: 1024)
            height: Image height (default: 1024)
            num_inference_steps: Number of inference steps
            guidance_scale: Guidance scale for generation
            negative_prompt: Negative prompt (what to avoid)
            seed: Random seed for reproducibility
            enable_sync_mode: If True, wait for result and return it directly (default: True)
            timeout: Request timeout in seconds (default: 120)
            **kwargs: Additional parameters merged into the payload (existing
                keys are never overwritten)

        Returns:
            bytes: Generated image bytes

        Raises:
            ValueError: If ``model`` is not a supported model name.
            HTTPException: On API errors, missing prediction id, or missing outputs.
        """
        # Map friendly model names to WaveSpeed API paths.
        model_paths = {
            "ideogram-v3-turbo": "ideogram-ai/ideogram-v3-turbo",
            "qwen-image": "wavespeed-ai/qwen-image/text-to-image",
        }
        model_path = model_paths.get(model)
        if not model_path:
            raise ValueError(f"Unsupported image model: {model}. Supported: {list(model_paths.keys())}")
        url = f"{self.base_url}/{model_path}"
        payload = {
            "prompt": prompt,
            "width": width,
            "height": height,
            "enable_sync_mode": enable_sync_mode,
        }
        # Add optional parameters only when provided (0/None semantics differ:
        # num_inference_steps/guidance_scale/seed are checked against None so
        # that 0 is a valid value; negative_prompt is skipped when empty).
        if num_inference_steps is not None:
            payload["num_inference_steps"] = num_inference_steps
        if guidance_scale is not None:
            payload["guidance_scale"] = guidance_scale
        if negative_prompt:
            payload["negative_prompt"] = negative_prompt
        if seed is not None:
            payload["seed"] = seed
        # Merge extra parameters without clobbering the explicit ones above.
        for key, value in kwargs.items():
            if key not in payload:
                payload[key] = value
        logger.info(f"[WaveSpeed] Generating image via {url} (model={model}, prompt_length={len(prompt)})")
        response = requests.post(url, headers=self._get_headers(), json=payload, timeout=timeout)
        if response.status_code != 200:
            logger.error(f"[WaveSpeed] Image generation failed: {response.status_code} {response.text}")
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "WaveSpeed image generation failed",
                    "status_code": response.status_code,
                    "response": response.text,
                },
            )
        response_json = response.json()
        # Some responses wrap the result in a "data" envelope; unwrap if present.
        data = response_json.get("data") or response_json
        # Check status - if "created" or "processing", we need to poll even in sync mode.
        status = data.get("status", "").lower()
        outputs = data.get("outputs") or []
        prediction_id = data.get("id")
        # Handle sync mode - result should be directly in outputs.
        if enable_sync_mode:
            # If we have outputs and status is "completed", use them directly.
            if outputs and status == "completed":
                logger.info(f"[WaveSpeed] Got immediate results from sync mode (status: {status})")
                image_url = self._extract_image_url(outputs)
                return self._download_image(image_url, timeout)
            # Sync mode returned "created"/"processing" - need the id to poll.
            if not prediction_id:
                logger.error(f"[WaveSpeed] Sync mode returned status '{status}' but no prediction ID: {response.text}")
                raise HTTPException(
                    status_code=502,
                    detail="WaveSpeed sync mode returned async response without prediction ID",
                )
            logger.info(
                f"[WaveSpeed] Sync mode returned status '{status}' with no outputs. "
                f"Falling back to polling (prediction_id: {prediction_id})"
            )
        # Async mode OR sync mode that returned "created"/"processing" - poll for result.
        if not prediction_id:
            logger.error(f"[WaveSpeed] No prediction ID in response: {response.text}")
            raise HTTPException(
                status_code=502,
                detail="WaveSpeed response missing prediction id",
            )
        # Poll for result (use longer timeout for image generation).
        logger.info(f"[WaveSpeed] Polling for image generation result (prediction_id: {prediction_id}, status: {status})")
        result = self.polling.poll_until_complete(prediction_id, timeout_seconds=240, interval_seconds=1.0)
        outputs = result.get("outputs") or []
        if not outputs:
            raise HTTPException(status_code=502, detail="WaveSpeed image generator returned no outputs")
        image_url = self._extract_image_url(outputs)
        return self._download_image(image_url, timeout=60)

    def generate_character_image(
        self,
        prompt: str,
        reference_image_bytes: bytes,
        style: str = "Auto",
        aspect_ratio: str = "16:9",
        rendering_speed: str = "Default",
        timeout: Optional[int] = None,
    ) -> bytes:
        """
        Generate image using Ideogram Character API to maintain character consistency.

        Creates variations of a reference character image while respecting the
        base appearance.

        Note: This API is always async and requires polling for results.

        Args:
            prompt: Text prompt describing the scene/context for the character
            reference_image_bytes: Reference image bytes (base avatar)
            style: Character style type ("Auto", "Fiction", or "Realistic")
            aspect_ratio: Aspect ratio ("1:1", "16:9", "9:16", "4:3", "3:4")
            rendering_speed: Rendering speed ("Default", "Turbo", "Quality")
            timeout: Total timeout in seconds for submission + polling (default: 180)

        Returns:
            bytes: Generated image bytes with consistent character

        Raises:
            HTTPException: On connection failures, API errors, task failure,
                or unrecognized output format.
        """
        import base64
        # Encode reference image to base64 and wrap in a data URI (the API
        # accepts data URIs as well as public URLs).
        image_base64 = base64.b64encode(reference_image_bytes).decode('utf-8')
        image_data_uri = f"data:image/png;base64,{image_base64}"
        url = f"{self.base_url}/ideogram-ai/ideogram-character"
        payload = {
            "prompt": prompt,
            "image": image_data_uri,
            "style": style,
            "aspect_ratio": aspect_ratio,
            "rendering_speed": rendering_speed,
        }
        logger.info(f"[WaveSpeed] Generating character image via Ideogram Character (prompt_length={len(prompt)})")
        # Retry the submission on transient connection failures with
        # exponential backoff. Read/other timeouts are not retried.
        max_retries = 2
        retry_delay = 2.0
        for attempt in range(max_retries + 1):
            try:
                # (connect, read) timeouts for the submission only; the
                # caller-supplied `timeout` governs the polling phase below.
                response = requests.post(
                    url,
                    headers=self._get_headers(),
                    json=payload,
                    timeout=(30, 30)
                )
                break
            except (requests_exceptions.ConnectTimeout, requests_exceptions.ConnectionError) as e:
                if attempt < max_retries:
                    logger.warning(f"[WaveSpeed] Connection attempt {attempt + 1}/{max_retries + 1} failed, retrying in {retry_delay}s: {e}")
                    time.sleep(retry_delay)
                    retry_delay *= 2
                    continue
                else:
                    error_type = "Connection timeout" if isinstance(e, requests_exceptions.ConnectTimeout) else "Connection error"
                    logger.error(f"[WaveSpeed] {error_type} to Ideogram Character API after {max_retries + 1} attempts: {e}")
                    raise HTTPException(
                        status_code=504 if isinstance(e, requests_exceptions.ConnectTimeout) else 502,
                        detail={
                            "error": f"{error_type} to WaveSpeed Ideogram Character API",
                            "message": "Unable to establish connection to the image generation service after multiple attempts. Please check your network connection and try again.",
                            "exception": str(e),
                            "retry_recommended": True,
                        },
                    )
            except requests_exceptions.Timeout as e:
                # Read timeouts (ConnectTimeout is handled above) are not retried.
                logger.error(f"[WaveSpeed] Request timeout to Ideogram Character API: {e}")
                raise HTTPException(
                    status_code=504,
                    detail={
                        "error": "Request timeout to WaveSpeed Ideogram Character API",
                        "message": "The image generation request took too long. Please try again.",
                        "exception": str(e),
                    },
                )
        if response.status_code != 200:
            logger.error(f"[WaveSpeed] Character image generation failed: {response.status_code} {response.text}")
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "WaveSpeed Ideogram Character generation failed",
                    "status_code": response.status_code,
                    "response": response.text,
                },
            )
        response_json = response.json()
        data = response_json.get("data") or response_json
        # Extract prediction ID - required since this API is always async.
        prediction_id = data.get("id")
        if not prediction_id:
            logger.error(f"[WaveSpeed] No prediction ID in response: {response.text}")
            raise HTTPException(
                status_code=502,
                detail="WaveSpeed Ideogram Character response missing prediction id",
            )
        outputs = data.get("outputs") or []
        status = data.get("status", "unknown")
        logger.info(f"[WaveSpeed] Ideogram Character task created: prediction_id={prediction_id}, status={status}")
        # If status is already completed, use outputs directly (unlikely but possible).
        if outputs and status == "completed":
            logger.info(f"[WaveSpeed] Got immediate results from Ideogram Character")
        else:
            # Always need to poll for results (API is async).
            logger.info(f"[WaveSpeed] Polling for Ideogram Character result (status: {status}, prediction_id: {prediction_id})")
            # BUGFIX: previously `timeout if timeout else None` polled
            # indefinitely when timeout was None (and treated 0 as None),
            # contradicting the documented 180s default.
            polling_timeout = timeout if timeout is not None else self.DEFAULT_CHARACTER_TIMEOUT
            result = self.polling.poll_until_complete(
                prediction_id,
                timeout_seconds=polling_timeout,
                interval_seconds=0.5,
            )
            if not isinstance(result, dict):
                logger.error(f"[WaveSpeed] Unexpected result type: {type(result)}, value: {result}")
                raise HTTPException(
                    status_code=502,
                    detail="WaveSpeed Ideogram Character returned unexpected response format",
                )
            outputs = result.get("outputs") or []
            status = result.get("status", "unknown")
            if status != "completed":
                error_msg = "Unknown error"
                if isinstance(result, dict):
                    error_msg = result.get("error") or result.get("message") or str(result.get("details", "Unknown error"))
                else:
                    error_msg = str(result)
                logger.error(f"[WaveSpeed] Ideogram Character task did not complete: status={status}, error={error_msg}")
                raise HTTPException(
                    status_code=502,
                    detail={
                        "error": "WaveSpeed Ideogram Character task failed",
                        "status": status,
                        "message": error_msg,
                    }
                )
        # Extract image URL from outputs.
        if not outputs:
            logger.error(f"[WaveSpeed] No outputs after polling: status={status}")
            raise HTTPException(
                status_code=502,
                detail="WaveSpeed Ideogram Character returned no outputs",
            )
        image_url = self._extract_image_url(outputs)
        return self._download_image(image_url, timeout=60)

    def _extract_image_url(self, outputs: list) -> str:
        """Extract an http(s) image URL from the first output entry.

        Accepts either a bare URL string or a dict with a "url"/"image_url"/
        "output" key. Raises HTTPException(502) on any unrecognized shape.
        """
        if not isinstance(outputs, list) or len(outputs) == 0:
            raise HTTPException(
                status_code=502,
                detail="WaveSpeed image generator output format not recognized",
            )
        first_output = outputs[0]
        if isinstance(first_output, str):
            image_url = first_output
        elif isinstance(first_output, dict):
            image_url = first_output.get("url") or first_output.get("image_url") or first_output.get("output")
        else:
            raise HTTPException(
                status_code=502,
                detail="WaveSpeed image generator output format not recognized",
            )
        if not image_url or not (image_url.startswith("http://") or image_url.startswith("https://")):
            raise HTTPException(
                status_code=502,
                detail="WaveSpeed image generator output format not recognized",
            )
        return image_url

    def _download_image(self, image_url: str, timeout: int = 60) -> bytes:
        """Download the generated image from the result URL.

        Raises HTTPException(502) if the URL does not return HTTP 200.
        """
        logger.info(f"[WaveSpeed] Fetching image from URL: {image_url}")
        image_response = requests.get(image_url, timeout=timeout)
        if image_response.status_code == 200:
            image_bytes = image_response.content
            logger.info(f"[WaveSpeed] Image generated successfully (size: {len(image_bytes)} bytes)")
            return image_bytes
        else:
            logger.error(f"[WaveSpeed] Failed to fetch image from URL: {image_response.status_code}")
            raise HTTPException(
                status_code=502,
                detail="Failed to fetch generated image from WaveSpeed URL",
            )

View File

@@ -0,0 +1,164 @@
"""
Prompt optimization generator for WaveSpeed API.
"""
import requests
from typing import Optional
from fastapi import HTTPException
from utils.logger_utils import get_service_logger
logger = get_service_logger("wavespeed.generators.prompt")
class PromptGenerator:
    """Prompt optimization generator for the WaveSpeed API.

    Wraps the wavespeed-ai/prompt-optimizer endpoint. Sync mode returns the
    optimized text directly when available; otherwise the job is polled to
    completion via the shared polling helper.
    """

    def __init__(self, api_key: str, base_url: str, polling):
        """Initialize prompt generator.

        Args:
            api_key: WaveSpeed API key
            base_url: WaveSpeed API base URL
            polling: WaveSpeedPolling instance for async operations
        """
        self.api_key = api_key
        self.base_url = base_url
        self.polling = polling

    def _get_headers(self) -> dict:
        """Get HTTP headers for API requests."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }

    def optimize_prompt(
        self,
        text: str,
        mode: str = "image",
        style: str = "default",
        image: Optional[str] = None,
        enable_sync_mode: bool = True,
        timeout: int = 30,
    ) -> str:
        """
        Optimize a prompt using WaveSpeed prompt optimizer.

        Args:
            text: The prompt text to optimize
            mode: "image" or "video" (default: "image")
            style: "default", "artistic", "photographic", "technical", "anime", "realistic" (default: "default")
            image: Base64-encoded image for context (optional)
            enable_sync_mode: If True, wait for result and return it directly (default: True)
            timeout: Request timeout in seconds (default: 30)

        Returns:
            Optimized prompt text

        Raises:
            HTTPException: On API errors, missing prediction id, missing
                outputs, or unrecognized output format.
        """
        model_path = "wavespeed-ai/prompt-optimizer"
        url = f"{self.base_url}/{model_path}"
        payload = {
            "text": text,
            "mode": mode,
            "style": style,
            "enable_sync_mode": enable_sync_mode,
        }
        if image:
            payload["image"] = image
        logger.info(f"[WaveSpeed] Optimizing prompt via {url} (mode={mode}, style={style})")
        response = requests.post(url, headers=self._get_headers(), json=payload, timeout=timeout)
        if response.status_code != 200:
            logger.error(f"[WaveSpeed] Prompt optimization failed: {response.status_code} {response.text}")
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "WaveSpeed prompt optimization failed",
                    "status_code": response.status_code,
                    "response": response.text,
                },
            )
        response_json = response.json()
        # Some responses wrap the result in a "data" envelope; unwrap if present.
        data = response_json.get("data") or response_json
        outputs = data.get("outputs") or []
        prediction_id = data.get("id")
        # Sync mode with outputs present - return the optimized text directly.
        if enable_sync_mode and outputs:
            return self._finalize_outputs(outputs, timeout)
        if enable_sync_mode and not prediction_id:
            # Sync mode gave neither outputs nor an id to poll - hard failure.
            logger.error(f"[WaveSpeed] No outputs in sync mode response: {response.text}")
            raise HTTPException(
                status_code=502,
                detail="WaveSpeed prompt optimizer returned no outputs",
            )
        if enable_sync_mode:
            # CONSISTENCY: mirror ImageGenerator - sync mode may still return a
            # "created"/"processing" response; fall back to polling instead of
            # failing outright.
            logger.info(f"[WaveSpeed] Sync mode returned no outputs. Falling back to polling (prediction_id: {prediction_id})")
        if not prediction_id:
            logger.error(f"[WaveSpeed] No prediction ID in async response: {response.text}")
            raise HTTPException(
                status_code=502,
                detail="WaveSpeed response missing prediction id for async mode",
            )
        # Poll for result (prompt optimization is fast, so a short interval).
        result = self.polling.poll_until_complete(prediction_id, timeout_seconds=60, interval_seconds=0.5)
        outputs = result.get("outputs") or []
        if not outputs:
            raise HTTPException(status_code=502, detail="WaveSpeed prompt optimizer returned no outputs")
        return self._finalize_outputs(outputs, timeout)

    def _finalize_outputs(self, outputs: list, timeout: int) -> str:
        """Extract, validate, and log the optimized prompt from raw outputs."""
        optimized_prompt = self._extract_prompt_from_outputs(outputs, timeout)
        if not optimized_prompt:
            logger.error(f"[WaveSpeed] Could not extract optimized prompt from outputs: {outputs}")
            raise HTTPException(
                status_code=502,
                detail="WaveSpeed prompt optimizer output format not recognized",
            )
        logger.info(f"[WaveSpeed] Prompt optimized successfully (length: {len(optimized_prompt)} chars)")
        return optimized_prompt

    def _extract_prompt_from_outputs(self, outputs: list, timeout: int) -> Optional[str]:
        """Extract optimized prompt from outputs, handling URLs and direct text.

        Returns None when the shape is not recognized, so the caller can raise
        a uniform error.
        """
        if not isinstance(outputs, list) or len(outputs) == 0:
            return None
        first_output = outputs[0]
        # If it's a string that looks like a URL, fetch it.
        if isinstance(first_output, str):
            if first_output.startswith("http://") or first_output.startswith("https://"):
                logger.info(f"[WaveSpeed] Fetching optimized prompt from URL: {first_output}")
                url_response = requests.get(first_output, timeout=timeout)
                if url_response.status_code == 200:
                    return url_response.text.strip()
                else:
                    logger.error(f"[WaveSpeed] Failed to fetch prompt from URL: {url_response.status_code}")
                    raise HTTPException(
                        status_code=502,
                        detail="Failed to fetch optimized prompt from WaveSpeed URL",
                    )
            else:
                # It's already the text.
                return first_output
        elif isinstance(first_output, dict):
            return first_output.get("text") or first_output.get("prompt") or first_output.get("output")
        return None

View File

@@ -0,0 +1,223 @@
"""
Speech generation generator for WaveSpeed API.
"""
import time
import requests
from typing import Optional
from requests import exceptions as requests_exceptions
from fastapi import HTTPException
from utils.logger_utils import get_service_logger
logger = get_service_logger("wavespeed.generators.speech")
class SpeechGenerator:
    """Text-to-speech generator backed by WaveSpeed's Minimax Speech 02 HD model.

    Submits TTS requests, optionally waits synchronously for the result, and
    downloads the rendered audio file.
    """

    def __init__(self, api_key: str, base_url: str, polling):
        """Initialize speech generator.

        Args:
            api_key: WaveSpeed API key
            base_url: WaveSpeed API base URL
            polling: WaveSpeedPolling instance used when sync mode is disabled
        """
        self.api_key = api_key
        self.base_url = base_url
        self.polling = polling

    def _get_headers(self) -> dict:
        """Get HTTP headers (JSON content type plus bearer auth) for API requests."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }

    def generate_speech(
        self,
        text: str,
        voice_id: str,
        speed: float = 1.0,
        volume: float = 1.0,
        pitch: float = 0.0,
        emotion: str = "happy",
        enable_sync_mode: bool = True,
        timeout: int = 120,
        **kwargs
    ) -> bytes:
        """
        Generate speech audio using Minimax Speech 02 HD via WaveSpeed.

        Args:
            text: Text to convert to speech (max 10000 characters)
            voice_id: Voice ID (e.g., "Wise_Woman", "Friendly_Person", etc.)
            speed: Speech speed (0.5-2.0, default: 1.0)
            volume: Speech volume (0.1-10.0, default: 1.0)
            pitch: Speech pitch (-12 to 12, default: 0.0)
            emotion: Emotion ("happy", "sad", "angry", etc., default: "happy")
            enable_sync_mode: If True, wait for result and return it directly (default: True)
            timeout: Read/download timeout in seconds (default: 120).
                (Fixed: previously documented as 60, and ignored by the
                generation request whose read timeout was hard-coded to 60.)
            **kwargs: Additional parameters (sample_rate, bitrate, format, etc.)

        Returns:
            bytes: Generated audio bytes

        Raises:
            HTTPException: 504 on connection/read timeouts, 502 on API errors
                or unrecognized output formats.
        """
        model_path = "minimax/speech-02-hd"
        url = f"{self.base_url}/{model_path}"
        payload = {
            "text": text,
            "voice_id": voice_id,
            "speed": speed,
            "volume": volume,
            "pitch": pitch,
            "emotion": emotion,
            "enable_sync_mode": enable_sync_mode,
        }
        # Forward only the optional parameters the endpoint understands.
        optional_params = [
            "english_normalization",
            "sample_rate",
            "bitrate",
            "channel",
            "format",
            "language_boost",
        ]
        for param in optional_params:
            if param in kwargs:
                payload[param] = kwargs[param]
        logger.info(f"[WaveSpeed] Generating speech via {url} (voice={voice_id}, text_length={len(text)})")
        # Retry on transient connection issues with exponential backoff.
        max_retries = 2
        retry_delay = 2.0
        for attempt in range(max_retries + 1):
            try:
                response = requests.post(
                    url,
                    headers=self._get_headers(),
                    json=payload,
                    # Fixed: honor the caller's timeout for the read phase
                    # (previously hard-coded to 60s regardless of `timeout`).
                    timeout=(30, timeout),  # (connect, read)
                )
                break
            except (requests_exceptions.ConnectTimeout, requests_exceptions.ConnectionError) as e:
                if attempt < max_retries:
                    logger.warning(
                        f"[WaveSpeed] Speech connection attempt {attempt + 1}/{max_retries + 1} failed, "
                        f"retrying in {retry_delay}s: {e}"
                    )
                    time.sleep(retry_delay)
                    retry_delay *= 2
                    continue
                logger.error(f"[WaveSpeed] Speech connection failed after {max_retries + 1} attempts: {e}")
                raise HTTPException(
                    status_code=504,
                    detail={
                        "error": "Connection to WaveSpeed speech API timed out",
                        "message": "Unable to reach the speech service. Please try again.",
                        "exception": str(e),
                        "retry_recommended": True,
                    },
                )
            except requests_exceptions.Timeout as e:
                logger.error(f"[WaveSpeed] Speech request timeout: {e}")
                raise HTTPException(
                    status_code=504,
                    detail={
                        "error": "WaveSpeed speech request timed out",
                        "message": "The speech generation request took too long. Please try again.",
                        "exception": str(e),
                    },
                )
        if response.status_code != 200:
            logger.error(f"[WaveSpeed] Speech generation failed: {response.status_code} {response.text}")
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "WaveSpeed speech generation failed",
                    "status_code": response.status_code,
                    "response": response.text,
                },
            )
        response_json = response.json()
        # Some responses wrap the result in "data"; fall back to the root object.
        data = response_json.get("data") or response_json
        if enable_sync_mode:
            # Sync mode: the audio URL is directly in the response outputs.
            outputs = data.get("outputs") or []
            if not outputs:
                logger.error(f"[WaveSpeed] No outputs in sync mode response: {response.text}")
                raise HTTPException(
                    status_code=502,
                    detail="WaveSpeed speech generator returned no outputs",
                )
            audio_url = self._extract_audio_url(outputs)
            return self._download_audio(audio_url, timeout)
        # Async mode: the response only carries a prediction id; poll until done.
        prediction_id = data.get("id")
        if not prediction_id:
            logger.error(f"[WaveSpeed] No prediction ID in async response: {response.text}")
            raise HTTPException(
                status_code=502,
                detail="WaveSpeed response missing prediction id for async mode",
            )
        result = self.polling.poll_until_complete(prediction_id, timeout_seconds=120, interval_seconds=0.5)
        outputs = result.get("outputs") or []
        if not outputs:
            raise HTTPException(status_code=502, detail="WaveSpeed speech generator returned no outputs")
        audio_url = self._extract_audio_url(outputs)
        return self._download_audio(audio_url, timeout)

    def _extract_audio_url(self, outputs: list) -> str:
        """Return the first output's audio URL, validating it is an http(s) URL.

        Raises:
            HTTPException: 502 when the outputs are empty or not URL-shaped.
        """
        if not isinstance(outputs, list) or len(outputs) == 0:
            raise HTTPException(
                status_code=502,
                detail="WaveSpeed speech generator output format not recognized",
            )
        first_output = outputs[0]
        if isinstance(first_output, str):
            audio_url = first_output
        elif isinstance(first_output, dict):
            audio_url = first_output.get("url") or first_output.get("output")
        else:
            raise HTTPException(
                status_code=502,
                detail="WaveSpeed speech generator output format not recognized",
            )
        if not audio_url or not (audio_url.startswith("http://") or audio_url.startswith("https://")):
            raise HTTPException(
                status_code=502,
                detail="WaveSpeed speech generator output format not recognized",
            )
        return audio_url

    def _download_audio(self, audio_url: str, timeout: int) -> bytes:
        """Download the rendered audio file and return its raw bytes.

        Raises:
            HTTPException: 502 when the download does not return HTTP 200.
        """
        logger.info(f"[WaveSpeed] Fetching audio from URL: {audio_url}")
        audio_response = requests.get(audio_url, timeout=timeout)
        if audio_response.status_code == 200:
            audio_bytes = audio_response.content
            logger.info(f"[WaveSpeed] Speech generated successfully (size: {len(audio_bytes)} bytes)")
            return audio_bytes
        else:
            logger.error(f"[WaveSpeed] Failed to fetch audio from URL: {audio_response.status_code}")
            raise HTTPException(
                status_code=502,
                detail="Failed to fetch generated audio from WaveSpeed URL",
            )

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,253 @@
"""
Hunyuan Avatar Service
Service for creating talking avatars using Hunyuan Avatar model.
Reference: https://wavespeed.ai/models/wavespeed-ai/hunyuan-avatar
"""
from __future__ import annotations
import base64
from typing import Any, Dict, Optional
import requests
from fastapi import HTTPException
from loguru import logger
from .client import WaveSpeedClient
HUNYUAN_AVATAR_MODEL_PATH = "wavespeed-ai/hunyuan-avatar"
HUNYUAN_AVATAR_MODEL_NAME = "wavespeed-ai/hunyuan-avatar"
MAX_IMAGE_BYTES = 10 * 1024 * 1024 # 10MB
MAX_AUDIO_BYTES = 50 * 1024 * 1024 # 50MB safety cap
MAX_DURATION_SECONDS = 120 # 2 minutes maximum
MIN_DURATION_SECONDS = 5 # Minimum billable duration
def _as_data_uri(content_bytes: bytes, mime_type: str) -> str:
"""Convert bytes to data URI."""
encoded = base64.b64encode(content_bytes).decode("utf-8")
return f"data:{mime_type};base64,{encoded}"
def calculate_hunyuan_avatar_cost(resolution: str, duration: float) -> float:
"""
Calculate cost for Hunyuan Avatar video.
Pricing:
- 480p: $0.15 per 5 seconds
- 720p: $0.30 per 5 seconds
- Minimum charge: 5 seconds
- Maximum billable: 120 seconds
Args:
resolution: Output resolution (480p or 720p)
duration: Video duration in seconds
Returns:
Cost in USD
"""
# Clamp duration to valid range
actual_duration = max(MIN_DURATION_SECONDS, min(duration, MAX_DURATION_SECONDS))
# Calculate cost per 5 seconds
cost_per_5_seconds = 0.15 if resolution == "480p" else 0.30
# Round up to nearest 5 seconds
billable_5_second_blocks = (actual_duration + 4) // 5 # Ceiling division
return cost_per_5_seconds * billable_5_second_blocks
def create_hunyuan_avatar(
    *,
    image_bytes: bytes,
    audio_bytes: bytes,
    resolution: str = "480p",
    prompt: Optional[str] = None,
    seed: Optional[int] = None,
    user_id: str = "video_studio",
    image_mime: str = "image/png",
    audio_mime: str = "audio/mpeg",
    client: Optional[WaveSpeedClient] = None,
    progress_callback: Optional[callable] = None,
) -> Dict[str, Any]:
    """
    Create talking avatar video using Hunyuan Avatar.

    Reference: https://wavespeed.ai/docs/docs-api/wavespeed-ai/hunyuan-avatar

    Workflow: validate inputs -> submit media as base64 data URIs -> poll for
    up to 10 minutes -> download the produced video -> assemble a result dict.

    Args:
        image_bytes: Portrait image as bytes (max 10MB)
        audio_bytes: Audio file as bytes (max 50MB)
        resolution: Output resolution (480p or 720p, default: 480p)
        prompt: Optional text to guide expression or style
        seed: Optional random seed (-1 for random)
        user_id: User ID for tracking (currently unused in this function body)
        image_mime: MIME type of image
        audio_mime: MIME type of audio
        client: Optional WaveSpeedClient instance (a fresh one is built if omitted)
        progress_callback: Optional callable(progress: float, message: str)

    Returns:
        Dictionary with video_bytes, prompt, duration, model_name, cost, etc.

    Raises:
        HTTPException: 400 on invalid inputs; 502 on API, output, or download
            failures (detail carries prediction_id where available).
    """
    if not image_bytes:
        raise HTTPException(status_code=400, detail="Image bytes are required for Hunyuan Avatar.")
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Audio bytes are required for Hunyuan Avatar.")
    if len(image_bytes) > MAX_IMAGE_BYTES:
        raise HTTPException(
            status_code=400,
            detail=f"Image exceeds {MAX_IMAGE_BYTES / (1024 * 1024):.0f}MB limit required by Hunyuan Avatar.",
        )
    if len(audio_bytes) > MAX_AUDIO_BYTES:
        raise HTTPException(
            status_code=400,
            detail=f"Audio exceeds {MAX_AUDIO_BYTES / (1024 * 1024):.0f}MB limit allowed for Hunyuan Avatar requests.",
        )
    if resolution not in {"480p", "720p"}:
        raise HTTPException(status_code=400, detail="Resolution must be '480p' or '720p'.")
    # Build payload (media is inlined as base64 data URIs)
    payload: Dict[str, Any] = {
        "image": _as_data_uri(image_bytes, image_mime),
        "audio": _as_data_uri(audio_bytes, audio_mime),
        "resolution": resolution,
    }
    if prompt:
        payload["prompt"] = prompt.strip()
    if seed is not None:
        payload["seed"] = seed
    client = client or WaveSpeedClient()
    # Progress callback: submission
    if progress_callback:
        progress_callback(10.0, "Submitting Hunyuan Avatar request to WaveSpeed...")
    prediction_id = client.submit_image_to_video(HUNYUAN_AVATAR_MODEL_PATH, payload, timeout=60)
    try:
        # Poll for completion
        if progress_callback:
            progress_callback(20.0, f"Polling for completion (prediction_id: {prediction_id})...")
        # NOTE(review): assumes WaveSpeedClient exposes poll_until_complete
        # directly (not only via client.polling) — confirm on the client.
        result = client.poll_until_complete(
            prediction_id,
            timeout_seconds=600,  # 10 minutes max
            interval_seconds=0.5,  # Poll every 0.5 seconds
            progress_callback=progress_callback,
        )
    except HTTPException as exc:
        # Annotate the failure so callers can resume with the prediction id.
        detail = exc.detail or {}
        if isinstance(detail, dict):
            detail.setdefault("prediction_id", prediction_id)
            detail.setdefault("resume_available", True)
        raise
    outputs = result.get("outputs") or []
    if not outputs:
        raise HTTPException(
            status_code=502,
            detail={
                "error": "Hunyuan Avatar completed but returned no outputs",
                "prediction_id": prediction_id,
            }
        )
    video_url = outputs[0]
    if not isinstance(video_url, str) or not video_url.startswith("http"):
        raise HTTPException(
            status_code=502,
            detail={
                "error": f"Invalid video URL format: {video_url}",
                "prediction_id": prediction_id,
            }
        )
    # Progress callback: downloading video
    if progress_callback:
        progress_callback(90.0, "Downloading generated video...")
    # Download video
    try:
        video_response = requests.get(video_url, timeout=180)
        if video_response.status_code != 200:
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "Failed to download Hunyuan Avatar video",
                    "status_code": video_response.status_code,
                    "response": video_response.text[:200],
                    "prediction_id": prediction_id,
                }
            )
    except requests.exceptions.RequestException as e:
        raise HTTPException(
            status_code=502,
            detail={
                "error": f"Failed to download video: {str(e)}",
                "prediction_id": prediction_id,
            }
        )
    video_bytes = video_response.content
    if len(video_bytes) == 0:
        raise HTTPException(
            status_code=502,
            detail={
                "error": "Downloaded video is empty",
                "prediction_id": prediction_id,
            }
        )
    # Estimate duration (we don't get exact duration from API, so estimate from audio or use default)
    # For now, we'll use a default estimate - in production, you might want to analyze the audio file
    estimated_duration = 10.0  # Default estimate
    # Calculate cost (billed from the fixed estimate above, not actual output length)
    cost = calculate_hunyuan_avatar_cost(resolution, estimated_duration)
    # Get video dimensions from resolution
    resolution_dims = {
        "480p": (854, 480),
        "720p": (1280, 720),
    }
    width, height = resolution_dims.get(resolution, (854, 480))
    # Extract metadata from the API result and fold in request context
    metadata = result.get("metadata", {})
    metadata.update({
        "has_nsfw_contents": result.get("has_nsfw_contents", []),
        "created_at": result.get("created_at"),
        "resolution": resolution,
        "max_duration": MAX_DURATION_SECONDS,
    })
    logger.info(
        f"[Hunyuan Avatar] ✅ Generated video: {len(video_bytes)} bytes, "
        f"resolution={resolution}, cost=${cost:.2f}"
    )
    # Progress callback: completed
    if progress_callback:
        progress_callback(100.0, "Avatar generation completed!")
    return {
        "video_bytes": video_bytes,
        "prompt": prompt or "",
        "duration": estimated_duration,
        "model_name": HUNYUAN_AVATAR_MODEL_NAME,
        "cost": cost,
        "provider": "wavespeed",
        "resolution": resolution,
        "width": width,
        "height": height,
        "metadata": metadata,
        "source_video_url": video_url,
        "prediction_id": prediction_id,
    }

View File

@@ -0,0 +1,191 @@
from __future__ import annotations
import base64
from typing import Any, Dict, Optional
import requests
from fastapi import HTTPException
from loguru import logger
from .client import WaveSpeedClient
INFINITALK_MODEL_PATH = "wavespeed-ai/infinitetalk"
INFINITALK_MODEL_NAME = "wavespeed-ai/infinitetalk"
INFINITALK_DEFAULT_COST = 0.30 # $0.30 per 5 seconds at 720p tier
MAX_IMAGE_BYTES = 10 * 1024 * 1024 # 10MB
MAX_AUDIO_BYTES = 50 * 1024 * 1024 # 50MB safety cap
def _as_data_uri(content_bytes: bytes, mime_type: str) -> str:
encoded = base64.b64encode(content_bytes).decode("utf-8")
return f"data:{mime_type};base64,{encoded}"
def _generate_simple_infinitetalk_prompt(
scene_data: Dict[str, Any],
story_context: Dict[str, Any],
) -> Optional[str]:
"""
Generate a balanced, concise prompt for InfiniteTalk.
InfiniteTalk is audio-driven, so the prompt should describe the scene and suggest
subtle motion, but avoid overly elaborate cinematic descriptions.
Returns None if no meaningful prompt can be generated.
"""
title = (scene_data.get("title") or "").strip()
description = (scene_data.get("description") or "").strip()
image_prompt = (scene_data.get("image_prompt") or "").strip()
# Build a balanced prompt: scene description + simple motion hint
parts = []
# Start with the main subject/scene
if title and len(title) > 5 and title.lower() not in ("scene", "podcast", "episode"):
parts.append(title)
elif description:
# Take first sentence or first 60 chars
desc_part = description.split('.')[0][:60].strip()
if desc_part:
parts.append(desc_part)
elif image_prompt:
# Take first sentence or first 60 chars
img_part = image_prompt.split('.')[0][:60].strip()
if img_part:
parts.append(img_part)
if not parts:
return None
# Add a simple, subtle motion suggestion (not elaborate camera movements)
# Keep it natural and audio-driven
motion_hints = [
"with subtle movement",
"with gentle motion",
"with natural animation",
]
# Combine scene description with subtle motion hint
if len(parts[0]) < 80:
# Room for a motion hint
prompt = f"{parts[0]}, {motion_hints[0]}"
else:
# Just use the description if it's already long enough
prompt = parts[0]
# Keep it concise - max 120 characters (allows for scene + motion hint)
prompt = prompt[:120].strip()
# Clean up trailing commas or incomplete sentences
if prompt.endswith(','):
prompt = prompt[:-1].strip()
return prompt if len(prompt) >= 15 else None
def animate_scene_with_voiceover(
    *,
    image_bytes: bytes,
    audio_bytes: bytes,
    scene_data: Dict[str, Any],
    story_context: Dict[str, Any],
    user_id: str,
    resolution: str = "720p",
    prompt_override: Optional[str] = None,
    mask_image_bytes: Optional[bytes] = None,
    seed: Optional[int] = -1,
    image_mime: str = "image/png",
    audio_mime: str = "audio/mpeg",
    client: Optional[WaveSpeedClient] = None,
) -> Dict[str, Any]:
    """
    Animate a scene image with narration audio using WaveSpeed InfiniteTalk.

    Args:
        image_bytes: Scene image (max 10MB).
        audio_bytes: Narration audio (max 50MB).
        scene_data: Scene metadata used to derive an optional guidance prompt.
        story_context: Story metadata forwarded to prompt generation.
        user_id: User identifier, used for logging only.
        resolution: "480p" or "720p".
        prompt_override: Explicit prompt; skips automatic prompt generation.
        mask_image_bytes: Optional mask image forwarded to the model.
        seed: Random seed (-1 lets the service choose).
        image_mime: MIME type used for the image/mask data URIs.
        audio_mime: MIME type used for the audio data URI.
        client: Optional pre-built WaveSpeedClient (a new one is created otherwise).

    Returns:
        Dict with video bytes, prompt used, duration, model name, and cost.

    Raises:
        HTTPException: On validation failure, submission/polling errors
            (detail carries resume metadata), or a failed video download.
    """
    if not image_bytes:
        # NOTE(review): 404 is unusual for invalid input (size checks below use
        # 400); kept as-is because callers may match on this status code.
        raise HTTPException(status_code=404, detail="Scene image bytes missing for animation.")
    if not audio_bytes:
        raise HTTPException(status_code=404, detail="Scene audio bytes missing for animation.")
    if len(image_bytes) > MAX_IMAGE_BYTES:
        raise HTTPException(
            status_code=400,
            detail="Scene image exceeds 10MB limit required by WaveSpeed InfiniteTalk.",
        )
    if len(audio_bytes) > MAX_AUDIO_BYTES:
        raise HTTPException(
            status_code=400,
            detail="Scene audio exceeds 50MB limit allowed for InfiniteTalk requests.",
        )
    if resolution not in {"480p", "720p"}:
        raise HTTPException(status_code=400, detail="Resolution must be '480p' or '720p'.")
    # Generate simple, concise prompt for InfiniteTalk (audio-driven, less need for elaborate descriptions)
    animation_prompt = prompt_override or _generate_simple_infinitetalk_prompt(scene_data, story_context)
    payload: Dict[str, Any] = {
        "image": _as_data_uri(image_bytes, image_mime),
        "audio": _as_data_uri(audio_bytes, audio_mime),
        "resolution": resolution,
    }
    # Only include prompt if we have a meaningful one (InfiniteTalk works fine without it)
    if animation_prompt:
        payload["prompt"] = animation_prompt
    if mask_image_bytes:
        payload["mask_image"] = _as_data_uri(mask_image_bytes, image_mime)
    if seed is not None:
        payload["seed"] = seed
    client = client or WaveSpeedClient()
    prediction_id = client.submit_image_to_video(INFINITALK_MODEL_PATH, payload, timeout=60)
    try:
        # Poll faster (0.5s) to mirror reference pattern; allow up to 10 minutes
        result = client.poll_until_complete(prediction_id, timeout_seconds=600, interval_seconds=0.5)
    except HTTPException as exc:
        # Attach resume metadata so the caller can fetch the result later.
        detail = exc.detail or {}
        if isinstance(detail, dict):
            detail.setdefault("prediction_id", prediction_id)
            detail.setdefault("resume_available", True)
        raise
    outputs = result.get("outputs") or []
    if not outputs:
        raise HTTPException(status_code=502, detail="WaveSpeed InfiniteTalk completed but returned no outputs.")
    video_url = outputs[0]
    video_response = requests.get(video_url, timeout=180)
    if video_response.status_code != 200:
        raise HTTPException(
            status_code=502,
            detail={
                "error": "Failed to download InfiniteTalk video",
                "status_code": video_response.status_code,
                "response": video_response.text[:200],
            },
        )
    metadata = result.get("metadata") or {}
    duration = metadata.get("duration_seconds") or metadata.get("duration") or 0
    # Fixed: loguru interpolates '{}' placeholders via str.format, not
    # printf-style '%s' — the original call logged literal "%s" markers and
    # silently dropped its arguments. An f-string renders correctly.
    logger.info(
        f"[InfiniteTalk] Generated talking avatar video user={user_id} "
        f"scene={scene_data.get('scene_number')} resolution={resolution} "
        f"size={len(video_response.content)} bytes"
    )
    return {
        "video_bytes": video_response.content,
        "prompt": animation_prompt,
        "duration": duration or 5,  # fall back to the 5s billing minimum when unknown
        "model_name": INFINITALK_MODEL_NAME,
        "cost": INFINITALK_DEFAULT_COST,
        "provider": "wavespeed",
        "source_video_url": video_url,
        "prediction_id": prediction_id,
    }

View File

@@ -0,0 +1,360 @@
from __future__ import annotations
import base64
import json
from typing import Any, Dict, Optional
import requests
from fastapi import HTTPException
from services.llm_providers.main_text_generation import llm_text_gen
from utils.logger_utils import get_service_logger
from .client import WaveSpeedClient
try:
import imghdr
except ModuleNotFoundError: # Python 3.13 removed imghdr
imghdr = None
logger = get_service_logger("wavespeed.kling_animation")
KLING_MODEL_PATH = "kwaivgi/kling-v2.5-turbo-std/image-to-video"
KLING_MODEL_5S = "kling-v2.5-turbo-std-5s"
KLING_MODEL_10S = "kling-v2.5-turbo-std-10s"
MAX_IMAGE_BYTES = 10 * 1024 * 1024 # 10 MB limit per docs
def _detect_image_mime(image_bytes: bytes) -> str:
if imghdr:
detected = imghdr.what(None, h=image_bytes)
if detected == "jpeg":
return "image/jpeg"
if detected == "png":
return "image/png"
if detected == "gif":
return "image/gif"
header = image_bytes[:8]
if header.startswith(b"\x89PNG"):
return "image/png"
if header[:2] == b"\xff\xd8":
return "image/jpeg"
if header[:3] in (b"GIF", b"GIF"):
return "image/gif"
return "image/png"
def _build_fallback_prompt(scene_data: Dict[str, Any], story_context: Dict[str, Any]) -> str:
title = (scene_data.get("title") or "Scene").strip()
description = (scene_data.get("description") or "").strip()
image_prompt = (scene_data.get("image_prompt") or "").strip()
tone = (story_context.get("story_tone") or "story").strip()
setting = (story_context.get("story_setting") or "the scene").strip()
parts = [
f"{title} cinematic motion shot.",
description[:220] if description else "",
f"Camera glides with subtle parallax over {setting}.",
f"Maintain a {tone} mood with natural lighting accents.",
f"Honor the original illustration details: {image_prompt[:200]}." if image_prompt else "",
"5-second sequence, gentle push-in, flowing cloth and atmospheric particles.",
]
fallback_prompt = " ".join(filter(None, parts))
return fallback_prompt.strip()
def _load_llm_json_response(response_text: Any) -> Dict[str, Any]:
"""Normalize responses from llm_text_gen (dict or JSON string)."""
if isinstance(response_text, dict):
return response_text
if isinstance(response_text, str):
return json.loads(response_text)
raise ValueError(f"Unexpected response type: {type(response_text)}")
def _generate_text_prompt(
    *,
    prompt: str,
    system_prompt: str,
    user_id: str,
    fallback_prompt: str,
) -> str:
    """Fallback text generation when structured JSON parsing fails.

    Calls llm_text_gen without a JSON schema and coerces whatever comes back
    (dict or plain text) into a usable prompt string. Returns
    ``fallback_prompt`` on any failure except HTTP 429, which is re-raised so
    rate limits surface to the caller.

    Args:
        prompt: The user prompt to send to the LLM.
        system_prompt: System instruction for the LLM.
        user_id: Forwarded to the LLM provider.
        fallback_prompt: Deterministic prompt returned when generation fails.
    """
    try:
        response = llm_text_gen(
            prompt=prompt.strip(),
            system_prompt=system_prompt,
            user_id=user_id,
        )
    except HTTPException as exc:
        if exc.status_code == 429:
            # Propagate rate limiting instead of silently degrading output.
            raise
        # NOTE(review): %-style lazy args assume get_service_logger returns a
        # stdlib-style logger — confirm against utils.logger_utils.
        logger.warning(
            "[AnimateScene] Text-mode prompt generation failed (%s). Using deterministic fallback.",
            exc.detail,
        )
        return fallback_prompt
    except Exception as exc:
        logger.error(
            "[AnimateScene] Unexpected error generating text prompt: %s",
            exc,
            exc_info=True,
        )
        return fallback_prompt
    if isinstance(response, dict):
        # Prefer the known keys; first non-empty string wins.
        candidates = [
            response.get("animation_prompt"),
            response.get("prompt"),
            response.get("text"),
        ]
        for candidate in candidates:
            if isinstance(candidate, str) and candidate.strip():
                return candidate.strip()
        # As a last resort, stringify the dict
        response_text = json.dumps(response, ensure_ascii=False)
    else:
        response_text = str(response)
    cleaned = response_text.strip()
    return cleaned or fallback_prompt
def generate_animation_prompt(
    scene_data: Dict[str, Any],
    story_context: Dict[str, Any],
    user_id: str,
) -> str:
    """
    Generate an animation-focused prompt using llm_text_gen.

    Strategy: ask for structured JSON first; on schema/parsing failure fall
    back to free-text generation; on any other failure return a deterministic
    prompt built from the scene data. HTTP 429 (rate limit) is always
    re-raised so callers can handle it.

    Args:
        scene_data: Scene title/description/image_prompt.
        story_context: Story tone and setting.
        user_id: Forwarded to the LLM provider.

    Returns:
        A non-empty prompt string.
    """
    fallback_prompt = _build_fallback_prompt(scene_data, story_context)
    system_prompt = (
        "You are an expert cinematic animation director. "
        "You transform static illustrated scenes into short cinematic motion clips. "
        "Describe motion, camera behavior, atmosphere, and pacing."
    )
    description = scene_data.get("description", "")
    image_prompt = scene_data.get("image_prompt", "")
    title = scene_data.get("title", "")
    # Fixed: these previously read the same key twice
    # (`story_context.get("story_tone") or story_context.get("story_tone", "")`);
    # a single lookup with an empty-string fallback is equivalent.
    tone = story_context.get("story_tone") or ""
    setting = story_context.get("story_setting") or ""
    prompt = f"""
Create a concise animation prompt (2-3 sentences) for a 5-second cinematic clip.
Scene Title: {title}
Description: {description}
Existing Image Prompt: {image_prompt}
Story Tone: {tone}
Setting: {setting}
Focus on:
- Motion of characters/objects
- Camera movement (pan, zoom, dolly, orbit)
- Atmosphere, lighting, and emotion
- Timing cues appropriate for a {tone or "story"} scene
Respond with JSON: {{"animation_prompt": "<prompt>"}}
"""
    try:
        response = llm_text_gen(
            prompt=prompt.strip(),
            system_prompt=system_prompt,
            user_id=user_id,
            json_struct={
                "type": "object",
                "properties": {
                    "animation_prompt": {
                        "type": "string",
                        "description": "A cinematic motion prompt for the WaveSpeed image-to-video model.",
                    }
                },
                "required": ["animation_prompt"],
            },
        )
        structured = _load_llm_json_response(response)
        animation_prompt = structured.get("animation_prompt")
        if not animation_prompt or not isinstance(animation_prompt, str):
            raise ValueError("Missing animation_prompt in structured response")
        cleaned_prompt = animation_prompt.strip()
        if not cleaned_prompt:
            raise ValueError("animation_prompt is empty after trimming")
        return cleaned_prompt
    except HTTPException as exc:
        if exc.status_code == 429:
            # Propagate rate limiting instead of silently degrading output.
            raise
        logger.warning(
            "[AnimateScene] Structured LLM prompt generation failed (%s). Falling back to text parsing.",
            exc.detail,
        )
        return _generate_text_prompt(
            prompt=prompt,
            system_prompt=system_prompt,
            user_id=user_id,
            fallback_prompt=fallback_prompt,
        )
    except (json.JSONDecodeError, ValueError, KeyError) as exc:
        logger.warning(
            "[AnimateScene] Failed to parse structured animation prompt (%s). Falling back to text parsing.",
            exc,
        )
        return _generate_text_prompt(
            prompt=prompt,
            system_prompt=system_prompt,
            user_id=user_id,
            fallback_prompt=fallback_prompt,
        )
    except Exception as exc:
        logger.error(
            "[AnimateScene] Unexpected error generating animation prompt: %s",
            exc,
            exc_info=True,
        )
        return fallback_prompt
def animate_scene_image(
    *,
    image_bytes: bytes,
    scene_data: Dict[str, Any],
    story_context: Dict[str, Any],
    user_id: str,
    duration: int = 5,
    guidance_scale: float = 0.5,
    negative_prompt: Optional[str] = None,
    client: Optional[WaveSpeedClient] = None,
) -> Dict[str, Any]:
    """
    Animate a scene image using WaveSpeed Kling v2.5 Turbo Std.

    Args:
        image_bytes: Source still image (max 10MB).
        scene_data: Scene metadata used for LLM prompt generation.
        story_context: Story tone/setting used for LLM prompt generation.
        user_id: Forwarded to the LLM prompt generator.
        duration: Clip length; must be 5 or 10 seconds.
        guidance_scale: Clamped into [0.0, 1.0] before submission.
        negative_prompt: Optional negative prompt forwarded to the model.
        client: Optional pre-built WaveSpeedClient.

    Returns:
        Dict with video bytes, prompt used, model name, duration, and cost.

    Raises:
        HTTPException: On validation errors, polling failure (with resume
            metadata attached to the detail), or video download failure.
    """
    if duration not in (5, 10):
        raise HTTPException(status_code=400, detail="Duration must be 5 or 10 seconds for scene animation.")
    if len(image_bytes) > MAX_IMAGE_BYTES:
        raise HTTPException(
            status_code=400,
            detail="Scene image exceeds 10MB limit required by WaveSpeed."
        )
    # Clamp rather than reject out-of-range guidance values.
    guidance_scale = max(0.0, min(1.0, guidance_scale))
    animation_prompt = generate_animation_prompt(scene_data, story_context, user_id)
    # NOTE(review): this endpoint is sent plain base64 (no "data:" URI prefix),
    # unlike the avatar services — confirm against the Kling API docs.
    image_b64 = base64.b64encode(image_bytes).decode("utf-8")
    payload = {
        "duration": duration,
        "guidance_scale": guidance_scale,
        "image": image_b64,
        "prompt": animation_prompt,
    }
    if negative_prompt:
        payload["negative_prompt"] = negative_prompt.strip()
    client = client or WaveSpeedClient()
    prediction_id = client.submit_image_to_video(KLING_MODEL_PATH, payload)
    try:
        result = client.poll_until_complete(prediction_id, timeout_seconds=240, interval_seconds=1.0)
    except HTTPException as exc:
        # Attach resume metadata so callers can fetch the video later.
        detail = exc.detail or {}
        if isinstance(detail, dict):
            detail.setdefault("prediction_id", prediction_id)
            detail.setdefault("resume_available", True)
            detail.setdefault("message", "WaveSpeed request is still processing. Use resume endpoint to fetch the video once ready.")
        raise HTTPException(status_code=exc.status_code, detail=detail)
    outputs = result.get("outputs") or []
    if not outputs:
        raise HTTPException(status_code=502, detail="WaveSpeed completed but returned no outputs.")
    video_url = outputs[0]
    video_response = requests.get(video_url, timeout=60)
    if video_response.status_code != 200:
        raise HTTPException(
            status_code=502,
            detail={
                "error": "Failed to download animation video",
                "status_code": video_response.status_code,
                "response": video_response.text[:200],
            },
        )
    # Model label and flat pricing depend only on the clip duration.
    model_name = KLING_MODEL_5S if duration == 5 else KLING_MODEL_10S
    cost = 0.21 if duration == 5 else 0.42
    return {
        "video_bytes": video_response.content,
        "prompt": animation_prompt,
        "duration": duration,
        "model_name": model_name,
        "cost": cost,
        "provider": "wavespeed",
        "source_video_url": video_url,
        "prediction_id": prediction_id,
    }
def resume_scene_animation(
    *,
    prediction_id: str,
    duration: int,
    user_id: str,
    client: Optional[WaveSpeedClient] = None,
) -> Dict[str, Any]:
    """
    Resume a previously submitted animation by fetching the completed result.

    Args:
        prediction_id: WaveSpeed prediction to look up.
        duration: Original clip duration (5 or 10) — needed to reconstruct the
            model name and cost, which are not returned by the API.
        user_id: Currently unused in this function body; kept for call-site parity.
        client: Optional pre-built WaveSpeedClient.

    Returns:
        Same result dict shape as animate_scene_image().

    Raises:
        HTTPException: 400 for a bad duration, 409 when the prediction is not
            finished yet, 502 on missing outputs or download failure.
    """
    if duration not in (5, 10):
        raise HTTPException(status_code=400, detail="Duration must be 5 or 10 seconds for scene animation.")
    client = client or WaveSpeedClient()
    result = client.get_prediction_result(prediction_id, timeout=120)
    status = result.get("status")
    if status != "completed":
        # 409 signals "not ready yet" so the caller can retry later.
        raise HTTPException(
            status_code=409,
            detail={
                "error": "WaveSpeed prediction is not completed yet",
                "prediction_id": prediction_id,
                "status": status,
            },
        )
    outputs = result.get("outputs") or []
    if not outputs:
        raise HTTPException(status_code=502, detail="WaveSpeed completed but returned no outputs.")
    video_url = outputs[0]
    video_response = requests.get(video_url, timeout=120)
    if video_response.status_code != 200:
        raise HTTPException(
            status_code=502,
            detail={
                "error": "Failed to download animation video during resume",
                "status_code": video_response.status_code,
                "response": video_response.text[:200],
                "prediction_id": prediction_id,
            },
        )
    # The original prompt may not be stored with the prediction; default to "".
    animation_prompt = result.get("prompt") or ""
    # Model label and flat pricing depend only on the clip duration.
    model_name = KLING_MODEL_5S if duration == 5 else KLING_MODEL_10S
    cost = 0.21 if duration == 5 else 0.42
    logger.info("[AnimateScene] Resumed download for prediction=%s", prediction_id)
    return {
        "video_bytes": video_response.content,
        "prompt": animation_prompt,
        "duration": duration,
        "model_name": model_name,
        "cost": cost,
        "provider": "wavespeed",
        "source_video_url": video_url,
        "prediction_id": prediction_id,
    }

View File

@@ -0,0 +1,203 @@
"""
Polling utilities for WaveSpeed API.
"""
import time
from typing import Any, Dict, Optional, Callable
import requests
from fastapi import HTTPException
from requests import exceptions as requests_exceptions
from utils.logger_utils import get_service_logger
logger = get_service_logger("wavespeed.polling")
class WaveSpeedPolling:
"""Polling utilities for WaveSpeed API predictions."""
    def __init__(self, api_key: str, base_url: str):
        """Initialize polling utilities.

        Args:
            api_key: WaveSpeed API key, used as a bearer token on poll requests
            base_url: WaveSpeed API base URL (e.g. "https://api.wavespeed.ai/api/v3")
        """
        self.api_key = api_key
        self.base_url = base_url
    def _get_headers(self) -> Dict[str, str]:
        """Get HTTP headers for API requests (bearer auth only — polling GETs carry no body)."""
        return {"Authorization": f"Bearer {self.api_key}"}
    def get_prediction_result(self, prediction_id: str, timeout: int = 30) -> Dict[str, Any]:
        """
        Fetch the current status/result for a prediction.

        Matches the example pattern: simple GET request, check status_code == 200, return data.

        Args:
            prediction_id: WaveSpeed prediction to look up.
            timeout: Per-request timeout in seconds.

        Returns:
            The "data" object from the WaveSpeed response.

        Raises:
            HTTPException: 504 on request timeout; 502 on any other request
                failure, a non-200 status, or a response missing "data". The
                detail dict carries prediction_id and resume_available=True on
                the transport-level failures so callers can resume later.
        """
        url = f"{self.base_url}/predictions/{prediction_id}/result"
        headers = self._get_headers()
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests_exceptions.Timeout as exc:
            raise HTTPException(
                status_code=504,
                detail={
                    "error": "WaveSpeed polling request timed out",
                    "prediction_id": prediction_id,
                    "resume_available": True,
                    "exception": str(exc),
                },
            ) from exc
        except requests_exceptions.RequestException as exc:
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "WaveSpeed polling request failed",
                    "prediction_id": prediction_id,
                    "resume_available": True,
                    "exception": str(exc),
                },
            ) from exc
        # Match example pattern: check status_code == 200, then get data
        if response.status_code == 200:
            result = response.json().get("data")
            if not result:
                raise HTTPException(status_code=502, detail={"error": "WaveSpeed polling response missing data"})
            return result
        else:
            # Non-200 status - log and raise error (matching example's break behavior)
            logger.error(f"[WaveSpeed] Polling failed: {response.status_code} {response.text}")
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "WaveSpeed prediction polling failed",
                    "status_code": response.status_code,
                    "response": response.text,
                },
            )
def poll_until_complete(
self,
prediction_id: str,
timeout_seconds: Optional[int] = None,
interval_seconds: float = 1.0,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> Dict[str, Any]:
"""
Poll WaveSpeed until the job completes or fails.
Matches the example pattern: simple polling loop until status is "completed" or "failed".
Args:
prediction_id: The prediction ID to poll for
timeout_seconds: Optional timeout in seconds. If None, polls indefinitely until completion/failure.
interval_seconds: Seconds to wait between polling attempts (default: 1.0, faster than 2.0)
progress_callback: Optional callback function(progress: float, message: str) for progress updates
Returns:
Dict containing the completed result
Raises:
HTTPException: If the task fails, polling fails, or times out (if timeout_seconds is set)
"""
start_time = time.time()
consecutive_errors = 0
max_consecutive_errors = 6 # safety guard for non-transient errors
while True:
try:
result = self.get_prediction_result(prediction_id)
consecutive_errors = 0 # Reset error counter on success
except HTTPException as exc:
detail = exc.detail or {}
if isinstance(detail, dict):
detail.setdefault("prediction_id", prediction_id)
detail.setdefault("resume_available", True)
detail.setdefault("error", detail.get("error", "WaveSpeed polling failed"))
# Determine underlying status code (WaveSpeed vs proxy)
status_code = detail.get("status_code", exc.status_code)
# Treat 5xx as transient: keep polling indefinitely with backoff
if 500 <= int(status_code) < 600:
consecutive_errors += 1
backoff = min(30.0, interval_seconds * (2 ** (consecutive_errors - 1)))
logger.warning(
f"[WaveSpeed] Transient polling error {consecutive_errors} for {prediction_id}: "
f"{status_code}. Backing off {backoff:.1f}s"
)
time.sleep(backoff)
continue
# For non-transient (typically 4xx) errors, apply safety cap
consecutive_errors += 1
if consecutive_errors >= max_consecutive_errors:
logger.error(
f"[WaveSpeed] Too many polling errors ({consecutive_errors}) for {prediction_id}, "
f"status_code={status_code}. Giving up."
)
raise HTTPException(status_code=exc.status_code, detail=detail) from exc
backoff = min(30.0, interval_seconds * (2 ** (consecutive_errors - 1)))
logger.warning(
f"[WaveSpeed] Polling error {consecutive_errors}/{max_consecutive_errors} for {prediction_id}: "
f"{status_code}. Backing off {backoff:.1f}s"
)
time.sleep(backoff)
continue
# Extract status from result (matching example pattern)
status = result.get("status")
if status == "completed":
elapsed = time.time() - start_time
logger.info(f"[WaveSpeed] Prediction {prediction_id} completed in {elapsed:.1f}s")
return result
if status == "failed":
error_msg = result.get("error", "Unknown error")
logger.error(f"[WaveSpeed] Prediction {prediction_id} failed: {error_msg}")
raise HTTPException(
status_code=502,
detail={
"error": "WaveSpeed task failed",
"prediction_id": prediction_id,
"message": error_msg,
"details": result,
},
)
# Check timeout only if specified
if timeout_seconds is not None:
elapsed = time.time() - start_time
if elapsed > timeout_seconds:
logger.error(f"[WaveSpeed] Prediction {prediction_id} timed out after {timeout_seconds}s")
raise HTTPException(
status_code=504,
detail={
"error": "WaveSpeed task timed out",
"prediction_id": prediction_id,
"timeout_seconds": timeout_seconds,
"current_status": status,
"message": f"Task did not complete within {timeout_seconds} seconds. Status: {status}",
},
)
# Log progress periodically (every 30 seconds)
elapsed = time.time() - start_time
if int(elapsed) % 30 == 0 and elapsed > 0:
logger.info(f"[WaveSpeed] Polling {prediction_id}: status={status}, elapsed={elapsed:.0f}s")
# Call progress callback if provided
if progress_callback:
# Map elapsed time to progress (20-80% range during polling)
# Assume typical completion time is timeout_seconds or 120s default
estimated_total = timeout_seconds or 120
progress = min(80.0, 20.0 + (elapsed / estimated_total) * 60.0)
progress_callback(progress, f"Video generation in progress... ({elapsed:.0f}s)")
# Poll faster (1.0s instead of 2.0s) to match example's responsiveness
time.sleep(interval_seconds)