ALwrity/backend/services/llm_providers/video_generation/wavespeed_provider.py

"""
WaveSpeed Text-to-Video Provider

Modular services for WaveSpeed text-to-video models:
- HunyuanVideo-1.5
- LTX-2 Pro
- LTX-2 Fast
- LTX-2 Retake
- Google Veo 3.1

Each model has its own service class for separation of concerns.
"""

from __future__ import annotations

import asyncio
import requests
from typing import Optional, Dict, Any, Callable
from fastapi import HTTPException
from loguru import logger

from services.wavespeed.client import WaveSpeedClient
from utils.logger_utils import get_service_logger
from .base import VideoGenerationOptions, VideoGenerationResult

logger = get_service_logger("wavespeed.text_to_video")


class BaseWaveSpeedTextToVideoService:
    """Common foundation for the WaveSpeed text-to-video services.

    Concrete services provide ``MODEL_PATH`` / ``MODEL_NAME`` and implement
    ``generate_video``. Pricing and input validation have overridable
    defaults defined here.
    """

    MODEL_PATH: str  # Must be set by subclasses
    MODEL_NAME: str  # Must be set by subclasses
    DEFAULT_COST: float = 0.10  # Default cost per second

    def __init__(self, client: Optional[WaveSpeedClient] = None):
        """Set up the service with a WaveSpeed client.

        Args:
            client: Client to use; a fresh ``WaveSpeedClient`` is built when
                none (or a falsy value) is supplied.
        """
        if not client:
            client = WaveSpeedClient()
        self.client = client
        logger.info(f"[{self.MODEL_NAME}] Service initialized")

    def calculate_cost(self, resolution: str, duration: int) -> float:
        """Return the price of a generation in USD.

        The default pricing is a flat per-second rate (``DEFAULT_COST``), so
        ``resolution`` does not influence the result here; subclasses with
        resolution-dependent pricing override this method.

        Args:
            resolution: Output resolution (480p, 720p, 1080p)
            duration: Video duration in seconds

        Returns:
            Cost in USD
        """
        return self.DEFAULT_COST * duration

    async def generate_video(
        self,
        prompt: str,
        duration: int = 5,
        resolution: str = "720p",
        negative_prompt: Optional[str] = None,
        seed: Optional[int] = None,
        audio_base64: Optional[str] = None,
        enable_prompt_expansion: bool = True,
        progress_callback: Optional[Callable[[float, str], None]] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Produce a video from a text prompt (abstract; subclasses implement it).

        Args:
            prompt: Text prompt describing the video
            duration: Video duration in seconds (5 or 10)
            resolution: Output resolution (480p, 720p, 1080p)
            negative_prompt: Optional negative prompt
            seed: Optional random seed
            audio_base64: Optional audio file for synchronization
            enable_prompt_expansion: Enable prompt optimization
            progress_callback: Optional progress callback function
            **kwargs: Additional model-specific parameters

        Returns:
            Dictionary with video_bytes, prompt, duration, model_name, cost, etc.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("Subclasses must implement generate_video()")

    def _validate_inputs(
        self,
        prompt: str,
        duration: int,
        resolution: str,
    ) -> None:
        """Reject requests whose basic parameters are unusable.

        Checks run in a fixed order (prompt, then duration, then resolution)
        and the first failure is reported. Subclasses should override this
        for model-specific limits.

        Args:
            prompt: Text prompt
            duration: Video duration
            resolution: Output resolution

        Raises:
            HTTPException: 400 if any parameter fails validation
        """
        has_text = bool(prompt) and bool(prompt.strip())
        if not has_text:
            raise HTTPException(
                status_code=400,
                detail="Prompt is required and cannot be empty"
            )

        allowed_durations = (5, 8, 10)
        if duration not in allowed_durations:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid duration: {duration}. Must be 5, 8, or 10 seconds"
            )

        # Kept as a list: its repr is embedded in the error message below.
        valid_resolutions = ["480p", "720p", "1080p"]
        if resolution in valid_resolutions:
            return
        raise HTTPException(
            status_code=400,
            detail=f"Invalid resolution: {resolution}. Must be one of: {valid_resolutions}"
        )


class HunyuanVideoService(BaseWaveSpeedTextToVideoService):
    """
    Service for HunyuanVideo-1.5 text-to-video generation.

    HunyuanVideo-1.5 is Tencent's lightweight 8.3B parameter text-to-video model
    that generates high-quality videos with top-tier visual quality and motion coherence.

    ``generate_video`` is a coroutine; every blocking network step (submission,
    polling, download) is dispatched to a worker thread so the event loop is
    never stalled while waiting on WaveSpeed.
    """

    MODEL_PATH = "wavespeed-ai/hunyuan-video-1.5/text-to-video"
    MODEL_NAME = "hunyuan-video-1.5"

    # Pricing per second (from WaveSpeed docs)
    PRICING = {
        "480p": 0.02,  # $0.02 per second
        "720p": 0.04,  # $0.04 per second
    }

    # Size mapping: resolution -> size format (width*height)
    SIZE_MAPPING = {
        "480p": {
            "landscape": "832*480",
            "portrait": "480*832",
        },
        "720p": {
            "landscape": "1280*720",
            "portrait": "720*1280",
        },
    }

    def calculate_cost(self, resolution: str, duration: int) -> float:
        """Calculate cost for video generation.

        Args:
            resolution: Output resolution (480p, 720p); any other value is
                priced at the 720p rate
            duration: Video duration in seconds (5, 8, or 10)

        Returns:
            Cost in USD
        """
        cost_per_second = self.PRICING.get(resolution, self.PRICING["720p"])
        return cost_per_second * duration

    def _validate_inputs(
        self,
        prompt: str,
        duration: int,
        resolution: str,
    ) -> None:
        """Validate input parameters for HunyuanVideo-1.5.

        Args:
            prompt: Text prompt
            duration: Video duration (5, 8, or 10 seconds)
            resolution: Output resolution (480p or 720p)

        Raises:
            HTTPException: 400 if validation fails
        """
        if not prompt or not prompt.strip():
            raise HTTPException(
                status_code=400,
                detail="Prompt is required and cannot be empty"
            )

        # HunyuanVideo-1.5 supports 5, 8, or 10 seconds (per official docs)
        if duration not in [5, 8, 10]:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid duration: {duration}. Must be 5, 8, or 10 seconds for HunyuanVideo-1.5"
            )

        # HunyuanVideo-1.5 supports 480p and 720p only
        valid_resolutions = ["480p", "720p"]
        if resolution not in valid_resolutions:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid resolution: {resolution}. Must be one of: {valid_resolutions} for HunyuanVideo-1.5"
            )

    def _resolution_to_size(self, resolution: str, aspect_ratio: str = "16:9") -> str:
        """Convert resolution to size format (width*height).

        Args:
            resolution: Resolution (480p, 720p)
            aspect_ratio: Aspect ratio; "9:16" and "1:1" select the portrait
                size, every other value selects the landscape size

        Returns:
            Size string in format "width*height"; "1280*720" when the
            resolution has no entry in ``SIZE_MAPPING``
        """
        orientation = "portrait" if aspect_ratio in ("9:16", "1:1") else "landscape"

        size_mapping = self.SIZE_MAPPING.get(resolution, {})
        return size_mapping.get(orientation, size_mapping.get("landscape", "1280*720"))

    def _download_video(self, video_url: str, prediction_id: Any) -> bytes:
        """Fetch the generated video (blocking; call from a worker thread).

        Args:
            video_url: HTTP(S) URL of the generated video
            prediction_id: WaveSpeed prediction id, echoed in error details

        Returns:
            The non-empty video payload

        Raises:
            HTTPException: 502 on transport failure, non-200 response, or an
                empty body
        """
        try:
            video_response = requests.get(video_url, timeout=180)
        except requests.exceptions.RequestException as e:
            raise HTTPException(
                status_code=502,
                detail={
                    "error": f"Failed to download video: {str(e)}",
                    "prediction_id": prediction_id,
                }
            ) from e

        if video_response.status_code != 200:
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "Failed to download HunyuanVideo-1.5 video",
                    "status_code": video_response.status_code,
                    "response": video_response.text[:200],
                    "prediction_id": prediction_id,
                }
            )

        video_bytes = video_response.content
        if not video_bytes:
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "Downloaded video is empty",
                    "prediction_id": prediction_id,
                }
            )
        return video_bytes

    async def generate_video(
        self,
        prompt: str,
        duration: int = 5,
        resolution: str = "720p",
        negative_prompt: Optional[str] = None,
        seed: Optional[int] = None,
        audio_base64: Optional[str] = None,
        enable_prompt_expansion: bool = True,
        progress_callback: Optional[Callable[[float, str], None]] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Generate video using HunyuanVideo-1.5.

        Reference: https://wavespeed.ai/docs/docs-api/wavespeed-ai/hunyuan-video-1.5-text-to-video

        Args:
            prompt: Text prompt describing the video
            duration: Video duration in seconds (5, 8, or 10)
            resolution: Output resolution (480p, 720p)
            negative_prompt: Optional negative prompt
            seed: Optional random seed (-1 for random)
            audio_base64: Not supported by HunyuanVideo-1.5 (ignored with warning)
            enable_prompt_expansion: Not supported by HunyuanVideo-1.5 (ignored with warning)
            progress_callback: Optional progress callback function
            **kwargs: Additional parameters (aspect_ratio for size calculation)

        Returns:
            Dictionary with video_bytes, prompt, duration, model_name, cost, etc.

        Raises:
            HTTPException: 400 for invalid inputs; 502 when WaveSpeed returns
                no usable output or the download fails; submission and polling
                errors raised by the client are propagated (polling errors with
                dict details gain ``prediction_id`` / ``resume_available``).
        """
        # Validate inputs (HunyuanVideo-1.5 specific)
        self._validate_inputs(prompt, duration, resolution)

        # Get aspect ratio from kwargs (default to 16:9)
        aspect_ratio = kwargs.get("aspect_ratio", "16:9")

        # Convert resolution to size format
        size = self._resolution_to_size(resolution, aspect_ratio)

        # Build payload according to API spec
        payload: Dict[str, Any] = {
            "prompt": prompt.strip(),
            "duration": duration,
            "size": size,
        }

        # Add optional parameters
        if negative_prompt:
            payload["negative_prompt"] = negative_prompt.strip()

        # -1 asks the API for a random seed
        payload["seed"] = seed if seed is not None else -1

        # Note: audio_base64 and enable_prompt_expansion are not supported by HunyuanVideo-1.5
        if audio_base64:
            logger.warning("[HunyuanVideo] audio_base64 is not supported by HunyuanVideo-1.5, ignoring")
        if not enable_prompt_expansion:
            logger.warning("[HunyuanVideo] enable_prompt_expansion is not supported by HunyuanVideo-1.5, ignoring")

        logger.info(
            f"[HunyuanVideo] Generating video: resolution={resolution}, "
            f"duration={duration}s, size={size}, prompt_length={len(prompt)}"
        )

        # Progress callback: submission
        if progress_callback:
            progress_callback(10.0, "Submitting HunyuanVideo-1.5 request to WaveSpeed...")

        # The client call is synchronous, so run it in a worker thread rather
        # than blocking the event loop for up to the 60 s timeout.
        try:
            prediction_id = await asyncio.to_thread(
                self.client.submit_text_to_video,
                model_path=self.MODEL_PATH,
                payload=payload,
                timeout=60,
            )
        except HTTPException as e:
            logger.error(f"[HunyuanVideo] Submission failed: {e.detail}")
            raise

        logger.info(f"[HunyuanVideo] Request submitted: prediction_id={prediction_id}")

        # Progress callback: polling started
        if progress_callback:
            progress_callback(20.0, f"Polling for completion (prediction_id: {prediction_id})...")

        # Poll for completion with progress updates
        try:
            result = await asyncio.to_thread(
                self.client.poll_until_complete,
                prediction_id,
                timeout_seconds=600,  # 10 minutes max
                interval_seconds=0.5,  # Poll every 0.5 seconds (as per example)
                progress_callback=progress_callback,
            )
        except HTTPException as e:
            detail = e.detail or {}
            if isinstance(detail, dict):
                # Copy so the original exception's detail is not mutated.
                detail = dict(detail)
                detail.setdefault("prediction_id", prediction_id)
                detail.setdefault("resume_available", True)
            logger.error(f"[HunyuanVideo] Polling failed: {detail}")
            raise HTTPException(status_code=e.status_code, detail=detail) from e

        # Progress callback: processing result
        if progress_callback:
            progress_callback(90.0, "Downloading generated video...")

        # Extract video URL from result
        outputs = result.get("outputs") or []
        if not outputs:
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "HunyuanVideo-1.5 completed but returned no outputs",
                    "prediction_id": prediction_id,
                    "status": result.get("status"),
                }
            )

        video_url = outputs[0]
        if not isinstance(video_url, str) or not video_url.startswith("http"):
            raise HTTPException(
                status_code=502,
                detail={
                    "error": f"Invalid video URL format: {video_url}",
                    "prediction_id": prediction_id,
                }
            )

        # Download video off the event loop (the request may take up to 180 s)
        logger.info(f"[HunyuanVideo] Downloading video from: {video_url}")
        video_bytes = await asyncio.to_thread(
            self._download_video, video_url, prediction_id
        )

        # Calculate cost
        cost = self.calculate_cost(resolution, duration)

        # Get video dimensions from size
        width, height = map(int, size.split("*"))

        # Copy the metadata (tolerating a missing or null value) so the
        # client's result dict is left untouched.
        metadata = dict(result.get("metadata") or {})
        metadata.update({
            "has_nsfw_contents": result.get("has_nsfw_contents", []),
            "created_at": result.get("created_at"),
            "size": size,
        })

        logger.info(
            f"[HunyuanVideo] ✅ Generated video: {len(video_bytes)} bytes, "
            f"resolution={resolution}, duration={duration}s, cost=${cost:.2f}"
        )

        # Progress callback: completed
        if progress_callback:
            progress_callback(100.0, "Video generation completed!")

        # Return metadata dict
        return {
            "video_bytes": video_bytes,
            "prompt": prompt,
            "duration": float(duration),
            "model_name": self.MODEL_NAME,
            "cost": cost,
            "provider": "wavespeed",
            "resolution": resolution,
            "width": width,
            "height": height,
            "metadata": metadata,
            "source_video_url": video_url,
            "prediction_id": prediction_id,
        }


class LTX2ProService(BaseWaveSpeedTextToVideoService):
    """
    Service for Lightricks LTX-2 Pro text-to-video generation.

    LTX-2 Pro is a next-generation AI creative engine by Lightricks, designed for
    real production workflows. It generates high-quality, synchronized audio and
    1080p video directly from text.

    Official API Documentation:
    https://wavespeed.ai/docs/docs-api/lightricks/ltx-2-pro/text-to-video

    Features:
    - Video durations: 6s, 8s, or 10s
    - Fixed resolution: 1080p
    - Synchronized audio generation (optional)
    - Production-ready quality

    ``generate_video`` is a coroutine; every blocking network step (submission,
    polling, download) is dispatched to a worker thread so the event loop is
    never stalled while waiting on WaveSpeed.
    """

    MODEL_PATH = "lightricks/ltx-2-pro/text-to-video"
    MODEL_NAME = "lightricks/ltx-2-pro/text-to-video"

    # Pricing per second (from official docs: https://wavespeed.ai/docs/docs-api/lightricks/lightricks-ltx-2-pro-text-to-video)
    PRICING = {
        "1080p": 0.06,  # $0.06 per second for 1080p
    }

    def calculate_cost(self, resolution: str, duration: int) -> float:
        """Calculate cost for video generation.

        Args:
            resolution: Output resolution (not used: pricing is always taken
                from the 1080p rate)
            duration: Video duration in seconds (6, 8, or 10)

        Returns:
            Cost in USD
        """
        # LTX-2 Pro is always 1080p
        cost_per_second = self.PRICING.get("1080p", 0.10)
        return cost_per_second * duration

    def _validate_inputs(
        self,
        prompt: str,
        duration: int,
        resolution: str,
    ) -> None:
        """Validate input parameters for LTX-2 Pro.

        Args:
            prompt: Text prompt
            duration: Video duration (6, 8, or 10 seconds)
            resolution: Output resolution (never rejected; an unrecognised
                value only produces a warning)

        Raises:
            HTTPException: 400 if the prompt is empty or the duration is
                unsupported
        """
        if not prompt or not prompt.strip():
            raise HTTPException(
                status_code=400,
                detail="Prompt is required and cannot be empty"
            )

        # LTX-2 Pro supports 6, 8, or 10 seconds
        if duration not in [6, 8, 10]:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid duration: {duration}. Must be 6, 8, or 10 seconds for LTX-2 Pro"
            )

        # LTX-2 Pro is fixed at 1080p - resolution parameter is ignored
        # But we validate it's a valid resolution for consistency
        if resolution and resolution not in ["480p", "720p", "1080p"]:
            logger.warning(f"[LTX-2 Pro] Resolution {resolution} specified but LTX-2 Pro is fixed at 1080p")

    def _download_video(self, video_url: str, prediction_id: Any) -> bytes:
        """Fetch the generated video (blocking; call from a worker thread).

        Args:
            video_url: HTTP(S) URL of the generated video
            prediction_id: WaveSpeed prediction id, echoed in error details

        Returns:
            The non-empty video payload

        Raises:
            HTTPException: 502 on transport failure, non-200 response, or an
                empty body
        """
        try:
            video_response = requests.get(video_url, timeout=180)
        except requests.exceptions.RequestException as e:
            raise HTTPException(
                status_code=502,
                detail={
                    "error": f"Failed to download video: {str(e)}",
                    "prediction_id": prediction_id,
                }
            ) from e

        if video_response.status_code != 200:
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "Failed to download LTX-2 Pro video",
                    "status_code": video_response.status_code,
                    "response": video_response.text[:200],
                    "prediction_id": prediction_id,
                }
            )

        video_bytes = video_response.content
        if not video_bytes:
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "Downloaded video is empty",
                    "prediction_id": prediction_id,
                }
            )
        return video_bytes

    async def generate_video(
        self,
        prompt: str,
        duration: int = 6,
        resolution: str = "1080p",
        negative_prompt: Optional[str] = None,
        seed: Optional[int] = None,
        audio_base64: Optional[str] = None,
        enable_prompt_expansion: bool = True,
        progress_callback: Optional[Callable[[float, str], None]] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Generate video using Lightricks LTX-2 Pro.

        Reference: https://wavespeed.ai/docs/docs-api/lightricks/ltx-2-pro/text-to-video

        Args:
            prompt: Text prompt describing the video
            duration: Video duration in seconds (6, 8, or 10)
            resolution: Output resolution (ignored - LTX-2 Pro is fixed at 1080p)
            negative_prompt: Not supported by LTX-2 Pro (ignored with warning)
            seed: Not supported by LTX-2 Pro (ignored with warning)
            audio_base64: Not supported by LTX-2 Pro (ignored with warning)
            enable_prompt_expansion: Not supported by LTX-2 Pro (ignored with warning)
            progress_callback: Optional progress callback function
            **kwargs: Additional parameters (generate_audio: bool, default: True)

        Returns:
            Dictionary with video_bytes, prompt, duration, model_name, cost, etc.

        Raises:
            HTTPException: 400 for invalid inputs; 502 when WaveSpeed returns
                no usable output or the download fails; submission and polling
                errors raised by the client are propagated (polling errors with
                dict details gain ``prediction_id`` / ``resume_available``).
        """
        # Validate inputs (LTX-2 Pro specific)
        self._validate_inputs(prompt, duration, resolution)

        # Get generate_audio from kwargs (default: True)
        generate_audio = kwargs.get("generate_audio", True)
        if not isinstance(generate_audio, bool):
            generate_audio = True  # Default to True if invalid type

        # Build payload according to API spec
        payload: Dict[str, Any] = {
            "prompt": prompt.strip(),
            "duration": duration,
            "generate_audio": generate_audio,
        }

        # Note: negative_prompt, seed, audio_base64, enable_prompt_expansion are not supported
        if negative_prompt:
            logger.warning("[LTX-2 Pro] negative_prompt is not supported by LTX-2 Pro, ignoring")
        if seed is not None:
            logger.warning("[LTX-2 Pro] seed is not supported by LTX-2 Pro, ignoring")
        if audio_base64:
            logger.warning("[LTX-2 Pro] audio_base64 is not supported by LTX-2 Pro, ignoring")
        if not enable_prompt_expansion:
            logger.warning("[LTX-2 Pro] enable_prompt_expansion is not supported by LTX-2 Pro, ignoring")

        logger.info(
            f"[LTX-2 Pro] Generating video: duration={duration}s, "
            f"generate_audio={generate_audio}, prompt_length={len(prompt)}"
        )

        # Progress callback: submission
        if progress_callback:
            progress_callback(10.0, "Submitting LTX-2 Pro request to WaveSpeed...")

        # The client call is synchronous, so run it in a worker thread rather
        # than blocking the event loop for up to the 60 s timeout.
        try:
            prediction_id = await asyncio.to_thread(
                self.client.submit_text_to_video,
                model_path=self.MODEL_PATH,
                payload=payload,
                timeout=60,
            )
        except HTTPException as e:
            logger.error(f"[LTX-2 Pro] Submission failed: {e.detail}")
            raise

        logger.info(f"[LTX-2 Pro] Request submitted: prediction_id={prediction_id}")

        # Progress callback: polling started
        if progress_callback:
            progress_callback(20.0, f"Polling for completion (prediction_id: {prediction_id})...")

        # Poll for completion with progress updates
        try:
            result = await asyncio.to_thread(
                self.client.poll_until_complete,
                prediction_id,
                timeout_seconds=600,  # 10 minutes max
                interval_seconds=0.5,  # Poll every 0.5 seconds
                progress_callback=progress_callback,
            )
        except HTTPException as e:
            detail = e.detail or {}
            if isinstance(detail, dict):
                # Copy so the original exception's detail is not mutated.
                detail = dict(detail)
                detail.setdefault("prediction_id", prediction_id)
                detail.setdefault("resume_available", True)
            logger.error(f"[LTX-2 Pro] Polling failed: {detail}")
            raise HTTPException(status_code=e.status_code, detail=detail) from e

        # Progress callback: processing result
        if progress_callback:
            progress_callback(90.0, "Downloading generated video...")

        # Extract video URL from result
        outputs = result.get("outputs") or []
        if not outputs:
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "LTX-2 Pro completed but returned no outputs",
                    "prediction_id": prediction_id,
                    "status": result.get("status"),
                }
            )

        video_url = outputs[0]
        if not isinstance(video_url, str) or not video_url.startswith("http"):
            raise HTTPException(
                status_code=502,
                detail={
                    "error": f"Invalid video URL format: {video_url}",
                    "prediction_id": prediction_id,
                }
            )

        # Download video off the event loop (the request may take up to 180 s)
        logger.info(f"[LTX-2 Pro] Downloading video from: {video_url}")
        video_bytes = await asyncio.to_thread(
            self._download_video, video_url, prediction_id
        )

        # Calculate cost
        cost = self.calculate_cost("1080p", duration)

        # LTX-2 Pro is fixed at 1080p
        width, height = 1920, 1080

        # Copy the metadata (tolerating a missing or null value) so the
        # client's result dict is left untouched.
        metadata = dict(result.get("metadata") or {})
        metadata.update({
            "has_nsfw_contents": result.get("has_nsfw_contents", []),
            "created_at": result.get("created_at"),
            "generate_audio": generate_audio,
            "resolution": "1080p",  # Fixed resolution
        })

        logger.info(
            f"[LTX-2 Pro] ✅ Generated video: {len(video_bytes)} bytes, "
            f"duration={duration}s, generate_audio={generate_audio}, cost=${cost:.2f}"
        )

        # Progress callback: completed
        if progress_callback:
            progress_callback(100.0, "Video generation completed!")

        # Return metadata dict
        return {
            "video_bytes": video_bytes,
            "prompt": prompt,
            "duration": float(duration),
            "model_name": self.MODEL_NAME,
            "cost": cost,
            "provider": "wavespeed",
            "resolution": "1080p",
            "width": width,
            "height": height,
            "metadata": metadata,
            "source_video_url": video_url,
            "prediction_id": prediction_id,
        }


class GoogleVeo31Service(BaseWaveSpeedTextToVideoService):
    """
    Service for Google Veo 3.1 text-to-video generation.

    Google Veo 3.1 converts text prompts into videos with synchronized audio
    at native 1080p for high-quality outputs. Designed for professional content creation.

    Official API Documentation:
    https://wavespeed.ai/docs/docs-api/google/veo3.1/text-to-video

    Features:
    - Video durations: 4s, 6s, or 8s
    - Resolutions: 720p or 1080p
    - Aspect ratios: 16:9 or 9:16
    - Synchronized audio generation (optional)
    - Negative prompt support
    - Seed control for reproducibility
    """

    MODEL_PATH = "google/veo3.1/text-to-video"
    MODEL_NAME = "google/veo3.1/text-to-video"

    # Pricing per second (TODO: Update with actual pricing from docs)
    PRICING = {
        "720p": 0.08,   # Placeholder - update with actual pricing
        "1080p": 0.12,  # Placeholder - update with actual pricing
    }

    def calculate_cost(self, resolution: str, duration: int) -> float:
        """Return the price of a Veo 3.1 generation in USD.

        The per-second rate is looked up in ``PRICING``; a resolution with no
        entry there is charged at the 1080p rate.

        Args:
            resolution: Output resolution (720p, 1080p)
            duration: Video duration in seconds (4, 6, or 8)

        Returns:
            Cost in USD
        """
        fallback_rate = self.PRICING["1080p"]
        rate = self.PRICING.get(resolution, fallback_rate)
        return rate * duration

    def _validate_inputs(
        self,
        prompt: str,
        duration: int,
        resolution: str,
    ) -> None:
        """Validate input parameters for Google Veo 3.1.

        Args:
            prompt: Text prompt
            duration: Video duration (4, 6, or 8 seconds)
            resolution: Output resolution (720p or 1080p)

        Raises:
            HTTPException: If validation fails
        """
        if not prompt or not prompt.strip():
            raise HTTPException(
                status_code=400,
                detail="Prompt is required and cannot be empty"
            )

        # Google Veo 3.1 supports 4, 6, or 8 seconds
        if duration not in [4, 6, 8]:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid duration: {duration}. Must be 4, 6, or 8 seconds for Google Veo 3.1"
            )

        # Google Veo 3.1 supports 720p and 1080p
        valid_resolutions = ["720p", "1080p"]
        if resolution not in valid_resolutions:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid resolution: {resolution}. Must be one of: {valid_resolutions} for Google Veo 3.1"
            )

    async def generate_video(
        self,
        prompt: str,
        duration: int = 8,
        resolution: str = "1080p",
        negative_prompt: Optional[str] = None,
        seed: Optional[int] = None,
        audio_base64: Optional[str] = None,
        enable_prompt_expansion: bool = True,
        progress_callback: Optional[Callable[[float, str], None]] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Generate video using Google Veo 3.1.

        Reference: https://wavespeed.ai/docs/docs-api/google/veo3.1/text-to-video

        Args:
            prompt: Text prompt describing the video
            duration: Video duration in seconds (4, 6, or 8)
            resolution: Output resolution (720p, 1080p)
            negative_prompt: Optional negative prompt
            seed: Optional random seed for reproducibility
            audio_base64: Not supported by Veo 3.1 (ignored with warning)
            enable_prompt_expansion: Not supported by Veo 3.1 (ignored with warning)
            progress_callback: Optional progress callback function
            **kwargs: Additional parameters (aspect_ratio: "16:9" or "9:16", generate_audio: bool)

        Returns:
            Dictionary with video_bytes, prompt, duration, model_name, cost, etc.
        """
        # Validate inputs (Google Veo 3.1 specific)
        self._validate_inputs(prompt, duration, resolution)

        # Get aspect_ratio from kwargs (default: "16:9")
        aspect_ratio = kwargs.get("aspect_ratio", "16:9")
        if aspect_ratio not in ["16:9", "9:16"]:
            aspect_ratio = "16:9"  # Default to 16:9 if invalid

        # Get generate_audio from kwargs (default: True)
        generate_audio = kwargs.get("generate_audio", True)
        if not isinstance(generate_audio, bool):
            generate_audio = True  # Default to True if invalid type

        # Build payload according to API spec
        payload = {
            "prompt": prompt.strip(),
            "duration": duration,
            "resolution": resolution,
            "aspect_ratio": aspect_ratio,
            "generate_audio": generate_audio,
        }

        # Add optional parameters
        if negative_prompt:
            payload["negative_prompt"] = negative_prompt.strip()

        if seed is not None:
            payload["seed"] = seed

        # Note: audio_base64 and enable_prompt_expansion are not supported
        if audio_base64:
            logger.warning("[Google Veo 3.1] audio_base64 is not supported by Veo 3.1, ignoring")
        if not enable_prompt_expansion:
            logger.warning("[Google Veo 3.1] enable_prompt_expansion is not supported by Veo 3.1, ignoring")

        logger.info(
            f"[Google Veo 3.1] Generating video: resolution={resolution}, "
            f"duration={duration}s, aspect_ratio={aspect_ratio}, generate_audio={generate_audio}, prompt_length={len(prompt)}"
        )

        # Progress callback: submission
        if progress_callback:
            progress_callback(10.0, "Submitting Google Veo 3.1 request to WaveSpeed...")

        # Submit request using WaveSpeedClient
        try:
            prediction_id = self.client.submit_text_to_video(
                model_path=self.MODEL_PATH,
                payload=payload,
                timeout=60,
            )
        except HTTPException as e:
            logger.error(f"[Google Veo 3.1] Submission failed: {e.detail}")
            raise

        logger.info(f"[Google Veo 3.1] Request submitted: prediction_id={prediction_id}")

        # Progress callback: polling started
        if progress_callback:
            progress_callback(20.0, f"Polling for completion (prediction_id: {prediction_id})...")

        # Poll for completion with progress updates
        try:
            result = await asyncio.to_thread(
                self.client.poll_until_complete,
                prediction_id,
                timeout_seconds=600,  # 10 minutes max
                interval_seconds=0.5,  # Poll every 0.5 seconds
                progress_callback=progress_callback,
            )
        except HTTPException as e:
            detail = e.detail or {}
            if isinstance(detail, dict):
                detail.setdefault("prediction_id", prediction_id)
                detail.setdefault("resume_available", True)
            logger.error(f"[Google Veo 3.1] Polling failed: {detail}")
            raise HTTPException(status_code=e.status_code, detail=detail)

        # Progress callback: processing result
        if progress_callback:
            progress_callback(90.0, "Downloading generated video...")

        # Extract video URL from result
        outputs = result.get("outputs") or []
        if not outputs:
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "Google Veo 3.1 completed but returned no outputs",
                    "prediction_id": prediction_id,
                    "status": result.get("status"),
                }
            )

        video_url = outputs[0]
        if not isinstance(video_url, str) or not video_url.startswith("http"):
            raise HTTPException(
                status_code=502,
                detail={
                    "error": f"Invalid video URL format: {video_url}",
                    "prediction_id": prediction_id,
                }
            )

        # Download video
        logger.info(f"[Google Veo 3.1] Downloading video from: {video_url}")
        try:
            video_response = requests.get(video_url, timeout=180)
            if video_response.status_code != 200:
                raise HTTPException(
                    status_code=502,
                    detail={
                        "error": "Failed to download Google Veo 3.1 video",
                        "status_code": video_response.status_code,
                        "response": video_response.text[:200],
                        "prediction_id": prediction_id,
                    }
                )
        except requests.exceptions.RequestException as e:
            raise HTTPException(
                status_code=502,
                detail={
                    "error": f"Failed to download video: {str(e)}",
                    "prediction_id": prediction_id,
                }
            )

        video_bytes = video_response.content
        if len(video_bytes) == 0:
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "Downloaded video is empty",
                    "prediction_id": prediction_id,
                }
            )

        # Calculate cost
        cost = self.calculate_cost(resolution, duration)

        # Get video dimensions from resolution and aspect ratio
        if resolution == "720p":
            width, height = (1280, 720) if aspect_ratio == "16:9" else (720, 1280)
        else:  # 1080p
            width, height = (1920, 1080) if aspect_ratio == "16:9" else (1080, 1920)

        # Extract metadata
        metadata = result.get("metadata", {})
        metadata.update({
            "has_nsfw_contents": result.get("has_nsfw_contents", []),
            "created_at": result.get("created_at"),
            "generate_audio": generate_audio,
            "aspect_ratio": aspect_ratio,
            "resolution": resolution,
        })

        logger.info(
            f"[Google Veo 3.1] ✅ Generated video: {len(video_bytes)} bytes, "
            f"resolution={resolution}, duration={duration}s, aspect_ratio={aspect_ratio}, cost=${cost:.2f}"
        )

        # Progress callback: completed
        if progress_callback:
            progress_callback(100.0, "Video generation completed!")

        # Return metadata dict
        return {
            "video_bytes": video_bytes,
            "prompt": prompt,
            "duration": float(duration),
            "model_name": self.MODEL_NAME,
            "cost": cost,
            "provider": "wavespeed",
            "resolution": resolution,
            "width": width,
            "height": height,
            "metadata": metadata,
            "source_video_url": video_url,
            "prediction_id": prediction_id,
        }


def get_wavespeed_text_to_video_service(model: str) -> BaseWaveSpeedTextToVideoService:
    """
    Get the appropriate WaveSpeed text-to-video service for the given model.

    Resolution order:
        1. Exact match of the normalized (stripped, lower-cased) identifier.
        2. Substring match in either direction against the known identifiers,
           in mapping order (e.g., "hunyuan" -> "hunyuan-video-1.5").

    Args:
        model: Model identifier (e.g., "hunyuan-video-1.5", "ltx-2-pro")

    Returns:
        A new instance of the matching service class

    Raises:
        ValueError: If model is not a non-empty string, or is not supported
    """
    # Guard first: an empty string is a substring of every key, so without
    # this check "" would silently resolve to the first mapping entry, and a
    # non-string (e.g. None) would surface as AttributeError from .lower()
    # instead of the documented ValueError.
    if not isinstance(model, str) or not model.strip():
        raise ValueError(
            f"Unsupported WaveSpeed text-to-video model: {model!r}. "
            "Model identifier must be a non-empty string."
        )

    # All keys are lower-case so they can be compared directly against the
    # normalized identifier below.
    model_mapping = {
        "hunyuan-video-1.5": HunyuanVideoService,
        "wavespeed-ai/hunyuan-video-1.5": HunyuanVideoService,
        "wavespeed-ai/hunyuan-video-1.5/text-to-video": HunyuanVideoService,
        "ltx-2-pro": LTX2ProService,
        "lightricks/ltx-2-pro": LTX2ProService,
        "lightricks/ltx-2-pro/text-to-video": LTX2ProService,
        "veo3.1": GoogleVeo31Service,
        "google/veo3.1": GoogleVeo31Service,
        "google/veo3.1/text-to-video": GoogleVeo31Service,
        # TODO: Add other models as they are implemented
        # "lightricks/ltx-2-fast": LTX2FastService,
        # "lightricks/ltx-2-retake": LTX2RetakeService,
    }

    model_key = model.strip().lower()

    # Try exact match first
    service_class = model_mapping.get(model_key)
    if service_class is not None:
        return service_class()

    # Try partial match (e.g., "hunyuan" -> "hunyuan-video-1.5"); the first
    # key in mapping order that contains, or is contained in, the identifier
    # wins.
    for key, candidate in model_mapping.items():
        if model_key in key or key in model_key:
            return candidate()

    raise ValueError(
        f"Unsupported WaveSpeed text-to-video model: {model}. "
        f"Supported models: {list(model_mapping.keys())}"
    )