ALwrity/backend/services/video_studio/add_audio_to_video_service.py

"""
Add Audio to Video service for Video Studio.

Supports multiple models for adding audio to videos:
1. Hunyuan Video Foley - Generate realistic Foley and ambient audio from video
2. Think Sound - (To be added)
"""

import asyncio
import base64
from typing import Dict, Any, Optional, Callable
from fastapi import HTTPException

from utils.logger_utils import get_service_logger
from ..wavespeed.client import WaveSpeedClient

logger = get_service_logger("video_studio.add_audio_to_video")


class AddAudioToVideoService:
    """Service for adding audio to video operations."""

    def __init__(self):
        """Initialize Add Audio to Video service."""
        self.wavespeed_client = WaveSpeedClient()
        logger.info("[AddAudioToVideo] Service initialized")

    def calculate_cost(self, model: str, duration: float = 10.0) -> float:
        """
        Calculate cost for adding audio to video operation.

        Args:
            model: Model to use ("hunyuan-video-foley" or "think-sound")
            duration: Video duration in seconds (for Hunyuan Video Foley)

        Returns:
            Cost in USD
        """
        if model == "hunyuan-video-foley":
            # Estimated pricing: $0.02/s (similar to other video processing models)
            # Minimum charge: 5 seconds
            # Maximum: 600 seconds (10 minutes)
            cost_per_second = 0.02
            billed_duration = max(5.0, min(duration, 600.0))
            return cost_per_second * billed_duration
        elif model == "think-sound":
            # Think Sound pricing: $0.05 per video (flat rate)
            return 0.05
        else:
            # Default fallback
            cost_per_second = 0.02
            billed_duration = max(5.0, min(duration, 600.0))
            return cost_per_second * billed_duration

    async def add_audio(
        self,
        video_data: bytes,
        model: str = "hunyuan-video-foley",
        prompt: Optional[str] = None,
        seed: Optional[int] = None,
        user_id: str = None,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> Dict[str, Any]:
        """
        Add audio to video using AI models.

        Args:
            video_data: Source video as bytes
            model: Model to use ("hunyuan-video-foley" or "think-sound")
            prompt: Optional text prompt describing desired sounds (Hunyuan Video Foley)
            seed: Random seed for reproducibility (-1 for random)
            user_id: User ID for tracking
            progress_callback: Optional callback for progress updates

        Returns:
            Dict with processed video_url, cost, and metadata
        """
        try:
            logger.info(f"[AddAudioToVideo] Audio addition request: user={user_id}, model={model}, has_prompt={prompt is not None}")

            # Convert video to base64 data URI
            video_b64 = base64.b64encode(video_data).decode('utf-8')
            video_uri = f"data:video/mp4;base64,{video_b64}"

            # Handle different models
            if model == "hunyuan-video-foley":
                # Use Hunyuan Video Foley
                processed_video_bytes = await asyncio.to_thread(
                    self.wavespeed_client.hunyuan_video_foley,
                    video=video_uri,
                    prompt=prompt,
                    seed=seed if seed is not None else -1,
                    enable_sync_mode=False,  # Always use async with polling
                    timeout=600,  # 10 minutes max for long videos
                    progress_callback=progress_callback,
                )
            else:
                # Think Sound or other models (to be implemented)
                logger.warning(f"[AddAudioToVideo] Model '{model}' not yet implemented")
                raise HTTPException(
                    status_code=400,
                    detail=f"Model '{model}' is not yet supported. Currently only 'hunyuan-video-foley' is available."
                )

            # Estimate video duration (rough estimate: 1MB ≈ 1 second at 1080p)
            # Only needed for Hunyuan Video Foley (per-second pricing)
            estimated_duration = max(5, len(video_data) / (1024 * 1024)) if model == "hunyuan-video-foley" else 10.0
            cost = self.calculate_cost(model, estimated_duration)

            # Save processed video
            from .video_studio_service import VideoStudioService
            video_service = VideoStudioService()
            save_result = video_service._save_video_file(
                video_bytes=processed_video_bytes,
                operation_type="add_audio",
                user_id=user_id,
            )

            logger.info(f"[AddAudioToVideo] Audio addition successful: user={user_id}, model={model}, cost=${cost:.4f}")

            return {
                "success": True,
                "video_url": save_result["file_url"],
                "video_bytes": processed_video_bytes,
                "cost": cost,
                "model_used": model,
                "metadata": {
                    "original_size": len(video_data),
                    "processed_size": len(processed_video_bytes),
                    "estimated_duration": estimated_duration,
                    "has_prompt": prompt is not None,
                },
            }

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[AddAudioToVideo] Audio addition failed: {e}", exc_info=True)
            raise HTTPException(
                status_code=500,
                detail=f"Adding audio to video failed: {str(e)}"
            )