# Source listing metadata: 143 lines, 5.6 KiB, Python
"""
Add Audio to Video service for Video Studio.

Supports multiple models for adding audio to videos:
1. Hunyuan Video Foley - Generate realistic Foley and ambient audio from video
2. Think Sound - (To be added)
"""
|
|
|
|
import asyncio
|
|
import base64
|
|
from typing import Dict, Any, Optional, Callable
|
|
from fastapi import HTTPException
|
|
|
|
from utils.logger_utils import get_service_logger
|
|
from ..wavespeed.client import WaveSpeedClient
|
|
|
|
# Module-level logger shared by everything in this file; obtained from the
# project's logging helper under the "video_studio.add_audio_to_video" name.
logger = get_service_logger("video_studio.add_audio_to_video")
|
|
|
|
|
|
class AddAudioToVideoService:
    """Service for adding AI-generated audio to an existing video.

    Only the "hunyuan-video-foley" model is wired up to the WaveSpeed client.
    "think-sound" is priced by ``calculate_cost`` but ``add_audio`` rejects it
    (and any other model) with HTTP 400.
    """

    # Model identifiers accepted by the public methods.
    _HUNYUAN_FOLEY_MODEL = "hunyuan-video-foley"
    _THINK_SOUND_MODEL = "think-sound"

    # Estimated per-second pricing (USD) and the billed-duration window.
    _COST_PER_SECOND = 0.02
    _MIN_BILLED_SECONDS = 5.0
    _MAX_BILLED_SECONDS = 600.0

    # Flat per-video price (USD) for Think Sound.
    _THINK_SOUND_FLAT_COST = 0.05

    # Rough heuristic used to guess duration from file size (1 MiB ~ 1 second).
    _BYTES_PER_ESTIMATED_SECOND = 1024 * 1024

    # Upper bound, in seconds, passed to the WaveSpeed client for one job.
    _GENERATION_TIMEOUT_SECONDS = 600

    def __init__(self):
        """Initialize Add Audio to Video service."""
        self.wavespeed_client = WaveSpeedClient()
        logger.info("[AddAudioToVideo] Service initialized")

    def calculate_cost(self, model: str, duration: float = 10.0) -> float:
        """
        Calculate cost for adding audio to video operation.

        Args:
            model: Model to use ("hunyuan-video-foley" or "think-sound").
                Unknown models are priced like "hunyuan-video-foley".
            duration: Video duration in seconds. Ignored for "think-sound".

        Returns:
            Cost in USD. Per-second models bill at $0.02/s with the duration
            clamped to the range [5, 600] seconds; "think-sound" is a flat
            $0.05 per video.
        """
        if model == self._THINK_SOUND_MODEL:
            return self._THINK_SOUND_FLAT_COST

        # Hunyuan Video Foley and the default fallback share the same
        # per-second estimate, so they are handled by a single code path.
        billed_duration = max(
            self._MIN_BILLED_SECONDS,
            min(duration, self._MAX_BILLED_SECONDS),
        )
        return self._COST_PER_SECOND * billed_duration

    async def add_audio(
        self,
        video_data: bytes,
        model: str = "hunyuan-video-foley",
        prompt: Optional[str] = None,
        seed: Optional[int] = None,
        user_id: Optional[str] = None,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> Dict[str, Any]:
        """
        Add audio to video using AI models.

        Args:
            video_data: Source video as bytes. Must not be empty.
            model: Model to use. Only "hunyuan-video-foley" is currently
                supported; anything else is rejected with HTTP 400.
            prompt: Optional text prompt describing desired sounds.
            seed: Random seed for reproducibility. ``None`` is sent to the
                client as -1 (random).
            user_id: User ID for tracking; passed to the save step and logs.
            progress_callback: Optional callback for progress updates. It is
                forwarded to the WaveSpeed client call, which runs in a worker
                thread via ``asyncio.to_thread``.
                NOTE(review): this presumably means the callback is invoked
                off the event-loop thread - confirm against the client.

        Returns:
            Dict with keys "success", "video_url", "video_bytes", "cost",
            "model_used" and "metadata" (original/processed sizes, the
            size-based duration estimate, and whether a prompt was given).

        Raises:
            HTTPException: 400 for empty video data or an unsupported model;
                500 for any other failure (the original exception is chained
                as ``__cause__``).
        """
        try:
            logger.info(f"[AddAudioToVideo] Audio addition request: user={user_id}, model={model}, has_prompt={prompt is not None}")

            # Validate cheap preconditions first so a bad request never pays
            # for base64-encoding a potentially very large video.
            if model != self._HUNYUAN_FOLEY_MODEL:
                # Think Sound or other models (to be implemented)
                logger.warning(f"[AddAudioToVideo] Model '{model}' not yet implemented")
                raise HTTPException(
                    status_code=400,
                    detail=f"Model '{model}' is not yet supported. Currently only 'hunyuan-video-foley' is available."
                )

            if not video_data:
                # An empty payload cannot produce a result; fail as a client
                # error instead of forwarding it to the remote API.
                raise HTTPException(
                    status_code=400,
                    detail="Video data is empty."
                )

            # Convert video to base64 data URI
            video_b64 = base64.b64encode(video_data).decode('utf-8')
            video_uri = f"data:video/mp4;base64,{video_b64}"

            # The client call is blocking, so run it in a worker thread to
            # keep the event loop responsive.
            processed_video_bytes = await asyncio.to_thread(
                self.wavespeed_client.hunyuan_video_foley,
                video=video_uri,
                prompt=prompt,
                seed=seed if seed is not None else -1,
                enable_sync_mode=False,  # Always use async with polling
                timeout=self._GENERATION_TIMEOUT_SECONDS,  # 10 minutes max for long videos
                progress_callback=progress_callback,
            )

            # Estimate video duration (rough estimate: 1MB ~ 1 second at 1080p).
            # Only Hunyuan Video Foley reaches this point, and it is priced
            # per second.
            estimated_duration = max(5, len(video_data) / self._BYTES_PER_ESTIMATED_SECOND)
            cost = self.calculate_cost(model, estimated_duration)

            # Save processed video. Imported here rather than at module level
            # (presumably to avoid a circular import - confirm before moving).
            from .video_studio_service import VideoStudioService
            video_service = VideoStudioService()
            save_result = video_service._save_video_file(
                video_bytes=processed_video_bytes,
                operation_type="add_audio",
                user_id=user_id,
            )

            logger.info(f"[AddAudioToVideo] Audio addition successful: user={user_id}, model={model}, cost=${cost:.4f}")

            return {
                "success": True,
                "video_url": save_result["file_url"],
                "video_bytes": processed_video_bytes,
                "cost": cost,
                "model_used": model,
                "metadata": {
                    "original_size": len(video_data),
                    "processed_size": len(processed_video_bytes),
                    "estimated_duration": estimated_duration,
                    "has_prompt": prompt is not None,
                },
            }

        except HTTPException:
            # Already a well-formed API error; propagate unchanged.
            raise
        except Exception as e:
            logger.error(f"[AddAudioToVideo] Audio addition failed: {e}", exc_info=True)
            raise HTTPException(
                status_code=500,
                detail=f"Adding audio to video failed: {str(e)}"
            ) from e
|