Added video studio router and endpoints. Added research router and endpoints. Added youtube router and endpoints. Added onboarding utils router, endpoints, service, models, routes, and utils.
This commit is contained in:
15
backend/services/video_studio/__init__.py
Normal file
15
backend/services/video_studio/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
||||
"""
|
||||
Video Studio Services
|
||||
|
||||
Provides AI-powered video generation capabilities including:
|
||||
- Text-to-video generation
|
||||
- Image-to-video transformation
|
||||
- Avatar and face generation
|
||||
- Video enhancement
|
||||
|
||||
Integrates with WaveSpeed AI models for high-quality results.
|
||||
"""
|
||||
|
||||
from .video_studio_service import VideoStudioService
|
||||
|
||||
__all__ = ["VideoStudioService"]
|
||||
142
backend/services/video_studio/add_audio_to_video_service.py
Normal file
142
backend/services/video_studio/add_audio_to_video_service.py
Normal file
@@ -0,0 +1,142 @@
|
||||
"""
|
||||
Add Audio to Video service for Video Studio.
|
||||
|
||||
Supports multiple models for adding audio to videos:
|
||||
1. Hunyuan Video Foley - Generate realistic Foley and ambient audio from video
|
||||
2. Think Sound - (To be added)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
from typing import Dict, Any, Optional, Callable
|
||||
from fastapi import HTTPException
|
||||
|
||||
from utils.logger_utils import get_service_logger
|
||||
from ..wavespeed.client import WaveSpeedClient
|
||||
|
||||
logger = get_service_logger("video_studio.add_audio_to_video")
|
||||
|
||||
|
||||
class AddAudioToVideoService:
    """Service for adding AI-generated audio to existing videos.

    Only the "hunyuan-video-foley" model is currently wired to the WaveSpeed
    client. "think-sound" has a price entry in ``calculate_cost`` but is
    rejected by ``add_audio`` with an HTTP 400.
    """

    def __init__(self):
        """Initialize Add Audio to Video service with its own WaveSpeed client."""
        self.wavespeed_client = WaveSpeedClient()
        logger.info("[AddAudioToVideo] Service initialized")

    def calculate_cost(self, model: str, duration: float = 10.0) -> float:
        """
        Calculate cost for adding audio to video operation.

        Args:
            model: Model to use ("hunyuan-video-foley" or "think-sound").
                Any other value is priced like "hunyuan-video-foley".
            duration: Video duration in seconds (ignored for "think-sound")

        Returns:
            Cost in USD
        """
        if model == "think-sound":
            # Think Sound pricing: $0.05 per video (flat rate)
            return 0.05

        # Hunyuan Video Foley and the default fallback share one formula:
        # estimated $0.02/s, minimum charge 5 seconds, maximum 600 seconds.
        cost_per_second = 0.02
        billed_duration = max(5.0, min(duration, 600.0))
        return cost_per_second * billed_duration

    async def add_audio(
        self,
        video_data: bytes,
        model: str = "hunyuan-video-foley",
        prompt: Optional[str] = None,
        seed: Optional[int] = None,
        user_id: Optional[str] = None,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> Dict[str, Any]:
        """
        Add audio to video using AI models.

        Args:
            video_data: Source video as bytes
            model: Model to use; only "hunyuan-video-foley" is accepted today
            prompt: Optional text prompt describing desired sounds
            seed: Random seed for reproducibility (None is sent as -1)
            user_id: User ID for tracking
            progress_callback: Optional callback for progress updates; it is
                handed to the WaveSpeed client, which runs in a worker thread

        Returns:
            Dict with "success", "video_url", "video_bytes", "cost",
            "model_used" and a "metadata" dict

        Raises:
            HTTPException: 400 if the model is not supported, 500 if the
                generation or the save step fails for any other reason
        """
        try:
            logger.info(f"[AddAudioToVideo] Audio addition request: user={user_id}, model={model}, has_prompt={prompt is not None}")

            # Reject unsupported models before base64-encoding the video, so a
            # request that can only fail does not pay for the encoding.
            if model != "hunyuan-video-foley":
                # Think Sound or other models (to be implemented)
                logger.warning(f"[AddAudioToVideo] Model '{model}' not yet implemented")
                raise HTTPException(
                    status_code=400,
                    detail=f"Model '{model}' is not yet supported. Currently only 'hunyuan-video-foley' is available."
                )

            # Convert video to base64 data URI
            video_b64 = base64.b64encode(video_data).decode('utf-8')
            video_uri = f"data:video/mp4;base64,{video_b64}"

            # The client call is blocking, so run it off the event loop.
            processed_video_bytes = await asyncio.to_thread(
                self.wavespeed_client.hunyuan_video_foley,
                video=video_uri,
                prompt=prompt,
                seed=seed if seed is not None else -1,
                enable_sync_mode=False,  # Always use async with polling
                timeout=600,  # 10 minutes max for long videos
                progress_callback=progress_callback,
            )

            # Estimate video duration (rough estimate: 1MB ≈ 1 second at 1080p).
            # Needed because Hunyuan Video Foley is priced per second.
            estimated_duration = max(5, len(video_data) / (1024 * 1024))
            cost = self.calculate_cost(model, estimated_duration)

            # Save processed video (imported here, as in the original code)
            from .video_studio_service import VideoStudioService
            video_service = VideoStudioService()
            save_result = video_service._save_video_file(
                video_bytes=processed_video_bytes,
                operation_type="add_audio",
                user_id=user_id,
            )

            logger.info(f"[AddAudioToVideo] Audio addition successful: user={user_id}, model={model}, cost=${cost:.4f}")

            return {
                "success": True,
                "video_url": save_result["file_url"],
                "video_bytes": processed_video_bytes,
                "cost": cost,
                "model_used": model,
                "metadata": {
                    "original_size": len(video_data),
                    "processed_size": len(processed_video_bytes),
                    "estimated_duration": estimated_duration,
                    "has_prompt": prompt is not None,
                },
            }

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[AddAudioToVideo] Audio addition failed: {e}", exc_info=True)
            # Chain the cause so the original traceback stays attached.
            raise HTTPException(
                status_code=500,
                detail=f"Adding audio to video failed: {str(e)}"
            ) from e
|
||||
122
backend/services/video_studio/avatar_service.py
Normal file
122
backend/services/video_studio/avatar_service.py
Normal file
@@ -0,0 +1,122 @@
|
||||
"""
|
||||
Avatar Studio Service
|
||||
|
||||
Service for creating talking avatars using InfiniteTalk and Hunyuan Avatar.
|
||||
Supports both models with automatic selection or explicit model choice.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, Optional
|
||||
from fastapi import HTTPException
|
||||
from loguru import logger
|
||||
|
||||
from services.image_studio.infinitetalk_adapter import InfiniteTalkService
|
||||
from services.video_studio.hunyuan_avatar_adapter import HunyuanAvatarService
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("video_studio.avatar")
|
||||
|
||||
|
||||
class AvatarStudioService:
    """Front door for talking-avatar generation via InfiniteTalk or Hunyuan Avatar."""

    def __init__(self):
        """Instantiate both backend services and log that the service is ready."""
        self.infinitetalk_service = InfiniteTalkService()
        self.hunyuan_avatar_service = HunyuanAvatarService()
        logger.info("[AvatarStudio] Service initialized with InfiniteTalk and Hunyuan Avatar")

    async def create_talking_avatar(
        self,
        image_base64: str,
        audio_base64: str,
        resolution: str = "720p",
        prompt: Optional[str] = None,
        mask_image_base64: Optional[str] = None,
        seed: Optional[int] = None,
        user_id: str = "video_studio",
        model: str = "infinitetalk",
        progress_callback: Optional[callable] = None,
    ) -> Dict[str, Any]:
        """
        Generate a talking-avatar video with the selected backend.

        Args:
            image_base64: Person image in base64 or data URI
            audio_base64: Audio file in base64 or data URI
            resolution: Output resolution (480p or 720p)
            prompt: Optional prompt for expression/style
            mask_image_base64: Optional mask; forwarded to InfiniteTalk only
            seed: Optional random seed
            user_id: User ID for tracking
            model: "hunyuan-avatar" selects Hunyuan Avatar; every other value
                is routed to InfiniteTalk
            progress_callback: Optional progress callback; forwarded to
                Hunyuan Avatar only

        Returns:
            The result dictionary produced by the chosen backend

        Raises:
            HTTPException: propagated unchanged from the backend, or a 500
                wrapping any other exception
        """
        logger.info(
            f"[AvatarStudio] Creating talking avatar: user={user_id}, resolution={resolution}, model={model}"
        )

        # Keyword arguments that both backends receive.
        shared_kwargs = {
            "image_base64": image_base64,
            "audio_base64": audio_base64,
            "resolution": resolution,
            "prompt": prompt,
            "seed": seed,
            "user_id": user_id,
        }

        try:
            if model != "hunyuan-avatar":
                # InfiniteTalk is the default and the only backend given a mask.
                result = await self.infinitetalk_service.create_talking_avatar(
                    mask_image_base64=mask_image_base64,
                    **shared_kwargs,
                )
            else:
                # Hunyuan Avatar gets the progress callback instead of a mask.
                result = await self.hunyuan_avatar_service.create_talking_avatar(
                    progress_callback=progress_callback,
                    **shared_kwargs,
                )

            logger.info(
                f"[AvatarStudio] ✅ Talking avatar created: "
                f"model={model}, resolution={resolution}, duration={result.get('duration', 0)}s, "
                f"cost=${result.get('cost', 0):.2f}"
            )
            return result

        except HTTPException:
            # Backend already produced a proper HTTP error; pass it through.
            raise
        except Exception as e:
            logger.error(f"[AvatarStudio] ❌ Error creating talking avatar: {str(e)}", exc_info=True)
            raise HTTPException(
                status_code=500,
                detail=f"Failed to create talking avatar: {str(e)}"
            )

    def calculate_cost_estimate(
        self,
        resolution: str,
        estimated_duration: float,
        model: str = "infinitetalk",
    ) -> float:
        """
        Estimate the cost of a talking-avatar generation.

        Args:
            resolution: Output resolution (480p or 720p)
            estimated_duration: Estimated video duration in seconds
            model: "hunyuan-avatar" uses the Hunyuan price function; every
                other value uses InfiniteTalk's

        Returns:
            Whatever the selected backend's ``calculate_cost`` returns
        """
        backend = (
            self.hunyuan_avatar_service
            if model == "hunyuan-avatar"
            else self.infinitetalk_service
        )
        return backend.calculate_cost(resolution, estimated_duration)
||||
206
backend/services/video_studio/face_swap_service.py
Normal file
206
backend/services/video_studio/face_swap_service.py
Normal file
@@ -0,0 +1,206 @@
|
||||
"""
|
||||
Face Swap service for Video Studio.
|
||||
|
||||
Supports two models:
|
||||
1. MoCha (wavespeed-ai/wan-2.1/mocha) - Character replacement with motion preservation
|
||||
2. Video Face Swap (wavespeed-ai/video-face-swap) - Simple face swap with multi-face support
|
||||
"""
|
||||
|
||||
import base64
|
||||
from typing import Dict, Any, Optional, Callable
|
||||
from fastapi import HTTPException
|
||||
|
||||
from utils.logger_utils import get_service_logger
|
||||
from ..wavespeed.client import WaveSpeedClient
|
||||
|
||||
logger = get_service_logger("video_studio.face_swap")
|
||||
|
||||
|
||||
class FaceSwapService:
    """Service for face/character swap operations (MoCha and Video Face Swap)."""

    def __init__(self):
        """Initialize Face Swap service with its own WaveSpeed client."""
        self.wavespeed_client = WaveSpeedClient()
        logger.info("[FaceSwap] Service initialized")

    def calculate_cost(self, model: str, resolution: Optional[str] = None, duration: float = 10.0) -> float:
        """
        Calculate cost for face swap operation.

        Args:
            model: "video-face-swap" uses flat per-second pricing; every other
                value is priced as MoCha
            resolution: Output resolution for MoCha ("480p" or "720p"); None or
                an unknown value is priced as "480p". Ignored for video-face-swap
            duration: Video duration in seconds

        Returns:
            Cost in USD
        """
        if model == "video-face-swap":
            # Video Face Swap pricing: $0.01/s
            # Minimum charge: 5 seconds
            # Maximum: 600 seconds (10 minutes)
            cost_per_second = 0.01
            billed_duration = max(5.0, min(duration, 600.0))
            return cost_per_second * billed_duration

        # MoCha pricing: $0.04/s (480p), $0.08/s (720p)
        # Minimum charge: 5 seconds
        # Maximum billed: 120 seconds
        pricing = {
            "480p": 0.04,
            "720p": 0.08,
        }
        cost_per_second = pricing.get(resolution or "480p", pricing["480p"])
        billed_duration = max(5.0, min(duration, 120.0))
        return cost_per_second * billed_duration

    async def swap_face(
        self,
        image_data: bytes,
        video_data: bytes,
        model: str = "mocha",
        prompt: Optional[str] = None,
        resolution: str = "480p",
        seed: Optional[int] = None,
        target_gender: str = "all",
        target_index: int = 0,
        user_id: Optional[str] = None,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> Dict[str, Any]:
        """
        Perform face/character swap using MoCha or Video Face Swap.

        Args:
            image_data: Reference image as bytes
            video_data: Source video as bytes
            model: Model to use ("mocha" or "video-face-swap")
            prompt: Optional prompt to guide the swap (MoCha only)
            resolution: Output resolution for MoCha ("480p" or "720p")
            seed: Random seed for reproducibility (MoCha only)
            target_gender: Which faces to swap (video-face-swap only)
            target_index: Which face to swap (video-face-swap only)
            user_id: User ID for tracking; required
            progress_callback: Optional callback for progress updates. It is
                called here at 10% and 90% and is also handed to the WaveSpeed
                client, which now runs in a worker thread

        Returns:
            On success: dict with "success": True, "video_url", "video_bytes",
            "cost", "model", "resolution" and "metadata".
            On any non-HTTP failure (including invalid arguments): dict with
            "success": False and "error" set to the exception text.

        Raises:
            HTTPException: only when one is raised by a callee; it is
                re-raised unchanged
        """
        # Function-scope import: this module does not import asyncio at file level.
        import asyncio

        try:
            logger.info(
                f"[FaceSwap] Face swap request: user={user_id}, "
                f"model={model}, resolution={resolution if model == 'mocha' else 'N/A'}"
            )

            # Validate every argument before the payloads are base64-encoded,
            # so an invalid request does not pay for the encoding.
            if not user_id:
                raise ValueError("user_id is required for face swap")

            if model not in ("mocha", "video-face-swap"):
                raise ValueError("Model must be 'mocha' or 'video-face-swap'")

            if model == "mocha" and resolution not in ("480p", "720p"):
                raise ValueError("Resolution must be '480p' or '720p' for MoCha")

            # Convert image to base64 data URI
            image_b64 = base64.b64encode(image_data).decode('utf-8')
            image_uri = f"data:image/png;base64,{image_b64}"

            # Convert video to base64 data URI
            video_b64 = base64.b64encode(video_data).decode('utf-8')
            video_uri = f"data:video/mp4;base64,{video_b64}"

            # Default estimate; should be improved with the actual video duration
            estimated_duration = 10.0

            # Cost is an estimate based on the default duration above
            cost = self.calculate_cost(model, resolution if model == "mocha" else None, estimated_duration)

            if progress_callback:
                model_name = "MoCha" if model == "mocha" else "Video Face Swap"
                progress_callback(10.0, f"Submitting face swap request to {model_name}...")

            # The WaveSpeed client calls below are blocking (sync mode, up to
            # the 600 s timeout). Run them in a worker thread so this coroutine
            # does not stall the event loop while waiting.
            if model == "mocha":
                swapped_video_bytes = await asyncio.to_thread(
                    self.wavespeed_client.face_swap,
                    image=image_uri,
                    video=video_uri,
                    prompt=prompt,
                    resolution=resolution,
                    seed=seed,
                    enable_sync_mode=True,
                    timeout=600,  # 10 minutes timeout
                    progress_callback=progress_callback,
                )
            else:  # video-face-swap
                swapped_video_bytes = await asyncio.to_thread(
                    self.wavespeed_client.video_face_swap,
                    video=video_uri,
                    face_image=image_uri,
                    target_gender=target_gender,
                    target_index=target_index,
                    enable_sync_mode=True,
                    timeout=600,  # 10 minutes timeout
                    progress_callback=progress_callback,
                )

            if progress_callback:
                progress_callback(90.0, "Face swap complete, saving video...")

            # Save swapped video (imported here, as in the original code)
            from . import VideoStudioService
            video_service = VideoStudioService()
            save_result = video_service._save_video_file(
                video_bytes=swapped_video_bytes,
                operation_type="face_swap",
                user_id=user_id,
            )

            # The estimated cost is reported; no actual duration is available here.
            logger.info(
                f"[FaceSwap] Face swap successful: user={user_id}, "
                f"resolution={resolution}, cost=${cost:.4f}"
            )

            metadata = {
                "original_image_size": len(image_data),
                "original_video_size": len(video_data),
                "swapped_video_size": len(swapped_video_bytes),
                "model": model,
            }

            # Record only the options that apply to the model that was used.
            if model == "mocha":
                metadata.update({
                    "resolution": resolution,
                    "seed": seed,
                    "prompt": prompt,
                })
            else:  # video-face-swap
                metadata.update({
                    "target_gender": target_gender,
                    "target_index": target_index,
                })

            return {
                "success": True,
                "video_url": save_result["file_url"],
                "video_bytes": swapped_video_bytes,
                "cost": cost,
                "model": model,
                "resolution": resolution if model == "mocha" else None,
                "metadata": metadata,
            }

        except HTTPException:
            raise
        except Exception as e:
            # Failures are reported in the return value, not raised.
            logger.error(f"[FaceSwap] Face swap error: {e}", exc_info=True)
            return {
                "success": False,
                "error": str(e)
            }
|
||||
148
backend/services/video_studio/hunyuan_avatar_adapter.py
Normal file
148
backend/services/video_studio/hunyuan_avatar_adapter.py
Normal file
@@ -0,0 +1,148 @@
|
||||
"""Hunyuan Avatar adapter for Avatar Studio."""
|
||||
|
||||
import asyncio
|
||||
from typing import Any, Dict, Optional
|
||||
from fastapi import HTTPException
|
||||
from loguru import logger
|
||||
|
||||
from services.wavespeed.hunyuan_avatar import create_hunyuan_avatar, calculate_hunyuan_avatar_cost
|
||||
from services.wavespeed.client import WaveSpeedClient
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("video_studio.hunyuan_avatar")
|
||||
|
||||
|
||||
class HunyuanAvatarService:
|
||||
"""Adapter for Hunyuan Avatar in Avatar Studio context."""
|
||||
|
||||
def __init__(self, client: Optional[WaveSpeedClient] = None):
    """Store the WaveSpeed client, creating a new one when none (or a falsy one) is given."""
    if not client:
        client = WaveSpeedClient()
    self.client = client
    logger.info("[Hunyuan Avatar Adapter] Service initialized")
|
||||
|
||||
def calculate_cost(self, resolution: str, duration: float) -> float:
    """Return the Hunyuan Avatar price for a video.

    Delegates to ``calculate_hunyuan_avatar_cost``.

    Args:
        resolution: Output resolution (480p or 720p)
        duration: Video duration in seconds

    Returns:
        Cost in USD
    """
    cost = calculate_hunyuan_avatar_cost(resolution, duration)
    return cost
|
||||
|
||||
async def create_talking_avatar(
|
||||
self,
|
||||
image_base64: str,
|
||||
audio_base64: str,
|
||||
resolution: str = "480p",
|
||||
prompt: Optional[str] = None,
|
||||
seed: Optional[int] = None,
|
||||
user_id: str = "video_studio",
|
||||
progress_callback: Optional[callable] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Create talking avatar video using Hunyuan Avatar.
|
||||
|
||||
Args:
|
||||
image_base64: Person image in base64 or data URI
|
||||
audio_base64: Audio file in base64 or data URI
|
||||
resolution: Output resolution (480p or 720p, default: 480p)
|
||||
prompt: Optional prompt for expression/style
|
||||
seed: Optional random seed
|
||||
user_id: User ID for tracking
|
||||
progress_callback: Optional progress callback function
|
||||
|
||||
Returns:
|
||||
Dictionary with video_bytes, metadata, and cost
|
||||
"""
|
||||
# Validate resolution
|
||||
if resolution not in ["480p", "720p"]:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Resolution must be '480p' or '720p' for Hunyuan Avatar"
|
||||
)
|
||||
|
||||
# Decode image
|
||||
import base64
|
||||
try:
|
||||
if image_base64.startswith("data:"):
|
||||
if "," not in image_base64:
|
||||
raise ValueError("Invalid data URI format: missing comma separator")
|
||||
header, encoded = image_base64.split(",", 1)
|
||||
mime_parts = header.split(":")[1].split(";")[0] if ":" in header else "image/png"
|
||||
image_mime = mime_parts.strip() or "image/png"
|
||||
image_bytes = base64.b64decode(encoded)
|
||||
else:
|
||||
image_bytes = base64.b64decode(image_base64)
|
||||
image_mime = "image/png"
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Failed to decode image: {str(e)}"
|
||||
)
|
||||
|
||||
# Decode audio
|
||||
try:
|
||||
if audio_base64.startswith("data:"):
|
||||
if "," not in audio_base64:
|
||||
raise ValueError("Invalid data URI format: missing comma separator")
|
||||
header, encoded = audio_base64.split(",", 1)
|
||||
mime_parts = header.split(":")[1].split(";")[0] if ":" in header else "audio/mpeg"
|
||||
audio_mime = mime_parts.strip() or "audio/mpeg"
|
||||
audio_bytes = base64.b64decode(encoded)
|
||||
else:
|
||||
audio_bytes = base64.b64decode(audio_base64)
|
||||
audio_mime = "audio/mpeg"
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Failed to decode audio: {str(e)}"
|
||||
)
|
||||
|
||||
# Call Hunyuan Avatar function (run in thread since it's synchronous)
|
||||
try:
|
||||
result = await asyncio.to_thread(
|
||||
create_hunyuan_avatar,
|
||||
image_bytes=image_bytes,
|
||||
audio_bytes=audio_bytes,
|
||||
resolution=resolution,
|
||||
prompt=prompt,
|
||||
seed=seed,
|
||||
user_id=user_id,
|
||||
image_mime=image_mime,
|
||||
audio_mime=audio_mime,
|
||||
client=self.client,
|
||||
progress_callback=progress_callback,
|
||||
)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"[Hunyuan Avatar Adapter] Error: {str(e)}", exc_info=True)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Hunyuan Avatar generation failed: {str(e)}"
|
||||
)
|
||||
|
||||
# Calculate actual cost based on duration
|
||||
actual_cost = self.calculate_cost(resolution, result.get("duration", 5.0))
|
||||
|
||||
# Update result with actual cost and additional metadata
|
||||
result["cost"] = actual_cost
|
||||
result["resolution"] = resolution
|
||||
|
||||
# Get video dimensions from resolution
|
||||
resolution_dims = {
|
||||
"480p": (854, 480),
|
||||
"720p": (1280, 720),
|
||||
}
|
||||
width, height = resolution_dims.get(resolution, (854, 480))
|
||||
result["width"] = width
|
||||
result["height"] = height
|
||||
|
||||
logger.info(
|
||||
f"[Hunyuan Avatar Adapter] ✅ Generated talking avatar: "
|
||||
f"resolution={resolution}, duration={result.get('duration', 5.0)}s, cost=${actual_cost:.2f}"
|
||||
)
|
||||
|
||||
return result
|
||||
156
backend/services/video_studio/platform_specs.py
Normal file
156
backend/services/video_studio/platform_specs.py
Normal file
@@ -0,0 +1,156 @@
|
||||
"""
|
||||
Platform specifications for Social Optimizer.
|
||||
|
||||
Defines aspect ratios, duration limits, file size limits, and other requirements
|
||||
for each social media platform.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class Platform(Enum):
    """Social media platforms known to the Social Optimizer.

    Each member's value is the lowercase platform identifier.
    """

    INSTAGRAM = "instagram"
    TIKTOK = "tiktok"
    YOUTUBE = "youtube"
    LINKEDIN = "linkedin"
    FACEBOOK = "facebook"
    TWITTER = "twitter"
|
||||
|
||||
|
||||
@dataclass
class PlatformSpec:
    """One output format a platform accepts, used for video optimization."""

    # Platform this format belongs to.
    platform: Platform
    # Human-readable format name.
    name: str
    # Aspect ratio as "W:H", e.g. "9:16", "16:9", "1:1".
    aspect_ratio: str
    # Frame width and height in pixels.
    width: int
    height: int
    # Maximum duration in seconds.
    max_duration: float
    # Maximum file size in MB.
    max_file_size_mb: float
    # Accepted container formats, e.g. ["mp4", "mov"].
    formats: List[str]
    # Short free-text description of the format.
    description: str
|
||||
|
||||
|
||||
# Platform specifications
|
||||
PLATFORM_SPECS: List[PlatformSpec] = [
    # --- Vertical (9:16) short-form formats ---
    PlatformSpec(
        platform=Platform.INSTAGRAM,
        name="Instagram Reels",
        aspect_ratio="9:16",
        width=1080,
        height=1920,
        max_duration=90.0,  # 90 seconds
        max_file_size_mb=4000.0,  # 4GB
        formats=["mp4"],
        description="Vertical video format for Instagram Reels",
    ),
    PlatformSpec(
        platform=Platform.TIKTOK,
        name="TikTok",
        aspect_ratio="9:16",
        width=1080,
        height=1920,
        max_duration=60.0,  # 60 seconds
        max_file_size_mb=287.0,  # 287MB
        formats=["mp4", "mov"],
        description="Vertical video format for TikTok",
    ),
    PlatformSpec(
        platform=Platform.YOUTUBE,
        name="YouTube Shorts",
        aspect_ratio="9:16",
        width=1080,
        height=1920,
        max_duration=60.0,  # 60 seconds
        max_file_size_mb=256000.0,  # 256GB (very high limit)
        formats=["mp4", "mov", "webm"],
        description="Vertical video format for YouTube Shorts",
    ),
    # --- LinkedIn: horizontal and square ---
    PlatformSpec(
        platform=Platform.LINKEDIN,
        name="LinkedIn Video",
        aspect_ratio="16:9",
        width=1920,
        height=1080,
        max_duration=600.0,  # 10 minutes
        max_file_size_mb=5000.0,  # 5GB
        formats=["mp4"],
        description="Horizontal video format for LinkedIn",
    ),
    PlatformSpec(
        platform=Platform.LINKEDIN,
        name="LinkedIn Video (Square)",
        aspect_ratio="1:1",
        width=1080,
        height=1080,
        max_duration=600.0,  # 10 minutes
        max_file_size_mb=5000.0,  # 5GB
        formats=["mp4"],
        description="Square video format for LinkedIn",
    ),
    # --- Facebook: horizontal and square ---
    PlatformSpec(
        platform=Platform.FACEBOOK,
        name="Facebook Video",
        aspect_ratio="16:9",
        width=1920,
        height=1080,
        max_duration=240.0,  # 240 seconds (4 minutes)
        max_file_size_mb=4000.0,  # 4GB
        formats=["mp4", "mov"],
        description="Horizontal video format for Facebook",
    ),
    PlatformSpec(
        platform=Platform.FACEBOOK,
        name="Facebook Video (Square)",
        aspect_ratio="1:1",
        width=1080,
        height=1080,
        max_duration=240.0,  # 240 seconds
        max_file_size_mb=4000.0,  # 4GB
        formats=["mp4", "mov"],
        description="Square video format for Facebook",
    ),
    # --- Twitter/X: horizontal ---
    PlatformSpec(
        platform=Platform.TWITTER,
        name="Twitter/X Video",
        aspect_ratio="16:9",
        width=1920,
        height=1080,
        max_duration=140.0,  # 140 seconds (2:20)
        max_file_size_mb=512.0,  # 512MB
        formats=["mp4"],
        description="Horizontal video format for Twitter/X",
    ),
]
|
||||
|
||||
|
||||
def get_platform_specs(platform: Platform) -> List[PlatformSpec]:
    """Return every entry of ``PLATFORM_SPECS`` for ``platform``, in table order.

    The list is empty when the platform has no entries.
    """
    matching: List[PlatformSpec] = []
    for candidate in PLATFORM_SPECS:
        if candidate.platform == platform:
            matching.append(candidate)
    return matching
|
||||
|
||||
|
||||
def get_platform_spec(platform: Platform, aspect_ratio: Optional[str] = None) -> Optional[PlatformSpec]:
    """Pick a single specification for ``platform``.

    Args:
        platform: Platform whose specifications are searched.
        aspect_ratio: Preferred aspect ratio such as "16:9"; optional.

    Returns:
        The first spec with the requested aspect ratio, if there is one.
        Otherwise the platform's first spec, or ``None`` when the platform
        has no specs at all.
    """
    candidates = get_platform_specs(platform)
    if not candidates:
        return None
    if aspect_ratio:
        exact = next((c for c in candidates if c.aspect_ratio == aspect_ratio), None)
        if exact is not None:
            return exact
    # No ratio requested, or no spec matched it: fall back to the first entry.
    return candidates[0]
|
||||
|
||||
|
||||
def get_all_platforms() -> List[Platform]:
    """Return all ``Platform`` members in definition order."""
    return [member for member in Platform]
|
||||
|
||||
|
||||
def get_platform_by_name(name: str) -> Optional[Platform]:
    """Get platform enum by name.

    Matching ignores case and surrounding whitespace.

    Args:
        name: Platform identifier such as "instagram" or " TikTok ".

    Returns:
        The matching ``Platform`` member, or ``None`` when ``name`` is not a
        string or matches no member.
    """
    # A missing or non-string name cannot match; return None instead of
    # failing on ``.lower()``.
    if not isinstance(name, str):
        return None
    wanted = name.strip().lower()
    for platform in Platform:
        if platform.value == wanted:
            return platform
    return None
|
||||
269
backend/services/video_studio/social_optimizer_service.py
Normal file
269
backend/services/video_studio/social_optimizer_service.py
Normal file
@@ -0,0 +1,269 @@
|
||||
"""
|
||||
Social Optimizer service for platform-specific video optimization.
|
||||
|
||||
Creates optimized versions of videos for Instagram, TikTok, YouTube, LinkedIn, Facebook, and Twitter.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, List, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
from utils.logger_utils import get_service_logger
|
||||
from .platform_specs import Platform, PlatformSpec, get_platform_spec, get_platform_specs
|
||||
from .video_processors import (
|
||||
convert_aspect_ratio,
|
||||
trim_video,
|
||||
compress_video,
|
||||
extract_thumbnail,
|
||||
)
|
||||
|
||||
logger = get_service_logger("video_studio.social_optimizer")
|
||||
|
||||
|
||||
@dataclass
class OptimizationOptions:
    """Switches that control how a video is optimized for a platform."""

    # Convert the video to the platform's aspect ratio.
    auto_crop: bool = True
    # Extract and save a thumbnail for each optimized video.
    generate_thumbnails: bool = True
    # Compress when the result exceeds the platform's file-size limit.
    compress: bool = True
    # Trim mode forwarded to trim_video: "beginning", "middle", "end".
    trim_mode: str = "beginning"
|
||||
|
||||
|
||||
@dataclass
class PlatformResult:
    """Outcome of optimizing one video for one platform format."""

    # Platform identifier (the Platform enum value).
    platform: str
    # Format name, e.g. "Instagram Reels".
    name: str
    # Aspect ratio of the format as "W:H".
    aspect_ratio: str
    # URL of the saved optimized video.
    video_url: str
    # URL of the saved thumbnail, when one was generated.
    thumbnail_url: Optional[str] = None
    # Duration in seconds.
    duration: float = 0.0
    # Size of the optimized video in bytes.
    file_size: int = 0
    # Frame width and height in pixels.
    width: int = 0
    height: int = 0
|
||||
|
||||
|
||||
class SocialOptimizerService:
    """Service for optimizing videos for social media platforms."""

    def __init__(self):
        """Initialize Social Optimizer service."""
        logger.info("[SocialOptimizer] Service initialized")

    async def optimize_for_platforms(
        self,
        video_bytes: bytes,
        platforms: List[str],
        options: OptimizationOptions,
        user_id: str,
        video_studio_service: Any,  # VideoStudioService
    ) -> Dict[str, Any]:
        """
        Optimize video for multiple platforms.

        Every format variant of every requested platform is processed. A
        failure in one variant is recorded and does not stop the others.

        Args:
            video_bytes: Source video as bytes
            platforms: List of platform names (e.g., ["instagram", "tiktok"])
            options: Optimization options
            user_id: User ID for file storage
            video_studio_service: VideoStudioService instance for saving files

        Returns:
            Dict with ``success`` (True when at least one variant succeeded),
            ``results`` (one dict per optimized variant), ``errors`` and ``cost``.
        """
        logger.info(
            f"[SocialOptimizer] Optimizing video for platforms: {platforms}, "
            f"user={user_id}"
        )

        results: List[PlatformResult] = []
        errors: List[Dict[str, str]] = []

        # Process each platform
        for platform_name in platforms:
            # Guard only the lookup. AttributeError covers non-string entries
            # (e.g. None) so one bad item cannot abort the whole batch.
            try:
                platform_enum = Platform(platform_name.lower())
            except (ValueError, AttributeError):
                logger.warning(f"[SocialOptimizer] Unknown platform: {platform_name}")
                errors.append({
                    "platform": platform_name,
                    "error": f"Unknown platform: {platform_name}",
                })
                continue

            # Process each format variant for the platform
            for spec in get_platform_specs(platform_enum):
                try:
                    result = await self._optimize_for_spec(
                        video_bytes=video_bytes,
                        spec=spec,
                        options=options,
                        user_id=user_id,
                        video_studio_service=video_studio_service,
                    )
                    results.append(result)
                except Exception as e:
                    logger.error(
                        f"[SocialOptimizer] Failed to optimize for {spec.name}: {e}",
                        exc_info=True
                    )
                    errors.append({
                        "platform": platform_name,
                        "format": spec.name,
                        "error": str(e),
                    })

        # Calculate total cost (free - FFmpeg processing)
        total_cost = 0.0

        logger.info(
            f"[SocialOptimizer] Optimization complete: "
            f"{len(results)} successful, {len(errors)} errors"
        )

        return {
            "success": len(results) > 0,
            "results": [
                {
                    "platform": r.platform,
                    "name": r.name,
                    "aspect_ratio": r.aspect_ratio,
                    "video_url": r.video_url,
                    "thumbnail_url": r.thumbnail_url,
                    "duration": r.duration,
                    "file_size": r.file_size,
                    "width": r.width,
                    "height": r.height,
                }
                for r in results
            ],
            "errors": errors,
            "cost": total_cost,
        }

    async def _optimize_for_spec(
        self,
        video_bytes: bytes,
        spec: PlatformSpec,
        options: OptimizationOptions,
        user_id: str,
        video_studio_service: Any,
    ) -> PlatformResult:
        """
        Optimize video for a specific platform specification.

        Steps: aspect-ratio conversion (when ``options.auto_crop``), trim to
        the spec's maximum duration, compression (when ``options.compress``
        and the file is over the size limit), save, then optional thumbnail.

        Args:
            video_bytes: Source video as bytes
            spec: Platform specification
            options: Optimization options
            user_id: User ID for file storage
            video_studio_service: VideoStudioService instance

        Returns:
            PlatformResult with optimized video URL and metadata
        """
        logger.info(
            f"[SocialOptimizer] Optimizing for {spec.name} "
            f"({spec.aspect_ratio}, max {spec.max_duration}s)"
        )

        processed_video = video_bytes

        # Step 1: Convert aspect ratio if needed
        if options.auto_crop:
            processed_video = await asyncio.to_thread(
                convert_aspect_ratio,
                processed_video,
                spec.aspect_ratio,
                "center",  # Use center crop for social media
            )
            logger.debug(f"[SocialOptimizer] Aspect ratio converted to {spec.aspect_ratio}")

        # Step 2: Trim to the platform's maximum duration.
        # The source duration is not measured here; trim_video is given the
        # cap and is called for every spec with a positive limit.
        if spec.max_duration > 0:
            processed_video = await asyncio.to_thread(
                trim_video,
                processed_video,
                start_time=0.0,
                end_time=None,
                max_duration=spec.max_duration,
                trim_mode=options.trim_mode,
            )
            logger.debug(f"[SocialOptimizer] Video trimmed to max {spec.max_duration}s")

        # Step 3: Compress if needed and file size exceeds limit
        if options.compress:
            current_size_mb = len(processed_video) / (1024 * 1024)
            if current_size_mb > spec.max_file_size_mb:
                # Calculate target size (90% of max to be safe)
                target_size_mb = spec.max_file_size_mb * 0.9
                processed_video = await asyncio.to_thread(
                    compress_video,
                    processed_video,
                    target_size_mb=target_size_mb,
                    quality="medium",
                )
                logger.debug(
                    f"[SocialOptimizer] Video compressed: "
                    f"{current_size_mb:.2f}MB -> {len(processed_video) / (1024 * 1024):.2f}MB"
                )

        # Step 4: Save optimized video
        save_result = video_studio_service._save_video_file(
            video_bytes=processed_video,
            operation_type=f"social_optimizer_{spec.platform.value}",
            user_id=user_id,
        )
        video_url = save_result["file_url"]

        # Step 5: Generate thumbnail if requested (best effort: a failure is
        # logged and the result is returned without a thumbnail)
        thumbnail_url = None
        if options.generate_thumbnails:
            try:
                thumbnail_bytes = await asyncio.to_thread(
                    extract_thumbnail,
                    processed_video,
                    time_position=None,  # Middle of video
                    width=spec.width,
                    height=spec.height,
                )

                # Save thumbnail
                # NOTE(review): image bytes go through the video save helper;
                # confirm it stores them under a suitable name/extension.
                thumbnail_save_result = video_studio_service._save_video_file(
                    video_bytes=thumbnail_bytes,
                    operation_type=f"social_optimizer_thumbnail_{spec.platform.value}",
                    user_id=user_id,
                )
                thumbnail_url = thumbnail_save_result["file_url"]
                logger.debug(f"[SocialOptimizer] Thumbnail generated: {thumbnail_url}")
            except Exception as e:
                logger.warning(f"[SocialOptimizer] Failed to generate thumbnail: {e}")

        # Video metadata.
        # NOTE(review): the reported duration is the spec's cap (or 10.0),
        # not the measured duration of the produced video.
        file_size = len(processed_video)
        estimated_duration = spec.max_duration if spec.max_duration > 0 else 10.0

        logger.info(
            f"[SocialOptimizer] Optimization complete for {spec.name}: "
            f"video_url={video_url}, size={file_size} bytes"
        )

        return PlatformResult(
            platform=spec.platform.value,
            name=spec.name,
            aspect_ratio=spec.aspect_ratio,
            video_url=video_url,
            thumbnail_url=thumbnail_url,
            duration=estimated_duration,
            file_size=file_size,
            width=spec.width,
            height=spec.height,
        )
|
||||
@@ -0,0 +1,129 @@
|
||||
"""
|
||||
Video Background Remover service for Video Studio.
|
||||
|
||||
Removes or replaces video backgrounds using WaveSpeed Video Background Remover.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
from typing import Dict, Any, Optional, Callable
|
||||
from fastapi import HTTPException
|
||||
|
||||
from utils.logger_utils import get_service_logger
|
||||
from ..wavespeed.client import WaveSpeedClient
|
||||
|
||||
logger = get_service_logger("video_studio.video_background_remover")
|
||||
|
||||
|
||||
class VideoBackgroundRemoverService:
    """Service for video background removal/replacement operations."""

    def __init__(self):
        """Initialize Video Background Remover service."""
        self.wavespeed_client = WaveSpeedClient()
        logger.info("[VideoBackgroundRemover] Service initialized")

    def calculate_cost(self, duration: float = 10.0) -> float:
        """
        Calculate cost for video background removal operation.

        Pricing from WaveSpeed documentation:
        - Rate: $0.01 per second
        - Minimum: $0.05 for ≤5 seconds
        - Maximum: $6.00 for 600 seconds (10 minutes)

        Args:
            duration: Video duration in seconds

        Returns:
            Cost in USD
        """
        cost_per_second = 0.01
        if duration <= 5.0:
            return 0.05  # Minimum charge
        if duration >= 600.0:
            return 6.00  # Maximum charge
        return duration * cost_per_second

    async def remove_background(
        self,
        video_data: bytes,
        background_image_data: Optional[bytes] = None,
        user_id: Optional[str] = None,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> Dict[str, Any]:
        """
        Remove or replace video background.

        Args:
            video_data: Source video as bytes
            background_image_data: Optional replacement background image as bytes
            user_id: User ID for tracking
            progress_callback: Optional callback for progress updates

        Returns:
            Dict with ``success``, ``video_url``, ``video_bytes``, ``cost``,
            ``has_background_replacement`` and ``metadata``.

        Raises:
            HTTPException: 400 when ``video_data`` is empty; 500 when
                processing or saving fails.
        """
        # Reject empty input up front so no request is sent for nothing.
        if not video_data:
            raise HTTPException(status_code=400, detail="Video data is empty")

        try:
            logger.info(f"[VideoBackgroundRemover] Background removal request: user={user_id}, has_background={background_image_data is not None}")

            # Convert video to base64 data URI
            # NOTE(review): MIME types are hard-coded (video/mp4, image/jpeg)
            # regardless of the actual content.
            video_b64 = base64.b64encode(video_data).decode('utf-8')
            video_uri = f"data:video/mp4;base64,{video_b64}"

            # Convert background image to base64 if provided
            background_image_uri = None
            if background_image_data:
                image_b64 = base64.b64encode(background_image_data).decode('utf-8')
                background_image_uri = f"data:image/jpeg;base64,{image_b64}"

            # Call WaveSpeed API
            processed_video_bytes = await asyncio.to_thread(
                self.wavespeed_client.remove_background,
                video=video_uri,
                background_image=background_image_uri,
                enable_sync_mode=False,  # Always use async with polling
                timeout=600,  # 10 minutes max for long videos
                progress_callback=progress_callback,
            )

            # Estimate video duration (rough estimate: 1MB ≈ 1 second at 1080p).
            # The cost below is therefore an estimate, not a measured value.
            estimated_duration = max(5, len(video_data) / (1024 * 1024))  # Minimum 5 seconds
            cost = self.calculate_cost(estimated_duration)

            # Save processed video (imported here rather than at module top)
            from .video_studio_service import VideoStudioService
            video_service = VideoStudioService()
            save_result = video_service._save_video_file(
                video_bytes=processed_video_bytes,
                operation_type="background_removal",
                user_id=user_id,
            )

            logger.info(f"[VideoBackgroundRemover] Background removal successful: user={user_id}, cost=${cost:.4f}")

            return {
                "success": True,
                "video_url": save_result["file_url"],
                "video_bytes": processed_video_bytes,
                "cost": cost,
                "has_background_replacement": background_image_data is not None,
                "metadata": {
                    "original_size": len(video_data),
                    "processed_size": len(processed_video_bytes),
                    "estimated_duration": estimated_duration,
                },
            }

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[VideoBackgroundRemover] Background removal failed: {e}", exc_info=True)
            raise HTTPException(
                status_code=500,
                detail=f"Video background removal failed: {str(e)}"
            )
|
||||
647
backend/services/video_studio/video_processors.py
Normal file
647
backend/services/video_studio/video_processors.py
Normal file
@@ -0,0 +1,647 @@
|
||||
"""
|
||||
Video processing utilities for Transform Studio.
|
||||
|
||||
Handles format conversion, aspect ratio conversion, speed adjustment,
|
||||
resolution scaling, and compression using MoviePy/FFmpeg.
|
||||
"""
|
||||
|
||||
import io
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Optional, Tuple, Dict, Any
|
||||
from fastapi import HTTPException
|
||||
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("video_studio.video_processors")
|
||||
|
||||
try:
|
||||
from moviepy import VideoFileClip
|
||||
MOVIEPY_AVAILABLE = True
|
||||
except ImportError:
|
||||
MOVIEPY_AVAILABLE = False
|
||||
logger.warning("[VideoProcessors] MoviePy not available. Video processing will not work.")
|
||||
|
||||
|
||||
def _check_moviepy():
    """Raise an HTTP 500 error when MoviePy could not be imported.

    Returns ``None`` without side effects when MoviePy is available.
    """
    if MOVIEPY_AVAILABLE:
        return
    raise HTTPException(
        status_code=500,
        detail="MoviePy is not installed. Please install it: pip install moviepy imageio imageio-ffmpeg"
    )
|
||||
|
||||
|
||||
def _get_resolution_dimensions(resolution: str) -> Tuple[int, int]:
|
||||
"""Get width and height for a resolution string."""
|
||||
resolution_map = {
|
||||
"480p": (854, 480),
|
||||
"720p": (1280, 720),
|
||||
"1080p": (1920, 1080),
|
||||
"1440p": (2560, 1440),
|
||||
"4k": (3840, 2160),
|
||||
}
|
||||
return resolution_map.get(resolution.lower(), (1280, 720))
|
||||
|
||||
|
||||
def _get_aspect_ratio_dimensions(aspect_ratio: str, target_height: int = 720) -> Tuple[int, int]:
|
||||
"""Get width and height for an aspect ratio."""
|
||||
aspect_map = {
|
||||
"16:9": (16, 9),
|
||||
"9:16": (9, 16),
|
||||
"1:1": (1, 1),
|
||||
"4:5": (4, 5),
|
||||
"21:9": (21, 9),
|
||||
}
|
||||
|
||||
if aspect_ratio not in aspect_map:
|
||||
return (1280, 720) # Default to 16:9
|
||||
|
||||
width_ratio, height_ratio = aspect_map[aspect_ratio]
|
||||
width = int((width_ratio / height_ratio) * target_height)
|
||||
return (width, target_height)
|
||||
|
||||
|
||||
def convert_format(
    video_bytes: bytes,
    output_format: str = "mp4",
    codec: str = "libx264",
    quality: str = "medium",
    audio_codec: str = "aac",
) -> bytes:
    """
    Convert video to a different format.

    The input is written to a temporary file, re-encoded with MoviePy and
    read back. Both temporary files are removed and the clip is closed
    whether or not the conversion succeeds.

    Args:
        video_bytes: Input video as bytes
        output_format: Output format (mp4, mov, webm, gif); case-insensitive
        codec: Video codec (libx264, libvpx-vp9, etc.). Only honoured for
            formats other than webm, gif and mov, which force their own codec.
        quality: Quality preset (high, medium, low); unknown values use medium
        audio_codec: Audio codec (aac, mp3, opus, etc.)

    Returns:
        Converted video as bytes

    Raises:
        HTTPException: 500 when MoviePy is unavailable or conversion fails.
    """
    _check_moviepy()

    quality_presets = {
        "high": {"bitrate": "5000k", "preset": "slow"},
        "medium": {"bitrate": "2500k", "preset": "medium"},
        "low": {"bitrate": "1000k", "preset": "fast"},
    }
    preset = quality_presets.get(quality, quality_presets["medium"])

    # Normalise so "GIF" / ".webm" pick the same branch as "gif" / "webm".
    output_format = (output_format or "mp4").lower().lstrip(".")

    # Save input to temp file
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
        input_file.write(video_bytes)
        input_path = input_file.name

    clip = None
    output_path = None
    try:
        # Load video
        clip = VideoFileClip(input_path)

        # Format-specific codec selection
        if output_format == "webm":
            codec = "libvpx-vp9"
            audio_codec = "libopus"
        elif output_format == "gif":
            # GIF is written through write_gif and takes no codecs
            codec = None
            audio_codec = None
        elif output_format == "mov":
            codec = "libx264"
            audio_codec = "aac"
        else:  # mp4
            codec = codec or "libx264"
            audio_codec = audio_codec or "aac"

        # Reserve a temp output path; the file is reopened by the writer
        with tempfile.NamedTemporaryFile(suffix=f".{output_format}", delete=False) as output_file:
            output_path = output_file.name

        if output_format == "gif":
            clip.write_gif(output_path, fps=15, logger=None)
        else:
            clip.write_videofile(
                output_path,
                codec=codec,
                audio_codec=audio_codec,
                bitrate=preset["bitrate"],
                preset=preset["preset"],
                threads=4,
                logger=None,
            )

        # Read output file
        with open(output_path, "rb") as f:
            output_bytes = f.read()

        logger.info(f"[VideoProcessors] Format conversion successful: {output_format}, size={len(output_bytes)} bytes")
        return output_bytes

    except Exception as e:
        logger.error(f"[VideoProcessors] Format conversion failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Format conversion failed: {str(e)}")

    finally:
        # Close the clip before deleting the files it may still hold open.
        if clip is not None:
            try:
                clip.close()
            except Exception as close_error:
                logger.warning(f"[VideoProcessors] Failed to close clip: {close_error}")
        Path(input_path).unlink(missing_ok=True)
        if output_path is not None:
            Path(output_path).unlink(missing_ok=True)
|
||||
|
||||
|
||||
def convert_aspect_ratio(
    video_bytes: bytes,
    target_aspect: str,
    crop_mode: str = "center",
) -> bytes:
    """
    Convert video to a different aspect ratio.

    The clip is center-cropped to the target ratio and then resized to the
    target dimensions, which are derived from the source height.

    Args:
        video_bytes: Input video as bytes
        target_aspect: Target aspect ratio (16:9, 9:16, 1:1, 4:5, 21:9)
        crop_mode: Crop mode (center, smart, letterbox).
            NOTE(review): every mode currently performs the same center
            crop; "letterbox" does not add bars and "smart" is not
            implemented.

    Returns:
        Converted video as bytes

    Raises:
        HTTPException: 500 when MoviePy is unavailable or conversion fails.
    """
    _check_moviepy()

    def _crop(target, **box):
        # MoviePy 2.x names this method ``cropped``; 1.x names it ``crop``.
        method = getattr(target, "cropped", None) or getattr(target, "crop")
        return method(**box)

    def _resize(target, size):
        # MoviePy 2.x names this method ``resized``; 1.x names it ``resize``.
        method = getattr(target, "resized", None) or getattr(target, "resize")
        return method(size)

    # Save input to temp file
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
        input_file.write(video_bytes)
        input_path = input_file.name

    source = None
    clip = None
    output_path = None
    try:
        # Load video
        source = VideoFileClip(input_path)
        clip = source
        original_width, original_height = clip.size

        # Calculate target dimensions
        target_width, target_height = _get_aspect_ratio_dimensions(target_aspect, original_height)
        target_aspect_ratio = target_width / target_height
        original_aspect_ratio = original_width / original_height

        # Center crop to the target ratio
        if target_aspect_ratio > original_aspect_ratio:
            # Target is wider than the source: crop height
            new_height = int(original_width / target_aspect_ratio)
            y_offset = (original_height - new_height) // 2
            clip = _crop(clip, y1=y_offset, y2=y_offset + new_height)
        else:
            # Target is taller than (or equal to) the source: crop width
            new_width = int(original_height * target_aspect_ratio)
            x_offset = (original_width - new_width) // 2
            clip = _crop(clip, x1=x_offset, x2=x_offset + new_width)

        # H.264 with 4:2:0 chroma subsampling needs even frame dimensions,
        # so round each side down to the nearest even number.
        output_width = max(2, target_width - target_width % 2)
        output_height = max(2, target_height - target_height % 2)
        clip = _resize(clip, (output_width, output_height))

        # Reserve a temp output path; the file is reopened by the writer
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_file:
            output_path = output_file.name

        clip.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            preset="medium",
            threads=4,
            logger=None,
        )

        # Read output file
        with open(output_path, "rb") as f:
            output_bytes = f.read()

        logger.info(f"[VideoProcessors] Aspect ratio conversion successful: {target_aspect}, size={len(output_bytes)} bytes")
        return output_bytes

    except Exception as e:
        logger.error(f"[VideoProcessors] Aspect ratio conversion failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Aspect ratio conversion failed: {str(e)}")

    finally:
        # Close the derived and source clips before deleting their files.
        for opened in {id(c): c for c in (clip, source) if c is not None}.values():
            try:
                opened.close()
            except Exception as close_error:
                logger.warning(f"[VideoProcessors] Failed to close clip: {close_error}")
        Path(input_path).unlink(missing_ok=True)
        if output_path is not None:
            Path(output_path).unlink(missing_ok=True)
|
||||
|
||||
|
||||
def adjust_speed(
    video_bytes: bytes,
    speed_factor: float,
) -> bytes:
    """
    Adjust video playback speed.

    Args:
        video_bytes: Input video as bytes
        speed_factor: Speed multiplier (0.25, 0.5, 1.0, 1.5, 2.0, 4.0)

    Returns:
        Speed-adjusted video as bytes

    Raises:
        HTTPException: 400 when ``speed_factor`` is not positive; 500 when
            MoviePy is unavailable or processing fails.
    """
    _check_moviepy()

    if speed_factor <= 0:
        raise HTTPException(status_code=400, detail="Speed factor must be greater than 0")

    def _apply_speed(target):
        """Return ``target`` re-timed by ``speed_factor`` with whichever MoviePy API exists."""
        # MoviePy 2.x: dedicated clip method.
        if hasattr(target, "with_speed_scaled"):
            return target.with_speed_scaled(factor=speed_factor)
        # MoviePy 1.x: speedx effect applied through clip.fx.
        try:
            from moviepy.video.fx.speedx import speedx
            return target.fx(speedx, speed_factor)
        except (ImportError, AttributeError):
            pass
        # Last resort kept from the earlier implementation.
        # NOTE(review): this only rewrites fps and duration metadata; confirm
        # it yields a real speed change before relying on it.
        new_fps = target.fps * speed_factor
        new_duration = target.duration / speed_factor
        return target.with_fps(new_fps).with_duration(new_duration)

    # Save input to temp file
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
        input_file.write(video_bytes)
        input_path = input_file.name

    source = None
    clip = None
    output_path = None
    try:
        # Load video
        source = VideoFileClip(input_path)
        clip = _apply_speed(source)

        # Reserve a temp output path; the file is reopened by the writer
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_file:
            output_path = output_file.name

        clip.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            preset="medium",
            threads=4,
            logger=None,
        )

        # Read output file
        with open(output_path, "rb") as f:
            output_bytes = f.read()

        logger.info(f"[VideoProcessors] Speed adjustment successful: {speed_factor}x, size={len(output_bytes)} bytes")
        return output_bytes

    except Exception as e:
        logger.error(f"[VideoProcessors] Speed adjustment failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Speed adjustment failed: {str(e)}")

    finally:
        # Close the derived and source clips before deleting their files.
        for opened in {id(c): c for c in (clip, source) if c is not None}.values():
            try:
                opened.close()
            except Exception as close_error:
                logger.warning(f"[VideoProcessors] Failed to close clip: {close_error}")
        Path(input_path).unlink(missing_ok=True)
        if output_path is not None:
            Path(output_path).unlink(missing_ok=True)
|
||||
|
||||
|
||||
def scale_resolution(
    video_bytes: bytes,
    target_resolution: str,
    maintain_aspect: bool = True,
) -> bytes:
    """
    Scale video to target resolution.

    The input bytes are written to a temporary ``.mp4`` file, resized with
    MoviePy and re-encoded with libx264/AAC into a second temporary file whose
    contents are returned. Both temporary files and all opened clips are
    released whether or not processing succeeds.

    Args:
        video_bytes: Input video as bytes
        target_resolution: Target resolution (480p, 720p, 1080p, 1440p, 4k)
        maintain_aspect: Whether to maintain aspect ratio. When True only the
            target height is applied; when False the clip is forced to the
            exact target width and height.

    Returns:
        Scaled video as bytes

    Raises:
        HTTPException: 500 when any processing step fails. An HTTPException
            raised inside the processing steps is propagated unchanged.
    """
    _check_moviepy()

    input_path = None
    output_path = None
    source_clip = None
    scaled_clip = None

    try:
        target_width, target_height = _get_resolution_dimensions(target_resolution)

        # Save input to temp file
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
            input_file.write(video_bytes)
            input_path = input_file.name

        # Load video
        source_clip = VideoFileClip(input_path)

        # MoviePy v2 renamed ``resize`` to ``resized``; use whichever exists.
        resize = getattr(source_clip, "resized", None) or source_clip.resize
        if maintain_aspect:
            scaled_clip = resize(height=target_height)  # Maintain aspect ratio
        else:
            scaled_clip = resize((target_width, target_height))

        # Reserve a temp output path; the encoder writes to it by name.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_file:
            output_path = output_file.name

        scaled_clip.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            preset="medium",
            threads=4,
            logger=None,
        )

        # Read output file
        with open(output_path, "rb") as f:
            output_bytes = f.read()

        logger.info(f"[VideoProcessors] Resolution scaling successful: {target_resolution}, size={len(output_bytes)} bytes")
        return output_bytes

    except HTTPException:
        # Keep deliberate HTTP errors intact instead of rewrapping them as 500.
        raise
    except Exception as e:
        logger.error(f"[VideoProcessors] Resolution scaling failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Resolution scaling failed: {str(e)}") from e
    finally:
        # Close clips first so the files are no longer held open when deleted.
        for opened_clip in (scaled_clip, source_clip):
            if opened_clip is None:
                continue
            try:
                opened_clip.close()
            except Exception as close_error:
                logger.warning(f"[VideoProcessors] Failed to close clip: {close_error}")
        for temp_path in (input_path, output_path):
            if temp_path is None:
                continue
            try:
                Path(temp_path).unlink(missing_ok=True)
            except OSError as unlink_error:
                logger.warning(f"[VideoProcessors] Failed to remove temp file {temp_path}: {unlink_error}")
|
||||
|
||||
|
||||
def compress_video(
    video_bytes: bytes,
    target_size_mb: Optional[float] = None,
    quality: str = "medium",
) -> bytes:
    """
    Compress video to reduce file size.

    The video is re-encoded with libx264/AAC at a bitrate taken either from
    the quality preset or, when ``target_size_mb`` is given and the clip
    duration is known, derived from the target size and clamped to
    500k-10000k. Temporary files and the opened clip are released whether or
    not processing succeeds.

    Args:
        video_bytes: Input video as bytes
        target_size_mb: Target file size in MB (optional)
        quality: Quality preset (high, medium, low). Unknown values fall back
            to "medium".

    Returns:
        Compressed video as bytes

    Raises:
        HTTPException: 500 when any processing step fails.
    """
    _check_moviepy()

    quality_presets = {
        "high": {"bitrate": "5000k", "preset": "slow"},
        "medium": {"bitrate": "2500k", "preset": "medium"},
        "low": {"bitrate": "1000k", "preset": "fast"},
    }
    preset = quality_presets.get(quality, quality_presets["medium"])

    input_path = None
    output_path = None
    clip = None

    try:
        # Save input to temp file
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
            input_file.write(video_bytes)
            input_path = input_file.name

        # Load video
        clip = VideoFileClip(input_path)

        # Calculate bitrate if target size is specified. A missing or zero
        # duration would make the division fail, so use the preset instead.
        duration = clip.duration
        if target_size_mb and duration:
            target_size_bits = target_size_mb * 8 * 1024 * 1024  # Convert MB to bits
            calculated_bitrate = int(target_size_bits / duration)
            # Ensure reasonable bitrate (min 500k, max 10000k)
            bitrate = f"{max(500, min(10000, calculated_bitrate // 1000))}k"
        else:
            bitrate = preset["bitrate"]

        # Reserve a temp output path; the encoder writes to it by name.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_file:
            output_path = output_file.name

        clip.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            bitrate=bitrate,
            preset=preset["preset"],
            threads=4,
            logger=None,
        )

        # Read output file
        with open(output_path, "rb") as f:
            output_bytes = f.read()

        original_size_mb = len(video_bytes) / (1024 * 1024)
        compressed_size_mb = len(output_bytes) / (1024 * 1024)
        # Guard the ratio so a zero-sized input cannot break the log line.
        if original_size_mb:
            compression_ratio = (1 - compressed_size_mb / original_size_mb) * 100
        else:
            compression_ratio = 0.0

        logger.info(
            f"[VideoProcessors] Compression successful: "
            f"{original_size_mb:.2f}MB -> {compressed_size_mb:.2f}MB ({compression_ratio:.1f}% reduction)"
        )
        return output_bytes

    except HTTPException:
        # Keep deliberate HTTP errors intact instead of rewrapping them as 500.
        raise
    except Exception as e:
        logger.error(f"[VideoProcessors] Compression failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Compression failed: {str(e)}") from e
    finally:
        # Close the clip first so the files are no longer held open when deleted.
        if clip is not None:
            try:
                clip.close()
            except Exception as close_error:
                logger.warning(f"[VideoProcessors] Failed to close clip: {close_error}")
        for temp_path in (input_path, output_path):
            if temp_path is None:
                continue
            try:
                Path(temp_path).unlink(missing_ok=True)
            except OSError as unlink_error:
                logger.warning(f"[VideoProcessors] Failed to remove temp file {temp_path}: {unlink_error}")
|
||||
|
||||
|
||||
def trim_video(
    video_bytes: bytes,
    start_time: float = 0.0,
    end_time: Optional[float] = None,
    max_duration: Optional[float] = None,
    trim_mode: str = "beginning",
) -> bytes:
    """
    Trim video to specified duration or time range.

    When ``max_duration`` is set and the video is longer than it, the
    ``start_time``/``end_time`` arguments are ignored and the range is derived
    from ``trim_mode``. Otherwise the provided times are used, with
    ``end_time`` defaulting to the full duration. The final range is clamped
    to ``[0, duration]``. Temporary files and opened clips are released
    whether or not processing succeeds.

    Args:
        video_bytes: Input video as bytes
        start_time: Start time in seconds (default: 0.0)
        end_time: End time in seconds (optional, uses video duration if not provided)
        max_duration: Maximum duration in seconds (trims if video is longer)
        trim_mode: How to trim if max_duration is set ("beginning", "middle", "end").
            Any value other than "beginning" or "end" is treated as "middle".

    Returns:
        Trimmed video as bytes

    Raises:
        HTTPException: 500 when any processing step fails.
    """
    _check_moviepy()

    input_path = None
    output_path = None
    clip = None
    trimmed_clip = None

    try:
        # Save input to temp file
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
            input_file.write(video_bytes)
            input_path = input_file.name

        # Load video
        clip = VideoFileClip(input_path)
        original_duration = clip.duration

        # Determine trim range
        if max_duration and original_duration > max_duration:
            # Need to trim to max_duration
            if trim_mode == "beginning":
                # Keep the beginning
                start_time = 0.0
                end_time = max_duration
            elif trim_mode == "end":
                # Keep the end
                start_time = original_duration - max_duration
                end_time = original_duration
            else:  # middle
                # Keep the middle
                start_time = (original_duration - max_duration) / 2
                end_time = start_time + max_duration
        elif end_time is None:
            # Use provided times or full video
            end_time = original_duration

        # Ensure valid range
        start_time = max(0.0, min(start_time, original_duration))
        end_time = max(start_time, min(end_time, original_duration))

        # MoviePy v2 renamed ``subclip`` to ``subclipped``; use whichever exists.
        cut = getattr(clip, "subclipped", None) or clip.subclip
        trimmed_clip = cut(start_time, end_time)

        # Reserve a temp output path; the encoder writes to it by name.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_file:
            output_path = output_file.name

        trimmed_clip.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            preset="medium",
            threads=4,
            logger=None,
        )

        # Read output file
        with open(output_path, "rb") as f:
            output_bytes = f.read()

        logger.info(
            f"[VideoProcessors] Video trimmed: {start_time:.2f}s-{end_time:.2f}s, "
            f"duration={end_time - start_time:.2f}s, size={len(output_bytes)} bytes"
        )
        return output_bytes

    except HTTPException:
        # Keep deliberate HTTP errors intact instead of rewrapping them as 500.
        raise
    except Exception as e:
        logger.error(f"[VideoProcessors] Video trimming failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Video trimming failed: {str(e)}") from e
    finally:
        # Close clips first so the files are no longer held open when deleted.
        for opened_clip in (trimmed_clip, clip):
            if opened_clip is None:
                continue
            try:
                opened_clip.close()
            except Exception as close_error:
                logger.warning(f"[VideoProcessors] Failed to close clip: {close_error}")
        for temp_path in (input_path, output_path):
            if temp_path is None:
                continue
            try:
                Path(temp_path).unlink(missing_ok=True)
            except OSError as unlink_error:
                logger.warning(f"[VideoProcessors] Failed to remove temp file {temp_path}: {unlink_error}")
|
||||
|
||||
|
||||
def extract_thumbnail(
    video_bytes: bytes,
    time_position: Optional[float] = None,
    width: int = 1280,
    height: int = 720,
) -> bytes:
    """
    Extract a thumbnail frame from video.

    The frame at ``time_position`` (clamped to ``[0, duration]``) is read,
    resized to exactly ``width`` x ``height`` if it differs, and encoded as a
    JPEG at quality 90. The temporary input file and the opened clip are
    released whether or not processing succeeds.

    Args:
        video_bytes: Input video as bytes
        time_position: Time position in seconds (default: middle of video)
        width: Thumbnail width (default: 1280)
        height: Thumbnail height (default: 720)

    Returns:
        Thumbnail image as bytes (JPEG format)

    Raises:
        HTTPException: 500 when any processing step fails.
    """
    _check_moviepy()

    input_path = None
    clip = None

    try:
        # Save input to temp file
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
            input_file.write(video_bytes)
            input_path = input_file.name

        # Load video
        clip = VideoFileClip(input_path)

        # Determine time position
        if time_position is None:
            time_position = clip.duration / 2  # Middle of video

        # Ensure valid time position
        time_position = max(0.0, min(time_position, clip.duration))

        # Get frame at specified time
        frame = clip.get_frame(time_position)

        # Convert numpy array to PIL Image
        from PIL import Image
        img = Image.fromarray(frame)

        # Resize if needed. ``Image.Resampling`` only exists on newer Pillow
        # releases; older ones expose the filter directly on ``Image``.
        if img.size != (width, height):
            lanczos = getattr(Image, "Resampling", Image).LANCZOS
            img = img.resize((width, height), lanczos)

        # Convert to bytes (JPEG)
        output_buffer = io.BytesIO()
        img.save(output_buffer, format="JPEG", quality=90)
        output_bytes = output_buffer.getvalue()

        logger.info(
            f"[VideoProcessors] Thumbnail extracted: time={time_position:.2f}s, "
            f"size={width}x{height}, image_size={len(output_bytes)} bytes"
        )
        return output_bytes

    except HTTPException:
        # Keep deliberate HTTP errors intact instead of rewrapping them as 500.
        raise
    except Exception as e:
        logger.error(f"[VideoProcessors] Thumbnail extraction failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Thumbnail extraction failed: {str(e)}") from e
    finally:
        # Close the clip first so the file is no longer held open when deleted.
        if clip is not None:
            try:
                clip.close()
            except Exception as close_error:
                logger.warning(f"[VideoProcessors] Failed to close clip: {close_error}")
        if input_path is not None:
            try:
                Path(input_path).unlink(missing_ok=True)
            except OSError as unlink_error:
                logger.warning(f"[VideoProcessors] Failed to remove temp file {input_path}: {unlink_error}")
|
||||
1063
backend/services/video_studio/video_studio_service.py
Normal file
1063
backend/services/video_studio/video_studio_service.py
Normal file
File diff suppressed because it is too large
Load Diff
135
backend/services/video_studio/video_translate_service.py
Normal file
135
backend/services/video_studio/video_translate_service.py
Normal file
@@ -0,0 +1,135 @@
|
||||
"""
|
||||
Video Translate service for Video Studio.
|
||||
|
||||
Uses HeyGen Video Translate (heygen/video-translate) for video translation.
|
||||
"""
|
||||
|
||||
import base64
|
||||
from typing import Dict, Any, Optional, Callable
|
||||
from fastapi import HTTPException
|
||||
|
||||
from utils.logger_utils import get_service_logger
|
||||
from ..wavespeed.client import WaveSpeedClient
|
||||
|
||||
logger = get_service_logger("video_studio.video_translate")
|
||||
|
||||
|
||||
class VideoTranslateService:
    """Service for video translation operations."""

    # HeyGen Video Translate pricing: $0.0375/s
    COST_PER_SECOND = 0.0375
    # No minimum charge mentioned in docs, but we'll use 1 second minimum
    MIN_BILLED_SECONDS = 1.0
    # Placeholder duration used for cost estimation until the real video
    # duration is read from metadata.
    DEFAULT_ESTIMATED_DURATION = 10.0
    # Upper bound passed to the translation client (10 minutes).
    TRANSLATE_TIMEOUT_SECONDS = 600

    def __init__(self):
        """Initialize Video Translate service."""
        self.wavespeed_client = WaveSpeedClient()
        logger.info("[VideoTranslate] Service initialized")

    def calculate_cost(self, duration: float = 10.0) -> float:
        """
        Calculate cost for video translation operation.

        Durations shorter than the one-second minimum are billed as one second.

        Args:
            duration: Video duration in seconds

        Returns:
            Cost in USD
        """
        billed_duration = max(self.MIN_BILLED_SECONDS, duration)
        return self.COST_PER_SECOND * billed_duration

    async def translate_video(
        self,
        video_data: bytes,
        output_language: str = "English",
        user_id: Optional[str] = None,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> Dict[str, Any]:
        """
        Translate video to target language using HeyGen Video Translate.

        Failures other than HTTPException are not raised; they are logged and
        reported through the returned dict.

        Args:
            video_data: Source video as bytes
            output_language: Target language for translation
            user_id: User ID for tracking (required; a missing value is
                reported as a failure)
            progress_callback: Optional callback for progress updates, called
                with a percentage and a status message

        Returns:
            On success, a dict with ``success=True``, ``video_url``,
            ``video_bytes``, ``cost``, ``output_language`` and ``metadata``.
            On failure, a dict with ``success=False`` and ``error``.

        Raises:
            HTTPException: Propagated unchanged when raised by a dependency.
        """
        try:
            logger.info(
                f"[VideoTranslate] Video translate request: user={user_id}, "
                f"output_language={output_language}"
            )

            if not user_id:
                raise ValueError("user_id is required for video translation")

            # Reject empty payloads before spending a paid API call on them.
            if not video_data:
                raise ValueError("video_data is empty")

            # Convert video to base64 data URI
            video_b64 = base64.b64encode(video_data).decode('utf-8')
            video_uri = f"data:video/mp4;base64,{video_b64}"

            # Estimate duration (we'll use a default, actual duration would come from video metadata)
            estimated_duration = self.DEFAULT_ESTIMATED_DURATION

            # Calculate cost estimate
            cost = self.calculate_cost(estimated_duration)

            if progress_callback:
                progress_callback(10.0, f"Submitting video translation request to HeyGen ({output_language})...")

            # Perform video translation
            # video_translate is synchronous (uses sync_mode internally)
            # NOTE(review): being synchronous, this call presumably holds the
            # event loop for its whole duration - confirm whether callers
            # expect it to be offloaded to a worker thread.
            translated_video_bytes = self.wavespeed_client.video_translate(
                video=video_uri,
                output_language=output_language,
                enable_sync_mode=True,
                timeout=self.TRANSLATE_TIMEOUT_SECONDS,
                progress_callback=progress_callback,
            )

            if progress_callback:
                progress_callback(90.0, "Video translation complete, saving video...")

            # Save translated video (imported here rather than at module level)
            from . import VideoStudioService
            video_service = VideoStudioService()
            save_result = video_service._save_video_file(
                video_bytes=translated_video_bytes,
                operation_type="video_translate",
                user_id=user_id,
            )

            # Recalculate cost with actual duration if available
            # For now, use estimated cost
            actual_cost = cost

            logger.info(
                f"[VideoTranslate] Video translate successful: user={user_id}, "
                f"output_language={output_language}, cost=${actual_cost:.4f}"
            )

            metadata = {
                "original_video_size": len(video_data),
                "translated_video_size": len(translated_video_bytes),
                "output_language": output_language,
            }

            return {
                "success": True,
                "video_url": save_result["file_url"],
                "video_bytes": translated_video_bytes,
                "cost": actual_cost,
                "output_language": output_language,
                "metadata": metadata,
            }

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[VideoTranslate] Video translate error: {e}", exc_info=True)
            return {
                "success": False,
                "error": str(e)
            }
|
||||
Reference in New Issue
Block a user