Base code
15
backend/services/video_studio/__init__.py
Normal file
@@ -0,0 +1,15 @@
"""
Video Studio Services

Provides AI-powered video generation capabilities including:
- Text-to-video generation
- Image-to-video transformation
- Avatar and face generation
- Video enhancement

Integrates with WaveSpeed AI models for high-quality results.
"""

from .video_studio_service import VideoStudioService

__all__ = ["VideoStudioService"]
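For downstream callers the package surface is just the re-exported service class; a minimal usage sketch (the import path is assumed from how the sibling modules import `services.*`):

from services.video_studio import VideoStudioService

video_studio = VideoStudioService()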
142
backend/services/video_studio/add_audio_to_video_service.py
Normal file
@@ -0,0 +1,142 @@
"""
Add Audio to Video service for Video Studio.

Supports multiple models for adding audio to videos:
1. Hunyuan Video Foley - Generate realistic Foley and ambient audio from video
2. Think Sound - (To be added)
"""

import asyncio
import base64
from typing import Dict, Any, Optional, Callable
from fastapi import HTTPException

from utils.logger_utils import get_service_logger
from ..wavespeed.client import WaveSpeedClient

logger = get_service_logger("video_studio.add_audio_to_video")


class AddAudioToVideoService:
    """Service for adding audio to video operations."""

    def __init__(self):
        """Initialize Add Audio to Video service."""
        self.wavespeed_client = WaveSpeedClient()
        logger.info("[AddAudioToVideo] Service initialized")

    def calculate_cost(self, model: str, duration: float = 10.0) -> float:
        """
        Calculate cost for adding audio to video operation.

        Args:
            model: Model to use ("hunyuan-video-foley" or "think-sound")
            duration: Video duration in seconds (for Hunyuan Video Foley)

        Returns:
            Cost in USD
        """
        if model == "hunyuan-video-foley":
            # Estimated pricing: $0.02/s (similar to other video processing models)
            # Minimum charge: 5 seconds
            # Maximum: 600 seconds (10 minutes)
            cost_per_second = 0.02
            billed_duration = max(5.0, min(duration, 600.0))
            return cost_per_second * billed_duration
        elif model == "think-sound":
            # Think Sound pricing: $0.05 per video (flat rate)
            return 0.05
        else:
            # Default fallback
            cost_per_second = 0.02
            billed_duration = max(5.0, min(duration, 600.0))
            return cost_per_second * billed_duration

    async def add_audio(
        self,
        video_data: bytes,
        model: str = "hunyuan-video-foley",
        prompt: Optional[str] = None,
        seed: Optional[int] = None,
        user_id: str = None,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> Dict[str, Any]:
        """
        Add audio to video using AI models.

        Args:
            video_data: Source video as bytes
            model: Model to use ("hunyuan-video-foley" or "think-sound")
            prompt: Optional text prompt describing desired sounds (Hunyuan Video Foley)
            seed: Random seed for reproducibility (-1 for random)
            user_id: User ID for tracking
            progress_callback: Optional callback for progress updates

        Returns:
            Dict with processed video_url, cost, and metadata
        """
        try:
            logger.info(f"[AddAudioToVideo] Audio addition request: user={user_id}, model={model}, has_prompt={prompt is not None}")

            # Convert video to base64 data URI
            video_b64 = base64.b64encode(video_data).decode('utf-8')
            video_uri = f"data:video/mp4;base64,{video_b64}"

            # Handle different models
            if model == "hunyuan-video-foley":
                # Use Hunyuan Video Foley
                processed_video_bytes = await asyncio.to_thread(
                    self.wavespeed_client.hunyuan_video_foley,
                    video=video_uri,
                    prompt=prompt,
                    seed=seed if seed is not None else -1,
                    enable_sync_mode=False,  # Always use async with polling
                    timeout=600,  # 10 minutes max for long videos
                    progress_callback=progress_callback,
                )
            else:
                # Think Sound or other models (to be implemented)
                logger.warning(f"[AddAudioToVideo] Model '{model}' not yet implemented")
                raise HTTPException(
                    status_code=400,
                    detail=f"Model '{model}' is not yet supported. Currently only 'hunyuan-video-foley' is available."
                )

            # Estimate video duration (rough estimate: 1MB ≈ 1 second at 1080p)
            # Only needed for Hunyuan Video Foley (per-second pricing)
            estimated_duration = max(5, len(video_data) / (1024 * 1024)) if model == "hunyuan-video-foley" else 10.0
            cost = self.calculate_cost(model, estimated_duration)

            # Save processed video
            from .video_studio_service import VideoStudioService
            video_service = VideoStudioService()
            save_result = video_service._save_video_file(
                video_bytes=processed_video_bytes,
                operation_type="add_audio",
                user_id=user_id,
            )

            logger.info(f"[AddAudioToVideo] Audio addition successful: user={user_id}, model={model}, cost=${cost:.4f}")

            return {
                "success": True,
                "video_url": save_result["file_url"],
                "video_bytes": processed_video_bytes,
                "cost": cost,
                "model_used": model,
                "metadata": {
                    "original_size": len(video_data),
                    "processed_size": len(processed_video_bytes),
                    "estimated_duration": estimated_duration,
                    "has_prompt": prompt is not None,
                },
            }

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[AddAudioToVideo] Audio addition failed: {e}", exc_info=True)
            raise HTTPException(
                status_code=500,
                detail=f"Adding audio to video failed: {str(e)}"
            )
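The Hunyuan Video Foley pricing above clamps the billable duration into a 5-600 second window before applying the per-second rate (which the code comments mark as an estimate). A minimal sketch of that clamp with worked numbers (the helper name is illustrative):

def _billed_foley_duration(duration: float) -> float:
    # Same clamp as calculate_cost: 5 s minimum charge, 600 s (10 min) cap.
    return max(5.0, min(duration, 600.0))

assert _billed_foley_duration(2.0) == 5.0      # a 2 s clip is billed as 5 s  -> $0.10 at $0.02/s
assert _billed_foley_duration(60.0) == 60.0    # 1 min is billed as-is        -> $1.20
assert _billed_foley_duration(900.0) == 600.0  # 15 min is capped at 10 min   -> $12.00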
122
backend/services/video_studio/avatar_service.py
Normal file
@@ -0,0 +1,122 @@
"""
Avatar Studio Service

Service for creating talking avatars using InfiniteTalk and Hunyuan Avatar.
Supports both models with automatic selection or explicit model choice.
"""

from typing import Dict, Any, Optional
from fastapi import HTTPException
from loguru import logger

from services.image_studio.infinitetalk_adapter import InfiniteTalkService
from services.video_studio.hunyuan_avatar_adapter import HunyuanAvatarService
from utils.logger_utils import get_service_logger

logger = get_service_logger("video_studio.avatar")


class AvatarStudioService:
    """Service for Avatar Studio operations using InfiniteTalk and Hunyuan Avatar."""

    def __init__(self):
        """Initialize Avatar Studio service."""
        self.infinitetalk_service = InfiniteTalkService()
        self.hunyuan_avatar_service = HunyuanAvatarService()
        logger.info("[AvatarStudio] Service initialized with InfiniteTalk and Hunyuan Avatar")

    async def create_talking_avatar(
        self,
        image_base64: str,
        audio_base64: str,
        resolution: str = "720p",
        prompt: Optional[str] = None,
        mask_image_base64: Optional[str] = None,
        seed: Optional[int] = None,
        user_id: str = "video_studio",
        model: str = "infinitetalk",
        progress_callback: Optional[callable] = None,
    ) -> Dict[str, Any]:
        """
        Create talking avatar video using InfiniteTalk or Hunyuan Avatar.

        Args:
            image_base64: Person image in base64 or data URI
            audio_base64: Audio file in base64 or data URI
            resolution: Output resolution (480p or 720p)
            prompt: Optional prompt for expression/style
            mask_image_base64: Optional mask for animatable regions (InfiniteTalk only)
            seed: Optional random seed
            user_id: User ID for tracking
            model: Model to use - "infinitetalk" (default) or "hunyuan-avatar"
            progress_callback: Optional progress callback function

        Returns:
            Dictionary with video_bytes, metadata, cost, and file info
        """
        logger.info(
            f"[AvatarStudio] Creating talking avatar: user={user_id}, resolution={resolution}, model={model}"
        )

        try:
            if model == "hunyuan-avatar":
                # Use Hunyuan Avatar (doesn't support mask_image)
                result = await self.hunyuan_avatar_service.create_talking_avatar(
                    image_base64=image_base64,
                    audio_base64=audio_base64,
                    resolution=resolution,
                    prompt=prompt,
                    seed=seed,
                    user_id=user_id,
                    progress_callback=progress_callback,
                )
            else:
                # Default to InfiniteTalk
                result = await self.infinitetalk_service.create_talking_avatar(
                    image_base64=image_base64,
                    audio_base64=audio_base64,
                    resolution=resolution,
                    prompt=prompt,
                    mask_image_base64=mask_image_base64,
                    seed=seed,
                    user_id=user_id,
                )

            logger.info(
                f"[AvatarStudio] ✅ Talking avatar created: "
                f"model={model}, resolution={resolution}, duration={result.get('duration', 0)}s, "
                f"cost=${result.get('cost', 0):.2f}"
            )

            return result

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[AvatarStudio] ❌ Error creating talking avatar: {str(e)}", exc_info=True)
            raise HTTPException(
                status_code=500,
                detail=f"Failed to create talking avatar: {str(e)}"
            )

    def calculate_cost_estimate(
        self,
        resolution: str,
        estimated_duration: float,
        model: str = "infinitetalk",
    ) -> float:
        """
        Calculate estimated cost for talking avatar generation.

        Args:
            resolution: Output resolution (480p or 720p)
            estimated_duration: Estimated video duration in seconds
            model: Model to use - "infinitetalk" (default) or "hunyuan-avatar"

        Returns:
            Estimated cost in USD
        """
        if model == "hunyuan-avatar":
            return self.hunyuan_avatar_service.calculate_cost(resolution, estimated_duration)
        else:
            return self.infinitetalk_service.calculate_cost(resolution, estimated_duration)
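A hedged caller sketch for the model switch above: the `model` argument routes to Hunyuan Avatar, and anything else falls back to the InfiniteTalk default; only the InfiniteTalk path forwards a mask. File paths and the helper name below are illustrative.

import asyncio
import base64

async def demo_talking_avatar(service: AvatarStudioService) -> dict:
    image_b64 = base64.b64encode(open("person.png", "rb").read()).decode()
    audio_b64 = base64.b64encode(open("speech.mp3", "rb").read()).decode()
    return await service.create_talking_avatar(
        image_base64=image_b64,
        audio_base64=audio_b64,
        resolution="720p",
        model="hunyuan-avatar",  # or "infinitetalk" (default) to use mask_image_base64
        user_id="demo_user",
    )

# asyncio.run(demo_talking_avatar(AvatarStudioService()))  # requires live WaveSpeed credentials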
206
backend/services/video_studio/face_swap_service.py
Normal file
@@ -0,0 +1,206 @@
"""
Face Swap service for Video Studio.

Supports two models:
1. MoCha (wavespeed-ai/wan-2.1/mocha) - Character replacement with motion preservation
2. Video Face Swap (wavespeed-ai/video-face-swap) - Simple face swap with multi-face support
"""

import base64
from typing import Dict, Any, Optional, Callable
from fastapi import HTTPException

from utils.logger_utils import get_service_logger
from ..wavespeed.client import WaveSpeedClient

logger = get_service_logger("video_studio.face_swap")


class FaceSwapService:
    """Service for face/character swap operations."""

    def __init__(self):
        """Initialize Face Swap service."""
        self.wavespeed_client = WaveSpeedClient()
        logger.info("[FaceSwap] Service initialized")

    def calculate_cost(self, model: str, resolution: Optional[str] = None, duration: float = 10.0) -> float:
        """
        Calculate cost for face swap operation.

        Args:
            model: Model to use ("mocha" or "video-face-swap")
            resolution: Output resolution for MoCha ("480p" or "720p"), ignored for video-face-swap
            duration: Video duration in seconds

        Returns:
            Cost in USD
        """
        if model == "video-face-swap":
            # Video Face Swap pricing: $0.01/s
            # Minimum charge: 5 seconds
            # Maximum: 600 seconds (10 minutes)
            cost_per_second = 0.01
            billed_duration = max(5.0, min(duration, 600.0))
            return cost_per_second * billed_duration
        else:
            # MoCha pricing: $0.04/s (480p), $0.08/s (720p)
            # Minimum charge: 5 seconds
            # Maximum billed: 120 seconds
            pricing = {
                "480p": 0.04,
                "720p": 0.08,
            }
            cost_per_second = pricing.get(resolution or "480p", pricing["480p"])
            billed_duration = max(5.0, min(duration, 120.0))
            return cost_per_second * billed_duration

    async def swap_face(
        self,
        image_data: bytes,
        video_data: bytes,
        model: str = "mocha",
        prompt: Optional[str] = None,
        resolution: str = "480p",
        seed: Optional[int] = None,
        target_gender: str = "all",
        target_index: int = 0,
        user_id: str = None,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> Dict[str, Any]:
        """
        Perform face/character swap using MoCha or Video Face Swap.

        Args:
            image_data: Reference image as bytes
            video_data: Source video as bytes
            model: Model to use ("mocha" or "video-face-swap")
            prompt: Optional prompt to guide the swap (MoCha only)
            resolution: Output resolution for MoCha ("480p" or "720p")
            seed: Random seed for reproducibility (MoCha only)
            target_gender: Filter which faces to swap (video-face-swap only: "all", "female", "male")
            target_index: Select which face to swap (video-face-swap only: 0 = largest)
            user_id: User ID for tracking
            progress_callback: Optional callback for progress updates

        Returns:
            Dict with swapped video_url, cost, and metadata
        """
        try:
            logger.info(
                f"[FaceSwap] Face swap request: user={user_id}, "
                f"model={model}, resolution={resolution if model == 'mocha' else 'N/A'}"
            )

            if not user_id:
                raise ValueError("user_id is required for face swap")

            # Validate model
            if model not in ("mocha", "video-face-swap"):
                raise ValueError("Model must be 'mocha' or 'video-face-swap'")

            # Convert image to base64 data URI
            image_b64 = base64.b64encode(image_data).decode('utf-8')
            image_uri = f"data:image/png;base64,{image_b64}"

            # Convert video to base64 data URI
            video_b64 = base64.b64encode(video_data).decode('utf-8')
            video_uri = f"data:video/mp4;base64,{video_b64}"

            # Estimate duration (we'll use a default, actual duration would come from video metadata)
            estimated_duration = 10.0  # Default estimate, should be improved with actual video duration

            # Calculate cost estimate
            cost = self.calculate_cost(model, resolution if model == "mocha" else None, estimated_duration)

            if progress_callback:
                model_name = "MoCha" if model == "mocha" else "Video Face Swap"
                progress_callback(10.0, f"Submitting face swap request to {model_name}...")

            # Perform face swap based on model
            if model == "mocha":
                # Validate resolution for MoCha
                if resolution not in ("480p", "720p"):
                    raise ValueError("Resolution must be '480p' or '720p' for MoCha")

                # face_swap is synchronous (uses sync_mode internally)
                swapped_video_bytes = self.wavespeed_client.face_swap(
                    image=image_uri,
                    video=video_uri,
                    prompt=prompt,
                    resolution=resolution,
                    seed=seed,
                    enable_sync_mode=True,
                    timeout=600,  # 10 minutes timeout
                    progress_callback=progress_callback,
                )
            else:  # video-face-swap
                # video_face_swap is synchronous (uses sync_mode internally)
                swapped_video_bytes = self.wavespeed_client.video_face_swap(
                    video=video_uri,
                    face_image=image_uri,
                    target_gender=target_gender,
                    target_index=target_index,
                    enable_sync_mode=True,
                    timeout=600,  # 10 minutes timeout
                    progress_callback=progress_callback,
                )

            if progress_callback:
                progress_callback(90.0, "Face swap complete, saving video...")

            # Save swapped video
            from . import VideoStudioService
            video_service = VideoStudioService()
            save_result = video_service._save_video_file(
                video_bytes=swapped_video_bytes,
                operation_type="face_swap",
                user_id=user_id,
            )

            # Recalculate cost with actual duration if available
            # For now, use estimated cost
            actual_cost = cost

            logger.info(
                f"[FaceSwap] Face swap successful: user={user_id}, "
                f"resolution={resolution}, cost=${actual_cost:.4f}"
            )

            metadata = {
                "original_image_size": len(image_data),
                "original_video_size": len(video_data),
                "swapped_video_size": len(swapped_video_bytes),
                "model": model,
            }

            if model == "mocha":
                metadata.update({
                    "resolution": resolution,
                    "seed": seed,
                    "prompt": prompt,
                })
            else:  # video-face-swap
                metadata.update({
                    "target_gender": target_gender,
                    "target_index": target_index,
                })

            return {
                "success": True,
                "video_url": save_result["file_url"],
                "video_bytes": swapped_video_bytes,
                "cost": actual_cost,
                "model": model,
                "resolution": resolution if model == "mocha" else None,
                "metadata": metadata,
            }

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[FaceSwap] Face swap error: {e}", exc_info=True)
            return {
                "success": False,
                "error": str(e)
            }
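The two models above bill differently: MoCha per second at a resolution-dependent rate with a 120-second cap, Video Face Swap at a flat $0.01/s with a 600-second cap, both with a 5-second minimum. Worked numbers for a 30-second clip, mirroring `calculate_cost` (rates taken from the comments above):

mocha_480p_cost = 0.04 * max(5.0, min(30.0, 120.0))       # $1.20
mocha_720p_cost = 0.08 * max(5.0, min(30.0, 120.0))       # $2.40
video_face_swap_cost = 0.01 * max(5.0, min(30.0, 600.0))  # $0.30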
148
backend/services/video_studio/hunyuan_avatar_adapter.py
Normal file
@@ -0,0 +1,148 @@
"""Hunyuan Avatar adapter for Avatar Studio."""

import asyncio
from typing import Any, Dict, Optional
from fastapi import HTTPException
from loguru import logger

from services.wavespeed.hunyuan_avatar import create_hunyuan_avatar, calculate_hunyuan_avatar_cost
from services.wavespeed.client import WaveSpeedClient
from utils.logger_utils import get_service_logger

logger = get_service_logger("video_studio.hunyuan_avatar")


class HunyuanAvatarService:
    """Adapter for Hunyuan Avatar in Avatar Studio context."""

    def __init__(self, client: Optional[WaveSpeedClient] = None):
        """Initialize Hunyuan Avatar service adapter."""
        self.client = client or WaveSpeedClient()
        logger.info("[Hunyuan Avatar Adapter] Service initialized")

    def calculate_cost(self, resolution: str, duration: float) -> float:
        """Calculate cost for Hunyuan Avatar video.

        Args:
            resolution: Output resolution (480p or 720p)
            duration: Video duration in seconds

        Returns:
            Cost in USD
        """
        return calculate_hunyuan_avatar_cost(resolution, duration)

    async def create_talking_avatar(
        self,
        image_base64: str,
        audio_base64: str,
        resolution: str = "480p",
        prompt: Optional[str] = None,
        seed: Optional[int] = None,
        user_id: str = "video_studio",
        progress_callback: Optional[callable] = None,
    ) -> Dict[str, Any]:
        """Create talking avatar video using Hunyuan Avatar.

        Args:
            image_base64: Person image in base64 or data URI
            audio_base64: Audio file in base64 or data URI
            resolution: Output resolution (480p or 720p, default: 480p)
            prompt: Optional prompt for expression/style
            seed: Optional random seed
            user_id: User ID for tracking
            progress_callback: Optional progress callback function

        Returns:
            Dictionary with video_bytes, metadata, and cost
        """
        # Validate resolution
        if resolution not in ["480p", "720p"]:
            raise HTTPException(
                status_code=400,
                detail="Resolution must be '480p' or '720p' for Hunyuan Avatar"
            )

        # Decode image
        import base64
        try:
            if image_base64.startswith("data:"):
                if "," not in image_base64:
                    raise ValueError("Invalid data URI format: missing comma separator")
                header, encoded = image_base64.split(",", 1)
                mime_parts = header.split(":")[1].split(";")[0] if ":" in header else "image/png"
                image_mime = mime_parts.strip() or "image/png"
                image_bytes = base64.b64decode(encoded)
            else:
                image_bytes = base64.b64decode(image_base64)
                image_mime = "image/png"
        except Exception as e:
            raise HTTPException(
                status_code=400,
                detail=f"Failed to decode image: {str(e)}"
            )

        # Decode audio
        try:
            if audio_base64.startswith("data:"):
                if "," not in audio_base64:
                    raise ValueError("Invalid data URI format: missing comma separator")
                header, encoded = audio_base64.split(",", 1)
                mime_parts = header.split(":")[1].split(";")[0] if ":" in header else "audio/mpeg"
                audio_mime = mime_parts.strip() or "audio/mpeg"
                audio_bytes = base64.b64decode(encoded)
            else:
                audio_bytes = base64.b64decode(audio_base64)
                audio_mime = "audio/mpeg"
        except Exception as e:
            raise HTTPException(
                status_code=400,
                detail=f"Failed to decode audio: {str(e)}"
            )

        # Call Hunyuan Avatar function (run in thread since it's synchronous)
        try:
            result = await asyncio.to_thread(
                create_hunyuan_avatar,
                image_bytes=image_bytes,
                audio_bytes=audio_bytes,
                resolution=resolution,
                prompt=prompt,
                seed=seed,
                user_id=user_id,
                image_mime=image_mime,
                audio_mime=audio_mime,
                client=self.client,
                progress_callback=progress_callback,
            )
        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[Hunyuan Avatar Adapter] Error: {str(e)}", exc_info=True)
            raise HTTPException(
                status_code=500,
                detail=f"Hunyuan Avatar generation failed: {str(e)}"
            )

        # Calculate actual cost based on duration
        actual_cost = self.calculate_cost(resolution, result.get("duration", 5.0))

        # Update result with actual cost and additional metadata
        result["cost"] = actual_cost
        result["resolution"] = resolution

        # Get video dimensions from resolution
        resolution_dims = {
            "480p": (854, 480),
            "720p": (1280, 720),
        }
        width, height = resolution_dims.get(resolution, (854, 480))
        result["width"] = width
        result["height"] = height

        logger.info(
            f"[Hunyuan Avatar Adapter] ✅ Generated talking avatar: "
            f"resolution={resolution}, duration={result.get('duration', 5.0)}s, cost=${actual_cost:.2f}"
        )

        return result
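The adapter above accepts either a bare base64 string or a full data URI and recovers both the raw bytes and the MIME type. A minimal standalone sketch of that parsing (the helper name is illustrative):

import base64

def decode_data_uri(value: str, default_mime: str) -> tuple:
    """Return (raw_bytes, mime) from a data URI or a bare base64 string."""
    if value.startswith("data:"):
        header, encoded = value.split(",", 1)              # "data:image/png;base64" | payload
        mime = header.split(":")[1].split(";")[0] or default_mime
        return base64.b64decode(encoded), mime
    return base64.b64decode(value), default_mime

payload = "data:image/jpeg;base64," + base64.b64encode(b"\xff\xd8\xff").decode()
data, mime = decode_data_uri(payload, "image/png")
assert mime == "image/jpeg" and data.startswith(b"\xff\xd8")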
156
backend/services/video_studio/platform_specs.py
Normal file
@@ -0,0 +1,156 @@
"""
Platform specifications for Social Optimizer.

Defines aspect ratios, duration limits, file size limits, and other requirements
for each social media platform.
"""

from dataclasses import dataclass
from typing import List, Optional
from enum import Enum


class Platform(Enum):
    """Social media platforms."""
    INSTAGRAM = "instagram"
    TIKTOK = "tiktok"
    YOUTUBE = "youtube"
    LINKEDIN = "linkedin"
    FACEBOOK = "facebook"
    TWITTER = "twitter"


@dataclass
class PlatformSpec:
    """Platform specification for video optimization."""
    platform: Platform
    name: str
    aspect_ratio: str  # e.g., "9:16", "16:9", "1:1"
    width: int
    height: int
    max_duration: float  # seconds
    max_file_size_mb: float  # MB
    formats: List[str]  # e.g., ["mp4", "mov"]
    description: str


# Platform specifications
PLATFORM_SPECS: List[PlatformSpec] = [
    PlatformSpec(
        platform=Platform.INSTAGRAM,
        name="Instagram Reels",
        aspect_ratio="9:16",
        width=1080,
        height=1920,
        max_duration=90.0,  # 90 seconds
        max_file_size_mb=4000.0,  # 4GB
        formats=["mp4"],
        description="Vertical video format for Instagram Reels",
    ),
    PlatformSpec(
        platform=Platform.TIKTOK,
        name="TikTok",
        aspect_ratio="9:16",
        width=1080,
        height=1920,
        max_duration=60.0,  # 60 seconds
        max_file_size_mb=287.0,  # 287MB
        formats=["mp4", "mov"],
        description="Vertical video format for TikTok",
    ),
    PlatformSpec(
        platform=Platform.YOUTUBE,
        name="YouTube Shorts",
        aspect_ratio="9:16",
        width=1080,
        height=1920,
        max_duration=60.0,  # 60 seconds
        max_file_size_mb=256000.0,  # 256GB (very high limit)
        formats=["mp4", "mov", "webm"],
        description="Vertical video format for YouTube Shorts",
    ),
    PlatformSpec(
        platform=Platform.LINKEDIN,
        name="LinkedIn Video",
        aspect_ratio="16:9",
        width=1920,
        height=1080,
        max_duration=600.0,  # 10 minutes
        max_file_size_mb=5000.0,  # 5GB
        formats=["mp4"],
        description="Horizontal video format for LinkedIn",
    ),
    PlatformSpec(
        platform=Platform.LINKEDIN,
        name="LinkedIn Video (Square)",
        aspect_ratio="1:1",
        width=1080,
        height=1080,
        max_duration=600.0,  # 10 minutes
        max_file_size_mb=5000.0,  # 5GB
        formats=["mp4"],
        description="Square video format for LinkedIn",
    ),
    PlatformSpec(
        platform=Platform.FACEBOOK,
        name="Facebook Video",
        aspect_ratio="16:9",
        width=1920,
        height=1080,
        max_duration=240.0,  # 240 seconds (4 minutes)
        max_file_size_mb=4000.0,  # 4GB
        formats=["mp4", "mov"],
        description="Horizontal video format for Facebook",
    ),
    PlatformSpec(
        platform=Platform.FACEBOOK,
        name="Facebook Video (Square)",
        aspect_ratio="1:1",
        width=1080,
        height=1080,
        max_duration=240.0,  # 240 seconds
        max_file_size_mb=4000.0,  # 4GB
        formats=["mp4", "mov"],
        description="Square video format for Facebook",
    ),
    PlatformSpec(
        platform=Platform.TWITTER,
        name="Twitter/X Video",
        aspect_ratio="16:9",
        width=1920,
        height=1080,
        max_duration=140.0,  # 140 seconds (2:20)
        max_file_size_mb=512.0,  # 512MB
        formats=["mp4"],
        description="Horizontal video format for Twitter/X",
    ),
]


def get_platform_specs(platform: Platform) -> List[PlatformSpec]:
    """Get all specifications for a platform."""
    return [spec for spec in PLATFORM_SPECS if spec.platform == platform]


def get_platform_spec(platform: Platform, aspect_ratio: Optional[str] = None) -> Optional[PlatformSpec]:
    """Get a specific platform specification."""
    specs = get_platform_specs(platform)
    if aspect_ratio:
        for spec in specs:
            if spec.aspect_ratio == aspect_ratio:
                return spec
    return specs[0] if specs else None


def get_all_platforms() -> List[Platform]:
    """Get all available platforms."""
    return list(Platform)


def get_platform_by_name(name: str) -> Optional[Platform]:
    """Get platform enum by name."""
    name_lower = name.lower()
    for platform in Platform:
        if platform.value == name_lower:
            return platform
    return None
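Example lookups against the table above; note that `get_platform_spec` falls back to the platform's first entry when no aspect ratio matches, and `get_platform_by_name` is case-insensitive.

reels = get_platform_spec(Platform.INSTAGRAM)                 # only Instagram entry (Reels)
square_linkedin = get_platform_spec(Platform.LINKEDIN, "1:1")

assert (reels.width, reels.height) == (1080, 1920)
assert square_linkedin.name == "LinkedIn Video (Square)"
assert get_platform_by_name("TikTok") is Platform.TIKTOK
assert get_platform_spec(Platform.TWITTER, "9:16").aspect_ratio == "16:9"  # fallback to first spec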
269
backend/services/video_studio/social_optimizer_service.py
Normal file
@@ -0,0 +1,269 @@
"""
Social Optimizer service for platform-specific video optimization.

Creates optimized versions of videos for Instagram, TikTok, YouTube, LinkedIn, Facebook, and Twitter.
"""

import asyncio
import base64
from pathlib import Path
from typing import Dict, Any, List, Optional
from dataclasses import dataclass

from utils.logger_utils import get_service_logger
from .platform_specs import Platform, PlatformSpec, get_platform_spec, get_platform_specs
from .video_processors import (
    convert_aspect_ratio,
    trim_video,
    compress_video,
    extract_thumbnail,
)

logger = get_service_logger("video_studio.social_optimizer")


@dataclass
class OptimizationOptions:
    """Options for video optimization."""
    auto_crop: bool = True
    generate_thumbnails: bool = True
    compress: bool = True
    trim_mode: str = "beginning"  # "beginning", "middle", "end"


@dataclass
class PlatformResult:
    """Result for a single platform optimization."""
    platform: str
    name: str
    aspect_ratio: str
    video_url: str
    thumbnail_url: Optional[str] = None
    duration: float = 0.0
    file_size: int = 0
    width: int = 0
    height: int = 0


class SocialOptimizerService:
    """Service for optimizing videos for social media platforms."""

    def __init__(self):
        """Initialize Social Optimizer service."""
        logger.info("[SocialOptimizer] Service initialized")

    async def optimize_for_platforms(
        self,
        video_bytes: bytes,
        platforms: List[str],
        options: OptimizationOptions,
        user_id: str,
        video_studio_service: Any,  # VideoStudioService
    ) -> Dict[str, Any]:
        """
        Optimize video for multiple platforms.

        Args:
            video_bytes: Source video as bytes
            platforms: List of platform names (e.g., ["instagram", "tiktok"])
            options: Optimization options
            user_id: User ID for file storage
            video_studio_service: VideoStudioService instance for saving files

        Returns:
            Dict with results for each platform
        """
        logger.info(
            f"[SocialOptimizer] Optimizing video for platforms: {platforms}, "
            f"user={user_id}"
        )

        results: List[PlatformResult] = []
        errors: List[Dict[str, str]] = []

        # Process each platform
        for platform_name in platforms:
            try:
                platform_enum = Platform(platform_name.lower())
                platform_specs = get_platform_specs(platform_enum)

                # Process each format variant for the platform
                for spec in platform_specs:
                    try:
                        result = await self._optimize_for_spec(
                            video_bytes=video_bytes,
                            spec=spec,
                            options=options,
                            user_id=user_id,
                            video_studio_service=video_studio_service,
                        )
                        results.append(result)
                    except Exception as e:
                        logger.error(
                            f"[SocialOptimizer] Failed to optimize for {spec.name}: {e}",
                            exc_info=True
                        )
                        errors.append({
                            "platform": platform_name,
                            "format": spec.name,
                            "error": str(e),
                        })
            except ValueError:
                logger.warning(f"[SocialOptimizer] Unknown platform: {platform_name}")
                errors.append({
                    "platform": platform_name,
                    "error": f"Unknown platform: {platform_name}",
                })

        # Calculate total cost (free - FFmpeg processing)
        total_cost = 0.0

        logger.info(
            f"[SocialOptimizer] Optimization complete: "
            f"{len(results)} successful, {len(errors)} errors"
        )

        return {
            "success": len(results) > 0,
            "results": [
                {
                    "platform": r.platform,
                    "name": r.name,
                    "aspect_ratio": r.aspect_ratio,
                    "video_url": r.video_url,
                    "thumbnail_url": r.thumbnail_url,
                    "duration": r.duration,
                    "file_size": r.file_size,
                    "width": r.width,
                    "height": r.height,
                }
                for r in results
            ],
            "errors": errors,
            "cost": total_cost,
        }

    async def _optimize_for_spec(
        self,
        video_bytes: bytes,
        spec: PlatformSpec,
        options: OptimizationOptions,
        user_id: str,
        video_studio_service: Any,
    ) -> PlatformResult:
        """
        Optimize video for a specific platform specification.

        Args:
            video_bytes: Source video as bytes
            spec: Platform specification
            options: Optimization options
            user_id: User ID for file storage
            video_studio_service: VideoStudioService instance

        Returns:
            PlatformResult with optimized video URL and metadata
        """
        logger.info(
            f"[SocialOptimizer] Optimizing for {spec.name} "
            f"({spec.aspect_ratio}, max {spec.max_duration}s)"
        )

        processed_video = video_bytes
        original_size_mb = len(video_bytes) / (1024 * 1024)

        # Step 1: Convert aspect ratio if needed
        if options.auto_crop:
            processed_video = await asyncio.to_thread(
                convert_aspect_ratio,
                processed_video,
                spec.aspect_ratio,
                "center",  # Use center crop for social media
            )
            logger.debug(f"[SocialOptimizer] Aspect ratio converted to {spec.aspect_ratio}")

        # Step 2: Trim if video exceeds max duration
        if spec.max_duration > 0:
            # Get video duration (we'll need to check this)
            # For now, we'll trim if the video is likely too long
            # In a real implementation, we'd use MoviePy to get duration first
            processed_video = await asyncio.to_thread(
                trim_video,
                processed_video,
                start_time=0.0,
                end_time=None,
                max_duration=spec.max_duration,
                trim_mode=options.trim_mode,
            )
            logger.debug(f"[SocialOptimizer] Video trimmed to max {spec.max_duration}s")

        # Step 3: Compress if needed and file size exceeds limit
        if options.compress:
            current_size_mb = len(processed_video) / (1024 * 1024)
            if current_size_mb > spec.max_file_size_mb:
                # Calculate target size (90% of max to be safe)
                target_size_mb = spec.max_file_size_mb * 0.9
                processed_video = await asyncio.to_thread(
                    compress_video,
                    processed_video,
                    target_size_mb=target_size_mb,
                    quality="medium",
                )
                logger.debug(
                    f"[SocialOptimizer] Video compressed: "
                    f"{current_size_mb:.2f}MB -> {len(processed_video) / (1024 * 1024):.2f}MB"
                )

        # Step 4: Save optimized video
        save_result = video_studio_service._save_video_file(
            video_bytes=processed_video,
            operation_type=f"social_optimizer_{spec.platform.value}",
            user_id=user_id,
        )
        video_url = save_result["file_url"]

        # Step 5: Generate thumbnail if requested
        thumbnail_url = None
        if options.generate_thumbnails:
            try:
                thumbnail_bytes = await asyncio.to_thread(
                    extract_thumbnail,
                    processed_video,
                    time_position=None,  # Middle of video
                    width=spec.width,
                    height=spec.height,
                )

                # Save thumbnail
                thumbnail_save_result = video_studio_service._save_video_file(
                    video_bytes=thumbnail_bytes,
                    operation_type=f"social_optimizer_thumbnail_{spec.platform.value}",
                    user_id=user_id,
                )
                thumbnail_url = thumbnail_save_result["file_url"]
                logger.debug(f"[SocialOptimizer] Thumbnail generated: {thumbnail_url}")
            except Exception as e:
                logger.warning(f"[SocialOptimizer] Failed to generate thumbnail: {e}")

        # Get video metadata (duration, file size)
        # For now, we'll estimate based on file size
        # In a real implementation, we'd use MoviePy to get actual duration
        file_size = len(processed_video)
        estimated_duration = spec.max_duration if spec.max_duration > 0 else 10.0

        logger.info(
            f"[SocialOptimizer] Optimization complete for {spec.name}: "
            f"video_url={video_url}, size={file_size} bytes"
        )

        return PlatformResult(
            platform=spec.platform.value,
            name=spec.name,
            aspect_ratio=spec.aspect_ratio,
            video_url=video_url,
            thumbnail_url=thumbnail_url,
            duration=estimated_duration,
            file_size=file_size,
            width=spec.width,
            height=spec.height,
        )
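A hedged caller sketch for the pipeline above (crop, trim, compress, save, thumbnail); it assumes a `VideoStudioService` instance that exposes `_save_video_file` as used by the service, and the helper name is illustrative:

async def demo_optimize(video_bytes: bytes, video_studio_service) -> dict:
    optimizer = SocialOptimizerService()
    return await optimizer.optimize_for_platforms(
        video_bytes=video_bytes,
        platforms=["instagram", "tiktok"],
        options=OptimizationOptions(trim_mode="middle", generate_thumbnails=True),
        user_id="demo_user",
        video_studio_service=video_studio_service,
    )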
@@ -0,0 +1,129 @@
"""
Video Background Remover service for Video Studio.

Removes or replaces video backgrounds using WaveSpeed Video Background Remover.
"""

import asyncio
import base64
from typing import Dict, Any, Optional, Callable
from fastapi import HTTPException

from utils.logger_utils import get_service_logger
from ..wavespeed.client import WaveSpeedClient

logger = get_service_logger("video_studio.video_background_remover")


class VideoBackgroundRemoverService:
    """Service for video background removal/replacement operations."""

    def __init__(self):
        """Initialize Video Background Remover service."""
        self.wavespeed_client = WaveSpeedClient()
        logger.info("[VideoBackgroundRemover] Service initialized")

    def calculate_cost(self, duration: float = 10.0) -> float:
        """
        Calculate cost for video background removal operation.

        Pricing from WaveSpeed documentation:
        - Rate: $0.01 per second
        - Minimum: $0.05 for ≤5 seconds
        - Maximum: $6.00 for 600 seconds (10 minutes)

        Args:
            duration: Video duration in seconds

        Returns:
            Cost in USD
        """
        # Pricing: $0.01 per second
        # Minimum charge: $0.05 for ≤5 seconds
        # Maximum: $6.00 for 600 seconds (10 minutes)
        cost_per_second = 0.01
        if duration <= 5.0:
            return 0.05  # Minimum charge
        elif duration >= 600.0:
            return 6.00  # Maximum charge
        else:
            return duration * cost_per_second

    async def remove_background(
        self,
        video_data: bytes,
        background_image_data: Optional[bytes] = None,
        user_id: str = None,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> Dict[str, Any]:
        """
        Remove or replace video background.

        Args:
            video_data: Source video as bytes
            background_image_data: Optional replacement background image as bytes
            user_id: User ID for tracking
            progress_callback: Optional callback for progress updates

        Returns:
            Dict with processed video_url, cost, and metadata
        """
        try:
            logger.info(f"[VideoBackgroundRemover] Background removal request: user={user_id}, has_background={background_image_data is not None}")

            # Convert video to base64 data URI
            video_b64 = base64.b64encode(video_data).decode('utf-8')
            video_uri = f"data:video/mp4;base64,{video_b64}"

            # Convert background image to base64 if provided
            background_image_uri = None
            if background_image_data:
                image_b64 = base64.b64encode(background_image_data).decode('utf-8')
                background_image_uri = f"data:image/jpeg;base64,{image_b64}"

            # Call WaveSpeed API
            processed_video_bytes = await asyncio.to_thread(
                self.wavespeed_client.remove_background,
                video=video_uri,
                background_image=background_image_uri,
                enable_sync_mode=False,  # Always use async with polling
                timeout=600,  # 10 minutes max for long videos
                progress_callback=progress_callback,
            )

            # Estimate video duration (rough estimate: 1MB ≈ 1 second at 1080p)
            estimated_duration = max(5, len(video_data) / (1024 * 1024))  # Minimum 5 seconds
            cost = self.calculate_cost(estimated_duration)

            # Save processed video
            from .video_studio_service import VideoStudioService
            video_service = VideoStudioService()
            save_result = video_service._save_video_file(
                video_bytes=processed_video_bytes,
                operation_type="background_removal",
                user_id=user_id,
            )

            logger.info(f"[VideoBackgroundRemover] Background removal successful: user={user_id}, cost=${cost:.4f}")

            return {
                "success": True,
                "video_url": save_result["file_url"],
                "video_bytes": processed_video_bytes,
                "cost": cost,
                "has_background_replacement": background_image_data is not None,
                "metadata": {
                    "original_size": len(video_data),
                    "processed_size": len(processed_video_bytes),
                    "estimated_duration": estimated_duration,
                },
            }

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[VideoBackgroundRemover] Background removal failed: {e}", exc_info=True)
            raise HTTPException(
                status_code=500,
                detail=f"Video background removal failed: {str(e)}"
            )
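The background-removal pricing above is piecewise: a $0.05 minimum up to 5 seconds, $0.01 per second in between, and a $6.00 cap at 10 minutes. A standalone sketch with worked numbers (helper name illustrative):

def background_removal_cost(duration: float) -> float:
    if duration <= 5.0:
        return 0.05             # minimum charge
    if duration >= 600.0:
        return 6.00             # 10-minute cap
    return duration * 0.01      # $0.01 per second in between

assert background_removal_cost(3.0) == 0.05
assert round(background_removal_cost(120.0), 2) == 1.20
assert background_removal_cost(1000.0) == 6.00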
647
backend/services/video_studio/video_processors.py
Normal file
@@ -0,0 +1,647 @@
"""
Video processing utilities for Transform Studio.

Handles format conversion, aspect ratio conversion, speed adjustment,
resolution scaling, and compression using MoviePy/FFmpeg.
"""

import io
import tempfile
from pathlib import Path
from typing import Optional, Tuple, Dict, Any
from fastapi import HTTPException

from utils.logger_utils import get_service_logger

logger = get_service_logger("video_studio.video_processors")

try:
    from moviepy import VideoFileClip
    MOVIEPY_AVAILABLE = True
except ImportError:
    MOVIEPY_AVAILABLE = False
    logger.warning("[VideoProcessors] MoviePy not available. Video processing will not work.")


def _check_moviepy():
    """Check if MoviePy is available."""
    if not MOVIEPY_AVAILABLE:
        raise HTTPException(
            status_code=500,
            detail="MoviePy is not installed. Please install it: pip install moviepy imageio imageio-ffmpeg"
        )


def _get_resolution_dimensions(resolution: str) -> Tuple[int, int]:
    """Get width and height for a resolution string."""
    resolution_map = {
        "480p": (854, 480),
        "720p": (1280, 720),
        "1080p": (1920, 1080),
        "1440p": (2560, 1440),
        "4k": (3840, 2160),
    }
    return resolution_map.get(resolution.lower(), (1280, 720))


def _get_aspect_ratio_dimensions(aspect_ratio: str, target_height: int = 720) -> Tuple[int, int]:
    """Get width and height for an aspect ratio."""
    aspect_map = {
        "16:9": (16, 9),
        "9:16": (9, 16),
        "1:1": (1, 1),
        "4:5": (4, 5),
        "21:9": (21, 9),
    }

    if aspect_ratio not in aspect_map:
        return (1280, 720)  # Default to 16:9

    width_ratio, height_ratio = aspect_map[aspect_ratio]
    width = int((width_ratio / height_ratio) * target_height)
    return (width, target_height)


def convert_format(
    video_bytes: bytes,
    output_format: str = "mp4",
    codec: str = "libx264",
    quality: str = "medium",
    audio_codec: str = "aac",
) -> bytes:
    """
    Convert video to a different format.

    Args:
        video_bytes: Input video as bytes
        output_format: Output format (mp4, mov, webm, gif)
        codec: Video codec (libx264, libvpx-vp9, etc.)
        quality: Quality preset (high, medium, low)
        audio_codec: Audio codec (aac, mp3, opus, etc.)

    Returns:
        Converted video as bytes
    """
    _check_moviepy()

    quality_presets = {
        "high": {"bitrate": "5000k", "preset": "slow"},
        "medium": {"bitrate": "2500k", "preset": "medium"},
        "low": {"bitrate": "1000k", "preset": "fast"},
    }
    preset = quality_presets.get(quality, quality_presets["medium"])

    # Save input to temp file
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
        input_file.write(video_bytes)
        input_path = input_file.name

    try:
        # Load video
        clip = VideoFileClip(input_path)

        # Format-specific codec selection
        if output_format == "webm":
            codec = "libvpx-vp9"
            audio_codec = "libopus"
        elif output_format == "gif":
            # For GIF, we need to handle differently
            codec = None
            audio_codec = None
        elif output_format == "mov":
            codec = "libx264"
            audio_codec = "aac"
        else:  # mp4
            codec = codec or "libx264"
            audio_codec = audio_codec or "aac"

        # Write to temp output file
        output_suffix = f".{output_format}" if output_format != "gif" else ".gif"
        with tempfile.NamedTemporaryFile(suffix=output_suffix, delete=False) as output_file:
            output_path = output_file.name

        if output_format == "gif":
            # For GIF, use write_gif
            clip.write_gif(output_path, fps=15, logger=None)
        else:
            # For video formats
            clip.write_videofile(
                output_path,
                codec=codec,
                audio_codec=audio_codec,
                bitrate=preset["bitrate"],
                preset=preset["preset"],
                threads=4,
                logger=None,
            )

        # Read output file
        with open(output_path, "rb") as f:
            output_bytes = f.read()

        # Cleanup
        clip.close()
        Path(input_path).unlink(missing_ok=True)
        Path(output_path).unlink(missing_ok=True)

        logger.info(f"[VideoProcessors] Format conversion successful: {output_format}, size={len(output_bytes)} bytes")
        return output_bytes

    except Exception as e:
        # Cleanup on error
        Path(input_path).unlink(missing_ok=True)
        Path(output_path).unlink(missing_ok=True) if 'output_path' in locals() else None
        logger.error(f"[VideoProcessors] Format conversion failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Format conversion failed: {str(e)}")
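

# Illustrative sketch (not called anywhere in this module): every processor here follows
# the same temp-file round-trip, because MoviePy operates on file paths rather than byte
# buffers -- write the input bytes to a temp .mp4, apply one clip operation, render to a
# second temp file, read it back, and delete both.
def _roundtrip_sketch(video_bytes: bytes, transform) -> bytes:
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
        f.write(video_bytes)
        input_path = f.name
    output_path = None
    clip = VideoFileClip(input_path)
    try:
        clip = transform(clip)  # e.g. a crop, resize, or speed change
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
            output_path = f.name
        clip.write_videofile(output_path, codec="libx264", audio_codec="aac", logger=None)
        return Path(output_path).read_bytes()
    finally:
        clip.close()
        Path(input_path).unlink(missing_ok=True)
        if output_path:
            Path(output_path).unlink(missing_ok=True)
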
def convert_aspect_ratio(
    video_bytes: bytes,
    target_aspect: str,
    crop_mode: str = "center",
) -> bytes:
    """
    Convert video to a different aspect ratio.

    Args:
        video_bytes: Input video as bytes
        target_aspect: Target aspect ratio (16:9, 9:16, 1:1, 4:5, 21:9)
        crop_mode: Crop mode (center, smart, letterbox)

    Returns:
        Converted video as bytes
    """
    _check_moviepy()

    # Save input to temp file
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
        input_file.write(video_bytes)
        input_path = input_file.name

    try:
        # Load video
        clip = VideoFileClip(input_path)
        original_width, original_height = clip.size

        # Calculate target dimensions
        target_width, target_height = _get_aspect_ratio_dimensions(target_aspect, original_height)
        target_aspect_ratio = target_width / target_height
        original_aspect_ratio = original_width / original_height

        # Determine crop dimensions
        if crop_mode == "letterbox":
            # Letterboxing: add black bars
            if target_aspect_ratio > original_aspect_ratio:
                # Target is wider, add horizontal bars
                new_height = int(original_width / target_aspect_ratio)
                y_offset = (original_height - new_height) // 2
                clip = clip.crop(y1=y_offset, y2=y_offset + new_height)
            else:
                # Target is taller, add vertical bars
                new_width = int(original_height * target_aspect_ratio)
                x_offset = (original_width - new_width) // 2
                clip = clip.crop(x1=x_offset, x2=x_offset + new_width)
        else:
            # Center crop (default)
            if target_aspect_ratio > original_aspect_ratio:
                # Need to crop height
                new_height = int(original_width / target_aspect_ratio)
                y_offset = (original_height - new_height) // 2
                clip = clip.crop(y1=y_offset, y2=y_offset + new_height)
            else:
                # Need to crop width
                new_width = int(original_height * target_aspect_ratio)
                x_offset = (original_width - new_width) // 2
                clip = clip.crop(x1=x_offset, x2=x_offset + new_width)

        # Resize to target dimensions (maintain quality)
        clip = clip.resize((target_width, target_height))

        # Write to temp output file
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_file:
            output_path = output_file.name

        clip.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            preset="medium",
            threads=4,
            logger=None,
        )

        # Read output file
        with open(output_path, "rb") as f:
            output_bytes = f.read()

        # Cleanup
        clip.close()
        Path(input_path).unlink(missing_ok=True)
        Path(output_path).unlink(missing_ok=True)

        logger.info(f"[VideoProcessors] Aspect ratio conversion successful: {target_aspect}, size={len(output_bytes)} bytes")
        return output_bytes

    except Exception as e:
        # Cleanup on error
        Path(input_path).unlink(missing_ok=True)
        Path(output_path).unlink(missing_ok=True) if 'output_path' in locals() else None
        logger.error(f"[VideoProcessors] Aspect ratio conversion failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Aspect ratio conversion failed: {str(e)}")
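

# Worked example of the center-crop math above (illustrative, not called anywhere):
# converting a 1920x1080 (16:9) source to 1:1 keeps the full height and center-crops the width.
_src_w, _src_h = 1920, 1080
_target_w, _target_h = _get_aspect_ratio_dimensions("1:1", _src_h)  # (1080, 1080)
_new_w = int(_src_h * (_target_w / _target_h))                      # 1080 px of width kept
_x_offset = (_src_w - _new_w) // 2                                  # crop starts at x = 420
assert (_new_w, _x_offset) == (1080, 420)
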
def adjust_speed(
|
||||
video_bytes: bytes,
|
||||
speed_factor: float,
|
||||
) -> bytes:
|
||||
"""
|
||||
Adjust video playback speed.
|
||||
|
||||
Args:
|
||||
video_bytes: Input video as bytes
|
||||
speed_factor: Speed multiplier (0.25, 0.5, 1.0, 1.5, 2.0, 4.0)
|
||||
|
||||
Returns:
|
||||
Speed-adjusted video as bytes
|
||||
"""
|
||||
_check_moviepy()
|
||||
|
||||
if speed_factor <= 0:
|
||||
raise HTTPException(status_code=400, detail="Speed factor must be greater than 0")
|
||||
|
||||
# Save input to temp file
|
||||
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
|
||||
input_file.write(video_bytes)
|
||||
input_path = input_file.name
|
||||
|
||||
try:
|
||||
# Load video
|
||||
clip = VideoFileClip(input_path)
|
||||
|
||||
# Adjust speed using MoviePy's speedx effect
|
||||
try:
|
||||
# Try MoviePy v2 API first
|
||||
from moviepy.video.fx.speedx import speedx
|
||||
clip = clip.fx(speedx, speed_factor)
|
||||
except (ImportError, AttributeError):
|
||||
try:
|
||||
# Fallback: try direct import
|
||||
from moviepy.video.fx import speedx
|
||||
clip = clip.fx(speedx, speed_factor)
|
||||
except (ImportError, AttributeError):
|
||||
# Fallback: Manual speed adjustment (less accurate but works)
|
||||
# This maintains audio sync by adjusting fps and duration
|
||||
original_fps = clip.fps
|
||||
new_fps = original_fps * speed_factor
|
||||
original_duration = clip.duration
|
||||
new_duration = original_duration / speed_factor
|
||||
clip = clip.with_fps(new_fps).with_duration(new_duration)
|
||||
|
||||
# Write to temp output file
|
||||
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_file:
|
||||
output_path = output_file.name
|
||||
|
||||
clip.write_videofile(
|
||||
output_path,
|
||||
codec="libx264",
|
||||
audio_codec="aac",
|
||||
preset="medium",
|
||||
threads=4,
|
||||
logger=None,
|
||||
)
|
||||
|
||||
# Read output file
|
||||
with open(output_path, "rb") as f:
|
||||
output_bytes = f.read()
|
||||
|
||||
# Cleanup
|
||||
clip.close()
|
||||
Path(input_path).unlink(missing_ok=True)
|
||||
Path(output_path).unlink(missing_ok=True)
|
||||
|
||||
logger.info(f"[VideoProcessors] Speed adjustment successful: {speed_factor}x, size={len(output_bytes)} bytes")
|
||||
return output_bytes
|
||||
|
||||
except Exception as e:
|
||||
# Cleanup on error
|
||||
Path(input_path).unlink(missing_ok=True)
|
||||
Path(output_path).unlink(missing_ok=True) if 'output_path' in locals() else None
|
||||
logger.error(f"[VideoProcessors] Speed adjustment failed: {e}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail=f"Speed adjustment failed: {str(e)}")
|
||||
|
||||
|
||||
def scale_resolution(
|
||||
video_bytes: bytes,
|
||||
target_resolution: str,
|
||||
maintain_aspect: bool = True,
|
||||
) -> bytes:
|
||||
"""
|
||||
Scale video to target resolution.
|
||||
|
||||
Args:
|
||||
video_bytes: Input video as bytes
|
||||
target_resolution: Target resolution (480p, 720p, 1080p, 1440p, 4k)
|
||||
maintain_aspect: Whether to maintain aspect ratio
|
||||
|
||||
Returns:
|
||||
Scaled video as bytes
|
||||
"""
|
||||
_check_moviepy()
|
||||
|
||||
# Save input to temp file
|
||||
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
|
||||
input_file.write(video_bytes)
|
||||
input_path = input_file.name
|
||||
|
||||
try:
|
||||
# Load video
|
||||
clip = VideoFileClip(input_path)
|
||||
target_width, target_height = _get_resolution_dimensions(target_resolution)
|
||||
|
||||
# Resize
|
||||
if maintain_aspect:
|
||||
clip = clip.resize(height=target_height) # Maintain aspect ratio
|
||||
else:
|
||||
clip = clip.resize((target_width, target_height))
|
||||
|
||||
# Write to temp output file
|
||||
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_file:
|
||||
output_path = output_file.name
|
||||
|
||||
clip.write_videofile(
|
||||
output_path,
|
||||
codec="libx264",
|
||||
audio_codec="aac",
|
||||
preset="medium",
|
||||
threads=4,
|
||||
logger=None,
|
||||
)
|
||||
|
||||
# Read output file
|
||||
with open(output_path, "rb") as f:
|
||||
output_bytes = f.read()
|
||||
|
||||
# Cleanup
|
||||
clip.close()
|
||||
Path(input_path).unlink(missing_ok=True)
|
||||
Path(output_path).unlink(missing_ok=True)
|
||||
|
||||
logger.info(f"[VideoProcessors] Resolution scaling successful: {target_resolution}, size={len(output_bytes)} bytes")
|
||||
return output_bytes
|
||||
|
||||
except Exception as e:
|
||||
# Cleanup on error
|
||||
Path(input_path).unlink(missing_ok=True)
|
||||
Path(output_path).unlink(missing_ok=True) if 'output_path' in locals() else None
|
||||
logger.error(f"[VideoProcessors] Resolution scaling failed: {e}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail=f"Resolution scaling failed: {str(e)}")
|
||||
|
||||
|
||||
def compress_video(
    video_bytes: bytes,
    target_size_mb: Optional[float] = None,
    quality: str = "medium",
) -> bytes:
    """
    Compress video to reduce file size.

    Args:
        video_bytes: Input video as bytes
        target_size_mb: Target file size in MB (optional)
        quality: Quality preset (high, medium, low)

    Returns:
        Compressed video as bytes
    """
    _check_moviepy()

    quality_presets = {
        "high": {"bitrate": "5000k", "preset": "slow"},
        "medium": {"bitrate": "2500k", "preset": "medium"},
        "low": {"bitrate": "1000k", "preset": "fast"},
    }
    preset = quality_presets.get(quality, quality_presets["medium"])

    # Save input to temp file
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
        input_file.write(video_bytes)
        input_path = input_file.name

    try:
        # Load video
        clip = VideoFileClip(input_path)

        # Calculate bitrate if target size is specified
        if target_size_mb:
            duration = clip.duration
            target_size_bits = target_size_mb * 8 * 1024 * 1024  # Convert MB to bits
            calculated_bitrate = int(target_size_bits / duration)
            # Ensure reasonable bitrate (min 500k, max 10000k)
            bitrate = f"{max(500, min(10000, calculated_bitrate // 1000))}k"
        else:
            bitrate = preset["bitrate"]

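        # Worked example of the target-size math above (illustrative numbers):
        # a 60 s clip with target_size_mb=20 gives
        #   target_size_bits   = 20 * 8 * 1024 * 1024 = 167,772,160 bits
        #   calculated_bitrate = 167,772,160 / 60 ≈ 2,796,202 bits/s -> "2796k"
        # after the 500k-10000k clamp; the result lands near, not exactly at,
        # 20 MB because audio and container overhead are not budgeted here.
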
        # Write to temp output file
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_file:
            output_path = output_file.name

        clip.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            bitrate=bitrate,
            preset=preset["preset"],
            threads=4,
            logger=None,
        )

        # Read output file
        with open(output_path, "rb") as f:
            output_bytes = f.read()

        # Cleanup
        clip.close()
        Path(input_path).unlink(missing_ok=True)
        Path(output_path).unlink(missing_ok=True)

        original_size_mb = len(video_bytes) / (1024 * 1024)
        compressed_size_mb = len(output_bytes) / (1024 * 1024)
        compression_ratio = (1 - compressed_size_mb / original_size_mb) * 100

        logger.info(
            f"[VideoProcessors] Compression successful: "
            f"{original_size_mb:.2f}MB -> {compressed_size_mb:.2f}MB ({compression_ratio:.1f}% reduction)"
        )
        return output_bytes

    except Exception as e:
        # Cleanup on error
        Path(input_path).unlink(missing_ok=True)
        if "output_path" in locals():
            Path(output_path).unlink(missing_ok=True)
        logger.error(f"[VideoProcessors] Compression failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Compression failed: {str(e)}")

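
# Usage sketch (illustrative): compressing a clip down to roughly 20 MB, assuming
# "input.mp4" exists on disk:
#
#     small = compress_video(Path("input.mp4").read_bytes(), target_size_mb=20.0)
#     Path("compressed.mp4").write_bytes(small)
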
def trim_video(
    video_bytes: bytes,
    start_time: float = 0.0,
    end_time: Optional[float] = None,
    max_duration: Optional[float] = None,
    trim_mode: str = "beginning",
) -> bytes:
    """
    Trim video to specified duration or time range.

    Args:
        video_bytes: Input video as bytes
        start_time: Start time in seconds (default: 0.0)
        end_time: End time in seconds (optional, uses video duration if not provided)
        max_duration: Maximum duration in seconds (trims if video is longer)
        trim_mode: How to trim if max_duration is set ("beginning", "middle", "end")

    Returns:
        Trimmed video as bytes
    """
    _check_moviepy()

    # Save input to temp file
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
        input_file.write(video_bytes)
        input_path = input_file.name

    try:
        # Load video
        clip = VideoFileClip(input_path)
        original_duration = clip.duration

        # Determine trim range
        if max_duration and original_duration > max_duration:
            # Need to trim to max_duration
            if trim_mode == "beginning":
                # Keep the beginning
                start_time = 0.0
                end_time = max_duration
            elif trim_mode == "end":
                # Keep the end
                start_time = original_duration - max_duration
                end_time = original_duration
            else:  # middle
                # Keep the middle
                start_time = (original_duration - max_duration) / 2
                end_time = start_time + max_duration
        else:
            # Use provided times or full video
            if end_time is None:
                end_time = original_duration

        # Ensure valid range
        start_time = max(0.0, min(start_time, original_duration))
        end_time = max(start_time, min(end_time, original_duration))

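        # Worked example of the trim-mode logic above: for a 60 s source with
        # max_duration=20, "beginning" keeps 0-20 s, "end" keeps 40-60 s, and
        # "middle" keeps (60 - 20) / 2 = 20 s through 40 s.
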
        # Trim video
        trimmed_clip = clip.subclip(start_time, end_time)

        # Write to temp output file
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_file:
            output_path = output_file.name

        trimmed_clip.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            preset="medium",
            threads=4,
            logger=None,
        )

        # Read output file
        with open(output_path, "rb") as f:
            output_bytes = f.read()

        # Cleanup
        trimmed_clip.close()
        clip.close()
        Path(input_path).unlink(missing_ok=True)
        Path(output_path).unlink(missing_ok=True)

        logger.info(
            f"[VideoProcessors] Video trimmed: {start_time:.2f}s-{end_time:.2f}s, "
            f"duration={end_time - start_time:.2f}s, size={len(output_bytes)} bytes"
        )
        return output_bytes

    except Exception as e:
        # Cleanup on error
        Path(input_path).unlink(missing_ok=True)
        if "output_path" in locals():
            Path(output_path).unlink(missing_ok=True)
        logger.error(f"[VideoProcessors] Video trimming failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Video trimming failed: {str(e)}")

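
# Usage sketch (illustrative): keeping only the middle 20 seconds of a longer clip:
#
#     clipped = trim_video(Path("input.mp4").read_bytes(), max_duration=20.0, trim_mode="middle")
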
def extract_thumbnail(
    video_bytes: bytes,
    time_position: Optional[float] = None,
    width: int = 1280,
    height: int = 720,
) -> bytes:
    """
    Extract a thumbnail frame from video.

    Args:
        video_bytes: Input video as bytes
        time_position: Time position in seconds (default: middle of video)
        width: Thumbnail width (default: 1280)
        height: Thumbnail height (default: 720)

    Returns:
        Thumbnail image as bytes (JPEG format)
    """
    _check_moviepy()

    # Save input to temp file
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
        input_file.write(video_bytes)
        input_path = input_file.name

    try:
        # Load video
        clip = VideoFileClip(input_path)

        # Determine time position
        if time_position is None:
            time_position = clip.duration / 2  # Middle of video

        # Ensure valid time position
        time_position = max(0.0, min(time_position, clip.duration))

        # Get frame at specified time
        frame = clip.get_frame(time_position)

        # Convert numpy array to PIL Image
        from PIL import Image
        img = Image.fromarray(frame)

        # Resize if needed
        if img.size != (width, height):
            img = img.resize((width, height), Image.Resampling.LANCZOS)

        # Convert to bytes (JPEG)
        output_buffer = io.BytesIO()
        img.save(output_buffer, format="JPEG", quality=90)
        output_bytes = output_buffer.getvalue()

        # Cleanup
        clip.close()
        Path(input_path).unlink(missing_ok=True)

        logger.info(
            f"[VideoProcessors] Thumbnail extracted: time={time_position:.2f}s, "
            f"size={width}x{height}, image_size={len(output_bytes)} bytes"
        )
        return output_bytes

    except Exception as e:
        # Cleanup on error
        Path(input_path).unlink(missing_ok=True)
        logger.error(f"[VideoProcessors] Thumbnail extraction failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Thumbnail extraction failed: {str(e)}")
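
# Usage sketch (illustrative): grabbing a 1280x720 JPEG poster frame from the
# midpoint of a clip, assuming "clip.mp4" exists on disk:
#
#     thumb = extract_thumbnail(Path("clip.mp4").read_bytes())
#     Path("thumb.jpg").write_bytes(thumb)
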
1063
backend/services/video_studio/video_studio_service.py
Normal file
File diff suppressed because it is too large
135
backend/services/video_studio/video_translate_service.py
Normal file
@@ -0,0 +1,135 @@
"""
|
||||
Video Translate service for Video Studio.
|
||||
|
||||
Uses HeyGen Video Translate (heygen/video-translate) for video translation.
|
||||
"""
|
||||
|
||||
import base64
|
||||
from typing import Dict, Any, Optional, Callable
|
||||
from fastapi import HTTPException
|
||||
|
||||
from utils.logger_utils import get_service_logger
|
||||
from ..wavespeed.client import WaveSpeedClient
|
||||
|
||||
logger = get_service_logger("video_studio.video_translate")
|
||||
|
||||
|
||||
class VideoTranslateService:
|
||||
"""Service for video translation operations."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize Video Translate service."""
|
||||
self.wavespeed_client = WaveSpeedClient()
|
||||
logger.info("[VideoTranslate] Service initialized")
|
||||
|
||||
def calculate_cost(self, duration: float = 10.0) -> float:
|
||||
"""
|
||||
Calculate cost for video translation operation.
|
||||
|
||||
Args:
|
||||
duration: Video duration in seconds
|
||||
|
||||
Returns:
|
||||
Cost in USD
|
||||
"""
|
||||
# HeyGen Video Translate pricing: $0.0375/s
|
||||
# No minimum charge mentioned in docs, but we'll use 1 second minimum
|
||||
cost_per_second = 0.0375
|
||||
billed_duration = max(1.0, duration)
|
||||
return cost_per_second * billed_duration
|
||||
|
||||
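    # Worked example for the pricing above (illustrative): a 60 s video bills
    # 60 * $0.0375 = $2.25; a 0.5 s clip is billed at the 1 s minimum, $0.0375.
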
    async def translate_video(
        self,
        video_data: bytes,
        output_language: str = "English",
        user_id: Optional[str] = None,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> Dict[str, Any]:
        """
        Translate video to target language using HeyGen Video Translate.

        Args:
            video_data: Source video as bytes
            output_language: Target language for translation
            user_id: User ID for tracking
            progress_callback: Optional callback for progress updates

        Returns:
            Dict with translated video_url, cost, and metadata
        """
        try:
            logger.info(
                f"[VideoTranslate] Video translate request: user={user_id}, "
                f"output_language={output_language}"
            )

            if not user_id:
                raise ValueError("user_id is required for video translation")

            # Convert video to base64 data URI
            video_b64 = base64.b64encode(video_data).decode('utf-8')
            video_uri = f"data:video/mp4;base64,{video_b64}"

            # Estimate duration (default placeholder; actual duration should come from video metadata)
            estimated_duration = 10.0

            # Calculate cost estimate
            cost = self.calculate_cost(estimated_duration)

            if progress_callback:
                progress_callback(10.0, f"Submitting video translation request to HeyGen ({output_language})...")

            # Perform video translation
            # video_translate is synchronous (uses sync_mode internally)
            translated_video_bytes = self.wavespeed_client.video_translate(
                video=video_uri,
                output_language=output_language,
                enable_sync_mode=True,
                timeout=600,  # 10 minutes timeout
                progress_callback=progress_callback,
            )

            if progress_callback:
                progress_callback(90.0, "Video translation complete, saving video...")

            # Save translated video
            from . import VideoStudioService
            video_service = VideoStudioService()
            save_result = video_service._save_video_file(
                video_bytes=translated_video_bytes,
                operation_type="video_translate",
                user_id=user_id,
            )

            # Recalculate cost with actual duration if available
            # For now, use estimated cost
            actual_cost = cost

            logger.info(
                f"[VideoTranslate] Video translate successful: user={user_id}, "
                f"output_language={output_language}, cost=${actual_cost:.4f}"
            )

            metadata = {
                "original_video_size": len(video_data),
                "translated_video_size": len(translated_video_bytes),
                "output_language": output_language,
            }

            return {
                "success": True,
                "video_url": save_result["file_url"],
                "video_bytes": translated_video_bytes,
                "cost": actual_cost,
                "output_language": output_language,
                "metadata": metadata,
            }

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[VideoTranslate] Video translate error: {e}", exc_info=True)
            return {
                "success": False,
                "error": str(e),
            }
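
# Usage sketch (illustrative; assumes an async caller, a configured WaveSpeed API
# key, and that "talk.mp4" exists on disk):
#
#     service = VideoTranslateService()
#     result = await service.translate_video(
#         video_data=open("talk.mp4", "rb").read(),
#         output_language="Spanish",
#         user_id="user-123",
#     )
#     if result["success"]:
#         print(result["video_url"], f"${result['cost']:.4f}")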