Base code

This commit is contained in:
Kunthawat Greethong
2026-01-08 22:39:53 +07:00
parent 697115c61a
commit c35fa52117
2169 changed files with 626670 additions and 0 deletions

View File

@@ -0,0 +1,15 @@
"""
Video Studio Services
Provides AI-powered video generation capabilities including:
- Text-to-video generation
- Image-to-video transformation
- Avatar and face generation
- Video enhancement
Integrates with WaveSpeed AI models for high-quality results.
"""
from .video_studio_service import VideoStudioService
__all__ = ["VideoStudioService"]

View File

@@ -0,0 +1,142 @@
"""
Add Audio to Video service for Video Studio.
Supports multiple models for adding audio to videos:
1. Hunyuan Video Foley - Generate realistic Foley and ambient audio from video
2. Think Sound - (To be added)
"""
import asyncio
import base64
from typing import Dict, Any, Optional, Callable
from fastapi import HTTPException
from utils.logger_utils import get_service_logger
from ..wavespeed.client import WaveSpeedClient
logger = get_service_logger("video_studio.add_audio_to_video")
class AddAudioToVideoService:
    """Service for adding audio to video operations.

    Currently backed by WaveSpeed's Hunyuan Video Foley model; the
    "think-sound" model is priced but not yet wired up.
    """
    def __init__(self):
        """Initialize Add Audio to Video service."""
        self.wavespeed_client = WaveSpeedClient()
        logger.info("[AddAudioToVideo] Service initialized")
    def calculate_cost(self, model: str, duration: float = 10.0) -> float:
        """
        Calculate cost for adding audio to video operation.

        Args:
            model: Model to use ("hunyuan-video-foley" or "think-sound")
            duration: Video duration in seconds (only affects per-second models)
        Returns:
            Cost in USD
        """
        # Think Sound bills a flat per-video fee.
        if model == "think-sound":
            return 0.05
        # Hunyuan Video Foley -- and, conservatively, any unrecognised
        # model -- bills $0.02/s with a 5 s minimum and a 600 s cap.
        billable_seconds = min(max(duration, 5.0), 600.0)
        return 0.02 * billable_seconds
    async def add_audio(
        self,
        video_data: bytes,
        model: str = "hunyuan-video-foley",
        prompt: Optional[str] = None,
        seed: Optional[int] = None,
        user_id: str = None,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> Dict[str, Any]:
        """
        Add audio to video using AI models.

        Args:
            video_data: Source video as bytes
            model: Model to use ("hunyuan-video-foley" or "think-sound")
            prompt: Optional text prompt describing desired sounds (Hunyuan Video Foley)
            seed: Random seed for reproducibility (-1 for random)
            user_id: User ID for tracking
            progress_callback: Optional callback for progress updates
        Returns:
            Dict with processed video_url, cost, and metadata
        Raises:
            HTTPException: 400 for unsupported models, 500 on processing failure.
        """
        try:
            logger.info(f"[AddAudioToVideo] Audio addition request: user={user_id}, model={model}, has_prompt={prompt is not None}")
            if model != "hunyuan-video-foley":
                # Think Sound (and anything else) is not implemented yet.
                logger.warning(f"[AddAudioToVideo] Model '{model}' not yet implemented")
                raise HTTPException(
                    status_code=400,
                    detail=f"Model '{model}' is not yet supported. Currently only 'hunyuan-video-foley' is available."
                )
            # Ship the source video inline as a base64 data URI.
            encoded = base64.b64encode(video_data).decode('utf-8')
            video_uri = f"data:video/mp4;base64,{encoded}"
            # The WaveSpeed client blocks while polling; keep the event loop free.
            processed = await asyncio.to_thread(
                self.wavespeed_client.hunyuan_video_foley,
                video=video_uri,
                prompt=prompt,
                seed=-1 if seed is None else seed,
                enable_sync_mode=False,  # always use async submission with polling
                timeout=600,  # allow up to 10 minutes for long videos
                progress_callback=progress_callback,
            )
            # Rough duration estimate (1 MB ~= 1 s at 1080p), floored at the
            # 5 s billing minimum; Hunyuan Video Foley bills per second.
            estimated_duration = max(5, len(video_data) / (1024 * 1024))
            cost = self.calculate_cost(model, estimated_duration)
            # Persist the processed video via the main studio service
            # (imported lazily to avoid a circular import).
            from .video_studio_service import VideoStudioService
            saved = VideoStudioService()._save_video_file(
                video_bytes=processed,
                operation_type="add_audio",
                user_id=user_id,
            )
            logger.info(f"[AddAudioToVideo] Audio addition successful: user={user_id}, model={model}, cost=${cost:.4f}")
            return {
                "success": True,
                "video_url": saved["file_url"],
                "video_bytes": processed,
                "cost": cost,
                "model_used": model,
                "metadata": {
                    "original_size": len(video_data),
                    "processed_size": len(processed),
                    "estimated_duration": estimated_duration,
                    "has_prompt": prompt is not None,
                },
            }
        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[AddAudioToVideo] Audio addition failed: {e}", exc_info=True)
            raise HTTPException(
                status_code=500,
                detail=f"Adding audio to video failed: {str(e)}"
            )

View File

@@ -0,0 +1,122 @@
"""
Avatar Studio Service
Service for creating talking avatars using InfiniteTalk and Hunyuan Avatar.
Supports both models with automatic selection or explicit model choice.
"""
from typing import Dict, Any, Optional
from fastapi import HTTPException
from loguru import logger
from services.image_studio.infinitetalk_adapter import InfiniteTalkService
from services.video_studio.hunyuan_avatar_adapter import HunyuanAvatarService
from utils.logger_utils import get_service_logger
logger = get_service_logger("video_studio.avatar")
class AvatarStudioService:
    """Service for Avatar Studio operations using InfiniteTalk and Hunyuan Avatar."""
    def __init__(self):
        """Initialize Avatar Studio service."""
        self.infinitetalk_service = InfiniteTalkService()
        self.hunyuan_avatar_service = HunyuanAvatarService()
        logger.info("[AvatarStudio] Service initialized with InfiniteTalk and Hunyuan Avatar")
    async def create_talking_avatar(
        self,
        image_base64: str,
        audio_base64: str,
        resolution: str = "720p",
        prompt: Optional[str] = None,
        mask_image_base64: Optional[str] = None,
        seed: Optional[int] = None,
        user_id: str = "video_studio",
        model: str = "infinitetalk",
        progress_callback: Optional[callable] = None,
    ) -> Dict[str, Any]:
        """
        Create talking avatar video using InfiniteTalk or Hunyuan Avatar.

        Args:
            image_base64: Person image in base64 or data URI
            audio_base64: Audio file in base64 or data URI
            resolution: Output resolution (480p or 720p)
            prompt: Optional prompt for expression/style
            mask_image_base64: Optional mask for animatable regions (InfiniteTalk only)
            seed: Optional random seed
            user_id: User ID for tracking
            model: Model to use - "infinitetalk" (default) or "hunyuan-avatar"
            progress_callback: Optional progress callback function
        Returns:
            Dictionary with video_bytes, metadata, cost, and file info
        Raises:
            HTTPException: 500 when generation fails (backend HTTP errors
                are re-raised unchanged).
        """
        logger.info(
            f"[AvatarStudio] Creating talking avatar: user={user_id}, resolution={resolution}, model={model}"
        )
        try:
            # Arguments accepted by both backends.
            shared_kwargs = dict(
                image_base64=image_base64,
                audio_base64=audio_base64,
                resolution=resolution,
                prompt=prompt,
                seed=seed,
                user_id=user_id,
            )
            if model == "hunyuan-avatar":
                # Hunyuan Avatar takes a progress callback but has no
                # mask-image support.
                outcome = await self.hunyuan_avatar_service.create_talking_avatar(
                    progress_callback=progress_callback,
                    **shared_kwargs,
                )
            else:
                # Any other value falls through to the default backend,
                # InfiniteTalk, which supports region masks.
                outcome = await self.infinitetalk_service.create_talking_avatar(
                    mask_image_base64=mask_image_base64,
                    **shared_kwargs,
                )
            logger.info(
                f"[AvatarStudio] ✅ Talking avatar created: "
                f"model={model}, resolution={resolution}, duration={outcome.get('duration', 0)}s, "
                f"cost=${outcome.get('cost', 0):.2f}"
            )
            return outcome
        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[AvatarStudio] ❌ Error creating talking avatar: {str(e)}", exc_info=True)
            raise HTTPException(
                status_code=500,
                detail=f"Failed to create talking avatar: {str(e)}"
            )
    def calculate_cost_estimate(
        self,
        resolution: str,
        estimated_duration: float,
        model: str = "infinitetalk",
    ) -> float:
        """
        Calculate estimated cost for talking avatar generation.

        Args:
            resolution: Output resolution (480p or 720p)
            estimated_duration: Estimated video duration in seconds
            model: Model to use - "infinitetalk" (default) or "hunyuan-avatar"
        Returns:
            Estimated cost in USD
        """
        # Delegate to whichever backend the caller selected.
        backend = (
            self.hunyuan_avatar_service
            if model == "hunyuan-avatar"
            else self.infinitetalk_service
        )
        return backend.calculate_cost(resolution, estimated_duration)

View File

@@ -0,0 +1,206 @@
"""
Face Swap service for Video Studio.
Supports two models:
1. MoCha (wavespeed-ai/wan-2.1/mocha) - Character replacement with motion preservation
2. Video Face Swap (wavespeed-ai/video-face-swap) - Simple face swap with multi-face support
"""
import base64
from typing import Dict, Any, Optional, Callable
from fastapi import HTTPException
from utils.logger_utils import get_service_logger
from ..wavespeed.client import WaveSpeedClient
logger = get_service_logger("video_studio.face_swap")
class FaceSwapService:
    """Service for face/character swap operations.

    Wraps two WaveSpeed models:
    - "mocha": character replacement that preserves the source video's
      motion, controlled via prompt/resolution/seed.
    - "video-face-swap": simple face swap with multi-face targeting via
      gender filter and face index.
    """
    def __init__(self):
        """Initialize Face Swap service with a WaveSpeed API client."""
        self.wavespeed_client = WaveSpeedClient()
        logger.info("[FaceSwap] Service initialized")
    def calculate_cost(self, model: str, resolution: Optional[str] = None, duration: float = 10.0) -> float:
        """
        Calculate cost for face swap operation.

        Args:
            model: Model to use ("mocha" or "video-face-swap")
            resolution: Output resolution for MoCha ("480p" or "720p"), ignored for video-face-swap
            duration: Video duration in seconds
        Returns:
            Cost in USD
        """
        if model == "video-face-swap":
            # Video Face Swap pricing: $0.01/s
            # Minimum charge: 5 seconds
            # Maximum: 600 seconds (10 minutes)
            cost_per_second = 0.01
            billed_duration = max(5.0, min(duration, 600.0))
            return cost_per_second * billed_duration
        else:
            # MoCha pricing: $0.04/s (480p), $0.08/s (720p)
            # Minimum charge: 5 seconds
            # Maximum billed: 120 seconds
            pricing = {
                "480p": 0.04,
                "720p": 0.08,
            }
            # None/unknown resolutions fall back to the cheaper 480p rate.
            cost_per_second = pricing.get(resolution or "480p", pricing["480p"])
            billed_duration = max(5.0, min(duration, 120.0))
            return cost_per_second * billed_duration
    async def swap_face(
        self,
        image_data: bytes,
        video_data: bytes,
        model: str = "mocha",
        prompt: Optional[str] = None,
        resolution: str = "480p",
        seed: Optional[int] = None,
        target_gender: str = "all",
        target_index: int = 0,
        user_id: Optional[str] = None,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> Dict[str, Any]:
        """
        Perform face/character swap using MoCha or Video Face Swap.

        Args:
            image_data: Reference image as bytes
            video_data: Source video as bytes
            model: Model to use ("mocha" or "video-face-swap")
            prompt: Optional prompt to guide the swap (MoCha only)
            resolution: Output resolution for MoCha ("480p" or "720p")
            seed: Random seed for reproducibility (MoCha only)
            target_gender: Filter which faces to swap (video-face-swap only: "all", "female", "male")
            target_index: Select which face to swap (video-face-swap only: 0 = largest)
            user_id: User ID for tracking (required; ValueError if missing)
            progress_callback: Optional callback for progress updates
        Returns:
            Dict with swapped video_url, cost, and metadata on success;
            on non-HTTP failures a dict {"success": False, "error": ...}
            is returned instead of raising.
        Raises:
            HTTPException: re-raised unchanged when the underlying client raises one.
        """
        try:
            logger.info(
                f"[FaceSwap] Face swap request: user={user_id}, "
                f"model={model}, resolution={resolution if model == 'mocha' else 'N/A'}"
            )
            if not user_id:
                raise ValueError("user_id is required for face swap")
            # Validate model
            if model not in ("mocha", "video-face-swap"):
                raise ValueError("Model must be 'mocha' or 'video-face-swap'")
            # Convert image to base64 data URI (assumed PNG; the upstream
            # API receives the image inline rather than by URL)
            image_b64 = base64.b64encode(image_data).decode('utf-8')
            image_uri = f"data:image/png;base64,{image_b64}"
            # Convert video to base64 data URI
            video_b64 = base64.b64encode(video_data).decode('utf-8')
            video_uri = f"data:video/mp4;base64,{video_b64}"
            # Estimate duration (we'll use a default, actual duration would come from video metadata)
            estimated_duration = 10.0  # Default estimate, should be improved with actual video duration
            # Calculate cost estimate
            cost = self.calculate_cost(model, resolution if model == "mocha" else None, estimated_duration)
            if progress_callback:
                model_name = "MoCha" if model == "mocha" else "Video Face Swap"
                progress_callback(10.0, f"Submitting face swap request to {model_name}...")
            # Perform face swap based on model
            # NOTE(review): both client calls below are blocking and run
            # directly on the event loop (no asyncio.to_thread, unlike the
            # sibling Video Studio services) -- confirm this is intentional.
            if model == "mocha":
                # Validate resolution for MoCha
                if resolution not in ("480p", "720p"):
                    raise ValueError("Resolution must be '480p' or '720p' for MoCha")
                # face_swap is synchronous (uses sync_mode internally)
                swapped_video_bytes = self.wavespeed_client.face_swap(
                    image=image_uri,
                    video=video_uri,
                    prompt=prompt,
                    resolution=resolution,
                    seed=seed,
                    enable_sync_mode=True,
                    timeout=600,  # 10 minutes timeout
                    progress_callback=progress_callback,
                )
            else:  # video-face-swap
                # video_face_swap is synchronous (uses sync_mode internally)
                swapped_video_bytes = self.wavespeed_client.video_face_swap(
                    video=video_uri,
                    face_image=image_uri,
                    target_gender=target_gender,
                    target_index=target_index,
                    enable_sync_mode=True,
                    timeout=600,  # 10 minutes timeout
                    progress_callback=progress_callback,
                )
            if progress_callback:
                progress_callback(90.0, "Face swap complete, saving video...")
            # Save swapped video (lazy import avoids a circular dependency)
            from . import VideoStudioService
            video_service = VideoStudioService()
            save_result = video_service._save_video_file(
                video_bytes=swapped_video_bytes,
                operation_type="face_swap",
                user_id=user_id,
            )
            # Recalculate cost with actual duration if available
            # For now, use estimated cost
            actual_cost = cost
            # NOTE: this log prints the MoCha resolution parameter even for
            # video-face-swap runs, where it does not apply.
            logger.info(
                f"[FaceSwap] Face swap successful: user={user_id}, "
                f"resolution={resolution}, cost=${actual_cost:.4f}"
            )
            # Model-specific parameters are folded into the metadata below.
            metadata = {
                "original_image_size": len(image_data),
                "original_video_size": len(video_data),
                "swapped_video_size": len(swapped_video_bytes),
                "model": model,
            }
            if model == "mocha":
                metadata.update({
                    "resolution": resolution,
                    "seed": seed,
                    "prompt": prompt,
                })
            else:  # video-face-swap
                metadata.update({
                    "target_gender": target_gender,
                    "target_index": target_index,
                })
            return {
                "success": True,
                "video_url": save_result["file_url"],
                "video_bytes": swapped_video_bytes,
                "cost": actual_cost,
                "model": model,
                "resolution": resolution if model == "mocha" else None,
                "metadata": metadata,
            }
        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[FaceSwap] Face swap error: {e}", exc_info=True)
            # NOTE(review): unlike the other Video Studio services (which
            # raise HTTPException(500) here), this returns an error payload;
            # callers must check "success". Confirm the asymmetry is intended.
            return {
                "success": False,
                "error": str(e)
            }

View File

@@ -0,0 +1,148 @@
"""Hunyuan Avatar adapter for Avatar Studio."""
import asyncio
from typing import Any, Dict, Optional
from fastapi import HTTPException
from loguru import logger
from services.wavespeed.hunyuan_avatar import create_hunyuan_avatar, calculate_hunyuan_avatar_cost
from services.wavespeed.client import WaveSpeedClient
from utils.logger_utils import get_service_logger
logger = get_service_logger("video_studio.hunyuan_avatar")
class HunyuanAvatarService:
    """Adapter for Hunyuan Avatar in Avatar Studio context.

    Thin wrapper around services.wavespeed.hunyuan_avatar that handles
    input decoding, threading, cost calculation, and result enrichment.
    """
    def __init__(self, client: Optional[WaveSpeedClient] = None):
        """Initialize Hunyuan Avatar service adapter.

        Args:
            client: Optional pre-built WaveSpeed client; a fresh one is
                created when omitted (useful for sharing or testing).
        """
        self.client = client or WaveSpeedClient()
        logger.info("[Hunyuan Avatar Adapter] Service initialized")
    def calculate_cost(self, resolution: str, duration: float) -> float:
        """Calculate cost for Hunyuan Avatar video.

        Args:
            resolution: Output resolution (480p or 720p)
            duration: Video duration in seconds
        Returns:
            Cost in USD
        """
        # Pricing logic lives alongside the WaveSpeed integration module.
        return calculate_hunyuan_avatar_cost(resolution, duration)
    async def create_talking_avatar(
        self,
        image_base64: str,
        audio_base64: str,
        resolution: str = "480p",
        prompt: Optional[str] = None,
        seed: Optional[int] = None,
        user_id: str = "video_studio",
        progress_callback: Optional[callable] = None,
    ) -> Dict[str, Any]:
        """Create talking avatar video using Hunyuan Avatar.

        Args:
            image_base64: Person image in base64 or data URI
            audio_base64: Audio file in base64 or data URI
            resolution: Output resolution (480p or 720p, default: 480p)
            prompt: Optional prompt for expression/style
            seed: Optional random seed
            user_id: User ID for tracking
            progress_callback: Optional progress callback function
        Returns:
            Dictionary with video_bytes, metadata, and cost
        Raises:
            HTTPException: 400 for an invalid resolution or undecodable
                image/audio input, 500 when generation fails.
        """
        # Validate resolution
        if resolution not in ["480p", "720p"]:
            raise HTTPException(
                status_code=400,
                detail="Resolution must be '480p' or '720p' for Hunyuan Avatar"
            )
        # Decode image: accepts both raw base64 and "data:<mime>;base64,<payload>" URIs
        import base64
        try:
            if image_base64.startswith("data:"):
                if "," not in image_base64:
                    raise ValueError("Invalid data URI format: missing comma separator")
                header, encoded = image_base64.split(",", 1)
                # Extract the MIME type from "data:<mime>;base64"; default to PNG.
                mime_parts = header.split(":")[1].split(";")[0] if ":" in header else "image/png"
                image_mime = mime_parts.strip() or "image/png"
                image_bytes = base64.b64decode(encoded)
            else:
                # Raw base64 payload with no header: assume PNG.
                image_bytes = base64.b64decode(image_base64)
                image_mime = "image/png"
        except Exception as e:
            raise HTTPException(
                status_code=400,
                detail=f"Failed to decode image: {str(e)}"
            )
        # Decode audio: same handling, defaulting to audio/mpeg (MP3)
        try:
            if audio_base64.startswith("data:"):
                if "," not in audio_base64:
                    raise ValueError("Invalid data URI format: missing comma separator")
                header, encoded = audio_base64.split(",", 1)
                mime_parts = header.split(":")[1].split(";")[0] if ":" in header else "audio/mpeg"
                audio_mime = mime_parts.strip() or "audio/mpeg"
                audio_bytes = base64.b64decode(encoded)
            else:
                audio_bytes = base64.b64decode(audio_base64)
                audio_mime = "audio/mpeg"
        except Exception as e:
            raise HTTPException(
                status_code=400,
                detail=f"Failed to decode audio: {str(e)}"
            )
        # Call Hunyuan Avatar function (run in thread since it's synchronous)
        try:
            result = await asyncio.to_thread(
                create_hunyuan_avatar,
                image_bytes=image_bytes,
                audio_bytes=audio_bytes,
                resolution=resolution,
                prompt=prompt,
                seed=seed,
                user_id=user_id,
                image_mime=image_mime,
                audio_mime=audio_mime,
                client=self.client,
                progress_callback=progress_callback,
            )
        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[Hunyuan Avatar Adapter] Error: {str(e)}", exc_info=True)
            raise HTTPException(
                status_code=500,
                detail=f"Hunyuan Avatar generation failed: {str(e)}"
            )
        # Calculate actual cost based on duration
        # NOTE(review): assumes the result dict carries "duration" in
        # seconds; 5.0 s is used when missing -- confirm against
        # create_hunyuan_avatar's contract.
        actual_cost = self.calculate_cost(resolution, result.get("duration", 5.0))
        # Update result with actual cost and additional metadata
        # (mutates and returns the dict produced by create_hunyuan_avatar)
        result["cost"] = actual_cost
        result["resolution"] = resolution
        # Get video dimensions from resolution (falls back to 480p dims)
        resolution_dims = {
            "480p": (854, 480),
            "720p": (1280, 720),
        }
        width, height = resolution_dims.get(resolution, (854, 480))
        result["width"] = width
        result["height"] = height
        logger.info(
            f"[Hunyuan Avatar Adapter] ✅ Generated talking avatar: "
            f"resolution={resolution}, duration={result.get('duration', 5.0)}s, cost=${actual_cost:.2f}"
        )
        return result

View File

@@ -0,0 +1,156 @@
"""
Platform specifications for Social Optimizer.
Defines aspect ratios, duration limits, file size limits, and other requirements
for each social media platform.
"""
from dataclasses import dataclass
from typing import List, Optional
from enum import Enum
class Platform(Enum):
    """Social media platforms supported by the Social Optimizer.

    Member values are the lowercase platform slugs used in requests
    (matched case-insensitively by get_platform_by_name).
    """
    INSTAGRAM = "instagram"
    TIKTOK = "tiktok"
    YOUTUBE = "youtube"
    LINKEDIN = "linkedin"
    FACEBOOK = "facebook"
    TWITTER = "twitter"
@dataclass
class PlatformSpec:
    """Platform specification for video optimization.

    One PlatformSpec describes a single output variant (a platform may
    declare several, e.g. 16:9 and 1:1).
    """
    platform: Platform  # owning platform
    name: str  # human-readable variant name, e.g. "Instagram Reels"
    aspect_ratio: str  # e.g., "9:16", "16:9", "1:1"
    width: int  # output width in pixels
    height: int  # output height in pixels
    max_duration: float  # seconds
    max_file_size_mb: float  # MB
    formats: List[str]  # e.g., ["mp4", "mov"]
    description: str  # short human-readable summary of the variant
# Platform specifications
# NOTE: a platform may appear more than once (LinkedIn and Facebook list
# 16:9 and 1:1 variants); the first entry for a platform acts as its
# default (see get_platform_spec, which falls back to specs[0]).
PLATFORM_SPECS: List[PlatformSpec] = [
    PlatformSpec(
        platform=Platform.INSTAGRAM,
        name="Instagram Reels",
        aspect_ratio="9:16",
        width=1080,
        height=1920,
        max_duration=90.0,  # 90 seconds
        max_file_size_mb=4000.0,  # 4GB
        formats=["mp4"],
        description="Vertical video format for Instagram Reels",
    ),
    PlatformSpec(
        platform=Platform.TIKTOK,
        name="TikTok",
        aspect_ratio="9:16",
        width=1080,
        height=1920,
        max_duration=60.0,  # 60 seconds
        max_file_size_mb=287.0,  # 287MB
        formats=["mp4", "mov"],
        description="Vertical video format for TikTok",
    ),
    PlatformSpec(
        platform=Platform.YOUTUBE,
        name="YouTube Shorts",
        aspect_ratio="9:16",
        width=1080,
        height=1920,
        max_duration=60.0,  # 60 seconds
        max_file_size_mb=256000.0,  # 256GB (very high limit)
        formats=["mp4", "mov", "webm"],
        description="Vertical video format for YouTube Shorts",
    ),
    PlatformSpec(
        platform=Platform.LINKEDIN,
        name="LinkedIn Video",
        aspect_ratio="16:9",
        width=1920,
        height=1080,
        max_duration=600.0,  # 10 minutes
        max_file_size_mb=5000.0,  # 5GB
        formats=["mp4"],
        description="Horizontal video format for LinkedIn",
    ),
    PlatformSpec(
        platform=Platform.LINKEDIN,
        name="LinkedIn Video (Square)",
        aspect_ratio="1:1",
        width=1080,
        height=1080,
        max_duration=600.0,  # 10 minutes
        max_file_size_mb=5000.0,  # 5GB
        formats=["mp4"],
        description="Square video format for LinkedIn",
    ),
    PlatformSpec(
        platform=Platform.FACEBOOK,
        name="Facebook Video",
        aspect_ratio="16:9",
        width=1920,
        height=1080,
        max_duration=240.0,  # 240 seconds (4 minutes)
        max_file_size_mb=4000.0,  # 4GB
        formats=["mp4", "mov"],
        description="Horizontal video format for Facebook",
    ),
    PlatformSpec(
        platform=Platform.FACEBOOK,
        name="Facebook Video (Square)",
        aspect_ratio="1:1",
        width=1080,
        height=1080,
        max_duration=240.0,  # 240 seconds
        max_file_size_mb=4000.0,  # 4GB
        formats=["mp4", "mov"],
        description="Square video format for Facebook",
    ),
    PlatformSpec(
        platform=Platform.TWITTER,
        name="Twitter/X Video",
        aspect_ratio="16:9",
        width=1920,
        height=1080,
        max_duration=140.0,  # 140 seconds (2:20)
        max_file_size_mb=512.0,  # 512MB
        formats=["mp4"],
        description="Horizontal video format for Twitter/X",
    ),
]
def get_platform_specs(platform: Platform) -> List[PlatformSpec]:
    """Get all specifications for a platform, in registry order."""
    # Filter the module-level registry by the owning platform.
    return list(filter(lambda spec: spec.platform == platform, PLATFORM_SPECS))
def get_platform_spec(platform: Platform, aspect_ratio: Optional[str] = None) -> Optional[PlatformSpec]:
    """Get a specific platform specification.

    Returns the spec whose aspect ratio matches, the platform's first
    registered spec when no matching ratio is requested, or None when
    the platform has no specs at all.
    """
    candidates = get_platform_specs(platform)
    if aspect_ratio:
        matched = next(
            (spec for spec in candidates if spec.aspect_ratio == aspect_ratio),
            None,
        )
        if matched is not None:
            return matched
    # Fall back to the first registered variant (or None if none exist).
    return candidates[0] if candidates else None
def get_all_platforms() -> List[Platform]:
    """Get all available platforms (in declaration order)."""
    # Iterating an Enum yields its members in definition order.
    return [member for member in Platform]
def get_platform_by_name(name: str) -> Optional[Platform]:
    """Get platform enum by name (case-insensitive); None when unknown."""
    try:
        # Enum lookup by value raises ValueError for unrecognised names.
        return Platform(name.lower())
    except ValueError:
        return None

View File

@@ -0,0 +1,269 @@
"""
Social Optimizer service for platform-specific video optimization.
Creates optimized versions of videos for Instagram, TikTok, YouTube, LinkedIn, Facebook, and Twitter.
"""
import asyncio
import base64
from pathlib import Path
from typing import Dict, Any, List, Optional
from dataclasses import dataclass
from utils.logger_utils import get_service_logger
from .platform_specs import Platform, PlatformSpec, get_platform_spec, get_platform_specs
from .video_processors import (
convert_aspect_ratio,
trim_video,
compress_video,
extract_thumbnail,
)
logger = get_service_logger("video_studio.social_optimizer")
@dataclass
class OptimizationOptions:
    """Options for video optimization."""
    auto_crop: bool = True  # convert aspect ratio (center crop) to match the platform
    generate_thumbnails: bool = True  # also emit a thumbnail per platform variant
    compress: bool = True  # re-encode when the file exceeds the platform size limit
    trim_mode: str = "beginning"  # "beginning", "middle", "end"
@dataclass
class PlatformResult:
    """Result for a single platform optimization."""
    platform: str  # Platform enum value, e.g. "instagram"
    name: str  # spec variant name, e.g. "Instagram Reels"
    aspect_ratio: str  # e.g. "9:16"
    video_url: str  # URL of the saved optimized video
    thumbnail_url: Optional[str] = None  # None when thumbnails are off or generation failed
    duration: float = 0.0  # seconds
    file_size: int = 0  # bytes
    width: int = 0  # pixels
    height: int = 0  # pixels
class SocialOptimizerService:
    """Service for optimizing videos for social media platforms.

    Runs local FFmpeg/MoviePy processing (crop, trim, compress,
    thumbnail) for every registered variant of each requested platform;
    no paid API calls are made, so the reported cost is always 0.
    """
    def __init__(self):
        """Initialize Social Optimizer service."""
        logger.info("[SocialOptimizer] Service initialized")
    async def optimize_for_platforms(
        self,
        video_bytes: bytes,
        platforms: List[str],
        options: OptimizationOptions,
        user_id: str,
        video_studio_service: Any,  # VideoStudioService
    ) -> Dict[str, Any]:
        """
        Optimize video for multiple platforms.

        Args:
            video_bytes: Source video as bytes
            platforms: List of platform names (e.g., ["instagram", "tiktok"])
            options: Optimization options
            user_id: User ID for file storage
            video_studio_service: VideoStudioService instance for saving files
        Returns:
            Dict with "success" (True if at least one variant succeeded),
            per-variant "results", an "errors" list of
            {"platform", ["format"], "error"} entries, and "cost" (always 0.0).
        """
        logger.info(
            f"[SocialOptimizer] Optimizing video for platforms: {platforms}, "
            f"user={user_id}"
        )
        results: List[PlatformResult] = []
        errors: List[Dict[str, str]] = []
        # Process each platform independently; one platform's failure must
        # not abort the others.
        for platform_name in platforms:
            try:
                # Platform(...) raises ValueError for unknown names.
                platform_enum = Platform(platform_name.lower())
                platform_specs = get_platform_specs(platform_enum)
                # Process each format variant for the platform
                for spec in platform_specs:
                    try:
                        result = await self._optimize_for_spec(
                            video_bytes=video_bytes,
                            spec=spec,
                            options=options,
                            user_id=user_id,
                            video_studio_service=video_studio_service,
                        )
                        results.append(result)
                    except Exception as e:
                        # Record the per-variant failure and continue.
                        logger.error(
                            f"[SocialOptimizer] Failed to optimize for {spec.name}: {e}",
                            exc_info=True
                        )
                        errors.append({
                            "platform": platform_name,
                            "format": spec.name,
                            "error": str(e),
                        })
            except ValueError:
                logger.warning(f"[SocialOptimizer] Unknown platform: {platform_name}")
                errors.append({
                    "platform": platform_name,
                    "error": f"Unknown platform: {platform_name}",
                })
        # Calculate total cost (free - FFmpeg processing)
        total_cost = 0.0
        logger.info(
            f"[SocialOptimizer] Optimization complete: "
            f"{len(results)} successful, {len(errors)} errors"
        )
        return {
            "success": len(results) > 0,
            "results": [
                {
                    "platform": r.platform,
                    "name": r.name,
                    "aspect_ratio": r.aspect_ratio,
                    "video_url": r.video_url,
                    "thumbnail_url": r.thumbnail_url,
                    "duration": r.duration,
                    "file_size": r.file_size,
                    "width": r.width,
                    "height": r.height,
                }
                for r in results
            ],
            "errors": errors,
            "cost": total_cost,
        }
    async def _optimize_for_spec(
        self,
        video_bytes: bytes,
        spec: PlatformSpec,
        options: OptimizationOptions,
        user_id: str,
        video_studio_service: Any,
    ) -> PlatformResult:
        """
        Optimize video for a specific platform specification.

        Pipeline: aspect-ratio conversion -> trim to the platform's max
        duration -> compression (only when over the size limit) -> save ->
        optional thumbnail. Processing helpers are synchronous, so each
        step runs via asyncio.to_thread.

        Args:
            video_bytes: Source video as bytes
            spec: Platform specification
            options: Optimization options
            user_id: User ID for file storage
            video_studio_service: VideoStudioService instance
        Returns:
            PlatformResult with optimized video URL and metadata
        Raises:
            Exception: propagated from the processing helpers (the caller
                records it per variant); thumbnail failures are swallowed.
        """
        logger.info(
            f"[SocialOptimizer] Optimizing for {spec.name} "
            f"({spec.aspect_ratio}, max {spec.max_duration}s)"
        )
        processed_video = video_bytes
        # Step 1: Convert aspect ratio if needed
        if options.auto_crop:
            processed_video = await asyncio.to_thread(
                convert_aspect_ratio,
                processed_video,
                spec.aspect_ratio,
                "center",  # Use center crop for social media
            )
            logger.debug(f"[SocialOptimizer] Aspect ratio converted to {spec.aspect_ratio}")
        # Step 2: Trim if video exceeds max duration
        if spec.max_duration > 0:
            # Get video duration (we'll need to check this)
            # For now, we'll trim if the video is likely too long
            # In a real implementation, we'd use MoviePy to get duration first
            processed_video = await asyncio.to_thread(
                trim_video,
                processed_video,
                start_time=0.0,
                end_time=None,
                max_duration=spec.max_duration,
                trim_mode=options.trim_mode,
            )
            logger.debug(f"[SocialOptimizer] Video trimmed to max {spec.max_duration}s")
        # Step 3: Compress if needed and file size exceeds limit
        if options.compress:
            current_size_mb = len(processed_video) / (1024 * 1024)
            if current_size_mb > spec.max_file_size_mb:
                # Calculate target size (90% of max to be safe)
                target_size_mb = spec.max_file_size_mb * 0.9
                processed_video = await asyncio.to_thread(
                    compress_video,
                    processed_video,
                    target_size_mb=target_size_mb,
                    quality="medium",
                )
                logger.debug(
                    f"[SocialOptimizer] Video compressed: "
                    f"{current_size_mb:.2f}MB -> {len(processed_video) / (1024 * 1024):.2f}MB"
                )
        # Step 4: Save optimized video
        save_result = video_studio_service._save_video_file(
            video_bytes=processed_video,
            operation_type=f"social_optimizer_{spec.platform.value}",
            user_id=user_id,
        )
        video_url = save_result["file_url"]
        # Step 5: Generate thumbnail if requested (best-effort: a failure
        # here only logs a warning and leaves thumbnail_url as None)
        thumbnail_url = None
        if options.generate_thumbnails:
            try:
                thumbnail_bytes = await asyncio.to_thread(
                    extract_thumbnail,
                    processed_video,
                    time_position=None,  # Middle of video
                    width=spec.width,
                    height=spec.height,
                )
                # Save thumbnail
                thumbnail_save_result = video_studio_service._save_video_file(
                    video_bytes=thumbnail_bytes,
                    operation_type=f"social_optimizer_thumbnail_{spec.platform.value}",
                    user_id=user_id,
                )
                thumbnail_url = thumbnail_save_result["file_url"]
                logger.debug(f"[SocialOptimizer] Thumbnail generated: {thumbnail_url}")
            except Exception as e:
                logger.warning(f"[SocialOptimizer] Failed to generate thumbnail: {e}")
        # Get video metadata (duration, file size)
        # NOTE(review): the reported duration is just the platform cap, not
        # the measured length -- shorter inputs are over-reported. A real
        # implementation should probe the actual duration with MoviePy.
        file_size = len(processed_video)
        estimated_duration = spec.max_duration if spec.max_duration > 0 else 10.0
        logger.info(
            f"[SocialOptimizer] Optimization complete for {spec.name}: "
            f"video_url={video_url}, size={file_size} bytes"
        )
        return PlatformResult(
            platform=spec.platform.value,
            name=spec.name,
            aspect_ratio=spec.aspect_ratio,
            video_url=video_url,
            thumbnail_url=thumbnail_url,
            duration=estimated_duration,
            file_size=file_size,
            width=spec.width,
            height=spec.height,
        )

View File

@@ -0,0 +1,129 @@
"""
Video Background Remover service for Video Studio.
Removes or replaces video backgrounds using WaveSpeed Video Background Remover.
"""
import asyncio
import base64
from typing import Dict, Any, Optional, Callable
from fastapi import HTTPException
from utils.logger_utils import get_service_logger
from ..wavespeed.client import WaveSpeedClient
logger = get_service_logger("video_studio.video_background_remover")
class VideoBackgroundRemoverService:
    """Service for video background removal/replacement operations."""
    def __init__(self):
        """Initialize Video Background Remover service."""
        self.wavespeed_client = WaveSpeedClient()
        logger.info("[VideoBackgroundRemover] Service initialized")
    def calculate_cost(self, duration: float = 10.0) -> float:
        """
        Calculate cost for video background removal operation.

        Pricing from WaveSpeed documentation:
        - Rate: $0.01 per second
        - Minimum: $0.05 for <= 5 seconds
        - Maximum: $6.00 for 600 seconds (10 minutes)

        Args:
            duration: Video duration in seconds
        Returns:
            Cost in USD
        """
        # Clamp the linear $0.01/s rate between the $0.05 floor and the
        # $6.00 ceiling -- equivalent to the documented tier table.
        return min(6.00, max(0.05, duration * 0.01))
    async def remove_background(
        self,
        video_data: bytes,
        background_image_data: Optional[bytes] = None,
        user_id: str = None,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> Dict[str, Any]:
        """
        Remove or replace video background.

        Args:
            video_data: Source video as bytes
            background_image_data: Optional replacement background image as bytes
            user_id: User ID for tracking
            progress_callback: Optional callback for progress updates
        Returns:
            Dict with processed video_url, cost, and metadata
        Raises:
            HTTPException: 500 when processing fails.
        """
        try:
            logger.info(f"[VideoBackgroundRemover] Background removal request: user={user_id}, has_background={background_image_data is not None}")
            # Inline both inputs as base64 data URIs for the API call.
            video_uri = "data:video/mp4;base64," + base64.b64encode(video_data).decode('utf-8')
            replacement_uri = None
            if background_image_data:
                replacement_uri = "data:image/jpeg;base64," + base64.b64encode(background_image_data).decode('utf-8')
            # The WaveSpeed client blocks while polling; run it off-loop.
            result_bytes = await asyncio.to_thread(
                self.wavespeed_client.remove_background,
                video=video_uri,
                background_image=replacement_uri,
                enable_sync_mode=False,  # always use async submission with polling
                timeout=600,  # allow up to 10 minutes for long videos
                progress_callback=progress_callback,
            )
            # Rough duration estimate (~1 MB per second at 1080p), floored
            # at the 5-second billing minimum.
            estimated_duration = max(5, len(video_data) / (1024 * 1024))
            cost = self.calculate_cost(estimated_duration)
            # Lazy import avoids a circular dependency with the main service.
            from .video_studio_service import VideoStudioService
            saved = VideoStudioService()._save_video_file(
                video_bytes=result_bytes,
                operation_type="background_removal",
                user_id=user_id,
            )
            logger.info(f"[VideoBackgroundRemover] Background removal successful: user={user_id}, cost=${cost:.4f}")
            return {
                "success": True,
                "video_url": saved["file_url"],
                "video_bytes": result_bytes,
                "cost": cost,
                "has_background_replacement": background_image_data is not None,
                "metadata": {
                    "original_size": len(video_data),
                    "processed_size": len(result_bytes),
                    "estimated_duration": estimated_duration,
                },
            }
        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[VideoBackgroundRemover] Background removal failed: {e}", exc_info=True)
            raise HTTPException(
                status_code=500,
                detail=f"Video background removal failed: {str(e)}"
            )

View File

@@ -0,0 +1,647 @@
"""
Video processing utilities for Transform Studio.
Handles format conversion, aspect ratio conversion, speed adjustment,
resolution scaling, and compression using MoviePy/FFmpeg.
"""
import io
import tempfile
from pathlib import Path
from typing import Optional, Tuple, Dict, Any
from fastapi import HTTPException
from utils.logger_utils import get_service_logger
logger = get_service_logger("video_studio.video_processors")
# MoviePy is an optional dependency: guard the import so this module can
# still be loaded (and later report a clean HTTP 500 via _check_moviepy)
# when it is not installed.
try:
    from moviepy import VideoFileClip  # MoviePy v2-style import path
    MOVIEPY_AVAILABLE = True
except ImportError:
    MOVIEPY_AVAILABLE = False
    logger.warning("[VideoProcessors] MoviePy not available. Video processing will not work.")
def _check_moviepy():
    """Raise an HTTP 500 error when the MoviePy dependency is missing.

    No-op when MoviePy imported successfully at module load.
    """
    if MOVIEPY_AVAILABLE:
        return
    raise HTTPException(
        status_code=500,
        detail="MoviePy is not installed. Please install it: pip install moviepy imageio imageio-ffmpeg"
    )
def _get_resolution_dimensions(resolution: str) -> Tuple[int, int]:
"""Get width and height for a resolution string."""
resolution_map = {
"480p": (854, 480),
"720p": (1280, 720),
"1080p": (1920, 1080),
"1440p": (2560, 1440),
"4k": (3840, 2160),
}
return resolution_map.get(resolution.lower(), (1280, 720))
def _get_aspect_ratio_dimensions(aspect_ratio: str, target_height: int = 720) -> Tuple[int, int]:
"""Get width and height for an aspect ratio."""
aspect_map = {
"16:9": (16, 9),
"9:16": (9, 16),
"1:1": (1, 1),
"4:5": (4, 5),
"21:9": (21, 9),
}
if aspect_ratio not in aspect_map:
return (1280, 720) # Default to 16:9
width_ratio, height_ratio = aspect_map[aspect_ratio]
width = int((width_ratio / height_ratio) * target_height)
return (width, target_height)
def convert_format(
    video_bytes: bytes,
    output_format: str = "mp4",
    codec: str = "libx264",
    quality: str = "medium",
    audio_codec: str = "aac",
) -> bytes:
    """
    Convert video to a different container format.

    Args:
        video_bytes: Input video as bytes
        output_format: Output format (mp4, mov, webm, gif)
        codec: Video codec; honored for mp4 only (webm/mov/gif force their
            own encoder)
        quality: Quality preset (high, medium, low) controlling bitrate and
            encoder speed preset
        audio_codec: Audio codec; honored for mp4 only

    Returns:
        Converted video as bytes

    Raises:
        HTTPException: 500 if MoviePy is unavailable or the conversion fails.
    """
    _check_moviepy()
    quality_presets = {
        "high": {"bitrate": "5000k", "preset": "slow"},
        "medium": {"bitrate": "2500k", "preset": "medium"},
        "low": {"bitrate": "1000k", "preset": "fast"},
    }
    preset = quality_presets.get(quality, quality_presets["medium"])
    # Persist input bytes to a temp file: MoviePy/FFmpeg read from paths.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
        input_file.write(video_bytes)
        input_path = input_file.name
    clip = None
    output_path = None
    try:
        clip = VideoFileClip(input_path)
        # Format-specific codec selection (these formats require specific
        # encoders, so caller-supplied codecs are overridden).
        if output_format == "webm":
            codec = "libvpx-vp9"
            audio_codec = "libopus"
        elif output_format == "gif":
            # GIF is written via write_gif(); video/audio codecs do not apply.
            codec = None
            audio_codec = None
        elif output_format == "mov":
            codec = "libx264"
            audio_codec = "aac"
        else:  # mp4
            codec = codec or "libx264"
            audio_codec = audio_codec or "aac"
        output_suffix = f".{output_format}" if output_format != "gif" else ".gif"
        with tempfile.NamedTemporaryFile(suffix=output_suffix, delete=False) as output_file:
            output_path = output_file.name
        if output_format == "gif":
            clip.write_gif(output_path, fps=15, logger=None)
        else:
            clip.write_videofile(
                output_path,
                codec=codec,
                audio_codec=audio_codec,
                bitrate=preset["bitrate"],
                preset=preset["preset"],
                threads=4,
                logger=None,
            )
        with open(output_path, "rb") as f:
            output_bytes = f.read()
        logger.info(f"[VideoProcessors] Format conversion successful: {output_format}, size={len(output_bytes)} bytes")
        return output_bytes
    except Exception as e:
        logger.error(f"[VideoProcessors] Format conversion failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Format conversion failed: {str(e)}")
    finally:
        # Always release the decoder and remove temp files, success or
        # failure (the original leaked the clip and relied on a fragile
        # "'output_path' in locals()" expression on the error path).
        if clip is not None:
            clip.close()
        Path(input_path).unlink(missing_ok=True)
        if output_path is not None:
            Path(output_path).unlink(missing_ok=True)
def convert_aspect_ratio(
    video_bytes: bytes,
    target_aspect: str,
    crop_mode: str = "center",
) -> bytes:
    """
    Convert video to a different aspect ratio by center-cropping, then
    resizing to the target dimensions.

    Args:
        video_bytes: Input video as bytes
        target_aspect: Target aspect ratio (16:9, 9:16, 1:1, 4:5, 21:9)
        crop_mode: Crop mode (center, smart, letterbox)

    Returns:
        Converted video as bytes

    Raises:
        HTTPException: 500 if MoviePy is unavailable or the conversion fails.

    NOTE(review): in the original code the "letterbox" branch was
    byte-identical to the center-crop branch — no black bars were ever
    added, and "smart" also fell through to center cropping. That behavior
    is preserved here; true letterboxing (scale-to-fit plus padding) would
    require compositing onto a background clip. TODO: implement padding.
    """
    _check_moviepy()
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
        input_file.write(video_bytes)
        input_path = input_file.name
    clip = None
    output_path = None
    try:
        clip = VideoFileClip(input_path)
        original_width, original_height = clip.size
        target_width, target_height = _get_aspect_ratio_dimensions(target_aspect, original_height)
        target_aspect_ratio = target_width / target_height
        original_aspect_ratio = original_width / original_height
        # MoviePy v2 renamed crop()/resize() to cropped()/resized(); the
        # module-level "from moviepy import VideoFileClip" import is the
        # v2 path, so prefer the new names and fall back to v1.
        crop_fn = getattr(clip, "cropped", None) or getattr(clip, "crop")
        if target_aspect_ratio > original_aspect_ratio:
            # Target is wider than the source: crop height, keep full width.
            new_height = int(original_width / target_aspect_ratio)
            y_offset = (original_height - new_height) // 2
            clip = crop_fn(y1=y_offset, y2=y_offset + new_height)
        else:
            # Target is taller than the source: crop width, keep full height.
            new_width = int(original_height * target_aspect_ratio)
            x_offset = (original_width - new_width) // 2
            clip = crop_fn(x1=x_offset, x2=x_offset + new_width)
        # Resize the cropped clip to the exact target dimensions.
        resize_fn = getattr(clip, "resized", None) or getattr(clip, "resize")
        clip = resize_fn((target_width, target_height))
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_file:
            output_path = output_file.name
        clip.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            preset="medium",
            threads=4,
            logger=None,
        )
        with open(output_path, "rb") as f:
            output_bytes = f.read()
        logger.info(f"[VideoProcessors] Aspect ratio conversion successful: {target_aspect}, size={len(output_bytes)} bytes")
        return output_bytes
    except Exception as e:
        logger.error(f"[VideoProcessors] Aspect ratio conversion failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Aspect ratio conversion failed: {str(e)}")
    finally:
        # Release the decoder and remove temp files on all paths (the
        # original leaked the clip when an exception occurred).
        if clip is not None:
            clip.close()
        Path(input_path).unlink(missing_ok=True)
        if output_path is not None:
            Path(output_path).unlink(missing_ok=True)
def _apply_speed(clip, speed_factor: float):
    """Return *clip* playing at *speed_factor*x, across MoviePy versions."""
    # MoviePy v2: Clip.with_speed_scaled() replaces the removed v1 fx API.
    if hasattr(clip, "with_speed_scaled"):
        return clip.with_speed_scaled(speed_factor)
    try:
        # MoviePy v1: apply the speedx effect via clip.fx().
        from moviepy.video.fx.speedx import speedx
        return clip.fx(speedx, speed_factor)
    except (ImportError, AttributeError):
        pass
    try:
        # Older v1 layouts expose speedx directly on the fx package.
        from moviepy.video.fx import speedx
        return clip.fx(speedx, speed_factor)
    except (ImportError, AttributeError):
        pass
    # Last resort: rescale fps and duration manually. NOTE(review): this
    # leaves the frame timeline untouched, so audio sync is approximate.
    new_fps = clip.fps * speed_factor
    new_duration = clip.duration / speed_factor
    return clip.with_fps(new_fps).with_duration(new_duration)


def adjust_speed(
    video_bytes: bytes,
    speed_factor: float,
) -> bytes:
    """
    Adjust video playback speed.

    Args:
        video_bytes: Input video as bytes
        speed_factor: Speed multiplier (e.g. 0.25, 0.5, 1.0, 1.5, 2.0, 4.0)

    Returns:
        Speed-adjusted video as bytes

    Raises:
        HTTPException: 400 for a non-positive speed factor, 500 if MoviePy
            is unavailable or processing fails.
    """
    _check_moviepy()
    if speed_factor <= 0:
        raise HTTPException(status_code=400, detail="Speed factor must be greater than 0")
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
        input_file.write(video_bytes)
        input_path = input_file.name
    clip = None
    output_path = None
    try:
        clip = VideoFileClip(input_path)
        # Fix: the module imports MoviePy v2-style, where clip.fx/speedx no
        # longer exist; the original silently fell through to the
        # inaccurate manual fallback on v2. Prefer the native v2 API.
        clip = _apply_speed(clip, speed_factor)
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_file:
            output_path = output_file.name
        clip.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            preset="medium",
            threads=4,
            logger=None,
        )
        with open(output_path, "rb") as f:
            output_bytes = f.read()
        logger.info(f"[VideoProcessors] Speed adjustment successful: {speed_factor}x, size={len(output_bytes)} bytes")
        return output_bytes
    except Exception as e:
        logger.error(f"[VideoProcessors] Speed adjustment failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Speed adjustment failed: {str(e)}")
    finally:
        # Release the decoder and remove temp files on all paths.
        if clip is not None:
            clip.close()
        Path(input_path).unlink(missing_ok=True)
        if output_path is not None:
            Path(output_path).unlink(missing_ok=True)
def scale_resolution(
    video_bytes: bytes,
    target_resolution: str,
    maintain_aspect: bool = True,
) -> bytes:
    """
    Scale video to a target resolution.

    Args:
        video_bytes: Input video as bytes
        target_resolution: Target resolution (480p, 720p, 1080p, 1440p, 4k)
        maintain_aspect: When True, scale to the target height and derive
            width from the source aspect ratio; when False, force the exact
            target dimensions (may distort).

    Returns:
        Scaled video as bytes

    Raises:
        HTTPException: 500 if MoviePy is unavailable or scaling fails.
    """
    _check_moviepy()
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
        input_file.write(video_bytes)
        input_path = input_file.name
    clip = None
    output_path = None
    try:
        clip = VideoFileClip(input_path)
        target_width, target_height = _get_resolution_dimensions(target_resolution)
        # MoviePy v2 renamed resize() to resized(); the module-level
        # "from moviepy import VideoFileClip" import is v2-style, so prefer
        # the new name and fall back to the v1 one.
        resize_fn = getattr(clip, "resized", None) or getattr(clip, "resize")
        if maintain_aspect:
            clip = resize_fn(height=target_height)  # width follows source aspect
        else:
            clip = resize_fn((target_width, target_height))
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_file:
            output_path = output_file.name
        clip.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            preset="medium",
            threads=4,
            logger=None,
        )
        with open(output_path, "rb") as f:
            output_bytes = f.read()
        logger.info(f"[VideoProcessors] Resolution scaling successful: {target_resolution}, size={len(output_bytes)} bytes")
        return output_bytes
    except Exception as e:
        logger.error(f"[VideoProcessors] Resolution scaling failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Resolution scaling failed: {str(e)}")
    finally:
        # Release the decoder and remove temp files on all paths (the
        # original leaked the clip when an exception occurred).
        if clip is not None:
            clip.close()
        Path(input_path).unlink(missing_ok=True)
        if output_path is not None:
            Path(output_path).unlink(missing_ok=True)
def compress_video(
    video_bytes: bytes,
    target_size_mb: Optional[float] = None,
    quality: str = "medium",
) -> bytes:
    """
    Compress video to reduce file size.

    Args:
        video_bytes: Input video as bytes
        target_size_mb: Target file size in MB; when given, the video
            bitrate is derived from it (clamped to 500k-10000k) and the
            quality preset's bitrate is ignored.
        quality: Quality preset (high, medium, low); its encoder-speed
            preset is always used.

    Returns:
        Compressed video as bytes

    Raises:
        HTTPException: 500 if MoviePy is unavailable or compression fails.
    """
    _check_moviepy()
    quality_presets = {
        "high": {"bitrate": "5000k", "preset": "slow"},
        "medium": {"bitrate": "2500k", "preset": "medium"},
        "low": {"bitrate": "1000k", "preset": "fast"},
    }
    preset = quality_presets.get(quality, quality_presets["medium"])
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
        input_file.write(video_bytes)
        input_path = input_file.name
    clip = None
    output_path = None
    try:
        clip = VideoFileClip(input_path)
        if target_size_mb:
            # Derive the bitrate that would hit the target size exactly,
            # then clamp to a sane range (min 500k, max 10000k).
            duration = clip.duration
            target_size_bits = target_size_mb * 8 * 1024 * 1024  # MB -> bits
            calculated_bitrate = int(target_size_bits / duration)
            bitrate = f"{max(500, min(10000, calculated_bitrate // 1000))}k"
        else:
            bitrate = preset["bitrate"]
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_file:
            output_path = output_file.name
        clip.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            bitrate=bitrate,
            preset=preset["preset"],
            threads=4,
            logger=None,
        )
        with open(output_path, "rb") as f:
            output_bytes = f.read()
        original_size_mb = len(video_bytes) / (1024 * 1024)
        compressed_size_mb = len(output_bytes) / (1024 * 1024)
        compression_ratio = (1 - compressed_size_mb / original_size_mb) * 100
        logger.info(
            f"[VideoProcessors] Compression successful: "
            f"{original_size_mb:.2f}MB -> {compressed_size_mb:.2f}MB ({compression_ratio:.1f}% reduction)"
        )
        return output_bytes
    except Exception as e:
        logger.error(f"[VideoProcessors] Compression failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Compression failed: {str(e)}")
    finally:
        # Release the decoder and remove temp files on all paths (the
        # original leaked the clip when an exception occurred).
        if clip is not None:
            clip.close()
        Path(input_path).unlink(missing_ok=True)
        if output_path is not None:
            Path(output_path).unlink(missing_ok=True)
def trim_video(
    video_bytes: bytes,
    start_time: float = 0.0,
    end_time: Optional[float] = None,
    max_duration: Optional[float] = None,
    trim_mode: str = "beginning",
) -> bytes:
    """
    Trim video to a time range or to a maximum duration.

    Args:
        video_bytes: Input video as bytes
        start_time: Start time in seconds (default: 0.0)
        end_time: End time in seconds (defaults to the video duration)
        max_duration: Maximum duration in seconds; when the video is
            longer, start_time/end_time are overridden per trim_mode.
        trim_mode: Which part to keep when max_duration applies
            ("beginning", "middle", "end")

    Returns:
        Trimmed video as bytes

    Raises:
        HTTPException: 500 if MoviePy is unavailable or trimming fails.
    """
    _check_moviepy()
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
        input_file.write(video_bytes)
        input_path = input_file.name
    clip = None
    trimmed_clip = None
    output_path = None
    try:
        clip = VideoFileClip(input_path)
        original_duration = clip.duration
        if max_duration and original_duration > max_duration:
            # Video exceeds the cap: pick the window according to trim_mode.
            if trim_mode == "beginning":
                start_time = 0.0
                end_time = max_duration
            elif trim_mode == "end":
                start_time = original_duration - max_duration
                end_time = original_duration
            else:  # middle
                start_time = (original_duration - max_duration) / 2
                end_time = start_time + max_duration
        else:
            # Use the caller-provided window, defaulting to the full video.
            if end_time is None:
                end_time = original_duration
        # Clamp the window to [0, duration], keeping start <= end.
        start_time = max(0.0, min(start_time, original_duration))
        end_time = max(start_time, min(end_time, original_duration))
        # MoviePy v2 renamed subclip() to subclipped(); the module-level
        # "from moviepy import VideoFileClip" import is v2-style, so prefer
        # the new name and fall back to the v1 one.
        subclip_fn = getattr(clip, "subclipped", None) or getattr(clip, "subclip")
        trimmed_clip = subclip_fn(start_time, end_time)
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_file:
            output_path = output_file.name
        trimmed_clip.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            preset="medium",
            threads=4,
            logger=None,
        )
        with open(output_path, "rb") as f:
            output_bytes = f.read()
        logger.info(
            f"[VideoProcessors] Video trimmed: {start_time:.2f}s-{end_time:.2f}s, "
            f"duration={end_time - start_time:.2f}s, size={len(output_bytes)} bytes"
        )
        return output_bytes
    except Exception as e:
        logger.error(f"[VideoProcessors] Video trimming failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Video trimming failed: {str(e)}")
    finally:
        # Release both clips and remove temp files on all paths (the
        # original leaked them when an exception occurred).
        if trimmed_clip is not None:
            trimmed_clip.close()
        if clip is not None:
            clip.close()
        Path(input_path).unlink(missing_ok=True)
        if output_path is not None:
            Path(output_path).unlink(missing_ok=True)
def extract_thumbnail(
    video_bytes: bytes,
    time_position: Optional[float] = None,
    width: int = 1280,
    height: int = 720,
) -> bytes:
    """
    Extract a single frame from the video as a JPEG thumbnail.

    Args:
        video_bytes: Input video as bytes
        time_position: Time position in seconds (default: middle of video)
        width: Thumbnail width in pixels (default: 1280)
        height: Thumbnail height in pixels (default: 720)

    Returns:
        Thumbnail image as bytes (JPEG, quality 90)

    Raises:
        HTTPException: 500 if MoviePy is unavailable or extraction fails.
    """
    _check_moviepy()
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as input_file:
        input_file.write(video_bytes)
        input_path = input_file.name
    clip = None
    try:
        clip = VideoFileClip(input_path)
        if time_position is None:
            time_position = clip.duration / 2  # Default: middle of the video
        # Clamp the requested time into the valid range.
        time_position = max(0.0, min(time_position, clip.duration))
        frame = clip.get_frame(time_position)
        # Local import so the module still loads when Pillow is absent.
        from PIL import Image
        img = Image.fromarray(frame)
        if img.size != (width, height):
            img = img.resize((width, height), Image.Resampling.LANCZOS)
        output_buffer = io.BytesIO()
        img.save(output_buffer, format="JPEG", quality=90)
        output_bytes = output_buffer.getvalue()
        logger.info(
            f"[VideoProcessors] Thumbnail extracted: time={time_position:.2f}s, "
            f"size={width}x{height}, image_size={len(output_bytes)} bytes"
        )
        return output_bytes
    except Exception as e:
        logger.error(f"[VideoProcessors] Thumbnail extraction failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Thumbnail extraction failed: {str(e)}")
    finally:
        # Release the decoder and remove the temp file on all paths (the
        # original leaked the clip when an exception occurred).
        if clip is not None:
            clip.close()
        Path(input_path).unlink(missing_ok=True)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,135 @@
"""
Video Translate service for Video Studio.
Uses HeyGen Video Translate (heygen/video-translate) for video translation.
"""
import base64
from typing import Dict, Any, Optional, Callable
from fastapi import HTTPException
from utils.logger_utils import get_service_logger
from ..wavespeed.client import WaveSpeedClient
logger = get_service_logger("video_studio.video_translate")
class VideoTranslateService:
    """Service for video translation operations.

    Wraps the WaveSpeed client's HeyGen Video Translate endpoint and
    persists results via VideoStudioService.
    """

    def __init__(self):
        """Initialize Video Translate service."""
        # Shared WaveSpeed API client used for all translation calls.
        self.wavespeed_client = WaveSpeedClient()
        logger.info("[VideoTranslate] Service initialized")

    def calculate_cost(self, duration: float = 10.0) -> float:
        """
        Calculate cost for a video translation operation.

        Args:
            duration: Video duration in seconds

        Returns:
            Cost in USD ($0.0375 per second, floored at 1 second)
        """
        # HeyGen Video Translate pricing: $0.0375/s.
        # No minimum charge mentioned in docs, so use a 1-second minimum.
        cost_per_second = 0.0375
        billed_duration = max(1.0, duration)
        return cost_per_second * billed_duration

    async def translate_video(
        self,
        video_data: bytes,
        output_language: str = "English",
        user_id: Optional[str] = None,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> Dict[str, Any]:
        """
        Translate video to target language using HeyGen Video Translate.

        Args:
            video_data: Source video as bytes (sent as an MP4 data URI)
            output_language: Target language for translation
            user_id: User ID for tracking (required; a missing id surfaces
                as a failure result, see Returns)
            progress_callback: Optional callback(percent, message)

        Returns:
            On success: dict with success=True, video_url, video_bytes,
            cost, output_language, and metadata.
            On non-HTTP failure: dict with success=False and "error".
            NOTE(review): sibling Video Studio services raise HTTPException
            for generic failures instead of returning an error dict —
            confirm callers handle both shapes.
        """
        try:
            logger.info(
                f"[VideoTranslate] Video translate request: user={user_id}, "
                f"output_language={output_language}"
            )
            if not user_id:
                # NOTE(review): this ValueError is caught by the generic
                # handler below, so it surfaces as success=False rather
                # than an HTTP 400.
                raise ValueError("user_id is required for video translation")
            # Encode the video as a base64 data URI for the client API.
            video_b64 = base64.b64encode(video_data).decode('utf-8')
            video_uri = f"data:video/mp4;base64,{video_b64}"
            # Fixed duration estimate for billing; actual duration would
            # have to come from video metadata — TODO improve.
            estimated_duration = 10.0  # Default estimate, should be improved with actual video duration
            # Calculate cost estimate
            cost = self.calculate_cost(estimated_duration)
            if progress_callback:
                progress_callback(10.0, f"Submitting video translation request to HeyGen ({output_language})...")
            # Perform video translation.
            # NOTE(review): video_translate is a synchronous call (uses
            # sync_mode internally) invoked directly from this async
            # method, so it blocks the event loop for up to the timeout;
            # remove_background wraps the client in asyncio.to_thread —
            # consider doing the same here.
            translated_video_bytes = self.wavespeed_client.video_translate(
                video=video_uri,
                output_language=output_language,
                enable_sync_mode=True,
                timeout=600,  # 10 minutes timeout
                progress_callback=progress_callback,
            )
            if progress_callback:
                progress_callback(90.0, "Video translation complete, saving video...")
            # Persist the translated video (local import avoids a circular
            # dependency at module import time).
            from . import VideoStudioService
            video_service = VideoStudioService()
            save_result = video_service._save_video_file(
                video_bytes=translated_video_bytes,
                operation_type="video_translate",
                user_id=user_id,
            )
            # Recalculate cost with actual duration if available.
            # For now, use the estimated cost.
            actual_cost = cost
            logger.info(
                f"[VideoTranslate] Video translate successful: user={user_id}, "
                f"output_language={output_language}, cost=${actual_cost:.4f}"
            )
            metadata = {
                "original_video_size": len(video_data),
                "translated_video_size": len(translated_video_bytes),
                "output_language": output_language,
            }
            return {
                "success": True,
                "video_url": save_result["file_url"],
                "video_bytes": translated_video_bytes,
                "cost": actual_cost,
                "output_language": output_language,
                "metadata": metadata,
            }
        except HTTPException:
            # Preserve status codes raised by lower layers.
            raise
        except Exception as e:
            logger.error(f"[VideoTranslate] Video translate error: {e}", exc_info=True)
            return {
                "success": False,
                "error": str(e)
            }