Files
ALwrity/backend/services/video_studio/video_studio_service.py

1063 lines
42 KiB
Python

"""
Video Studio Service
Main service for AI video generation operations including:
- Text-to-video generation
- Image-to-video transformation
- Avatar generation
- Video enhancement
Integrates with WaveSpeed AI models and handles cost tracking.
"""
import asyncio
import base64
import io
import uuid
from pathlib import Path
from typing import Dict, Any, Optional, List, Callable
from datetime import datetime
from fastapi import HTTPException
from ..wavespeed.client import WaveSpeedClient
from ..llm_providers.main_video_generation import ai_video_generate
from ..subscription.pricing_service import PricingService
from ..database import get_db
from utils.logger_utils import get_service_logger
from utils.file_storage import save_file_safely, sanitize_filename
from .video_processors import (
convert_format,
convert_aspect_ratio,
adjust_speed,
scale_resolution,
compress_video,
)
logger = get_service_logger("video_studio")
class VideoStudioService:
"""Main service for Video Studio operations."""
def __init__(self):
"""Initialize Video Studio service."""
self.wavespeed_client = WaveSpeedClient()
# Video output directory
# __file__ is: backend/services/video_studio/video_studio_service.py
# We need: backend/video_studio_videos
base_dir = Path(__file__).parent.parent.parent.parent
self.output_dir = base_dir / "video_studio_videos"
self.output_dir.mkdir(parents=True, exist_ok=True)
# Verify directory was created
if not self.output_dir.exists():
raise RuntimeError(f"Failed to create video_studio_videos directory: {self.output_dir}")
logger.info(f"[VideoStudio] Initialized with output directory: {self.output_dir}")
def _save_video_file(
self,
video_bytes: bytes,
operation_type: str,
user_id: str,
) -> Dict[str, Any]:
"""Save video file to disk.
Args:
video_bytes: Video content as bytes
operation_type: Type of operation (e.g., "text-to-video", "image-to-video")
user_id: User ID for directory organization
Returns:
Dictionary with filename, file_path, and file_url
"""
# Create user-specific directory
user_dir = self.output_dir / user_id
user_dir.mkdir(parents=True, exist_ok=True)
# Generate filename
filename = f"{operation_type}_{uuid.uuid4().hex[:8]}.mp4"
filename = sanitize_filename(filename)
# Save file
file_path, error = save_file_safely(
content=video_bytes,
directory=user_dir,
filename=filename,
max_file_size=500 * 1024 * 1024 # 500MB max for videos
)
if error:
raise HTTPException(
status_code=500,
detail=f"Failed to save video file: {error}"
)
file_url = f"/api/video-studio/videos/{user_id}/{filename}"
return {
"filename": filename,
"file_path": str(file_path),
"file_url": file_url,
"file_size": len(video_bytes),
}
async def generate_text_to_video(
self,
prompt: str,
negative_prompt: Optional[str] = None,
duration: int = 5,
resolution: str = "720p",
aspect_ratio: str = "16:9",
motion_preset: str = "medium",
provider: str = "wavespeed",
model: str = "hunyuan-video-1.5",
user_id: str = None,
) -> Dict[str, Any]:
"""
Generate video from text prompt using AI models.
Args:
prompt: Text description of desired video
negative_prompt: What to avoid in the video
duration: Video duration in seconds
resolution: Video resolution (480p, 720p, 1080p)
aspect_ratio: Video aspect ratio (9:16, 1:1, 16:9)
motion_preset: Motion intensity (subtle, medium, dynamic)
provider: AI provider (wavespeed, huggingface, etc.)
model: Specific model to use
user_id: User ID for tracking
Returns:
Dict with video_url, cost, and metadata
"""
try:
logger.info(f"[VideoStudio] Text-to-video: model={model}, duration={duration}s, user={user_id}")
# Map model names to WaveSpeed endpoints
model_mapping = {
"hunyuan-video-1.5": "hunyuan-video-1.5/text-to-video",
"lightricks/ltx-2-pro": "lightricks/ltx-2-pro/text-to-video",
"lightricks/ltx-2-fast": "lightricks/ltx-2-fast/text-to-video",
"lightricks/ltx-2-retake": "lightricks/ltx-2-retake/text-to-video",
}
wavespeed_model = model_mapping.get(model, model)
# Prepare parameters
params = {
"duration": duration,
"resolution": resolution,
"aspect_ratio": aspect_ratio,
"motion_preset": motion_preset,
}
if negative_prompt:
params["negative_prompt"] = negative_prompt
# Generate video using WaveSpeed
result = await self.wavespeed_client.generate_video(
prompt=prompt,
model=wavespeed_model,
**params
)
if result.get("success"):
# Calculate cost
cost = self._calculate_cost(
operation="text-to-video",
model=model,
duration=duration,
resolution=resolution
)
return {
"success": True,
"video_url": result.get("video_url"),
"cost": cost,
"estimated_duration": duration,
"model_used": model,
"provider": provider,
}
else:
return {
"success": False,
"error": result.get("error", "Video generation failed")
}
except Exception as e:
logger.error(f"[VideoStudio] Text-to-video error: {e}", exc_info=True)
return {
"success": False,
"error": str(e)
}
async def generate_image_to_video(
self,
image_data: bytes,
prompt: Optional[str] = None,
duration: int = 5,
resolution: str = "720p",
aspect_ratio: str = "16:9",
motion_preset: str = "medium",
provider: str = "wavespeed",
model: str = "alibaba/wan-2.5",
user_id: str = None,
) -> Dict[str, Any]:
"""
Transform image to video using unified video generation entry point.
Args:
image_data: Image file data as bytes
prompt: Optional text prompt to guide transformation
duration: Video duration in seconds
resolution: Video resolution
aspect_ratio: Video aspect ratio (not used by WAN 2.5, kept for API compatibility)
motion_preset: Motion intensity (not used by WAN 2.5, kept for API compatibility)
provider: AI provider (must be "wavespeed" for image-to-video)
model: Specific model to use (alibaba/wan-2.5 or wavespeed/kandinsky5-pro)
user_id: User ID for tracking
Returns:
Dict with video_url, cost, and metadata
"""
try:
logger.info(f"[VideoStudio] Image-to-video: model={model}, duration={duration}s, user={user_id}")
if not user_id:
raise ValueError("user_id is required for video generation")
# Map model names to full model paths
model_mapping = {
"alibaba/wan-2.5": "alibaba/wan-2.5/image-to-video",
"wavespeed/kandinsky5-pro": "wavespeed/kandinsky5-pro/image-to-video",
}
full_model = model_mapping.get(model, model)
# Use unified video generation entry point
# This handles pre-flight validation, generation, and usage tracking
# Returns dict with video_bytes and full metadata
result = ai_video_generate(
image_data=image_data,
prompt=prompt or "",
operation_type="image-to-video",
provider=provider,
user_id=user_id,
duration=duration,
resolution=resolution,
model=full_model,
# Note: aspect_ratio and motion_preset are not supported by WAN 2.5
# but we keep them in the API for future compatibility
)
# Extract video bytes and metadata
video_bytes = result["video_bytes"]
# Save video to disk
save_result = self._save_video_file(
video_bytes=video_bytes,
operation_type="image-to-video",
user_id=user_id,
)
# Save to asset library
try:
from utils.asset_tracker import save_asset_to_library
db = next(get_db())
try:
save_asset_to_library(
db=db,
user_id=user_id,
asset_type="video",
source_module="video_studio",
filename=save_result["filename"],
file_url=save_result["file_url"],
file_path=save_result["file_path"],
file_size=save_result["file_size"],
mime_type="video/mp4",
title=f"Video Studio: Image-to-Video ({resolution})",
description=f"Generated video: {prompt[:100] if prompt else 'No prompt'}",
prompt=result.get("prompt", prompt or ""),
tags=["video_studio", "image-to-video", resolution],
provider=result.get("provider", provider),
model=result.get("model_name", model),
cost=result.get("cost", 0.0),
asset_metadata={
"resolution": result.get("resolution", resolution),
"duration": result.get("duration", float(duration)),
"operation": "image-to-video",
"width": result.get("width", 1280),
"height": result.get("height", 720),
}
)
logger.info(f"[VideoStudio] Video saved to asset library")
finally:
db.close()
except Exception as e:
logger.warning(f"[VideoStudio] Failed to save to asset library: {e}")
return {
"success": True,
"video_url": save_result["file_url"],
"cost": result.get("cost", 0.0),
"estimated_duration": result.get("duration", float(duration)),
"model_used": result.get("model_name", model),
"provider": result.get("provider", provider),
"resolution": result.get("resolution", resolution),
"width": result.get("width", 1280),
"height": result.get("height", 720),
"file_size": save_result["file_size"],
"metadata": result.get("metadata", {}),
}
except Exception as e:
logger.error(f"[VideoStudio] Image-to-video error: {e}", exc_info=True)
return {
"success": False,
"error": str(e)
}
async def generate_avatar_video(
self,
avatar_data: bytes,
audio_data: Optional[bytes] = None,
video_data: Optional[bytes] = None,
text: Optional[str] = None,
language: str = "en",
provider: str = "wavespeed",
model: str = "wavespeed/mocha",
user_id: str = None,
) -> Dict[str, Any]:
"""
Generate talking avatar video or perform face swap.
Args:
avatar_data: Avatar/face image as bytes
audio_data: Audio file data for lip sync
video_data: Source video for face swap
text: Text to convert to speech
language: Language for text-to-speech
provider: AI provider
model: Specific model to use
user_id: User ID for tracking
Returns:
Dict with video_url, cost, and metadata
"""
try:
logger.info(f"[VideoStudio] Avatar generation: model={model}, user={user_id}")
# Convert avatar to base64
avatar_b64 = base64.b64encode(avatar_data).decode('utf-8')
avatar_uri = f"data:image/png;base64,{avatar_b64}"
# Map model names to WaveSpeed endpoints
model_mapping = {
"wavespeed/mocha": "wavespeed/mocha/face-swap",
"heygen/video-translate": "heygen/video-translate",
}
wavespeed_model = model_mapping.get(model, model)
# Prepare parameters
params = {
"avatar": avatar_uri,
"language": language,
}
if audio_data:
audio_b64 = base64.b64encode(audio_data).decode('utf-8')
params["audio"] = f"data:audio/wav;base64,{audio_b64}"
elif text:
params["text"] = text
elif video_data:
video_b64 = base64.b64encode(video_data).decode('utf-8')
params["source_video"] = f"data:video/mp4;base64,{video_b64}"
# Generate avatar video using WaveSpeed
result = await self.wavespeed_client.generate_video(
model=wavespeed_model,
**params
)
if result.get("success"):
# Calculate cost (avatars are typically more expensive)
cost = self._calculate_cost(
operation="avatar",
model=model,
duration=10 # Assume 10 second avatar videos
)
return {
"success": True,
"video_url": result.get("video_url"),
"cost": cost,
"model_used": model,
"provider": provider,
}
else:
return {
"success": False,
"error": result.get("error", "Avatar generation failed")
}
except Exception as e:
logger.error(f"[VideoStudio] Avatar generation error: {e}", exc_info=True)
return {
"success": False,
"error": str(e)
}
async def enhance_video(
self,
video_data: bytes,
enhancement_type: str,
target_resolution: Optional[str] = None,
provider: str = "wavespeed",
model: str = "flashvsr",
user_id: str = None,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> Dict[str, Any]:
"""
Enhance existing video using AI models.
Args:
video_data: Video file data as bytes
enhancement_type: Type of enhancement (upscale, stabilize, etc.)
target_resolution: Target resolution for upscale ("720p", "1080p", "2k", "4k")
provider: AI provider
model: Specific model to use (default: "flashvsr")
user_id: User ID for tracking
progress_callback: Optional callback for progress updates
Returns:
Dict with enhanced video_url, cost, and metadata
"""
try:
logger.info(f"[VideoStudio] Video enhancement: type={enhancement_type}, model={model}, resolution={target_resolution}, user={user_id}")
# Default target resolution for upscale
if enhancement_type == "upscale" and not target_resolution:
target_resolution = "1080p"
# Convert video to base64 data URI
video_b64 = base64.b64encode(video_data).decode('utf-8')
video_uri = f"data:video/mp4;base64,{video_b64}"
# Handle different enhancement types
if enhancement_type == "upscale" and model in ("flashvsr", "wavespeed/flashvsr", "wavespeed-ai/flashvsr"):
# Use FlashVSR for upscaling
enhanced_video_bytes = await asyncio.to_thread(
self.wavespeed_client.upscale_video,
video=video_uri,
target_resolution=target_resolution or "1080p",
enable_sync_mode=False, # Always use async with polling
timeout=600, # 10 minutes max for long videos
progress_callback=progress_callback,
)
# Calculate cost based on video duration and resolution
# FlashVSR pricing: $0.06-$0.16 per 5 seconds based on resolution
pricing = {
"720p": 0.06 / 5, # $0.012 per second
"1080p": 0.09 / 5, # $0.018 per second
"2k": 0.12 / 5, # $0.024 per second
"4k": 0.16 / 5, # $0.032 per second
}
# Estimate video duration (rough estimate: 1MB ≈ 1 second at 1080p)
# In production, you'd parse the video file to get actual duration
estimated_duration = max(5, len(video_data) / (1024 * 1024)) # Minimum 5 seconds
resolution_key = (target_resolution or "1080p").lower()
cost_per_second = pricing.get(resolution_key, pricing["1080p"])
cost = estimated_duration * cost_per_second
# Save enhanced video
save_result = self._save_video_file(
video_bytes=enhanced_video_bytes,
operation_type="enhancement_upscale",
user_id=user_id,
)
logger.info(f"[VideoStudio] Video upscaling successful: user={user_id}, cost=${cost:.4f}")
return {
"success": True,
"video_url": save_result["file_url"],
"video_bytes": enhanced_video_bytes,
"cost": cost,
"enhancement_type": enhancement_type,
"target_resolution": target_resolution,
"model_used": "wavespeed-ai/flashvsr",
"provider": provider,
"metadata": {
"original_size": len(video_data),
"enhanced_size": len(enhanced_video_bytes),
"estimated_duration": estimated_duration,
},
}
else:
# Other enhancement types (stabilize, colorize, etc.) - to be implemented
logger.warning(f"[VideoStudio] Enhancement type '{enhancement_type}' not yet implemented")
return {
"success": False,
"error": f"Enhancement type '{enhancement_type}' is not yet supported. Currently only 'upscale' with FlashVSR is available."
}
except HTTPException:
raise
except Exception as e:
logger.error(f"[VideoStudio] Video enhancement error: {e}", exc_info=True)
return {
"success": False,
"error": str(e)
}
async def extend_video(
self,
video_data: bytes,
prompt: str,
model: str = "wan-2.5",
audio_data: Optional[bytes] = None,
negative_prompt: Optional[str] = None,
resolution: str = "720p",
duration: int = 5,
enable_prompt_expansion: bool = False,
generate_audio: bool = True,
camera_fixed: bool = False,
seed: Optional[int] = None,
user_id: str = None,
progress_callback: Optional[Callable[[float, str], None]] = None,
) -> Dict[str, Any]:
"""
Extend video duration using WAN 2.5, WAN 2.2 Spicy, or Seedance 1.5 Pro video-extend.
Args:
video_data: Video file data as bytes
prompt: Text prompt describing how to extend the video
model: Model to use ("wan-2.5", "wan-2.2-spicy", or "seedance-1.5-pro")
audio_data: Optional audio file data as bytes (WAN 2.5 only)
negative_prompt: Optional negative prompt (WAN 2.5 only)
resolution: Output resolution (varies by model)
duration: Duration of extended video in seconds (varies by model)
enable_prompt_expansion: Enable prompt optimizer (WAN 2.5 only)
generate_audio: Generate audio for extended video (Seedance 1.5 Pro only)
camera_fixed: Fix camera position (Seedance 1.5 Pro only)
seed: Random seed for reproducibility
user_id: User ID for tracking
progress_callback: Optional callback for progress updates
Returns:
Dict with extended video_url, cost, and metadata
"""
try:
logger.info(f"[VideoStudio] Video extension: model={model}, duration={duration}s, resolution={resolution}, user={user_id}")
# Validate model-specific constraints
if model in ("wan-2.2-spicy", "wavespeed-ai/wan-2.2-spicy/video-extend"):
if resolution not in ["480p", "720p"]:
raise ValueError("WAN 2.2 Spicy only supports 480p and 720p resolutions")
if duration not in [5, 8]:
raise ValueError("WAN 2.2 Spicy only supports 5 or 8 second durations")
if audio_data:
logger.warning("[VideoStudio] Audio not supported for WAN 2.2 Spicy, ignoring")
audio_data = None
if negative_prompt:
logger.warning("[VideoStudio] Negative prompt not supported for WAN 2.2 Spicy, ignoring")
negative_prompt = None
if enable_prompt_expansion:
logger.warning("[VideoStudio] Prompt expansion not supported for WAN 2.2 Spicy, ignoring")
enable_prompt_expansion = False
elif model in ("seedance-1.5-pro", "bytedance/seedance-v1.5-pro/video-extend"):
if resolution not in ["480p", "720p"]:
raise ValueError("Seedance 1.5 Pro only supports 480p and 720p resolutions")
if duration < 4 or duration > 12:
raise ValueError("Seedance 1.5 Pro only supports 4-12 second durations")
if audio_data:
logger.warning("[VideoStudio] Audio upload not supported for Seedance 1.5 Pro (use generate_audio instead), ignoring")
audio_data = None
if negative_prompt:
logger.warning("[VideoStudio] Negative prompt not supported for Seedance 1.5 Pro, ignoring")
negative_prompt = None
if enable_prompt_expansion:
logger.warning("[VideoStudio] Prompt expansion not supported for Seedance 1.5 Pro, ignoring")
enable_prompt_expansion = False
# Convert video to base64 data URI
video_b64 = base64.b64encode(video_data).decode('utf-8')
video_uri = f"data:video/mp4;base64,{video_b64}"
# Convert audio to base64 if provided (WAN 2.5 only)
audio_uri = None
if audio_data and model not in ("wan-2.2-spicy", "wavespeed-ai/wan-2.2-spicy/video-extend", "seedance-1.5-pro", "bytedance/seedance-v1.5-pro/video-extend"):
audio_b64 = base64.b64encode(audio_data).decode('utf-8')
audio_uri = f"data:audio/mp3;base64,{audio_b64}"
# Extend video using WaveSpeed
extended_video_bytes = await asyncio.to_thread(
self.wavespeed_client.extend_video,
video=video_uri,
prompt=prompt,
model=model,
audio=audio_uri,
negative_prompt=negative_prompt,
resolution=resolution,
duration=duration,
enable_prompt_expansion=enable_prompt_expansion,
generate_audio=generate_audio,
camera_fixed=camera_fixed,
seed=seed,
enable_sync_mode=False, # Always use async with polling
timeout=600, # 10 minutes max
progress_callback=progress_callback,
)
# Calculate cost (model-specific pricing)
if model in ("wan-2.2-spicy", "wavespeed-ai/wan-2.2-spicy/video-extend"):
# WAN 2.2 Spicy pricing: $0.03/s (480p), $0.06/s (720p)
pricing = {
"480p": 0.03,
"720p": 0.06,
}
elif model in ("seedance-1.5-pro", "bytedance/seedance-v1.5-pro/video-extend"):
# Seedance 1.5 Pro pricing varies by audio generation
# With audio: $0.024/s (480p), $0.052/s (720p)
# Without audio: $0.012/s (480p), $0.026/s (720p)
if generate_audio:
pricing = {
"480p": 0.024,
"720p": 0.052,
}
else:
pricing = {
"480p": 0.012,
"720p": 0.026,
}
else:
# WAN 2.5 pricing: $0.05/s (480p), $0.10/s (720p), $0.15/s (1080p)
pricing = {
"480p": 0.05,
"720p": 0.10,
"1080p": 0.15,
}
cost = pricing.get(resolution, pricing.get("720p", 0.10)) * duration
# Determine model name for metadata
if model in ("wan-2.2-spicy", "wavespeed-ai/wan-2.2-spicy/video-extend"):
model_name = "wavespeed-ai/wan-2.2-spicy/video-extend"
elif model in ("seedance-1.5-pro", "bytedance/seedance-v1.5-pro/video-extend"):
model_name = "bytedance/seedance-v1.5-pro/video-extend"
else:
model_name = "alibaba/wan-2.5/video-extend"
# Save extended video
save_result = self._save_video_file(
video_bytes=extended_video_bytes,
operation_type="extend",
user_id=user_id,
)
logger.info(f"[VideoStudio] Video extension successful: user={user_id}, model={model_name}, cost=${cost:.4f}")
return {
"success": True,
"video_url": save_result["file_url"],
"video_bytes": extended_video_bytes,
"cost": cost,
"duration": duration,
"resolution": resolution,
"model_used": model_name,
"provider": "wavespeed",
"metadata": {
"original_size": len(video_data),
"extended_size": len(extended_video_bytes),
"duration": duration,
},
}
except HTTPException:
raise
except Exception as e:
logger.error(f"[VideoStudio] Video extension error: {e}", exc_info=True)
return {
"success": False,
"error": str(e)
}
async def transform_video(
self,
video_data: bytes,
transform_type: str,
user_id: str = None,
# Format conversion parameters
output_format: Optional[str] = None,
codec: Optional[str] = None,
quality: Optional[str] = None,
audio_codec: Optional[str] = None,
# Aspect ratio parameters
target_aspect: Optional[str] = None,
crop_mode: Optional[str] = None,
# Speed parameters
speed_factor: Optional[float] = None,
# Resolution parameters
target_resolution: Optional[str] = None,
maintain_aspect: bool = True,
# Compression parameters
target_size_mb: Optional[float] = None,
compress_quality: Optional[str] = None,
) -> Dict[str, Any]:
"""
Transform video using FFmpeg/MoviePy (format, aspect, speed, resolution, compression).
Args:
video_data: Video file data as bytes
transform_type: Type of transformation ("format", "aspect", "speed", "resolution", "compress")
user_id: User ID for tracking
output_format: Output format for format conversion (mp4, mov, webm, gif)
codec: Video codec (libx264, libvpx-vp9, etc.)
quality: Quality preset (high, medium, low)
audio_codec: Audio codec (aac, mp3, opus, etc.)
target_aspect: Target aspect ratio (16:9, 9:16, 1:1, 4:5, 21:9)
crop_mode: Crop mode for aspect conversion (center, letterbox)
speed_factor: Speed multiplier (0.25, 0.5, 1.0, 1.5, 2.0, 4.0)
target_resolution: Target resolution (480p, 720p, 1080p, 1440p, 4k)
maintain_aspect: Whether to maintain aspect ratio when scaling
target_size_mb: Target file size in MB for compression
compress_quality: Quality preset for compression (high, medium, low)
Returns:
Dict with transformed video_url, cost (0 for FFmpeg operations), and metadata
"""
try:
logger.info(f"[VideoStudio] Video transformation: type={transform_type}, user={user_id}")
if not user_id:
raise ValueError("user_id is required for video transformation")
# Process video based on transform type
transformed_video_bytes = None
if transform_type == "format":
if not output_format:
raise ValueError("output_format is required for format conversion")
transformed_video_bytes = await asyncio.to_thread(
convert_format,
video_bytes=video_data,
output_format=output_format,
codec=codec or "libx264",
quality=quality or "medium",
audio_codec=audio_codec or "aac",
)
elif transform_type == "aspect":
if not target_aspect:
raise ValueError("target_aspect is required for aspect ratio conversion")
transformed_video_bytes = await asyncio.to_thread(
convert_aspect_ratio,
video_bytes=video_data,
target_aspect=target_aspect,
crop_mode=crop_mode or "center",
)
elif transform_type == "speed":
if speed_factor is None:
raise ValueError("speed_factor is required for speed adjustment")
transformed_video_bytes = await asyncio.to_thread(
adjust_speed,
video_bytes=video_data,
speed_factor=speed_factor,
)
elif transform_type == "resolution":
if not target_resolution:
raise ValueError("target_resolution is required for resolution scaling")
transformed_video_bytes = await asyncio.to_thread(
scale_resolution,
video_bytes=video_data,
target_resolution=target_resolution,
maintain_aspect=maintain_aspect,
)
elif transform_type == "compress":
transformed_video_bytes = await asyncio.to_thread(
compress_video,
video_bytes=video_data,
target_size_mb=target_size_mb,
quality=compress_quality or "medium",
)
else:
raise ValueError(f"Unsupported transform type: {transform_type}")
if not transformed_video_bytes:
raise RuntimeError("Video transformation failed - no output generated")
# Save transformed video
save_result = self._save_video_file(
video_bytes=transformed_video_bytes,
operation_type=f"transform_{transform_type}",
user_id=user_id,
)
# FFmpeg operations are free (no AI cost)
cost = 0.0
logger.info(
f"[VideoStudio] Video transformation successful: "
f"type={transform_type}, user={user_id}, "
f"original={len(video_data)} bytes, transformed={len(transformed_video_bytes)} bytes"
)
return {
"success": True,
"video_url": save_result["file_url"],
"video_bytes": transformed_video_bytes,
"cost": cost,
"transform_type": transform_type,
"metadata": {
"original_size": len(video_data),
"transformed_size": len(transformed_video_bytes),
"transform_type": transform_type,
},
}
except HTTPException:
raise
except Exception as e:
logger.error(f"[VideoStudio] Video transformation error: {e}", exc_info=True)
return {
"success": False,
"error": str(e)
}
def get_available_models(self, operation_type: Optional[str] = None) -> List[Dict[str, Any]]:
"""
Get available AI models for video operations.
Args:
operation_type: Filter by operation type (optional)
Returns:
List of available models with metadata
"""
all_models = {
"text-to-video": [
{
"id": "hunyuan-video-1.5",
"name": "Hunyuan Video 1.5",
"provider": "wavespeed",
"description": "High-quality text-to-video generation",
"cost_per_second": 0.10,
"supported_resolutions": ["720p", "1080p"],
"max_duration": 10,
},
{
"id": "lightricks/ltx-2-pro",
"name": "LTX-2 Pro",
"provider": "wavespeed",
"description": "Professional quality text-to-video",
"cost_per_second": 0.15,
"supported_resolutions": ["720p", "1080p"],
"max_duration": 10,
},
{
"id": "lightricks/ltx-2-fast",
"name": "LTX-2 Fast",
"provider": "wavespeed",
"description": "Fast text-to-video generation",
"cost_per_second": 0.08,
"supported_resolutions": ["720p"],
"max_duration": 10,
},
],
"image-to-video": [
{
"id": "alibaba/wan-2.5",
"name": "WAN 2.5",
"provider": "wavespeed",
"description": "Advanced image-to-video transformation",
"cost_per_second": 0.12,
"supported_resolutions": ["480p", "720p", "1080p"],
"max_duration": 10,
},
{
"id": "wavespeed/kandinsky5-pro",
"name": "Kandinsky 5 Pro",
"provider": "wavespeed",
"description": "Artistic image-to-video generation",
"cost_per_second": 0.10,
"supported_resolutions": ["720p", "1080p"],
"max_duration": 8,
},
],
"avatar": [
{
"id": "wavespeed/mocha",
"name": "MoCha Face Swap",
"provider": "wavespeed",
"description": "Advanced face swap and avatar generation",
"cost_per_video": 0.50,
"supported_languages": ["en", "es", "fr", "de"],
},
{
"id": "heygen/video-translate",
"name": "HeyGen Video Translate",
"provider": "wavespeed",
"description": "Multi-language avatar video translation",
"cost_per_video": 0.75,
"supported_languages": ["en", "es", "fr", "de", "it", "pt", "ja", "ko", "zh"],
},
],
"enhancement": [
{
"id": "wavespeed/flashvsr",
"name": "FlashVSR",
"provider": "wavespeed",
"description": "Video super-resolution and enhancement",
"cost_per_video": 0.20,
},
{
"id": "wavespeed/ditto",
"name": "Ditto",
"provider": "wavespeed",
"description": "Synthetic to real video conversion",
"cost_per_video": 0.30,
},
],
}
if operation_type:
return all_models.get(operation_type, [])
else:
# Return all models flattened
result = []
for op_type, models in all_models.items():
for model in models:
model["operation_type"] = op_type
result.append(model)
return result
def estimate_cost(
self,
operation_type: str,
duration: Optional[int] = None,
resolution: Optional[str] = None,
model: Optional[str] = None,
) -> Dict[str, Any]:
"""
Estimate cost for video generation operations.
Args:
operation_type: Type of operation
duration: Video duration in seconds
resolution: Video resolution
model: Specific model
Returns:
Cost estimate with breakdown
"""
try:
# Get pricing from database
db = next(get_db())
pricing_service = PricingService(db)
# Default values
duration = duration or 5
resolution = resolution or "720p"
model = model or self._get_default_model(operation_type)
# Get pricing for the model
pricing = pricing_service.get_pricing_for_provider_model("video", model)
if pricing and pricing.get("cost_per_request"):
base_cost = pricing["cost_per_request"]
else:
# Fallback pricing
base_cost = self._calculate_cost(operation_type, model, duration, resolution)
# Apply resolution multiplier
resolution_multiplier = {
"480p": 0.8,
"720p": 1.0,
"1080p": 1.5,
}.get(resolution, 1.0)
estimated_cost = base_cost * resolution_multiplier
return {
"estimated_cost": round(estimated_cost, 2),
"currency": "USD",
"breakdown": {
"base_cost": base_cost,
"resolution_multiplier": resolution_multiplier,
"duration": duration,
"resolution": resolution,
},
"model": model,
"operation_type": operation_type,
}
except Exception as e:
logger.error(f"[VideoStudio] Cost estimation error: {e}", exc_info=True)
return {
"estimated_cost": 0.50, # Fallback
"currency": "USD",
"error": "Could not calculate exact cost",
}
finally:
db.close()
def _calculate_cost(
self,
operation: str,
model: str,
duration: int = 5,
resolution: str = "720p"
) -> float:
"""Calculate cost for video operations."""
# Base pricing per operation type
base_pricing = {
"text-to-video": 0.10, # per second
"image-to-video": 0.12, # per second
"avatar": 0.50, # per video
"enhancement": 0.20, # per video
}
# Model-specific multipliers
model_multipliers = {
"lightricks/ltx-2-pro": 1.5,
"hunyuan-video-1.5": 1.0,
"lightricks/ltx-2-fast": 0.8,
"alibaba/wan-2.5": 1.2,
"wavespeed/mocha": 1.0,
"heygen/video-translate": 1.5,
}
# Resolution multipliers
resolution_multipliers = {
"480p": 0.8,
"720p": 1.0,
"1080p": 1.5,
}
base_cost = base_pricing.get(operation, 0.10)
model_multiplier = model_multipliers.get(model, 1.0)
resolution_multiplier = resolution_multipliers.get(resolution, 1.0)
if operation in ["avatar", "enhancement"]:
# Fixed cost per video
return base_cost * model_multiplier
else:
# Cost per second
return base_cost * duration * model_multiplier * resolution_multiplier
def _get_default_model(self, operation_type: str) -> str:
"""Get default model for operation type (OSS-focused defaults)."""
defaults = {
"text-to-video": "wan-2.5", # OSS: WAN 2.5 ($0.25) vs HunyuanVideo ($0.10) - better quality/value
"image-to-video": "wan-2.5", # OSS: WAN 2.5 (same as text-to-video)
"avatar": "wavespeed/mocha",
"enhancement": "wavespeed/flashvsr",
}
return defaults.get(operation_type, "wan-2.5") # Default to OSS model