""" Avatar generation endpoints. """ from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, BackgroundTasks from sqlalchemy.orm import Session from typing import Optional, Dict, Any import base64 import uuid from ...database import get_db from ...models.content_asset_models import AssetSource, AssetType from ...services.video_studio import VideoStudioService from ...services.video_studio.avatar_service import AvatarStudioService from ...services.asset_service import ContentAssetService from ...utils.auth import get_current_user, require_authenticated_user from ...utils.logger_utils import get_service_logger from api.story_writer.task_manager import task_manager from ..tasks.avatar_generation import execute_avatar_generation_task logger = get_service_logger("video_studio.endpoints.avatar") router = APIRouter() @router.post("/avatars") async def generate_avatar_video( background_tasks: BackgroundTasks, avatar_file: UploadFile = File(..., description="Avatar/face image"), audio_file: Optional[UploadFile] = File(None, description="Audio file for lip sync"), video_file: Optional[UploadFile] = File(None, description="Source video for face swap"), text: Optional[str] = Form(None, description="Text to speak (alternative to audio)"), language: str = Form("en", description="Language for text-to-speech"), provider: str = Form("wavespeed", description="AI provider to use"), model: str = Form("wavespeed/mocha", description="Specific AI model to use"), current_user: Dict[str, Any] = Depends(get_current_user), db: Session = Depends(get_db), ) -> Dict[str, Any]: """ Generate talking avatar video or perform face swap. Supports both text-to-speech and audio input for natural lip sync. """ try: user_id = require_authenticated_user(current_user) # Validate inputs if not avatar_file.content_type.startswith('image/'): raise HTTPException(status_code=400, detail="Avatar file must be an image") if not any([audio_file, video_file, text]): raise HTTPException(status_code=400, detail="Must provide audio file, video file, or text") # Initialize services video_service = VideoStudioService() asset_service = ContentAssetService(db) logger.info(f"[VideoStudio] Avatar generation request: user={user_id}, model={model}") # Read files avatar_data = await avatar_file.read() audio_data = await audio_file.read() if audio_file else None video_data = await video_file.read() if video_file else None # Generate avatar video result = await video_service.generate_avatar_video( avatar_data=avatar_data, audio_data=audio_data, video_data=video_data, text=text, language=language, provider=provider, model=model, user_id=user_id, ) if not result.get("success"): raise HTTPException( status_code=500, detail=f"Avatar generation failed: {result.get('error', 'Unknown error')}" ) # Store in asset library if successful video_url = result.get("video_url") if video_url: asset_metadata = { "avatar_file": avatar_file.filename, "audio_file": audio_file.filename if audio_file else None, "video_file": video_file.filename if video_file else None, "text": text, "language": language, "provider": provider, "model": model, "generation_type": "avatar", } asset_service.create_asset( user_id=user_id, filename=f"avatar_{uuid.uuid4().hex[:8]}.mp4", file_url=video_url, asset_type=AssetType.VIDEO, source_module=AssetSource.VIDEO_STUDIO, asset_metadata=asset_metadata, cost=result.get("cost", 0), tags=["video_studio", "avatar", "ai-generated"] ) logger.info(f"[VideoStudio] Avatar generation successful: user={user_id}, url={video_url}") return { "success": True, "video_url": video_url, "cost": result.get("cost", 0), "model_used": model, "provider": provider, } except HTTPException: raise except Exception as e: logger.error(f"[VideoStudio] Avatar generation error: {e}", exc_info=True) raise HTTPException(status_code=500, detail=f"Avatar generation failed: {str(e)}") @router.post("/avatar/create-async") async def create_avatar_async( background_tasks: BackgroundTasks, image: UploadFile = File(..., description="Image file for avatar"), audio: UploadFile = File(..., description="Audio file for lip-sync"), resolution: str = Form("720p", description="Video resolution (480p or 720p)"), prompt: Optional[str] = Form(None, description="Optional prompt for expression/style"), mask_image: Optional[UploadFile] = File(None, description="Optional mask image (InfiniteTalk only)"), seed: Optional[int] = Form(None, description="Optional random seed"), model: str = Form("infinitetalk", description="Model to use: 'infinitetalk' or 'hunyuan-avatar'"), current_user: Dict[str, Any] = Depends(get_current_user), ) -> Dict[str, Any]: """ Create talking avatar asynchronously with polling support. Upload a photo and audio to create a talking avatar with perfect lip-sync. Supports resolutions of 480p and 720p. - InfiniteTalk: up to 10 minutes long - Hunyuan Avatar: up to 2 minutes (120 seconds) long Returns task_id for polling. Frontend can poll /api/video-studio/task/{task_id}/status to get progress updates and final result. """ try: user_id = require_authenticated_user(current_user) # Validate resolution if resolution not in ["480p", "720p"]: raise HTTPException( status_code=400, detail="Resolution must be '480p' or '720p'" ) # Read image data image_data = await image.read() if len(image_data) == 0: raise HTTPException(status_code=400, detail="Image file is empty") # Read audio data audio_data = await audio.read() if len(audio_data) == 0: raise HTTPException(status_code=400, detail="Audio file is empty") # Convert to base64 image_base64 = base64.b64encode(image_data).decode('utf-8') # Add data URI prefix image_mime = image.content_type or "image/png" image_base64 = f"data:{image_mime};base64,{image_base64}" audio_base64 = base64.b64encode(audio_data).decode('utf-8') audio_mime = audio.content_type or "audio/mpeg" audio_base64 = f"data:{audio_mime};base64,{audio_base64}" # Handle optional mask image mask_image_base64 = None if mask_image: mask_data = await mask_image.read() if len(mask_data) > 0: mask_base64 = base64.b64encode(mask_data).decode('utf-8') mask_mime = mask_image.content_type or "image/png" mask_image_base64 = f"data:{mask_mime};base64,{mask_base64}" # Create task task_id = task_manager.create_task("avatar_generation") # Validate model if model not in ["infinitetalk", "hunyuan-avatar"]: raise HTTPException( status_code=400, detail="Model must be 'infinitetalk' or 'hunyuan-avatar'" ) # Start background task background_tasks.add_task( execute_avatar_generation_task, task_id=task_id, user_id=user_id, image_base64=image_base64, audio_base64=audio_base64, resolution=resolution, prompt=prompt, mask_image_base64=mask_image_base64, seed=seed, model=model, ) logger.info(f"[AvatarStudio] Started async avatar generation: task_id={task_id}, user={user_id}") return { "task_id": task_id, "status": "pending", "message": f"Avatar generation started. This may take several minutes. Poll /api/video-studio/task/{task_id}/status for updates." } except HTTPException: raise except Exception as e: logger.error(f"[AvatarStudio] Failed to start async avatar generation: {e}", exc_info=True) raise HTTPException(status_code=500, detail=f"Failed to start avatar generation: {str(e)}") @router.post("/avatar/estimate-cost") async def estimate_avatar_cost( resolution: str = Form("720p", description="Video resolution (480p or 720p)"), estimated_duration: float = Form(10.0, description="Estimated video duration in seconds", ge=5.0, le=600.0), model: str = Form("infinitetalk", description="Model to use: 'infinitetalk' or 'hunyuan-avatar'"), current_user: Dict[str, Any] = Depends(get_current_user), ) -> Dict[str, Any]: """ Estimate cost for talking avatar generation. Returns estimated cost based on resolution, duration, and model. """ try: require_authenticated_user(current_user) # Validate resolution if resolution not in ["480p", "720p"]: raise HTTPException( status_code=400, detail="Resolution must be '480p' or '720p'" ) # Validate model if model not in ["infinitetalk", "hunyuan-avatar"]: raise HTTPException( status_code=400, detail="Model must be 'infinitetalk' or 'hunyuan-avatar'" ) # Validate duration for Hunyuan Avatar (max 120 seconds) if model == "hunyuan-avatar" and estimated_duration > 120: raise HTTPException( status_code=400, detail="Hunyuan Avatar supports maximum 120 seconds (2 minutes)" ) avatar_service = AvatarStudioService() estimated_cost = avatar_service.calculate_cost_estimate(resolution, estimated_duration, model) # Return pricing info based on model if model == "hunyuan-avatar": cost_per_5_seconds = 0.15 if resolution == "480p" else 0.30 return { "estimated_cost": estimated_cost, "resolution": resolution, "estimated_duration": estimated_duration, "model": model, "cost_per_5_seconds": cost_per_5_seconds, "pricing_model": "per_5_seconds", "max_duration": 120, } else: cost_per_second = 0.03 if resolution == "480p" else 0.06 return { "estimated_cost": estimated_cost, "resolution": resolution, "estimated_duration": estimated_duration, "model": model, "cost_per_second": cost_per_second, "pricing_model": "per_second", "max_duration": 600, } except HTTPException: raise except Exception as e: logger.error(f"[AvatarStudio] Failed to estimate cost: {e}", exc_info=True) raise HTTPException(status_code=500, detail=f"Failed to estimate cost: {str(e)}")