"""
Podcast Image Handlers

Image generation and serving endpoints.
"""

from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from typing import Dict, Any, List, Optional, Sequence
from pathlib import Path
import mimetypes
import uuid

from services.database import get_db
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_image_generation import generate_image, generate_character_image
from utils.asset_tracker import save_asset_to_library
from loguru import logger
from ..constants import PODCAST_IMAGES_DIR
from ..models import PodcastImageRequest, PodcastImageResponse

router = APIRouter()

# Trigger words for each visual theme. They are matched as plain substrings of
# the lowercased scene content, in this order (data, technology, business).
_THEME_TRIGGERS = (
    ("data", "statistics", "numbers", "chart", "graph"),
    ("technology", "tech", "digital", "ai", "software"),
    ("business", "growth", "strategy", "market"),
)

# Prompt fragments emitted per theme, index-aligned with _THEME_TRIGGERS.
_CHARACTER_THEME_LABELS = (
    "data visualization background",
    "modern tech studio setting",
    "professional business studio",
)
_STANDARD_THEME_LABELS = (
    "data visualization elements",
    "modern technology aesthetic",
    "professional business environment",
)

# Known (width, height) pairs and the aspect-ratio label passed to the
# character image generator when the request does not specify one.
_ASPECT_RATIO_BY_SIZE = {
    (1024, 1024): "1:1",
    (1920, 1080): "16:9",
    (1080, 1920): "9:16",
    (1280, 960): "4:3",
    (960, 1280): "3:4",
}


def _sanitize_filename_part(value: Any, max_length: Optional[int] = None) -> str:
    """Return ``value`` as text that is safe to embed in a filename.

    The text is truncated to ``max_length`` first (when given); every character
    that is not alphanumeric, ``-`` or ``_`` is then replaced by ``_``, so the
    result can never contain a path separator or a ``..`` segment.
    """
    text = str(value)
    if max_length is not None:
        text = text[:max_length]
    return "".join(c if c.isalnum() or c in ("-", "_") else "_" for c in text)


def _visual_keywords(content: str, limit: int, labels: Sequence[str]) -> List[str]:
    """Return the theme labels whose trigger words occur in the first ``limit`` chars of ``content``."""
    preview = content[:limit].replace("\n", " ").strip().lower()
    return [
        label
        for triggers, label in zip(_THEME_TRIGGERS, labels)
        if any(word in preview for word in triggers)
    ]


def _build_character_prompt(request: PodcastImageRequest) -> str:
    """Build the prompt used when a base avatar is supplied as a character reference.

    A user-supplied ``custom_prompt`` is returned unchanged; otherwise the prompt
    is assembled from the scene title, content themes, podcast idea and a fixed
    studio description.
    """
    if request.custom_prompt:
        logger.info(f"[Podcast] Using custom prompt from user for scene {request.scene_id}")
        return request.custom_prompt

    parts: List[str] = []

    # Scene context (primary focus)
    if request.scene_title:
        parts.append(f"Scene: {request.scene_title}")

    # Scene content insights for visual context
    if request.scene_content:
        keywords = _visual_keywords(request.scene_content, 200, _CHARACTER_THEME_LABELS)
        if keywords:
            parts.append(", ".join(keywords))

    # Podcast theme context
    if request.idea:
        parts.append(f"Topic: {request.idea[:60].strip()}")

    # Studio setting (maintains podcast aesthetic)
    parts.extend([
        "Professional podcast recording studio",
        "Modern microphone setup",
        "Clean background, professional lighting",
        "16:9 aspect ratio, video-optimized composition",
    ])
    return ", ".join(parts)


def _build_standard_prompt(request: PodcastImageRequest) -> str:
    """Build the prompt used when generating a scene image from scratch (no base avatar)."""
    # Core podcast studio elements
    parts: List[str] = [
        "Professional podcast recording studio",
        "Modern podcast setup with high-quality microphone",
        "Clean, minimalist background suitable for video",
        "Professional studio lighting with soft, even illumination",
        "Podcast host environment, professional and inviting",
    ]

    # Scene-specific context
    if request.scene_title:
        parts.append(f"Scene theme: {request.scene_title}")

    # Content context for visual relevance
    if request.scene_content:
        keywords = _visual_keywords(request.scene_content, 150, _STANDARD_THEME_LABELS)
        if keywords:
            parts.append(", ".join(keywords))

    # Podcast theme context
    if request.idea:
        parts.append(f"Podcast topic context: {request.idea[:80].strip()}")

    # Technical requirements for video generation
    parts.extend([
        "16:9 aspect ratio optimized for video",
        "Center-focused composition for talking avatar overlay",
        "Neutral color palette with professional tones",
        "High resolution, sharp focus, professional photography quality",
        "No text, no logos, no distracting elements",
        "Suitable for InfiniteTalk video generation with animated avatar",
    ])

    # Style constraints
    parts.extend([
        "Realistic photography style, not illustration or cartoon",
        "Professional broadcast quality",
        "Warm, inviting atmosphere",
        "Clean composition with breathing room for avatar placement",
    ])
    return ", ".join(parts)


def _resolve_aspect_ratio(request: PodcastImageRequest) -> str:
    """Return the requested aspect ratio, else one derived from width/height (default ``16:9``)."""
    if request.aspect_ratio:
        return request.aspect_ratio
    return _ASPECT_RATIO_BY_SIZE.get((request.width, request.height), "16:9")


@router.post("/image", response_model=PodcastImageResponse)
async def generate_podcast_scene_image(
    request: PodcastImageRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate an AI image for a podcast scene.

    Creates a professional, podcast-appropriate image based on scene title and content.
    When ``request.base_avatar_url`` is set, the avatar is loaded and passed to
    ``generate_character_image`` as a reference image; otherwise the image is
    produced by ``generate_image`` from a prompt built here. The resulting bytes
    are written under ``PODCAST_IMAGES_DIR`` and registered in the asset library
    (best effort: a failure to register is logged and ignored).

    Raises:
        HTTPException: 400 when the scene title is missing; 500 when the base
            avatar cannot be loaded or an unexpected error occurs; 504/502 when
            character-consistent generation fails; any HTTPException raised by
            pre-flight validation or the generation services is re-raised as-is.
    """
    user_id = require_authenticated_user(current_user)

    if not request.scene_title:
        raise HTTPException(status_code=400, detail="Scene title is required")

    try:
        # PRE-FLIGHT VALIDATION: Check subscription limits before any API calls
        from services.subscription import PricingService
        from services.subscription.preflight_validator import validate_image_generation_operations

        pricing_service = PricingService(db)
        try:
            # Raises HTTPException immediately if validation fails
            validate_image_generation_operations(
                pricing_service=pricing_service,
                user_id=user_id,
                num_images=1,
            )
            logger.info(f"[Podcast] ✅ Pre-flight validation passed for user {user_id}")
        except HTTPException as http_ex:
            logger.error(f"[Podcast] ❌ Pre-flight validation failed for user {user_id}: {http_ex.detail}")
            raise

        # If base avatar is provided, create scene-specific variation
        # Otherwise, generate from scratch
        logger.info(f"[Podcast] Image generation request for scene {request.scene_id}")
        logger.info(f"[Podcast] base_avatar_url={request.base_avatar_url}")
        logger.info(f"[Podcast] custom_prompt={request.custom_prompt}")
        logger.info(f"[Podcast] style={request.style}, rendering_speed={request.rendering_speed}, aspect_ratio={request.aspect_ratio}")

        if request.base_avatar_url:
            # Load base avatar image for reference
            from ..utils import load_podcast_image_bytes

            try:
                logger.info(f"[Podcast] Attempting to load base avatar from: {request.base_avatar_url}")
                base_avatar_bytes = load_podcast_image_bytes(request.base_avatar_url)
                logger.info(f"[Podcast] ✅ Successfully loaded base avatar ({len(base_avatar_bytes)} bytes) for scene {request.scene_id}")
            except Exception as e:
                # loguru ignores the stdlib ``exc_info`` flag (it would be used as a
                # str.format argument instead), so the traceback is attached via opt().
                logger.opt(exception=True).error(
                    f"[Podcast] ❌ Failed to load base avatar from {request.base_avatar_url}: {e}"
                )
                # If base avatar fails to load, we cannot maintain character consistency
                # Raise an error instead of falling back to standard generation
                raise HTTPException(
                    status_code=500,
                    detail={
                        "error": "Failed to load base avatar",
                        "message": f"Could not load the base avatar image for character consistency: {str(e)}. Please ensure the avatar image is accessible.",
                    },
                ) from e
        else:
            logger.info("[Podcast] No base avatar URL provided, will generate from scratch")
            base_avatar_bytes = None

        # When base avatar is provided, use Ideogram Character to maintain consistency
        # Otherwise, generate from scratch with podcast-optimized prompt
        image_prompt = ""
        if base_avatar_bytes:
            image_prompt = _build_character_prompt(request)

            logger.info(f"[Podcast] Using Ideogram Character for scene {request.scene_id} with base avatar")
            logger.info(f"[Podcast] Scene prompt: {image_prompt[:150]}...")

            # Use custom settings if provided, otherwise use defaults
            style = request.style or "Realistic"  # Default to Realistic for professional podcast presenters
            rendering_speed = request.rendering_speed or "Quality"  # Default to Quality for podcast videos
            aspect_ratio = _resolve_aspect_ratio(request)

            logger.info(f"[Podcast] Ideogram Character settings: style={style}, rendering_speed={rendering_speed}, aspect_ratio={aspect_ratio}")

            try:
                # NOTE(review): this is a synchronous call with no timeout inside an
                # async handler, so it occupies the event loop until it returns.
                # Consider running it in a threadpool once thread-safety is confirmed.
                image_bytes = generate_character_image(
                    prompt=image_prompt,
                    reference_image_bytes=base_avatar_bytes,
                    user_id=user_id,
                    style=style,
                    aspect_ratio=aspect_ratio,
                    rendering_speed=rendering_speed,
                    timeout=None,  # No timeout - poll until WaveSpeed says it's done or failed
                )

                # Create result object compatible with ImageGenerationResult
                from services.llm_providers.image_generation.base import ImageGenerationResult

                result = ImageGenerationResult(
                    image_bytes=image_bytes,
                    provider="wavespeed",
                    model="ideogram-ai/ideogram-character",
                    width=request.width,
                    height=request.height,
                )
                logger.info("[Podcast] ✅ Successfully generated character-consistent scene image")
            except HTTPException as http_err:
                # Re-raise HTTPExceptions from wavespeed client as-is
                logger.error(f"[Podcast] ❌ Ideogram Character HTTPException: {http_err.status_code} - {http_err.detail}")
                raise
            except Exception as char_error:
                error_msg = str(char_error)
                error_type = type(char_error).__name__
                logger.opt(exception=True).error(
                    f"[Podcast] ❌ Ideogram Character failed: {error_type}: {error_msg}"
                )

                # Do NOT fall back to standard generation here: that would lose
                # character consistency. Timeout/connection problems get a
                # dedicated "service unavailable" response instead.
                error_msg_lower = error_msg.lower()
                if "timeout" in error_msg_lower or "connection" in error_msg_lower or "504" in error_msg:
                    raise HTTPException(
                        status_code=504,
                        detail={
                            "error": "Image generation service unavailable",
                            "message": "The character-consistent image generation service is currently unavailable. Please try again in a few moments. If the problem persists, the service may be experiencing high load.",
                            "retry_recommended": True,
                        },
                    ) from char_error
                raise HTTPException(
                    status_code=502,
                    detail={
                        "error": "Character-consistent image generation failed",
                        "message": f"Failed to generate image with character consistency: {error_msg}",
                        "retry_recommended": True,
                    },
                ) from char_error

        # Every failure on the base-avatar path has already raised above, so this
        # branch runs when no usable base avatar bytes are available.
        if not base_avatar_bytes:
            logger.info("[Podcast] No base avatar provided - generating standard image from scratch")
            image_prompt = _build_standard_prompt(request)

            logger.info(f"[Podcast] Generating image for scene {request.scene_id}: {request.scene_title}")

            # Generate image using main_image_generation service
            image_options = {
                "provider": None,  # Auto-select provider
                "width": request.width,
                "height": request.height,
            }
            # NOTE(review): synchronous call inside an async handler (see above).
            result = generate_image(
                prompt=image_prompt,
                options=image_options,
                user_id=user_id,
            )

        # Save image to podcast images directory
        PODCAST_IMAGES_DIR.mkdir(parents=True, exist_ok=True)

        # Both request-supplied parts are sanitized so the filename can never
        # contain a path separator and escape PODCAST_IMAGES_DIR.
        clean_scene_id = _sanitize_filename_part(request.scene_id)
        clean_title = _sanitize_filename_part(request.scene_title, max_length=30)
        unique_id = str(uuid.uuid4())[:8]
        image_filename = f"scene_{clean_scene_id}_{clean_title}_{unique_id}.png"
        image_path = PODCAST_IMAGES_DIR / image_filename

        with open(image_path, "wb") as f:
            f.write(result.image_bytes)

        logger.info(f"[Podcast] Saved image to: {image_path}")

        # Create image URL (served via API endpoint)
        image_url = f"/api/podcast/images/{image_filename}"

        # Estimate cost (rough estimate: ~$0.04 per image for most providers, ~$0.10 for Ideogram Character)
        # Note: Actual usage tracking is handled by centralized generate_image()/generate_character_image() functions
        cost = 0.10 if result.provider == "wavespeed" and result.model == "ideogram-ai/ideogram-character" else 0.04

        # Save to asset library (best effort: the image is already on disk)
        try:
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="image",
                source_module="podcast_maker",
                filename=image_filename,
                file_url=image_url,
                file_path=str(image_path),
                file_size=len(result.image_bytes),
                mime_type="image/png",
                title=f"{request.scene_title} - Podcast Scene",
                description=f"Podcast scene image: {request.scene_title}",
                prompt=image_prompt,
                tags=["podcast", "scene", request.scene_id],
                provider=result.provider,
                model=result.model,
                asset_metadata={
                    "scene_id": request.scene_id,
                    "scene_title": request.scene_title,
                    "status": "completed",
                },
            )
        except Exception as e:
            logger.warning(f"[Podcast] Failed to save image asset: {e}")

        return PodcastImageResponse(
            scene_id=request.scene_id,
            scene_title=request.scene_title,
            image_filename=image_filename,
            image_url=image_url,
            width=result.width,
            height=result.height,
            provider=result.provider,
            model=result.model,
            cost=cost,
        )
    except HTTPException:
        # Re-raise HTTPExceptions as-is (they already have proper error details)
        raise
    except Exception as exc:
        # Log the full exception (with traceback) for debugging
        error_msg = str(exc)
        error_type = type(exc).__name__
        logger.opt(exception=True).error(
            f"[Podcast] Image generation failed: {error_type}: {error_msg}"
        )

        # Create a safe error detail
        raise HTTPException(
            status_code=500,
            detail={
                "error": "Image generation failed",
                "message": error_msg,
                "type": error_type,
            },
        ) from exc


@router.get("/images/{path:path}")
async def serve_podcast_image(
    path: str,  # A path rather than a bare filename, to support subdirectories
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve generated podcast scene images and avatars.

    Supports authentication via Authorization header or token query parameter.
    Query parameter is useful for HTML elements like ``<img>`` that cannot send
    custom headers. Supports subdirectories like avatars/

    Raises:
        HTTPException: 400 for paths containing ``..`` or starting with ``/``;
            403 when the resolved path falls outside the images directory;
            404 when the path does not point at an existing file.
    """
    require_authenticated_user(current_user)

    # Security check: ensure path doesn't contain path traversal or absolute paths
    if ".." in path or path.startswith("/"):
        raise HTTPException(status_code=400, detail="Invalid path")

    # Resolve BOTH sides before comparing, and compare by path components: a
    # plain string-prefix test would accept a sibling such as
    # "<images_dir>_other" and would reject valid files when the configured
    # directory is relative or reached through a symlink.
    images_root = Path(PODCAST_IMAGES_DIR).resolve()
    image_path = (images_root / path).resolve()
    try:
        image_path.relative_to(images_root)
    except ValueError:
        raise HTTPException(status_code=403, detail="Access denied") from None

    # is_file() rather than exists(): a directory cannot be served as a file.
    if not image_path.is_file():
        raise HTTPException(status_code=404, detail="Image not found")

    # Use the extension-derived type for images; anything else keeps the
    # previous fixed "image/png" so no non-image content type is ever emitted.
    guessed_type, _ = mimetypes.guess_type(image_path.name)
    if guessed_type and guessed_type.startswith("image/"):
        media_type = guessed_type
    else:
        media_type = "image/png"

    return FileResponse(image_path, media_type=media_type)