400 lines
19 KiB
Python
400 lines
19 KiB
Python
"""
Podcast Image Handlers

Image generation and serving endpoints.
"""
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException
|
|
from fastapi.responses import FileResponse
|
|
from sqlalchemy.orm import Session
|
|
from typing import Dict, Any
|
|
from pathlib import Path
|
|
import uuid
|
|
|
|
from services.database import get_db
|
|
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
|
|
from api.story_writer.utils.auth import require_authenticated_user
|
|
from services.llm_providers.main_image_generation import generate_image, generate_character_image
|
|
from utils.asset_tracker import save_asset_to_library
|
|
from loguru import logger
|
|
from ..constants import PODCAST_IMAGES_DIR
|
|
from ..models import PodcastImageRequest, PodcastImageResponse
|
|
|
|
router = APIRouter()
|
|
|
|
|
|
# Trigger-word groups used to derive visual themes from scene text. The order
# matches the label tuples passed to ``_podcast_visual_keywords``.
_PODCAST_VISUAL_THEME_WORDS = (
    ("data", "statistics", "numbers", "chart", "graph"),
    ("technology", "tech", "digital", "ai", "software"),
    ("business", "growth", "strategy", "market"),
)

# Known (width, height) pairs and the aspect-ratio label sent to the
# character-image generator when the request carries no explicit ratio.
_PODCAST_ASPECT_RATIOS = {
    (1024, 1024): "1:1",
    (1920, 1080): "16:9",
    (1080, 1920): "9:16",
    (1280, 960): "4:3",
    (960, 1280): "3:4",
}


def _podcast_safe_filename_part(value, max_length=None) -> str:
    """Return ``value`` as text that is safe to embed in a file name.

    The value is converted with ``str()``, optionally truncated to
    ``max_length`` characters, and every character that is not alphanumeric,
    ``-`` or ``_`` is replaced by ``_``. This removes path separators and
    dots, so the result cannot introduce directory components.
    """
    text = str(value)
    if max_length is not None:
        text = text[:max_length]
    return "".join(c if c.isalnum() or c in ("-", "_") else "_" for c in text)


def _podcast_visual_keywords(scene_content, preview_length, theme_labels):
    """Return the theme labels whose trigger words occur in the scene text.

    Only the first ``preview_length`` characters of ``scene_content`` are
    inspected (newlines replaced by spaces, stripped, lower-cased).
    ``theme_labels`` must be ordered like ``_PODCAST_VISUAL_THEME_WORDS``.

    NOTE(review): matching is plain substring containment, so short triggers
    such as "ai" also match inside longer words (e.g. "said"). Kept as-is to
    preserve the generated prompts.
    """
    preview = scene_content[:preview_length].replace("\n", " ").strip().lower()
    return [
        label
        for words, label in zip(_PODCAST_VISUAL_THEME_WORDS, theme_labels)
        if any(word in preview for word in words)
    ]


def _build_podcast_character_prompt(request) -> str:
    """Build the scene prompt used together with a base avatar reference image."""
    parts = []

    # Scene context (primary focus)
    if request.scene_title:
        parts.append(f"Scene: {request.scene_title}")

    # Scene content insights for visual context
    if request.scene_content:
        keywords = _podcast_visual_keywords(
            request.scene_content,
            200,
            (
                "data visualization background",
                "modern tech studio setting",
                "professional business studio",
            ),
        )
        if keywords:
            parts.append(", ".join(keywords))

    # Podcast theme context
    if request.idea:
        parts.append(f"Topic: {request.idea[:60].strip()}")

    # Studio setting (maintains podcast aesthetic)
    parts.extend([
        "Professional podcast recording studio",
        "Modern microphone setup",
        "Clean background, professional lighting",
        "16:9 aspect ratio, video-optimized composition",
    ])
    return ", ".join(parts)


def _build_podcast_standard_prompt(request) -> str:
    """Build the from-scratch scene prompt used when no base avatar is supplied."""
    # Core podcast studio elements
    parts = [
        "Professional podcast recording studio",
        "Modern podcast setup with high-quality microphone",
        "Clean, minimalist background suitable for video",
        "Professional studio lighting with soft, even illumination",
        "Podcast host environment, professional and inviting",
    ]

    # Scene-specific context
    if request.scene_title:
        parts.append(f"Scene theme: {request.scene_title}")

    # Content context for visual relevance
    if request.scene_content:
        keywords = _podcast_visual_keywords(
            request.scene_content,
            150,
            (
                "data visualization elements",
                "modern technology aesthetic",
                "professional business environment",
            ),
        )
        if keywords:
            parts.append(", ".join(keywords))

    # Podcast theme context
    if request.idea:
        parts.append(f"Podcast topic context: {request.idea[:80].strip()}")

    # Technical requirements for video generation
    parts.extend([
        "16:9 aspect ratio optimized for video",
        "Center-focused composition for talking avatar overlay",
        "Neutral color palette with professional tones",
        "High resolution, sharp focus, professional photography quality",
        "No text, no logos, no distracting elements",
        "Suitable for InfiniteTalk video generation with animated avatar",
    ])

    # Style constraints
    parts.extend([
        "Realistic photography style, not illustration or cartoon",
        "Professional broadcast quality",
        "Warm, inviting atmosphere",
        "Clean composition with breathing room for avatar placement",
    ])
    return ", ".join(parts)


def _resolve_podcast_aspect_ratio(request) -> str:
    """Return the explicit aspect ratio, else one derived from width/height ("16:9" fallback)."""
    if request.aspect_ratio:
        return request.aspect_ratio
    return _PODCAST_ASPECT_RATIOS.get((request.width, request.height), "16:9")


def _generate_podcast_character_image(request, image_prompt, base_avatar_bytes, user_id):
    """Generate a character-consistent scene image from the base avatar.

    Returns an ``ImageGenerationResult``. Never falls back to standard
    generation, because that would lose character consistency; failures are
    raised as ``HTTPException`` (504 for timeout/connection-looking errors,
    502 otherwise, or whatever ``HTTPException`` the generator itself raised).
    """
    # Use custom settings if provided, otherwise use defaults
    style = request.style or "Realistic"  # Default to Realistic for professional podcast presenters
    rendering_speed = request.rendering_speed or "Quality"  # Default to Quality for podcast videos
    aspect_ratio = _resolve_podcast_aspect_ratio(request)

    logger.info(f"[Podcast] Ideogram Character settings: style={style}, rendering_speed={rendering_speed}, aspect_ratio={aspect_ratio}")

    try:
        # NOTE(review): this is a synchronous call made from an async endpoint
        # with no timeout; if it blocks while polling, it stalls the event
        # loop. Consider offloading to a worker thread.
        image_bytes = generate_character_image(
            prompt=image_prompt,
            reference_image_bytes=base_avatar_bytes,
            user_id=user_id,
            style=style,
            aspect_ratio=aspect_ratio,
            rendering_speed=rendering_speed,
            timeout=None,  # No timeout - poll until WaveSpeed says it's done or failed
        )

        # Create result object compatible with ImageGenerationResult
        from services.llm_providers.image_generation.base import ImageGenerationResult

        result = ImageGenerationResult(
            image_bytes=image_bytes,
            provider="wavespeed",
            model="ideogram-ai/ideogram-character",
            width=request.width,
            height=request.height,
        )
        logger.info("[Podcast] ✅ Successfully generated character-consistent scene image")
        return result
    except HTTPException as http_err:
        # Re-raise HTTPExceptions from wavespeed client as-is
        logger.error(f"[Podcast] ❌ Ideogram Character HTTPException: {http_err.status_code} - {http_err.detail}")
        raise
    except Exception as char_error:
        error_msg = str(char_error)
        error_type = type(char_error).__name__
        # loguru: logger.exception attaches the traceback. Passing
        # exc_info=True would instead be treated as a str.format() argument.
        logger.exception(f"[Podcast] ❌ Ideogram Character failed: {error_type}: {error_msg}")

        error_msg_lower = error_msg.lower()
        if "timeout" in error_msg_lower or "connection" in error_msg_lower or "504" in error_msg:
            raise HTTPException(
                status_code=504,
                detail={
                    "error": "Image generation service unavailable",
                    "message": "The character-consistent image generation service is currently unavailable. Please try again in a few moments. If the problem persists, the service may be experiencing high load.",
                    "retry_recommended": True,
                },
            ) from char_error
        raise HTTPException(
            status_code=502,
            detail={
                "error": "Character-consistent image generation failed",
                "message": f"Failed to generate image with character consistency: {error_msg}",
                "retry_recommended": True,
            },
        ) from char_error


@router.post("/image", response_model=PodcastImageResponse)
async def generate_podcast_scene_image(
    request: PodcastImageRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate an AI image for a podcast scene.

    Creates a professional, podcast-appropriate image based on scene title and content.

    Flow:
        1. Pre-flight subscription validation for one image.
        2. If ``request.base_avatar_url`` is set, load the avatar and generate a
           character-consistent image (using ``request.custom_prompt`` when given,
           otherwise a scene prompt). Otherwise generate a standard image from a
           podcast-studio prompt.
        3. Write the PNG into ``PODCAST_IMAGES_DIR`` and, best effort, register it
           in the asset library (a failure there is only logged).

    Returns:
        PodcastImageResponse describing the stored image, provider, model and an
        estimated cost.

    Raises:
        HTTPException: 400 when ``scene_title`` is empty; whatever the pre-flight
            validator or image generator raises; 500 when the base avatar cannot
            be loaded or on any other unexpected error; 502/504 when
            character-consistent generation fails.
    """
    user_id = require_authenticated_user(current_user)

    if not request.scene_title:
        raise HTTPException(status_code=400, detail="Scene title is required")

    try:
        # PRE-FLIGHT VALIDATION: Check subscription limits before any API calls
        from services.subscription import PricingService
        from services.subscription.preflight_validator import validate_image_generation_operations

        pricing_service = PricingService(db)
        try:
            # Raises HTTPException immediately if validation fails
            validate_image_generation_operations(
                pricing_service=pricing_service,
                user_id=user_id,
                num_images=1,
            )
            logger.info(f"[Podcast] ✅ Pre-flight validation passed for user {user_id}")
        except HTTPException as http_ex:
            logger.error(f"[Podcast] ❌ Pre-flight validation failed for user {user_id}: {http_ex.detail}")
            raise

        # If base avatar is provided, create scene-specific variation
        # Otherwise, generate from scratch
        logger.info(f"[Podcast] Image generation request for scene {request.scene_id}")
        logger.info(f"[Podcast] base_avatar_url={request.base_avatar_url}")
        logger.info(f"[Podcast] custom_prompt={request.custom_prompt}")
        logger.info(f"[Podcast] style={request.style}, rendering_speed={request.rendering_speed}, aspect_ratio={request.aspect_ratio}")

        if request.base_avatar_url:
            # Load base avatar image for reference
            from ..utils import load_podcast_image_bytes

            try:
                logger.info(f"[Podcast] Attempting to load base avatar from: {request.base_avatar_url}")
                base_avatar_bytes = load_podcast_image_bytes(request.base_avatar_url)
                logger.info(f"[Podcast] ✅ Successfully loaded base avatar ({len(base_avatar_bytes)} bytes) for scene {request.scene_id}")
            except Exception as e:
                logger.exception(f"[Podcast] ❌ Failed to load base avatar from {request.base_avatar_url}: {e}")
                # If base avatar fails to load, we cannot maintain character consistency.
                # Raise an error instead of falling back to standard generation.
                raise HTTPException(
                    status_code=500,
                    detail={
                        "error": "Failed to load base avatar",
                        "message": f"Could not load the base avatar image for character consistency: {str(e)}. Please ensure the avatar image is accessible.",
                    },
                ) from e
        else:
            logger.info("[Podcast] No base avatar URL provided, will generate from scratch")
            base_avatar_bytes = None

        if base_avatar_bytes:
            # Character-consistent path: custom prompt wins, otherwise build a
            # scene-specific prompt that respects the base avatar.
            if request.custom_prompt:
                image_prompt = request.custom_prompt
                logger.info(f"[Podcast] Using custom prompt from user for scene {request.scene_id}")
            else:
                image_prompt = _build_podcast_character_prompt(request)

            logger.info(f"[Podcast] Using Ideogram Character for scene {request.scene_id} with base avatar")
            logger.info(f"[Podcast] Scene prompt: {image_prompt[:150]}...")

            result = _generate_podcast_character_image(
                request, image_prompt, base_avatar_bytes, user_id
            )
        else:
            # Reached when no base avatar URL was given (or the loader returned
            # empty data): standard generation from scratch.
            logger.info("[Podcast] No base avatar provided - generating standard image from scratch")
            image_prompt = _build_podcast_standard_prompt(request)

            logger.info(f"[Podcast] Generating image for scene {request.scene_id}: {request.scene_title}")

            # Generate image using main_image_generation service
            image_options = {
                "provider": None,  # Auto-select provider
                "width": request.width,
                "height": request.height,
            }
            result = generate_image(
                prompt=image_prompt,
                options=image_options,
                user_id=user_id,
            )

        # Save image to podcast images directory
        PODCAST_IMAGES_DIR.mkdir(parents=True, exist_ok=True)

        # Generate filename. Both client-supplied parts are sanitized so that
        # neither the title nor the scene id can inject path separators and
        # write outside PODCAST_IMAGES_DIR.
        clean_title = _podcast_safe_filename_part(request.scene_title, 30)
        clean_scene_id = _podcast_safe_filename_part(request.scene_id)
        unique_id = str(uuid.uuid4())[:8]
        image_filename = f"scene_{clean_scene_id}_{clean_title}_{unique_id}.png"
        image_path = PODCAST_IMAGES_DIR / image_filename

        # Save image
        with open(image_path, "wb") as f:
            f.write(result.image_bytes)

        logger.info(f"[Podcast] Saved image to: {image_path}")

        # Create image URL (served via API endpoint)
        image_url = f"/api/podcast/images/{image_filename}"

        # Estimate cost (rough estimate: ~$0.04 per image for most providers, ~$0.10 for Ideogram Character)
        # Note: Actual usage tracking is handled by centralized generate_image()/generate_character_image() functions
        cost = 0.10 if result.provider == "wavespeed" and result.model == "ideogram-ai/ideogram-character" else 0.04

        # Save to asset library (best effort: a failure must not fail the request)
        try:
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="image",
                source_module="podcast_maker",
                filename=image_filename,
                file_url=image_url,
                file_path=str(image_path),
                file_size=len(result.image_bytes),
                mime_type="image/png",
                title=f"{request.scene_title} - Podcast Scene",
                description=f"Podcast scene image: {request.scene_title}",
                prompt=image_prompt,
                tags=["podcast", "scene", request.scene_id],
                provider=result.provider,
                model=result.model,
                asset_metadata={
                    "scene_id": request.scene_id,
                    "scene_title": request.scene_title,
                    "status": "completed",
                },
            )
        except Exception as e:
            logger.warning(f"[Podcast] Failed to save image asset: {e}")

        return PodcastImageResponse(
            scene_id=request.scene_id,
            scene_title=request.scene_title,
            image_filename=image_filename,
            image_url=image_url,
            width=result.width,
            height=result.height,
            provider=result.provider,
            model=result.model,
            cost=cost,
        )

    except HTTPException:
        # Re-raise HTTPExceptions as-is (they already have proper error details)
        raise
    except Exception as exc:
        # Log the full exception (with traceback) for debugging
        error_msg = str(exc)
        error_type = type(exc).__name__
        logger.exception(f"[Podcast] Image generation failed: {error_type}: {error_msg}")

        raise HTTPException(
            status_code=500,
            detail={
                "error": "Image generation failed",
                "message": error_msg,
                "type": error_type,
            },
        ) from exc
|
|
|
|
|
|
@router.get("/images/{path:path}")
async def serve_podcast_image(
    path: str,  # Changed from filename to path to support subdirectories
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve generated podcast scene images and avatars.

    Supports authentication via Authorization header or token query parameter.
    Query parameter is useful for HTML elements like <img> that cannot send custom headers.
    Supports subdirectories like avatars/

    Args:
        path: Path of the image relative to ``PODCAST_IMAGES_DIR``.
        current_user: Authenticated user resolved by the auth dependency.

    Returns:
        FileResponse streaming the file with media type ``image/png``.

    Raises:
        HTTPException: 400 if ``path`` contains ``..`` or is absolute, 403 if it
            resolves outside ``PODCAST_IMAGES_DIR``, 404 if it is not an
            existing regular file.
    """
    require_authenticated_user(current_user)

    # Security check: ensure path doesn't contain path traversal or absolute paths
    if ".." in path or path.startswith("/"):
        raise HTTPException(status_code=400, detail="Invalid path")

    # Resolve BOTH sides before comparing. If only the candidate is resolved,
    # a relative or symlinked PODCAST_IMAGES_DIR never matches it.
    images_root = PODCAST_IMAGES_DIR.resolve()
    image_path = (images_root / path).resolve()

    # Security check: ensure resolved path is within PODCAST_IMAGES_DIR.
    # relative_to() compares whole path components, so a sibling directory
    # that merely shares a name prefix (e.g. "<root>_backup") is rejected,
    # unlike a plain string startswith() test.
    try:
        image_path.relative_to(images_root)
    except ValueError:
        raise HTTPException(status_code=403, detail="Access denied") from None

    # is_file() rather than exists(): a directory (including the root itself
    # for an empty path) must yield 404 instead of failing inside FileResponse.
    if not image_path.is_file():
        raise HTTPException(status_code=404, detail="Image not found")

    return FileResponse(image_path, media_type="image/png")
|
|
|