Base code

This commit is contained in:
Kunthawat Greethong
2026-01-08 22:39:53 +07:00
parent 697115c61a
commit c35fa52117
2169 changed files with 626670 additions and 0 deletions

View File

@@ -0,0 +1,399 @@
"""
Podcast Image Handlers
Image generation and serving endpoints.
"""
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from typing import Dict, Any
from pathlib import Path
import uuid
from services.database import get_db
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_image_generation import generate_image, generate_character_image
from utils.asset_tracker import save_asset_to_library
from loguru import logger
from ..constants import PODCAST_IMAGES_DIR
from ..models import PodcastImageRequest, PodcastImageResponse
# Router that collects the podcast image endpoints defined in this module.
router = APIRouter()
# Keyword groups used to infer a visual theme from scene content, listed in
# the order their hints are appended to the prompt.
# NOTE(review): matching is plain substring containment, so short keywords
# such as "ai" also match inside longer words (e.g. "said"); confirm whether
# word-boundary matching is wanted.
_VISUAL_THEME_WORDS = (
    ("data", "statistics", "numbers", "chart", "graph"),
    ("technology", "tech", "digital", "ai", "software"),
    ("business", "growth", "strategy", "market"),
)

# Fallback mapping from requested pixel dimensions to an aspect-ratio label.
_ASPECT_RATIO_BY_SIZE = {
    (1024, 1024): "1:1",
    (1920, 1080): "16:9",
    (1080, 1920): "9:16",
    (1280, 960): "4:3",
    (960, 1280): "3:4",
}


def _safe_filename_component(value: Any) -> str:
    """Return ``value`` as text with every character except alphanumerics, ``-`` and ``_`` replaced by ``_``."""
    return "".join(c if c.isalnum() or c in ("-", "_") else "_" for c in str(value))


def _visual_hints(scene_content: str, preview_length: int, hints) -> list:
    """Return the hint of each theme whose keywords occur in the first ``preview_length`` characters."""
    content_lower = scene_content[:preview_length].replace("\n", " ").strip().lower()
    return [
        hint
        for words, hint in zip(_VISUAL_THEME_WORDS, hints)
        if any(word in content_lower for word in words)
    ]


def _build_character_scene_prompt(request: PodcastImageRequest) -> str:
    """Build the scene prompt used together with a base avatar reference image."""
    parts = []
    # Scene context (primary focus)
    if request.scene_title:
        parts.append(f"Scene: {request.scene_title}")
    # Scene content insights for visual context
    if request.scene_content:
        hints = _visual_hints(
            request.scene_content,
            200,
            (
                "data visualization background",
                "modern tech studio setting",
                "professional business studio",
            ),
        )
        if hints:
            parts.append(", ".join(hints))
    # Podcast theme context
    if request.idea:
        parts.append(f"Topic: {request.idea[:60].strip()}")
    # Studio setting (maintains podcast aesthetic)
    parts.extend([
        "Professional podcast recording studio",
        "Modern microphone setup",
        "Clean background, professional lighting",
        "16:9 aspect ratio, video-optimized composition",
    ])
    return ", ".join(parts)


def _build_standard_scene_prompt(request: PodcastImageRequest) -> str:
    """Build the prompt used when no base avatar is supplied (generation from scratch)."""
    # Core podcast studio elements
    parts = [
        "Professional podcast recording studio",
        "Modern podcast setup with high-quality microphone",
        "Clean, minimalist background suitable for video",
        "Professional studio lighting with soft, even illumination",
        "Podcast host environment, professional and inviting",
    ]
    # Scene-specific context
    if request.scene_title:
        parts.append(f"Scene theme: {request.scene_title}")
    # Content context for visual relevance
    if request.scene_content:
        hints = _visual_hints(
            request.scene_content,
            150,
            (
                "data visualization elements",
                "modern technology aesthetic",
                "professional business environment",
            ),
        )
        if hints:
            parts.append(", ".join(hints))
    # Podcast theme context
    if request.idea:
        parts.append(f"Podcast topic context: {request.idea[:80].strip()}")
    # Technical requirements for video generation
    parts.extend([
        "16:9 aspect ratio optimized for video",
        "Center-focused composition for talking avatar overlay",
        "Neutral color palette with professional tones",
        "High resolution, sharp focus, professional photography quality",
        "No text, no logos, no distracting elements",
        "Suitable for InfiniteTalk video generation with animated avatar",
    ])
    # Style constraints
    parts.extend([
        "Realistic photography style, not illustration or cartoon",
        "Professional broadcast quality",
        "Warm, inviting atmosphere",
        "Clean composition with breathing room for avatar placement",
    ])
    return ", ".join(parts)


@router.post("/image", response_model=PodcastImageResponse)
async def generate_podcast_scene_image(
    request: PodcastImageRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate an AI image for a podcast scene.

    When ``request.base_avatar_url`` is set, the avatar is loaded and passed as
    the reference image to ``generate_character_image``; otherwise a
    studio-style prompt is built and sent to ``generate_image``. The resulting
    bytes are written as a PNG under ``PODCAST_IMAGES_DIR`` and registered in
    the asset library (best effort - a failure there is only logged).

    Args:
        request: Scene description and generation settings.
        current_user: Authenticated user payload injected by the auth dependency.
        db: Database session used for subscription checks and asset tracking.

    Returns:
        PodcastImageResponse with the stored filename, its serving URL, the
        image dimensions, provider/model and a rough cost estimate.

    Raises:
        HTTPException: 400 if ``scene_title`` is empty; 500 if the base avatar
            cannot be loaded or an unexpected error occurs; 504/502 if
            character-consistent generation fails; any HTTPException raised by
            pre-flight validation or the generation services is propagated
            unchanged.
    """
    user_id = require_authenticated_user(current_user)
    if not request.scene_title:
        raise HTTPException(status_code=400, detail="Scene title is required")

    try:
        # PRE-FLIGHT VALIDATION: check subscription limits before any API calls.
        from services.subscription import PricingService
        from services.subscription.preflight_validator import validate_image_generation_operations

        pricing_service = PricingService(db)
        try:
            # Raises HTTPException immediately if validation fails
            validate_image_generation_operations(
                pricing_service=pricing_service,
                user_id=user_id,
                num_images=1,
            )
            logger.info(f"[Podcast] ✅ Pre-flight validation passed for user {user_id}")
        except HTTPException as http_ex:
            logger.error(f"[Podcast] ❌ Pre-flight validation failed for user {user_id}: {http_ex.detail}")
            raise

        logger.info(f"[Podcast] Image generation request for scene {request.scene_id}")
        logger.info(f"[Podcast] base_avatar_url={request.base_avatar_url}")
        logger.info(f"[Podcast] custom_prompt={request.custom_prompt}")
        logger.info(f"[Podcast] style={request.style}, rendering_speed={request.rendering_speed}, aspect_ratio={request.aspect_ratio}")

        # If a base avatar is provided, load it as the reference image;
        # otherwise the image is generated from scratch.
        if request.base_avatar_url:
            from ..utils import load_podcast_image_bytes

            try:
                logger.info(f"[Podcast] Attempting to load base avatar from: {request.base_avatar_url}")
                base_avatar_bytes = load_podcast_image_bytes(request.base_avatar_url)
                logger.info(f"[Podcast] ✅ Successfully loaded base avatar ({len(base_avatar_bytes)} bytes) for scene {request.scene_id}")
            except Exception as e:
                # loguru has no ``exc_info`` option: extra kwargs trigger
                # str.format() on the message, which breaks on braces in the
                # error text. opt(exception=True) attaches the traceback and the
                # values are passed as format arguments instead.
                logger.opt(exception=True).error(
                    "[Podcast] ❌ Failed to load base avatar from {}: {}",
                    request.base_avatar_url,
                    e,
                )
                # Without the avatar character consistency cannot be kept, so
                # fail instead of falling back to standard generation.
                raise HTTPException(
                    status_code=500,
                    detail={
                        "error": "Failed to load base avatar",
                        "message": f"Could not load the base avatar image for character consistency: {str(e)}. Please ensure the avatar image is accessible.",
                    },
                ) from e
        else:
            logger.info("[Podcast] No base avatar URL provided, will generate from scratch")
            base_avatar_bytes = None

        image_prompt = ""  # Initialize prompt variable
        if base_avatar_bytes:
            # Character-consistent generation: a user-supplied prompt wins,
            # otherwise a scene-specific prompt is built.
            if request.custom_prompt:
                image_prompt = request.custom_prompt
                logger.info(f"[Podcast] Using custom prompt from user for scene {request.scene_id}")
            else:
                image_prompt = _build_character_scene_prompt(request)
            logger.info(f"[Podcast] Using Ideogram Character for scene {request.scene_id} with base avatar")
            logger.info(f"[Podcast] Scene prompt: {image_prompt[:150]}...")

            # Use custom settings if provided, otherwise use defaults
            style = request.style or "Realistic"
            rendering_speed = request.rendering_speed or "Quality"
            # Explicit aspect ratio wins; otherwise derive it from the
            # requested dimensions, defaulting to 16:9.
            aspect_ratio = request.aspect_ratio or _ASPECT_RATIO_BY_SIZE.get(
                (request.width, request.height), "16:9"
            )
            logger.info(f"[Podcast] Ideogram Character settings: style={style}, rendering_speed={rendering_speed}, aspect_ratio={aspect_ratio}")

            try:
                # NOTE(review): this is a synchronous call inside an async
                # handler; if it blocks while polling (timeout=None) it stalls
                # the event loop - consider running it in a threadpool.
                image_bytes = generate_character_image(
                    prompt=image_prompt,
                    reference_image_bytes=base_avatar_bytes,
                    user_id=user_id,
                    style=style,
                    aspect_ratio=aspect_ratio,
                    rendering_speed=rendering_speed,
                    timeout=None,  # No timeout - poll until WaveSpeed says it's done or failed
                )
                # Wrap the raw bytes so both generation paths yield the same result type.
                from services.llm_providers.image_generation.base import ImageGenerationResult

                result = ImageGenerationResult(
                    image_bytes=image_bytes,
                    provider="wavespeed",
                    model="ideogram-ai/ideogram-character",
                    width=request.width,
                    height=request.height,
                )
                logger.info("[Podcast] ✅ Successfully generated character-consistent scene image")
            except HTTPException as http_err:
                # Re-raise HTTPExceptions from the generation client as-is
                logger.error(f"[Podcast] ❌ Ideogram Character HTTPException: {http_err.status_code} - {http_err.detail}")
                raise
            except Exception as char_error:
                error_msg = str(char_error)
                error_type = type(char_error).__name__
                logger.opt(exception=True).error(
                    "[Podcast] ❌ Ideogram Character failed: {}: {}",
                    error_type,
                    error_msg,
                )
                # No fallback to standard generation here: that would lose
                # character consistency. Timeouts/connection problems get a
                # dedicated 504 with a retry hint.
                error_msg_lower = error_msg.lower()
                if "timeout" in error_msg_lower or "connection" in error_msg_lower or "504" in error_msg:
                    raise HTTPException(
                        status_code=504,
                        detail={
                            "error": "Image generation service unavailable",
                            "message": "The character-consistent image generation service is currently unavailable. Please try again in a few moments. If the problem persists, the service may be experiencing high load.",
                            "retry_recommended": True,
                        },
                    ) from char_error
                raise HTTPException(
                    status_code=502,
                    detail={
                        "error": "Character-consistent image generation failed",
                        "message": f"Failed to generate image with character consistency: {error_msg}",
                        "retry_recommended": True,
                    },
                ) from char_error
        else:
            # Reached only when no usable base avatar exists; avatar load and
            # character generation failures have already raised above.
            logger.info("[Podcast] No base avatar provided - generating standard image from scratch")
            image_prompt = _build_standard_scene_prompt(request)
            logger.info(f"[Podcast] Generating image for scene {request.scene_id}: {request.scene_title}")
            image_options = {
                "provider": None,  # Auto-select provider
                "width": request.width,
                "height": request.height,
            }
            result = generate_image(
                prompt=image_prompt,
                options=image_options,
                user_id=user_id,
            )

        # Save image to podcast images directory
        PODCAST_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
        # scene_id and title are request-supplied: sanitise both so the
        # filename can never contain path separators.
        safe_scene_id = _safe_filename_component(request.scene_id)
        clean_title = _safe_filename_component(request.scene_title[:30])
        unique_id = str(uuid.uuid4())[:8]
        image_filename = f"scene_{safe_scene_id}_{clean_title}_{unique_id}.png"
        image_path = PODCAST_IMAGES_DIR / image_filename
        with open(image_path, "wb") as f:
            f.write(result.image_bytes)
        logger.info(f"[Podcast] Saved image to: {image_path}")

        # Create image URL (served via API endpoint)
        image_url = f"/api/podcast/images/{image_filename}"

        # Rough cost estimate only (~$0.10 for Ideogram Character, ~$0.04 otherwise).
        is_character_model = (
            result.provider == "wavespeed"
            and result.model == "ideogram-ai/ideogram-character"
        )
        cost = 0.10 if is_character_model else 0.04

        # Save to asset library (best effort: failure must not fail the request)
        try:
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="image",
                source_module="podcast_maker",
                filename=image_filename,
                file_url=image_url,
                file_path=str(image_path),
                file_size=len(result.image_bytes),
                mime_type="image/png",
                title=f"{request.scene_title} - Podcast Scene",
                description=f"Podcast scene image: {request.scene_title}",
                prompt=image_prompt,
                tags=["podcast", "scene", request.scene_id],
                provider=result.provider,
                model=result.model,
                asset_metadata={
                    "scene_id": request.scene_id,
                    "scene_title": request.scene_title,
                    "status": "completed",
                },
            )
        except Exception as e:
            logger.warning(f"[Podcast] Failed to save image asset: {e}")

        return PodcastImageResponse(
            scene_id=request.scene_id,
            scene_title=request.scene_title,
            image_filename=image_filename,
            image_url=image_url,
            width=result.width,
            height=result.height,
            provider=result.provider,
            model=result.model,
            cost=cost,
        )
    except HTTPException:
        # Re-raise HTTPExceptions as-is (they already have proper error details)
        raise
    except Exception as exc:
        # Log the full exception (with traceback) for debugging
        error_msg = str(exc)
        error_type = type(exc).__name__
        logger.opt(exception=True).error(
            "[Podcast] Image generation failed: {}: {}",
            error_type,
            error_msg,
        )
        raise HTTPException(
            status_code=500,
            detail={
                "error": "Image generation failed",
                "message": error_msg,
                "type": error_type,
            },
        ) from exc
@router.get("/images/{path:path}")
async def serve_podcast_image(
    path: str,  # Changed from filename to path to support subdirectories
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve generated podcast scene images and avatars.

    Supports authentication via Authorization header or token query parameter.
    Query parameter is useful for HTML elements like <img> that cannot send custom headers.
    Supports subdirectories like avatars/

    Args:
        path: File path relative to ``PODCAST_IMAGES_DIR``.
        current_user: Authenticated user payload injected by the auth dependency.

    Returns:
        FileResponse streaming the file with media type ``image/png``.

    Raises:
        HTTPException: 400 for a malformed path, 403 if the resolved path lies
            outside ``PODCAST_IMAGES_DIR``, 404 if no regular file exists there.
    """
    require_authenticated_user(current_user)

    # Cheap first-line check: reject traversal sequences and absolute paths.
    if ".." in path or path.startswith("/"):
        raise HTTPException(status_code=400, detail="Invalid path")

    # Resolve the root as well, so the containment check compares canonical
    # paths even if PODCAST_IMAGES_DIR is relative or contains symlinks.
    images_root = PODCAST_IMAGES_DIR.resolve()
    try:
        image_path = (images_root / path).resolve()
    except (OSError, RuntimeError, ValueError):
        # e.g. embedded NUL byte or a symlink loop in the requested path
        raise HTTPException(status_code=400, detail="Invalid path") from None

    # Component-wise containment check; a plain string-prefix comparison would
    # also accept sibling directories that merely share the prefix.
    try:
        image_path.relative_to(images_root)
    except ValueError:
        raise HTTPException(status_code=403, detail="Access denied") from None

    # is_file() (not exists()) so that a directory yields 404 instead of a
    # failure while sending the response.
    if not image_path.is_file():
        raise HTTPException(status_code=404, detail="Image not found")

    return FileResponse(image_path, media_type="image/png")