Base code

This commit is contained in:
Kunthawat Greethong
2026-01-08 22:39:53 +07:00
parent 697115c61a
commit c35fa52117
2169 changed files with 626670 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
"""
YouTube Creator handler package.
Contains endpoints for avatar upload/optimization and scene image generation.
"""
# Explicitly define __all__ for clarity.
# It is empty, so `from <this package> import *` exports no names; handler
# modules have to be imported individually.
__all__ = []
# NOTE(review): the string below is a bare expression statement, not a
# docstring (only the first statement of a module is one); it has no runtime
# effect and duplicates the module docstring at the top of the file.
"""YouTube Creator handlers package."""

View File

@@ -0,0 +1,465 @@
"""YouTube Creator scene audio generation handlers."""
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from typing import Dict, Any, Optional
from pydantic import BaseModel
from services.database import get_db
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
from api.story_writer.utils.auth import require_authenticated_user
from utils.asset_tracker import save_asset_to_library
from models.story_models import StoryAudioResult
from services.story_writer.audio_generation_service import StoryAudioGenerationService
from pathlib import Path
from utils.logger_utils import get_service_logger
# Router for the scene-audio endpoints. No prefix is set here, so the route
# paths declared below are relative to wherever this router is mounted.
router = APIRouter(tags=["youtube-audio"])
logger = get_service_logger("api.youtube.audio")
# Audio output directory: "youtube_audio" under the fourth ancestor of this
# file's path. It is created at import time if it does not exist yet.
base_dir = Path(__file__).parent.parent.parent.parent
YOUTUBE_AUDIO_DIR = base_dir / "youtube_audio"
YOUTUBE_AUDIO_DIR.mkdir(parents=True, exist_ok=True)
# Initialize audio service (module-level instance shared by the handlers in
# this file), pointed at the YouTube-specific output directory.
audio_service = StoryAudioGenerationService(output_dir=str(YOUTUBE_AUDIO_DIR))
# WaveSpeed Minimax Speech voice ids include language-specific voices
# Ref: https://wavespeed.ai/docs/docs-api/minimax/minimax_speech_voice_id
# Maps a lowercase two-letter language code to the `language_boost` label that
# is sent to the audio service.
LANGUAGE_CODE_TO_LANGUAGE_BOOST = dict(
    en="English",
    es="Spanish",
    fr="French",
    de="German",
    pt="Portuguese",
    it="Italian",
    hi="Hindi",
    ar="Arabic",
    ru="Russian",
    ja="Japanese",
    ko="Korean",
    zh="Chinese",
    vi="Vietnamese",
    id="Indonesian",
    tr="Turkish",
    nl="Dutch",
    pl="Polish",
    th="Thai",
    uk="Ukrainian",
    el="Greek",
    cs="Czech",
    fi="Finnish",
    ro="Romanian",
)
# Default language-specific Minimax voices (first-choice). We keep English on the existing "persona" voices.
# Every non-English default follows the "<Language>_male_1_v1" naming pattern,
# so the table is derived from the labels above (same order, English skipped).
LANGUAGE_BOOST_TO_DEFAULT_VOICE_ID = {
    boost_label: f"{boost_label}_male_1_v1"
    for boost_label in LANGUAGE_CODE_TO_LANGUAGE_BOOST.values()
    if boost_label != "English"
}
def _resolve_language_boost(language: Optional[str], explicit_language_boost: Optional[str]) -> str:
"""
Determine the effective WaveSpeed `language_boost`.
- If user explicitly provided language_boost, use it (including "auto").
- Else if language code provided, map to the WaveSpeed boost label.
- Else default to English (backwards compatible).
"""
if explicit_language_boost is not None and str(explicit_language_boost).strip() != "":
return str(explicit_language_boost).strip()
if language is not None and str(language).strip() != "":
lang_code = str(language).strip().lower()
return LANGUAGE_CODE_TO_LANGUAGE_BOOST.get(lang_code, "auto")
return "English"
def select_optimal_emotion(scene_title: str, narration: str, video_plan_context: Optional[Dict[str, Any]] = None) -> str:
    """
    Pick a narration emotion for a scene from simple keyword heuristics.

    Available emotions: "happy", "sad", "angry", "fearful", "disgusted", "surprised", "neutral"
    (this function only ever returns "happy", "surprised" or "neutral").

    Args:
        scene_title: Scene title; "hook" / "cta" inside it steer the choice.
        narration: Text that will be spoken; scanned together with the title.
        video_plan_context: Optional plan dict. A truthy ``"tone"`` entry
            containing "serious"/"professional" forces "neutral", and one
            containing "fun"/"entertaining" forces "happy", overriding the
            scene-level choice.

    Returns:
        The selected emotion string.

    Keyword checks are case-insensitive substring matches, evaluated in order;
    the first matching rule wins.
    """
    title_lower = (scene_title or "").lower()
    scene_text = f"{scene_title or ''} {narration or ''}".lower()

    def mentions(*keywords: str) -> bool:
        return any(keyword in scene_text for keyword in keywords)

    # Default to happy for engaging YouTube content
    selected_emotion = "happy"

    # Hook scenes need excitement and energy
    if "hook" in title_lower or mentions("exciting", "amazing", "unbelievable", "shocking", "wow"):
        selected_emotion = "surprised"  # Excited and attention-grabbing
    # Emotional stories or inspirational content
    elif mentions("emotional", "touching", "heartwarming", "inspiring", "motivational"):
        selected_emotion = "happy"  # Warm and uplifting
    # Serious or professional content
    elif mentions("important", "critical", "serious", "professional", "expert"):
        selected_emotion = "neutral"  # Professional and serious
    # Problem-solving or tutorial content
    elif mentions("problem", "solution", "fix", "help", "guide"):
        selected_emotion = "happy"  # Helpful and encouraging
    # Call-to-action scenes
    elif "cta" in title_lower or mentions("subscribe", "like", "comment", "share", "action"):
        selected_emotion = "happy"  # Confident and encouraging
    # Negative or concerning topics. "problem" is not repeated here because the
    # problem-solving rule above already claims it.
    elif mentions("warning", "danger", "risk", "issue"):
        selected_emotion = "neutral"  # Serious but not alarming

    # Check video plan context for overall tone
    if video_plan_context:
        # `.get("tone", "")` still yields None when the key exists with a null
        # value, so normalise before lowercasing.
        tone = str(video_plan_context.get("tone") or "").lower()
        if "serious" in tone or "professional" in tone:
            selected_emotion = "neutral"
        elif "fun" in tone or "entertaining" in tone:
            selected_emotion = "happy"

    return selected_emotion
def select_optimal_voice(scene_title: str, narration: str, video_plan_context: Optional[Dict[str, Any]] = None) -> str:
    """
    Pick a Minimax persona voice for a scene from keyword heuristics.

    Available voices: Wise_Woman, Friendly_Person, Inspirational_girl, Deep_Voice_Man,
    Calm_Woman, Casual_Guy, Lively_Girl, Patient_Man, Young_Knight, Determined_Man,
    Lovely_Girl, Decent_Boy, Imposing_Manner, Elegant_Man, Abbess, Sweet_Girl_2, Exuberant_Girl

    Selection happens in two passes, and the second can override the first:
    1. Plan-level: ``video_type`` / ``target_audience`` / ``tone`` from
       ``video_plan_context`` choose a baseline voice.
    2. Scene-level: keywords in the title and narration (hook, story,
       technical, call-to-action) replace the baseline when they match.

    Args:
        scene_title: Scene title; also used in the log line (first 50 chars).
        narration: Text that will be spoken.
        video_plan_context: Optional plan dict; missing or null entries are
            treated as empty strings.

    Returns:
        The selected voice_id string ("Casual_Guy" when nothing matches).

    All keyword checks are case-insensitive substring matches.
    """
    def context_text(key: str) -> str:
        # `.get(key, "")` still yields None when the key is present with a null
        # value, which would crash on `.lower()`; normalise instead.
        return str(video_plan_context.get(key) or "").lower()

    def has_any(haystack: str, keywords) -> bool:
        return any(keyword in haystack for keyword in keywords)

    # Default to Casual_Guy for engaging YouTube content
    selected_voice = "Casual_Guy"

    # Analyze video plan context for content type
    if video_plan_context:
        video_type = context_text("video_type")
        target_audience = context_text("target_audience")
        tone = context_text("tone")

        # Educational/Professional content
        if has_any(video_type, ("tutorial", "educational", "how-to", "guide", "course")):
            if "professional" in tone or "expert" in target_audience:
                selected_voice = "Wise_Woman"  # Authoritative and trustworthy
            else:
                selected_voice = "Patient_Man"  # Clear and instructional
        # Entertainment/Casual content
        elif has_any(video_type, ("entertainment", "vlog", "lifestyle", "story", "review")):
            if "young" in target_audience or "millennial" in target_audience:
                selected_voice = "Casual_Guy"  # Friendly and relatable
            elif "female" in target_audience or "women" in target_audience:
                selected_voice = "Lively_Girl"  # Energetic and engaging
            else:
                selected_voice = "Friendly_Person"  # Approachable
        # Motivational/Inspirational content
        elif has_any(video_type, ("motivational", "inspirational", "success", "mindset")):
            selected_voice = "Inspirational_girl"  # Uplifting and motivational
        # Business/Corporate content
        elif has_any(video_type, ("business", "corporate", "finance", "marketing")):
            selected_voice = "Elegant_Man"  # Professional and sophisticated
        # Tech/Gaming content
        elif has_any(video_type, ("tech", "gaming", "software", "app")):
            selected_voice = "Young_Knight"  # Energetic and modern

    # Analyze scene content for specific voice requirements
    title_text = scene_title or ""
    title_lower = title_text.lower()
    scene_text = f"{title_text} {narration or ''}".lower()

    # Hook scenes need energetic, attention-grabbing voices
    if "hook" in title_lower or has_any(scene_text, ("attention", "grab", "exciting", "amazing", "unbelievable")):
        selected_voice = "Exuberant_Girl"  # Very energetic and enthusiastic
    # Emotional/stories need more expressive voices
    elif has_any(scene_text, ("story", "emotional", "heartwarming", "touching", "inspiring")):
        selected_voice = "Inspirational_girl"  # Emotional and inspiring
    # Technical explanations need clear, precise voices
    elif has_any(scene_text, ("technical", "explain", "step-by-step", "process", "how-to")):
        selected_voice = "Calm_Woman"  # Clear and methodical
    # Call-to-action scenes need confident, persuasive voices
    elif "cta" in title_lower or has_any(scene_text, ("subscribe", "like", "comment", "share", "now", "today")):
        selected_voice = "Determined_Man"  # Confident and persuasive

    logger.info(f"[VoiceSelection] Selected '{selected_voice}' for scene: {title_text[:50]}...")
    return selected_voice
# Request body for POST /audio: one scene's narration text plus optional
# voice, language and encoding settings for the audio service.
class YouTubeAudioRequest(BaseModel):
    scene_id: str  # Echoed back in the response and stored with the saved asset
    scene_title: str  # Used for voice/emotion heuristics and the asset title
    text: str  # Narration; bracketed production notes are stripped by the handler
    voice_id: Optional[str] = None  # Will auto-select based on content if not provided
    language: Optional[str] = None  # Language code for multilingual audio (e.g., "en", "es", "fr")
    speed: float = 1.0
    volume: float = 1.0
    pitch: float = 0.0
    # NOTE: the handler treats the value "happy" as "not set by the caller" and
    # runs automatic emotion selection in that case.
    emotion: str = "happy"  # More engaging for YouTube content
    english_normalization: bool = False
    # Enhanced defaults for high-quality YouTube audio using Minimax Speech 02 HD
    # Higher quality settings for professional YouTube content
    sample_rate: Optional[int] = 44100  # CD quality: 44100 Hz (valid values: 8000, 16000, 22050, 24000, 32000, 44100)
    bitrate: int = 256000  # Highest quality: 256kbps (valid values: 32000, 64000, 128000, 256000)
    channel: Optional[str] = "2"  # Stereo for richer audio (valid values: "1" or "2")
    format: Optional[str] = "mp3"  # Universal format for web
    language_boost: Optional[str] = None  # If not provided, inferred from `language` (or defaults to English)
    enable_sync_mode: bool = True  # Passed through unchanged to the audio service
    # Context for intelligent voice/emotion selection
    video_plan_context: Optional[Dict[str, Any]] = None  # Optional video plan for context-aware voice selection
# Response body for POST /audio, built by the handler from the audio service
# result with fallbacks for missing keys.
class YouTubeAudioResponse(BaseModel):
    scene_id: str  # Copied from the request
    scene_title: str  # Copied from the request
    audio_filename: str
    audio_url: str  # Rewritten by the handler to the /api/youtube/audio/ path when the service returns a story URL
    provider: str
    model: str
    voice_id: str  # Voice actually used (explicit, language default, or auto-selected)
    text_length: int
    file_size: int
    cost: float
def _strip_instructional_markers(text: str) -> str:
    """Remove bracketed production notes that must not be spoken and collapse whitespace.

    Markers such as ``[Pacing: slow]`` or ``[Note: ...]`` are deleted
    (case-insensitively, one pattern after another in the listed order), then
    every whitespace run becomes a single space. May return an empty string.
    """
    import re

    instructional_patterns = [
        r'\[Pacing:\s*[^\]]+\]',  # [Pacing: slow]
        r'\[Instructions?:\s*[^\]]+\]',  # [Instructions: ...]
        r'\[Timing:\s*[^\]]+\]',  # [Timing: ...]
        r'\[Note:\s*[^\]]+\]',  # [Note: ...]
        r'\[Internal:\s*[^\]]+\]',  # [Internal: ...]
    ]
    cleaned = text.strip()
    for pattern in instructional_patterns:
        cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE)
    # Clean up extra whitespace and normalize
    return re.sub(r'\s+', ' ', cleaned).strip()


@router.post("/audio", response_model=YouTubeAudioResponse)
async def generate_youtube_scene_audio(
    request: YouTubeAudioRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate AI audio for a YouTube scene using shared audio service.
    Similar to Podcast's audio generation endpoint.
    """
    # Flow: authenticate -> clean the narration -> resolve language boost,
    # voice and emotion -> call the shared audio service -> best-effort save
    # to the asset library -> build the response.
    # Raises HTTPException 400 for missing/empty text, 500 for unexpected
    # generation failures; HTTPExceptions raised during generation propagate
    # with their original status.
    user_id = require_authenticated_user(current_user)
    if not request.text or not request.text.strip():
        raise HTTPException(status_code=400, detail="Text is required")

    try:
        # Preprocess text to remove instructional markers that shouldn't be spoken
        processed_text = _strip_instructional_markers(request.text)
        if not processed_text:
            raise HTTPException(status_code=400, detail="Text became empty after removing instructions. Please provide clean narration text.")
        logger.info(f"[YouTubeAudio] Text preprocessing: {len(request.text)} -> {len(processed_text)} characters")

        effective_language_boost = _resolve_language_boost(request.language, request.language_boost)

        # Intelligent voice and emotion selection based on content analysis
        if not request.voice_id:
            # If non-English language is selected, default to the language-specific Minimax voice_id.
            # Otherwise keep the existing English persona voice selection logic.
            if effective_language_boost in LANGUAGE_BOOST_TO_DEFAULT_VOICE_ID and effective_language_boost not in ["English", "auto"]:
                selected_voice = LANGUAGE_BOOST_TO_DEFAULT_VOICE_ID[effective_language_boost]
                logger.info(
                    f"[VoiceSelection] Using language-specific default voice '{selected_voice}' "
                    f"(language_boost={effective_language_boost}, language={request.language})"
                )
            else:
                selected_voice = select_optimal_voice(
                    request.scene_title,
                    processed_text,
                    request.video_plan_context
                )
        else:
            selected_voice = request.voice_id

        # Auto-select emotion if not specified or if using defaults.
        # NOTE: "happy" is also the schema default, so an explicit "happy" from
        # the caller is indistinguishable from "not set" and is auto-selected too.
        if request.emotion == "happy":
            selected_emotion = select_optimal_emotion(
                request.scene_title,
                processed_text,
                request.video_plan_context
            )
        else:
            selected_emotion = request.emotion

        logger.info(
            f"[YouTubeAudio] Voice selection: {selected_voice}, Emotion: {selected_emotion}, "
            f"language={request.language}, language_boost={effective_language_boost}"
        )

        # Build kwargs for optional parameters - use defaults if None
        # WaveSpeed API requires specific values, so we provide sensible defaults
        # This matches Podcast's approach but with explicit defaults to avoid None errors
        optional_kwargs = {}

        # DEBUG: Log what values we received
        logger.info(
            f"[YouTubeAudio] Request parameters: sample_rate={request.sample_rate}, bitrate={request.bitrate}, "
            f"channel={request.channel}, format={request.format}, language_boost={request.language_boost}, "
            f"effective_language_boost={effective_language_boost}, language={request.language}"
        )

        # sample_rate: Use provided value or omit (WaveSpeed will use default)
        if request.sample_rate is not None:
            optional_kwargs["sample_rate"] = request.sample_rate

        # bitrate: Always provide a value.
        # Valid values: 32000, 64000, 128000, 256000
        # The request model declares a non-optional int defaulting to 256000,
        # so request.bitrate is never None here.
        optional_kwargs["bitrate"] = request.bitrate

        # channel: Only include if valid (WaveSpeed only accepts "1" or "2" as strings)
        # If None, empty string, or invalid, omit it and WaveSpeed will use default
        # NEVER include channel if it's not exactly "1" or "2"
        if request.channel is not None and str(request.channel).strip() in ["1", "2"]:
            optional_kwargs["channel"] = str(request.channel).strip()
            logger.info(f"[YouTubeAudio] Including valid channel: {optional_kwargs['channel']}")
        else:
            logger.info(f"[YouTubeAudio] Omitting invalid channel: {request.channel}")

        # format: Use provided value or omit (WaveSpeed will use default)
        if request.format is not None:
            optional_kwargs["format"] = request.format

        # language_boost: always send resolved value (improves pronunciation and helps multilingual voices)
        if effective_language_boost is not None and str(effective_language_boost).strip() != "":
            optional_kwargs["language_boost"] = effective_language_boost

        logger.info(f"[YouTubeAudio] Final optional_kwargs: {optional_kwargs}")

        result: StoryAudioResult = audio_service.generate_ai_audio(
            scene_number=0,
            scene_title=request.scene_title,
            text=processed_text,
            user_id=user_id,
            voice_id=selected_voice,
            speed=request.speed or 1.0,
            volume=request.volume or 1.0,
            pitch=request.pitch or 0.0,
            emotion=selected_emotion,
            english_normalization=request.english_normalization or False,
            enable_sync_mode=request.enable_sync_mode,
            **optional_kwargs,
        )

        # Override URL to use YouTube endpoint instead of story endpoint
        if result.get("audio_url") and "/api/story/audio/" in result.get("audio_url", ""):
            audio_filename = result.get("audio_filename", "")
            result["audio_url"] = f"/api/youtube/audio/{audio_filename}"
    except HTTPException:
        # Deliberate HTTP errors (such as the 400 raised above when the text is
        # empty after cleanup) must reach the client with their own status
        # instead of being rewrapped as a generic 500 below.
        raise
    except Exception as exc:
        logger.error(f"[YouTube] Audio generation failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Audio generation failed: {exc}")

    # Save to asset library (youtube_creator module).
    # Best effort: a failure here is logged and does not fail the request.
    try:
        if result.get("audio_url"):
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="audio",
                source_module="youtube_creator",
                filename=result.get("audio_filename", ""),
                file_url=result.get("audio_url", ""),
                file_path=result.get("audio_path"),
                file_size=result.get("file_size"),
                mime_type="audio/mpeg",
                title=f"{request.scene_title} - YouTube",
                description="YouTube scene narration",
                tags=["youtube_creator", "audio", request.scene_id],
                provider=result.get("provider"),
                model=result.get("model"),
                cost=result.get("cost"),
                asset_metadata={
                    "scene_id": request.scene_id,
                    "scene_title": request.scene_title,
                    "status": "completed",
                },
            )
    except Exception as e:
        logger.warning(f"[YouTube] Failed to save audio asset: {e}")

    return YouTubeAudioResponse(
        scene_id=request.scene_id,
        scene_title=request.scene_title,
        audio_filename=result.get("audio_filename", ""),
        audio_url=result.get("audio_url", ""),
        provider=result.get("provider", "wavespeed"),
        model=result.get("model", "minimax/speech-02-hd"),
        voice_id=result.get("voice_id", selected_voice),
        text_length=result.get("text_length", len(request.text)),
        file_size=result.get("file_size", 0),
        cost=result.get("cost", 0.0),
    )
@router.get("/audio/{filename}")
async def serve_youtube_audio(
    filename: str,
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve generated YouTube scene audio files.
    Supports authentication via Authorization header or token query parameter.
    Query parameter is useful for HTML elements like <audio> that cannot send custom headers.
    """
    # Raises HTTPException 400 for names containing path separators or "..",
    # 403 when the resolved path leaves the audio directory, 404 when the
    # target is not an existing regular file.
    require_authenticated_user(current_user)

    # Security check: ensure filename doesn't contain path traversal
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    # Resolve both sides so the containment test compares like with like
    # (symlinks and relative segments in the base directory included).
    audio_root = YOUTUBE_AUDIO_DIR.resolve()
    audio_path = (audio_root / filename).resolve()

    # Security check: ensure path is within YOUTUBE_AUDIO_DIR. A component-wise
    # check is used because a plain string-prefix test would also accept
    # sibling directories whose name merely starts with the same characters.
    try:
        audio_path.relative_to(audio_root)
    except ValueError:
        raise HTTPException(status_code=403, detail="Access denied") from None

    # is_file() rather than exists(): a directory (e.g. filename ".") cannot be
    # served as a file response.
    if not audio_path.is_file():
        raise HTTPException(status_code=404, detail="Audio file not found")

    return FileResponse(audio_path, media_type="audio/mpeg")

View File

@@ -0,0 +1,557 @@
"""YouTube Creator avatar upload and AI optimization handlers."""
from pathlib import Path
import uuid
from typing import Dict, Any, Optional
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from middleware.auth_middleware import get_current_user
from services.database import get_db
from services.llm_providers.main_image_generation import generate_image
from services.llm_providers.main_image_editing import edit_image
from utils.asset_tracker import save_asset_to_library
from utils.logger_utils import get_service_logger
# Router for the avatar endpoints; every route declared on it is prefixed with
# "/avatar".
router = APIRouter(prefix="/avatar", tags=["youtube-avatar"])
logger = get_service_logger("api.youtube.avatar")
# Directories
# Avatar images are stored in "youtube_avatars" under the fourth ancestor of
# this file's path; the directory is created at import time if it is missing.
base_dir = Path(__file__).parent.parent.parent.parent
YOUTUBE_AVATARS_DIR = base_dir / "youtube_avatars"
YOUTUBE_AVATARS_DIR.mkdir(parents=True, exist_ok=True)
def require_authenticated_user(current_user: Dict[str, Any]) -> str:
    """Return the caller's user id as a string, or reject the request.

    Raises:
        HTTPException: 401 when ``current_user`` is empty/None or carries no
            truthy ``"id"`` entry.
    """
    if current_user:
        user_id = current_user.get("id")
        if user_id:
            return str(user_id)
    raise HTTPException(status_code=401, detail="Authentication required")
def _load_youtube_image_bytes(image_url: str) -> bytes:
    """Load avatar bytes from a stored YouTube avatar URL.

    Only the last "/"-separated segment of ``image_url`` (with any query string
    removed) is used, and it is looked up inside ``YOUTUBE_AVATARS_DIR``.

    Raises:
        HTTPException: 404 when the name is empty, resolves outside the avatar
            directory, or does not point at an existing regular file.
    """
    filename = image_url.split("/")[-1].split("?")[0]

    # The URL is caller-supplied. Resolve the candidate and require it to stay
    # inside the avatar directory, so names such as ".." or backslash-separated
    # paths (which are separators on Windows) and symlinks cannot reach other
    # files.
    avatars_root = YOUTUBE_AVATARS_DIR.resolve()
    image_path = (avatars_root / filename).resolve()
    try:
        image_path.relative_to(avatars_root)
    except ValueError:
        raise HTTPException(status_code=404, detail="Avatar image not found") from None

    if not filename or not image_path.is_file():
        raise HTTPException(status_code=404, detail="Avatar image not found")
    return image_path.read_bytes()
async def _generate_avatar_from_context(
    user_id: str,
    project_id: Optional[str],
    audience: Optional[str] = None,
    content_type: Optional[str] = None,
    video_plan_json: Optional[str] = None,
    brand_style: Optional[str] = None,
    db: Optional[Session] = None,
) -> Dict[str, Any]:
    """
    Internal function to generate avatar from context.
    Can be called from route handler or directly from router.

    Builds a text prompt from the explicit inputs and the optional video plan,
    asks the image generation service for a 1024x1024 image, writes it to
    ``YOUTUBE_AVATARS_DIR`` and, when both ``project_id`` and ``db`` are given,
    records it in the asset library (best effort).

    Args:
        user_id: Owner of the generated asset; forwarded to the image service.
        project_id: Optional project id; used in the file name and asset record.
        audience: Explicit target audience; takes priority over the plan.
        content_type: Explicit video type; takes priority over the plan.
        video_plan_json: Optional JSON object string with keys such as
            ``target_audience``, ``video_type``, ``visual_style``, ``tone`` and
            ``avatar_recommendations``. Invalid or non-object JSON is ignored
            with a warning.
        brand_style: Explicit visual style; takes priority over the plan.
        db: Optional database session for the asset library.

    Returns:
        Dict with ``avatar_url``, ``avatar_filename``, ``avatar_prompt`` and
        ``message``.
    """
    import json
    import re

    def as_text(value: Any) -> str:
        # Plan values come from arbitrary JSON; coerce so `.lower()` and
        # `", ".join(...)` below cannot fail on lists, numbers or nulls.
        return str(value) if value else ""

    # Parse video plan if provided
    plan_data: Dict[str, Any] = {}
    avatar_recommendations: Dict[str, Any] = {}
    if video_plan_json:
        try:
            parsed_plan = json.loads(video_plan_json)
        except (TypeError, ValueError) as e:
            logger.warning(f"[YouTube] Failed to parse video plan JSON: {e}")
        else:
            # Valid JSON that is not an object (a list, string, number, ...)
            # has no `.get`; treat it like a missing plan instead of crashing.
            if isinstance(parsed_plan, dict):
                plan_data = parsed_plan
                recommendations = plan_data.get("avatar_recommendations")
                if isinstance(recommendations, dict):
                    avatar_recommendations = recommendations
            else:
                logger.warning("[YouTube] Failed to parse video plan JSON: top-level value is not an object")

    # Extract context - prioritize user inputs over plan data
    # User inputs are more reliable as they represent explicit choices
    # Priority: user input > plan data > defaults
    plan_target_audience = as_text(audience or plan_data.get("target_audience", ""))
    plan_video_type = as_text(content_type or plan_data.get("video_type", ""))
    # Use user's brand_style if provided, otherwise use plan's visual_style
    plan_visual_style = as_text(brand_style or plan_data.get("visual_style", ""))
    plan_tone = as_text(plan_data.get("tone", ""))

    logger.info(
        f"[YouTube] Avatar generation context: "
        f"video_type={plan_video_type}, audience={plan_target_audience[:50] if plan_target_audience else 'none'}, "
        f"brand_style={plan_visual_style[:50] if plan_visual_style else 'none'}"
    )

    # Build optimized prompt using plan data
    prompt_parts = []

    # Base avatar description - use recommendations if available
    if avatar_recommendations.get("description"):
        prompt_parts.append(as_text(avatar_recommendations["description"]))
    else:
        prompt_parts.append("Half-length portrait of a professional YouTube creator (25-35 years old)")

    # Video type optimization: exact names first, then substring fallbacks.
    # `plan_video_type` already includes `content_type` (it has priority), so
    # no separate content_type branch is needed.
    exact_video_type_styles = {
        "tutorial": "approachable instructor, professional yet friendly, clear presentation style",
        "review": "trustworthy reviewer, confident, credible appearance",
        "educational": "knowledgeable educator, professional, warm and engaging",
        "entertainment": "energetic creator, expressive, fun and relatable",
        "vlog": "authentic person, approachable, real and relatable",
        "product_demo": "professional presenter, polished, confident and enthusiastic",
        "reaction": "expressive creator, authentic reactions, engaging",
        "storytelling": "storyteller, warm, engaging narrator",
    }
    if plan_video_type:
        video_type_lower = plan_video_type.lower()
        if video_type_lower in exact_video_type_styles:
            prompt_parts.append(exact_video_type_styles[video_type_lower])
        elif "tech" in video_type_lower:
            prompt_parts.append("tech-forward style")
        elif "travel" in video_type_lower:
            prompt_parts.append("travel vlogger aesthetic")
        elif "education" in video_type_lower or "learn" in video_type_lower:
            prompt_parts.append("educational creator, clean and clear presentation")
        else:
            prompt_parts.append("modern creator style")

    # Audience optimization (first matching group wins)
    if plan_target_audience:
        audience_lower = plan_target_audience.lower()
        if "young" in audience_lower or "gen z" in audience_lower or "millennial" in audience_lower:
            prompt_parts.append("youthful, vibrant, modern vibe")
        elif "executive" in audience_lower or "professional" in audience_lower or "business" in audience_lower:
            prompt_parts.append("polished, credible, authoritative presence")
        elif "creative" in audience_lower:
            prompt_parts.append("artistic, expressive, creative professional")
        elif "parents" in audience_lower or "family" in audience_lower:
            prompt_parts.append("warm, approachable, trustworthy presence")

    # Visual style from plan (checks are independent; several may apply)
    if plan_visual_style:
        visual_lower = plan_visual_style.lower()
        if "minimal" in visual_lower:  # also covers "minimalist"
            prompt_parts.append("clean, minimalist aesthetic")
        if "tech" in visual_lower or "modern" in visual_lower:
            prompt_parts.append("tech-forward, modern style")
        if "energetic" in visual_lower or "colorful" in visual_lower or "vibrant" in visual_lower:
            prompt_parts.append("vibrant, energetic appearance")
        if "cinematic" in visual_lower:
            prompt_parts.append("cinematic, polished presentation")
        if "professional" in visual_lower:
            prompt_parts.append("professional, polished aesthetic")

    # Tone from plan (checks are independent; several may apply)
    if plan_tone:
        tone_lower = plan_tone.lower()
        if "casual" in tone_lower:
            prompt_parts.append("casual, approachable style")
        if "professional" in tone_lower:
            prompt_parts.append("professional attire and presentation")
        if "energetic" in tone_lower or "fun" in tone_lower:
            prompt_parts.append("energetic, lively expression")
        if "warm" in tone_lower:
            prompt_parts.append("warm, friendly expression")

    # Avatar recommendations from plan
    if avatar_recommendations.get("style"):
        prompt_parts.append(as_text(avatar_recommendations["style"]))
    if avatar_recommendations.get("energy"):
        prompt_parts.append(as_text(avatar_recommendations["energy"]))

    # Base technical requirements
    prompt_parts.extend([
        "photo-realistic, professional photography",
        "confident, engaging expression",
        "professional studio lighting, clean background",
        "suitable for video generation and thumbnails",
        "ultra realistic, 4k quality, 85mm lens",
        "looking at camera, center-focused composition"
    ])
    prompt = ", ".join(prompt_parts)

    # Fresh 32-bit seed for every generation
    seed = int(uuid.uuid4().int % (2**32))
    image_options = {
        "provider": "wavespeed",
        "model": "ideogram-v3-turbo",
        "width": 1024,
        "height": 1024,
        "seed": seed,
    }
    result = generate_image(
        prompt=prompt,
        options=image_options,
        user_id=user_id,
    )

    unique_id = str(uuid.uuid4())[:8]
    # Keep the file name (and therefore the URL) to safe characters so a
    # project id containing "/", "?", "#", spaces, etc. cannot break the path
    # or the served URL. Typical ids (UUIDs, slugs) are unchanged.
    file_tag = re.sub(r"[^A-Za-z0-9_-]", "_", str(project_id)) if project_id else "temp"
    avatar_filename = f"yt_generated_{file_tag}_{unique_id}.png"
    avatar_path = YOUTUBE_AVATARS_DIR / avatar_filename
    with open(avatar_path, "wb") as f:
        f.write(result.image_bytes)
    avatar_url = f"/api/youtube/images/avatars/{avatar_filename}"
    logger.info(f"[YouTube] Generated creator avatar: {avatar_path}")

    # Best effort: a failed library save is logged and does not fail generation.
    if project_id and db:
        try:
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="image",
                source_module="youtube_creator",
                filename=avatar_filename,
                file_url=avatar_url,
                file_path=str(avatar_path),
                file_size=len(result.image_bytes),
                mime_type="image/png",
                title=f"YouTube Creator Avatar (Generated) - {project_id}",
                description="AI-generated YouTube creator avatar",
                prompt=prompt,
                tags=["youtube", "avatar", "generated", project_id],
                provider=result.provider,
                model=result.model,
                asset_metadata={
                    "project_id": project_id,
                    "type": "generated_presenter",
                    "status": "completed",
                },
            )
        except Exception as e:
            logger.warning(f"[YouTube] Failed to save generated avatar asset: {e}")

    return {
        "avatar_url": avatar_url,
        "avatar_filename": avatar_filename,
        "avatar_prompt": prompt,
        "message": "Avatar generated successfully",
    }
@router.post("/upload")
async def upload_youtube_avatar(
    file: UploadFile = File(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Upload a YouTube creator avatar image."""
    # Stores the upload in YOUTUBE_AVATARS_DIR under a generated name and, when
    # a project id is given, records it in the asset library (best effort).
    # Raises HTTPException 400 for a missing or oversized file (max 5MB) and
    # 500 when storing the file fails.
    import re

    user_id = require_authenticated_user(current_user)
    if not file:
        raise HTTPException(status_code=400, detail="No file uploaded")

    # Validate size (max 5MB). Read at most one byte past the limit so an
    # oversized upload is detected without loading all of it into memory.
    max_upload_bytes = 5 * 1024 * 1024
    file_content = await file.read(max_upload_bytes + 1)
    if len(file_content) > max_upload_bytes:
        raise HTTPException(status_code=400, detail="Image file size must be less than 5MB")

    try:
        # The client-supplied file name may be missing; fall back to ".png"
        # then, and also when the extension is not a short alphanumeric one.
        file_ext = Path(file.filename or "").suffix
        if not re.fullmatch(r"\.[A-Za-z0-9]{1,10}", file_ext):
            file_ext = ".png"

        unique_id = str(uuid.uuid4())[:8]
        # project_id is untrusted form input: restrict what goes into the file
        # name so path separators or URL-special characters cannot break the
        # stored path or the returned URL. Typical ids are unchanged.
        file_tag = re.sub(r"[^A-Za-z0-9_-]", "_", project_id) if project_id else "temp"
        avatar_filename = f"yt_avatar_{file_tag}_{unique_id}{file_ext}"
        avatar_path = YOUTUBE_AVATARS_DIR / avatar_filename
        with open(avatar_path, "wb") as f:
            f.write(file_content)
        avatar_url = f"/api/youtube/images/avatars/{avatar_filename}"
        logger.info(f"[YouTube] Avatar uploaded: {avatar_path}")

        # Best effort: a failed library save is logged and the upload still succeeds.
        if project_id:
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
                    source_module="youtube_creator",
                    filename=avatar_filename,
                    file_url=avatar_url,
                    file_path=str(avatar_path),
                    file_size=len(file_content),
                    mime_type=file.content_type or "image/png",
                    title=f"YouTube Creator Avatar - {project_id}",
                    description="YouTube creator avatar image",
                    tags=["youtube", "avatar", project_id],
                    asset_metadata={
                        "project_id": project_id,
                        "type": "creator_avatar",
                        "status": "completed",
                    },
                )
            except Exception as e:
                logger.warning(f"[YouTube] Failed to save avatar asset: {e}")

        return {
            "avatar_url": avatar_url,
            "avatar_filename": avatar_filename,
            "message": "Avatar uploaded successfully",
        }
    except Exception as exc:
        logger.error(f"[YouTube] Avatar upload failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Avatar upload failed: {str(exc)}")
@router.post("/make-presentable")
async def make_avatar_presentable(
avatar_url: str = Form(...),
project_id: Optional[str] = Form(None),
video_type: Optional[str] = Form(None),
target_audience: Optional[str] = Form(None),
video_goal: Optional[str] = Form(None),
brand_style: Optional[str] = Form(None),
current_user: Dict[str, Any] = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""
Transform an uploaded avatar image into a YouTube-appropriate creator.
Uses AI image editing with enhanced prompts to optimize the uploaded photo.
"""
user_id = require_authenticated_user(current_user)
try:
avatar_bytes = _load_youtube_image_bytes(avatar_url)
logger.info(f"[YouTube] 🔍 Starting avatar transformation for user_id={user_id}, project={project_id}")
logger.info(f"[YouTube] Transforming avatar for project {project_id}")
# Build context-aware transformation prompt using user inputs
prompt_parts = [
"Transform this photo into a professional YouTube creator avatar:",
"Significantly enhance and optimize the image for YouTube video production;",
"Apply professional photo editing: improve lighting, color grading, and composition;",
"Enhance facial features: brighten eyes, smooth skin, add professional makeup if needed;",
"Improve background: replace with clean, professional studio background or subtle gradient;",
"Adjust clothing: ensure professional, YouTube-appropriate attire;",
"Optimize for video: ensure the person looks natural and engaging on camera;",
"Half-length portrait format, person looking directly at camera with confident, engaging expression;",
"Professional studio lighting with soft shadows, high-quality photography;",
"Maintain the person's core appearance and identity while making significant improvements;",
"Ultra realistic, 4k quality, professional photography style;",
"Suitable for video generation, thumbnails, and YouTube channel branding."
]
# Add context from user inputs to make transformation more targeted
if video_type:
video_type_lower = video_type.lower()
if video_type_lower == "tutorial":
prompt_parts.append("Approachable instructor style, professional yet friendly appearance")
elif video_type_lower == "review":
prompt_parts.append("Trustworthy reviewer style, confident and credible appearance")
elif video_type_lower == "educational":
prompt_parts.append("Knowledgeable educator style, professional and warm appearance")
elif video_type_lower == "entertainment":
prompt_parts.append("Energetic creator style, expressive and fun appearance")
elif video_type_lower == "vlog":
prompt_parts.append("Authentic vlogger style, approachable and relatable appearance")
elif video_type_lower == "product_demo":
prompt_parts.append("Professional presenter style, polished and enthusiastic appearance")
elif video_type_lower == "reaction":
prompt_parts.append("Expressive creator style, authentic and engaging appearance")
elif video_type_lower == "storytelling":
prompt_parts.append("Storyteller style, warm and engaging narrator appearance")
if target_audience:
audience_lower = target_audience.lower()
if "young" in audience_lower or "gen z" in audience_lower or "millennial" in audience_lower:
prompt_parts.append("Modern, youthful, vibrant aesthetic")
elif "executive" in audience_lower or "professional" in audience_lower or "business" in audience_lower:
prompt_parts.append("Polished, credible, authoritative professional appearance")
elif "creative" in audience_lower:
prompt_parts.append("Artistic, expressive, creative professional style")
if brand_style:
style_lower = brand_style.lower()
if "minimal" in style_lower or "minimalist" in style_lower:
prompt_parts.append("Clean, minimalist aesthetic")
if "tech" in style_lower or "modern" in style_lower:
prompt_parts.append("Tech-forward, modern style")
if "energetic" in style_lower or "colorful" in style_lower:
prompt_parts.append("Vibrant, energetic appearance")
base_prompt = " ".join(prompt_parts)
# Optimize the prompt using WaveSpeed prompt optimizer for better results
try:
from services.wavespeed.client import WaveSpeedClient
wavespeed_client = WaveSpeedClient()
logger.info(f"[YouTube] Optimizing transformation prompt using WaveSpeed prompt optimizer")
transformation_prompt = wavespeed_client.optimize_prompt(
text=base_prompt,
mode="image",
style="realistic", # Use realistic style for photo editing
enable_sync_mode=True,
timeout=30
)
logger.info(f"[YouTube] Prompt optimized successfully (length: {len(transformation_prompt)} chars)")
except Exception as opt_error:
logger.warning(f"[YouTube] Prompt optimization failed, using base prompt: {opt_error}")
transformation_prompt = base_prompt
# Use HuggingFace for image editing (only available option)
# Note: This uses async processing with polling (~30 seconds expected)
image_options = {
"provider": "huggingface", # Explicitly use HuggingFace (only option for image editing)
"model": None, # Use default model (Qwen/Qwen-Image-Edit)
}
logger.info(f"[YouTube] Starting avatar transformation (this may take ~30 seconds due to async processing)")
result = edit_image(
input_image_bytes=avatar_bytes,
prompt=transformation_prompt,
options=image_options,
user_id=user_id,
)
logger.info(f"[YouTube] ✅ Avatar transformation completed successfully")
unique_id = str(uuid.uuid4())[:8]
transformed_filename = f"yt_presenter_{project_id or 'temp'}_{unique_id}.png"
transformed_path = YOUTUBE_AVATARS_DIR / transformed_filename
with open(transformed_path, "wb") as f:
f.write(result.image_bytes)
transformed_url = f"/api/youtube/images/avatars/{transformed_filename}"
logger.info(f"[YouTube] Transformed avatar saved to: {transformed_path}")
if project_id:
try:
save_asset_to_library(
db=db,
user_id=user_id,
asset_type="image",
source_module="youtube_creator",
filename=transformed_filename,
file_url=transformed_url,
file_path=str(transformed_path),
file_size=len(result.image_bytes),
mime_type="image/png",
title=f"YouTube Creator (Transformed) - {project_id}",
description="AI-transformed YouTube creator avatar from uploaded photo",
prompt=transformation_prompt,
tags=["youtube", "avatar", "presenter", project_id],
provider=result.provider,
model=result.model,
asset_metadata={
"project_id": project_id,
"type": "transformed_presenter",
"original_avatar_url": avatar_url,
"status": "completed",
},
)
except Exception as e:
logger.warning(f"[YouTube] Failed to save transformed avatar asset: {e}")
return {
"avatar_url": transformed_url,
"avatar_filename": transformed_filename,
"message": "Avatar transformed successfully",
}
except Exception as exc:
logger.error(f"[YouTube] Avatar transformation failed: {exc}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Avatar transformation failed: {str(exc)}")
@router.post("/generate")
async def generate_creator_avatar(
    project_id: Optional[str] = Form(None),
    audience: Optional[str] = Form(None),
    content_type: Optional[str] = Form(None),
    video_plan_json: Optional[str] = Form(None),
    brand_style: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Auto-generate a YouTube creator avatar optimized from video plan context.

    Forwards the submitted form fields (project, audience, content type,
    optional video plan JSON and brand style) to
    ``_generate_avatar_from_context`` and returns that helper's response
    unchanged.

    Raises:
        HTTPException: Propagated as-is when authentication or the helper
            raises one; any other failure is logged and reported as a 500.
    """
    user_id = require_authenticated_user(current_user)
    try:
        return await _generate_avatar_from_context(
            user_id=user_id,
            project_id=project_id,
            audience=audience,
            content_type=content_type,
            video_plan_json=video_plan_json,
            brand_style=brand_style,
            db=db,
        )
    except HTTPException:
        # A deliberate status code raised further down must reach the client
        # instead of being rewritten into a generic 500.
        raise
    except Exception as exc:
        logger.error(f"[YouTube] Avatar generation failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Avatar generation failed: {str(exc)}") from exc
@router.post("/regenerate")
async def regenerate_creator_avatar(
    video_plan_json: str = Form(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Regenerate a YouTube creator avatar using the same video plan context.

    Parses ``video_plan_json``, reads ``target_audience``, ``video_type`` and
    ``visual_style`` from it and calls ``_generate_avatar_from_context`` again
    with those values. No seed is passed from here; any variation between
    runs comes from the helper.

    Returns:
        Dict with ``avatar_url``, ``avatar_filename``, ``avatar_prompt`` and a
        ``message``.

    Raises:
        HTTPException: 400 when ``video_plan_json`` is not a JSON object;
            HTTPExceptions raised by the helper are propagated unchanged;
            every other failure is logged and reported as a 500.
    """
    import json

    user_id = require_authenticated_user(current_user)

    # A malformed plan is a client error, so report it as 400 rather than 500.
    try:
        plan_data = json.loads(video_plan_json)
    except (TypeError, ValueError) as exc:
        logger.warning(f"[YouTube] Avatar regeneration rejected, invalid video_plan_json: {exc}")
        raise HTTPException(status_code=400, detail="video_plan_json must be valid JSON") from exc
    if not isinstance(plan_data, dict):
        raise HTTPException(status_code=400, detail="video_plan_json must be a JSON object")

    try:
        # Extract context from plan data
        audience = plan_data.get("target_audience", "")
        content_type = plan_data.get("video_type", "")
        brand_style = plan_data.get("visual_style", "")

        # str() keeps the log line safe when the plan holds a non-string value.
        logger.info(
            f"[YouTube] Regenerating avatar for project {project_id}: "
            f"video_type={content_type}, audience={str(audience)[:50] if audience else 'none'}"
        )

        avatar_response = await _generate_avatar_from_context(
            user_id=user_id,
            project_id=project_id,
            audience=audience,
            content_type=content_type,
            video_plan_json=video_plan_json,
            brand_style=brand_style,
            db=db,
        )

        # Return the avatar prompt along with the URL for the frontend
        return {
            "avatar_url": avatar_response.get("avatar_url"),
            "avatar_filename": avatar_response.get("avatar_filename"),
            "avatar_prompt": avatar_response.get("avatar_prompt"),
            "message": "Avatar regenerated successfully",
        }
    except HTTPException:
        # Preserve status codes chosen further down the call chain.
        raise
    except Exception as exc:
        logger.error(f"[YouTube] Avatar regeneration failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Avatar regeneration failed: {str(exc)}") from exc

View File

@@ -0,0 +1,470 @@
"""YouTube Creator scene image generation handlers."""
from pathlib import Path
from typing import Dict, Any, Optional
import uuid
from concurrent.futures import ThreadPoolExecutor
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
from fastapi.responses import FileResponse
from pydantic import BaseModel
from sqlalchemy.orm import Session
from middleware.auth_middleware import get_current_user
from services.database import get_db
from services.subscription import PricingService
from services.subscription.preflight_validator import validate_image_generation_operations
from services.llm_providers.main_image_generation import generate_image, generate_character_image
from utils.asset_tracker import save_asset_to_library
from utils.logger_utils import get_service_logger
from ..task_manager import task_manager
# Router and logger shared by every scene-image endpoint in this module.
router = APIRouter(tags=["youtube-image"])
logger = get_service_logger("api.youtube.image")
# Storage directories, both located under the fourth ancestor of this file's path.
base_dir = Path(__file__).parent.parent.parent.parent
# Generated scene images are written here; the directory is created at import time.
YOUTUBE_IMAGES_DIR = base_dir / "youtube_images"
YOUTUBE_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
# Avatar files are only read from this module's visible code, and the
# directory is not created here.
YOUTUBE_AVATARS_DIR = base_dir / "youtube_avatars"
# Thread pool for background image generation.
# NOTE(review): none of the functions visible in this part of the file submit
# work to this executor - confirm whether it is still needed.
_image_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="youtube_image")
# Request payload for generating one YouTube scene image.
class YouTubeImageRequest(BaseModel):
    # Scene identifier; becomes part of the stored file name and the asset tags.
    scene_id: str
    # Declared optional, but the POST /image handler answers 400 when it is missing or empty.
    scene_title: Optional[str] = None
    # Free-form scene text; only a leading excerpt is placed in the prompt.
    scene_content: Optional[str] = None
    # URL of a stored avatar; when it can be loaded, character-consistent generation is attempted.
    base_avatar_url: Optional[str] = None
    # Overall video idea; only the first 80 characters are placed in the prompt.
    idea: Optional[str] = None
    # Pixel size handed to the regular (non-character) image generator.
    width: Optional[int] = 1024
    height: Optional[int] = 1024
    # Replaces the built prompt, but only when no avatar is used for generation.
    custom_prompt: Optional[str] = None
    # Character-generation settings; the background task falls back to
    # "Realistic", "Quality" and "16:9" when they are omitted.
    style: Optional[str] = None  # e.g., "Realistic", "Fiction"
    rendering_speed: Optional[str] = None  # e.g., "Quality", "Turbo"
    aspect_ratio: Optional[str] = None  # e.g., "16:9"
    # Model for the regular generator; the task uses "ideogram-v3-turbo" when omitted.
    model: Optional[str] = None  # e.g., "ideogram-v3-turbo", "qwen-image"
def require_authenticated_user(current_user: Dict[str, Any]) -> str:
    """Return the caller's user ID as a string, or reject the request.

    Args:
        current_user: Mapping produced by the auth dependency; may be empty
            or ``None``.

    Returns:
        The value stored under ``"id"``, converted with ``str``.

    Raises:
        HTTPException: 401 when there is no user mapping or its ``"id"`` is
            missing or falsy.
    """
    identifier = None
    if current_user:
        identifier = current_user.get("id")

    if identifier:
        return str(identifier)

    raise HTTPException(status_code=401, detail="Authentication required")
def _load_base_avatar_bytes(avatar_url: str) -> Optional[bytes]:
    """Load the bytes of a stored avatar image referenced by URL.

    Only the final path segment of the URL (query string removed) is used as
    the file name. URLs starting with ``/api/youtube/avatars/`` are looked up
    in the YouTube avatar directory and ``/api/podcast/avatars/`` URLs in the
    podcast avatar directory; any other URL is tried in the YouTube directory
    first and then in the podcast directory.

    Args:
        avatar_url: URL (or bare file name) of the avatar, as sent by the client.

    Returns:
        The file contents, or ``None`` when the reference is unsafe, the file
        does not exist, or reading fails. This function never raises.
    """
    try:
        # Strip the query string BEFORE taking the last path segment, so that
        # a "/" inside a query value cannot be mistaken for the file name.
        url_path = avatar_url.split("?", 1)[0]
        filename = url_path.rsplit("/", 1)[-1]

        # The URL comes from the client: accept plain file names only.
        # Backslashes act as path separators on Windows.
        if filename in ("", ".", "..") or "\\" in filename:
            logger.warning(f"[YouTube] Rejected unsafe avatar reference: {avatar_url}")
            return None

        podcast_avatars_dir = base_dir / "podcast_avatars"

        if avatar_url.startswith("/api/youtube/avatars/"):
            # YouTube avatar
            avatar_path = YOUTUBE_AVATARS_DIR / filename
        elif avatar_url.startswith("/api/podcast/avatars/"):
            # Podcast avatar (cross-module usage)
            avatar_path = podcast_avatars_dir / filename
        else:
            # Unknown prefix: check YouTube avatars first, then podcast avatars.
            avatar_path = YOUTUBE_AVATARS_DIR / filename
            if not avatar_path.exists():
                avatar_path = podcast_avatars_dir / filename

        if not avatar_path.is_file():
            logger.warning(f"[YouTube] Avatar file not found: {avatar_path}")
            return None

        # Read first, then report success, so the log never claims a load
        # that subsequently failed.
        avatar_bytes = avatar_path.read_bytes()
        logger.info(f"[YouTube] Successfully loaded avatar: {avatar_path}")
        return avatar_bytes
    except Exception as e:
        # Best effort: callers treat None as "no avatar available".
        logger.error(f"[YouTube] Error loading avatar from {avatar_url}: {e}")
        return None
def _save_scene_image(image_bytes: bytes, scene_id: str) -> Dict[str, str]:
    """Persist a generated scene image and return its file and URL details.

    The file is written to ``YOUTUBE_IMAGES_DIR`` as
    ``yt_scene_<scene id>_<8 hex chars>.png``.

    Args:
        image_bytes: Encoded image data to write.
        scene_id: Client-supplied scene identifier. Characters other than
            ASCII letters, digits, ``_`` and ``-`` are replaced with ``_``
            and the result is capped at 64 characters before it is used in
            the file name.

    Returns:
        Dict with ``image_filename``, ``image_path`` and ``image_url``.

    Raises:
        OSError: If the file cannot be written.
    """
    import re

    # scene_id is untrusted input: path separators would point outside the
    # image directory (or at a missing sub-directory), and ".." would produce
    # a name the serving endpoint refuses.
    safe_scene_id = re.sub(r"[^A-Za-z0-9_-]+", "_", str(scene_id))[:64]

    unique_id = str(uuid.uuid4())[:8]
    image_filename = f"yt_scene_{safe_scene_id}_{unique_id}.png"
    image_path = YOUTUBE_IMAGES_DIR / image_filename

    with open(image_path, "wb") as f:
        f.write(image_bytes)

    image_url = f"/api/youtube/images/scenes/{image_filename}"
    return {
        "image_filename": image_filename,
        "image_path": str(image_path),
        "image_url": image_url,
    }
# Response body of POST /image: tells the client whether a background task was queued.
class YouTubeImageTaskResponse(BaseModel):
    # False when the task could not be created or scheduled.
    success: bool
    # Identifier to poll via GET /image/status/{task_id}; "" when success is False.
    task_id: str
    # Human-readable outcome description.
    message: str
@router.post("/image", response_model=YouTubeImageTaskResponse)
async def generate_youtube_scene_image(
    background_tasks: BackgroundTasks,
    request: YouTubeImageRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Queue generation of a YouTube scene image as a background task.

    Runs the pre-flight subscription check, creates a task in
    ``task_manager`` and schedules ``_execute_image_generation_task``.
    The image itself is produced later; clients poll the status endpoint
    with the returned ``task_id``.

    Returns:
        ``YouTubeImageTaskResponse`` with ``success=True`` and the task id, or
        ``success=False`` with an empty id when the task could not be created
        or scheduled.

    Raises:
        HTTPException: 401 when unauthenticated, 400 when ``scene_title`` is
            missing, whatever the pre-flight validation raises, and 500 for
            any other unexpected failure.
    """
    # Authenticate before touching current_user: dereferencing it for the log
    # line first would turn a missing user into a 500 instead of a 401.
    user_id = require_authenticated_user(current_user)
    logger.info(f"[YouTube] Image generation request received: scene='{request.scene_title}', user={user_id}")
    logger.info(f"[YouTube] User authenticated: {user_id}")

    if not request.scene_title:
        raise HTTPException(status_code=400, detail="Scene title is required")

    try:
        # Pre-flight subscription validation
        pricing_service = PricingService(db)
        validate_image_generation_operations(
            pricing_service=pricing_service,
            user_id=user_id,
            num_images=1,
        )
        logger.info(f"[YouTube] ✅ Pre-flight validation passed for user {user_id}")

        # Create background task
        logger.info(f"[YouTube] Creating task for user {user_id}")
        task_id = task_manager.create_task("youtube_image_generation")
        logger.info(
            f"[YouTube] Created image generation task {task_id} for user {user_id}, "
            f"scene='{request.scene_title}'"
        )

        # Verify task was created
        initial_status = task_manager.get_task_status(task_id)
        if not initial_status:
            logger.error(f"[YouTube] Failed to create task {task_id} - task not found immediately after creation")
            return YouTubeImageTaskResponse(
                success=False,
                task_id="",
                message="Failed to create image generation task. Please try again."
            )

        # Add background task (pass request data, not database session)
        try:
            background_tasks.add_task(
                _execute_image_generation_task,
                task_id=task_id,
                request_data=request.dict(),  # Convert to dict for background task
                user_id=user_id,
            )
            logger.info(f"[YouTube] Background image generation task added for task {task_id}")
        except Exception as bg_error:
            logger.error(f"[YouTube] Failed to add background task for {task_id}: {bg_error}", exc_info=True)
            # Mark task as failed so pollers do not wait on work that never started
            task_manager.update_task_status(
                task_id,
                "failed",
                error=str(bg_error),
                message="Failed to start image generation task"
            )
            return YouTubeImageTaskResponse(
                success=False,
                task_id="",
                message=f"Failed to start image generation task: {str(bg_error)}"
            )

        logger.info(f"[YouTube] Returning success response for task {task_id}")
        return YouTubeImageTaskResponse(
            success=True,
            task_id=task_id,
            message=f"Image generation started for '{request.scene_title}'"
        )
    except HTTPException:
        # Validation errors already carry the intended status code.
        raise
    except Exception as exc:
        logger.error(f"[YouTube] Failed to create image generation task: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to start image generation: {str(exc)}")
def _build_scene_image_prompt(request: YouTubeImageRequest, with_avatar: bool) -> str:
    """Compose the comma-separated text prompt for a scene, with or without an on-camera creator."""
    if with_avatar:
        prompt_parts = []
        if request.scene_title:
            prompt_parts.append(f"Scene: {request.scene_title}")
        if request.scene_content:
            content_preview = request.scene_content[:200].replace("\n", " ").strip()
            prompt_parts.append(f"Context: {content_preview}")
        if request.idea:
            prompt_parts.append(f"Video idea: {request.idea[:80].strip()}")
        prompt_parts.append("YouTube creator on camera, engaging and dynamic framing")
        prompt_parts.append("Clean background, good lighting, thumbnail-friendly composition")
    else:
        prompt_parts = [
            "YouTube creator scene",
            "clean, modern background",
            "good lighting, high contrast for thumbnail clarity",
        ]
        if request.scene_title:
            prompt_parts.append(f"Scene theme: {request.scene_title}")
        if request.scene_content:
            prompt_parts.append(f"Context: {request.scene_content[:120].replace(chr(10), ' ')}")
        if request.idea:
            prompt_parts.append(f"Topic: {request.idea[:80]}")
        prompt_parts.append("video-optimized composition, 16:9 aspect ratio")
    return ", ".join(prompt_parts)


def _execute_image_generation_task(task_id: str, request_data: dict, user_id: str):
    """Background task that generates, stores and registers one scene image.

    Steps: rebuild the request, optionally load the base avatar, build the
    prompt, generate the image (character-consistent when an avatar is
    available, falling back to regular generation if that fails), save the
    file, record it in the asset library (best effort) and publish the
    result on the task.

    Progress and the final outcome are reported through ``task_manager``;
    nothing is returned and no exception leaves this function.

    Args:
        task_id: Identifier of the task whose status is updated.
        request_data: ``YouTubeImageRequest`` fields as a plain dict.
        user_id: Authenticated user the image is generated for.
    """
    db = None
    try:
        # Rebuilt inside the try block so an invalid payload marks the task as
        # failed instead of leaving it in its initial state.
        request = YouTubeImageRequest(**request_data)
        logger.info(
            f"[YouTubeImageGen] Background task started for task {task_id}, "
            f"scene='{request.scene_title}', user={user_id}"
        )

        task_manager.update_task_status(
            task_id, "processing", progress=10.0, message="Preparing image generation..."
        )

        # The handler does not pass its session along, so open one for this task.
        db = next(get_db())
        logger.info(f"[YouTubeImageGen] Database session acquired for task {task_id}")

        # Load avatar if provided
        base_avatar_bytes = None
        if request.base_avatar_url:
            base_avatar_bytes = _load_base_avatar_bytes(request.base_avatar_url)
            if base_avatar_bytes:
                logger.info(f"[YouTubeImageGen] Loaded base avatar for task {task_id}")
            else:
                logger.warning(f"[YouTubeImageGen] Could not load base avatar for task {task_id}")

        image_prompt = _build_scene_image_prompt(request, with_avatar=bool(base_avatar_bytes))

        task_manager.update_task_status(
            task_id, "processing", progress=30.0, message="Generating image..."
        )
        logger.info(f"[YouTubeImageGen] Starting image generation for task {task_id}")

        provider = "wavespeed"
        # Options for the regular generator (used directly or as the fallback).
        image_options = {
            "provider": "wavespeed",
            "model": request.model or "ideogram-v3-turbo",
            "width": request.width or 1024,
            "height": request.height or 576,
        }

        if base_avatar_bytes:
            logger.info(f"[YouTubeImageGen] Using character-consistent generation for task {task_id}")
            try:
                # Use centralized character image generation with subscription checks and tracking
                image_bytes = generate_character_image(
                    prompt=image_prompt,
                    reference_image_bytes=base_avatar_bytes,
                    user_id=user_id,
                    style=request.style or "Realistic",
                    aspect_ratio=request.aspect_ratio or "16:9",
                    rendering_speed=request.rendering_speed or "Quality",
                    timeout=60,
                )
                model = "ideogram-character"
                generation_type = "character"
                logger.info(f"[YouTubeImageGen] Character image generation successful for task {task_id}")
            except Exception as char_error:
                logger.warning(f"[YouTubeImageGen] Character generation failed for task {task_id}: {char_error}")
                logger.info(f"[YouTubeImageGen] Falling back to regular image generation for task {task_id}")
                # Fall back to regular image generation with subscription tracking
                result = generate_image(
                    prompt=image_prompt,
                    options=image_options,
                    user_id=user_id,
                )
                image_bytes = result.image_bytes
                # Report what was actually used, not the character model.
                model = image_options["model"]
                generation_type = "scene"
        else:
            logger.info(f"[YouTubeImageGen] Generating scene from scratch for task {task_id}")
            # Use centralized image generation with subscription tracking
            result = generate_image(
                prompt=request.custom_prompt or image_prompt,
                options=image_options,
                user_id=user_id,
            )
            image_bytes = result.image_bytes
            # Honour a client-selected model in the reported metadata and cost.
            model = image_options["model"]
            generation_type = "scene"

        # Validate image bytes before saving
        if not image_bytes:
            raise ValueError("Image generation returned empty bytes")

        # Basic validation: check for a PNG/JPEG header. Only a warning,
        # because other formats might still be valid.
        if not image_bytes.startswith((b'\x89PNG', b'\xff\xd8\xff')):
            logger.warning(f"[YouTubeImageGen] Generated image may not be valid PNG/JPEG for task {task_id}")

        task_manager.update_task_status(
            task_id, "processing", progress=80.0, message="Saving image..."
        )

        # Save image and verify the file landed on disk
        try:
            image_metadata = _save_scene_image(image_bytes, request.scene_id)
            saved_path = Path(image_metadata["image_path"])
            if not saved_path.exists() or saved_path.stat().st_size == 0:
                raise IOError(f"Image file was not saved correctly: {saved_path}")
            logger.info(f"[YouTubeImageGen] Image saved successfully: {saved_path} ({saved_path.stat().st_size} bytes)")
        except Exception as save_error:
            logger.error(f"[YouTubeImageGen] Failed to save image for task {task_id}: {save_error}", exc_info=True)
            raise

        cost = 0.10 if model == "ideogram-v3-turbo" else 0.05

        # Save to asset library (best effort: a failure here must not fail the task)
        try:
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="image",
                source_module="youtube_creator",
                filename=image_metadata["image_filename"],
                file_url=image_metadata["image_url"],
                file_path=image_metadata["image_path"],
                file_size=len(image_bytes),
                mime_type="image/png",
                title=f"{request.scene_title} - YouTube Scene",
                description=f"YouTube scene image for: {request.scene_title}",
                tags=["youtube_creator", "scene_image", f"scene_{request.scene_id}"],
                provider=provider,
                model=model,
                cost=cost,
                asset_metadata={
                    "scene_id": request.scene_id,
                    "scene_title": request.scene_title,
                    "generation_type": generation_type,
                    "width": request.width or 1024,
                    "height": request.height or 576,
                },
            )
        except Exception as e:
            logger.warning(f"[YouTubeImageGen] Failed to save image asset to library: {e}")

        # Success!
        task_manager.update_task_status(
            task_id,
            "completed",
            progress=100.0,
            message=f"Image generated successfully for '{request.scene_title}'",
            result={
                "scene_id": request.scene_id,
                "scene_title": request.scene_title,
                "image_filename": image_metadata["image_filename"],
                "image_url": image_metadata["image_url"],
                "provider": provider,
                "model": model,
                "width": request.width or 1024,
                "height": request.height or 576,
                "file_size": len(image_bytes),
                "cost": cost,
            }
        )
        logger.info(f"[YouTubeImageGen] ✅ Task {task_id} completed successfully")
    except Exception as exc:
        error_msg = str(exc)
        logger.error(f"[YouTubeImageGen] Task {task_id} failed: {error_msg}", exc_info=True)
        task_manager.update_task_status(
            task_id,
            "failed",
            error=error_msg,
            message=f"Image generation failed: {error_msg}"
        )
    finally:
        if db:
            db.close()
            logger.info(f"[YouTubeImageGen] Database session closed for task {task_id}")
@router.get("/image/status/{task_id}")
async def get_image_generation_status(
    task_id: str,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Report the state of an image generation task.

    Returns whatever ``task_manager.get_task_status`` holds for ``task_id``
    (status, progress and, once available, the result).

    Raises:
        HTTPException: 401 when unauthenticated, 404 when the task manager
            has no entry for ``task_id``.
    """
    require_authenticated_user(current_user)
    logger.info(f"[YouTubeAPI] Getting image generation status for task: {task_id}")

    snapshot = task_manager.get_task_status(task_id)

    # Guard clause: unknown task -> 404 with guidance for the client.
    if not snapshot:
        logger.warning(
            f"[YouTubeAPI] Image generation task {task_id} not found."
        )
        not_found_detail = {
            "error": "Task not found",
            "message": "The image generation task was not found. It may have expired, been cleaned up, or the server may have restarted.",
            "task_id": task_id,
            "user_action": "Please try generating the image again."
        }
        raise HTTPException(status_code=404, detail=not_found_detail)

    state = snapshot.get('status', 'unknown')
    percent = snapshot.get('progress', 0)
    has_result = 'result' in snapshot
    logger.info(f"[YouTubeAPI] Task {task_id} status: {state}, progress: {percent}, has_result: {has_result}")
    return snapshot
@router.get("/images/{category}/{filename}")
async def serve_youtube_image(
    category: str,
    filename: str,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Serve a stored YouTube image from the avatar or scene directory.

    ``category`` selects the directory (``avatars`` or ``scenes``) and
    ``filename`` must be a plain file name inside it.

    Raises:
        HTTPException: 401 when unauthenticated, 400 for an unknown category
            or a file name containing ``..``, ``/`` or a backslash, 404 when
            the file does not exist.
    """
    require_authenticated_user(current_user)

    directories = {
        "avatars": YOUTUBE_AVATARS_DIR,
        "scenes": YOUTUBE_IMAGES_DIR,
    }
    if category not in directories:
        raise HTTPException(status_code=400, detail="Invalid image category. Must be 'avatars' or 'scenes'")

    # Refuse anything that could step outside the selected directory.
    forbidden_tokens = ("..", "/", "\\")
    if any(token in filename for token in forbidden_tokens):
        raise HTTPException(status_code=400, detail="Invalid filename")

    image_path = directories[category] / filename
    if not (image_path.exists() and image_path.is_file()):
        raise HTTPException(status_code=404, detail="Image not found")

    return FileResponse(
        path=str(image_path),
        media_type="image/png",
        filename=filename,
    )