Base code
This commit is contained in:
2
backend/api/youtube/__init__.py
Normal file
2
backend/api/youtube/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""YouTube Creator Studio API endpoints."""
|
||||
|
||||
11
backend/api/youtube/handlers/__init__.py
Normal file
11
backend/api/youtube/handlers/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""
YouTube Creator handlers package.

Contains endpoints for scene audio generation, avatar upload/optimization,
and scene image generation.
"""

# Explicitly export nothing: a star-import of this package yields no names.
__all__ = []
|
||||
|
||||
|
||||
465
backend/api/youtube/handlers/audio.py
Normal file
465
backend/api/youtube/handlers/audio.py
Normal file
@@ -0,0 +1,465 @@
|
||||
"""YouTube Creator scene audio generation handlers."""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import Dict, Any, Optional
|
||||
from pydantic import BaseModel
|
||||
|
||||
from services.database import get_db
|
||||
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
from utils.asset_tracker import save_asset_to_library
|
||||
from models.story_models import StoryAudioResult
|
||||
from services.story_writer.audio_generation_service import StoryAudioGenerationService
|
||||
from pathlib import Path
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
# Router and logger shared by every handler in this module.
router = APIRouter(tags=["youtube-audio"])
logger = get_service_logger("api.youtube.audio")

# Generated narration files live in "youtube_audio", four directory levels
# above this file. The directory is created at import time if it is missing.
base_dir = Path(__file__).parent.parent.parent.parent
YOUTUBE_AUDIO_DIR = base_dir.joinpath("youtube_audio")
YOUTUBE_AUDIO_DIR.mkdir(parents=True, exist_ok=True)

# Audio generation service instance, configured to write into the directory above.
audio_service = StoryAudioGenerationService(output_dir=str(YOUTUBE_AUDIO_DIR))
|
||||
|
||||
# WaveSpeed Minimax Speech voice ids include language-specific voices.
# Ref: https://wavespeed.ai/docs/docs-api/minimax/minimax_speech_voice_id
#
# Maps a lowercase language code to the `language_boost` label sent to WaveSpeed.
LANGUAGE_CODE_TO_LANGUAGE_BOOST = dict(
    (
        ("en", "English"),
        ("es", "Spanish"),
        ("fr", "French"),
        ("de", "German"),
        ("pt", "Portuguese"),
        ("it", "Italian"),
        ("hi", "Hindi"),
        ("ar", "Arabic"),
        ("ru", "Russian"),
        ("ja", "Japanese"),
        ("ko", "Korean"),
        ("zh", "Chinese"),
        ("vi", "Vietnamese"),
        ("id", "Indonesian"),
        ("tr", "Turkish"),
        ("nl", "Dutch"),
        ("pl", "Polish"),
        ("th", "Thai"),
        ("uk", "Ukrainian"),
        ("el", "Greek"),
        ("cs", "Czech"),
        ("fi", "Finnish"),
        ("ro", "Romanian"),
    )
)
|
||||
|
||||
# Default (first-choice) language-specific Minimax voice per `language_boost` label.
# Every id follows the "<Language>_male_1_v1" naming scheme, so the table is derived
# from the list of labels. English is intentionally absent: it keeps using the
# existing "persona" voices.
LANGUAGE_BOOST_TO_DEFAULT_VOICE_ID = {
    boost_label: f"{boost_label}_male_1_v1"
    for boost_label in (
        "Spanish",
        "French",
        "German",
        "Portuguese",
        "Italian",
        "Hindi",
        "Arabic",
        "Russian",
        "Japanese",
        "Korean",
        "Chinese",
        "Vietnamese",
        "Indonesian",
        "Turkish",
        "Dutch",
        "Polish",
        "Thai",
        "Ukrainian",
        "Greek",
        "Czech",
        "Finnish",
        "Romanian",
    )
}
|
||||
|
||||
|
||||
def _resolve_language_boost(language: Optional[str], explicit_language_boost: Optional[str]) -> str:
|
||||
"""
|
||||
Determine the effective WaveSpeed `language_boost`.
|
||||
- If user explicitly provided language_boost, use it (including "auto").
|
||||
- Else if language code provided, map to the WaveSpeed boost label.
|
||||
- Else default to English (backwards compatible).
|
||||
"""
|
||||
if explicit_language_boost is not None and str(explicit_language_boost).strip() != "":
|
||||
return str(explicit_language_boost).strip()
|
||||
|
||||
if language is not None and str(language).strip() != "":
|
||||
lang_code = str(language).strip().lower()
|
||||
return LANGUAGE_CODE_TO_LANGUAGE_BOOST.get(lang_code, "auto")
|
||||
|
||||
return "English"
|
||||
|
||||
def select_optimal_emotion(scene_title: str, narration: str, video_plan_context: Optional[Dict[str, Any]] = None) -> str:
    """
    Pick an emotion for a scene's narration from simple keyword heuristics.

    Available emotions: "happy", "sad", "angry", "fearful", "disgusted", "surprised", "neutral"
    (only "happy", "surprised" and "neutral" are ever returned by these rules).

    Args:
        scene_title: Scene title; "hook" / "cta" in the title select specific rules.
        narration: Narration text scanned (case-insensitively, by substring) for keywords.
        video_plan_context: Optional plan dict; its "tone" entry, when it mentions
            serious/professional or fun/entertaining, overrides the keyword result.

    Returns:
        The selected emotion string; "happy" when no rule matches.
    """
    title_lower = (scene_title or "").lower()
    scene_text = f"{scene_title or ''} {narration or ''}".lower()

    def mentions(*keywords: str) -> bool:
        # Substring match, so e.g. "like" also matches "likely".
        return any(keyword in scene_text for keyword in keywords)

    # Rules are evaluated in priority order; the first match wins.
    if "hook" in title_lower or mentions("exciting", "amazing", "unbelievable", "shocking", "wow"):
        selected_emotion = "surprised"  # Excited and attention-grabbing
    elif mentions("emotional", "touching", "heartwarming", "inspiring", "motivational"):
        selected_emotion = "happy"  # Warm and uplifting
    elif mentions("important", "critical", "serious", "professional", "expert"):
        selected_emotion = "neutral"  # Professional and serious
    elif mentions("problem", "solution", "fix", "help", "guide"):
        selected_emotion = "happy"  # Helpful and encouraging
    elif "cta" in title_lower or mentions("subscribe", "like", "comment", "share", "action"):
        selected_emotion = "happy"  # Confident and encouraging
    elif mentions("warning", "danger", "risk", "issue"):
        # "problem" was also listed here originally but could never match:
        # the tutorial rule above already claims it.
        selected_emotion = "neutral"  # Serious but not alarming
    else:
        selected_emotion = "happy"  # Default: engaging YouTube content

    # The overall tone of the video plan, when present, overrides the per-scene guess.
    if video_plan_context:
        # `.get("tone", "")` only covers a missing key; an explicit None (JSON null)
        # or a non-string value must not crash the request.
        tone = str(video_plan_context.get("tone") or "").lower()
        if "serious" in tone or "professional" in tone:
            selected_emotion = "neutral"
        elif "fun" in tone or "entertaining" in tone:
            selected_emotion = "happy"

    return selected_emotion
|
||||
|
||||
|
||||
def select_optimal_voice(scene_title: str, narration: str, video_plan_context: Optional[Dict[str, Any]] = None) -> str:
    """
    Pick a Minimax persona voice for a scene from keyword heuristics.

    The video plan context (video type, target audience, tone) chooses a baseline
    voice; scene-level keywords in the title/narration may then override it.

    Available voices: Wise_Woman, Friendly_Person, Inspirational_girl, Deep_Voice_Man,
    Calm_Woman, Casual_Guy, Lively_Girl, Patient_Man, Young_Knight, Determined_Man,
    Lovely_Girl, Decent_Boy, Imposing_Manner, Elegant_Man, Abbess, Sweet_Girl_2, Exuberant_Girl

    Args:
        scene_title: Scene title; "hook" / "cta" in the title select specific voices.
        narration: Narration text scanned (case-insensitively, by substring) for keywords.
        video_plan_context: Optional plan dict with "video_type", "target_audience"
            and "tone" entries.

    Returns:
        The selected voice_id string; "Casual_Guy" when no rule matches.
    """
    # Default to Casual_Guy for engaging YouTube content
    selected_voice = "Casual_Guy"

    if video_plan_context:

        def context_text(key: str) -> str:
            # `.get(key, "")` only covers a missing key; tolerate explicit None
            # (JSON null) and non-string values instead of raising AttributeError.
            return str(video_plan_context.get(key) or "").lower()

        video_type = context_text("video_type")
        target_audience = context_text("target_audience")
        tone = context_text("tone")

        def video_type_has(*keywords: str) -> bool:
            return any(keyword in video_type for keyword in keywords)

        if video_type_has("tutorial", "educational", "how-to", "guide", "course"):
            # Educational/Professional content
            if "professional" in tone or "expert" in target_audience:
                selected_voice = "Wise_Woman"  # Authoritative and trustworthy
            else:
                selected_voice = "Patient_Man"  # Clear and instructional
        elif video_type_has("entertainment", "vlog", "lifestyle", "story", "review"):
            # Entertainment/Casual content
            if "young" in target_audience or "millennial" in target_audience:
                selected_voice = "Casual_Guy"  # Friendly and relatable
            elif "female" in target_audience or "women" in target_audience:
                selected_voice = "Lively_Girl"  # Energetic and engaging
            else:
                selected_voice = "Friendly_Person"  # Approachable
        elif video_type_has("motivational", "inspirational", "success", "mindset"):
            selected_voice = "Inspirational_girl"  # Uplifting and motivational
        elif video_type_has("business", "corporate", "finance", "marketing"):
            selected_voice = "Elegant_Man"  # Professional and sophisticated
        elif video_type_has("tech", "gaming", "software", "app"):
            selected_voice = "Young_Knight"  # Energetic and modern

    # Scene-level keywords take precedence over the plan-level baseline.
    title = scene_title or ""
    title_lower = title.lower()
    scene_text = f"{title} {narration or ''}".lower()

    def mentions(*keywords: str) -> bool:
        # Substring match, so e.g. "now" also matches "know".
        return any(keyword in scene_text for keyword in keywords)

    if "hook" in title_lower or mentions("attention", "grab", "exciting", "amazing", "unbelievable"):
        selected_voice = "Exuberant_Girl"  # Very energetic and enthusiastic
    elif mentions("story", "emotional", "heartwarming", "touching", "inspiring"):
        selected_voice = "Inspirational_girl"  # Emotional and inspiring
    elif mentions("technical", "explain", "step-by-step", "process", "how-to"):
        selected_voice = "Calm_Woman"  # Clear and methodical
    elif "cta" in title_lower or mentions("subscribe", "like", "comment", "share", "now", "today"):
        selected_voice = "Determined_Man"  # Confident and persuasive

    logger.info(f"[VoiceSelection] Selected '{selected_voice}' for scene: {title[:50]}...")
    return selected_voice
|
||||
|
||||
|
||||
class YouTubeAudioRequest(BaseModel):
    """Request body for generating narration audio for a single YouTube scene."""

    # --- Scene identity and narration text ---
    scene_id: str
    scene_title: str
    text: str

    # --- Voice and language ---
    voice_id: Optional[str] = None  # Auto-selected from the content when omitted
    language: Optional[str] = None  # Language code for multilingual audio (e.g., "en", "es", "fr")

    # --- Delivery ---
    speed: float = 1.0
    volume: float = 1.0
    pitch: float = 0.0
    emotion: str = "happy"  # Engaging default for YouTube content
    english_normalization: bool = False

    # --- Output quality (high-quality defaults for Minimax Speech 02 HD) ---
    sample_rate: Optional[int] = 44100  # CD quality: 44100 Hz (valid values: 8000, 16000, 22050, 24000, 32000, 44100)
    bitrate: int = 256000  # Highest quality: 256kbps (valid values: 32000, 64000, 128000, 256000)
    channel: Optional[str] = "2"  # Stereo for richer audio (valid values: "1" or "2")
    format: Optional[str] = "mp3"  # Universal format for web
    language_boost: Optional[str] = None  # Inferred from `language` (or defaults to English) when omitted
    enable_sync_mode: bool = True

    # --- Context for intelligent voice/emotion selection ---
    video_plan_context: Optional[Dict[str, Any]] = None  # Optional video plan for context-aware selection
|
||||
|
||||
|
||||
class YouTubeAudioResponse(BaseModel):
    """Response body describing the audio generated for a single YouTube scene."""

    # Echo of the scene the audio was generated for
    scene_id: str
    scene_title: str

    # Where the generated file can be fetched
    audio_filename: str
    audio_url: str

    # How the audio was produced
    provider: str
    model: str
    voice_id: str

    # Size and cost figures reported for the generation
    text_length: int
    file_size: int
    cost: float
|
||||
|
||||
|
||||
def _strip_instructional_markers(text: str) -> str:
    """Remove bracketed production notes (e.g. "[Pacing: slow]") and collapse whitespace."""
    import re  # local import: this module has no file-level `re` import

    # Markers that carry pacing, timing, or other non-spoken content.
    marker_patterns = (
        r'\[Pacing:\s*[^\]]+\]',         # [Pacing: slow]
        r'\[Instructions?:\s*[^\]]+\]',  # [Instructions: ...]
        r'\[Timing:\s*[^\]]+\]',         # [Timing: ...]
        r'\[Note:\s*[^\]]+\]',           # [Note: ...]
        r'\[Internal:\s*[^\]]+\]',       # [Internal: ...]
    )

    cleaned = text.strip()
    for pattern in marker_patterns:
        cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE)

    # Clean up extra whitespace left behind by the removed markers.
    return re.sub(r'\s+', ' ', cleaned).strip()


@router.post("/audio", response_model=YouTubeAudioResponse)
async def generate_youtube_scene_audio(
    request: YouTubeAudioRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate AI audio for a YouTube scene using the shared audio service.
    Similar to Podcast's audio generation endpoint.

    Steps: strip non-spoken markers from the text, resolve language boost, pick a
    voice and emotion (auto-selected unless supplied), call the audio service,
    then record the result in the asset library on a best-effort basis.

    Raises:
        HTTPException 400: the text is empty, or empty after marker removal.
        HTTPException 500: audio generation failed for any other reason.
        HTTPExceptions raised by the audio service are propagated unchanged.
    """
    user_id = require_authenticated_user(current_user)

    if not request.text or not request.text.strip():
        raise HTTPException(status_code=400, detail="Text is required")

    # Validation happens before the generation `try` so that this 400 is returned
    # as a 400 instead of being swallowed by the generic handler and turned into a 500.
    processed_text = _strip_instructional_markers(request.text)
    if not processed_text:
        raise HTTPException(status_code=400, detail="Text became empty after removing instructions. Please provide clean narration text.")

    logger.info(f"[YouTubeAudio] Text preprocessing: {len(request.text)} -> {len(processed_text)} characters")

    try:
        effective_language_boost = _resolve_language_boost(request.language, request.language_boost)

        # Intelligent voice selection based on content analysis
        if not request.voice_id:
            # If a non-English language is selected, default to the language-specific Minimax voice_id.
            # Otherwise keep the existing English persona voice selection logic.
            if effective_language_boost in LANGUAGE_BOOST_TO_DEFAULT_VOICE_ID and effective_language_boost not in ["English", "auto"]:
                selected_voice = LANGUAGE_BOOST_TO_DEFAULT_VOICE_ID[effective_language_boost]
                logger.info(
                    f"[VoiceSelection] Using language-specific default voice '{selected_voice}' "
                    f"(language_boost={effective_language_boost}, language={request.language})"
                )
            else:
                selected_voice = select_optimal_voice(
                    request.scene_title,
                    processed_text,
                    request.video_plan_context,
                )
        else:
            selected_voice = request.voice_id

        # Auto-select emotion when the request carries the default value.
        # NOTE: "happy" is also the model default, so an explicitly requested
        # "happy" is indistinguishable from "not set" and is auto-selected too.
        if request.emotion == "happy":
            selected_emotion = select_optimal_emotion(
                request.scene_title,
                processed_text,
                request.video_plan_context,
            )
        else:
            selected_emotion = request.emotion

        logger.info(
            f"[YouTubeAudio] Voice selection: {selected_voice}, Emotion: {selected_emotion}, "
            f"language={request.language}, language_boost={effective_language_boost}"
        )

        # Build kwargs for optional parameters; anything omitted here is left
        # to the downstream service/provider default.
        optional_kwargs = {}

        # DEBUG: Log what values we received
        logger.info(
            f"[YouTubeAudio] Request parameters: sample_rate={request.sample_rate}, bitrate={request.bitrate}, "
            f"channel={request.channel}, format={request.format}, language_boost={request.language_boost}, "
            f"effective_language_boost={effective_language_boost}, language={request.language}"
        )

        # sample_rate: use provided value or omit
        if request.sample_rate is not None:
            optional_kwargs["sample_rate"] = request.sample_rate

        # bitrate: always sent. The request model declares a non-optional int with a
        # default of 256000 (256kbps), so request.bitrate is never None.
        # Valid values: 32000, 64000, 128000, 256000
        optional_kwargs["bitrate"] = request.bitrate

        # channel: only include when it is exactly "1" or "2" (as strings);
        # None, empty, or any other value is omitted.
        if request.channel is not None and str(request.channel).strip() in ["1", "2"]:
            optional_kwargs["channel"] = str(request.channel).strip()
            logger.info(f"[YouTubeAudio] Including valid channel: {optional_kwargs['channel']}")
        else:
            logger.info(f"[YouTubeAudio] Omitting invalid channel: {request.channel}")

        # format: use provided value or omit
        if request.format is not None:
            optional_kwargs["format"] = request.format

        # language_boost: always send the resolved value (improves pronunciation and helps multilingual voices)
        if effective_language_boost is not None and str(effective_language_boost).strip() != "":
            optional_kwargs["language_boost"] = effective_language_boost

        logger.info(f"[YouTubeAudio] Final optional_kwargs: {optional_kwargs}")

        result: StoryAudioResult = audio_service.generate_ai_audio(
            scene_number=0,
            scene_title=request.scene_title,
            text=processed_text,
            user_id=user_id,
            voice_id=selected_voice,
            speed=request.speed or 1.0,
            volume=request.volume or 1.0,
            pitch=request.pitch or 0.0,
            emotion=selected_emotion,
            english_normalization=request.english_normalization or False,
            enable_sync_mode=request.enable_sync_mode,
            **optional_kwargs,
        )

        # Override URL to use YouTube endpoint instead of story endpoint
        if result.get("audio_url") and "/api/story/audio/" in result.get("audio_url", ""):
            audio_filename = result.get("audio_filename", "")
            result["audio_url"] = f"/api/youtube/audio/{audio_filename}"
    except HTTPException:
        # Preserve deliberate HTTP errors (status code and detail) instead of
        # re-wrapping them as a generic 500.
        raise
    except Exception as exc:
        logger.error(f"[YouTube] Audio generation failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Audio generation failed: {exc}") from exc

    # Save to asset library (youtube_creator module). Best-effort: a failure here
    # is logged and must not fail the request, since the audio already exists.
    try:
        if result.get("audio_url"):
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="audio",
                source_module="youtube_creator",
                filename=result.get("audio_filename", ""),
                file_url=result.get("audio_url", ""),
                file_path=result.get("audio_path"),
                file_size=result.get("file_size"),
                mime_type="audio/mpeg",
                title=f"{request.scene_title} - YouTube",
                description="YouTube scene narration",
                tags=["youtube_creator", "audio", request.scene_id],
                provider=result.get("provider"),
                model=result.get("model"),
                cost=result.get("cost"),
                asset_metadata={
                    "scene_id": request.scene_id,
                    "scene_title": request.scene_title,
                    "status": "completed",
                },
            )
    except Exception as e:
        logger.warning(f"[YouTube] Failed to save audio asset: {e}")

    return YouTubeAudioResponse(
        scene_id=request.scene_id,
        scene_title=request.scene_title,
        audio_filename=result.get("audio_filename", ""),
        audio_url=result.get("audio_url", ""),
        provider=result.get("provider", "wavespeed"),
        model=result.get("model", "minimax/speech-02-hd"),
        voice_id=result.get("voice_id", selected_voice),
        text_length=result.get("text_length", len(request.text)),
        file_size=result.get("file_size", 0),
        cost=result.get("cost", 0.0),
    )
|
||||
|
||||
|
||||
@router.get("/audio/{filename}")
async def serve_youtube_audio(
    filename: str,
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve generated YouTube scene audio files.

    Supports authentication via Authorization header or token query parameter.
    Query parameter is useful for HTML elements like <audio> that cannot send custom headers.

    Raises:
        HTTPException 400: the filename contains "..", "/" or "\\".
        HTTPException 403: the resolved path falls outside YOUTUBE_AUDIO_DIR.
        HTTPException 404: no regular file exists at the resolved path.
    """
    require_authenticated_user(current_user)

    # Security check: ensure filename doesn't contain path traversal
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    # Resolve BOTH sides before comparing: a resolved file path checked against an
    # unresolved root gives a false 403 whenever the root contains a symlink.
    audio_root = YOUTUBE_AUDIO_DIR.resolve()
    audio_path = (audio_root / filename).resolve()

    # Security check: component-wise containment. A plain string-prefix test
    # would also accept sibling directories such as "youtube_audio_backup".
    try:
        audio_path.relative_to(audio_root)
    except ValueError:
        raise HTTPException(status_code=403, detail="Access denied") from None

    # is_file() (not exists()) so that "." — which resolves to the directory
    # itself — yields a 404 instead of failing later inside FileResponse.
    if not audio_path.is_file():
        raise HTTPException(status_code=404, detail="Audio file not found")

    return FileResponse(audio_path, media_type="audio/mpeg")
|
||||
|
||||
557
backend/api/youtube/handlers/avatar.py
Normal file
557
backend/api/youtube/handlers/avatar.py
Normal file
@@ -0,0 +1,557 @@
|
||||
"""YouTube Creator avatar upload and AI optimization handlers."""
|
||||
|
||||
from pathlib import Path
|
||||
import uuid
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from services.database import get_db
|
||||
from services.llm_providers.main_image_generation import generate_image
|
||||
from services.llm_providers.main_image_editing import edit_image
|
||||
from utils.asset_tracker import save_asset_to_library
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
# Router (mounted under "/avatar") and logger shared by the avatar handlers.
router = APIRouter(prefix="/avatar", tags=["youtube-avatar"])
logger = get_service_logger("api.youtube.avatar")

# Avatar images live in "youtube_avatars", four directory levels above this
# file. The directory is created at import time if it is missing.
base_dir = Path(__file__).parent.parent.parent.parent
YOUTUBE_AVATARS_DIR = base_dir.joinpath("youtube_avatars")
YOUTUBE_AVATARS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def require_authenticated_user(current_user: Dict[str, Any]) -> str:
    """Return the caller's user id as a string.

    Raises:
        HTTPException 401: `current_user` is empty/None or has no truthy "id".
    """
    if current_user:
        candidate_id = current_user.get("id")
        if candidate_id:
            return str(candidate_id)

    raise HTTPException(status_code=401, detail="Authentication required")
|
||||
|
||||
|
||||
def _load_youtube_image_bytes(image_url: str) -> bytes:
    """Load avatar bytes from a stored YouTube avatar URL.

    Only the last path segment of `image_url` (with any query string removed) is
    used, and it is looked up directly inside YOUTUBE_AVATARS_DIR.

    Raises:
        HTTPException 404: the URL yields no usable filename, the filename looks
            like a traversal attempt, or no such file exists.
    """
    filename = image_url.split("/")[-1].split("?")[0]

    # Splitting on "/" does not neutralise ".." or Windows-style "\" separators,
    # so reject them explicitly. Reported as 404 so the only error callers can
    # see from this helper stays the same as before.
    if not filename or ".." in filename or "\\" in filename:
        raise HTTPException(status_code=404, detail="Avatar image not found")

    image_path = YOUTUBE_AVATARS_DIR / filename
    if not image_path.is_file():  # is_file() is already False for missing paths
        raise HTTPException(status_code=404, detail="Avatar image not found")

    return image_path.read_bytes()
|
||||
|
||||
|
||||
def _build_avatar_prompt(
    video_type: str,
    target_audience: str,
    visual_style: str,
    tone: str,
    avatar_recommendations: Dict[str, Any],
) -> str:
    """Assemble the comma-separated image prompt for a generated creator avatar."""
    prompt_parts = []

    # Base avatar description - use the plan's recommendation if available
    description = avatar_recommendations.get("description")
    if description:
        prompt_parts.append(str(description))
    else:
        prompt_parts.append("Half-length portrait of a professional YouTube creator (25-35 years old)")

    # Video type optimization: exact matches first, then substring fallbacks
    if video_type:
        video_type_lower = video_type.lower()
        exact_styles = {
            "tutorial": "approachable instructor, professional yet friendly, clear presentation style",
            "review": "trustworthy reviewer, confident, credible appearance",
            "educational": "knowledgeable educator, professional, warm and engaging",
            "entertainment": "energetic creator, expressive, fun and relatable",
            "vlog": "authentic person, approachable, real and relatable",
            "product_demo": "professional presenter, polished, confident and enthusiastic",
            "reaction": "expressive creator, authentic reactions, engaging",
            "storytelling": "storyteller, warm, engaging narrator",
        }
        if video_type_lower in exact_styles:
            prompt_parts.append(exact_styles[video_type_lower])
        elif "tech" in video_type_lower:
            prompt_parts.append("tech-forward style")
        elif "travel" in video_type_lower:
            prompt_parts.append("travel vlogger aesthetic")
        elif "education" in video_type_lower or "learn" in video_type_lower:
            prompt_parts.append("educational creator, clean and clear presentation")
        else:
            prompt_parts.append("modern creator style")

    # Audience optimization (first matching group wins)
    if target_audience:
        audience_lower = target_audience.lower()
        if "young" in audience_lower or "gen z" in audience_lower or "millennial" in audience_lower:
            prompt_parts.append("youthful, vibrant, modern vibe")
        elif "executive" in audience_lower or "professional" in audience_lower or "business" in audience_lower:
            prompt_parts.append("polished, credible, authoritative presence")
        elif "creative" in audience_lower:
            prompt_parts.append("artistic, expressive, creative professional")
        elif "parents" in audience_lower or "family" in audience_lower:
            prompt_parts.append("warm, approachable, trustworthy presence")

    # Visual style (every matching trait is added)
    if visual_style:
        visual_lower = visual_style.lower()
        if "minimal" in visual_lower or "minimalist" in visual_lower:
            prompt_parts.append("clean, minimalist aesthetic")
        if "tech" in visual_lower or "modern" in visual_lower:
            prompt_parts.append("tech-forward, modern style")
        if "energetic" in visual_lower or "colorful" in visual_lower or "vibrant" in visual_lower:
            prompt_parts.append("vibrant, energetic appearance")
        if "cinematic" in visual_lower:
            prompt_parts.append("cinematic, polished presentation")
        if "professional" in visual_lower:
            prompt_parts.append("professional, polished aesthetic")

    # Tone (every matching trait is added)
    if tone:
        tone_lower = tone.lower()
        if "casual" in tone_lower:
            prompt_parts.append("casual, approachable style")
        if "professional" in tone_lower:
            prompt_parts.append("professional attire and presentation")
        if "energetic" in tone_lower or "fun" in tone_lower:
            prompt_parts.append("energetic, lively expression")
        if "warm" in tone_lower:
            prompt_parts.append("warm, friendly expression")

    # Extra avatar recommendations from the plan
    for key in ("style", "energy"):
        recommended = avatar_recommendations.get(key)
        if recommended:
            prompt_parts.append(str(recommended))

    # Base technical requirements
    prompt_parts.extend([
        "photo-realistic, professional photography",
        "confident, engaging expression",
        "professional studio lighting, clean background",
        "suitable for video generation and thumbnails",
        "ultra realistic, 4k quality, 85mm lens",
        "looking at camera, center-focused composition",
    ])

    return ", ".join(prompt_parts)


async def _generate_avatar_from_context(
    user_id: str,
    project_id: Optional[str],
    audience: Optional[str] = None,
    content_type: Optional[str] = None,
    video_plan_json: Optional[str] = None,
    brand_style: Optional[str] = None,
    db: Optional[Session] = None,
) -> Dict[str, Any]:
    """
    Internal function to generate avatar from context.
    Can be called from route handler or directly from router.

    Builds an image prompt from the explicit user inputs and the optional video
    plan, generates a 1024x1024 image, writes it into YOUTUBE_AVATARS_DIR, and
    (when both `project_id` and `db` are given) records it in the asset library
    on a best-effort basis.

    Args:
        user_id: Id passed through to image generation and the asset library.
        project_id: Optional project id; used in the filename and asset metadata.
        audience: Explicit target audience; takes priority over the plan's.
        content_type: Explicit video type; takes priority over the plan's.
        video_plan_json: Optional JSON-encoded video plan (expected to be an object).
        brand_style: Explicit visual style; takes priority over the plan's.
        db: Optional database session for the asset library.

    Returns:
        Dict with "avatar_url", "avatar_filename", "avatar_prompt" and "message".
    """
    import json
    import re

    def as_text(value: Any) -> str:
        # Plan values come from arbitrary JSON; never call str methods on non-strings.
        if isinstance(value, str):
            return value
        return "" if value is None else str(value)

    # Parse video plan if provided. Only a JSON object is usable as a plan;
    # anything else (list, string, number) is ignored rather than crashing later.
    plan_data: Dict[str, Any] = {}
    if video_plan_json:
        try:
            parsed_plan = json.loads(video_plan_json)
        except (TypeError, ValueError) as e:
            logger.warning(f"[YouTube] Failed to parse video plan JSON: {e}")
        else:
            if isinstance(parsed_plan, dict):
                plan_data = parsed_plan
            else:
                logger.warning(
                    f"[YouTube] Failed to parse video plan JSON: expected an object, got {type(parsed_plan).__name__}"
                )

    avatar_recommendations = plan_data.get("avatar_recommendations")
    if not isinstance(avatar_recommendations, dict):
        avatar_recommendations = {}

    # Extract context. Priority: user input > plan data > defaults, because user
    # inputs represent explicit choices.
    plan_target_audience = as_text(audience or plan_data.get("target_audience"))
    plan_video_type = as_text(content_type or plan_data.get("video_type"))
    plan_visual_style = as_text(brand_style or plan_data.get("visual_style"))
    plan_tone = as_text(plan_data.get("tone"))

    logger.info(
        f"[YouTube] Avatar generation context: "
        f"video_type={plan_video_type}, audience={plan_target_audience[:50] if plan_target_audience else 'none'}, "
        f"brand_style={plan_visual_style[:50] if plan_visual_style else 'none'}"
    )

    # NOTE: the original had a separate `elif content_type:` fallback for the video
    # type; it was unreachable because `content_type` already feeds plan_video_type.
    prompt = _build_avatar_prompt(
        video_type=plan_video_type,
        target_audience=plan_target_audience,
        visual_style=plan_visual_style,
        tone=plan_tone,
        avatar_recommendations=avatar_recommendations,
    )
    seed = int(uuid.uuid4().int % (2**32))

    image_options = {
        "provider": "wavespeed",
        "model": "ideogram-v3-turbo",
        "width": 1024,
        "height": 1024,
        "seed": seed,
    }

    result = generate_image(
        prompt=prompt,
        options=image_options,
        user_id=user_id,
    )

    # project_id ends up in a filesystem path: keep only filename-safe characters
    # so a value containing "/" or ".." cannot escape YOUTUBE_AVATARS_DIR.
    safe_project = re.sub(r"[^A-Za-z0-9_-]", "_", str(project_id)) if project_id else "temp"
    unique_id = str(uuid.uuid4())[:8]
    avatar_filename = f"yt_generated_{safe_project}_{unique_id}.png"
    avatar_path = YOUTUBE_AVATARS_DIR / avatar_filename
    avatar_path.write_bytes(result.image_bytes)

    avatar_url = f"/api/youtube/images/avatars/{avatar_filename}"
    logger.info(f"[YouTube] Generated creator avatar: {avatar_path}")

    # Best-effort bookkeeping: a failure here is logged and must not fail the
    # request, since the avatar file already exists.
    if project_id and db:
        try:
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="image",
                source_module="youtube_creator",
                filename=avatar_filename,
                file_url=avatar_url,
                file_path=str(avatar_path),
                file_size=len(result.image_bytes),
                mime_type="image/png",
                title=f"YouTube Creator Avatar (Generated) - {project_id}",
                description="AI-generated YouTube creator avatar",
                prompt=prompt,
                tags=["youtube", "avatar", "generated", project_id],
                provider=result.provider,
                model=result.model,
                asset_metadata={
                    "project_id": project_id,
                    "type": "generated_presenter",
                    "status": "completed",
                },
            )
        except Exception as e:
            logger.warning(f"[YouTube] Failed to save generated avatar asset: {e}")

    return {
        "avatar_url": avatar_url,
        "avatar_filename": avatar_filename,
        "avatar_prompt": prompt,
        "message": "Avatar generated successfully",
    }
|
||||
|
||||
|
||||
@router.post("/upload")
async def upload_youtube_avatar(
    file: UploadFile = File(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Upload a YouTube creator avatar image.

    The upload is stored under ``YOUTUBE_AVATARS_DIR`` with a generated name and,
    when ``project_id`` is given, registered in the asset library (best effort).

    Args:
        file: The uploaded image (multipart form field).
        project_id: Optional project identifier used in the stored filename and
            in the asset metadata.
        current_user: Authenticated user payload from the auth dependency.
        db: Database session used for the asset-library record.

    Returns:
        A dict with ``avatar_url``, ``avatar_filename`` and ``message``.

    Raises:
        HTTPException: 400 for a missing, empty or oversized upload; 500 when
            storing the file fails.
    """
    user_id = require_authenticated_user(current_user)

    if not file:
        raise HTTPException(status_code=400, detail="No file uploaded")

    # Read at most one byte past the limit so an oversized upload is detected
    # without pulling the whole body into memory first.
    max_bytes = 5 * 1024 * 1024
    file_content = await file.read(max_bytes + 1)

    # Validate size (max 5MB)
    if len(file_content) > max_bytes:
        raise HTTPException(status_code=400, detail="Image file size must be less than 5MB")
    if not file_content:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    try:
        # The client may omit the filename entirely; fall back to ".png" then.
        # The suffix is also client-controlled, so only plain alphanumeric
        # extensions are kept.
        file_ext = Path(file.filename or "").suffix
        if not file_ext[1:].isalnum():
            file_ext = ".png"

        # project_id comes straight from the form; strip anything that could act
        # as a path separator before it becomes part of a filename.
        project_token = "".join(
            ch if ch.isalnum() or ch in "-_" else "_" for ch in (project_id or "temp")
        )
        unique_id = str(uuid.uuid4())[:8]
        avatar_filename = f"yt_avatar_{project_token}_{unique_id}{file_ext}"
        avatar_path = YOUTUBE_AVATARS_DIR / avatar_filename

        with open(avatar_path, "wb") as f:
            f.write(file_content)

        avatar_url = f"/api/youtube/images/avatars/{avatar_filename}"
        logger.info(f"[YouTube] Avatar uploaded: {avatar_path}")

        if project_id:
            # Asset-library registration is best effort: the upload itself has
            # already succeeded, so a failure here is only logged.
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
                    source_module="youtube_creator",
                    filename=avatar_filename,
                    file_url=avatar_url,
                    file_path=str(avatar_path),
                    file_size=len(file_content),
                    mime_type=file.content_type or "image/png",
                    title=f"YouTube Creator Avatar - {project_id}",
                    description="YouTube creator avatar image",
                    tags=["youtube", "avatar", project_id],
                    asset_metadata={
                        "project_id": project_id,
                        "type": "creator_avatar",
                        "status": "completed",
                    },
                )
            except Exception as e:
                logger.warning(f"[YouTube] Failed to save avatar asset: {e}")

        return {
            "avatar_url": avatar_url,
            "avatar_filename": avatar_filename,
            "message": "Avatar uploaded successfully",
        }
    except Exception as exc:
        logger.error(f"[YouTube] Avatar upload failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Avatar upload failed: {str(exc)}")
|
||||
|
||||
|
||||
# Extra prompt sentence per exact (lower-cased) video type.
_PRESENTABLE_VIDEO_TYPE_HINTS = {
    "tutorial": "Approachable instructor style, professional yet friendly appearance",
    "review": "Trustworthy reviewer style, confident and credible appearance",
    "educational": "Knowledgeable educator style, professional and warm appearance",
    "entertainment": "Energetic creator style, expressive and fun appearance",
    "vlog": "Authentic vlogger style, approachable and relatable appearance",
    "product_demo": "Professional presenter style, polished and enthusiastic appearance",
    "reaction": "Expressive creator style, authentic and engaging appearance",
    "storytelling": "Storyteller style, warm and engaging narrator appearance",
}

# (keywords, hint) pairs; only the FIRST pair with a matching keyword applies.
_PRESENTABLE_AUDIENCE_HINTS = (
    (("young", "gen z", "millennial"), "Modern, youthful, vibrant aesthetic"),
    (
        ("executive", "professional", "business"),
        "Polished, credible, authoritative professional appearance",
    ),
    (("creative",), "Artistic, expressive, creative professional style"),
)

# (keywords, hint) pairs; EVERY pair with a matching keyword applies.
_PRESENTABLE_BRAND_STYLE_HINTS = (
    (("minimal",), "Clean, minimalist aesthetic"),
    (("tech", "modern"), "Tech-forward, modern style"),
    (("energetic", "colorful"), "Vibrant, energetic appearance"),
)


@router.post("/make-presentable")
async def make_avatar_presentable(
    avatar_url: str = Form(...),
    project_id: Optional[str] = Form(None),
    video_type: Optional[str] = Form(None),
    target_audience: Optional[str] = Form(None),
    video_goal: Optional[str] = Form(None),
    brand_style: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Transform an uploaded avatar image into a YouTube-appropriate creator.

    Builds an editing prompt from a fixed base plus hints derived from
    ``video_type``, ``target_audience`` and ``brand_style``, tries to refine it
    with the WaveSpeed prompt optimizer (falling back to the unrefined prompt),
    runs ``edit_image`` on the source avatar and stores the result under
    ``YOUTUBE_AVATARS_DIR``. When ``project_id`` is given the result is also
    registered in the asset library (best effort).

    Args:
        avatar_url: URL of the previously stored avatar to transform.
        project_id: Optional project identifier for the filename and metadata.
        video_type: Optional video type; matched exactly against known types.
        target_audience: Optional free text; matched by keyword.
        video_goal: Accepted for interface compatibility; not used here.
        brand_style: Optional free text; matched by keyword.
        current_user: Authenticated user payload from the auth dependency.
        db: Database session used for the asset-library record.

    Returns:
        A dict with ``avatar_url``, ``avatar_filename`` and ``message``.

    Raises:
        HTTPException: Propagated unchanged when raised by a helper (for
            example a missing source image); 500 for any other failure.
    """
    user_id = require_authenticated_user(current_user)

    try:
        avatar_bytes = _load_youtube_image_bytes(avatar_url)
        logger.info(f"[YouTube] 🔍 Starting avatar transformation for user_id={user_id}, project={project_id}")

        # Build context-aware transformation prompt using user inputs
        prompt_parts = [
            "Transform this photo into a professional YouTube creator avatar:",
            "Significantly enhance and optimize the image for YouTube video production;",
            "Apply professional photo editing: improve lighting, color grading, and composition;",
            "Enhance facial features: brighten eyes, smooth skin, add professional makeup if needed;",
            "Improve background: replace with clean, professional studio background or subtle gradient;",
            "Adjust clothing: ensure professional, YouTube-appropriate attire;",
            "Optimize for video: ensure the person looks natural and engaging on camera;",
            "Half-length portrait format, person looking directly at camera with confident, engaging expression;",
            "Professional studio lighting with soft shadows, high-quality photography;",
            "Maintain the person's core appearance and identity while making significant improvements;",
            "Ultra realistic, 4k quality, professional photography style;",
            "Suitable for video generation, thumbnails, and YouTube channel branding."
        ]

        # Add context from user inputs to make transformation more targeted
        if video_type:
            type_hint = _PRESENTABLE_VIDEO_TYPE_HINTS.get(video_type.lower())
            if type_hint:
                prompt_parts.append(type_hint)

        if target_audience:
            audience_lower = target_audience.lower()
            for keywords, hint in _PRESENTABLE_AUDIENCE_HINTS:
                if any(keyword in audience_lower for keyword in keywords):
                    prompt_parts.append(hint)
                    break  # audience hints are mutually exclusive

        if brand_style:
            style_lower = brand_style.lower()
            for keywords, hint in _PRESENTABLE_BRAND_STYLE_HINTS:
                if any(keyword in style_lower for keyword in keywords):
                    prompt_parts.append(hint)

        base_prompt = " ".join(prompt_parts)

        # Optimize the prompt using WaveSpeed prompt optimizer for better results.
        # Any failure (including an empty result) falls back to the base prompt.
        try:
            from services.wavespeed.client import WaveSpeedClient

            wavespeed_client = WaveSpeedClient()
            logger.info("[YouTube] Optimizing transformation prompt using WaveSpeed prompt optimizer")
            transformation_prompt = wavespeed_client.optimize_prompt(
                text=base_prompt,
                mode="image",
                style="realistic",  # Use realistic style for photo editing
                enable_sync_mode=True,
                timeout=30
            )
            if not transformation_prompt:
                raise ValueError("prompt optimizer returned an empty prompt")
            logger.info(f"[YouTube] Prompt optimized successfully (length: {len(transformation_prompt)} chars)")
        except Exception as opt_error:
            logger.warning(f"[YouTube] Prompt optimization failed, using base prompt: {opt_error}")
            transformation_prompt = base_prompt

        # Use HuggingFace for image editing (only available option)
        # Note: This uses async processing with polling (~30 seconds expected)
        image_options = {
            "provider": "huggingface",  # Explicitly use HuggingFace (only option for image editing)
            "model": None,  # Use default model (Qwen/Qwen-Image-Edit)
        }

        logger.info("[YouTube] Starting avatar transformation (this may take ~30 seconds due to async processing)")
        # NOTE(review): edit_image is called synchronously inside this async
        # handler; if it really blocks for ~30 seconds it stalls the event loop.
        # Consider moving it to a worker thread - confirm with the service owner.
        result = edit_image(
            input_image_bytes=avatar_bytes,
            prompt=transformation_prompt,
            options=image_options,
            user_id=user_id,
        )
        logger.info("[YouTube] ✅ Avatar transformation completed successfully")

        # project_id comes straight from the form; strip anything that could act
        # as a path separator before it becomes part of a filename.
        project_token = "".join(
            ch if ch.isalnum() or ch in "-_" else "_" for ch in (project_id or "temp")
        )
        unique_id = str(uuid.uuid4())[:8]
        transformed_filename = f"yt_presenter_{project_token}_{unique_id}.png"
        transformed_path = YOUTUBE_AVATARS_DIR / transformed_filename

        with open(transformed_path, "wb") as f:
            f.write(result.image_bytes)

        transformed_url = f"/api/youtube/images/avatars/{transformed_filename}"
        logger.info(f"[YouTube] Transformed avatar saved to: {transformed_path}")

        if project_id:
            # Best effort: the transformed file is already on disk, so a
            # failure to register it is only logged.
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
                    source_module="youtube_creator",
                    filename=transformed_filename,
                    file_url=transformed_url,
                    file_path=str(transformed_path),
                    file_size=len(result.image_bytes),
                    mime_type="image/png",
                    title=f"YouTube Creator (Transformed) - {project_id}",
                    description="AI-transformed YouTube creator avatar from uploaded photo",
                    prompt=transformation_prompt,
                    tags=["youtube", "avatar", "presenter", project_id],
                    provider=result.provider,
                    model=result.model,
                    asset_metadata={
                        "project_id": project_id,
                        "type": "transformed_presenter",
                        "original_avatar_url": avatar_url,
                        "status": "completed",
                    },
                )
            except Exception as e:
                logger.warning(f"[YouTube] Failed to save transformed avatar asset: {e}")

        return {
            "avatar_url": transformed_url,
            "avatar_filename": transformed_filename,
            "message": "Avatar transformed successfully",
        }
    except HTTPException:
        # Keep the status code chosen by helpers instead of masking it as a 500.
        raise
    except Exception as exc:
        logger.error(f"[YouTube] Avatar transformation failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Avatar transformation failed: {str(exc)}")
|
||||
|
||||
|
||||
@router.post("/generate")
async def generate_creator_avatar(
    project_id: Optional[str] = Form(None),
    audience: Optional[str] = Form(None),
    content_type: Optional[str] = Form(None),
    video_plan_json: Optional[str] = Form(None),
    brand_style: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Auto-generate a YouTube creator avatar optimized from video plan context.

    Thin endpoint wrapper: authenticates the caller and forwards every form
    field unchanged to ``_generate_avatar_from_context``.

    Args:
        project_id: Optional project identifier.
        audience: Optional target-audience description.
        content_type: Optional video/content type.
        video_plan_json: Optional video plan serialized as JSON.
        brand_style: Optional brand/visual style description.
        current_user: Authenticated user payload from the auth dependency.
        db: Database session forwarded to the generator.

    Returns:
        Whatever ``_generate_avatar_from_context`` returns.

    Raises:
        HTTPException: Propagated unchanged when raised by the generator;
            500 for any other failure.
    """
    user_id = require_authenticated_user(current_user)

    try:
        return await _generate_avatar_from_context(
            user_id=user_id,
            project_id=project_id,
            audience=audience,
            content_type=content_type,
            video_plan_json=video_plan_json,
            brand_style=brand_style,
            db=db,
        )
    except HTTPException:
        # Preserve deliberate status codes instead of rewriting them as 500.
        raise
    except Exception as exc:
        logger.error(f"[YouTube] Avatar generation failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Avatar generation failed: {str(exc)}")
|
||||
|
||||
|
||||
@router.post("/regenerate")
async def regenerate_creator_avatar(
    video_plan_json: str = Form(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Regenerate a YouTube creator avatar using the same video plan context.

    Parses ``video_plan_json``, reads ``target_audience``, ``video_type`` and
    ``visual_style`` from it, and delegates to ``_generate_avatar_from_context``.

    Args:
        video_plan_json: The video plan serialized as a JSON object.
        project_id: Optional project identifier.
        current_user: Authenticated user payload from the auth dependency.
        db: Database session forwarded to the generator.

    Returns:
        A dict with ``avatar_url``, ``avatar_filename``, ``avatar_prompt`` and
        ``message``.

    Raises:
        HTTPException: 400 when ``video_plan_json`` is not a JSON object;
            propagated unchanged when raised by the generator; 500 otherwise.
    """
    user_id = require_authenticated_user(current_user)

    try:
        # Parse video plan to extract context
        import json

        try:
            plan_data = json.loads(video_plan_json)
        except (TypeError, ValueError) as parse_error:
            # Malformed client input is a client error, not a server fault.
            raise HTTPException(
                status_code=400, detail=f"Invalid video_plan_json: {parse_error}"
            )
        if not isinstance(plan_data, dict):
            raise HTTPException(status_code=400, detail="video_plan_json must be a JSON object")

        # Extract context from plan data
        audience = plan_data.get("target_audience", "")
        content_type = plan_data.get("video_type", "")
        brand_style = plan_data.get("visual_style", "")

        # str() keeps the log preview safe even if the plan holds a non-string
        # value for target_audience.
        audience_preview = str(audience)[:50] if audience else 'none'
        logger.info(
            f"[YouTube] Regenerating avatar for project {project_id}: "
            f"video_type={content_type}, audience={audience_preview}"
        )

        avatar_response = await _generate_avatar_from_context(
            user_id=user_id,
            project_id=project_id,
            audience=audience,
            content_type=content_type,
            video_plan_json=video_plan_json,
            brand_style=brand_style,
            db=db,
        )

        # Return the avatar prompt along with the URL for the frontend
        return {
            "avatar_url": avatar_response.get("avatar_url"),
            "avatar_filename": avatar_response.get("avatar_filename"),
            "avatar_prompt": avatar_response.get("avatar_prompt"),
            "message": "Avatar regenerated successfully",
        }
    except HTTPException:
        # Preserve deliberate status codes (including the 400s above).
        raise
    except Exception as exc:
        logger.error(f"[YouTube] Avatar regeneration failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Avatar regeneration failed: {str(exc)}")
|
||||
|
||||
|
||||
|
||||
470
backend/api/youtube/handlers/images.py
Normal file
470
backend/api/youtube/handlers/images.py
Normal file
@@ -0,0 +1,470 @@
|
||||
"""YouTube Creator scene image generation handlers."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional
|
||||
import uuid
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
|
||||
from fastapi.responses import FileResponse
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from services.database import get_db
|
||||
from services.subscription import PricingService
|
||||
from services.subscription.preflight_validator import validate_image_generation_operations
|
||||
from services.llm_providers.main_image_generation import generate_image, generate_character_image
|
||||
from utils.asset_tracker import save_asset_to_library
|
||||
from utils.logger_utils import get_service_logger
|
||||
from ..task_manager import task_manager
|
||||
|
||||
router = APIRouter(tags=["youtube-image"])
logger = get_service_logger("api.youtube.image")

# Storage roots: four `.parent` hops up from this file, i.e. the `backend/`
# directory given this file's path (backend/api/youtube/handlers/images.py).
base_dir = Path(__file__).parent.parent.parent.parent
YOUTUBE_AVATARS_DIR = base_dir / "youtube_avatars"
YOUTUBE_IMAGES_DIR = base_dir / "youtube_images"
# Only the scene-image directory is created here.
YOUTUBE_IMAGES_DIR.mkdir(parents=True, exist_ok=True)

# Small worker pool reserved for background image generation.
_image_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="youtube_image")
|
||||
|
||||
|
||||
class YouTubeImageRequest(BaseModel):
    """Request payload for generating one YouTube scene image."""

    # --- Scene identity and textual context -------------------------------
    scene_id: str
    scene_title: Optional[str] = None
    scene_content: Optional[str] = None
    # Stored avatar URL to use as the character reference, if any.
    base_avatar_url: Optional[str] = None
    idea: Optional[str] = None

    # --- Output size --------------------------------------------------------
    width: Optional[int] = 1024
    height: Optional[int] = 1024

    # --- Generation tuning --------------------------------------------------
    custom_prompt: Optional[str] = None
    style: Optional[str] = None  # e.g., "Realistic", "Fiction"
    rendering_speed: Optional[str] = None  # e.g., "Quality", "Turbo"
    aspect_ratio: Optional[str] = None  # e.g., "16:9"
    model: Optional[str] = None  # e.g., "ideogram-v3-turbo", "qwen-image"
|
||||
|
||||
|
||||
def require_authenticated_user(current_user: Dict[str, Any]) -> str:
    """Return the caller's user ID as a string, or reject the request.

    Raises:
        HTTPException: 401 when ``current_user`` is empty or has no truthy
            ``"id"`` entry.
    """
    identifier = (current_user or {}).get("id")
    if identifier:
        return str(identifier)
    raise HTTPException(status_code=401, detail="Authentication required")
|
||||
|
||||
|
||||
def _load_base_avatar_bytes(avatar_url: str) -> Optional[bytes]:
    """Load base avatar bytes for character consistency.

    The stored filename is taken from the last path segment of ``avatar_url``
    (query string removed). Lookup rules:

    * ``/api/youtube/avatars/...`` -> YouTube avatar directory only.
    * ``/api/podcast/avatars/...`` -> podcast avatar directory only.
    * anything else -> YouTube avatar directory first, then the podcast
      directory as a fallback.

    Args:
        avatar_url: URL (or path) referencing a stored avatar image.

    Returns:
        The file contents, or ``None`` when the file cannot be found or read.
        Errors are logged and never raised to the caller.
    """
    try:
        # NOTE: this function must not import ``Path`` locally. A function-level
        # import makes the name local to the whole function and breaks every
        # branch that runs before the import statement.
        filename = avatar_url.split("/")[-1].split("?")[0]

        # Same filename hygiene as the serving endpoint: never let the name
        # escape the avatar directories.
        if not filename or ".." in filename or "\\" in filename:
            logger.warning(f"[YouTube] Invalid avatar filename in URL: {avatar_url}")
            return None

        podcast_avatars_dir = base_dir / "podcast_avatars"

        if avatar_url.startswith("/api/youtube/avatars/"):
            # YouTube avatar
            avatar_path = YOUTUBE_AVATARS_DIR / filename
        elif avatar_url.startswith("/api/podcast/avatars/"):
            # Podcast avatar (cross-module usage)
            avatar_path = podcast_avatars_dir / filename
        else:
            # Unknown URL shape: check YouTube avatars first
            avatar_path = YOUTUBE_AVATARS_DIR / filename
            if not avatar_path.exists():
                # Fallback to podcast avatars
                avatar_path = podcast_avatars_dir / filename

        if not avatar_path.exists() or not avatar_path.is_file():
            logger.warning(f"[YouTube] Avatar file not found: {avatar_path}")
            return None

        logger.info(f"[YouTube] Successfully loaded avatar: {avatar_path}")
        return avatar_path.read_bytes()
    except Exception as e:
        # Best effort by design: callers treat None as "no reference image".
        logger.error(f"[YouTube] Error loading avatar from {avatar_url}: {e}")
        return None
|
||||
|
||||
|
||||
def _save_scene_image(image_bytes: bytes, scene_id: str) -> Dict[str, str]:
    """Persist generated scene image and return file/url info.

    Args:
        image_bytes: Encoded image data to write.
        scene_id: Scene identifier supplied by the client; used in the filename
            after replacing unsafe characters.

    Returns:
        A dict with ``image_filename``, ``image_path`` and ``image_url``.
    """
    # scene_id is client input. Anything that is not alphanumeric or one of
    # "._-" is replaced so the name cannot contain path separators and stays
    # valid as the final segment of the serving URL.
    scene_token = "".join(
        ch if ch.isalnum() or ch in "._-" else "_" for ch in str(scene_id)
    )
    unique_id = str(uuid.uuid4())[:8]
    image_filename = f"yt_scene_{scene_token}_{unique_id}.png"
    image_path = YOUTUBE_IMAGES_DIR / image_filename
    with open(image_path, "wb") as f:
        f.write(image_bytes)

    image_url = f"/api/youtube/images/scenes/{image_filename}"
    return {
        "image_filename": image_filename,
        "image_path": str(image_path),
        "image_url": image_url,
    }
|
||||
|
||||
|
||||
class YouTubeImageTaskResponse(BaseModel):
    """Response returned when a scene-image generation task is requested."""

    # False when the task could not be created or scheduled.
    success: bool
    # Identifier to poll for status; empty string on failure.
    task_id: str
    # Human-readable outcome description.
    message: str
|
||||
|
||||
@router.post("/image", response_model=YouTubeImageTaskResponse)
async def generate_youtube_scene_image(
    background_tasks: BackgroundTasks,
    request: YouTubeImageRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Generate a YouTube scene image with background task processing.

    Validates the request and the caller's subscription limits, creates a task
    in ``task_manager`` and schedules ``_execute_image_generation_task``.

    Args:
        background_tasks: FastAPI background-task scheduler.
        request: Scene description and generation options.
        current_user: Authenticated user payload from the auth dependency.
        db: Database session used for the pre-flight subscription check.

    Returns:
        ``YouTubeImageTaskResponse``. ``success`` is False (with an empty
        ``task_id``) when the task could not be created or scheduled.

    Raises:
        HTTPException: 401 when unauthenticated, 400 when the scene title is
            missing or blank, any status raised by the pre-flight validation,
            500 for unexpected failures.
    """
    # Authenticate before touching current_user so a missing user yields the
    # helper's 401 rather than an AttributeError from the log line.
    user_id = require_authenticated_user(current_user)
    logger.info(f"[YouTube] Image generation request received: scene='{request.scene_title}', user={user_id}")

    if not request.scene_title or not request.scene_title.strip():
        raise HTTPException(status_code=400, detail="Scene title is required")

    try:
        # Pre-flight subscription validation
        pricing_service = PricingService(db)
        validate_image_generation_operations(
            pricing_service=pricing_service,
            user_id=user_id,
            num_images=1,
        )
        logger.info(f"[YouTube] ✅ Pre-flight validation passed for user {user_id}")

        # Create background task
        task_id = task_manager.create_task("youtube_image_generation")
        logger.info(
            f"[YouTube] Created image generation task {task_id} for user {user_id}, "
            f"scene='{request.scene_title}'"
        )

        # Verify task was created
        initial_status = task_manager.get_task_status(task_id)
        if not initial_status:
            logger.error(f"[YouTube] Failed to create task {task_id} - task not found immediately after creation")
            return YouTubeImageTaskResponse(
                success=False,
                task_id="",
                message="Failed to create image generation task. Please try again."
            )

        # Add background task (pass request data, not database session):
        # the request-scoped session is not handed to the worker, which opens
        # its own.
        try:
            background_tasks.add_task(
                _execute_image_generation_task,
                task_id=task_id,
                request_data=request.dict(),  # Convert to dict for background task
                user_id=user_id,
            )
            logger.info(f"[YouTube] Background image generation task added for task {task_id}")
        except Exception as bg_error:
            logger.error(f"[YouTube] Failed to add background task for {task_id}: {bg_error}", exc_info=True)
            # Mark task as failed so pollers do not wait forever
            task_manager.update_task_status(
                task_id,
                "failed",
                error=str(bg_error),
                message="Failed to start image generation task"
            )
            return YouTubeImageTaskResponse(
                success=False,
                task_id="",
                message=f"Failed to start image generation task: {str(bg_error)}"
            )

        logger.info(f"[YouTube] Returning success response for task {task_id}")
        return YouTubeImageTaskResponse(
            success=True,
            task_id=task_id,
            message=f"Image generation started for '{request.scene_title}'"
        )

    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"[YouTube] Failed to create image generation task: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to start image generation: {str(exc)}")
|
||||
|
||||
|
||||
def _build_scene_prompt(request: YouTubeImageRequest, has_avatar: bool) -> str:
    """Assemble the comma-separated generation prompt for a scene image."""
    if has_avatar:
        # Character-reference prompt: scene context first, framing hints last.
        parts = []
        if request.scene_title:
            parts.append(f"Scene: {request.scene_title}")
        if request.scene_content:
            content_preview = request.scene_content[:200].replace("\n", " ").strip()
            parts.append(f"Context: {content_preview}")
        if request.idea:
            parts.append(f"Video idea: {request.idea[:80].strip()}")
        parts.append("YouTube creator on camera, engaging and dynamic framing")
        parts.append("Clean background, good lighting, thumbnail-friendly composition")
    else:
        # From-scratch prompt: generic framing first, scene context after.
        parts = [
            "YouTube creator scene",
            "clean, modern background",
            "good lighting, high contrast for thumbnail clarity",
        ]
        if request.scene_title:
            parts.append(f"Scene theme: {request.scene_title}")
        if request.scene_content:
            scene_context = request.scene_content[:120].replace("\n", " ")
            parts.append(f"Context: {scene_context}")
        if request.idea:
            parts.append(f"Topic: {request.idea[:80]}")
        parts.append("video-optimized composition, 16:9 aspect ratio")
    return ", ".join(parts)


def _execute_image_generation_task(task_id: str, request_data: dict, user_id: str):
    """Background task to generate YouTube scene image.

    Steps: rebuild the request, optionally load the reference avatar, build the
    prompt, generate the image (character-consistent when an avatar is
    available, otherwise or on failure via regular generation), save it to
    disk, register it in the asset library (best effort) and publish the
    result through ``task_manager``.

    Args:
        task_id: Identifier of the task to update in ``task_manager``.
        request_data: ``YouTubeImageRequest`` fields as a plain dict.
        user_id: ID of the user who requested the image.

    Returns:
        None. Success and failure are both reported through ``task_manager``;
        no exception escapes this function.
    """
    logger.info(
        f"[YouTubeImageGen] Background task started for task {task_id}, "
        f"scene='{request_data.get('scene_title')}', user={user_id}"
    )

    db = None
    try:
        # Rebuilt inside the try so that a malformed payload marks the task as
        # failed instead of leaving it pending forever.
        request = YouTubeImageRequest(**request_data)

        # Update task status to processing
        task_manager.update_task_status(
            task_id, "processing", progress=10.0, message="Preparing image generation..."
        )

        # Get database session for this background task
        db = next(get_db())
        logger.info(f"[YouTubeImageGen] Database session acquired for task {task_id}")

        # Load avatar if provided
        base_avatar_bytes = None
        if request.base_avatar_url:
            base_avatar_bytes = _load_base_avatar_bytes(request.base_avatar_url)
            if base_avatar_bytes:
                logger.info(f"[YouTubeImageGen] Loaded base avatar for task {task_id}")
            else:
                logger.warning(f"[YouTubeImageGen] Could not load base avatar for task {task_id}")

        image_prompt = _build_scene_prompt(request, has_avatar=bool(base_avatar_bytes))

        task_manager.update_task_status(
            task_id, "processing", progress=30.0, message="Generating image..."
        )
        logger.info(f"[YouTubeImageGen] Starting image generation for task {task_id}")

        provider = "wavespeed"
        # Model actually requested for regular generation; also what gets
        # reported unless character generation succeeds.
        model = request.model or "ideogram-v3-turbo"
        width = request.width or 1024
        height = request.height or 576
        generation_type = "scene"
        image_bytes = None
        use_regular_generation = True

        if base_avatar_bytes:
            logger.info(f"[YouTubeImageGen] Using character-consistent generation for task {task_id}")
            try:
                # Use centralized character image generation with subscription checks and tracking
                image_bytes = generate_character_image(
                    prompt=image_prompt,
                    reference_image_bytes=base_avatar_bytes,
                    user_id=user_id,
                    style=request.style or "Realistic",
                    aspect_ratio=request.aspect_ratio or "16:9",
                    rendering_speed=request.rendering_speed or "Quality",
                    timeout=60,
                )
                model = "ideogram-character"
                generation_type = "character"
                use_regular_generation = False
                logger.info(f"[YouTubeImageGen] Character image generation successful for task {task_id}")
            except Exception as char_error:
                logger.warning(f"[YouTubeImageGen] Character generation failed for task {task_id}: {char_error}")
                logger.info(f"[YouTubeImageGen] Falling back to regular image generation for task {task_id}")
        else:
            logger.info(f"[YouTubeImageGen] Generating scene from scratch for task {task_id}")

        if use_regular_generation:
            # Use centralized image generation with subscription tracking.
            # custom_prompt is honoured only when no avatar was loaded; the
            # avatar fallback keeps the avatar-style prompt.
            regular_prompt = image_prompt if base_avatar_bytes else (request.custom_prompt or image_prompt)
            result = generate_image(
                prompt=regular_prompt,
                options={
                    "provider": provider,
                    "model": model,
                    "width": width,
                    "height": height,
                },
                user_id=user_id,
            )
            image_bytes = result.image_bytes

        # Validate image bytes before saving
        if not image_bytes:
            raise ValueError("Image generation returned empty bytes")

        # Basic validation: check if it's a valid image (PNG/JPEG header)
        if not image_bytes.startswith((b'\x89PNG', b'\xff\xd8\xff')):
            # Don't fail - some formats might be valid, but log warning
            logger.warning(f"[YouTubeImageGen] Generated image may not be valid PNG/JPEG for task {task_id}")

        task_manager.update_task_status(
            task_id, "processing", progress=80.0, message="Saving image..."
        )

        # Save image with validation
        try:
            image_metadata = _save_scene_image(image_bytes, request.scene_id)

            # Verify file was saved correctly
            saved_path = Path(image_metadata["image_path"])
            if not saved_path.exists() or saved_path.stat().st_size == 0:
                raise IOError(f"Image file was not saved correctly: {saved_path}")

            logger.info(f"[YouTubeImageGen] Image saved successfully: {saved_path} ({saved_path.stat().st_size} bytes)")
        except Exception as save_error:
            logger.error(f"[YouTubeImageGen] Failed to save image for task {task_id}: {save_error}", exc_info=True)
            raise

        cost = 0.10 if model == "ideogram-v3-turbo" else 0.05

        # Save to asset library (best effort: the image is already on disk)
        try:
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="image",
                source_module="youtube_creator",
                filename=image_metadata["image_filename"],
                file_url=image_metadata["image_url"],
                file_path=image_metadata["image_path"],
                file_size=len(image_bytes),
                mime_type="image/png",
                title=f"{request.scene_title} - YouTube Scene",
                description=f"YouTube scene image for: {request.scene_title}",
                tags=["youtube_creator", "scene_image", f"scene_{request.scene_id}"],
                provider=provider,
                model=model,
                cost=cost,
                asset_metadata={
                    "scene_id": request.scene_id,
                    "scene_title": request.scene_title,
                    "generation_type": generation_type,
                    "width": width,
                    "height": height,
                },
            )
        except Exception as e:
            logger.warning(f"[YouTubeImageGen] Failed to save image asset to library: {e}")

        # Success!
        task_manager.update_task_status(
            task_id,
            "completed",
            progress=100.0,
            message=f"Image generated successfully for '{request.scene_title}'",
            result={
                "scene_id": request.scene_id,
                "scene_title": request.scene_title,
                "image_filename": image_metadata["image_filename"],
                "image_url": image_metadata["image_url"],
                "provider": provider,
                "model": model,
                "width": width,
                "height": height,
                "file_size": len(image_bytes),
                "cost": cost,
            }
        )

        logger.info(f"[YouTubeImageGen] ✅ Task {task_id} completed successfully")

    except Exception as exc:
        error_msg = str(exc)
        logger.error(f"[YouTubeImageGen] Task {task_id} failed: {error_msg}", exc_info=True)
        task_manager.update_task_status(
            task_id,
            "failed",
            error=error_msg,
            message=f"Image generation failed: {error_msg}"
        )
    finally:
        if db:
            db.close()
            logger.info(f"[YouTubeImageGen] Database session closed for task {task_id}")
|
||||
|
||||
|
||||
@router.get("/image/status/{task_id}")
async def get_image_generation_status(
    task_id: str,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Report the current state of an image generation task.

    Returns the task's status record as stored by ``task_manager``; raises a
    404 with a structured detail payload when the task is unknown.
    """
    require_authenticated_user(current_user)

    logger.info(f"[YouTubeAPI] Getting image generation status for task: {task_id}")
    task_status = task_manager.get_task_status(task_id)

    # Guard clause: unknown task -> structured 404.
    if not task_status:
        logger.warning(
            f"[YouTubeAPI] Image generation task {task_id} not found."
        )
        not_found_detail = {
            "error": "Task not found",
            "message": "The image generation task was not found. It may have expired, been cleaned up, or the server may have restarted.",
            "task_id": task_id,
            "user_action": "Please try generating the image again."
        }
        raise HTTPException(status_code=404, detail=not_found_detail)

    logger.info(f"[YouTubeAPI] Task {task_id} status: {task_status.get('status', 'unknown')}, progress: {task_status.get('progress', 0)}, has_result: {'result' in task_status}")
    return task_status
|
||||
|
||||
|
||||
@router.get("/images/{category}/{filename}")
async def serve_youtube_image(
    category: str,
    filename: str,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Serve stored YouTube images (avatars or scenes).
    Unified endpoint for both avatar and scene images.

    Args:
        category: ``"avatars"`` or ``"scenes"``; selects the storage directory.
        filename: Name of the stored file (no path components allowed).
        current_user: Authenticated user payload from the auth dependency.

    Returns:
        A ``FileResponse`` for the image. The content type is derived from the
        file extension when it maps to an ``image/*`` type, otherwise
        ``image/png``.

    Raises:
        HTTPException: 400 for an unknown category or unsafe filename,
            404 when the file does not exist.
    """
    import mimetypes

    require_authenticated_user(current_user)

    if category not in {"avatars", "scenes"}:
        raise HTTPException(status_code=400, detail="Invalid image category. Must be 'avatars' or 'scenes'")

    # Reject anything that could escape the storage directory.
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    directory = YOUTUBE_AVATARS_DIR if category == "avatars" else YOUTUBE_IMAGES_DIR
    image_path = directory / filename

    if not image_path.exists() or not image_path.is_file():
        raise HTTPException(status_code=404, detail="Image not found")

    # Uploaded avatars keep their original extension, so the file is not always
    # a PNG. Only image/* types are honoured; anything else keeps the previous
    # image/png answer so a stray file is never served as HTML or script.
    guessed_type, _ = mimetypes.guess_type(filename)
    if guessed_type and guessed_type.startswith("image/"):
        media_type = guessed_type
    else:
        media_type = "image/png"

    return FileResponse(
        path=str(image_path),
        media_type=media_type,
        filename=filename,
    )
|
||||
1609
backend/api/youtube/router.py
Normal file
1609
backend/api/youtube/router.py
Normal file
File diff suppressed because it is too large
Load Diff
11
backend/api/youtube/task_manager.py
Normal file
11
backend/api/youtube/task_manager.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""
|
||||
Task Manager for YouTube Creator Studio
|
||||
|
||||
Reuses the Story Writer task manager pattern for async video rendering.
|
||||
"""
|
||||
|
||||
from api.story_writer.task_manager import TaskManager
|
||||
|
||||
# Shared task manager instance
|
||||
# Created once at import time; handler modules import this same instance
# (e.g. `from ..task_manager import task_manager` in handlers/images.py).
task_manager = TaskManager()
|
||||
|
||||
Reference in New Issue
Block a user