Added YouTube Creator scene building flow documentation

This commit is contained in:
ajaysi
2025-12-21 17:15:23 +05:30
parent 1d745c9bc8
commit 59913bffa9
51 changed files with 7478 additions and 631 deletions

View File

@@ -69,7 +69,8 @@ async def get_assets(
):
"""Get user's content assets with optional filtering."""
try:
user_id = current_user.get("user_id") or current_user.get("id")
# Auth middleware returns 'id' as the primary key
user_id = current_user.get("id") or current_user.get("user_id") or current_user.get("clerk_user_id")
if not user_id:
raise HTTPException(status_code=401, detail="User ID not found")

View File

@@ -0,0 +1,11 @@
"""
YouTube Creator handler package.

Contains endpoints for avatar upload/optimization and scene image generation.
"""

# Nothing is re-exported from the package itself; the handler submodules are
# imported explicitly by the code that mounts their routers.
__all__ = []

View File

@@ -0,0 +1,557 @@
"""YouTube Creator avatar upload and AI optimization handlers."""
from pathlib import Path
import uuid
from typing import Dict, Any, Optional
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from middleware.auth_middleware import get_current_user
from services.database import get_db
from services.llm_providers.main_image_generation import generate_image
from services.llm_providers.main_image_editing import edit_image
from utils.asset_tracker import save_asset_to_library
from utils.logger_utils import get_service_logger
# Router for the avatar endpoints; every route below is served under "/avatar".
router = APIRouter(prefix="/avatar", tags=["youtube-avatar"])
logger = get_service_logger("api.youtube.avatar")
# Directories
# base_dir is four directory levels above this file.
base_dir = Path(__file__).parent.parent.parent.parent
# Uploaded, generated and transformed avatars are all written to this folder.
YOUTUBE_AVATARS_DIR = base_dir / "youtube_avatars"
# Created at import time so handlers can write without checking for existence.
YOUTUBE_AVATARS_DIR.mkdir(parents=True, exist_ok=True)
def require_authenticated_user(current_user: Dict[str, Any]) -> str:
    """Extract and validate the user ID from the authenticated user payload.

    The auth middleware exposes the identifier under ``id``; ``user_id`` and
    ``clerk_user_id`` are accepted as fallbacks so this helper resolves the
    same identity as the content-asset endpoints do.

    Args:
        current_user: Payload produced by the ``get_current_user`` dependency.

    Returns:
        The user identifier as a string.

    Raises:
        HTTPException: 401 when no identifier is present.
    """
    user_id = None
    if current_user:
        # First non-empty candidate wins; "id" keeps priority.
        for key in ("id", "user_id", "clerk_user_id"):
            user_id = current_user.get(key)
            if user_id:
                break
    if not user_id:
        raise HTTPException(status_code=401, detail="Authentication required")
    return str(user_id)
def _load_youtube_image_bytes(image_url: str) -> bytes:
    """Load avatar bytes from a stored YouTube avatar URL.

    Only the last path segment of ``image_url`` (minus any query string) is
    used, and it is resolved inside ``YOUTUBE_AVATARS_DIR``.

    Args:
        image_url: URL or path whose final segment names a stored avatar file.

    Returns:
        The raw bytes of the avatar file.

    Raises:
        HTTPException: 404 when the name is empty, unsafe, or not an existing file.
    """
    filename = image_url.split("/")[-1].split("?")[0]
    # Refuse names that could step outside the avatar directory. Splitting on
    # "/" already removes forward slashes, but "..", and backslashes on
    # Windows, would still be interpreted as path components.
    if not filename or ".." in filename or "\\" in filename:
        raise HTTPException(status_code=404, detail="Avatar image not found")
    image_path = YOUTUBE_AVATARS_DIR / filename
    if not image_path.is_file():
        raise HTTPException(status_code=404, detail="Avatar image not found")
    return image_path.read_bytes()
# Prompt fragment used when the plan carries no avatar description.
_AVATAR_DEFAULT_DESCRIPTION = "Half-length portrait of a professional YouTube creator (25-35 years old)"

# Exact video-type matches (compared case-insensitively).
_AVATAR_VIDEO_TYPE_PROMPTS = {
    "tutorial": "approachable instructor, professional yet friendly, clear presentation style",
    "review": "trustworthy reviewer, confident, credible appearance",
    "educational": "knowledgeable educator, professional, warm and engaging",
    "entertainment": "energetic creator, expressive, fun and relatable",
    "vlog": "authentic person, approachable, real and relatable",
    "product_demo": "professional presenter, polished, confident and enthusiastic",
    "reaction": "expressive creator, authentic reactions, engaging",
    "storytelling": "storyteller, warm, engaging narrator",
}

# Keyword fallbacks for free-form video types; the first matching row wins.
_AVATAR_VIDEO_KEYWORD_PROMPTS = (
    (("tech",), "tech-forward style"),
    (("travel",), "travel vlogger aesthetic"),
    (("education", "learn"), "educational creator, clean and clear presentation"),
)

# Audience keywords; the first matching row wins.
_AVATAR_AUDIENCE_PROMPTS = (
    (("young", "gen z", "millennial"), "youthful, vibrant, modern vibe"),
    (("executive", "professional", "business"), "polished, credible, authoritative presence"),
    (("creative",), "artistic, expressive, creative professional"),
    (("parents", "family"), "warm, approachable, trustworthy presence"),
)

# Visual-style keywords; every matching row contributes a fragment.
_AVATAR_VISUAL_STYLE_PROMPTS = (
    (("minimal",), "clean, minimalist aesthetic"),  # also matches "minimalist"
    (("tech", "modern"), "tech-forward, modern style"),
    (("energetic", "colorful", "vibrant"), "vibrant, energetic appearance"),
    (("cinematic",), "cinematic, polished presentation"),
    (("professional",), "professional, polished aesthetic"),
)

# Tone keywords; every matching row contributes a fragment.
_AVATAR_TONE_PROMPTS = (
    (("casual",), "casual, approachable style"),
    (("professional",), "professional attire and presentation"),
    (("energetic", "fun"), "energetic, lively expression"),
    (("warm",), "warm, friendly expression"),
)

# Technical requirements appended to every avatar prompt.
_AVATAR_TECHNICAL_PROMPTS = (
    "photo-realistic, professional photography",
    "confident, engaging expression",
    "professional studio lighting, clean background",
    "suitable for video generation and thumbnails",
    "ultra realistic, 4k quality, 85mm lens",
    "looking at camera, center-focused composition",
)


def _avatar_text(value: Any) -> str:
    """Return ``value`` when it is a string, otherwise an empty string."""
    return value if isinstance(value, str) else ""


def _avatar_keyword_phrases(text: str, table: Any) -> Any:
    """Yield the phrase of every table row that has a keyword occurring in ``text``."""
    return (phrase for keywords, phrase in table if any(word in text for word in keywords))


def _avatar_filename_part(project_id: Optional[str]) -> str:
    """Reduce ``project_id`` to characters that are safe inside a filename."""
    cleaned = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in (project_id or ""))
    return cleaned[:64] or "temp"


def _parse_avatar_video_plan(video_plan_json: Optional[str]) -> Dict[str, Any]:
    """Parse the video plan JSON, returning ``{}`` unless it is a JSON object."""
    if not video_plan_json:
        return {}
    import json

    try:
        parsed = json.loads(video_plan_json)
    except (TypeError, ValueError) as e:
        logger.warning(f"[YouTube] Failed to parse video plan JSON: {e}")
        return {}
    if not isinstance(parsed, dict):
        # A list/number/string plan would break every later ``.get`` call.
        logger.warning("[YouTube] Failed to parse video plan JSON: expected a JSON object")
        return {}
    return parsed


def _build_avatar_prompt(
    video_type: str,
    audience: str,
    visual_style: str,
    tone: str,
    recommendations: Dict[str, Any],
) -> str:
    """Assemble the comma-separated image prompt from the resolved context."""
    parts = [_avatar_text(recommendations.get("description")) or _AVATAR_DEFAULT_DESCRIPTION]

    if video_type:
        key = video_type.lower()
        phrase = _AVATAR_VIDEO_TYPE_PROMPTS.get(key)
        if phrase is None:
            phrase = next(
                _avatar_keyword_phrases(key, _AVATAR_VIDEO_KEYWORD_PROMPTS),
                "modern creator style",
            )
        parts.append(phrase)

    if audience:
        phrase = next(_avatar_keyword_phrases(audience.lower(), _AVATAR_AUDIENCE_PROMPTS), None)
        if phrase:
            parts.append(phrase)

    if visual_style:
        parts.extend(_avatar_keyword_phrases(visual_style.lower(), _AVATAR_VISUAL_STYLE_PROMPTS))
    if tone:
        parts.extend(_avatar_keyword_phrases(tone.lower(), _AVATAR_TONE_PROMPTS))

    for key in ("style", "energy"):
        extra = _avatar_text(recommendations.get(key))
        if extra:
            parts.append(extra)

    parts.extend(_AVATAR_TECHNICAL_PROMPTS)
    return ", ".join(parts)


async def _generate_avatar_from_context(
    user_id: str,
    project_id: Optional[str],
    audience: Optional[str] = None,
    content_type: Optional[str] = None,
    video_plan_json: Optional[str] = None,
    brand_style: Optional[str] = None,
    db: Optional[Session] = None,
) -> Dict[str, Any]:
    """
    Internal function to generate avatar from context.
    Can be called from route handler or directly from router.

    Explicit user inputs take priority over values found in the video plan,
    which in turn take priority over built-in defaults.

    Args:
        user_id: Owner of the generated avatar.
        project_id: Optional project the avatar belongs to. It is embedded
            (sanitized) in the filename; the asset library entry is only
            written when both ``project_id`` and ``db`` are given.
        audience: User-supplied target audience.
        content_type: User-supplied video type.
        video_plan_json: JSON-encoded video plan. Invalid JSON or a non-object
            document is logged and ignored.
        brand_style: User-supplied visual style.
        db: Database session used for the asset library entry.

    Returns:
        Dict with ``avatar_url``, ``avatar_filename``, ``avatar_prompt`` and
        ``message``.
    """
    plan_data = _parse_avatar_video_plan(video_plan_json)
    avatar_recommendations = plan_data.get("avatar_recommendations")
    if not isinstance(avatar_recommendations, dict):
        avatar_recommendations = {}

    # Priority: user input > plan data > defaults
    plan_target_audience = audience or _avatar_text(plan_data.get("target_audience"))
    plan_video_type = content_type or _avatar_text(plan_data.get("video_type"))
    plan_visual_style = brand_style or _avatar_text(plan_data.get("visual_style"))
    plan_tone = _avatar_text(plan_data.get("tone"))

    logger.info(
        f"[YouTube] Avatar generation context: "
        f"video_type={plan_video_type}, audience={plan_target_audience[:50] if plan_target_audience else 'none'}, "
        f"brand_style={plan_visual_style[:50] if plan_visual_style else 'none'}"
    )

    prompt = _build_avatar_prompt(
        video_type=plan_video_type,
        audience=plan_target_audience,
        visual_style=plan_visual_style,
        tone=plan_tone,
        recommendations=avatar_recommendations,
    )

    # A fresh random 32-bit seed gives a different image on every call.
    seed = int(uuid.uuid4().int % (2**32))
    image_options = {
        "provider": "wavespeed",
        "model": "ideogram-v3-turbo",
        "width": 1024,
        "height": 1024,
        "seed": seed,
    }
    # NOTE(review): generate_image is called without await inside this
    # coroutine; if it performs blocking I/O it will hold the event loop.
    result = generate_image(
        prompt=prompt,
        options=image_options,
        user_id=user_id,
    )

    unique_id = str(uuid.uuid4())[:8]
    avatar_filename = f"yt_generated_{_avatar_filename_part(project_id)}_{unique_id}.png"
    avatar_path = YOUTUBE_AVATARS_DIR / avatar_filename
    with open(avatar_path, "wb") as f:
        f.write(result.image_bytes)
    avatar_url = f"/api/youtube/images/avatars/{avatar_filename}"
    logger.info(f"[YouTube] Generated creator avatar: {avatar_path}")

    if project_id and db:
        # Best effort: a failed library write must not fail the generation.
        try:
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="image",
                source_module="youtube_creator",
                filename=avatar_filename,
                file_url=avatar_url,
                file_path=str(avatar_path),
                file_size=len(result.image_bytes),
                mime_type="image/png",
                title=f"YouTube Creator Avatar (Generated) - {project_id}",
                description="AI-generated YouTube creator avatar",
                prompt=prompt,
                tags=["youtube", "avatar", "generated", project_id],
                provider=result.provider,
                model=result.model,
                asset_metadata={
                    "project_id": project_id,
                    "type": "generated_presenter",
                    "status": "completed",
                },
            )
        except Exception as e:
            logger.warning(f"[YouTube] Failed to save generated avatar asset: {e}")

    return {
        "avatar_url": avatar_url,
        "avatar_filename": avatar_filename,
        "avatar_prompt": prompt,
        "message": "Avatar generated successfully",
    }
@router.post("/upload")
async def upload_youtube_avatar(
    file: UploadFile = File(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Upload a YouTube creator avatar image.

    The file is stored in ``YOUTUBE_AVATARS_DIR`` under a generated name and,
    when ``project_id`` is given, registered in the asset library (best effort).

    Returns:
        Dict with ``avatar_url``, ``avatar_filename`` and ``message``.

    Raises:
        HTTPException: 400 for an empty or oversized upload, 401 when the
            caller is not authenticated, 500 when storing the file fails.
    """
    user_id = require_authenticated_user(current_user)

    max_bytes = 5 * 1024 * 1024
    # Read one byte past the limit: enough to detect an oversized upload
    # without copying the whole payload into memory first.
    file_content = await file.read(max_bytes + 1)
    if not file_content:
        raise HTTPException(status_code=400, detail="No file uploaded")
    # Validate size (max 5MB)
    if len(file_content) > max_bytes:
        raise HTTPException(status_code=400, detail="Image file size must be less than 5MB")

    try:
        # The client controls the filename: it may be missing, and only a
        # short alphanumeric extension is carried over into the stored name.
        file_ext = Path(file.filename or "").suffix
        if not (file_ext[1:].isalnum() and len(file_ext) <= 10):
            file_ext = ".png"
        # project_id is client input as well; keep filename-safe characters only.
        project_part = "".join(
            ch if ch.isalnum() or ch in "-_" else "_" for ch in (project_id or "")
        )[:64] or "temp"

        unique_id = str(uuid.uuid4())[:8]
        avatar_filename = f"yt_avatar_{project_part}_{unique_id}{file_ext}"
        avatar_path = YOUTUBE_AVATARS_DIR / avatar_filename
        with open(avatar_path, "wb") as f:
            f.write(file_content)
        avatar_url = f"/api/youtube/images/avatars/{avatar_filename}"
        logger.info(f"[YouTube] Avatar uploaded: {avatar_path}")

        if project_id:
            # Best effort: the upload succeeds even if the library write fails.
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
                    source_module="youtube_creator",
                    filename=avatar_filename,
                    file_url=avatar_url,
                    file_path=str(avatar_path),
                    file_size=len(file_content),
                    mime_type=file.content_type or "image/png",
                    title=f"YouTube Creator Avatar - {project_id}",
                    description="YouTube creator avatar image",
                    tags=["youtube", "avatar", project_id],
                    asset_metadata={
                        "project_id": project_id,
                        "type": "creator_avatar",
                        "status": "completed",
                    },
                )
            except Exception as e:
                logger.warning(f"[YouTube] Failed to save avatar asset: {e}")

        return {
            "avatar_url": avatar_url,
            "avatar_filename": avatar_filename,
            "message": "Avatar uploaded successfully",
        }
    except Exception as exc:
        logger.error(f"[YouTube] Avatar upload failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Avatar upload failed: {str(exc)}")
@router.post("/make-presentable")
async def make_avatar_presentable(
    avatar_url: str = Form(...),
    project_id: Optional[str] = Form(None),
    video_type: Optional[str] = Form(None),
    target_audience: Optional[str] = Form(None),
    video_goal: Optional[str] = Form(None),
    brand_style: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Transform an uploaded avatar image into a YouTube-appropriate creator.
    Uses AI image editing with enhanced prompts to optimize the uploaded photo.

    ``video_goal`` is accepted for API compatibility but is not used when
    building the prompt.

    Returns:
        Dict with ``avatar_url``, ``avatar_filename`` and ``message``.

    Raises:
        HTTPException: 401 when unauthenticated, 404 when the source avatar is
            missing, 500 for any other failure. HTTP errors raised by the
            underlying services are passed through unchanged.
    """
    user_id = require_authenticated_user(current_user)

    # Exact (case-insensitive) video-type matches.
    video_type_prompts = {
        "tutorial": "Approachable instructor style, professional yet friendly appearance",
        "review": "Trustworthy reviewer style, confident and credible appearance",
        "educational": "Knowledgeable educator style, professional and warm appearance",
        "entertainment": "Energetic creator style, expressive and fun appearance",
        "vlog": "Authentic vlogger style, approachable and relatable appearance",
        "product_demo": "Professional presenter style, polished and enthusiastic appearance",
        "reaction": "Expressive creator style, authentic and engaging appearance",
        "storytelling": "Storyteller style, warm and engaging narrator appearance",
    }

    try:
        avatar_bytes = _load_youtube_image_bytes(avatar_url)
        logger.info(f"[YouTube] 🔍 Starting avatar transformation for user_id={user_id}, project={project_id}")

        # Build context-aware transformation prompt using user inputs
        prompt_parts = [
            "Transform this photo into a professional YouTube creator avatar:",
            "Significantly enhance and optimize the image for YouTube video production;",
            "Apply professional photo editing: improve lighting, color grading, and composition;",
            "Enhance facial features: brighten eyes, smooth skin, add professional makeup if needed;",
            "Improve background: replace with clean, professional studio background or subtle gradient;",
            "Adjust clothing: ensure professional, YouTube-appropriate attire;",
            "Optimize for video: ensure the person looks natural and engaging on camera;",
            "Half-length portrait format, person looking directly at camera with confident, engaging expression;",
            "Professional studio lighting with soft shadows, high-quality photography;",
            "Maintain the person's core appearance and identity while making significant improvements;",
            "Ultra realistic, 4k quality, professional photography style;",
            "Suitable for video generation, thumbnails, and YouTube channel branding.",
        ]

        # Add context from user inputs to make transformation more targeted
        if video_type:
            type_phrase = video_type_prompts.get(video_type.lower())
            if type_phrase:
                prompt_parts.append(type_phrase)

        if target_audience:
            audience_lower = target_audience.lower()
            if any(word in audience_lower for word in ("young", "gen z", "millennial")):
                prompt_parts.append("Modern, youthful, vibrant aesthetic")
            elif any(word in audience_lower for word in ("executive", "professional", "business")):
                prompt_parts.append("Polished, credible, authoritative professional appearance")
            elif "creative" in audience_lower:
                prompt_parts.append("Artistic, expressive, creative professional style")

        if brand_style:
            style_lower = brand_style.lower()
            if "minimal" in style_lower:  # also covers "minimalist"
                prompt_parts.append("Clean, minimalist aesthetic")
            if "tech" in style_lower or "modern" in style_lower:
                prompt_parts.append("Tech-forward, modern style")
            if "energetic" in style_lower or "colorful" in style_lower:
                prompt_parts.append("Vibrant, energetic appearance")

        base_prompt = " ".join(prompt_parts)

        # Optimize the prompt using WaveSpeed prompt optimizer for better results;
        # any failure (including an empty result) falls back to the base prompt.
        try:
            from services.wavespeed.client import WaveSpeedClient

            wavespeed_client = WaveSpeedClient()
            logger.info("[YouTube] Optimizing transformation prompt using WaveSpeed prompt optimizer")
            transformation_prompt = wavespeed_client.optimize_prompt(
                text=base_prompt,
                mode="image",
                style="realistic",  # Use realistic style for photo editing
                enable_sync_mode=True,
                timeout=30,
            )
            if not isinstance(transformation_prompt, str) or not transformation_prompt.strip():
                raise ValueError("prompt optimizer returned an empty result")
            logger.info(f"[YouTube] Prompt optimized successfully (length: {len(transformation_prompt)} chars)")
        except Exception as opt_error:
            logger.warning(f"[YouTube] Prompt optimization failed, using base prompt: {opt_error}")
            transformation_prompt = base_prompt

        # Use HuggingFace for image editing (only available option)
        # Note: This uses async processing with polling (~30 seconds expected)
        image_options = {
            "provider": "huggingface",  # Explicitly use HuggingFace (only option for image editing)
            "model": None,  # Use default model (Qwen/Qwen-Image-Edit)
        }
        logger.info("[YouTube] Starting avatar transformation (this may take ~30 seconds due to async processing)")
        result = edit_image(
            input_image_bytes=avatar_bytes,
            prompt=transformation_prompt,
            options=image_options,
            user_id=user_id,
        )
        logger.info("[YouTube] ✅ Avatar transformation completed successfully")

        # project_id is client input; keep only filename-safe characters.
        project_part = "".join(
            ch if ch.isalnum() or ch in "-_" else "_" for ch in (project_id or "")
        )[:64] or "temp"
        unique_id = str(uuid.uuid4())[:8]
        transformed_filename = f"yt_presenter_{project_part}_{unique_id}.png"
        transformed_path = YOUTUBE_AVATARS_DIR / transformed_filename
        with open(transformed_path, "wb") as f:
            f.write(result.image_bytes)
        transformed_url = f"/api/youtube/images/avatars/{transformed_filename}"
        logger.info(f"[YouTube] Transformed avatar saved to: {transformed_path}")

        if project_id:
            # Best effort: a failed library write must not fail the request.
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
                    source_module="youtube_creator",
                    filename=transformed_filename,
                    file_url=transformed_url,
                    file_path=str(transformed_path),
                    file_size=len(result.image_bytes),
                    mime_type="image/png",
                    title=f"YouTube Creator (Transformed) - {project_id}",
                    description="AI-transformed YouTube creator avatar from uploaded photo",
                    prompt=transformation_prompt,
                    tags=["youtube", "avatar", "presenter", project_id],
                    provider=result.provider,
                    model=result.model,
                    asset_metadata={
                        "project_id": project_id,
                        "type": "transformed_presenter",
                        "original_avatar_url": avatar_url,
                        "status": "completed",
                    },
                )
            except Exception as e:
                logger.warning(f"[YouTube] Failed to save transformed avatar asset: {e}")

        return {
            "avatar_url": transformed_url,
            "avatar_filename": transformed_filename,
            "message": "Avatar transformed successfully",
        }
    except HTTPException:
        # Keep deliberate HTTP errors (e.g. the 404 for a missing avatar)
        # instead of rewrapping them as a generic 500.
        raise
    except Exception as exc:
        logger.error(f"[YouTube] Avatar transformation failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Avatar transformation failed: {str(exc)}")
@router.post("/generate")
async def generate_creator_avatar(
    project_id: Optional[str] = Form(None),
    audience: Optional[str] = Form(None),
    content_type: Optional[str] = Form(None),
    video_plan_json: Optional[str] = Form(None),
    brand_style: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Auto-generate a YouTube creator avatar optimized from video plan context.
    Uses video plan data (if provided) and user inputs to generate an avatar that matches
    the video type, audience, tone, and brand style.

    Returns:
        The dict produced by ``_generate_avatar_from_context``.

    Raises:
        HTTPException: 401 when unauthenticated, 500 for unexpected failures.
            HTTP errors raised further down the call chain are passed through
            with their original status code.
    """
    user_id = require_authenticated_user(current_user)
    try:
        return await _generate_avatar_from_context(
            user_id=user_id,
            project_id=project_id,
            audience=audience,
            content_type=content_type,
            video_plan_json=video_plan_json,
            brand_style=brand_style,
            db=db,
        )
    except HTTPException:
        # Do not rewrap deliberate HTTP errors as a generic 500.
        raise
    except Exception as exc:
        logger.error(f"[YouTube] Avatar generation failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Avatar generation failed: {str(exc)}")
@router.post("/regenerate")
async def regenerate_creator_avatar(
    video_plan_json: str = Form(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Regenerate a YouTube creator avatar using the same video plan context.
    Takes the video plan JSON and regenerates an avatar with a different seed
    to provide variation while maintaining the same optimization based on plan data.

    Returns:
        Dict with ``avatar_url``, ``avatar_filename``, ``avatar_prompt`` and
        ``message``.

    Raises:
        HTTPException: 400 when ``video_plan_json`` is not a JSON object, 401
            when unauthenticated, 500 for unexpected failures. HTTP errors
            raised further down the call chain are passed through unchanged.
    """
    user_id = require_authenticated_user(current_user)

    # A malformed plan is a client error, so report it as 400 rather than 500.
    import json

    try:
        plan_data = json.loads(video_plan_json)
    except (TypeError, ValueError):
        raise HTTPException(status_code=400, detail="Invalid video plan JSON")
    if not isinstance(plan_data, dict):
        raise HTTPException(status_code=400, detail="Invalid video plan JSON")

    def plan_text(key: str) -> str:
        """Return the plan value for ``key`` when it is a string, else ''."""
        value = plan_data.get(key, "")
        return value if isinstance(value, str) else ""

    try:
        # Extract context from plan data
        audience = plan_text("target_audience")
        content_type = plan_text("video_type")
        brand_style = plan_text("visual_style")
        logger.info(
            f"[YouTube] Regenerating avatar for project {project_id}: "
            f"video_type={content_type}, audience={audience[:50] if audience else 'none'}"
        )
        avatar_response = await _generate_avatar_from_context(
            user_id=user_id,
            project_id=project_id,
            audience=audience,
            content_type=content_type,
            video_plan_json=video_plan_json,
            brand_style=brand_style,
            db=db,
        )
        # Return the avatar prompt along with the URL for the frontend
        return {
            "avatar_url": avatar_response.get("avatar_url"),
            "avatar_filename": avatar_response.get("avatar_filename"),
            "avatar_prompt": avatar_response.get("avatar_prompt"),
            "message": "Avatar regenerated successfully",
        }
    except HTTPException:
        # Do not rewrap deliberate HTTP errors as a generic 500.
        raise
    except Exception as exc:
        logger.error(f"[YouTube] Avatar regeneration failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Avatar regeneration failed: {str(exc)}")

View File

@@ -0,0 +1,259 @@
"""YouTube Creator scene image generation handlers."""
from pathlib import Path
from typing import Dict, Any, Optional
import uuid
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import FileResponse
from pydantic import BaseModel
from sqlalchemy.orm import Session
from middleware.auth_middleware import get_current_user
from services.database import get_db
from services.subscription import PricingService
from services.subscription.preflight_validator import validate_image_generation_operations
from services.llm_providers.main_image_generation import generate_image
from services.wavespeed.client import WaveSpeedClient
from utils.asset_tracker import save_asset_to_library
from utils.logger_utils import get_service_logger
# Router for scene image generation and image serving; no path prefix is set here.
router = APIRouter(tags=["youtube-image"])
logger = get_service_logger("api.youtube.image")
# Directories
# base_dir is four directory levels above this file.
base_dir = Path(__file__).parent.parent.parent.parent
# Generated scene images are written to this folder, created at import time.
YOUTUBE_IMAGES_DIR = base_dir / "youtube_images"
YOUTUBE_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
# Avatars are only read from this folder here; this module does not create it.
YOUTUBE_AVATARS_DIR = base_dir / "youtube_avatars"
class YouTubeImageRequest(BaseModel):
    # Request body for the scene image generation endpoint.

    # Scene identifier; used in the stored filename, asset tags and response.
    scene_id: str
    # Declared optional, but the handler rejects requests without a title (400).
    scene_title: Optional[str] = None
    # Scene text; a truncated excerpt is added to the generated prompt.
    scene_content: Optional[str] = None
    # URL of a stored avatar; when set, character-consistent generation is used.
    base_avatar_url: Optional[str] = None
    # Overall video idea; a truncated excerpt is added to the generated prompt.
    idea: Optional[str] = None
    # Requested output size in pixels.
    # NOTE(review): height defaults to 1024 here while the handler falls back
    # to 576 when the value is falsy — confirm which default is intended.
    width: Optional[int] = 1024
    height: Optional[int] = 1024
    # Caller-supplied prompt that replaces the auto-built one when generating
    # without a base avatar.
    custom_prompt: Optional[str] = None
    style: Optional[str] = None  # e.g., "Realistic", "Fiction"
    rendering_speed: Optional[str] = None  # e.g., "Quality", "Turbo"
    aspect_ratio: Optional[str] = None  # e.g., "16:9"
def require_authenticated_user(current_user: Dict[str, Any]) -> str:
    """Extract and validate the user ID from the authenticated user payload.

    The auth middleware exposes the identifier under ``id``; ``user_id`` and
    ``clerk_user_id`` are accepted as fallbacks so this helper resolves the
    same identity as the content-asset endpoints do.

    Args:
        current_user: Payload produced by the ``get_current_user`` dependency.

    Returns:
        The user identifier as a string.

    Raises:
        HTTPException: 401 when no identifier is present.
    """
    candidates = ("id", "user_id", "clerk_user_id")
    # First non-empty candidate wins; "id" keeps priority.
    user_id = next(
        (current_user[key] for key in candidates if current_user.get(key)),
        None,
    ) if current_user else None
    if not user_id:
        raise HTTPException(status_code=401, detail="Authentication required")
    return str(user_id)
def _load_base_avatar_bytes(avatar_url: str) -> bytes:
    """Load base avatar bytes for character consistency.

    Only the last path segment of ``avatar_url`` (minus any query string) is
    used, and it is resolved inside ``YOUTUBE_AVATARS_DIR``.

    Args:
        avatar_url: URL or path whose final segment names a stored avatar file.

    Returns:
        The raw bytes of the avatar file.

    Raises:
        HTTPException: 404 when the name is empty, unsafe, or not an existing file.
    """
    filename = avatar_url.split("/")[-1].split("?")[0]
    # Apply the same name restrictions the image-serving endpoint enforces, so
    # a crafted URL cannot point outside the avatar directory.
    if not filename or ".." in filename or "\\" in filename:
        raise HTTPException(status_code=404, detail="Base avatar image not found")
    avatar_path = YOUTUBE_AVATARS_DIR / filename
    if not avatar_path.is_file():
        raise HTTPException(status_code=404, detail="Base avatar image not found")
    return avatar_path.read_bytes()
def _save_scene_image(image_bytes: bytes, scene_id: str) -> Dict[str, str]:
    """Persist generated scene image and return file/url info.

    Args:
        image_bytes: Encoded image data to write.
        scene_id: Scene identifier supplied by the client. Characters other
            than letters, digits, ``-`` and ``_`` are replaced with ``_``
            before it is embedded in the filename.

    Returns:
        Dict with ``image_filename``, ``image_path`` and ``image_url``.
    """
    # scene_id comes straight from the request body; path separators or dots
    # in it must never reach the filesystem path or the served URL.
    safe_scene_id = "".join(
        ch if ch.isalnum() or ch in "-_" else "_" for ch in str(scene_id)
    )[:64] or "scene"
    unique_id = str(uuid.uuid4())[:8]
    image_filename = f"yt_scene_{safe_scene_id}_{unique_id}.png"
    image_path = YOUTUBE_IMAGES_DIR / image_filename
    with open(image_path, "wb") as f:
        f.write(image_bytes)
    return {
        "image_filename": image_filename,
        "image_path": str(image_path),
        "image_url": f"/api/youtube/images/scenes/{image_filename}",
    }
@router.post("/image")
async def generate_youtube_scene_image(
    request: YouTubeImageRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Generate a YouTube scene image, with optional avatar consistency.

    When ``base_avatar_url`` is given, the stored avatar is used as a
    reference for character-consistent generation; otherwise the image is
    generated from the prompt alone. ``custom_prompt``, when provided,
    replaces the auto-built prompt in both paths, and the prompt actually
    used is the one recorded in the asset library.

    Returns:
        Dict with ``scene_id``, ``scene_title``, ``image_filename``,
        ``image_url``, ``width`` and ``height``.

    Raises:
        HTTPException: 400 when the scene title is missing, 401 when
            unauthenticated, 404 when the base avatar is missing, 500 for
            unexpected failures. HTTP errors raised by validation or the
            providers are passed through unchanged.
    """
    user_id = require_authenticated_user(current_user)
    if not request.scene_title:
        raise HTTPException(status_code=400, detail="Scene title is required")

    # Dimensions reported back and used for prompt-only generation.
    out_width = request.width or 1024
    out_height = request.height or 576

    try:
        # Pre-flight subscription validation
        pricing_service = PricingService(db)
        validate_image_generation_operations(
            pricing_service=pricing_service,
            user_id=user_id,
            num_images=1,
        )
        logger.info(f"[YouTube] ✅ Pre-flight validation passed for user {user_id}")

        base_avatar_bytes = None
        if request.base_avatar_url:
            try:
                base_avatar_bytes = _load_base_avatar_bytes(request.base_avatar_url)
                logger.info(f"[YouTube] Loaded base avatar for scene {request.scene_id}")
            except HTTPException:
                raise
            except Exception as e:
                logger.error(f"[YouTube] Failed to load base avatar: {e}", exc_info=True)
                raise HTTPException(
                    status_code=500,
                    detail={
                        "error": "Failed to load base avatar",
                        "message": f"Could not load the base avatar image: {str(e)}",
                    },
                )

        # Build prompt
        if base_avatar_bytes:
            prompt_parts = [f"Scene: {request.scene_title}"]
            if request.scene_content:
                content_preview = request.scene_content[:200].replace("\n", " ").strip()
                prompt_parts.append(f"Context: {content_preview}")
            if request.idea:
                prompt_parts.append(f"Video idea: {request.idea[:80].strip()}")
            prompt_parts.append("YouTube creator on camera, engaging and dynamic framing")
            prompt_parts.append("Clean background, good lighting, thumbnail-friendly composition")
        else:
            prompt_parts = [
                "YouTube creator scene",
                "clean, modern background",
                "good lighting, high contrast for thumbnail clarity",
                f"Scene theme: {request.scene_title}",
            ]
            if request.scene_content:
                context_excerpt = request.scene_content[:120].replace("\n", " ")
                prompt_parts.append(f"Context: {context_excerpt}")
            if request.idea:
                prompt_parts.append(f"Topic: {request.idea[:80]}")
            prompt_parts.append("video-optimized composition, 16:9 aspect ratio")
        image_prompt = ", ".join(prompt_parts)

        # A caller-supplied prompt overrides the auto-built one on both paths.
        effective_prompt = request.custom_prompt or image_prompt

        # Generate image
        if base_avatar_bytes:
            logger.info(f"[YouTube] Using character-consistent generation for scene {request.scene_id}")
            wavespeed_client = WaveSpeedClient()
            image_bytes = wavespeed_client.generate_character_image(
                prompt=effective_prompt,
                reference_image_bytes=base_avatar_bytes,
                style=request.style or "Realistic",
                aspect_ratio=request.aspect_ratio or "16:9",
                rendering_speed=request.rendering_speed or "Quality",
                timeout=None,
            )
            provider = "wavespeed"
            model = "ideogram-character"
        else:
            logger.info(f"[YouTube] Generating scene {request.scene_id} from scratch")
            image_options = {
                "provider": "wavespeed",
                "model": "ideogram-v3-turbo",
                "width": out_width,
                "height": out_height,
            }
            result = generate_image(
                prompt=effective_prompt,
                options=image_options,
                user_id=user_id,
            )
            image_bytes = result.image_bytes
            provider = result.provider
            model = result.model

        # Save image
        saved = _save_scene_image(image_bytes, request.scene_id)

        # Save to asset library (best effort: never fails the request)
        try:
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="image",
                source_module="youtube_creator",
                filename=saved["image_filename"],
                file_url=saved["image_url"],
                file_path=saved["image_path"],
                file_size=len(image_bytes),
                mime_type="image/png",
                title=f"YouTube Scene: {request.scene_title or request.scene_id}",
                description=request.scene_content or f"Scene image for {request.scene_id}",
                prompt=effective_prompt,
                tags=["youtube_creator", "scene", request.scene_id],
                provider=provider,
                model=model,
                asset_metadata={
                    "scene_id": request.scene_id,
                    "scene_title": request.scene_title,
                    "has_base_avatar": bool(base_avatar_bytes),
                    "width": out_width,
                    "height": out_height,
                },
            )
        except Exception as e:
            logger.warning(f"[YouTube] Failed to save scene image to asset library: {e}")

        return {
            "scene_id": request.scene_id,
            "scene_title": request.scene_title,
            "image_filename": saved["image_filename"],
            "image_url": saved["image_url"],
            "width": out_width,
            "height": out_height,
        }
    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"[YouTube] Scene image generation failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to generate scene image: {str(exc)}")
@router.get("/images/{category}/{filename}")
async def serve_youtube_image(
    category: str,
    filename: str,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Serve stored YouTube images (avatars or scenes).
    Unified endpoint for both avatar and scene images.

    Args:
        category: ``"avatars"`` or ``"scenes"``; selects the storage folder.
        filename: Bare file name inside that folder (no path components).

    Returns:
        A ``FileResponse`` for the stored image.

    Raises:
        HTTPException: 400 for an unknown category or unsafe filename, 401
            when unauthenticated, 404 when the file does not exist.
    """
    require_authenticated_user(current_user)
    if category not in {"avatars", "scenes"}:
        raise HTTPException(status_code=400, detail="Invalid image category. Must be 'avatars' or 'scenes'")
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    directory = YOUTUBE_AVATARS_DIR if category == "avatars" else YOUTUBE_IMAGES_DIR
    image_path = directory / filename
    if not image_path.exists() or not image_path.is_file():
        raise HTTPException(status_code=404, detail="Image not found")

    # Uploaded avatars keep their original extension, so the media type must
    # follow it. Only raster image types are advertised; anything else keeps
    # the previous "image/png" so a stored file is never served as HTML/SVG.
    media_types = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".webp": "image/webp",
        ".gif": "image/gif",
    }
    media_type = media_types.get(Path(filename).suffix.lower(), "image/png")

    return FileResponse(
        path=str(image_path),
        media_type=media_type,
        filename=filename,
    )

View File

@@ -23,14 +23,24 @@ from services.subscription.preflight_validator import validate_scene_animation_o
from utils.logger_utils import get_service_logger
from utils.asset_tracker import save_asset_to_library
from .task_manager import task_manager
from .handlers import avatar as avatar_handlers
from .handlers import images as image_handlers
router = APIRouter(prefix="/youtube", tags=["youtube"])
logger = get_service_logger("api.youtube")
# Video output directory
# Video output and image directories
base_dir = Path(__file__).parent.parent.parent.parent
YOUTUBE_VIDEO_DIR = base_dir / "youtube_videos"
YOUTUBE_VIDEO_DIR.mkdir(parents=True, exist_ok=True)
YOUTUBE_AVATARS_DIR = base_dir / "youtube_avatars"
YOUTUBE_AVATARS_DIR.mkdir(parents=True, exist_ok=True)
YOUTUBE_IMAGES_DIR = base_dir / "youtube_images"
YOUTUBE_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
# Include sub-routers for avatar and images
router.include_router(avatar_handlers.router)
router.include_router(image_handlers.router)
# Request/Response Models
@@ -42,6 +52,23 @@ class VideoPlanRequest(BaseModel):
pattern="^(shorts|medium|long)$",
description="Video duration type: shorts (≤60s), medium (1-4min), long (4-10min)"
)
video_type: Optional[str] = Field(
None,
pattern="^(tutorial|review|educational|entertainment|vlog|product_demo|reaction|storytelling)$",
description="Video format type: tutorial, review, educational, entertainment, vlog, product_demo, reaction, storytelling"
)
target_audience: Optional[str] = Field(
None,
description="Target audience description (helps optimize tone, pace, and style)"
)
video_goal: Optional[str] = Field(
None,
description="Primary goal of the video (educate, sell, entertain, etc.)"
)
brand_style: Optional[str] = Field(
None,
description="Brand visual aesthetic and style preferences"
)
reference_image_description: Optional[str] = Field(
None,
description="Optional description of reference image for visual inspiration"
@@ -55,6 +82,14 @@ class VideoPlanRequest(BaseModel):
pattern="^(blog|story)$",
description="Type of source content: blog or story"
)
avatar_url: Optional[str] = Field(
None,
description="Optional avatar URL if user uploaded one before plan generation"
)
enable_research: Optional[bool] = Field(
True,
description="Enable Exa research to enhance plan with current information, trends, and better SEO keywords (default: True)"
)
class VideoPlanResponse(BaseModel):
@@ -158,6 +193,12 @@ async def create_video_plan(
f"duration={request.duration_type}, user={user_id}"
)
# Note: Research subscription checks are handled by ResearchService internally
# ResearchService validates limits before making API calls and raises HTTPException(429) if exceeded
# Note: Subscription checks for LLM are handled by llm_text_gen internally
# It validates limits before making API calls and raises HTTPException(429) if exceeded
# Get persona data if available
persona_data = None
try:
@@ -168,17 +209,75 @@ async def create_video_plan(
# Generate plan (optimized: for shorts, combine plan + scenes in one call)
planner = YouTubePlannerService()
plan = planner.generate_video_plan(
plan = await planner.generate_video_plan(
user_idea=request.user_idea,
duration_type=request.duration_type,
video_type=request.video_type,
target_audience=request.target_audience,
video_goal=request.video_goal,
brand_style=request.brand_style,
persona_data=persona_data,
reference_image_description=request.reference_image_description,
source_content_id=request.source_content_id,
source_content_type=request.source_content_type,
user_id=user_id,
include_scenes=(request.duration_type == "shorts"), # Optimize shorts
enable_research=getattr(request, 'enable_research', True), # Research enabled by default
)
# Avatar fallback: runs only when the request carries no avatar_url.
# First look for an image asset the user already has from the YouTube Creator
# module and reuse its file URL; otherwise generate a new avatar from the
# user's inputs plus the freshly generated plan.
# Results are written into `plan` under "auto_generated_avatar_url",
# "avatar_reused" and (new avatars only) "avatar_prompt".
# The original author notes that reuse exists to save AI calls during testing.
auto_avatar_url = None
if not request.avatar_url:
try:
# Imported lazily inside the handler rather than at module import time.
from services.content_asset_service import ContentAssetService
from models.content_asset_models import AssetType, AssetSource
# Look up an existing YouTube Creator image in the asset library.
# NOTE(review): the filter is only asset_type=IMAGE + source_module=YOUTUBE_CREATOR,
# so any image stored by this module would match, not just avatars - confirm
# that is intended.
asset_service = ContentAssetService(db)
existing_avatars = asset_service.get_assets(
user_id=user_id,
asset_type=AssetType.IMAGE,
source_module=AssetSource.YOUTUBE_CREATOR,
limit=1, # Only one asset is needed; assumes get_assets returns newest first - TODO confirm
)
# NOTE(review): the code below indexes the result with [0] and reads
# .file_url/.id/.created_at, i.e. it assumes get_assets returns a sequence
# of asset objects - verify against ContentAssetService.
if existing_avatars and len(existing_avatars) > 0:
# Reuse the first returned asset instead of generating a new avatar
existing_avatar = existing_avatars[0]
auto_avatar_url = existing_avatar.file_url
plan["auto_generated_avatar_url"] = auto_avatar_url
plan["avatar_reused"] = True # Tells the caller that no new avatar was generated
logger.info(
f"[YouTubeAPI] ♻️ Reusing existing avatar from asset library to save AI call: {auto_avatar_url} "
f"(asset_id: {existing_avatar.id}, created: {existing_avatar.created_at})"
)
else:
# No reusable asset found: generate a new avatar
import uuid
import json
from .handlers.avatar import _generate_avatar_from_context
# Pass both the original user inputs AND the plan data as generation context
logger.info(f"[YouTubeAPI] 🎨 No existing avatar found, generating new avatar...")
avatar_response = await _generate_avatar_from_context(
user_id=user_id,
# Synthetic project id: "plan_<user_id>_" plus 8 hex chars of a random UUID
project_id=f"plan_{user_id}_{uuid.uuid4().hex[:8]}",
audience=request.target_audience or plan.get("target_audience"), # Prefer user input, fall back to the plan
content_type=request.video_type, # User's video type selection (may be None)
# The plan is serialized with json.dumps, so it must be JSON-serializable here
video_plan_json=json.dumps(plan),
brand_style=request.brand_style, # User's brand style preference (may be None)
db=db,
)
# .get() is used, so a missing key yields None rather than raising
auto_avatar_url = avatar_response.get("avatar_url")
avatar_prompt = avatar_response.get("avatar_prompt")
plan["auto_generated_avatar_url"] = auto_avatar_url
plan["avatar_prompt"] = avatar_prompt # The AI prompt reported by the generator
plan["avatar_reused"] = False # Tells the caller that the avatar was newly generated
logger.info(f"[YouTubeAPI] ✅ Auto-generated new avatar based on user inputs and plan: {auto_avatar_url}")
# Broad catch on purpose: any failure in lookup or generation is logged as a
# warning (message only, no traceback).
except Exception as e:
logger.warning(f"[YouTubeAPI] Avatar generation/reuse failed (non-critical): {e}")
# Non-critical: continue without an avatar; plan keys set before the failure remain
return VideoPlanResponse(
success=True,
plan=plan,
@@ -212,12 +311,17 @@ async def build_scenes(
try:
user_id = require_authenticated_user(current_user)
duration_type = request.video_plan.get('duration_type', 'medium')
has_existing_scenes = bool(request.video_plan.get("scenes")) and request.video_plan.get("_scenes_included")
logger.info(
f"[YouTubeAPI] Building scenes: duration={request.video_plan.get('duration_type')}, "
f"custom_script={bool(request.custom_script)}, user={user_id}"
f"[YouTubeAPI] Building scenes: duration={duration_type}, "
f"custom_script={bool(request.custom_script)}, "
f"has_existing_scenes={has_existing_scenes}, "
f"user={user_id}"
)
# Build scenes
# Build scenes (optimized to reuse existing scenes if available)
scene_builder = YouTubeSceneBuilderService()
scenes = scene_builder.build_scenes_from_plan(
video_plan=request.video_plan,