Added YouTube Creator scene building flow documentation

2025-12-21 17:15:23 +05:30
parent 1d745c9bc8
commit 59913bffa9
51 changed files with 7478 additions and 631 deletions
--- a/backend/api/content_assets/router.py
+++ b/backend/api/content_assets/router.py
@@ -69,7 +69,8 @@ async def get_assets(
 ):
    """Get user's content assets with optional filtering."""
    try:
-        user_id = current_user.get("user_id") or current_user.get("id")
+        # Auth middleware returns 'id' as the primary key
+        user_id = current_user.get("id") or current_user.get("user_id") or current_user.get("clerk_user_id")
        if not user_id:
            raise HTTPException(status_code=401, detail="User ID not found")
        
--- a/backend/api/youtube/handlers/init.py
+++ b/backend/api/youtube/handlers/init.py
@@ -0,0 +1,11 @@
+"""
+YouTube Creator handler package.
+
+Contains endpoints for avatar upload/optimization and scene image generation.
+"""
+
+# Nothing is re-exported from the package itself; the empty __all__ makes
+# that explicit for `from ... import *`.
+__all__ = []
+
+
--- a/backend/api/youtube/handlers/avatar.py
+++ b/backend/api/youtube/handlers/avatar.py
@@ -0,0 +1,557 @@
+"""YouTube Creator avatar upload and AI optimization handlers."""
+
+from pathlib import Path
+import uuid
+from typing import Dict, Any, Optional
+
+from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
+from fastapi.responses import FileResponse
+from sqlalchemy.orm import Session
+
+from middleware.auth_middleware import get_current_user
+from services.database import get_db
+from services.llm_providers.main_image_generation import generate_image
+from services.llm_providers.main_image_editing import edit_image
+from utils.asset_tracker import save_asset_to_library
+from utils.logger_utils import get_service_logger
+
+# All routes declared in this module are served under the "/avatar" prefix.
+router = APIRouter(prefix="/avatar", tags=["youtube-avatar"])
+logger = get_service_logger("api.youtube.avatar")
+
+# Directories
+# Four levels up from this file (backend/api/youtube/handlers/avatar.py),
+# i.e. the "backend" directory.
+base_dir = Path(__file__).parent.parent.parent.parent
+YOUTUBE_AVATARS_DIR = base_dir / "youtube_avatars"
+# Created at import time so the handlers below can write into it directly.
+YOUTUBE_AVATARS_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def require_authenticated_user(current_user: Dict[str, Any]) -> str:
+    """Return the caller's user ID as a string, or reject the request.
+
+    Raises:
+        HTTPException: 401 when ``current_user`` is empty/None or has no
+            truthy ``"id"`` entry.
+    """
+    if current_user:
+        candidate = current_user.get("id")
+        if candidate:
+            return str(candidate)
+    raise HTTPException(status_code=401, detail="Authentication required")
+
+
+def _load_youtube_image_bytes(image_url: str) -> bytes:
+    """Load avatar bytes from a stored YouTube avatar URL.
+
+    Only the final path segment of ``image_url`` is used; it is resolved
+    inside ``YOUTUBE_AVATARS_DIR``.
+
+    Raises:
+        HTTPException: 404 when the URL has no filename or the file does not
+            exist in the avatars directory.
+    """
+    # Drop the query string and fragment BEFORE taking the last segment, so a
+    # "/" inside them (e.g. "?next=/x") cannot be mistaken for the filename.
+    path_part = image_url.split("?", 1)[0].split("#", 1)[0]
+    # Treat backslashes as separators too, so a Windows-style "..\\" sequence
+    # cannot escape the avatars directory.
+    filename = path_part.replace("\\", "/").rsplit("/", 1)[-1]
+    image_path = YOUTUBE_AVATARS_DIR / filename
+    # is_file() is False for missing paths and for directories ("", ".", "..").
+    if not filename or not image_path.is_file():
+        raise HTTPException(status_code=404, detail="Avatar image not found")
+    return image_path.read_bytes()
+
+
+async def _generate_avatar_from_context(
+    user_id: str,
+    project_id: Optional[str],
+    audience: Optional[str] = None,
+    content_type: Optional[str] = None,
+    video_plan_json: Optional[str] = None,
+    brand_style: Optional[str] = None,
+    db: Optional[Session] = None,
+) -> Dict[str, Any]:
+    """
+    Generate a creator avatar from user inputs and an optional video plan.
+
+    Builds a comma-separated image prompt, calls ``generate_image``, writes the
+    returned bytes into ``YOUTUBE_AVATARS_DIR`` and, when both ``project_id``
+    and ``db`` are given, records the file via ``save_asset_to_library``
+    (failures there are logged and ignored).
+
+    Args:
+        user_id: ID of the authenticated user, forwarded to the generator.
+        project_id: Optional project identifier; used in the filename and in
+            the asset record.
+        audience: Target audience; takes priority over the plan's
+            ``target_audience``.
+        content_type: Video type; takes priority over the plan's ``video_type``.
+        video_plan_json: JSON-encoded video plan. Ignored (with a warning) if
+            it cannot be parsed or is not a JSON object.
+        brand_style: Visual style; takes priority over the plan's
+            ``visual_style``.
+        db: Database session used for the asset-library record.
+
+    Returns:
+        Dict with ``avatar_url``, ``avatar_filename``, ``avatar_prompt`` and
+        ``message``.
+    """
+    import json
+    import re
+
+    # Parse video plan if provided. Only a JSON object is usable as a plan;
+    # anything else is ignored so the later .get() lookups cannot fail.
+    plan_data: Dict[str, Any] = {}
+    avatar_recommendations: Dict[str, Any] = {}
+    if video_plan_json:
+        try:
+            parsed_plan = json.loads(video_plan_json)
+        except (TypeError, ValueError) as e:
+            logger.warning(f"[YouTube] Failed to parse video plan JSON: {e}")
+        else:
+            if isinstance(parsed_plan, dict):
+                plan_data = parsed_plan
+                recommendations = plan_data.get("avatar_recommendations")
+                if isinstance(recommendations, dict):
+                    avatar_recommendations = recommendations
+            else:
+                logger.warning("[YouTube] Video plan JSON is not an object; ignoring it")
+
+    # Extract context - prioritize user inputs over plan data
+    # User inputs are more reliable as they represent explicit choices
+    # Priority: user input > plan data > defaults
+    plan_target_audience = audience or plan_data.get("target_audience", "")
+    plan_video_type = content_type or plan_data.get("video_type", "")
+    # Use user's brand_style if provided, otherwise use plan's visual_style
+    plan_visual_style = brand_style or plan_data.get("visual_style", "")
+    plan_tone = plan_data.get("tone", "")
+
+    logger.info(
+        f"[YouTube] Avatar generation context: "
+        f"video_type={plan_video_type}, audience={plan_target_audience[:50] if plan_target_audience else 'none'}, "
+        f"brand_style={plan_visual_style[:50] if plan_visual_style else 'none'}"
+    )
+
+    # Build optimized prompt using plan data
+    prompt_parts = []
+
+    # Base avatar description - use recommendations if available
+    if avatar_recommendations.get("description"):
+        prompt_parts.append(avatar_recommendations["description"])
+    else:
+        prompt_parts.append("Half-length portrait of a professional YouTube creator (25-35 years old)")
+
+    # Video type optimization: exact matches first, then keyword fallbacks.
+    # plan_video_type already includes content_type, so no separate
+    # content_type branch is needed.
+    exact_video_type_styles = {
+        "tutorial": "approachable instructor, professional yet friendly, clear presentation style",
+        "review": "trustworthy reviewer, confident, credible appearance",
+        "educational": "knowledgeable educator, professional, warm and engaging",
+        "entertainment": "energetic creator, expressive, fun and relatable",
+        "vlog": "authentic person, approachable, real and relatable",
+        "product_demo": "professional presenter, polished, confident and enthusiastic",
+        "reaction": "expressive creator, authentic reactions, engaging",
+        "storytelling": "storyteller, warm, engaging narrator",
+    }
+    if plan_video_type:
+        video_type_lower = plan_video_type.lower()
+        if video_type_lower in exact_video_type_styles:
+            prompt_parts.append(exact_video_type_styles[video_type_lower])
+        elif "tech" in video_type_lower:
+            prompt_parts.append("tech-forward style")
+        elif "travel" in video_type_lower:
+            prompt_parts.append("travel vlogger aesthetic")
+        elif "education" in video_type_lower or "learn" in video_type_lower:
+            prompt_parts.append("educational creator, clean and clear presentation")
+        else:
+            prompt_parts.append("modern creator style")
+
+    # Audience optimization
+    if plan_target_audience:
+        audience_lower = plan_target_audience.lower()
+        if "young" in audience_lower or "gen z" in audience_lower or "millennial" in audience_lower:
+            prompt_parts.append("youthful, vibrant, modern vibe")
+        elif "executive" in audience_lower or "professional" in audience_lower or "business" in audience_lower:
+            prompt_parts.append("polished, credible, authoritative presence")
+        elif "creative" in audience_lower:
+            prompt_parts.append("artistic, expressive, creative professional")
+        elif "parents" in audience_lower or "family" in audience_lower:
+            prompt_parts.append("warm, approachable, trustworthy presence")
+
+    # Visual style from plan (independent checks: several may apply)
+    if plan_visual_style:
+        visual_lower = plan_visual_style.lower()
+        if "minimal" in visual_lower or "minimalist" in visual_lower:
+            prompt_parts.append("clean, minimalist aesthetic")
+        if "tech" in visual_lower or "modern" in visual_lower:
+            prompt_parts.append("tech-forward, modern style")
+        if "energetic" in visual_lower or "colorful" in visual_lower or "vibrant" in visual_lower:
+            prompt_parts.append("vibrant, energetic appearance")
+        if "cinematic" in visual_lower:
+            prompt_parts.append("cinematic, polished presentation")
+        if "professional" in visual_lower:
+            prompt_parts.append("professional, polished aesthetic")
+
+    # Tone from plan (independent checks: several may apply)
+    if plan_tone:
+        tone_lower = plan_tone.lower()
+        if "casual" in tone_lower:
+            prompt_parts.append("casual, approachable style")
+        if "professional" in tone_lower:
+            prompt_parts.append("professional attire and presentation")
+        if "energetic" in tone_lower or "fun" in tone_lower:
+            prompt_parts.append("energetic, lively expression")
+        if "warm" in tone_lower:
+            prompt_parts.append("warm, friendly expression")
+
+    # Avatar recommendations from plan
+    if avatar_recommendations.get("style"):
+        prompt_parts.append(avatar_recommendations["style"])
+    if avatar_recommendations.get("energy"):
+        prompt_parts.append(avatar_recommendations["energy"])
+
+    # Base technical requirements
+    prompt_parts.extend([
+        "photo-realistic, professional photography",
+        "confident, engaging expression",
+        "professional studio lighting, clean background",
+        "suitable for video generation and thumbnails",
+        "ultra realistic, 4k quality, 85mm lens",
+        "looking at camera, center-focused composition",
+    ])
+
+    prompt = ", ".join(prompt_parts)
+    # Fresh random 32-bit seed per call so repeated requests give variations.
+    seed = int(uuid.uuid4().int % (2**32))
+
+    image_options = {
+        "provider": "wavespeed",
+        "model": "ideogram-v3-turbo",
+        "width": 1024,
+        "height": 1024,
+        "seed": seed,
+    }
+
+    # NOTE(review): generate_image is called synchronously inside this
+    # coroutine; if it blocks on network I/O it will stall the event loop.
+    result = generate_image(
+        prompt=prompt,
+        options=image_options,
+        user_id=user_id,
+    )
+
+    unique_id = str(uuid.uuid4())[:8]
+    # project_id is caller-supplied; keep only filename-safe characters so it
+    # cannot introduce path separators into the path written below.
+    project_slug = re.sub(r"[^A-Za-z0-9_-]", "_", str(project_id)) if project_id else "temp"
+    avatar_filename = f"yt_generated_{project_slug}_{unique_id}.png"
+    avatar_path = YOUTUBE_AVATARS_DIR / avatar_filename
+
+    with open(avatar_path, "wb") as f:
+        f.write(result.image_bytes)
+
+    avatar_url = f"/api/youtube/images/avatars/{avatar_filename}"
+    logger.info(f"[YouTube] Generated creator avatar: {avatar_path}")
+
+    if project_id and db:
+        # Best effort: a failed library record must not fail the generation.
+        try:
+            save_asset_to_library(
+                db=db,
+                user_id=user_id,
+                asset_type="image",
+                source_module="youtube_creator",
+                filename=avatar_filename,
+                file_url=avatar_url,
+                file_path=str(avatar_path),
+                file_size=len(result.image_bytes),
+                mime_type="image/png",
+                title=f"YouTube Creator Avatar (Generated) - {project_id}",
+                description="AI-generated YouTube creator avatar",
+                prompt=prompt,
+                tags=["youtube", "avatar", "generated", project_id],
+                provider=result.provider,
+                model=result.model,
+                asset_metadata={
+                    "project_id": project_id,
+                    "type": "generated_presenter",
+                    "status": "completed",
+                },
+            )
+        except Exception as e:
+            logger.warning(f"[YouTube] Failed to save generated avatar asset: {e}")
+
+    return {
+        "avatar_url": avatar_url,
+        "avatar_filename": avatar_filename,
+        "avatar_prompt": prompt,
+        "message": "Avatar generated successfully",
+    }
+
+
+@router.post("/upload")
+async def upload_youtube_avatar(
+    file: UploadFile = File(...),
+    project_id: Optional[str] = Form(None),
+    current_user: Dict[str, Any] = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """Upload a YouTube creator avatar image.
+
+    Stores the uploaded bytes in ``YOUTUBE_AVATARS_DIR`` under a generated
+    filename and, when ``project_id`` is given, records the file in the asset
+    library (failures there are logged and ignored).
+
+    Returns:
+        Dict with ``avatar_url``, ``avatar_filename`` and ``message``.
+
+    Raises:
+        HTTPException: 401 if unauthenticated, 400 if no file was sent or it
+            exceeds 5MB, 500 if storing the file fails.
+    """
+    import re
+
+    user_id = require_authenticated_user(current_user)
+
+    if not file:
+        raise HTTPException(status_code=400, detail="No file uploaded")
+
+    file_content = await file.read()
+
+    # Validate size (max 5MB)
+    if len(file_content) > 5 * 1024 * 1024:
+        raise HTTPException(status_code=400, detail="Image file size must be less than 5MB")
+
+    try:
+        # file.filename can be missing on some multipart uploads; fall back to
+        # ".png" instead of failing on Path(None).
+        file_ext = Path(file.filename or "").suffix or ".png"
+        unique_id = str(uuid.uuid4())[:8]
+        # project_id is client-supplied; keep only filename-safe characters so
+        # it cannot introduce path separators into the path written below.
+        project_slug = re.sub(r"[^A-Za-z0-9_-]", "_", project_id) if project_id else "temp"
+        avatar_filename = f"yt_avatar_{project_slug}_{unique_id}{file_ext}"
+        avatar_path = YOUTUBE_AVATARS_DIR / avatar_filename
+
+        with open(avatar_path, "wb") as f:
+            f.write(file_content)
+
+        avatar_url = f"/api/youtube/images/avatars/{avatar_filename}"
+        logger.info(f"[YouTube] Avatar uploaded: {avatar_path}")
+
+        if project_id:
+            # Best effort: a failed library record must not fail the upload.
+            try:
+                save_asset_to_library(
+                    db=db,
+                    user_id=user_id,
+                    asset_type="image",
+                    source_module="youtube_creator",
+                    filename=avatar_filename,
+                    file_url=avatar_url,
+                    file_path=str(avatar_path),
+                    file_size=len(file_content),
+                    mime_type=file.content_type or "image/png",
+                    title=f"YouTube Creator Avatar - {project_id}",
+                    description="YouTube creator avatar image",
+                    tags=["youtube", "avatar", project_id],
+                    asset_metadata={
+                        "project_id": project_id,
+                        "type": "creator_avatar",
+                        "status": "completed",
+                    },
+                )
+            except Exception as e:
+                logger.warning(f"[YouTube] Failed to save avatar asset: {e}")
+
+        return {
+            "avatar_url": avatar_url,
+            "avatar_filename": avatar_filename,
+            "message": "Avatar uploaded successfully",
+        }
+    except Exception as exc:
+        logger.error(f"[YouTube] Avatar upload failed: {exc}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Avatar upload failed: {str(exc)}")
+
+
+@router.post("/make-presentable")
+async def make_avatar_presentable(
+    avatar_url: str = Form(...),
+    project_id: Optional[str] = Form(None),
+    video_type: Optional[str] = Form(None),
+    target_audience: Optional[str] = Form(None),
+    video_goal: Optional[str] = Form(None),
+    brand_style: Optional[str] = Form(None),
+    current_user: Dict[str, Any] = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """
+    Transform an uploaded avatar image into a YouTube-appropriate creator.
+    Uses AI image editing with enhanced prompts to optimize the uploaded photo.
+
+    The stored avatar referenced by ``avatar_url`` is loaded, a transformation
+    prompt is built from the optional context fields (``video_goal`` is
+    accepted but not used in the prompt), the prompt is optionally optimized,
+    and the edited image is written to ``YOUTUBE_AVATARS_DIR``.
+
+    Returns:
+        Dict with ``avatar_url``, ``avatar_filename`` and ``message``.
+
+    Raises:
+        HTTPException: 401 if unauthenticated, 404 if the source avatar is not
+            found, 500 for any other failure.
+    """
+    import re
+
+    user_id = require_authenticated_user(current_user)
+
+    try:
+        avatar_bytes = _load_youtube_image_bytes(avatar_url)
+        logger.info(f"[YouTube] 🔍 Starting avatar transformation for user_id={user_id}, project={project_id}")
+
+        # Build context-aware transformation prompt using user inputs
+        prompt_parts = [
+            "Transform this photo into a professional YouTube creator avatar:",
+            "Significantly enhance and optimize the image for YouTube video production;",
+            "Apply professional photo editing: improve lighting, color grading, and composition;",
+            "Enhance facial features: brighten eyes, smooth skin, add professional makeup if needed;",
+            "Improve background: replace with clean, professional studio background or subtle gradient;",
+            "Adjust clothing: ensure professional, YouTube-appropriate attire;",
+            "Optimize for video: ensure the person looks natural and engaging on camera;",
+            "Half-length portrait format, person looking directly at camera with confident, engaging expression;",
+            "Professional studio lighting with soft shadows, high-quality photography;",
+            "Maintain the person's core appearance and identity while making significant improvements;",
+            "Ultra realistic, 4k quality, professional photography style;",
+            "Suitable for video generation, thumbnails, and YouTube channel branding.",
+        ]
+
+        # Add context from user inputs to make transformation more targeted.
+        # Unknown video types add nothing to the prompt.
+        video_type_styles = {
+            "tutorial": "Approachable instructor style, professional yet friendly appearance",
+            "review": "Trustworthy reviewer style, confident and credible appearance",
+            "educational": "Knowledgeable educator style, professional and warm appearance",
+            "entertainment": "Energetic creator style, expressive and fun appearance",
+            "vlog": "Authentic vlogger style, approachable and relatable appearance",
+            "product_demo": "Professional presenter style, polished and enthusiastic appearance",
+            "reaction": "Expressive creator style, authentic and engaging appearance",
+            "storytelling": "Storyteller style, warm and engaging narrator appearance",
+        }
+        if video_type:
+            video_type_style = video_type_styles.get(video_type.lower())
+            if video_type_style:
+                prompt_parts.append(video_type_style)
+
+        if target_audience:
+            audience_lower = target_audience.lower()
+            if "young" in audience_lower or "gen z" in audience_lower or "millennial" in audience_lower:
+                prompt_parts.append("Modern, youthful, vibrant aesthetic")
+            elif "executive" in audience_lower or "professional" in audience_lower or "business" in audience_lower:
+                prompt_parts.append("Polished, credible, authoritative professional appearance")
+            elif "creative" in audience_lower:
+                prompt_parts.append("Artistic, expressive, creative professional style")
+
+        if brand_style:
+            style_lower = brand_style.lower()
+            if "minimal" in style_lower or "minimalist" in style_lower:
+                prompt_parts.append("Clean, minimalist aesthetic")
+            if "tech" in style_lower or "modern" in style_lower:
+                prompt_parts.append("Tech-forward, modern style")
+            if "energetic" in style_lower or "colorful" in style_lower:
+                prompt_parts.append("Vibrant, energetic appearance")
+
+        base_prompt = " ".join(prompt_parts)
+
+        # Optimize the prompt using WaveSpeed prompt optimizer for better results.
+        # Best effort: any failure falls back to the unoptimized prompt.
+        try:
+            from services.wavespeed.client import WaveSpeedClient
+            wavespeed_client = WaveSpeedClient()
+            logger.info("[YouTube] Optimizing transformation prompt using WaveSpeed prompt optimizer")
+            transformation_prompt = wavespeed_client.optimize_prompt(
+                text=base_prompt,
+                mode="image",
+                style="realistic",  # Use realistic style for photo editing
+                enable_sync_mode=True,
+                timeout=30
+            )
+            logger.info(f"[YouTube] Prompt optimized successfully (length: {len(transformation_prompt)} chars)")
+        except Exception as opt_error:
+            logger.warning(f"[YouTube] Prompt optimization failed, using base prompt: {opt_error}")
+            transformation_prompt = base_prompt
+
+        # Use HuggingFace for image editing (only available option)
+        # Note: This uses async processing with polling (~30 seconds expected)
+        image_options = {
+            "provider": "huggingface",  # Explicitly use HuggingFace (only option for image editing)
+            "model": None,  # Use default model (Qwen/Qwen-Image-Edit)
+        }
+
+        logger.info("[YouTube] Starting avatar transformation (this may take ~30 seconds due to async processing)")
+        result = edit_image(
+            input_image_bytes=avatar_bytes,
+            prompt=transformation_prompt,
+            options=image_options,
+            user_id=user_id,
+        )
+        logger.info("[YouTube] ✅ Avatar transformation completed successfully")
+
+        unique_id = str(uuid.uuid4())[:8]
+        # project_id is client-supplied; keep only filename-safe characters so
+        # it cannot introduce path separators into the path written below.
+        project_slug = re.sub(r"[^A-Za-z0-9_-]", "_", project_id) if project_id else "temp"
+        transformed_filename = f"yt_presenter_{project_slug}_{unique_id}.png"
+        transformed_path = YOUTUBE_AVATARS_DIR / transformed_filename
+
+        with open(transformed_path, "wb") as f:
+            f.write(result.image_bytes)
+
+        transformed_url = f"/api/youtube/images/avatars/{transformed_filename}"
+        logger.info(f"[YouTube] Transformed avatar saved to: {transformed_path}")
+
+        if project_id:
+            # Best effort: a failed library record must not fail the request.
+            try:
+                save_asset_to_library(
+                    db=db,
+                    user_id=user_id,
+                    asset_type="image",
+                    source_module="youtube_creator",
+                    filename=transformed_filename,
+                    file_url=transformed_url,
+                    file_path=str(transformed_path),
+                    file_size=len(result.image_bytes),
+                    mime_type="image/png",
+                    title=f"YouTube Creator (Transformed) - {project_id}",
+                    description="AI-transformed YouTube creator avatar from uploaded photo",
+                    prompt=transformation_prompt,
+                    tags=["youtube", "avatar", "presenter", project_id],
+                    provider=result.provider,
+                    model=result.model,
+                    asset_metadata={
+                        "project_id": project_id,
+                        "type": "transformed_presenter",
+                        "original_avatar_url": avatar_url,
+                        "status": "completed",
+                    },
+                )
+            except Exception as e:
+                logger.warning(f"[YouTube] Failed to save transformed avatar asset: {e}")
+
+        return {
+            "avatar_url": transformed_url,
+            "avatar_filename": transformed_filename,
+            "message": "Avatar transformed successfully",
+        }
+    except HTTPException:
+        # Preserve deliberate HTTP errors (e.g. the 404 for a missing source
+        # avatar) instead of rewriting them as 500s below.
+        raise
+    except Exception as exc:
+        logger.error(f"[YouTube] Avatar transformation failed: {exc}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Avatar transformation failed: {str(exc)}")
+
+
+@router.post("/generate")
+async def generate_creator_avatar(
+    project_id: Optional[str] = Form(None),
+    audience: Optional[str] = Form(None),
+    content_type: Optional[str] = Form(None),
+    video_plan_json: Optional[str] = Form(None),
+    brand_style: Optional[str] = Form(None),
+    current_user: Dict[str, Any] = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """
+    Auto-generate a YouTube creator avatar optimized from video plan context.
+
+    Uses video plan data (if provided) and user inputs to generate an avatar that matches
+    the video type, audience, tone, and brand style.
+
+    Returns:
+        The dict produced by ``_generate_avatar_from_context``.
+
+    Raises:
+        HTTPException: 401 if unauthenticated; any HTTPException raised during
+            generation is passed through unchanged; other failures become 500.
+    """
+    user_id = require_authenticated_user(current_user)
+
+    try:
+        return await _generate_avatar_from_context(
+            user_id=user_id,
+            project_id=project_id,
+            audience=audience,
+            content_type=content_type,
+            video_plan_json=video_plan_json,
+            brand_style=brand_style,
+            db=db,
+        )
+    except HTTPException:
+        # Keep the status code chosen further down the stack instead of
+        # rewriting it as a 500 below.
+        raise
+    except Exception as exc:
+        logger.error(f"[YouTube] Avatar generation failed: {exc}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Avatar generation failed: {str(exc)}")
+
+
+@router.post("/regenerate")
+async def regenerate_creator_avatar(
+    video_plan_json: str = Form(...),
+    project_id: Optional[str] = Form(None),
+    current_user: Dict[str, Any] = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """
+    Regenerate a YouTube creator avatar using the same video plan context.
+
+    Takes the video plan JSON and regenerates an avatar with a different seed
+    to provide variation while maintaining the same optimization based on plan data.
+
+    Returns:
+        Dict with ``avatar_url``, ``avatar_filename``, ``avatar_prompt`` and
+        ``message``.
+
+    Raises:
+        HTTPException: 401 if unauthenticated, 400 if ``video_plan_json`` is
+            not a valid JSON object, 500 for other failures.
+    """
+    import json
+
+    user_id = require_authenticated_user(current_user)
+
+    try:
+        # Parse video plan to extract context. A malformed plan is the
+        # client's error, so report it as 400 rather than 500.
+        try:
+            plan_data = json.loads(video_plan_json)
+        except ValueError as parse_error:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Invalid video plan JSON: {str(parse_error)}",
+            ) from parse_error
+        if not isinstance(plan_data, dict):
+            raise HTTPException(status_code=400, detail="Video plan JSON must be an object")
+
+        # Extract context from plan data
+        audience = plan_data.get("target_audience", "")
+        content_type = plan_data.get("video_type", "")
+        brand_style = plan_data.get("visual_style", "")
+
+        # str() keeps the log line safe if the plan holds a non-string value.
+        logger.info(
+            f"[YouTube] Regenerating avatar for project {project_id}: "
+            f"video_type={content_type}, audience={str(audience)[:50] if audience else 'none'}"
+        )
+
+        avatar_response = await _generate_avatar_from_context(
+            user_id=user_id,
+            project_id=project_id,
+            audience=audience,
+            content_type=content_type,
+            video_plan_json=video_plan_json,
+            brand_style=brand_style,
+            db=db,
+        )
+
+        # Return the avatar prompt along with the URL for the frontend
+        return {
+            "avatar_url": avatar_response.get("avatar_url"),
+            "avatar_filename": avatar_response.get("avatar_filename"),
+            "avatar_prompt": avatar_response.get("avatar_prompt"),
+            "message": "Avatar regenerated successfully",
+        }
+    except HTTPException:
+        # Keep deliberate HTTP errors (including the 400s above) intact.
+        raise
+    except Exception as exc:
+        logger.error(f"[YouTube] Avatar regeneration failed: {exc}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Avatar regeneration failed: {str(exc)}")
+
+
+
--- a/backend/api/youtube/handlers/images.py
+++ b/backend/api/youtube/handlers/images.py
@@ -0,0 +1,259 @@
+"""YouTube Creator scene image generation handlers."""
+
+from pathlib import Path
+from typing import Dict, Any, Optional
+import uuid
+
+from fastapi import APIRouter, Depends, HTTPException
+from fastapi.responses import FileResponse
+from pydantic import BaseModel
+from sqlalchemy.orm import Session
+
+from middleware.auth_middleware import get_current_user
+from services.database import get_db
+from services.subscription import PricingService
+from services.subscription.preflight_validator import validate_image_generation_operations
+from services.llm_providers.main_image_generation import generate_image
+from services.wavespeed.client import WaveSpeedClient
+from utils.asset_tracker import save_asset_to_library
+from utils.logger_utils import get_service_logger
+
+# No prefix is set here; route paths below are declared relative to wherever
+# this router is included.
+router = APIRouter(tags=["youtube-image"])
+logger = get_service_logger("api.youtube.image")
+
+# Directories
+# Four levels up from this file (backend/api/youtube/handlers/images.py),
+# i.e. the "backend" directory.
+base_dir = Path(__file__).parent.parent.parent.parent
+YOUTUBE_IMAGES_DIR = base_dir / "youtube_images"
+# Created at import time so generated scene images can be written directly.
+YOUTUBE_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
+# Only read from in this module (base avatars for character consistency);
+# it is not created here.
+YOUTUBE_AVATARS_DIR = base_dir / "youtube_avatars"
+
+
+class YouTubeImageRequest(BaseModel):
+    """Request body for the scene image generation endpoint (``POST /image``)."""
+
+    # Identifier of the scene; embedded in the saved image's filename.
+    scene_id: str
+    # Declared optional, but the handler rejects requests without it (400).
+    scene_title: Optional[str] = None
+    # Scene text; the handler truncates it before adding it to the prompt.
+    scene_content: Optional[str] = None
+    # URL of a stored avatar; when set, the handler switches to
+    # character-consistent generation using that image as reference.
+    base_avatar_url: Optional[str] = None
+    # Video idea/topic; the handler uses at most the first 80 characters.
+    idea: Optional[str] = None
+    width: Optional[int] = 1024
+    height: Optional[int] = 1024
+    # Replaces the auto-built prompt when no base avatar is used.
+    custom_prompt: Optional[str] = None
+    # The three options below are read by the handler only on the base-avatar
+    # path, where they default to "Realistic", "Quality" and "16:9".
+    style: Optional[str] = None  # e.g., "Realistic", "Fiction"
+    rendering_speed: Optional[str] = None  # e.g., "Quality", "Turbo"
+    aspect_ratio: Optional[str] = None  # e.g., "16:9"
+
+
+def require_authenticated_user(current_user: Dict[str, Any]) -> str:
+    """Return the authenticated user's ID as a string.
+
+    Raises:
+        HTTPException: 401 when ``current_user`` is empty/None or its ``"id"``
+            entry is missing or falsy.
+    """
+    resolved_id = (current_user or {}).get("id")
+    if resolved_id:
+        return str(resolved_id)
+    raise HTTPException(status_code=401, detail="Authentication required")
+
+
+def _load_base_avatar_bytes(avatar_url: str) -> bytes:
+    """Load base avatar bytes for character consistency.
+
+    Only the final path segment of ``avatar_url`` is used; it is resolved
+    inside ``YOUTUBE_AVATARS_DIR``.
+
+    Raises:
+        HTTPException: 404 when the URL has no filename or the file does not
+            exist in the avatars directory.
+    """
+    # Drop the query string and fragment BEFORE taking the last segment, so a
+    # "/" inside them (e.g. "?next=/x") cannot be mistaken for the filename.
+    path_part = avatar_url.split("?", 1)[0].split("#", 1)[0]
+    # Treat backslashes as separators too, so a Windows-style "..\\" sequence
+    # cannot escape the avatars directory.
+    filename = path_part.replace("\\", "/").rsplit("/", 1)[-1]
+    avatar_path = YOUTUBE_AVATARS_DIR / filename
+    # is_file() is False for missing paths and for directories ("", ".", "..").
+    if not filename or not avatar_path.is_file():
+        raise HTTPException(status_code=404, detail="Base avatar image not found")
+    return avatar_path.read_bytes()
+
+
+def _save_scene_image(image_bytes: bytes, scene_id: str) -> Dict[str, str]:
+    """Persist generated scene image and return file/url info.
+
+    Args:
+        image_bytes: Encoded image data; written to disk unchanged.
+        scene_id: Scene identifier used in the filename. Characters other
+            than letters, digits, ``_`` and ``-`` are replaced with ``_``.
+
+    Returns:
+        Dict with ``image_filename``, ``image_path`` and ``image_url``.
+    """
+    import re
+
+    unique_id = str(uuid.uuid4())[:8]
+    # scene_id comes from the request body; strip anything that could act as
+    # a path separator so the file always lands inside YOUTUBE_IMAGES_DIR.
+    safe_scene_id = re.sub(r"[^A-Za-z0-9_-]", "_", str(scene_id))
+    image_filename = f"yt_scene_{safe_scene_id}_{unique_id}.png"
+    image_path = YOUTUBE_IMAGES_DIR / image_filename
+    with open(image_path, "wb") as f:
+        f.write(image_bytes)
+
+    image_url = f"/api/youtube/images/scenes/{image_filename}"
+    return {
+        "image_filename": image_filename,
+        "image_path": str(image_path),
+        "image_url": image_url,
+    }
+
+
+@router.post("/image")
+async def generate_youtube_scene_image(
+    request: YouTubeImageRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """Generate a YouTube scene image, with optional avatar consistency."""
+    user_id = require_authenticated_user(current_user)
+
+    if not request.scene_title:
+        raise HTTPException(status_code=400, detail="Scene title is required")
+
+    try:
+        # Pre-flight subscription validation
+        pricing_service = PricingService(db)
+        validate_image_generation_operations(
+            pricing_service=pricing_service,
+            user_id=user_id,
+            num_images=1,
+        )
+        logger.info(f"[YouTube] ✅ Pre-flight validation passed for user {user_id}")
+
+        base_avatar_bytes = None
+        if request.base_avatar_url:
+            try:
+                base_avatar_bytes = _load_base_avatar_bytes(request.base_avatar_url)
+                logger.info(f"[YouTube] Loaded base avatar for scene {request.scene_id}")
+            except HTTPException:
+                raise
+            except Exception as e:
+                logger.error(f"[YouTube] Failed to load base avatar: {e}", exc_info=True)
+                raise HTTPException(
+                    status_code=500,
+                    detail={
+                        "error": "Failed to load base avatar",
+                        "message": f"Could not load the base avatar image: {str(e)}",
+                    },
+                )
+
+        # Build prompt
+        image_prompt = ""
+        if base_avatar_bytes:
+            prompt_parts = []
+            if request.scene_title:
+                prompt_parts.append(f"Scene: {request.scene_title}")
+            if request.scene_content:
+                content_preview = request.scene_content[:200].replace("\n", " ").strip()
+                prompt_parts.append(f"Context: {content_preview}")
+            if request.idea:
+                prompt_parts.append(f"Video idea: {request.idea[:80].strip()}")
+            prompt_parts.append("YouTube creator on camera, engaging and dynamic framing")
+            prompt_parts.append("Clean background, good lighting, thumbnail-friendly composition")
+            image_prompt = ", ".join(prompt_parts)
+        else:
+            prompt_parts = [
+                "YouTube creator scene",
+                "clean, modern background",
+                "good lighting, high contrast for thumbnail clarity",
+            ]
+            if request.scene_title:
+                prompt_parts.append(f"Scene theme: {request.scene_title}")
+            if request.scene_content:
+                prompt_parts.append(f"Context: {request.scene_content[:120].replace(chr(10), ' ')}")
+            if request.idea:
+                prompt_parts.append(f"Topic: {request.idea[:80]}")
+            prompt_parts.append("video-optimized composition, 16:9 aspect ratio")
+            image_prompt = ", ".join(prompt_parts)
+
+        # Generate image
+        provider = "wavespeed"
+        model = "ideogram-v3-turbo"
+        if base_avatar_bytes:
+            logger.info(f"[YouTube] Using character-consistent generation for scene {request.scene_id}")
+            style = request.style or "Realistic"
+            rendering_speed = request.rendering_speed or "Quality"
+            aspect_ratio = request.aspect_ratio or "16:9"
+            width = request.width or 1024
+            height = request.height or 576
+
+            wavespeed_client = WaveSpeedClient()
+            image_bytes = wavespeed_client.generate_character_image(
+                prompt=image_prompt,
+                reference_image_bytes=base_avatar_bytes,
+                style=style,
+                aspect_ratio=aspect_ratio,
+                rendering_speed=rendering_speed,
+                timeout=None,
+            )
+            model = "ideogram-character"
+        else:
+            logger.info(f"[YouTube] Generating scene {request.scene_id} from scratch")
+            image_options = {
+                "provider": "wavespeed",
+                "model": "ideogram-v3-turbo",
+                "width": request.width or 1024,
+                "height": request.height or 576,
+            }
+            result = generate_image(
+                prompt=request.custom_prompt or image_prompt,
+                options=image_options,
+                user_id=user_id,
+            )
+            image_bytes = result.image_bytes
+            provider = result.provider
+            model = result.model
+
+        # Save image
+        saved = _save_scene_image(image_bytes, request.scene_id)
+
+        # Save to asset library
+        try:
+            save_asset_to_library(
+                db=db,
+                user_id=user_id,
+                asset_type="image",
+                source_module="youtube_creator",
+                filename=saved["image_filename"],
+                file_url=saved["image_url"],
+                file_path=saved["image_path"],
+                file_size=len(image_bytes),
+                mime_type="image/png",
+                title=f"YouTube Scene: {request.scene_title or request.scene_id}",
+                description=request.scene_content or f"Scene image for {request.scene_id}",
+                prompt=image_prompt,
+                tags=["youtube_creator", "scene", request.scene_id],
+                provider=provider,
+                model=model,
+                asset_metadata={
+                    "scene_id": request.scene_id,
+                    "scene_title": request.scene_title,
+                    "has_base_avatar": bool(base_avatar_bytes),
+                    "width": request.width or 1024,
+                    "height": request.height or 576,
+                },
+            )
+        except Exception as e:
+            logger.warning(f"[YouTube] Failed to save scene image to asset library: {e}")
+
+        return {
+            "scene_id": request.scene_id,
+            "scene_title": request.scene_title,
+            "image_filename": saved["image_filename"],
+            "image_url": saved["image_url"],
+            "width": request.width or 1024,
+            "height": request.height or 576,
+        }
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(f"[YouTube] Scene image generation failed: {exc}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Failed to generate scene image: {str(exc)}")
+
+
+@router.get("/images/{category}/{filename}")
+async def serve_youtube_image(
+    category: str,
+    filename: str,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+):
+    """Serve a stored YouTube image from the avatars or scenes directory.
+
+    Raises HTTPException 400 for an unknown category or a filename containing
+    ".." or a path separator, and 404 when the file does not exist.
+    """
+    import mimetypes  # stdlib; imported locally so the handler is self-contained
+
+    require_authenticated_user(current_user)
+    if category not in {"avatars", "scenes"}:
+        raise HTTPException(status_code=400, detail="Invalid image category. Must be 'avatars' or 'scenes'")
+    # Reject anything that could escape the storage directory.
+    if ".." in filename or "/" in filename or "\\" in filename:
+        raise HTTPException(status_code=400, detail="Invalid filename")
+
+    directory = YOUTUBE_AVATARS_DIR if category == "avatars" else YOUTUBE_IMAGES_DIR
+    image_path = directory / filename
+    if not image_path.is_file():  # is_file() is already False for a missing path
+        raise HTTPException(status_code=404, detail="Image not found")
+
+    # Report the file's real image type; keep the previous PNG default otherwise.
+    guessed_type = mimetypes.guess_type(filename)[0] or ""
+    media_type = guessed_type if guessed_type.startswith("image/") else "image/png"
+    return FileResponse(path=str(image_path), media_type=media_type, filename=filename)
--- a/backend/api/youtube/router.py
+++ b/backend/api/youtube/router.py
@@ -23,14 +23,24 @@ from services.subscription.preflight_validator import validate_scene_animation_o
 from utils.logger_utils import get_service_logger
 from utils.asset_tracker import save_asset_to_library
 from .task_manager import task_manager
+from .handlers import avatar as avatar_handlers
+from .handlers import images as image_handlers

 router = APIRouter(prefix="/youtube", tags=["youtube"])
 logger = get_service_logger("api.youtube")

-# Video output directory
+# Video output and image directories
 base_dir = Path(__file__).parent.parent.parent.parent
 YOUTUBE_VIDEO_DIR = base_dir / "youtube_videos"
 YOUTUBE_VIDEO_DIR.mkdir(parents=True, exist_ok=True)
+YOUTUBE_AVATARS_DIR = base_dir / "youtube_avatars"
+YOUTUBE_AVATARS_DIR.mkdir(parents=True, exist_ok=True)
+YOUTUBE_IMAGES_DIR = base_dir / "youtube_images"
+YOUTUBE_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
+
+# Include sub-routers for avatar and images
+router.include_router(avatar_handlers.router)
+router.include_router(image_handlers.router)


 # Request/Response Models
@@ -42,6 +52,23 @@ class VideoPlanRequest(BaseModel):
        pattern="^(shorts|medium|long)$",
        description="Video duration type: shorts (≤60s), medium (1-4min), long (4-10min)"
    )
+    video_type: Optional[str] = Field(
+        None,
+        pattern="^(tutorial|review|educational|entertainment|vlog|product_demo|reaction|storytelling)$",
+        description="Video format type: tutorial, review, educational, entertainment, vlog, product_demo, reaction, storytelling"
+    )
+    target_audience: Optional[str] = Field(
+        None,
+        description="Target audience description (helps optimize tone, pace, and style)"
+    )
+    video_goal: Optional[str] = Field(
+        None,
+        description="Primary goal of the video (educate, sell, entertain, etc.)"
+    )
+    brand_style: Optional[str] = Field(
+        None,
+        description="Brand visual aesthetic and style preferences"
+    )
    reference_image_description: Optional[str] = Field(
        None, 
        description="Optional description of reference image for visual inspiration"
@@ -55,6 +82,14 @@ class VideoPlanRequest(BaseModel):
        pattern="^(blog|story)$",
        description="Type of source content: blog or story"
    )
+    avatar_url: Optional[str] = Field(
+        None,
+        description="Optional avatar URL if user uploaded one before plan generation"
+    )
+    enable_research: Optional[bool] = Field(
+        True,
+        description="Enable Exa research to enhance plan with current information, trends, and better SEO keywords (default: True)"
+    )


 class VideoPlanResponse(BaseModel):
@@ -158,6 +193,12 @@ async def create_video_plan(
            f"duration={request.duration_type}, user={user_id}"
        )
        
+        # Note: Research subscription checks are handled by ResearchService internally
+        # ResearchService validates limits before making API calls and raises HTTPException(429) if exceeded
+        
+        # Note: Subscription checks for LLM are handled by llm_text_gen internally
+        # It validates limits before making API calls and raises HTTPException(429) if exceeded
+        
        # Get persona data if available
        persona_data = None
        try:
@@ -168,17 +209,75 @@ async def create_video_plan(
        
        # Generate plan (optimized: for shorts, combine plan + scenes in one call)
        planner = YouTubePlannerService()
-        plan = planner.generate_video_plan(
+        plan = await planner.generate_video_plan(
            user_idea=request.user_idea,
            duration_type=request.duration_type,
+            video_type=request.video_type,
+            target_audience=request.target_audience,
+            video_goal=request.video_goal,
+            brand_style=request.brand_style,
            persona_data=persona_data,
            reference_image_description=request.reference_image_description,
            source_content_id=request.source_content_id,
            source_content_type=request.source_content_type,
            user_id=user_id,
            include_scenes=(request.duration_type == "shorts"),  # Optimize shorts
+            enable_research=getattr(request, 'enable_research', True),  # Research enabled by default
        )
        
+        # Auto-generate avatar if user didn't upload one
+        # Try to reuse existing avatar from asset library first to save on AI calls during testing
+        auto_avatar_url = None
+        if not request.avatar_url:
+            try:
+                from services.content_asset_service import ContentAssetService
+                from models.content_asset_models import AssetType, AssetSource
+                
+                # Check for existing YouTube creator avatar in asset library
+                asset_service = ContentAssetService(db)
+                existing_avatars = asset_service.get_assets(
+                    user_id=user_id,
+                    asset_type=AssetType.IMAGE,
+                    source_module=AssetSource.YOUTUBE_CREATOR,
+                    limit=1,  # Get most recent one
+                )
+                
+                if existing_avatars and len(existing_avatars) > 0:
+                    # Reuse the most recent avatar
+                    existing_avatar = existing_avatars[0]
+                    auto_avatar_url = existing_avatar.file_url
+                    plan["auto_generated_avatar_url"] = auto_avatar_url
+                    plan["avatar_reused"] = True  # Flag to indicate avatar was reused
+                    logger.info(
+                        f"[YouTubeAPI] ♻️ Reusing existing avatar from asset library to save AI call: {auto_avatar_url} "
+                        f"(asset_id: {existing_avatar.id}, created: {existing_avatar.created_at})"
+                    )
+                else:
+                    # No existing avatar found, generate new one
+                    import uuid
+                    import json
+                    from .handlers.avatar import _generate_avatar_from_context
+                    # Pass both original user inputs AND plan data for better avatar generation
+                    logger.info(f"[YouTubeAPI] 🎨 No existing avatar found, generating new avatar...")
+                    avatar_response = await _generate_avatar_from_context(
+                        user_id=user_id,
+                        project_id=f"plan_{user_id}_{uuid.uuid4().hex[:8]}",
+                        audience=request.target_audience or plan.get("target_audience"),  # Prefer user input
+                        content_type=request.video_type,  # User's video type selection
+                        video_plan_json=json.dumps(plan),
+                        brand_style=request.brand_style,  # User's brand style preference
+                        db=db,
+                    )
+                    auto_avatar_url = avatar_response.get("avatar_url")
+                    avatar_prompt = avatar_response.get("avatar_prompt")
+                    plan["auto_generated_avatar_url"] = auto_avatar_url
+                    plan["avatar_prompt"] = avatar_prompt  # Store the AI prompt used for generation
+                    plan["avatar_reused"] = False  # Flag to indicate avatar was newly generated
+                    logger.info(f"[YouTubeAPI] ✅ Auto-generated new avatar based on user inputs and plan: {auto_avatar_url}")
+            except Exception as e:
+                logger.warning(f"[YouTubeAPI] Avatar generation/reuse failed (non-critical): {e}")
+                # Non-critical, continue without avatar
+        
        return VideoPlanResponse(
            success=True,
            plan=plan,
@@ -212,12 +311,17 @@ async def build_scenes(
    try:
        user_id = require_authenticated_user(current_user)
        
+        duration_type = request.video_plan.get('duration_type', 'medium')
+        has_existing_scenes = bool(request.video_plan.get("scenes")) and request.video_plan.get("_scenes_included")
+        
        logger.info(
-            f"[YouTubeAPI] Building scenes: duration={request.video_plan.get('duration_type')}, "
-            f"custom_script={bool(request.custom_script)}, user={user_id}"
+            f"[YouTubeAPI] Building scenes: duration={duration_type}, "
+            f"custom_script={bool(request.custom_script)}, "
+            f"has_existing_scenes={has_existing_scenes}, "
+            f"user={user_id}"
        )
        
-        # Build scenes
+        # Build scenes (optimized to reuse existing scenes if available)
        scene_builder = YouTubeSceneBuilderService()
        scenes = scene_builder.build_scenes_from_plan(
            video_plan=request.video_plan,