WIP: AI Podcast Maker and YouTube Creator Studio integration

2025-12-10 09:37:55 +05:30
parent 31f078c763
commit 81590cf4db
75 changed files with 11879 additions and 1380 deletions
--- a/backend/services/blog_writer/research/exa_provider.py
+++ b/backend/services/blog_writer/research/exa_provider.py
@@ -29,17 +29,15 @@ class ExaResearchProvider(BaseProvider):
        # Determine category: use exa_category if set, otherwise map from source_types
        category = config.exa_category if config.exa_category else self._map_source_type_to_category(config.source_types)
        
-        # Build search kwargs
+        # Build search kwargs - use correct Exa API format
        search_kwargs = {
            'type': config.exa_search_type or "auto",
            'num_results': min(config.max_sources, 25),
-            'contents': {
-                'text': {'max_characters': 1000},
-                'summary': {'query': f"Key insights about {topic}"},
-                'highlights': {
-                    'num_sentences': 2,
-                    'highlights_per_url': 3
-                }
+            'text': {'max_characters': 1000},
+            'summary': {'query': f"Key insights about {topic}"},
+            'highlights': {
+                'num_sentences': 2,
+                'highlights_per_url': 3
            }
        }
        
@@ -53,8 +51,39 @@ class ExaResearchProvider(BaseProvider):
        
        logger.info(f"[Exa Research] Executing search: {query}")
        
-        # Execute Exa search
-        results = self.exa.search_and_contents(query, **search_kwargs)
+        # Execute Exa search - pass contents parameters directly, not nested
+        try:
+            results = self.exa.search_and_contents(
+                query,
+                text={'max_characters': 1000},
+                summary={'query': f"Key insights about {topic}"},
+                highlights={'num_sentences': 2, 'highlights_per_url': 3},
+                type=config.exa_search_type or "auto",
+                num_results=min(config.max_sources, 25),
+                **({k: v for k, v in {
+                    'category': category,
+                    'include_domains': config.exa_include_domains,
+                    'exclude_domains': config.exa_exclude_domains
+                }.items() if v})
+            )
+        except Exception as e:
+            logger.error(f"[Exa Research] API call failed: {e}")
+            # Try simpler call without contents if the above fails
+            try:
+                logger.info("[Exa Research] Retrying with simplified parameters")
+                results = self.exa.search_and_contents(
+                    query,
+                    type=config.exa_search_type or "auto",
+                    num_results=min(config.max_sources, 25),
+                    **({k: v for k, v in {
+                        'category': category,
+                        'include_domains': config.exa_include_domains,
+                        'exclude_domains': config.exa_exclude_domains
+                    }.items() if v})
+                )
+            except Exception as retry_error:
+                logger.error(f"[Exa Research] Retry also failed: {retry_error}")
+                raise RuntimeError(f"Exa search failed: {str(retry_error)}") from retry_error
        
        # Transform to standardized format
        sources = self._transform_sources(results.results)
--- a/backend/services/blog_writer/research/research_strategies.py
+++ b/backend/services/blog_writer/research/research_strategies.py
@@ -52,45 +52,44 @@ class BasicResearchStrategy(ResearchStrategy):
        target_audience: str,
        config: ResearchConfig
    ) -> str:
-        """Build basic research prompt focused on keywords and quick insights."""
-        prompt = f"""You are a professional blog content strategist researching for a {industry} blog targeting {target_audience}.
+        """Build basic research prompt focused on podcast-ready, actionable insights."""
+        prompt = f"""You are a podcast researcher creating TALKING POINTS and FACT CARDS for a {industry} audience of {target_audience}.

 Research Topic: "{topic}"

 Provide analysis in this EXACT format:

-## CURRENT TRENDS (2024-2025)
- [Trend 1 with specific data and source URL]
- [Trend 2 with specific data and source URL]
- [Trend 3 with specific data and source URL]
+## PODCAST HOOKS (3)
+- [Hook line with tension + data point + source URL]

-## KEY STATISTICS
- [Statistic 1: specific number/percentage with source URL]
- [Statistic 2: specific number/percentage with source URL]
- [Statistic 3: specific number/percentage with source URL]
- [Statistic 4: specific number/percentage with source URL]
- [Statistic 5: specific number/percentage with source URL]
+## OBJECTIONS & COUNTERS (3)
+- Objection: [common listener objection]
+  Counter: [concise rebuttal with stat + source URL]

-## PRIMARY KEYWORDS
-1. "{topic}" (main keyword)
-2. [Variation 1]
-3. [Variation 2]
+## KEY STATS & PROOF (6)
+- [Specific metric with %/number, date, and source URL]

-## SECONDARY KEYWORDS
-[5 related keywords for blog content]
+## MINI CASE SNAPS (3)
+- [Brand/company], [what they did], [outcome metric], [source URL]

-## CONTENT ANGLES (Top 5)
-1. [Angle 1: specific unique approach]
-2. [Angle 2: specific unique approach]
-3. [Angle 3: specific unique approach]
-4. [Angle 4: specific unique approach]
-5. [Angle 5: specific unique approach]
+## KEYWORDS TO MENTION (Primary + 5 Secondary)
+- Primary: "{topic}"
+- Secondary: [5 related keywords]
+
+## 5 CONTENT ANGLES
+1. [Angle with audience benefit + why-now]
+2. [Angle ...]
+3. [Angle ...]
+4. [Angle ...]
+5. [Angle ...]
+
+## FACT CARD LIST (8)
+- For each: Quote/claim, source URL, published date, metric/context.

 REQUIREMENTS:
- Cite EVERY claim with authoritative source URLs
- Use 2024-2025 data when available
- Include specific numbers, dates, examples
- Focus on actionable blog insights for {target_audience}"""
+- Every claim MUST include a source URL (authoritative, recent: 2024-2025 preferred).
+- Use concrete numbers, dates, outcomes; avoid generic advice.
+- Keep bullets tight and scannable for spoken narration."""
        return prompt.strip()


@@ -107,57 +106,54 @@ class ComprehensiveResearchStrategy(ResearchStrategy):
        target_audience: str,
        config: ResearchConfig
    ) -> str:
-        """Build comprehensive research prompt with all analysis components."""
+        """Build comprehensive research prompt with podcast-focused, high-value insights."""
        date_filter = f"\nDate Focus: {config.date_range.value.replace('_', ' ')}" if config.date_range else ""
        source_filter = f"\nPriority Sources: {', '.join([s.value for s in config.source_types])}" if config.source_types else ""
        
-        prompt = f"""You are a senior blog content strategist conducting comprehensive research for a {industry} blog targeting {target_audience}.
+        prompt = f"""You are a senior podcast researcher creating deeply sourced talking points for a {industry} audience of {target_audience}.

 Research Topic: "{topic}"{date_filter}{source_filter}

 Provide COMPLETE analysis in this EXACT format:

-## TRENDS AND INSIGHTS (2024-2025)
-[5-7 trends with specific data, numbers, and source URLs]
+## WHAT'S CHANGED (2024-2025)
+[5-7 concise trend bullets with numbers + source URLs]

-## KEY STATISTICS
-[7-10 statistics with exact numbers, percentages, dates, and source URLs]
+## PROOF & NUMBERS
+[10 stats with metric, date, sample size/method, and source URL]

-## EXPERT OPINIONS
-[4-5 expert quotes with full attribution and source URLs]
+## EXPERT SIGNALS
+[5 expert quotes with name, title/company, source URL]

-## RECENT DEVELOPMENTS
-[5-7 recent news/developments with dates and source URLs]
+## RECENT MOVES
+[5-7 news items or launches with dates and source URLs]

-## MARKET ANALYSIS
-[3-5 market insights with data points and source URLs]
+## MARKET SNAPSHOTS
+[3-5 insights with TAM/SAM/SOM or adoption metrics, source URLs]

-## BEST PRACTICES & CASE STUDIES
-[3-5 examples with specific outcomes/metrics and source URLs]
+## CASE SNAPS
+[3-5 cases: who, what they did, outcome metric, source URL]

-## KEYWORD ANALYSIS
-Primary Keywords: [3 main variations]
-Secondary Keywords: [7-10 related keywords]
-Long-Tail Opportunities: [5-7 specific search phrases]
+## KEYWORD PLAN
+Primary (3), Secondary (8-10), Long-tail (5-7) with intent hints.

-## COMPETITOR ANALYSIS
-Top Competitors: [5 competitors with brief descriptions]
-Content Gaps: [5 topics competitors are missing]
-Competitive Advantages: [5 unique angles we can own]
+## COMPETITOR GAPS
+- Top 5 competitors (URL) + 1-line strength
+- 5 content gaps we can own
+- 3 unique angles to differentiate

-## CONTENT ANGLES (Exactly 5)
-1. [Unique angle with reasoning and target benefit]
-2. [Unique angle with reasoning and target benefit]
-3. [Unique angle with reasoning and target benefit]
-4. [Unique angle with reasoning and target benefit]
-5. [Unique angle with reasoning and target benefit]
+## PODCAST-READY ANGLES (5)
+- Each: Hook, promised takeaway, data or example, source URL.
+
+## FACT CARD LIST (10)
+- Each: Quote/claim, source URL, published date, metric/context, suggested angle tag.

 VERIFICATION REQUIREMENTS:
- Minimum 2 authoritative sources per major claim
- Prioritize: Industry publications > Research papers > News > Blogs
- 2024-2025 data strongly preferred
- All numbers must include context (timeframe, sample size, methodology)
- Every recommendation must be actionable for {target_audience}"""
+- Minimum 2 authoritative sources per major claim.
+- Prefer industry reports > research papers > news > blogs.
+- 2024-2025 data strongly preferred.
+- All numbers must include timeframe and methodology.
+- Every bullet must be concise for spoken narration and actionable for {target_audience}."""
        return prompt.strip()


--- a/backend/services/calendar_generation_datasource_framework/prompt_chaining/steps/phase3/step8_daily_content_planning/daily_schedule_generator.py
+++ b/backend/services/calendar_generation_datasource_framework/prompt_chaining/steps/phase3/step8_daily_content_planning/daily_schedule_generator.py
@@ -78,6 +78,23 @@ class DailyScheduleGenerator:
        try:
            logger.info("🚀 Starting daily schedule generation")
            
+            # CRITICAL VALIDATION: Ensure weekly_themes is a list of dictionaries
+            if not isinstance(weekly_themes, list):
+                raise TypeError(f"weekly_themes must be a list, got {type(weekly_themes)}")
+            
+            if not weekly_themes:
+                raise ValueError("weekly_themes cannot be empty")
+            
+            for i, theme in enumerate(weekly_themes):
+                if not isinstance(theme, dict):
+                    raise TypeError(f"weekly_themes[{i}] must be a dictionary, got {type(theme)}. Value: {theme}")
+                
+                # Validate required fields
+                if "week_number" not in theme:
+                    raise ValueError(f"weekly_themes[{i}] missing required 'week_number' field")
+            
+            logger.info(f"✅ Validated {len(weekly_themes)} weekly themes")
+            
            daily_schedules = []
            current_date = datetime.now()
            
@@ -153,12 +170,22 @@ class DailyScheduleGenerator:
    def _get_weekly_theme(self, weekly_themes: List[Dict], week_number: int) -> Dict:
        """Get weekly theme for specific week number."""
        try:
+            # Additional validation
+            if not isinstance(weekly_themes, list):
+                raise TypeError(f"weekly_themes must be a list, got {type(weekly_themes)}")
+            
            for theme in weekly_themes:
+                if not isinstance(theme, dict):
+                    raise TypeError(f"Theme must be a dictionary, got {type(theme)}: {theme}")
+                
                if theme.get("week_number") == week_number:
                    return theme
            
            # If no theme found, fail with clear error
-            raise ValueError(f"No weekly theme found for week {week_number}")
+            raise ValueError(
+                f"No weekly theme found for week {week_number}. "
+                f"Available weeks: {[t.get('week_number') for t in weekly_themes if isinstance(t, dict)]}"
+            )
            
        except Exception as e:
            logger.error(f"Error getting weekly theme: {str(e)}")
@@ -205,9 +232,21 @@ class DailyScheduleGenerator:
            # Call AI service - NO FALLBACKS
            ai_response = await self.ai_engine.generate_content_recommendations(analysis_data)
            
-            # Validate AI response - NO FALLBACKS
+            # ENHANCED VALIDATION: Check for unexpected types (including float)
+            if ai_response is None:
+                raise ValueError("AI service returned None")
+            
+            if isinstance(ai_response, (int, float, str, bool)):
+                raise TypeError(
+                    f"AI service returned primitive type {type(ai_response).__name__}: {ai_response}. "
+                    f"Expected list of dictionaries. This indicates an AI service error."
+                )
+            
            if not isinstance(ai_response, list):
-                raise ValueError(f"AI service returned unexpected type: {type(ai_response)}. Expected list, got {type(ai_response)}")
+                raise TypeError(
+                    f"AI service returned unexpected type: {type(ai_response).__name__}. "
+                    f"Expected list, got {type(ai_response)}. Value: {str(ai_response)[:200]}"
+                )
            
            if not ai_response:
                raise ValueError("AI service returned empty list of recommendations")
--- a/backend/services/database.py
+++ b/backend/services/database.py
@@ -25,6 +25,8 @@ from models.content_asset_models import Base as ContentAssetBase
 from models.product_marketing_models import Campaign, CampaignProposal, CampaignAsset
 # Product Asset models (Product Marketing Suite - product assets, not campaigns)
 from models.product_asset_models import ProductAsset, ProductStyleTemplate, EcommerceExport
+# Podcast Maker models use SubscriptionBase, but import to ensure models are registered
+from models.podcast_models import PodcastProject

 # Database configuration
 DATABASE_URL = os.getenv('DATABASE_URL', 'sqlite:///./alwrity.db')
--- a/backend/services/llm_providers/main_audio_generation.py
+++ b/backend/services/llm_providers/main_audio_generation.py
@@ -69,13 +69,21 @@ def generate_audio(
        RuntimeError: If subscription limits are exceeded or user_id is missing.
    """
    try:
-        logger.info("[audio_gen] Starting audio generation")
-        logger.debug(f"[audio_gen] Text length: {len(text)} characters, voice: {voice_id}")
+        # VALIDATION: Check inputs before any processing or API calls
+        if not text or not isinstance(text, str) or len(text.strip()) == 0:
+            raise ValueError("Text input is required and cannot be empty")
+        
+        text = text.strip()  # Normalize whitespace
+        
+        if len(text) > 10000:
+            raise ValueError(f"Text is too long ({len(text)} characters). Maximum is 10,000 characters.")
        
-        # SUBSCRIPTION CHECK - Required and strict enforcement
        if not user_id:
            raise RuntimeError("user_id is required for subscription checking. Please provide Clerk user ID.")
        
+        logger.info("[audio_gen] Starting audio generation")
+        logger.debug(f"[audio_gen] Text length: {len(text)} characters, voice: {voice_id}")
+        
        # Calculate cost based on character count (every character is 1 token)
        # Pricing: $0.05 per 1,000 characters
        character_count = len(text)
@@ -190,8 +198,9 @@ def generate_audio(
                    new_cost = current_cost_before + estimated_cost
                    
                    # Use direct SQL UPDATE for dynamic attributes
-                    from sqlalchemy import text
-                    update_query = text("""
+                    # Import sqlalchemy.text with alias to avoid shadowing the 'text' parameter
+                    from sqlalchemy import text as sql_text
+                    update_query = sql_text("""
                        UPDATE usage_summaries 
                        SET audio_calls = :new_calls,
                            audio_cost = :new_cost
@@ -210,6 +219,8 @@ def generate_audio(
                    summary.updated_at = datetime.utcnow()
                    
                    # Create usage log
+                    # Store the text parameter in a local variable before any imports to prevent shadowing
+                    text_param = text  # Capture function parameter before any potential shadowing
                    usage_log = APIUsageLog(
                        user_id=user_id,
                        provider=APIProvider.AUDIO,
@@ -224,7 +235,7 @@ def generate_audio(
                        cost_total=estimated_cost,
                        response_time=0.0,
                        status_code=200,
-                        request_size=len(text.encode("utf-8")),
+                        request_size=len(text_param.encode("utf-8")),  # Use captured parameter
                        response_size=len(audio_bytes),
                        billing_period=current_period,
                    )
--- a/backend/services/podcast_service.py
+++ b/backend/services/podcast_service.py
@@ -0,0 +1,139 @@
+"""
+Podcast Service
+
+Service layer for managing podcast project persistence.
+"""
+
+from sqlalchemy.orm import Session
+from sqlalchemy import desc, and_, or_
+from typing import Optional, List, Dict, Any
+from datetime import datetime
+import uuid
+
+from models.podcast_models import PodcastProject
+
+
+class PodcastService:
+    """Persistence service for PodcastProject rows; lookups are always scoped to the owning user_id."""
+    
+    def __init__(self, db: Session):
+        self.db = db
+    
+    def create_project(
+        self,
+        user_id: str,
+        project_id: str,
+        idea: str,
+        duration: int,
+        speakers: int,
+        budget_cap: float,
+        **kwargs
+    ) -> PodcastProject:
+        """Insert a project in "draft" status at the "create" step, commit, and return the refreshed row."""
+        project = PodcastProject(
+            project_id=project_id,
+            user_id=user_id,
+            idea=idea,
+            duration=duration,
+            speakers=speakers,
+            budget_cap=budget_cap,
+            status="draft",
+            current_step="create",
+            **kwargs
+        )
+        self.db.add(project)
+        self.db.commit()
+        self.db.refresh(project)
+        return project
+    
+    def get_project(self, user_id: str, project_id: str) -> Optional[PodcastProject]:
+        """Return the project matching both project_id and user_id, or None if there is no such row."""
+        return self.db.query(PodcastProject).filter(
+            and_(
+                PodcastProject.project_id == project_id,
+                PodcastProject.user_id == user_id
+            )
+        ).first()
+    
+    def update_project(
+        self,
+        user_id: str,
+        project_id: str,
+        **updates
+    ) -> Optional[PodcastProject]:
+        """Set the given keyword fields on an owned project; returns None when the project is not found."""
+        project = self.get_project(user_id, project_id)
+        if not project:
+            return None
+        
+        # Keys that are not attributes of the model are skipped silently
+        for key, value in updates.items():
+            if hasattr(project, key):
+                setattr(project, key, value)
+        
+        project.updated_at = datetime.utcnow()
+        self.db.commit()
+        self.db.refresh(project)
+        return project
+    
+    def list_projects(
+        self,
+        user_id: str,
+        status: Optional[str] = None,
+        favorites_only: bool = False,
+        limit: int = 50,
+        offset: int = 0,
+        order_by: str = "updated_at"  # "created_at" sorts by creation time; any other value sorts by updated_at
+    ) -> tuple[List[PodcastProject], int]:
+        """Return (one page of the user's projects, newest first; total matching rows before pagination)."""
+        query = self.db.query(PodcastProject).filter(
+            PodcastProject.user_id == user_id
+        )
+        
+        # Optional filters; a falsy status means "any status"
+        if status:
+            query = query.filter(PodcastProject.status == status)
+        
+        if favorites_only:
+            query = query.filter(PodcastProject.is_favorite == True)
+        
+        # Count the filtered set before offset/limit so callers can page
+        total = query.count()
+        
+        # Descending order on the chosen timestamp column
+        if order_by == "created_at":
+            query = query.order_by(desc(PodcastProject.created_at))
+        else:
+            query = query.order_by(desc(PodcastProject.updated_at))
+        
+        # Apply pagination
+        projects = query.offset(offset).limit(limit).all()
+        
+        return projects, total
+    
+    def delete_project(self, user_id: str, project_id: str) -> bool:
+        """Delete an owned project and commit; returns False when the project is not found."""
+        project = self.get_project(user_id, project_id)
+        if not project:
+            return False
+        
+        self.db.delete(project)
+        self.db.commit()
+        return True
+    
+    def toggle_favorite(self, user_id: str, project_id: str) -> Optional[PodcastProject]:
+        """Flip is_favorite on an owned project and bump updated_at; returns None when not found."""
+        project = self.get_project(user_id, project_id)
+        if not project:
+            return None
+        
+        project.is_favorite = not project.is_favorite
+        project.updated_at = datetime.utcnow()
+        self.db.commit()
+        self.db.refresh(project)
+        return project
+    
+    def update_status(self, user_id: str, project_id: str, status: str) -> Optional[PodcastProject]:
+        """Set the project's status by delegating to update_project (same None-when-missing result)."""
+        return self.update_project(user_id, project_id, status=status)
+
--- a/backend/services/story_writer/service_components/setup.py
+++ b/backend/services/story_writer/service_components/setup.py
@@ -8,6 +8,8 @@ from typing import Any, Dict, List
 from fastapi import HTTPException
 from loguru import logger

+from services.llm_providers.main_text_generation import llm_text_gen
+
 from .base import StoryServiceBase


--- a/backend/services/subscription/preflight_validator.py
+++ b/backend/services/subscription/preflight_validator.py
@@ -545,6 +545,188 @@ def validate_video_generation_operations(
        )


+def validate_scene_animation_operation(
+    pricing_service: PricingService,
+    user_id: str,
+) -> None:
+    """
+    Validate one WaveSpeed scene-animation call against subscription limits; returns None, raises HTTPException (429 blocked, 500 on validator error).
+    """
+    try:
+        operations_to_validate = [
+            {
+                'provider': APIProvider.VIDEO,
+                'tokens_requested': 0,
+                'actual_provider_name': 'wavespeed',
+                'operation_type': 'scene_animation',
+            }
+        ]
+
+        can_proceed, message, error_details = pricing_service.check_comprehensive_limits(
+            user_id=user_id,
+            operations=operations_to_validate,
+        )
+
+        if not can_proceed:
+            logger.error(f"[Pre-flight Validator] Scene animation blocked for user {user_id}: {message}")
+            usage_info = error_details.get('usage_info', {}) if error_details else {}
+            provider = usage_info.get('provider', 'video') if usage_info else 'video'
+            raise HTTPException(
+                status_code=429,
+                detail={
+                    'error': message,
+                    'message': message,
+                    'provider': provider,
+                    'usage_info': usage_info if usage_info else error_details,
+                }
+            )
+
+        logger.info(f"[Pre-flight Validator] ✅ Scene animation validated for user {user_id}")
+        # NOTE(review): this function is defined again further down this module and that later definition rebinds the name - confirm which copy should remain.
+        
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"[Pre-flight Validator] Error validating scene animation: {e}", exc_info=True)
+        raise HTTPException(
+            status_code=500,
+            detail={
+                'error': f"Failed to validate scene animation: {str(e)}",
+                'message': f"Failed to validate scene animation: {str(e)}"
+            }
+        )
+
+
+def validate_image_control_operations(
+    pricing_service: PricingService,
+    user_id: str,
+    num_images: int = 1
+) -> None:
+    """
+    Validate image control operations (sketch-to-image, structure control, style transfer) before making API calls.
+    
+    Control operations use Stability AI for image generation with control inputs, so they use
+    the same validation as image generation operations.
+    
+    Args:
+        pricing_service: PricingService instance
+        user_id: User ID for subscription checking
+        num_images: Number of images to generate; values below 1 are validated as a single image
+        
+    Returns:
+        None - raises HTTPException with 429 status if validation fails (500 if the check itself errors)
+    """
+    try:
+        # Control operations use Stability AI, same as image generation
+        operations_to_validate = [
+            {
+                'provider': APIProvider.STABILITY,
+                'tokens_requested': 0,
+                'actual_provider_name': 'stability',
+                'operation_type': 'image_generation'  # Control ops use image generation limits
+            }
+            for _ in range(max(1, num_images))  # never hand the limit check an empty operation list
+        ]
+        
+        logger.info(f"[Pre-flight Validator] 🚀 Validating {len(operations_to_validate)} image control operation(s) for user {user_id}")
+        
+        can_proceed, message, error_details = pricing_service.check_comprehensive_limits(
+            user_id=user_id,
+            operations=operations_to_validate
+        )
+        
+        if not can_proceed:
+            logger.error(f"[Pre-flight Validator] Image control blocked for user {user_id}: {message}")
+            
+            usage_info = error_details.get('usage_info', {}) if error_details else {}
+            provider = usage_info.get('provider', 'stability') if usage_info else 'stability'
+            
+            raise HTTPException(
+                status_code=429,
+                detail={
+                    'error': message,
+                    'message': message,
+                    'provider': provider,
+                    'usage_info': usage_info if usage_info else error_details
+                }
+            )
+        
+        logger.info(f"[Pre-flight Validator] ✅ Image control validated for user {user_id}")
+        
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"[Pre-flight Validator] Error validating image control: {e}", exc_info=True)
+        raise HTTPException(
+            status_code=500,
+            detail={
+                'error': f"Failed to validate image control: {str(e)}",
+                'message': f"Failed to validate image control: {str(e)}"
+            }
+        )
+
+
+def validate_video_generation_operations(
+    pricing_service: PricingService,
+    user_id: str
+) -> None:
+    """
+    Validate a single video generation operation against subscription limits before making API calls.
+    
+    Args:
+        pricing_service: PricingService instance
+        user_id: User ID for subscription checking
+        
+    Returns:
+        None - raises HTTPException with 429 status if validation fails (500 if the check itself errors)
+    """
+    try:
+        operations_to_validate = [
+            {
+                'provider': APIProvider.VIDEO,
+                'tokens_requested': 0,
+                'actual_provider_name': 'video',
+                'operation_type': 'video_generation'
+            }
+        ]
+        
+        can_proceed, message, error_details = pricing_service.check_comprehensive_limits(
+            user_id=user_id,
+            operations=operations_to_validate
+        )
+        
+        if not can_proceed:
+            logger.error(f"[Pre-flight Validator] Video generation blocked for user {user_id}: {message}")
+            
+            usage_info = error_details.get('usage_info', {}) if error_details else {}
+            provider = usage_info.get('provider', 'video') if usage_info else 'video'
+            
+            raise HTTPException(
+                status_code=429,
+                detail={
+                    'error': message,
+                    'message': message,
+                    'provider': provider,
+                    'usage_info': usage_info if usage_info else error_details
+                }
+            )
+        
+        logger.info(f"[Pre-flight Validator] ✅ Video generation validated for user {user_id}")
+        # NOTE(review): a function with this name appears to exist earlier in this module already; this definition replaces it - confirm the duplicate is intended.
+        
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"[Pre-flight Validator] Error validating video generation: {e}", exc_info=True)
+        raise HTTPException(
+            status_code=500,
+            detail={
+                'error': f"Failed to validate video generation: {str(e)}",
+                'message': f"Failed to validate video generation: {str(e)}"
+            }
+        )
+
+
 def validate_scene_animation_operation(
    pricing_service: PricingService,
    user_id: str,
@@ -593,4 +775,79 @@ def validate_scene_animation_operation(
                'error': f"Failed to validate scene animation: {str(e)}",
                'message': f"Failed to validate scene animation: {str(e)}",
            },
+        )
+
+
+def validate_calendar_generation_operations(
+    pricing_service: PricingService,
+    user_id: str,
+    gpt_provider: str = "google"
+) -> None:
+    """
+    Validate calendar generation operations before making API calls.
+    
+    Args:
+        pricing_service: PricingService instance
+        user_id: User ID for subscription checking
+        gpt_provider: GPT provider from env var; None or empty falls back to "google"
+        
+    Returns:
+        None - raises HTTPException with 429 status if validation fails (500 if the check itself errors)
+    """
+    try:
+        # An unset env var reaches us as None; treat it like the default instead of failing on .lower()
+        gpt_provider_lower = (gpt_provider or "google").strip().lower()
+        if gpt_provider_lower == "huggingface":
+            llm_provider_enum = APIProvider.MISTRAL
+            llm_provider_name = "huggingface"
+        else:
+            llm_provider_enum = APIProvider.GEMINI
+            llm_provider_name = "gemini"
+            
+        # Estimate tokens for 12-step process
+        # This is a heavy operation involving multiple steps and analysis
+        operations_to_validate = [
+            {
+                'provider': llm_provider_enum,
+                'tokens_requested': 20000, # Conservative estimate for full calendar generation
+                'actual_provider_name': llm_provider_name,
+                'operation_type': 'calendar_generation'
+            }
+        ]
+        
+        logger.info(f"[Pre-flight Validator] 🚀 Validating Calendar Generation for user {user_id}")
+        
+        can_proceed, message, error_details = pricing_service.check_comprehensive_limits(
+            user_id=user_id,
+            operations=operations_to_validate
+        )
+        
+        if not can_proceed:
+            usage_info = error_details.get('usage_info', {}) if error_details else {}
+            provider = usage_info.get('provider', llm_provider_name) if usage_info else llm_provider_name
+            
+            logger.warning(f"[Pre-flight Validator] Calendar generation blocked for user {user_id}: {message}")
+            
+            raise HTTPException(
+                status_code=429,
+                detail={
+                    'error': message,
+                    'message': message,
+                    'provider': provider,
+                    'usage_info': usage_info if usage_info else error_details
+                }
+            )
+            
+        logger.info(f"[Pre-flight Validator] ✅ Calendar Generation validated for user {user_id}")
+        
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"[Pre-flight Validator] Error validating calendar generation: {e}", exc_info=True)
+        raise HTTPException(
+            status_code=500,
+            detail={
+                'error': f"Failed to validate calendar generation: {str(e)}",
+                'message': f"Failed to validate calendar generation: {str(e)}"
+            }
         )
--- a/backend/services/wavespeed/client.py
+++ b/backend/services/wavespeed/client.py
@@ -637,4 +637,260 @@ class WaveSpeedClient:
                status_code=502,
                detail="Failed to fetch generated audio from WaveSpeed URL",
            )
+    
+    def submit_text_to_video(
+        self,
+        model_path: str,
+        payload: Dict[str, Any],
+        timeout: int = 60,
+    ) -> str:
+        """
+        Submit a text-to-video generation request to WaveSpeed.
+
+        Args:
+            model_path: Model path appended to BASE_URL (e.g., "alibaba/wan-2.5/text-to-video")
+            payload: Request payload with prompt, resolution, duration, optional audio
+            timeout: Request timeout in seconds
+
+        Returns:
+            Prediction ID for polling
+
+        Raises:
+            HTTPException: 502 when the request cannot be completed, WaveSpeed answers
+                with a non-200 status, or the response carries no prediction id
+        """
+        url = f"{self.BASE_URL}/{model_path}"
+        logger.info(f"[WaveSpeed] Submitting text-to-video request to {url}")
+        headers = self._headers()
+        try:
+            response = requests.post(url, headers=headers, json=payload, timeout=timeout)
+        except requests.RequestException as exc:
+            # Timeouts and connection errors are upstream failures, just like a bad status
+            logger.error(f"[WaveSpeed] Text-to-video submission request failed: {exc}")
+            raise HTTPException(
+                status_code=502,
+                detail={
+                    "error": "WaveSpeed text-to-video submission failed",
+                    "response": str(exc),
+                },
+            ) from exc
+
+        if response.status_code != 200:
+            logger.error(f"[WaveSpeed] Text-to-video submission failed: {response.status_code} {response.text}")
+            raise HTTPException(
+                status_code=502,
+                detail={
+                    "error": "WaveSpeed text-to-video submission failed",
+                    "status_code": response.status_code,
+                    "response": response.text,
+                },
+            )
+
+        try:
+            body = response.json()
+        except ValueError:
+            # A 200 response whose body is not JSON is treated like a missing id
+            body = None
+        data = body.get("data") if isinstance(body, dict) else None
+        if not isinstance(data, dict) or "id" not in data:
+            logger.error(f"[WaveSpeed] Unexpected text-to-video response: {response.text}")
+            raise HTTPException(
+                status_code=502,
+                detail={"error": "WaveSpeed response missing prediction id"},
+            )
+
+        prediction_id = data["id"]
+        logger.info(f"[WaveSpeed] Submitted text-to-video request: {prediction_id}")
+        return prediction_id
+    
+    def generate_text_video(
+        self,
+        prompt: str,
+        resolution: str = "720p",  # 480p, 720p, 1080p
+        duration: int = 5,  # 5 or 10 seconds
+        audio_base64: Optional[str] = None,  # Optional audio for lip-sync
+        negative_prompt: Optional[str] = None,
+        seed: Optional[int] = None,
+        enable_prompt_expansion: bool = True,
+        enable_sync_mode: bool = False,
+        timeout: int = 180,
+    ) -> Dict[str, Any]:
+        """
+        Generate video from text prompt using WAN 2.5 text-to-video.
+
+        Args:
+            prompt: Text prompt describing the video
+            resolution: Output resolution (480p, 720p, 1080p)
+            duration: Video duration in seconds (5 or 10)
+            audio_base64: Optional audio file (wav/mp3, 3-30s, ≤15MB) for lip-sync
+            negative_prompt: Optional negative prompt
+            seed: Optional random seed for reproducibility
+            enable_prompt_expansion: Enable prompt optimizer
+            enable_sync_mode: If True, wait for result and return it directly
+            timeout: Request timeout in seconds; in async mode it is also passed to
+                polling as the overall polling timeout
+
+        Returns:
+            Dictionary with video bytes, metadata, and cost
+
+        Raises:
+            HTTPException: 400 for an unsupported resolution or duration; 502 when the
+                WaveSpeed request, its response, or the video download fails. Polling
+                errors whose detail is a dict gain "prediction_id" and "resume_available".
+        """
+        model_path = "alibaba/wan-2.5/text-to-video"
+
+        # Validate resolution
+        valid_resolutions = ["480p", "720p", "1080p"]
+        if resolution not in valid_resolutions:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Invalid resolution: {resolution}. Must be one of: {valid_resolutions}"
+            )
+
+        # Validate duration
+        if duration not in [5, 10]:
+            raise HTTPException(
+                status_code=400,
+                detail="Duration must be 5 or 10 seconds"
+            )
+
+        # Build payload
+        payload = {
+            "prompt": prompt,
+            "resolution": resolution,
+            "duration": duration,
+            "enable_prompt_expansion": enable_prompt_expansion,
+            "enable_sync_mode": enable_sync_mode,  # Add sync mode to payload
+        }
+
+        # Add optional audio
+        if audio_base64:
+            payload["audio"] = audio_base64
+
+        # Add optional parameters
+        if negative_prompt:
+            payload["negative_prompt"] = negative_prompt
+        if seed is not None:
+            payload["seed"] = seed
+
+        # Submit request
+        logger.info(
+            f"[WaveSpeed] Generating text-to-video: resolution={resolution}, "
+            f"duration={duration}s, prompt_length={len(prompt)}, sync_mode={enable_sync_mode}"
+        )
+
+        # For sync mode, submit and get result directly
+        if enable_sync_mode:
+            url = f"{self.BASE_URL}/{model_path}"
+            headers = self._headers()
+            try:
+                response = requests.post(url, headers=headers, json=payload, timeout=timeout)
+            except requests.RequestException as exc:
+                # Timeouts and connection errors are upstream failures, just like a bad status
+                logger.error(f"[WaveSpeed] Text-to-video submission request failed: {exc}")
+                raise HTTPException(
+                    status_code=502,
+                    detail={
+                        "error": "WaveSpeed text-to-video submission failed",
+                        "response": str(exc)[:500],
+                    },
+                ) from exc
+
+            if response.status_code != 200:
+                logger.error(f"[WaveSpeed] Text-to-video submission failed: {response.status_code} {response.text}")
+                raise HTTPException(
+                    status_code=502,
+                    detail={
+                        "error": "WaveSpeed text-to-video submission failed",
+                        "status_code": response.status_code,
+                        "response": response.text[:500],
+                    },
+                )
+
+            try:
+                response_json = response.json()
+            except ValueError:
+                # Non-JSON body: fall through to the "no outputs" error below
+                response_json = None
+            data = None
+            if isinstance(response_json, dict):
+                data = response_json.get("data") or response_json
+
+            # In sync mode, result should be directly in outputs
+            outputs = (data.get("outputs") or []) if isinstance(data, dict) else []
+            if not outputs:
+                logger.error(f"[WaveSpeed] No outputs in sync mode response: {response.text[:500]}")
+                raise HTTPException(
+                    status_code=502,
+                    detail="WaveSpeed text-to-video returned no outputs in sync mode",
+                )
+
+            # Extract video URL from outputs
+            video_url = outputs[0]
+            if not isinstance(video_url, str) or not video_url.startswith("http"):
+                logger.error(f"[WaveSpeed] Invalid video URL format in sync mode: {video_url}")
+                raise HTTPException(
+                    status_code=502,
+                    detail=f"Invalid video URL format: {video_url}",
+                )
+
+            # Download video
+            logger.info(f"[WaveSpeed] Downloading video from sync mode URL: {video_url}")
+            video_bytes = self._download_text_video_bytes(
+                video_url, "Failed to download WAN 2.5 video from sync mode"
+            )
+            prediction_id = data.get("id", "sync_mode")
+            metadata = data.get("metadata") or {}
+        else:
+            # Async mode - submit and poll
+            prediction_id = self.submit_text_to_video(model_path, payload, timeout=timeout)
+
+            # Poll for completion
+            try:
+                result = self.poll_until_complete(
+                    prediction_id,
+                    timeout_seconds=timeout,
+                    interval_seconds=2.0
+                )
+            except HTTPException as e:
+                # Attach the prediction id so the caller can identify the pending job
+                detail = e.detail or {}
+                if isinstance(detail, dict):
+                    detail.setdefault("prediction_id", prediction_id)
+                    detail.setdefault("resume_available", True)
+                raise HTTPException(status_code=e.status_code, detail=detail) from e
+
+            # Extract video URL
+            outputs = result.get("outputs") or []
+            if not outputs:
+                raise HTTPException(
+                    status_code=502,
+                    detail="WAN 2.5 text-to-video completed but returned no outputs"
+                )
+
+            video_url = outputs[0]
+            if not isinstance(video_url, str) or not video_url.startswith("http"):
+                raise HTTPException(
+                    status_code=502,
+                    detail=f"Invalid video URL format: {video_url}"
+                )
+
+            # Download video
+            logger.info(f"[WaveSpeed] Downloading video from: {video_url}")
+            video_bytes = self._download_text_video_bytes(
+                video_url, "Failed to download WAN 2.5 video"
+            )
+            metadata = result.get("metadata") or {}
+
+        # Calculate cost (same pricing as image-to-video): per-second rate times duration
+        pricing = {
+            "480p": 0.05,
+            "720p": 0.10,
+            "1080p": 0.15,
+        }
+        cost = pricing.get(resolution, 0.10) * duration
+
+        # Get video dimensions
+        resolution_dims = {
+            "480p": (854, 480),
+            "720p": (1280, 720),
+            "1080p": (1920, 1080),
+        }
+        width, height = resolution_dims.get(resolution, (1280, 720))
+
+        logger.info(
+            f"[WaveSpeed] ✅ Generated text-to-video: {len(video_bytes)} bytes, "
+            f"resolution={resolution}, duration={duration}s, cost=${cost:.2f}"
+        )
+
+        return {
+            "video_bytes": video_bytes,
+            "prompt": prompt,
+            "duration": float(duration),
+            "model_name": model_path,
+            "cost": cost,
+            "provider": "wavespeed",
+            "source_video_url": video_url,
+            "prediction_id": prediction_id,
+            "resolution": resolution,
+            "width": width,
+            "height": height,
+            "metadata": metadata,
+        }
+
+    def _download_text_video_bytes(self, video_url: str, error_message: str) -> bytes:
+        """
+        Download a generated video and return its raw bytes.
+
+        Args:
+            video_url: URL of the generated video
+            error_message: Value placed under the "error" key of the 502 detail on failure
+
+        Returns:
+            The response body as bytes
+
+        Raises:
+            HTTPException: 502 when the request fails or returns a non-200 status
+        """
+        try:
+            video_response = requests.get(video_url, timeout=180)
+        except requests.RequestException as exc:
+            logger.error(f"[WaveSpeed] Video download request failed: {exc}")
+            raise HTTPException(
+                status_code=502,
+                detail={
+                    "error": error_message,
+                    "response": str(exc)[:200],
+                }
+            ) from exc
+
+        if video_response.status_code != 200:
+            raise HTTPException(
+                status_code=502,
+                detail={
+                    "error": error_message,
+                    "status_code": video_response.status_code,
+                    "response": video_response.text[:200],
+                }
+            )
+
+        return video_response.content

--- a/backend/services/youtube/init.py
+++ b/backend/services/youtube/init.py
@@ -0,0 +1,2 @@
+"""YouTube Creator Studio services."""
+
--- a/backend/services/youtube/planner.py
+++ b/backend/services/youtube/planner.py
@@ -0,0 +1,358 @@
+"""
+YouTube Video Planner Service
+
+Generates video plans, outlines, and insights using AI with persona integration.
+"""
+
+from typing import Dict, Any, Optional, List
+from loguru import logger
+from fastapi import HTTPException
+
+from services.llm_providers.main_text_generation import llm_text_gen
+from utils.logger_utils import get_service_logger
+
+logger = get_service_logger("youtube.planner")
+
+
+class YouTubePlannerService:
+    """Service for planning YouTube videos with AI assistance."""
+    
+    def __init__(self):
+        """Initialize the planner service; it sets no attributes and only logs that it is ready."""
+        logger.info("[YouTubePlanner] Service initialized")
+    
+    def generate_video_plan(
+        self,
+        user_idea: str,
+        duration_type: str,  # "shorts", "medium", "long"
+        persona_data: Optional[Dict[str, Any]] = None,
+        reference_image_description: Optional[str] = None,
+        source_content_id: Optional[str] = None,  # For blog/story conversion
+        source_content_type: Optional[str] = None,  # "blog", "story"
+        user_id: Optional[str] = None,
+        include_scenes: bool = False,  # For shorts: combine plan + scenes in one call
+    ) -> Dict[str, Any]:
+        """
+        Generate a comprehensive video plan from user input.
+
+        Args:
+            user_idea: User's video idea or topic
+            duration_type: "shorts" (≤60s), "medium" (1-4min), "long" (4-10min)
+            persona_data: Optional persona data for tone/style
+            reference_image_description: Optional description of reference image
+            source_content_id: Optional ID of source content (blog/story)
+            source_content_type: Type of source content
+            user_id: Clerk user ID for subscription checking
+            include_scenes: When True and duration_type is "shorts", the scene
+                breakdown is requested in the same LLM call as the plan
+
+        Returns:
+            Dictionary with video plan, outline, insights, and metadata. The keys
+            "duration_type", "duration_metadata" and "user_idea" are always added.
+            "_scenes_included" is set only when a shorts scene breakdown was requested:
+            True if the response contains a non-empty "scenes" value, False otherwise.
+
+        Raises:
+            HTTPException: Re-raised unchanged when raised inside; any other error is
+                logged and converted to a 500.
+        """
+        try:
+            logger.info(
+                f"[YouTubePlanner] Generating plan: idea={user_idea[:50]}..., "
+                f"duration={duration_type}, user={user_id}"
+            )
+
+            # Build persona context
+            persona_context = self._build_persona_context(persona_data)
+
+            # Build duration context
+            duration_context = self._get_duration_context(duration_type)
+
+            # Build source content context if provided
+            source_context = ""
+            if source_content_id and source_content_type:
+                source_context = f"""
+**Source Content:**
+- Type: {source_content_type}
+- ID: {source_content_id}
+- Note: This video should be based on the existing {source_content_type} content.
+"""
+
+            # Build reference image context
+            image_context = ""
+            if reference_image_description:
+                image_context = f"""
+**Reference Image:**
+{reference_image_description}
+- Use this as visual inspiration for the video
+"""
+
+            # Generate comprehensive video plan
+            planning_prompt = f"""You are an expert YouTube content strategist. Create a comprehensive video plan based on the user's idea.
+
+**User's Video Idea:**
+{user_idea}
+
+**Video Duration Type:**
+{duration_type} ({duration_context['description']})
+
+**Duration Guidelines:**
+- Target length: {duration_context['target_seconds']} seconds
+- Hook duration: {duration_context['hook_seconds']} seconds
+- Main content: {duration_context['main_seconds']} seconds
+- CTA duration: {duration_context['cta_seconds']} seconds
+- Maximum scenes: {duration_context['max_scenes']} (for shorts, keep 2-4 scenes total)
+
+{persona_context}
+
+{source_context}
+
+{image_context}
+
+**Your Task:**
+Create a detailed video plan that includes:
+
+1. **Video Summary**: A 2-3 sentence overview of what the video will cover
+2. **Target Audience**: Who this video is for
+3. **Video Goal**: Primary objective (educate, entertain, sell, inspire, etc.)
+4. **Key Message**: The main takeaway viewers should remember
+5. **Hook Strategy**: Attention-grabbing opening (first {duration_context['hook_seconds']} seconds)
+6. **Content Outline**: High-level structure with 3-5 main sections
+7. **Call-to-Action**: Clear CTA that fits the video goal
+8. **Visual Style**: Recommended visual approach (cinematic, tutorial, vlog, etc.)
+9. **Tone**: Recommended tone (professional, casual, energetic, etc.)
+10. **SEO Keywords**: 5-7 relevant keywords for YouTube SEO
+
+**Format your response as JSON:**
+{{
+  "video_summary": "...",
+  "target_audience": "...",
+  "video_goal": "...",
+  "key_message": "...",
+  "hook_strategy": "...",
+  "content_outline": [
+    {{"section": "Section 1", "description": "...", "duration_estimate": 30}},
+    {{"section": "Section 2", "description": "...", "duration_estimate": 45}}
+  ],
+  "call_to_action": "...",
+  "visual_style": "...",
+  "tone": "...",
+  "seo_keywords": ["keyword1", "keyword2", ...]
+}}
+
+Make sure the content outline fits within the {duration_type} duration constraints.
+"""
+
+            system_prompt = (
+                "You are an expert YouTube content strategist specializing in creating "
+                "engaging, well-structured video plans. Your plans are data-driven, "
+                "audience-focused, and optimized for YouTube's algorithm."
+            )
+
+            # Schema for the plan itself; the shorts variant below adds "scenes" to it
+            json_struct = {
+                "type": "object",
+                "properties": {
+                    "video_summary": {"type": "string"},
+                    "target_audience": {"type": "string"},
+                    "video_goal": {"type": "string"},
+                    "key_message": {"type": "string"},
+                    "hook_strategy": {"type": "string"},
+                    "content_outline": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "section": {"type": "string"},
+                                "description": {"type": "string"},
+                                "duration_estimate": {"type": "number"}
+                            }
+                        }
+                    },
+                    "call_to_action": {"type": "string"},
+                    "visual_style": {"type": "string"},
+                    "tone": {"type": "string"},
+                    "seo_keywords": {
+                        "type": "array",
+                        "items": {"type": "string"}
+                    }
+                },
+                "required": [
+                    "video_summary", "target_audience", "video_goal", "key_message",
+                    "hook_strategy", "content_outline", "call_to_action",
+                    "visual_style", "tone", "seo_keywords"
+                ]
+            }
+
+            # For shorts, combine plan + scenes in one call to save API calls
+            wants_scenes = include_scenes and duration_type == "shorts"
+            if wants_scenes:
+                planning_prompt += f"""
+
+**IMPORTANT: Since this is a SHORTS video, also generate the complete scene breakdown in the same response.**
+
+**Additional Task - Generate Detailed Scenes:**
+Create detailed scenes (up to {duration_context['max_scenes']} scenes) that include:
+1. Scene number and title
+2. Narration text (what will be spoken) - keep it concise for shorts
+3. Visual description (what viewers will see)
+4. Duration estimate (2-8 seconds each)
+5. Emphasis tags (hook, main_content, transition, cta)
+
+**Scene Format:**
+Each scene should be detailed enough for video generation. Total duration must fit within {duration_context['target_seconds']} seconds.
+
+**Update JSON structure to include "scenes" array:**
+Add a "scenes" field with the complete scene breakdown.
+"""
+
+                json_struct["properties"]["scenes"] = {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "scene_number": {"type": "number"},
+                            "title": {"type": "string"},
+                            "narration": {"type": "string"},
+                            "visual_description": {"type": "string"},
+                            "duration_estimate": {"type": "number"},
+                            "emphasis": {"type": "string"},
+                            "visual_cues": {
+                                "type": "array",
+                                "items": {"type": "string"}
+                            }
+                        },
+                        "required": [
+                            "scene_number", "title", "narration", "visual_description",
+                            "duration_estimate", "emphasis"
+                        ]
+                    }
+                }
+                json_struct["required"].append("scenes")
+
+            # Generate plan using LLM
+            response = llm_text_gen(
+                prompt=planning_prompt,
+                system_prompt=system_prompt,
+                user_id=user_id,
+                json_struct=json_struct
+            )
+
+            # Parse response (handle both dict and JSON string)
+            if isinstance(response, dict):
+                plan_data = response
+            else:
+                import json
+                plan_data = json.loads(response)
+
+            # Add metadata
+            plan_data["duration_type"] = duration_type
+            plan_data["duration_metadata"] = duration_context
+            plan_data["user_idea"] = user_idea
+
+            # If scenes were included, mark them for scene builder. An empty or null
+            # "scenes" value counts as missing so the scenes get generated separately.
+            scenes = plan_data.get("scenes") if wants_scenes else None
+            if scenes:
+                plan_data["_scenes_included"] = True
+                logger.info(
+                    f"[YouTubePlanner] ✅ Plan + {len(scenes)} scenes "
+                    f"generated in 1 AI call (optimized for shorts)"
+                )
+            else:
+                if wants_scenes:
+                    # LLM did not return scenes; downstream will regenerate
+                    plan_data["_scenes_included"] = False
+                    logger.warning(
+                        "[YouTubePlanner] Shorts optimization requested but no scenes returned; "
+                        "scene builder will generate scenes separately."
+                    )
+                logger.info("[YouTubePlanner] ✅ Plan generated successfully")
+
+            return plan_data
+
+        except HTTPException:
+            raise
+        except Exception as e:
+            logger.error(f"[YouTubePlanner] Error generating plan: {e}", exc_info=True)
+            raise HTTPException(
+                status_code=500,
+                detail=f"Failed to generate video plan: {str(e)}"
+            )
+    
+    def _build_persona_context(self, persona_data: Optional[Dict[str, Any]]) -> str:
+        """
+        Build persona context string for prompts.
+
+        Args:
+            persona_data: Optional persona dict; "core_persona" and its nested
+                "voice_characteristics" are read when present
+
+        Returns:
+            A prompt section describing the persona, or a default section when no
+            persona data is given
+        """
+        if not persona_data:
+            return """
+**Persona Context:**
+- Using default professional tone
+- No specific persona constraints
+"""
+
+        # `or {}` also covers keys that are present but null, which .get(key, {}) does not
+        core_persona = persona_data.get("core_persona") or {}
+        tone = core_persona.get("tone", "professional")
+        voice = core_persona.get("voice_characteristics") or {}
+
+        return f"""
+**Persona Context:**
+- Tone: {tone}
+- Voice Style: {voice.get('style', 'professional')}
+- Communication Style: {voice.get('communication_style', 'clear and direct')}
+- Brand Values: {core_persona.get('core_belief', 'value-driven content')}
+- Use this persona to guide the video's tone, style, and messaging approach.
+"""
+    
+    def _get_duration_context(self, duration_type: str) -> Dict[str, Any]:
+        """
+        Look up the timing preset for a duration type.
+
+        Presets exist for "shorts", "medium" and "long"; any other value
+        falls back to the "medium" preset.
+        """
+        presets = {
+            # Shorts are capped at a few scenes to control cost and pacing
+            "shorts": dict(
+                description="YouTube Shorts (15-60 seconds)",
+                target_seconds=30,
+                hook_seconds=3,
+                main_seconds=24,
+                cta_seconds=3,
+                max_scenes=4,
+                scene_duration_range=(2, 8),
+            ),
+            # Targets 2.5 minutes
+            "medium": dict(
+                description="Medium-length video (1-4 minutes)",
+                target_seconds=150,
+                hook_seconds=10,
+                main_seconds=130,
+                cta_seconds=10,
+                max_scenes=12,
+                scene_duration_range=(5, 15),
+            ),
+            # Targets 7 minutes
+            "long": dict(
+                description="Long-form video (4-10 minutes)",
+                target_seconds=420,
+                hook_seconds=15,
+                main_seconds=380,
+                cta_seconds=25,
+                max_scenes=20,
+                scene_duration_range=(10, 30),
+            ),
+        }
+
+        if duration_type in presets:
+            return presets[duration_type]
+        return presets["medium"]
+
--- a/backend/services/youtube/renderer.py
+++ b/backend/services/youtube/renderer.py
@@ -0,0 +1,412 @@
+"""
+YouTube Video Renderer Service
+
+Handles video rendering using WAN 2.5 text-to-video and audio generation.
+"""
+
+from typing import Dict, Any, List, Optional
+from pathlib import Path
+import base64
+import uuid
+import requests
+from loguru import logger
+from fastapi import HTTPException
+
+from services.wavespeed.client import WaveSpeedClient
+from services.llm_providers.main_audio_generation import generate_audio
+from services.story_writer.video_generation_service import StoryVideoGenerationService
+from services.subscription import PricingService
+from services.subscription.preflight_validator import validate_scene_animation_operation
+from services.llm_providers.main_video_generation import track_video_usage
+from utils.logger_utils import get_service_logger
+from utils.asset_tracker import save_asset_to_library
+
+# Rebinds the `logger` name imported from loguru above to the logger
+# returned by get_service_logger for this module.
+logger = get_service_logger("youtube.renderer")
+
+
+class YouTubeVideoRendererService:
+    """Service for rendering YouTube videos from scenes."""
+    
+    def __init__(self):
+        """Create the WaveSpeed client and ensure the video output directory exists."""
+        self.wavespeed_client = WaveSpeedClient()
+
+        # Climb four directory levels up from this file to find the base directory
+        root = Path(__file__)
+        for _ in range(4):
+            root = root.parent
+
+        # Rendered videos are stored under <base>/youtube_videos
+        self.output_dir = root / "youtube_videos"
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+        logger.info(f"[YouTubeRenderer] Initialized with output directory: {self.output_dir}")
+    
+    def render_scene_video(
+        self,
+        scene: Dict[str, Any],
+        video_plan: Dict[str, Any],
+        user_id: str,
+        resolution: str = "720p",
+        generate_audio_enabled: bool = True,
+        voice_id: str = "Wise_Woman",
+    ) -> Dict[str, Any]:
+        """
+        Render a single scene into a video.
+
+        All prompt validation happens before any paid call (narration audio
+        or WAN 2.5 video generation), so an invalid scene is rejected without
+        incurring cost.
+
+        Args:
+            scene: Scene data with narration and visual prompts
+            video_plan: Original video plan for context (not read by this method)
+            user_id: Clerk user ID
+            resolution: Video resolution (480p, 720p, 1080p)
+            generate_audio_enabled: Whether to generate narration audio
+            voice_id: Voice ID for audio generation
+
+        Returns:
+            Dictionary with the saved video's filename, URL, path, duration,
+            cost, dimensions, file size, prediction ID and usage info
+
+        Raises:
+            HTTPException: 400 for a missing/too-short visual prompt, 504 when
+                the WaveSpeed request times out, 502 for other WaveSpeed
+                request failures, 500 for any other error
+        """
+        # Bound before the try block so the error handlers below can always
+        # reference it, even if reading the scene itself fails.
+        scene_number = 1
+        default_user_action = (
+            "Please try again. If the issue persists, check your scene content and try a different resolution."
+        )
+        try:
+            scene_number = scene.get("scene_number", 1)
+            # `or` fallbacks guard against keys that are present but set to None
+            narration = (scene.get("narration") or "").strip()
+            visual_prompt = (
+                scene.get("enhanced_visual_prompt") or scene.get("visual_prompt") or ""
+            ).strip()
+            duration_estimate = scene.get("duration_estimate") or 5
+
+            # VALIDATION: Check inputs before making expensive API calls
+            if not visual_prompt:
+                raise HTTPException(
+                    status_code=400,
+                    detail={
+                        "error": f"Scene {scene_number} has no visual prompt",
+                        "scene_number": scene_number,
+                        "message": "Visual prompt is required for video generation",
+                        "user_action": "Please add a visual description for this scene before rendering.",
+                    }
+                )
+
+            # Checked here, before audio generation, so a scene that cannot be
+            # rendered never pays for narration audio first.
+            if len(visual_prompt) < 5:
+                raise HTTPException(
+                    status_code=400,
+                    detail={
+                        "error": f"Scene {scene_number} has invalid visual prompt",
+                        "scene_number": scene_number,
+                        "message": "Visual prompt must be at least 5 characters",
+                        "user_action": "Please provide a valid visual description for this scene.",
+                    }
+                )
+
+            if len(visual_prompt) < 10:
+                logger.warning(
+                    f"[YouTubeRenderer] Scene {scene_number} has very short visual prompt "
+                    f"({len(visual_prompt)} chars), may result in poor quality"
+                )
+
+            # Clamp duration to valid WAN 2.5 values (5 or 10 seconds)
+            duration = 5 if duration_estimate <= 7 else 10
+
+            logger.info(
+                f"[YouTubeRenderer] Rendering scene {scene_number}: "
+                f"resolution={resolution}, duration={duration}s, prompt_length={len(visual_prompt)}"
+            )
+
+            # Generate audio if requested - only if narration is not empty
+            audio_base64 = None
+            if generate_audio_enabled and narration:
+                try:
+                    audio_result = generate_audio(
+                        text=narration,
+                        voice_id=voice_id,
+                        user_id=user_id,
+                    )
+                    # generate_audio may return raw bytes or AudioGenerationResult
+                    audio_bytes = audio_result.audio_bytes if hasattr(audio_result, "audio_bytes") else audio_result
+                    # Convert to base64 (just the base64 string, not data URI)
+                    audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
+                    logger.info(f"[YouTubeRenderer] Generated audio for scene {scene_number}")
+                except Exception as e:
+                    # Deliberate best-effort: a scene without narration audio
+                    # is still rendered.
+                    logger.warning(f"[YouTubeRenderer] Audio generation failed: {e}, continuing without audio")
+
+            # Generate video using WAN 2.5 text-to-video
+            # This is the expensive API call - all validation is done before this
+            # Use sync mode to wait for result directly (prevents timeout issues)
+            try:
+                video_result = self.wavespeed_client.generate_text_video(
+                    prompt=visual_prompt,
+                    resolution=resolution,
+                    duration=duration,
+                    audio_base64=audio_base64,  # Optional: enables lip-sync if provided
+                    enable_prompt_expansion=True,
+                    enable_sync_mode=True,  # Use sync mode to wait for result directly
+                    timeout=600,  # Increased timeout for sync mode (10 minutes)
+                )
+            except requests.exceptions.Timeout as e:
+                logger.error(f"[YouTubeRenderer] WaveSpeed API timed out for scene {scene_number}: {e}")
+                raise HTTPException(
+                    status_code=504,
+                    detail={
+                        "error": "WaveSpeed request timed out",
+                        "scene_number": scene_number,
+                        "message": "The video generation request timed out.",
+                        "user_action": "Please retry. If it persists, try fewer scenes, lower resolution, or shorter durations.",
+                    },
+                ) from e
+            except requests.exceptions.RequestException as e:
+                logger.error(f"[YouTubeRenderer] WaveSpeed API request failed for scene {scene_number}: {e}")
+                raise HTTPException(
+                    status_code=502,
+                    detail={
+                        "error": "WaveSpeed request failed",
+                        "scene_number": scene_number,
+                        "message": str(e),
+                        "user_action": "Please retry. If it persists, check network connectivity or try again later.",
+                    },
+                ) from e
+
+            # Save scene video
+            video_service = StoryVideoGenerationService(output_dir=str(self.output_dir))
+            save_result = video_service.save_scene_video(
+                video_bytes=video_result["video_bytes"],
+                scene_number=scene_number,
+                user_id=user_id,
+            )
+
+            # Update video URL to use YouTube API endpoint
+            filename = save_result["video_filename"]
+            save_result["video_url"] = f"/api/youtube/videos/{filename}"
+
+            # Track usage
+            usage_info = track_video_usage(
+                user_id=user_id,
+                provider=video_result["provider"],
+                model_name=video_result["model_name"],
+                prompt=visual_prompt,
+                video_bytes=video_result["video_bytes"],
+                cost_override=video_result["cost"],
+            )
+
+            logger.info(
+                f"[YouTubeRenderer] ✅ Scene {scene_number} rendered: "
+                f"cost=${video_result['cost']:.2f}, size={len(video_result['video_bytes'])} bytes"
+            )
+
+            return {
+                "scene_number": scene_number,
+                "video_filename": save_result["video_filename"],
+                "video_url": save_result["video_url"],
+                "video_path": save_result["video_path"],
+                "duration": video_result["duration"],
+                "cost": video_result["cost"],
+                "resolution": resolution,
+                "width": video_result["width"],
+                "height": video_result["height"],
+                "file_size": save_result["file_size"],
+                "prediction_id": video_result.get("prediction_id"),
+                "usage_info": usage_info,
+            }
+
+        except HTTPException as e:
+            # Re-raise in the uniform shape the UI expects, keeping the
+            # original status code and the specific guidance when available.
+            error_detail = e.detail
+            if isinstance(error_detail, dict):
+                error_msg = error_detail.get("error", str(error_detail))
+                user_action = error_detail.get("user_action") or default_user_action
+            else:
+                error_msg = str(error_detail)
+                user_action = default_user_action
+
+            logger.error(
+                f"[YouTubeRenderer] Scene {scene_number} failed: {error_msg}",
+                exc_info=True
+            )
+            raise HTTPException(
+                status_code=e.status_code,
+                detail={
+                    "error": f"Failed to render scene {scene_number}",
+                    "scene_number": scene_number,
+                    "message": error_msg,
+                    "user_action": user_action,
+                }
+            ) from e
+        except Exception as e:
+            logger.error(f"[YouTubeRenderer] Error rendering scene {scene_number}: {e}", exc_info=True)
+            raise HTTPException(
+                status_code=500,
+                detail={
+                    "error": f"Failed to render scene {scene_number}",
+                    "scene_number": scene_number,
+                    "message": str(e),
+                    "user_action": default_user_action,
+                }
+            ) from e
+    
+    def render_full_video(
+        self,
+        scenes: List[Dict[str, Any]],
+        video_plan: Dict[str, Any],
+        user_id: str,
+        resolution: str = "720p",
+        combine_scenes: bool = True,
+        voice_id: str = "Wise_Woman",
+    ) -> Dict[str, Any]:
+        """
+        Render a complete video from multiple scenes.
+
+        Scenes whose "enabled" flag is false are skipped. The remaining scenes
+        are rendered one after another with narration audio enabled; when
+        more than one scene was rendered and combine_scenes is set, the scene
+        videos are concatenated into a single final video.
+
+        Args:
+            scenes: List of scene data
+            video_plan: Original video plan
+            user_id: Clerk user ID
+            resolution: Video resolution
+            combine_scenes: Whether to combine scenes into single video
+            voice_id: Voice ID for narration
+
+        Returns:
+            Dictionary with the per-scene results, total cost, scene count,
+            resolution and the final video path/URL (both None when no
+            combined video was produced)
+
+        Raises:
+            HTTPException: 400 when no scene is enabled; errors raised while
+                rendering a scene are propagated unchanged; 500 for any other
+                failure
+        """
+        try:
+            logger.info(
+                f"[YouTubeRenderer] Rendering full video: {len(scenes)} scenes, "
+                f"resolution={resolution}, user={user_id}"
+            )
+
+            # Filter enabled scenes
+            enabled_scenes = [s for s in scenes if s.get("enabled", True)]
+            if not enabled_scenes:
+                raise HTTPException(status_code=400, detail="No enabled scenes to render")
+
+            scene_results = []
+            total_cost = 0.0
+
+            # Render each scene
+            for idx, scene in enumerate(enabled_scenes):
+                logger.info(
+                    f"[YouTubeRenderer] Rendering scene {idx + 1}/{len(enabled_scenes)}: "
+                    f"Scene {scene.get('scene_number', idx + 1)}"
+                )
+
+                scene_result = self.render_scene_video(
+                    scene=scene,
+                    video_plan=video_plan,
+                    user_id=user_id,
+                    resolution=resolution,
+                    generate_audio_enabled=True,
+                    voice_id=voice_id,
+                )
+
+                scene_results.append(scene_result)
+                total_cost += scene_result["cost"]
+
+            # Combine scenes if requested
+            final_video_path = None
+            final_video_url = None
+            if combine_scenes and len(scene_results) > 1:
+                logger.info("[YouTubeRenderer] Combining scenes into final video...")
+
+                # Prepare data for video concatenation
+                scene_video_paths = [r["video_path"] for r in scene_results]
+                # Stays empty unless a scene result carries an "audio_path" entry
+                scene_audio_paths = [r.get("audio_path") for r in scene_results if r.get("audio_path")]
+
+                # Use StoryVideoGenerationService to combine
+                video_service = StoryVideoGenerationService(output_dir=str(self.output_dir))
+
+                # Create scene dicts for concatenation
+                scene_dicts = [
+                    {
+                        "scene_number": r["scene_number"],
+                        "title": f"Scene {r['scene_number']}",
+                    }
+                    for r in scene_results
+                ]
+
+                # A plan may carry "video_summary": None; fall back to the
+                # default title instead of failing on the slice.
+                story_title = (video_plan.get("video_summary") or "YouTube Video")[:50]
+
+                combined_result = video_service.generate_story_video(
+                    scenes=scene_dicts,
+                    image_paths=[None] * len(scene_results),  # No static images
+                    audio_paths=scene_audio_paths,
+                    video_paths=scene_video_paths,  # Use rendered videos
+                    user_id=user_id,
+                    story_title=story_title,
+                    fps=24,
+                )
+
+                final_video_path = combined_result["video_path"]
+                # NOTE(review): this URL comes straight from the story video
+                # service and is not rewritten to a YouTube API endpoint -
+                # confirm the returned URL is actually served.
+                final_video_url = combined_result["video_url"]
+
+            logger.info(
+                f"[YouTubeRenderer] ✅ Full video rendered: {len(scene_results)} scenes, "
+                f"total_cost=${total_cost:.2f}"
+            )
+
+            return {
+                "success": True,
+                "scene_results": scene_results,
+                "total_cost": total_cost,
+                "final_video_path": final_video_path,
+                "final_video_url": final_video_url,
+                "num_scenes": len(scene_results),
+                "resolution": resolution,
+            }
+
+        except HTTPException:
+            raise
+        except Exception as e:
+            logger.error(f"[YouTubeRenderer] Error rendering full video: {e}", exc_info=True)
+            raise HTTPException(
+                status_code=500,
+                detail=f"Failed to render video: {str(e)}"
+            ) from e
+    
+    def estimate_render_cost(
+        self,
+        scenes: List[Dict[str, Any]],
+        resolution: str = "720p",
+    ) -> Dict[str, Any]:
+        """
+        Estimate what rendering the enabled scenes would cost, without rendering.
+
+        Args:
+            scenes: List of scene data with duration estimates
+            resolution: Video resolution (480p, 720p, 1080p); any other value
+                is priced at the 720p rate
+
+        Returns:
+            Dictionary with the per-second price, per-scene costs, total
+            duration and cost, and a +/-10% cost range
+        """
+        # Per-second rates (same as in WaveSpeedClient)
+        rate_table = {
+            "480p": 0.05,
+            "720p": 0.10,
+            "1080p": 0.15,
+        }
+        price_per_second = rate_table.get(resolution, 0.10)
+
+        # Only scenes that are not explicitly disabled are billed
+        billable = [entry for entry in scenes if entry.get("enabled", True)]
+
+        breakdown = []
+        running_cost = 0.0
+        running_seconds = 0.0
+
+        for entry in billable:
+            requested = entry.get("duration_estimate", 5)
+
+            # WAN 2.5 renders clips of 5 or 10 seconds only
+            if requested <= 7:
+                billed_seconds = 5
+            else:
+                billed_seconds = 10
+
+            entry_cost = price_per_second * billed_seconds
+            running_cost += entry_cost
+            running_seconds += billed_seconds
+
+            breakdown.append({
+                "scene_number": entry.get("scene_number", 0),
+                "duration_estimate": requested,
+                "actual_duration": billed_seconds,
+                "cost": round(entry_cost, 2),
+            })
+
+        cost_range = {
+            "min": round(running_cost * 0.9, 2),  # 10% buffer
+            "max": round(running_cost * 1.1, 2),  # 10% buffer
+        }
+
+        return {
+            "resolution": resolution,
+            "price_per_second": price_per_second,
+            "num_scenes": len(billable),
+            "total_duration_seconds": running_seconds,
+            "scene_costs": breakdown,
+            "total_cost": round(running_cost, 2),
+            "estimated_cost_range": cost_range,
+        }
+
--- a/backend/services/youtube/scene_builder.py
+++ b/backend/services/youtube/scene_builder.py
@@ -0,0 +1,551 @@
+"""
+YouTube Scene Builder Service
+
+Converts video plans into structured scenes with narration, visual prompts, and timing.
+"""
+
+from typing import Dict, Any, Optional, List
+from loguru import logger
+from fastapi import HTTPException
+
+from services.llm_providers.main_text_generation import llm_text_gen
+from services.story_writer.prompt_enhancer_service import PromptEnhancerService
+from utils.logger_utils import get_service_logger
+
+# Rebinds the `logger` name imported from loguru above to the logger
+# returned by get_service_logger for this module.
+logger = get_service_logger("youtube.scene_builder")
+
+
+class YouTubeSceneBuilderService:
+    """Service for building structured video scenes from plans."""
+    
+    def __init__(self) -> None:
+        """Set up the prompt enhancer used to refine scene visual prompts."""
+        enhancer = PromptEnhancerService()
+        self.prompt_enhancer = enhancer
+        logger.info("[YouTubeSceneBuilder] Service initialized")
+    
+    def build_scenes_from_plan(
+        self,
+        video_plan: Dict[str, Any],
+        user_id: str,
+        custom_script: Optional[str] = None,
+    ) -> List[Dict[str, Any]]:
+        """
+        Turn a video plan into a list of structured, render-ready scenes.
+
+        Scene source, in order of precedence: the custom script if one is
+        given; scenes already embedded in a shorts plan flagged with
+        "_scenes_included"; otherwise scenes generated from the plan. The
+        list is then capped at the plan's "max_scenes" and passed through
+        visual prompt enhancement.
+
+        Args:
+            video_plan: Video plan from planner service
+            user_id: Clerk user ID for subscription checking
+            custom_script: Optional custom script to use instead of generating
+
+        Returns:
+            List of scene dictionaries with narration, visual prompts, timing, etc.
+
+        Raises:
+            HTTPException: propagated unchanged when raised by a helper; 500
+                for any other failure
+        """
+        try:
+            outline_count = len(video_plan.get('content_outline', []))
+            logger.info(
+                f"[YouTubeSceneBuilder] Building scenes from plan: "
+                f"duration={video_plan.get('duration_type')}, "
+                f"sections={outline_count}"
+            )
+
+            duration_metadata = video_plan.get("duration_metadata", {})
+            scene_cap = duration_metadata.get("max_scenes", 10)
+
+            if custom_script:
+                # A user-supplied script always wins over generated content
+                scenes = self._parse_custom_script(
+                    custom_script, video_plan, duration_metadata, user_id
+                )
+            else:
+                # Shorts plans may already contain scenes (optimization)
+                plan_claims_scenes = (
+                    video_plan.get("_scenes_included")
+                    and video_plan.get("duration_type") == "shorts"
+                )
+                prebuilt = (video_plan.get("scenes") or []) if plan_claims_scenes else []
+
+                if prebuilt:
+                    logger.info(
+                        f"[YouTubeSceneBuilder] Using scenes from optimized plan+scenes call "
+                        f"({len(prebuilt)} scenes)"
+                    )
+                    scenes = self._normalize_scenes_from_plan(video_plan, duration_metadata)
+                else:
+                    if plan_claims_scenes:
+                        logger.warning(
+                            "[YouTubeSceneBuilder] Plan marked _scenes_included but no scenes present; "
+                            "regenerating scenes normally."
+                        )
+                    scenes = self._generate_scenes_from_plan(
+                        video_plan, duration_metadata, user_id
+                    )
+
+            # Enforce the scene cap
+            scene_count = len(scenes)
+            if scene_count > scene_cap:
+                logger.warning(
+                    f"[YouTubeSceneBuilder] Truncating {scene_count} scenes to {scene_cap}"
+                )
+                scenes = scenes[:scene_cap]
+
+            # Prompt enhancement strategy depends on the duration type
+            scenes = self._enhance_visual_prompts_batch(
+                scenes, video_plan, user_id, video_plan.get("duration_type", "medium")
+            )
+
+            logger.info(f"[YouTubeSceneBuilder] ✅ Built {len(scenes)} scenes")
+            return scenes
+
+        except HTTPException:
+            raise
+        except Exception as e:
+            logger.error(f"[YouTubeSceneBuilder] Error building scenes: {e}", exc_info=True)
+            raise HTTPException(
+                status_code=500,
+                detail=f"Failed to build scenes: {str(e)}"
+            )
+    
+    def _generate_scenes_from_plan(
+        self,
+        video_plan: Dict[str, Any],
+        duration_metadata: Dict[str, Any],
+        user_id: str,
+    ) -> List[Dict[str, Any]]:
+        """
+        Generate scenes from video plan using AI.
+
+        Builds a scriptwriting prompt from the plan and duration constraints,
+        requests a JSON array of scenes from the LLM, and normalizes every
+        scene into the dictionary shape used by the rest of the builder.
+
+        Args:
+            video_plan: Video plan (summary, goal, outline, hook, CTA, style, tone)
+            duration_metadata: Duration constraints (target/hook/CTA seconds,
+                scene duration range)
+            user_id: Clerk user ID forwarded to the LLM call
+
+        Returns:
+            List of normalized scene dictionaries; "visual_prompt" starts out
+            as a copy of "visual_description"
+
+        Raises:
+            ValueError: if the LLM response is not (or does not contain) a
+                list of scenes, or is a string that is not valid JSON
+        """
+        import json
+
+        content_outline = video_plan.get("content_outline", [])
+        hook_strategy = video_plan.get("hook_strategy", "")
+        call_to_action = video_plan.get("call_to_action", "")
+        visual_style = video_plan.get("visual_style", "cinematic")
+        tone = video_plan.get("tone", "professional")
+
+        scene_duration_range = duration_metadata.get("scene_duration_range", (5, 15))
+
+        scene_generation_prompt = f"""You are an expert video scriptwriter. Create detailed scenes for a YouTube video based on this plan.
+
+**Video Plan:**
+- Summary: {video_plan.get('video_summary', '')}
+- Goal: {video_plan.get('video_goal', '')}
+- Key Message: {video_plan.get('key_message', '')}
+- Visual Style: {visual_style}
+- Tone: {tone}
+
+**Hook Strategy:**
+{hook_strategy}
+
+**Content Outline:**
+{chr(10).join([f"- {section.get('section', '')}: {section.get('description', '')} ({section.get('duration_estimate', 0)}s)" for section in content_outline])}
+
+**Call-to-Action:**
+{call_to_action}
+
+**Duration Constraints:**
+- Scene duration: {scene_duration_range[0]}-{scene_duration_range[1]} seconds each
+- Total target: {duration_metadata.get('target_seconds', 150)} seconds
+
+**Your Task:**
+Create detailed scenes that include:
+1. Scene number and title
+2. Narration text (what will be spoken)
+3. Visual description (what viewers will see)
+4. Duration estimate
+5. Emphasis tags (hook, main_content, transition, cta)
+
+**Format as JSON array:**
+[
+  {{
+    "scene_number": 1,
+    "title": "Hook - Attention Grabber",
+    "narration": "The spoken text for this scene...",
+    "visual_description": "Detailed description of what viewers see...",
+    "duration_estimate": 5,
+    "emphasis": "hook",
+    "visual_cues": ["close-up", "dynamic", "bright"]
+  }},
+  ...
+]
+
+Make sure:
+- First scene is a strong hook ({duration_metadata.get('hook_seconds', 10)}s)
+- Last scene includes the CTA ({duration_metadata.get('cta_seconds', 10)}s)
+- Each scene has clear narration and visual description
+- Total duration fits within {duration_metadata.get('target_seconds', 150)} seconds
+- Scenes flow naturally from one to the next
+"""
+
+        system_prompt = (
+            "You are an expert video scriptwriter specializing in YouTube content. "
+            "Your scenes are engaging, well-paced, and optimized for viewer retention."
+        )
+
+        response = llm_text_gen(
+            prompt=scene_generation_prompt,
+            system_prompt=system_prompt,
+            user_id=user_id,
+            json_struct={
+                "type": "array",
+                "items": {
+                    "type": "object",
+                    "properties": {
+                        "scene_number": {"type": "number"},
+                        "title": {"type": "string"},
+                        "narration": {"type": "string"},
+                        "visual_description": {"type": "string"},
+                        "duration_estimate": {"type": "number"},
+                        "emphasis": {"type": "string"},
+                        "visual_cues": {
+                            "type": "array",
+                            "items": {"type": "string"}
+                        }
+                    },
+                    "required": [
+                        "scene_number", "title", "narration", "visual_description",
+                        "duration_estimate", "emphasis"
+                    ]
+                }
+            }
+        )
+
+        # Parse response: accept a list, a {"scenes": [...]} wrapper, or a
+        # JSON string that decodes to either of those.
+        parsed = response
+        if isinstance(parsed, str):
+            parsed = json.loads(parsed)
+        if isinstance(parsed, dict):
+            parsed = parsed.get("scenes")
+        if not isinstance(parsed, list):
+            # Fail with a clear message instead of an AttributeError raised
+            # while iterating over something that is not a list of scenes.
+            raise ValueError(
+                f"Scene generation returned an unexpected response type: {type(response).__name__}"
+            )
+
+        # Entries that are not objects cannot be normalized; drop them
+        scenes = [scene for scene in parsed if isinstance(scene, dict)]
+        if len(scenes) != len(parsed):
+            logger.warning(
+                f"[YouTubeSceneBuilder] Ignored {len(parsed) - len(scenes)} malformed scene entries"
+            )
+
+        # Normalize scene data
+        normalized_scenes = []
+        for idx, scene in enumerate(scenes, 1):
+            # `or` fallbacks cover keys that are present but set to None
+            visual_description = scene.get("visual_description") or ""
+            normalized_scenes.append({
+                "scene_number": scene.get("scene_number", idx),
+                "title": scene.get("title") or f"Scene {idx}",
+                "narration": scene.get("narration") or "",
+                "visual_description": visual_description,
+                "duration_estimate": scene.get("duration_estimate", scene_duration_range[0]),
+                "emphasis": scene.get("emphasis") or "main_content",
+                "visual_cues": scene.get("visual_cues") or [],
+                "visual_prompt": visual_description,  # Initial prompt
+            })
+
+        return normalized_scenes
+    
+    def _normalize_scenes_from_plan(
+        self,
+        video_plan: Dict[str, Any],
+        duration_metadata: Dict[str, Any],
+    ) -> List[Dict[str, Any]]:
+        """
+        Bring scenes embedded in the plan into the builder's scene shape.
+
+        Used for shorts, where the plan already carries its scenes, so no
+        separate scene-generation call is needed.
+
+        Args:
+            video_plan: Video plan holding the prebuilt "scenes" list
+            duration_metadata: Duration constraints; the lower bound of
+                "scene_duration_range" is the default scene duration
+
+        Returns:
+            List of normalized scene dictionaries; "visual_prompt" starts out
+            as a copy of "visual_description"
+        """
+        duration_bounds = duration_metadata.get("scene_duration_range", (2, 8))
+
+        normalized_scenes = [
+            {
+                "scene_number": raw.get("scene_number", position),
+                "title": raw.get("title", f"Scene {position}"),
+                "narration": raw.get("narration", ""),
+                "visual_description": raw.get("visual_description", ""),
+                "duration_estimate": raw.get("duration_estimate", duration_bounds[0]),
+                "emphasis": raw.get("emphasis", "main_content"),
+                "visual_cues": raw.get("visual_cues", []),
+                # The description doubles as the initial visual prompt
+                "visual_prompt": raw.get("visual_description", ""),
+            }
+            for position, raw in enumerate(video_plan.get("scenes", []), 1)
+        ]
+
+        logger.info(
+            f"[YouTubeSceneBuilder] ✅ Normalized {len(normalized_scenes)} scenes "
+            f"from optimized plan (saved 1 AI call)"
+        )
+        return normalized_scenes
+    
+    def _parse_custom_script(
+        self,
+        custom_script: str,
+        video_plan: Dict[str, Any],
+        duration_metadata: Dict[str, Any],
+        user_id: str,
+    ) -> List[Dict[str, Any]]:
+        """
+        Parse a custom script into structured scenes.
+
+        Scenes are split on markers ("Scene 3", "# 3." or a line starting
+        with "3."). Within a scene, text up to the first blank line or
+        "Visual:"/"Image:" label is the narration and the text after such a
+        label is the visual description (the narration is reused when no
+        label is present). If the script has no markers, each blank-line
+        separated paragraph becomes one scene, capped at "max_scenes".
+
+        Args:
+            custom_script: Raw script text supplied by the user
+            video_plan: Video plan (not read by this method)
+            duration_metadata: Duration constraints; supplies the default
+                scene duration and the paragraph cap
+            user_id: Clerk user ID (not read by this method)
+
+        Returns:
+            List of scene dictionaries; the first scene is tagged "hook" and
+            the last of several scenes "cta"
+        """
+        import re
+
+        default_duration = duration_metadata.get("scene_duration_range", [5, 15])[0]
+
+        def make_scene(idx: int, total: int, narration: str, visual: str) -> Dict[str, Any]:
+            """Build one scene dict, tagging the first as hook and the last as cta."""
+            if idx == 1:
+                emphasis = "hook"
+            elif idx == total:
+                emphasis = "cta"
+            else:
+                emphasis = "main_content"
+            return {
+                "scene_number": idx,
+                "title": f"Scene {idx}",
+                "narration": narration,
+                "visual_description": visual,
+                "duration_estimate": default_duration,
+                "emphasis": emphasis,
+                "visual_cues": [],
+                "visual_prompt": visual,
+            }
+
+        # Scene markers. The body of a scene runs to the next marker or to
+        # the end of the script: `\Z` is required because `$` under
+        # re.MULTILINE also matches at every line end, which would cut each
+        # scene after its first line. An optional separator after the marker
+        # ("Scene 1:", "Scene 1 -") is skipped so it does not end up in the
+        # narration.
+        marker = r'(?:Scene\s+\d+|#\s*\d+\.|^\d+\.)'
+        scene_pattern = marker + r'\s*[:.\-]?\s*(.+?)(?=' + marker + r'|\Z)'
+
+        # Materialize the matches: finditer returns a one-shot iterator, so
+        # counting it inside the loop would consume the remaining scenes.
+        matches = list(re.finditer(scene_pattern, custom_script, re.MULTILINE | re.DOTALL))
+        total_matches = len(matches)
+
+        scenes = []
+        for idx, match in enumerate(matches, 1):
+            scene_text = match.group(1).strip()
+            # Extract narration (first paragraph or before visual markers)
+            narration_match = re.search(r'^(.*?)(?:\n\n|Visual:|Image:)', scene_text, re.DOTALL)
+            narration = narration_match.group(1).strip() if narration_match else scene_text.split('\n')[0]
+
+            # Extract visual description
+            visual_match = re.search(r'(?:Visual:|Image:)\s*(.+?)(?:\n\n|$)', scene_text, re.DOTALL)
+            visual_description = visual_match.group(1).strip() if visual_match else narration
+
+            scenes.append(make_scene(idx, total_matches, narration, visual_description))
+
+        # Fallback: split by paragraphs if no scene markers
+        if not scenes:
+            paragraphs = [p.strip() for p in custom_script.split('\n\n') if p.strip()]
+            # Truncate first so the last *kept* paragraph is the one tagged "cta"
+            paragraphs = paragraphs[:duration_metadata.get("max_scenes", 10)]
+            total_paragraphs = len(paragraphs)
+            scenes = [
+                make_scene(idx, total_paragraphs, para, para)
+                for idx, para in enumerate(paragraphs, 1)
+            ]
+
+        return scenes
+    
+    def _enhance_visual_prompts_batch(
+        self,
+        scenes: List[Dict[str, Any]],
+        video_plan: Dict[str, Any],
+        user_id: str,
+        duration_type: str,
+    ) -> List[Dict[str, Any]]:
+        """
+        Efficiently enhance visual prompts based on video duration type.
+
+        Strategy:
+        - Shorts: Skip enhancement (use original descriptions) - 0 AI calls
+        - Medium: Batch enhance all scenes in 1 call - 1 AI call
+        - Long: Batch enhance in 2 calls (split scenes) - 2 AI calls max
+        - Any other duration type: use original prompts - 0 AI calls
+
+        Empty batches are never sent to the enhancer, so a long video with a
+        single scene costs one AI call and an empty scene list costs none.
+
+        Args:
+            scenes: Scene dicts. Each one is updated in place with an
+                "enhanced_visual_prompt" key.
+            video_plan: Plan dict; "visual_style" and "tone" are read to
+                build the style context passed to the enhancer.
+            user_id: Passed through to `_batch_enhance_prompts`.
+            duration_type: "shorts", "medium" or "long"; anything else
+                falls back to the original prompts with a warning.
+
+        Returns:
+            The same `scenes` list object, with "enhanced_visual_prompt"
+            set on every scene.
+        """
+        def original_prompt(scene: Dict[str, Any]) -> str:
+            # Single definition of "the prompt we had before enhancement".
+            return scene.get("visual_prompt", scene.get("visual_description", ""))
+
+        def use_original_prompts() -> List[Dict[str, Any]]:
+            for scene in scenes:
+                scene["enhanced_visual_prompt"] = original_prompt(scene)
+            return scenes
+
+        # For shorts, skip enhancement to save API calls
+        if duration_type == "shorts":
+            logger.info(
+                f"[YouTubeSceneBuilder] Skipping prompt enhancement for shorts "
+                f"({len(scenes)} scenes) to save API calls"
+            )
+            return use_original_prompts()
+
+        # Fallback: use original prompts
+        if duration_type not in ("medium", "long"):
+            logger.warning(
+                f"[YouTubeSceneBuilder] Unknown duration type '{duration_type}', "
+                f"using original prompts"
+            )
+            return use_original_prompts()
+
+        # Build story context for prompt enhancer
+        story_context = {
+            "story_setting": video_plan.get("visual_style", "cinematic"),
+            "story_tone": video_plan.get("tone", "professional"),
+            "writing_style": video_plan.get("visual_style", "cinematic"),
+        }
+
+        # Convert scenes to format expected by enhancer
+        scene_data_list = [
+            {
+                "scene_number": scene.get("scene_number", idx + 1),
+                "title": scene.get("title", ""),
+                "description": scene.get("visual_description", ""),
+                "image_prompt": scene.get("visual_prompt", ""),
+            }
+            for idx, scene in enumerate(scenes)
+        ]
+
+        # Medium videos go out as one batch; long videos are split in two
+        # to avoid token limits. Each entry is (offset of the batch within
+        # `scenes`, batch contents).
+        if duration_type == "medium":
+            candidate_batches = [(0, scene_data_list)]
+        else:
+            mid_point = len(scenes) // 2
+            candidate_batches = [
+                (0, scene_data_list[:mid_point]),
+                (mid_point, scene_data_list[mid_point:]),
+            ]
+        # Drop empty batches (0 scenes, or a 1-scene long video whose first
+        # half is empty) so no AI call is spent on nothing.
+        batches = [(start, batch) for start, batch in candidate_batches if batch]
+
+        if batches:
+            call_word = "call" if len(batches) == 1 else "calls"
+            logger.info(
+                f"[YouTubeSceneBuilder] Batch enhancing {len(scenes)} scenes "
+                f"for {duration_type} video in {len(batches)} AI {call_word}"
+            )
+
+        all_enhanced = {}
+        for batch_number, (start_idx, batch) in enumerate(batches, 1):
+            try:
+                enhanced = self._batch_enhance_prompts(
+                    batch, story_context, user_id
+                )
+            except Exception as e:
+                # Best effort: scenes of a failed batch keep their original
+                # prompts via the final assignment loop below.
+                logger.warning(
+                    f"[YouTubeSceneBuilder] Batch {batch_number} enhancement "
+                    f"failed: {e}, using original prompts"
+                )
+                continue
+            for local_idx, enhanced_prompt in enhanced.items():
+                # Ignore indices outside this batch so one batch can never
+                # write into another batch's scenes.
+                if 0 <= local_idx < len(batch):
+                    all_enhanced[start_idx + local_idx] = enhanced_prompt
+
+        for idx, scene in enumerate(scenes):
+            # A missing or empty enhanced prompt falls back to the original.
+            scene["enhanced_visual_prompt"] = (
+                all_enhanced.get(idx) or original_prompt(scene)
+            )
+        return scenes
+    
+    def _batch_enhance_prompts(
+        self,
+        scene_data_list: List[Dict[str, Any]],
+        story_context: Dict[str, Any],
+        user_id: str,
+    ) -> Dict[int, str]:
+        """
+        Enhance multiple scene prompts in a single AI call.
+
+        Args:
+            scene_data_list: Scene dicts; "scene_number", "title",
+                "description" and "image_prompt" are read from each.
+            story_context: Style context; "story_setting", "story_tone" and
+                "writing_style" are interpolated into the prompt.
+            user_id: Passed through to `llm_text_gen`.
+
+        Returns:
+            Dictionary mapping scene index (0-based position in
+            `scene_data_list`) to a prompt. Every index is present: scenes
+            the AI response did not cover, or covered with an empty prompt
+            or an invalid index, keep their original prompt. If the call or
+            parsing fails, all scenes keep their original prompt. An empty
+            `scene_data_list` returns an empty dict without an AI call.
+        """
+        def original_prompt(scene: Dict[str, Any]) -> str:
+            # An empty image_prompt is as useless as a missing one, so fall
+            # through to the description in both cases.
+            return scene.get("image_prompt") or scene.get("description", "")
+
+        # Nothing to enhance: do not spend an AI call on an empty batch.
+        if not scene_data_list:
+            return {}
+
+        scene_count = len(scene_data_list)
+        try:
+            # Build batch enhancement prompt. The 0-based scene_index is
+            # shown next to each scene because scene_number may be 1-based
+            # or offset (second batch of a long video), while the response
+            # is keyed by scene_index.
+            scenes_text = "\n\n".join(
+                f"Scene {scene.get('scene_number', idx + 1)} "
+                f"(scene_index {idx}): {scene.get('title', '')}\n"
+                f"Description: {scene.get('description', '')}\n"
+                f"Current Prompt: {scene.get('image_prompt', '')}"
+                for idx, scene in enumerate(scene_data_list)
+            )
+
+            batch_prompt = f"""You are optimizing visual prompts for AI video generation. Enhance the following scenes to be more detailed and video-optimized.
+
+**Video Style Context:**
+- Setting: {story_context.get('story_setting', 'cinematic')}
+- Tone: {story_context.get('story_tone', 'professional')}
+- Style: {story_context.get('writing_style', 'cinematic')}
+
+**Scenes to Enhance:**
+{scenes_text}
+
+**Your Task:**
+For each scene, create an enhanced visual prompt (200-300 words) that:
+1. Is detailed and specific for video generation
+2. Includes camera movements, lighting, composition
+3. Maintains consistency with the video style
+4. Is optimized for WAN 2.5 text-to-video model
+
+**Format as JSON array with enhanced prompts:**
+[
+  {{"scene_index": 0, "enhanced_prompt": "detailed enhanced prompt for scene 1..."}},
+  {{"scene_index": 1, "enhanced_prompt": "detailed enhanced prompt for scene 2..."}},
+  ...
+]
+
+Make sure the array length matches the number of scenes provided ({len(scene_data_list)}).
+"""
+
+            system_prompt = (
+                "You are an expert at creating detailed visual prompts for AI video generation. "
+                "Your prompts are specific, cinematic, and optimized for video models."
+            )
+
+            response = llm_text_gen(
+                prompt=batch_prompt,
+                system_prompt=system_prompt,
+                user_id=user_id,
+                json_struct={
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "scene_index": {"type": "number"},
+                            "enhanced_prompt": {"type": "string"}
+                        },
+                        "required": ["scene_index", "enhanced_prompt"]
+                    }
+                }
+            )
+
+            # Parse response
+            if isinstance(response, str):
+                import json
+                response = json.loads(response)
+            if not isinstance(response, list):
+                raise ValueError(
+                    f"expected a JSON array of prompts, got {type(response).__name__}"
+                )
+
+            # Build result dictionary, keeping only well-formed entries
+            result = {}
+            for item in response:
+                if not isinstance(item, dict):
+                    continue
+                raw_idx = item.get("scene_index")
+                try:
+                    idx = int(raw_idx)
+                except (TypeError, ValueError, OverflowError):
+                    # Missing, non-numeric, NaN or infinite index.
+                    continue
+                # The schema allows any number: accept 1.0 as 1 but reject
+                # fractional or out-of-range indices instead of guessing.
+                if idx != raw_idx or not 0 <= idx < scene_count:
+                    continue
+                prompt = item.get("enhanced_prompt")
+                if isinstance(prompt, str) and prompt.strip():
+                    result[idx] = prompt
+
+            enhanced_count = len(result)
+
+            # Fill in any missing scenes with original prompts
+            for idx, scene in enumerate(scene_data_list):
+                if idx not in result:
+                    result[idx] = original_prompt(scene)
+
+            logger.info(
+                f"[YouTubeSceneBuilder] ✅ Batch enhanced {enhanced_count} of "
+                f"{scene_count} prompts in 1 AI call"
+            )
+            return result
+
+        except Exception as e:
+            logger.error(
+                f"[YouTubeSceneBuilder] Batch enhancement failed: {e}",
+                exc_info=True
+            )
+            # Return original prompts as fallback
+            return {
+                idx: original_prompt(scene)
+                for idx, scene in enumerate(scene_data_list)
+            }
+