AI Image and Audio Generation Improvements.

AI Video Generation Pre-Flight Checklist. Cost Estimate Improvements.
2025-12-25 16:26:08 +05:30
parent 59913bffa9
commit 7512933c65
163 changed files with 8938 additions and 37401 deletions
--- a/backend/services/llm_providers/main_audio_generation.py
+++ b/backend/services/llm_providers/main_audio_generation.py
@@ -140,6 +140,10 @@ def generate_audio(
            # Avoid passing duplicate enable_sync_mode; allow override via kwargs
            enable_sync_mode = kwargs.pop("enable_sync_mode", True)

+            # Filter out None values from kwargs to prevent WaveSpeed validation errors
+            filtered_kwargs = {k: v for k, v in kwargs.items() if v is not None}
+            logger.info(f"[audio_gen] Filtered kwargs (removed None values): {filtered_kwargs}")
+
            client = WaveSpeedClient()
            audio_bytes = client.generate_speech(
                text=text,
@@ -149,7 +153,7 @@ def generate_audio(
                pitch=pitch,
                emotion=emotion,
                enable_sync_mode=enable_sync_mode,
-                **kwargs
+                **filtered_kwargs
            )
            
            logger.info(f"[audio_gen] ✅ API call successful, generated {len(audio_bytes)} bytes")
--- a/backend/services/llm_providers/main_image_generation.py
+++ b/backend/services/llm_providers/main_image_generation.py
@@ -1,6 +1,8 @@
 from __future__ import annotations

 import os
+import sys
+from datetime import datetime
 from typing import Optional, Dict, Any

 from .image_generation import (
@@ -110,6 +112,367 @@ def generate_image(prompt: str, options: Optional[Dict[str, Any]] = None, user_i

    logger.info("Generating image via provider=%s model=%s", provider_name, image_options.model)
    provider = _get_provider(provider_name)
-    return provider.generate(image_options)
+    result = provider.generate(image_options)
+    
+    # TRACK USAGE after successful API call
+    has_image_bytes = bool(result.image_bytes) if result else False
+    image_bytes_len = len(result.image_bytes) if (result and result.image_bytes) else 0
+    logger.info(f"[Image Generation] Checking tracking conditions: user_id={user_id}, has_result={bool(result)}, has_image_bytes={has_image_bytes}, image_bytes_len={image_bytes_len}")
+    if user_id and result and result.image_bytes:
+        logger.info(f"[Image Generation] ✅ API call successful, tracking usage for user {user_id}")
+        try:
+            from services.database import get_db as get_db_track
+            db_track = next(get_db_track())
+            try:
+                from models.subscription_models import UsageSummary, APIUsageLog, APIProvider
+                from services.subscription import PricingService
+                
+                pricing = PricingService(db_track)
+                current_period = pricing.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m")
+                
+                # Get or create usage summary
+                summary = db_track.query(UsageSummary).filter(
+                    UsageSummary.user_id == user_id,
+                    UsageSummary.billing_period == current_period
+                ).first()
+                
+                if not summary:
+                    summary = UsageSummary(
+                        user_id=user_id,
+                        billing_period=current_period
+                    )
+                    db_track.add(summary)
+                    db_track.flush()
+                
+                # Get cost from result metadata or calculate
+                estimated_cost = 0.0
+                if result.metadata and "estimated_cost" in result.metadata:
+                    estimated_cost = float(result.metadata["estimated_cost"])
+                else:
+                    # Fallback: estimate based on provider/model
+                    if provider_name == "wavespeed":
+                        if result.model and "qwen" in result.model.lower():
+                            estimated_cost = 0.05
+                        else:
+                            estimated_cost = 0.10  # ideogram-v3-turbo default
+                    elif provider_name == "stability":
+                        estimated_cost = 0.04
+                    else:
+                        estimated_cost = 0.05  # Default estimate
+                
+                # Get current values before update
+                current_calls_before = getattr(summary, "stability_calls", 0) or 0
+                current_cost_before = getattr(summary, "stability_cost", 0.0) or 0.0
+                
+                # Update image calls and cost
+                new_calls = current_calls_before + 1
+                new_cost = current_cost_before + estimated_cost
+                
+                # Use direct SQL UPDATE for dynamic attributes
+                from sqlalchemy import text as sql_text
+                update_query = sql_text("""
+                    UPDATE usage_summaries 
+                    SET stability_calls = :new_calls,
+                        stability_cost = :new_cost
+                    WHERE user_id = :user_id AND billing_period = :period
+                """)
+                db_track.execute(update_query, {
+                    'new_calls': new_calls,
+                    'new_cost': new_cost,
+                    'user_id': user_id,
+                    'period': current_period
+                })
+                
+                # Update total cost
+                summary.total_cost = (summary.total_cost or 0.0) + estimated_cost
+                summary.total_calls = (summary.total_calls or 0) + 1
+                summary.updated_at = datetime.utcnow()
+                
+                # Determine API provider based on actual provider
+                api_provider = APIProvider.STABILITY  # Default for image generation
+                
+                # Create usage log
+                usage_log = APIUsageLog(
+                    user_id=user_id,
+                    provider=api_provider,
+                    endpoint="/image-generation",
+                    method="POST",
+                    model_used=result.model or "unknown",
+                    tokens_input=0,
+                    tokens_output=0,
+                    tokens_total=0,
+                    cost_input=0.0,
+                    cost_output=0.0,
+                    cost_total=estimated_cost,
+                    response_time=0.0,
+                    status_code=200,
+                    request_size=len(prompt.encode("utf-8")),
+                    response_size=len(result.image_bytes),
+                    billing_period=current_period,
+                )
+                db_track.add(usage_log)
+                
+                # Get plan details for unified log
+                limits = pricing.get_user_limits(user_id)
+                plan_name = limits.get('plan_name', 'unknown') if limits else 'unknown'
+                tier = limits.get('tier', 'unknown') if limits else 'unknown'
+                image_limit = limits['limits'].get("stability_calls", 0) if limits else 0
+                # Only show ∞ for Enterprise tier when limit is 0 (unlimited)
+                image_limit_display = image_limit if (image_limit > 0 or tier != 'enterprise') else '∞'
+                
+                # Get related stats for unified log
+                current_audio_calls = getattr(summary, "audio_calls", 0) or 0
+                audio_limit = limits['limits'].get("audio_calls", 0) if limits else 0
+                current_image_edit_calls = getattr(summary, "image_edit_calls", 0) or 0
+                image_edit_limit = limits['limits'].get("image_edit_calls", 0) if limits else 0
+                current_video_calls = getattr(summary, "video_calls", 0) or 0
+                video_limit = limits['limits'].get("video_calls", 0) if limits else 0
+                
+                db_track.commit()
+                logger.info(f"[Image Generation] ✅ Successfully tracked usage: user {user_id} -> image -> {new_calls} calls, ${estimated_cost:.4f}")
+                
+                # UNIFIED SUBSCRIPTION LOG - Shows before/after state in one message
+                print(f"""
+[SUBSCRIPTION] Image Generation
+├─ User: {user_id}
+├─ Plan: {plan_name} ({tier})
+├─ Provider: {provider_name}
+├─ Actual Provider: {provider_name}
+├─ Model: {result.model or 'unknown'}
+├─ Calls: {current_calls_before} → {new_calls} / {image_limit_display}
+├─ Cost: ${current_cost_before:.4f} → ${new_cost:.4f}
+├─ Audio: {current_audio_calls} / {audio_limit if audio_limit > 0 else '∞'}
+├─ Image Editing: {current_image_edit_calls} / {image_edit_limit if image_edit_limit > 0 else '∞'}
+├─ Videos: {current_video_calls} / {video_limit if video_limit > 0 else '∞'}
+└─ Status: ✅ Allowed & Tracked
+""", flush=True)
+                sys.stdout.flush()
+                
+            except Exception as track_error:
+                logger.error(f"[Image Generation] ❌ Error tracking usage (non-blocking): {track_error}", exc_info=True)
+                import traceback
+                logger.error(f"[Image Generation] Full traceback: {traceback.format_exc()}")
+                db_track.rollback()
+            finally:
+                db_track.close()
+        except Exception as usage_error:
+            logger.error(f"[Image Generation] ❌ Failed to track usage: {usage_error}", exc_info=True)
+            import traceback
+            logger.error(f"[Image Generation] Full traceback: {traceback.format_exc()}")
+    else:
+        logger.warning(f"[Image Generation] ⚠️ Skipping usage tracking: user_id={user_id}, image_bytes={len(result.image_bytes) if result.image_bytes else 0} bytes")
+    
+    return result
+
+
+def generate_character_image(
+    prompt: str,
+    reference_image_bytes: bytes,
+    user_id: Optional[str] = None,
+    style: str = "Realistic",
+    aspect_ratio: str = "16:9",
+    rendering_speed: str = "Quality",
+    timeout: Optional[int] = None,
+) -> bytes:
+    """Generate character-consistent image with pre-flight validation and usage tracking.
+
+    Uses Ideogram Character API via WaveSpeed to maintain character consistency.
+    Usage tracking is best-effort: tracking errors are logged and never block the returned image.
+
+    Args:
+        prompt: Text prompt describing the scene/context for the character
+        reference_image_bytes: Reference image bytes (base avatar)
+        user_id: User ID for subscription checking; when falsy, validation and tracking are skipped
+        style: Character style type ("Auto", "Fiction", or "Realistic")
+        aspect_ratio: Aspect ratio ("1:1", "16:9", "9:16", "4:3", "3:4")
+        rendering_speed: Rendering speed ("Default", "Turbo", "Quality")
+        timeout: Total timeout in seconds, passed unchanged to the WaveSpeed client (default presumably 180 - confirm)
+
+    Returns:
+        bytes: Generated image bytes with consistent character
+
+    Raises:
+        HTTPException: Propagated from pre-flight validation, or status 502 when the WaveSpeed call fails otherwise.
+    """
+    from fastapi import HTTPException
+
+    # PRE-FLIGHT VALIDATION: Validate image generation before API call
+    if user_id:
+        from services.database import get_db
+        from services.subscription import PricingService
+        from services.subscription.preflight_validator import validate_image_generation_operations
+
+        logger.info(f"[Character Image Generation] 🔍 Starting pre-flight validation for user_id={user_id}")
+        db = next(get_db())
+        try:
+            # Raises HTTPException immediately if validation fails
+            validate_image_generation_operations(pricing_service=PricingService(db), user_id=user_id, num_images=1)
+            logger.info(f"[Character Image Generation] ✅ Pre-flight validation passed for user_id={user_id} - proceeding with character image generation")
+        except HTTPException as http_ex:
+            # Re-raise immediately - don't proceed with API call
+            logger.error(f"[Character Image Generation] ❌ Pre-flight validation failed for user_id={user_id} - blocking API call: {http_ex.detail}")
+            raise
+        finally:
+            db.close()
+    else:
+        logger.warning("[Character Image Generation] ⚠️ No user_id provided - skipping pre-flight validation (this should not happen in production)")
+
+    # Generate character image via WaveSpeed
+    from services.wavespeed.client import WaveSpeedClient
+
+    try:
+        wavespeed_client = WaveSpeedClient()
+        image_bytes = wavespeed_client.generate_character_image(
+            prompt=prompt,
+            reference_image_bytes=reference_image_bytes,
+            style=style,
+            aspect_ratio=aspect_ratio,
+            rendering_speed=rendering_speed,
+            timeout=timeout,
+        )
+
+        # TRACK USAGE after successful API call
+        image_bytes_len = len(image_bytes) if image_bytes else 0
+        logger.info(f"[Character Image Generation] Checking tracking conditions: user_id={user_id}, has_image_bytes={bool(image_bytes)}, image_bytes_len={image_bytes_len}")
+        if user_id and image_bytes:
+            logger.info(f"[Character Image Generation] ✅ API call successful, tracking usage for user {user_id}")
+            try:
+                from services.database import get_db as get_db_track
+                db_track = next(get_db_track())
+                try:
+                    from models.subscription_models import UsageSummary, APIUsageLog, APIProvider
+                    from services.subscription import PricingService
+
+                    pricing = PricingService(db_track)
+                    current_period = pricing.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m")
+
+                    # Get or create usage summary
+                    summary = db_track.query(UsageSummary).filter(
+                        UsageSummary.user_id == user_id,
+                        UsageSummary.billing_period == current_period
+                    ).first()
+                    if not summary:
+                        summary = UsageSummary(user_id=user_id, billing_period=current_period)
+                        db_track.add(summary)
+                        db_track.flush()
+
+                    # Character image cost (same as ideogram-v3-turbo)
+                    estimated_cost = 0.10
+                    current_calls_before = getattr(summary, "stability_calls", 0) or 0
+                    current_cost_before = getattr(summary, "stability_cost", 0.0) or 0.0
+                    new_calls = current_calls_before + 1
+                    new_cost = current_cost_before + estimated_cost
+
+                    # Use direct SQL UPDATE for dynamic attributes
+                    from sqlalchemy import text as sql_text
+                    update_query = sql_text("""
+                        UPDATE usage_summaries 
+                        SET stability_calls = :new_calls,
+                            stability_cost = :new_cost
+                        WHERE user_id = :user_id AND billing_period = :period
+                    """)
+                    db_track.execute(update_query, {
+                        'new_calls': new_calls,
+                        'new_cost': new_cost,
+                        'user_id': user_id,
+                        'period': current_period
+                    })
+
+                    # Update total cost
+                    summary.total_cost = (summary.total_cost or 0.0) + estimated_cost
+                    summary.total_calls = (summary.total_calls or 0) + 1
+                    summary.updated_at = datetime.utcnow()
+
+                    # Create usage log
+                    usage_log = APIUsageLog(
+                        user_id=user_id,
+                        provider=APIProvider.STABILITY,  # Image generation uses STABILITY provider
+                        endpoint="/image-generation/character",
+                        method="POST",
+                        model_used="ideogram-character",
+                        tokens_input=0,
+                        tokens_output=0,
+                        tokens_total=0,
+                        cost_input=0.0,
+                        cost_output=0.0,
+                        cost_total=estimated_cost,
+                        response_time=0.0,
+                        status_code=200,
+                        request_size=len(prompt.encode("utf-8")),
+                        response_size=len(image_bytes),
+                        billing_period=current_period,
+                    )
+                    db_track.add(usage_log)
+
+                    # Read related stats for the unified log before committing
+                    current_audio_calls = getattr(summary, "audio_calls", 0) or 0
+                    current_image_edit_calls = getattr(summary, "image_edit_calls", 0) or 0
+                    current_video_calls = getattr(summary, "video_calls", 0) or 0
+
+                    # Commit first: the plan lookup below only feeds the log output, so a
+                    # failure there must not roll back the usage that was just recorded.
+                    db_track.commit()
+
+                    # Get plan details for unified log (best-effort, None-safe)
+                    plan_name, tier, plan_limits = 'unknown', 'unknown', {}
+                    try:
+                        limits = pricing.get_user_limits(user_id) or {}
+                        plan_name = limits.get('plan_name', 'unknown')
+                        tier = limits.get('tier', 'unknown')
+                        plan_limits = limits.get('limits') or {}
+                    except Exception as limits_error:
+                        logger.warning(f"[Character Image Generation] ⚠️ Could not load plan limits for usage log (usage already tracked): {limits_error}")
+                    image_limit = plan_limits.get("stability_calls", 0) or 0
+                    image_limit_display = image_limit if (image_limit > 0 or tier != 'enterprise') else '∞'
+                    audio_limit = plan_limits.get("audio_calls", 0) or 0
+                    image_edit_limit = plan_limits.get("image_edit_calls", 0) or 0
+                    video_limit = plan_limits.get("video_calls", 0) or 0
+
+                    # UNIFIED SUBSCRIPTION LOG
+                    print(f"""
+[SUBSCRIPTION] Image Generation (Character)
+├─ User: {user_id}
+├─ Plan: {plan_name} ({tier})
+├─ Provider: wavespeed
+├─ Actual Provider: wavespeed
+├─ Model: ideogram-character
+├─ Calls: {current_calls_before} → {new_calls} / {image_limit_display}
+├─ Cost: ${current_cost_before:.4f} → ${new_cost:.4f}
+├─ Audio: {current_audio_calls} / {audio_limit if audio_limit > 0 else '∞'}
+├─ Image Editing: {current_image_edit_calls} / {image_edit_limit if image_edit_limit > 0 else '∞'}
+├─ Videos: {current_video_calls} / {video_limit if video_limit > 0 else '∞'}
+└─ Status: ✅ Allowed & Tracked
+""", flush=True)
+                    sys.stdout.flush()
+
+                    logger.info(f"[Character Image Generation] ✅ Successfully tracked usage: user {user_id} -> {new_calls} calls, ${estimated_cost:.4f}")
+
+                except Exception as track_error:
+                    logger.error(f"[Character Image Generation] ❌ Error tracking usage (non-blocking): {track_error}", exc_info=True)
+                    import traceback
+                    logger.error(f"[Character Image Generation] Full traceback: {traceback.format_exc()}")
+                    db_track.rollback()
+                finally:
+                    db_track.close()
+            except Exception as usage_error:
+                logger.error(f"[Character Image Generation] ❌ Failed to track usage: {usage_error}", exc_info=True)
+                import traceback
+                logger.error(f"[Character Image Generation] Full traceback: {traceback.format_exc()}")
+        else:
+            logger.warning(f"[Character Image Generation] ⚠️ Skipping usage tracking: user_id={user_id}, image_bytes={len(image_bytes) if image_bytes else 0} bytes")
+
+        return image_bytes
+
+    except HTTPException:
+        raise
+    except Exception as api_error:
+        logger.error(f"[Character Image Generation] Character image generation API failed: {api_error}")
+        raise HTTPException(
+            status_code=502,
+            detail={
+                "error": "Character image generation failed",
+                "message": str(api_error)
+            }
+        ) from api_error


--- a/backend/services/youtube/renderer.py
+++ b/backend/services/youtube/renderer.py
@@ -88,14 +88,49 @@ class YouTubeVideoRendererService:
            # Clamp duration to valid WAN 2.5 values (5 or 10 seconds)
            duration = 5 if duration_estimate <= 7 else 10
            
+            # Log asset usage status
+            has_existing_image = bool(scene.get("imageUrl"))
+            has_existing_audio = bool(scene.get("audioUrl"))
+            
            logger.info(
                f"[YouTubeRenderer] Rendering scene {scene_number}: "
-                f"resolution={resolution}, duration={duration}s, prompt_length={len(visual_prompt)}"
+                f"resolution={resolution}, duration={duration}s, prompt_length={len(visual_prompt)}, "
+                f"has_existing_image={has_existing_image}, has_existing_audio={has_existing_audio}"
            )
            
-            # Generate audio if requested - only if narration is not empty
+            # Use existing audio if available, otherwise generate if requested
            audio_base64 = None
-            if generate_audio_enabled and narration and len(narration.strip()) > 0:
+            scene_audio_url = scene.get("audioUrl")
+            
+            if scene_audio_url:
+                # Load existing audio from URL
+                try:
+                    from pathlib import Path
+                    from urllib.parse import urlparse
+                    
+                    # Extract filename from URL (e.g., /api/youtube/audio/filename.mp3)
+                    parsed_url = urlparse(scene_audio_url)
+                    audio_filename = Path(parsed_url.path).name
+                    
+                    # Load audio file
+                    base_dir = Path(__file__).parent.parent.parent.parent
+                    youtube_audio_dir = base_dir / "youtube_audio"
+                    audio_path = youtube_audio_dir / audio_filename
+                    
+                    if audio_path.exists():
+                        with open(audio_path, "rb") as f:
+                            audio_bytes = f.read()
+                        audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
+                        logger.info(f"[YouTubeRenderer] Using existing audio for scene {scene_number} from {audio_filename}")
+                    else:
+                        logger.warning(f"[YouTubeRenderer] Audio file not found: {audio_path}, will generate new audio")
+                        raise FileNotFoundError(f"Audio file not found: {audio_path}")
+                except Exception as e:
+                    logger.warning(f"[YouTubeRenderer] Failed to load existing audio: {e}, will generate new audio")
+                    scene_audio_url = None  # Fall back to generation
+            
+            # Generate audio if not available and generation is enabled
+            if not audio_base64 and generate_audio_enabled and narration and len(narration.strip()) > 0:
                try:
                    audio_result = generate_audio(
                        text=narration,
@@ -106,7 +141,7 @@ class YouTubeVideoRendererService:
                    audio_bytes = audio_result.audio_bytes if hasattr(audio_result, "audio_bytes") else audio_result
                    # Convert to base64 (just the base64 string, not data URI)
                    audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
-                    logger.info(f"[YouTubeRenderer] Generated audio for scene {scene_number}")
+                    logger.info(f"[YouTubeRenderer] Generated new audio for scene {scene_number}")
                except Exception as e:
                    logger.warning(f"[YouTubeRenderer] Audio generation failed: {e}, continuing without audio")
            
@@ -352,6 +387,7 @@ class YouTubeVideoRendererService:
        self,
        scenes: List[Dict[str, Any]],
        resolution: str = "720p",
+        image_model: str = "ideogram-v3-turbo",
    ) -> Dict[str, Any]:
        """
        Estimate the cost of rendering a video before actually rendering it.
@@ -369,8 +405,16 @@ class YouTubeVideoRendererService:
            "720p": 0.10,
            "1080p": 0.15,
        }
-        
+
        price_per_second = pricing.get(resolution, 0.10)
+
+        # Image generation pricing
+        image_pricing = {
+            "ideogram-v3-turbo": 0.10,
+            "qwen-image": 0.05,
+        }
+
+        image_cost_per_scene = image_pricing.get(image_model, 0.10)
        
        # Filter enabled scenes
        enabled_scenes = [s for s in scenes if s.get("enabled", True)]
@@ -378,7 +422,8 @@ class YouTubeVideoRendererService:
        scene_costs = []
        total_cost = 0.0
        total_duration = 0.0
-        
+        total_image_cost = len(enabled_scenes) * image_cost_per_scene
+
        for scene in enabled_scenes:
            scene_number = scene.get("scene_number", 0)
            duration_estimate = scene.get("duration_estimate", 5)
@@ -396,7 +441,10 @@ class YouTubeVideoRendererService:
            
            total_cost += scene_cost
            total_duration += duration
-        
+
+        # Add image costs to total
+        total_cost += total_image_cost
+
        return {
            "resolution": resolution,
            "price_per_second": price_per_second,
@@ -408,5 +456,8 @@ class YouTubeVideoRendererService:
                "min": round(total_cost * 0.9, 2),  # 10% buffer
                "max": round(total_cost * 1.1, 2),  # 10% buffer
            },
+            "image_model": image_model,
+            "image_cost_per_scene": image_cost_per_scene,
+            "total_image_cost": round(total_image_cost, 2),
        }

--- a/backend/services/youtube/scene_builder.py
+++ b/backend/services/youtube/scene_builder.py
@@ -140,61 +140,87 @@ class YouTubeSceneBuilderService:
        
        scene_duration_range = duration_metadata.get("scene_duration_range", (5, 15))
        
-        scene_generation_prompt = f"""You are an expert video scriptwriter. Create detailed scenes for a YouTube video based on this plan.
+        scene_generation_prompt = f"""You are a top YouTube scriptwriter specializing in engaging, viral content. Create compelling scenes that captivate viewers and maximize watch time.

-**Video Plan:**
- Summary: {video_plan.get('video_summary', '')}
- Goal: {video_plan.get('video_goal', '')}
- Key Message: {video_plan.get('key_message', '')}
- Visual Style: {visual_style}
- Tone: {tone}
+**VIDEO PLAN:**
+📝 Summary: {video_plan.get('video_summary', '')}
+🎯 Goal: {video_plan.get('video_goal', '')}
+💡 Key Message: {video_plan.get('key_message', '')}
+🎨 Visual Style: {visual_style}
+🎭 Tone: {tone}

-**Hook Strategy:**
+**🎣 HOOK STRATEGY:**
 {hook_strategy}

-**Content Outline:**
-{chr(10).join([f"- {section.get('section', '')}: {section.get('description', '')} ({section.get('duration_estimate', 0)}s)" for section in content_outline])}
+**📋 CONTENT STRUCTURE:**
+{chr(10).join([f"• {section.get('section', '')}: {section.get('description', '')} ({section.get('duration_estimate', 0)}s)" for section in content_outline])}

-**Call-to-Action:**
+**🚀 CALL-TO-ACTION:**
 {call_to_action}

-**Duration Constraints:**
- Scene duration: {scene_duration_range[0]}-{scene_duration_range[1]} seconds each
- Total target: {duration_metadata.get('target_seconds', 150)} seconds
+**⏱️ TIMING CONSTRAINTS:**
+• Scene duration: {scene_duration_range[0]}-{scene_duration_range[1]} seconds each
+• Total target: {duration_metadata.get('target_seconds', 150)} seconds

-**Your Task:**
-Create detailed scenes that include:
-1. Scene number and title
-2. Narration text (what will be spoken)
-3. Visual description (what viewers will see)
-4. Duration estimate
-5. Emphasis tags (hook, main_content, transition, cta)
+**🎬 YOUR MISSION - CREATE VIRAL-WORTHY SCENES:**

-**Format as JSON array:**
+Write narration that:
+✨ **HOOKS IMMEDIATELY** - First {duration_metadata.get('hook_seconds', 10)}s must GRAB attention
+🎭 **TELLS A STORY** - Each scene advances the narrative with emotional engagement
+💡 **DELIVERS VALUE** - Provide insights, tips, or "aha!" moments in every scene
+🔥 **BUILDS EXCITEMENT** - Use power words, questions, and cliffhangers
+👥 **CONNECTS PERSONALLY** - Speak directly to the viewer's needs and desires
+⚡ **MAINTAINS PACE** - Vary sentence length for natural rhythm
+🎯 **DRIVES ACTION** - Build toward the CTA with increasing urgency
+
+**REQUIRED SCENE ELEMENTS:**
+1. **scene_number**: Sequential numbering
+2. **title**: Catchy, descriptive title (5-8 words max)
+3. **narration**: ENGAGING spoken script with:
+   - Conversational language ("you know what I mean?")
+   - Rhetorical questions ("Have you ever wondered...?")
+   - Power transitions ("But here's the game-changer...")
+   - Emotional hooks ("Imagine this...")
+   - Action-oriented language ("Let's dive in...")
+4. **visual_description**: Cinematic, professional YouTube visuals
+5. **duration_estimate**: Realistic speaking time
+6. **emphasis**: hook/main_content/transition/cta
+7. **visual_cues**: ["dramatic_zoom", "text_overlay", "fast_cuts"]
+
+**🎯 YOUTUBE OPTIMIZATION RULES:**
+• **Hook Power**: First 3 seconds = make them stay or lose them
+• **Value Density**: Every 10 seconds must deliver new insight
+• **Emotional Arc**: Build curiosity → teach → inspire → convert
+• **Natural Flow**: Scenes must connect seamlessly
+• **CTA Momentum**: Final scene creates irresistible urge to act
+
+**📊 FORMAT AS JSON ARRAY:**
 [
  {{
    "scene_number": 1,
-    "title": "Hook - Attention Grabber",
-    "narration": "The spoken text for this scene...",
-    "visual_description": "Detailed description of what viewers see...",
-    "duration_estimate": 5,
+    "title": "The Shocking Truth They Hide",
+    "narration": "You won't believe what just happened in my latest discovery! I was scrolling through the usual content when BAM - this completely changed everything I thought about [topic]. And get this - it could transform YOUR results too!",
+    "visual_description": "Dynamic opening shot with shocking text overlay, fast cuts of social media feeds, energetic music swell, close-up of surprised reaction",
+    "duration_estimate": 8,
    "emphasis": "hook",
-    "visual_cues": ["close-up", "dynamic", "bright"]
+    "visual_cues": ["shocking_text", "fast_cuts", "music_swell", "reaction_shot"]
  }},
  ...
 ]

-Make sure:
- First scene is a strong hook ({duration_metadata.get('hook_seconds', 10)}s)
- Last scene includes the CTA ({duration_metadata.get('cta_seconds', 10)}s)
- Each scene has clear narration and visual description
- Total duration fits within {duration_metadata.get('target_seconds', 150)} seconds
- Scenes flow naturally from one to the next
-"""
+**🔥 SUCCESS CRITERIA:**
+✅ First scene hooks in 3 seconds
+✅ Each scene delivers 1-2 key insights
+✅ Narration feels like talking to a friend
+✅ Total story arc creates emotional journey
+✅ CTA feels like the natural next step
+✅ Scenes fit duration perfectly"""
        
        system_prompt = (
-            "You are an expert video scriptwriter specializing in YouTube content. "
-            "Your scenes are engaging, well-paced, and optimized for viewer retention."
+            "You are a master YouTube scriptwriter who creates viral, engaging content that "
+            "keeps viewers watching until the end. You understand YouTube algorithm optimization, "
+            "emotional storytelling, and creating irresistible hooks that make viewers hit 'like' and 'subscribe'. "
+            "Your scripts are conversational, valuable, and conversion-focused."
        )
        
        response = llm_text_gen(