AI story writer enhancements, text to video and voice generation, subscription management, and more.

2025-11-19 09:55:32 +05:30
parent bf7493c366
commit e96525347b
64 changed files with 10367 additions and 400 deletions
--- a/backend/api/images.py
+++ b/backend/api/images.py
@@ -134,6 +134,12 @@ def generate(
                            current_video_calls = getattr(summary, "video_calls", 0) or 0
                            video_limit = limits['limits'].get("video_calls", 0) if limits else 0
                            
+                            # Get audio stats for unified log
+                            current_audio_calls = getattr(summary, "audio_calls", 0) or 0
+                            audio_limit = limits['limits'].get("audio_calls", 0) if limits else 0
+                            # Only show ∞ for Enterprise tier when limit is 0 (unlimited)
+                            audio_limit_display = audio_limit if (audio_limit > 0 or tier != 'enterprise') else '∞'
+                            
                            db_track.commit()
                            logger.info(f"[images.generate] ✅ Successfully tracked usage: user {user_id} -> stability -> {new_calls} calls")
                            
@@ -148,6 +154,7 @@ def generate(
 ├─ Calls: {current_calls_before} → {new_calls} / {call_limit if call_limit > 0 else '∞'}
 ├─ Image Editing: {current_image_edit_calls} / {image_edit_limit if image_edit_limit > 0 else '∞'}
 ├─ Videos: {current_video_calls} / {video_limit if video_limit > 0 else '∞'}
+├─ Audio: {current_audio_calls} / {audio_limit_display}
 └─ Status: ✅ Allowed & Tracked
 """)
                        except Exception as track_error:
@@ -437,6 +444,12 @@ def edit(
                    current_video_calls = getattr(summary, "video_calls", 0) or 0
                    video_limit = limits['limits'].get("video_calls", 0) if limits else 0
                    
+                    # Get audio stats for unified log
+                    current_audio_calls = getattr(summary, "audio_calls", 0) or 0
+                    audio_limit = limits['limits'].get("audio_calls", 0) if limits else 0
+                    # Only show ∞ for Enterprise tier when limit is 0 (unlimited)
+                    audio_limit_display = audio_limit if (audio_limit > 0 or tier != 'enterprise') else '∞'
+                    
                    db_track.commit()
                    logger.info(f"[images.edit] ✅ Successfully tracked usage: user {user_id} -> image_edit -> {new_calls} calls")
                    
@@ -451,6 +464,7 @@ def edit(
 ├─ Calls: {current_calls_before} → {new_calls} / {call_limit if call_limit > 0 else '∞'}
 ├─ Images: {current_image_gen_calls} / {image_gen_limit if image_gen_limit > 0 else '∞'}
 ├─ Videos: {current_video_calls} / {video_limit if video_limit > 0 else '∞'}
+├─ Audio: {current_audio_calls} / {audio_limit_display}
 └─ Status: ✅ Allowed & Tracked
 """)
                except Exception as track_error:
--- a/backend/api/story_writer/router.py
+++ b/backend/api/story_writer/router.py
@@ -5,12 +5,19 @@ Main router for story generation operations including premise, outline,
 content generation, and full story creation.
 """

-from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
-from typing import Any, Dict, Union, List, Optional
+import mimetypes
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+
+from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request
 from loguru import logger
-from middleware.auth_middleware import get_current_user
+from middleware.auth_middleware import get_current_user, get_current_user_with_query_token

 from models.story_models import (
+    AnimateSceneRequest,
+    AnimateSceneVoiceoverRequest,
+    AnimateSceneResponse,
+    ResumeSceneAnimationRequest,
    StoryGenerationRequest,
    StorySetupGenerationRequest,
    StorySetupGenerationResponse,
@@ -34,24 +41,66 @@ from models.story_models import (
    StoryVideoResult,
    TaskStatus,
 )
+from pydantic import BaseModel, Field
+from services.database import get_db
+from services.llm_providers.main_video_generation import track_video_usage
 from services.story_writer.story_service import StoryWriterService
-from .task_manager import task_manager
-from .cache_manager import cache_manager
+from services.story_writer.video_generation_service import StoryVideoGenerationService
+from services.subscription import PricingService
+from services.subscription.preflight_validator import validate_scene_animation_operation
+from services.wavespeed.kling_animation import animate_scene_image, resume_scene_animation
+from services.wavespeed.infinitetalk import animate_scene_with_voiceover
 from uuid import uuid4
-from pydantic import BaseModel
-from pathlib import Path
+from utils.logger_utils import get_service_logger

+from .cache_manager import cache_manager
+from .routes import cache_routes, media_generation, story_content, story_setup, story_tasks, video_generation
+from .task_manager import task_manager
 from .utils.auth import require_authenticated_user
-from .utils.media_utils import resolve_media_file
-from .utils.hd_video import (
-    generate_hd_video_payload,
-    generate_hd_video_scene_payload,
-)
+from .utils.hd_video import generate_hd_video_payload, generate_hd_video_scene_payload
+from .utils.media_utils import load_story_image_bytes, load_story_audio_bytes, resolve_media_file
+from urllib.parse import quote


 router = APIRouter(prefix="/api/story", tags=["Story Writer"])

+# Include modular routers (order preserved roughly by workflow)
+# NOTE(review): these sub-routers are included before this module's own routes
+# are declared; if a path is defined both here and in a sub-router, the
+# sub-router's handler would presumably be matched first — confirm no
+# unintended duplicates remain.
+router.include_router(story_setup.router)
+router.include_router(story_content.router)
+router.include_router(story_tasks.router)
+router.include_router(media_generation.router)
+router.include_router(video_generation.router)
+router.include_router(cache_routes.router)
+
 service = StoryWriterService()
+# Logger used by the scene animation endpoints in this module.
+scene_logger = get_service_logger("api.story_writer.scene_animation")
+# Sub-directory of story_videos/ where AI scene animation videos are saved.
+AI_VIDEO_SUBDIR = Path("AI_Videos")
+
+
+def _build_authenticated_media_url(request: Request, path: str) -> str:
+    """Append the caller's auth token to a media URL so <video>/<img> tags can access it."""
+    if not path:
+        return path
+
+    token: Optional[str] = None
+    auth_header = request.headers.get("Authorization")
+    if auth_header and auth_header.startswith("Bearer "):
+        token = auth_header.replace("Bearer ", "").strip()
+    elif "token" in request.query_params:
+        token = request.query_params["token"]
+
+    if token:
+        separator = "&" if "?" in path else "?"
+        path = f"{path}{separator}token={quote(token)}"
+
+    return path
+
+
+def _guess_mime_from_url(url: str, fallback: str) -> str:
+    if not url:
+        return fallback
+    mime, _ = mimetypes.guess_type(url)
+    return mime or fallback


@router.get("/health")
@@ -558,6 +607,22 @@ async def get_task_result(
        logger.error(f"[StoryWriter] Failed to get task result: {e}")
        raise HTTPException(status_code=500, detail=str(e))

+class PromptOptimizeRequest(BaseModel):
+    """Request body for POST /optimize-prompt."""
+
+    # Raw prompt text to optimize (required).
+    text: str = Field(..., description="The prompt text to optimize")
+    # Constrained by regex to "image" or "video"; defaults to "image".
+    mode: Optional[str] = Field(default="image", pattern="^(image|video)$", description="Optimization mode: 'image' or 'video'")
+    # Constrained by regex to the styles named in the description; defaults to "default".
+    style: Optional[str] = Field(
+        default="default", 
+        pattern="^(default|artistic|photographic|technical|anime|realistic)$",
+        description="Style: 'default', 'artistic', 'photographic', 'technical', 'anime', or 'realistic'"
+    )
+    # Optional base64-encoded image passed to the optimizer as extra context.
+    image: Optional[str] = Field(None, description="Base64-encoded image for context (optional)")
+
+
+class PromptOptimizeResponse(BaseModel):
+    """Response body for POST /optimize-prompt."""
+
+    # Prompt text returned by the optimizer.
+    optimized_prompt: str
+    # Set to True by the endpoint when optimization succeeded.
+    success: bool
+
+
 class HDVideoRequest(BaseModel):
    prompt: str
    provider: str = "huggingface"
@@ -692,6 +757,51 @@ async def generate_scene_images(
        raise HTTPException(status_code=500, detail=str(e))


+@router.post("/optimize-prompt", response_model=PromptOptimizeResponse)
+async def optimize_prompt(
+    request: PromptOptimizeRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user)
+) -> PromptOptimizeResponse:
+    """Optimize an image prompt using WaveSpeed prompt optimizer."""
+    try:
+        if not current_user:
+            raise HTTPException(status_code=401, detail="Authentication required")
+        
+        user_id = str(current_user.get('id', ''))
+        if not user_id:
+            raise HTTPException(status_code=401, detail="Invalid user ID in authentication token")
+        
+        if not request.text or not request.text.strip():
+            raise HTTPException(status_code=400, detail="Prompt text is required")
+        
+        logger.info(f"[StoryWriter] Optimizing prompt for user {user_id} (mode={request.mode}, style={request.style})")
+        
+        from services.wavespeed.client import WaveSpeedClient
+        
+        client = WaveSpeedClient()
+        optimized_prompt = client.optimize_prompt(
+            text=request.text.strip(),
+            mode=request.mode or "image",
+            style=request.style or "default",
+            image=request.image,  # Optional base64 image
+            enable_sync_mode=True,
+            timeout=30
+        )
+        
+        logger.info(f"[StoryWriter] Prompt optimized successfully for user {user_id}")
+        
+        return PromptOptimizeResponse(
+            optimized_prompt=optimized_prompt,
+            success=True
+        )
+        
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"[StoryWriter] Failed to optimize prompt: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
@router.get("/images/{image_filename}")
 async def serve_scene_image(
    image_filename: str,
@@ -793,32 +903,376 @@ async def generate_scene_audio(
        raise HTTPException(status_code=500, detail=str(e))


-@router.get("/audio/{audio_filename}")
-async def serve_scene_audio(
-    audio_filename: str,
-    current_user: Dict[str, Any] = Depends(get_current_user)
-):
-    """Serve a generated story scene audio file."""
+# Audio serving endpoint is handled by routes/media_generation.py
+# No duplicate endpoint needed here
+
+
+# ---------------------------
+# Scene Animation Endpoints
+# ---------------------------
+
+
+@router.post("/animate-scene-preview", response_model=AnimateSceneResponse)
+async def animate_scene_preview(
+    request_obj: Request,
+    request: AnimateSceneRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> AnimateSceneResponse:
+    """
+    Animate a single scene image using WaveSpeed Kling v2.5 Turbo Std.
+    """
+    if not current_user:
+        raise HTTPException(status_code=401, detail="Authentication required")
+
+    user_id = str(current_user.get("id", ""))
+    if not user_id:
+        raise HTTPException(status_code=401, detail="Invalid user ID in authentication token")
+
+    duration = request.duration or 5
+    if duration not in (5, 10):
+        raise HTTPException(status_code=400, detail="Duration must be 5 or 10 seconds.")
+
+    scene_logger.info(
+        "[AnimateScene] User=%s scene=%s duration=%s image_url=%s",
+        user_id,
+        request.scene_number,
+        duration,
+        request.image_url,
+    )
+
+    image_bytes = load_story_image_bytes(request.image_url)
+    if not image_bytes:
+        scene_logger.warning("[AnimateScene] Missing image bytes for user=%s scene=%s", user_id, request.scene_number)
+        raise HTTPException(status_code=404, detail="Scene image not found. Generate images first.")
+
+    db = next(get_db())
    try:
-        require_authenticated_user(current_user)
+        pricing_service = PricingService(db)
+        validate_scene_animation_operation(pricing_service=pricing_service, user_id=user_id)
+    finally:
+        db.close()

-        from services.story_writer.audio_generation_service import StoryAudioGenerationService
-        from fastapi.responses import FileResponse
+    animation_result = animate_scene_image(
+        image_bytes=image_bytes,
+        scene_data=request.scene_data,
+        story_context=request.story_context,
+        user_id=user_id,
+        duration=duration,
+    )

-        audio_service = StoryAudioGenerationService()
-        audio_path = resolve_media_file(audio_service.output_dir, audio_filename)
+    base_dir = Path(__file__).parent.parent.parent
+    ai_video_dir = base_dir / "story_videos" / AI_VIDEO_SUBDIR
+    ai_video_dir.mkdir(parents=True, exist_ok=True)
+    video_service = StoryVideoGenerationService(output_dir=str(ai_video_dir))

-        return FileResponse(
-            path=str(audio_path),
-            media_type="audio/mpeg",
-            filename=audio_filename
+    save_result = video_service.save_scene_video(
+        video_bytes=animation_result["video_bytes"],
+        scene_number=request.scene_number,
+        user_id=user_id,
+    )
+    video_filename = save_result["video_filename"]
+    video_url = _build_authenticated_media_url(
+        request_obj, f"/api/story/videos/ai/{video_filename}"
+    )
+
+    usage_info = track_video_usage(
+        user_id=user_id,
+        provider=animation_result["provider"],
+        model_name=animation_result["model_name"],
+        prompt=animation_result["prompt"],
+        video_bytes=animation_result["video_bytes"],
+        cost_override=animation_result["cost"],
+    )
+    if usage_info:
+        scene_logger.warning(
+            "[AnimateScene] Video usage tracked user=%s: %s → %s / %s (cost +$%.2f, total=$%.2f)",
+            user_id,
+            usage_info.get("previous_calls"),
+            usage_info.get("current_calls"),
+            usage_info.get("video_limit_display"),
+            usage_info.get("cost_per_video", 0.0),
+            usage_info.get("total_video_cost", 0.0),
        )

-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"[StoryWriter] Failed to serve audio: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
+    scene_logger.info(
+        "[AnimateScene] ✅ Completed user=%s scene=%s duration=%s cost=$%.2f video=%s",
+        user_id,
+        request.scene_number,
+        animation_result["duration"],
+        animation_result["cost"],
+        video_url,
+    )
+
+    return AnimateSceneResponse(
+        success=True,
+        scene_number=request.scene_number,
+        video_filename=video_filename,
+        video_url=video_url,
+        duration=animation_result["duration"],
+        cost=animation_result["cost"],
+        prompt_used=animation_result["prompt"],
+        provider=animation_result["provider"],
+        prediction_id=animation_result.get("prediction_id"),
+    )
+
+
+@router.post("/animate-scene-resume", response_model=AnimateSceneResponse)
+async def resume_scene_animation_endpoint(
+    request_obj: Request,
+    request: ResumeSceneAnimationRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> AnimateSceneResponse:
+    """Resume downloading a WaveSpeed animation when the initial call timed out."""
+    if not current_user:
+        raise HTTPException(status_code=401, detail="Authentication required")
+
+    user_id = str(current_user.get("id", ""))
+    if not user_id:
+        raise HTTPException(status_code=401, detail="Invalid user ID in authentication token")
+
+    scene_logger.info(
+        "[AnimateScene] Resume requested user=%s scene=%s prediction=%s",
+        user_id,
+        request.scene_number,
+        request.prediction_id,
+    )
+
+    animation_result = resume_scene_animation(
+        prediction_id=request.prediction_id,
+        duration=request.duration or 5,
+        user_id=user_id,
+    )
+
+    base_dir = Path(__file__).parent.parent.parent
+    ai_video_dir = base_dir / "story_videos" / AI_VIDEO_SUBDIR
+    ai_video_dir.mkdir(parents=True, exist_ok=True)
+    video_service = StoryVideoGenerationService(output_dir=str(ai_video_dir))
+
+    save_result = video_service.save_scene_video(
+        video_bytes=animation_result["video_bytes"],
+        scene_number=request.scene_number,
+        user_id=user_id,
+    )
+    video_filename = save_result["video_filename"]
+    video_url = _build_authenticated_media_url(
+        request_obj, f"/api/story/videos/ai/{video_filename}"
+    )
+
+    usage_info = track_video_usage(
+        user_id=user_id,
+        provider=animation_result["provider"],
+        model_name=animation_result["model_name"],
+        prompt=animation_result["prompt"],
+        video_bytes=animation_result["video_bytes"],
+        cost_override=animation_result["cost"],
+    )
+    if usage_info:
+        scene_logger.warning(
+            "[AnimateScene] (Resume) Video usage tracked user=%s: %s → %s / %s (cost +$%.2f, total=$%.2f)",
+            user_id,
+            usage_info.get("previous_calls"),
+            usage_info.get("current_calls"),
+            usage_info.get("video_limit_display"),
+            usage_info.get("cost_per_video", 0.0),
+            usage_info.get("total_video_cost", 0.0),
+        )
+
+    scene_logger.info(
+        "[AnimateScene] ✅ Resume completed user=%s scene=%s prediction=%s video=%s",
+        user_id,
+        request.scene_number,
+        request.prediction_id,
+        video_url,
+    )
+
+    return AnimateSceneResponse(
+        success=True,
+        scene_number=request.scene_number,
+        video_filename=video_filename,
+        video_url=video_url,
+        duration=animation_result["duration"],
+        cost=animation_result["cost"],
+        prompt_used=animation_result["prompt"],
+        provider=animation_result["provider"],
+        prediction_id=animation_result.get("prediction_id"),
+    )
+
+
+@router.post("/animate-scene-voiceover", response_model=Dict[str, Any])
+async def animate_scene_voiceover_endpoint(
+    request_obj: Request,
+    request: AnimateSceneVoiceoverRequest,
+    background_tasks: BackgroundTasks,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> Dict[str, Any]:
+    """
+    Animate a scene using WaveSpeed InfiniteTalk (image + audio) asynchronously.
+    Returns task_id for polling since InfiniteTalk can take up to 10 minutes.
+    """
+    if not current_user:
+        raise HTTPException(status_code=401, detail="Authentication required")
+
+    user_id = str(current_user.get("id", ""))
+    if not user_id:
+        raise HTTPException(status_code=401, detail="Invalid user ID in authentication token")
+
+    scene_logger.info(
+        "[AnimateSceneVoiceover] User=%s scene=%s resolution=%s (async)",
+        user_id,
+        request.scene_number,
+        request.resolution or "720p",
+    )
+
+    image_bytes = load_story_image_bytes(request.image_url)
+    if not image_bytes:
+        raise HTTPException(status_code=404, detail="Scene image not found. Generate images first.")
+
+    audio_bytes = load_story_audio_bytes(request.audio_url)
+    if not audio_bytes:
+        raise HTTPException(status_code=404, detail="Scene audio not found. Generate audio first.")
+
+    db = next(get_db())
+    try:
+        pricing_service = PricingService(db)
+        validate_scene_animation_operation(pricing_service=pricing_service, user_id=user_id)
+    finally:
+        db.close()
+
+    # Extract token for authenticated URL building (if needed)
+    auth_token = None
+    auth_header = request_obj.headers.get("Authorization")
+    if auth_header and auth_header.startswith("Bearer "):
+        auth_token = auth_header.replace("Bearer ", "").strip()
+
+    # Create async task
+    task_id = task_manager.create_task("scene_voiceover_animation")
+    background_tasks.add_task(
+        _execute_voiceover_animation_task,
+        task_id=task_id,
+        request=request,
+        user_id=user_id,
+        image_bytes=image_bytes,
+        audio_bytes=audio_bytes,
+        auth_token=auth_token,
+    )
+
+    return {
+        "task_id": task_id,
+        "status": "pending",
+        "message": "InfiniteTalk animation started. This may take up to 10 minutes.",
+    }
+
+
+def _execute_voiceover_animation_task(
+    task_id: str,
+    request: AnimateSceneVoiceoverRequest,
+    user_id: str,
+    image_bytes: bytes,
+    audio_bytes: bytes,
+    auth_token: Optional[str] = None,
+):
+    """Background task to generate InfiniteTalk video with progress updates."""
+    try:
+        task_manager.update_task_status(
+            task_id, "processing", progress=5.0, message="Submitting to WaveSpeed InfiniteTalk..."
+        )
+
+        animation_result = animate_scene_with_voiceover(
+            image_bytes=image_bytes,
+            audio_bytes=audio_bytes,
+            scene_data=request.scene_data,
+            story_context=request.story_context,
+            user_id=user_id,
+            resolution=request.resolution or "720p",
+            prompt_override=request.prompt,
+            image_mime=_guess_mime_from_url(request.image_url, "image/png"),
+            audio_mime=_guess_mime_from_url(request.audio_url, "audio/mpeg"),
+        )
+
+        task_manager.update_task_status(
+            task_id, "processing", progress=80.0, message="Saving video file..."
+        )
+
+        base_dir = Path(__file__).parent.parent.parent
+        ai_video_dir = base_dir / "story_videos" / AI_VIDEO_SUBDIR
+        ai_video_dir.mkdir(parents=True, exist_ok=True)
+        video_service = StoryVideoGenerationService(output_dir=str(ai_video_dir))
+
+        save_result = video_service.save_scene_video(
+            video_bytes=animation_result["video_bytes"],
+            scene_number=request.scene_number,
+            user_id=user_id,
+        )
+        video_filename = save_result["video_filename"]
+        # Build authenticated URL if token provided, otherwise return plain URL
+        video_url = f"/api/story/videos/ai/{video_filename}"
+        if auth_token:
+            video_url = f"{video_url}?token={quote(auth_token)}"
+
+        usage_info = track_video_usage(
+            user_id=user_id,
+            provider=animation_result["provider"],
+            model_name=animation_result["model_name"],
+            prompt=animation_result["prompt"],
+            video_bytes=animation_result["video_bytes"],
+            cost_override=animation_result["cost"],
+        )
+        if usage_info:
+            scene_logger.warning(
+                "[AnimateSceneVoiceover] Video usage tracked user=%s: %s → %s / %s (cost +$%.2f, total=$%.2f)",
+                user_id,
+                usage_info.get("previous_calls"),
+                usage_info.get("current_calls"),
+                usage_info.get("video_limit_display"),
+                usage_info.get("cost_per_video", 0.0),
+                usage_info.get("total_video_cost", 0.0),
+            )
+
+        scene_logger.info(
+            "[AnimateSceneVoiceover] ✅ Completed user=%s scene=%s cost=$%.2f video=%s",
+            user_id,
+            request.scene_number,
+            animation_result["cost"],
+            video_url,
+        )
+
+        result = AnimateSceneResponse(
+            success=True,
+            scene_number=request.scene_number,
+            video_filename=video_filename,
+            video_url=video_url,
+            duration=animation_result["duration"],
+            cost=animation_result["cost"],
+            prompt_used=animation_result["prompt"],
+            provider=animation_result["provider"],
+            prediction_id=animation_result.get("prediction_id"),
+        )
+
+        task_manager.update_task_status(
+            task_id,
+            "completed",
+            progress=100.0,
+            message="InfiniteTalk animation complete!",
+            result=result.dict(),
+        )
+    except HTTPException as exc:
+        error_msg = str(exc.detail) if isinstance(exc.detail, str) else exc.detail.get("error", "Animation failed") if isinstance(exc.detail, dict) else "Animation failed"
+        scene_logger.error(f"[AnimateSceneVoiceover] Failed: {error_msg}")
+        task_manager.update_task_status(
+            task_id,
+            "failed",
+            error=error_msg,
+            message=f"InfiniteTalk animation failed: {error_msg}",
+        )
+    except Exception as exc:
+        error_msg = str(exc)
+        scene_logger.error(f"[AnimateSceneVoiceover] Error: {error_msg}", exc_info=True)
+        task_manager.update_task_status(
+            task_id,
+            "failed",
+            error=error_msg,
+            message=f"InfiniteTalk animation error: {error_msg}",
+        )


 # ---------------------------
@@ -1260,19 +1714,25 @@ def execute_complete_video_generation(
        )


-@router.get("/videos/{video_filename}")
-async def serve_story_video(
+# Regular video serving endpoint is handled by routes/video_generation.py
+# Only AI videos need a separate endpoint here
+
+
+@router.get("/videos/ai/{video_filename}")
+async def serve_ai_story_video(
    video_filename: str,
    current_user: Dict[str, Any] = Depends(get_current_user)
 ):
-    """Serve a generated story video file."""
+    """Serve a generated AI scene animation video."""
    try:
        require_authenticated_user(current_user)

        from services.story_writer.video_generation_service import StoryVideoGenerationService
        from fastapi.responses import FileResponse

-        video_service = StoryVideoGenerationService()
+        base_dir = Path(__file__).parent.parent.parent
+        ai_video_dir = (base_dir / "story_videos" / "AI_Videos").resolve()
+        video_service = StoryVideoGenerationService(output_dir=str(ai_video_dir))
        video_path = resolve_media_file(video_service.output_dir, video_filename)

        return FileResponse(
@@ -1284,7 +1744,7 @@ async def serve_story_video(
    except HTTPException:
        raise
    except Exception as e:
-        logger.error(f"[StoryWriter] Failed to serve video: {e}")
+        logger.error(f"[StoryWriter] Failed to serve AI video: {e}")
        raise HTTPException(status_code=500, detail=str(e))


--- a/backend/api/story_writer/routes/__init__.py
+++ b/backend/api/story_writer/routes/__init__.py
@@ -0,0 +1,21 @@
+"""
+Collection of modular routers for Story Writer endpoints.
+Each module focuses on a related set of routes to keep the primary
+`router.py` concise and easier to maintain.
+"""
+
+from . import story_setup
+from . import story_content
+from . import story_tasks
+from . import media_generation
+from . import video_generation
+from . import cache_routes
+
+__all__ = [
+    "story_setup",
+    "story_content",
+    "story_tasks",
+    "media_generation",
+    "video_generation",
+    "cache_routes",
+]
--- a/backend/api/story_writer/routes/cache_routes.py
+++ b/backend/api/story_writer/routes/cache_routes.py
@@ -0,0 +1,42 @@
+from typing import Any, Dict
+
+from fastapi import APIRouter, Depends, HTTPException
+from loguru import logger
+
+from middleware.auth_middleware import get_current_user
+
+from ..cache_manager import cache_manager
+from ..utils.auth import require_authenticated_user
+
+
+router = APIRouter()
+
+
+@router.get("/cache/stats")
+async def get_cache_stats(
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> Dict[str, Any]:
+    """Get cache statistics."""
+    try:
+        require_authenticated_user(current_user)
+        stats = cache_manager.get_cache_stats()
+        return {"success": True, "stats": stats}
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to get cache stats: {exc}")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
+@router.post("/cache/clear")
+async def clear_cache(
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> Dict[str, Any]:
+    """Clear the story generation cache."""
+    try:
+        require_authenticated_user(current_user)
+        result = cache_manager.clear_cache()
+        return {"success": True, **result}
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to clear cache: {exc}")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
--- a/backend/api/story_writer/routes/media_generation.py
+++ b/backend/api/story_writer/routes/media_generation.py
@@ -0,0 +1,289 @@
+from typing import Any, Dict, List
+
+from fastapi import APIRouter, Depends, HTTPException
+from fastapi.responses import FileResponse
+from loguru import logger
+
+from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
+from models.story_models import (
+    StoryImageGenerationRequest,
+    StoryImageGenerationResponse,
+    StoryImageResult,
+    RegenerateImageRequest,
+    RegenerateImageResponse,
+    StoryAudioGenerationRequest,
+    StoryAudioGenerationResponse,
+    StoryAudioResult,
+    GenerateAIAudioRequest,
+    GenerateAIAudioResponse,
+    StoryScene,
+)
+from services.story_writer.image_generation_service import StoryImageGenerationService
+from services.story_writer.audio_generation_service import StoryAudioGenerationService
+
+from ..utils.auth import require_authenticated_user
+from ..utils.media_utils import resolve_media_file
+
+
+# Module-level router plus the image/audio service instances shared by
+# every handler in this module.
+router = APIRouter()
+image_service = StoryImageGenerationService()
+audio_service = StoryAudioGenerationService()
+
+
+@router.post("/generate-images", response_model=StoryImageGenerationResponse)
+async def generate_scene_images(
+    request: StoryImageGenerationRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> StoryImageGenerationResponse:
+    """Generate images for story scenes.
+
+    Rejects an empty scene list, delegates generation to
+    ``image_service.generate_scene_images`` and wraps every returned result
+    dict in a ``StoryImageResult``.
+
+    Raises:
+        HTTPException: 400 when no scenes are supplied; 500 (carrying the
+            error text) for any unexpected failure. HTTPExceptions raised
+            inside the handler are propagated unchanged.
+    """
+    try:
+        user_id = require_authenticated_user(current_user)
+
+        if not request.scenes:
+            raise HTTPException(status_code=400, detail="At least one scene is required")
+
+        logger.info(f"[StoryWriter] Generating images for {len(request.scenes)} scenes for user {user_id}")
+
+        # StoryScene models are converted to dicts; anything else passes through as-is.
+        scene_payloads = []
+        for scene in request.scenes:
+            scene_payloads.append(scene.dict() if isinstance(scene, StoryScene) else scene)
+
+        raw_results = image_service.generate_scene_images(
+            scenes=scene_payloads,
+            user_id=user_id,
+            provider=request.provider,
+            width=request.width or 1024,
+            height=request.height or 1024,
+            model=request.model,
+        )
+
+        # Missing keys fall back to neutral defaults so a partial result still serialises.
+        image_models: List[StoryImageResult] = []
+        for item in raw_results:
+            image_models.append(
+                StoryImageResult(
+                    scene_number=item.get("scene_number", 0),
+                    scene_title=item.get("scene_title", "Untitled"),
+                    image_filename=item.get("image_filename", ""),
+                    image_url=item.get("image_url", ""),
+                    width=item.get("width", 1024),
+                    height=item.get("height", 1024),
+                    provider=item.get("provider", "unknown"),
+                    model=item.get("model"),
+                    seed=item.get("seed"),
+                    error=item.get("error"),
+                )
+            )
+
+        return StoryImageGenerationResponse(images=image_models, success=True)
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to generate images: {exc}")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
+@router.post("/regenerate-images", response_model=RegenerateImageResponse)
+async def regenerate_scene_image(
+    request: RegenerateImageRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> RegenerateImageResponse:
+    """Regenerate a single scene image using a direct prompt (no AI prompt generation).
+
+    Unlike the batch endpoint, an unexpected failure is not turned into a
+    500: it is logged and reported in-band as a response with
+    ``success=False`` and the error text.
+
+    Raises:
+        HTTPException: 400 when the prompt is missing or blank; any
+            HTTPException raised inside the handler is propagated unchanged.
+    """
+    # Requested dimensions with the 1024 fallback, shared by both response paths.
+    width = request.width or 1024
+    height = request.height or 1024
+
+    try:
+        user_id = require_authenticated_user(current_user)
+
+        prompt = (request.prompt or "").strip()
+        if not prompt:
+            raise HTTPException(status_code=400, detail="Prompt is required")
+
+        logger.info(
+            f"[StoryWriter] Regenerating image for scene {request.scene_number} "
+            f"({request.scene_title}) for user {user_id}"
+        )
+
+        outcome = image_service.regenerate_scene_image(
+            scene_number=request.scene_number,
+            scene_title=request.scene_title,
+            prompt=prompt,
+            user_id=user_id,
+            provider=request.provider,
+            width=width,
+            height=height,
+            model=request.model,
+        )
+
+        # Values the service omits fall back to what the caller asked for.
+        return RegenerateImageResponse(
+            scene_number=outcome.get("scene_number", request.scene_number),
+            scene_title=outcome.get("scene_title", request.scene_title),
+            image_filename=outcome.get("image_filename", ""),
+            image_url=outcome.get("image_url", ""),
+            width=outcome.get("width", width),
+            height=outcome.get("height", height),
+            provider=outcome.get("provider", "unknown"),
+            model=outcome.get("model"),
+            seed=outcome.get("seed"),
+            success=True,
+        )
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to regenerate image: {exc}")
+        return RegenerateImageResponse(
+            scene_number=request.scene_number,
+            scene_title=request.scene_title,
+            image_filename="",
+            image_url="",
+            width=width,
+            height=height,
+            provider=request.provider or "unknown",
+            success=False,
+            error=str(exc),
+        )
+
+
+@router.get("/images/{image_filename}")
+async def serve_scene_image(
+    image_filename: str,
+    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
+):
+    """Serve a generated story scene image.
+
+    Supports authentication via Authorization header or token query parameter.
+    Query parameter is useful for HTML elements like <img> that cannot send custom headers.
+
+    The filename is resolved against ``image_service.output_dir`` through
+    ``resolve_media_file`` and the file is returned as ``image/png``.
+
+    Raises:
+        HTTPException: Propagated unchanged when raised inside the handler;
+            any other failure becomes a 500 carrying the error text.
+    """
+    try:
+        require_authenticated_user(current_user)
+        resolved_path = resolve_media_file(image_service.output_dir, image_filename)
+        response = FileResponse(
+            path=str(resolved_path),
+            media_type="image/png",
+            filename=image_filename,
+        )
+        return response
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to serve image: {exc}")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
+@router.post("/generate-audio", response_model=StoryAudioGenerationResponse)
+async def generate_scene_audio(
+    request: StoryAudioGenerationRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> StoryAudioGenerationResponse:
+    """Generate audio narration for story scenes.
+
+    Rejects an empty scene list, delegates to
+    ``audio_service.generate_scene_audio_list`` (defaulting to the "gtts"
+    provider, language "en", normal speed and rate 150) and wraps every
+    returned result dict in a ``StoryAudioResult``.
+
+    Raises:
+        HTTPException: 400 when no scenes are supplied; 500 (carrying the
+            error text) for any unexpected failure. HTTPExceptions raised
+            inside the handler are propagated unchanged.
+    """
+    try:
+        user_id = require_authenticated_user(current_user)
+
+        if not request.scenes:
+            raise HTTPException(status_code=400, detail="At least one scene is required")
+
+        logger.info(f"[StoryWriter] Generating audio for {len(request.scenes)} scenes for user {user_id}")
+
+        # StoryScene models are converted to dicts; anything else passes through as-is.
+        scene_payloads = []
+        for scene in request.scenes:
+            scene_payloads.append(scene.dict() if isinstance(scene, StoryScene) else scene)
+
+        raw_results = audio_service.generate_scene_audio_list(
+            scenes=scene_payloads,
+            user_id=user_id,
+            provider=request.provider or "gtts",
+            lang=request.lang or "en",
+            slow=request.slow or False,
+            rate=request.rate or 150,
+        )
+
+        # ``or ""`` also replaces an explicit None filename/URL with an empty string.
+        audio_models: List[StoryAudioResult] = [
+            StoryAudioResult(
+                scene_number=item.get("scene_number", 0),
+                scene_title=item.get("scene_title", "Untitled"),
+                audio_filename=item.get("audio_filename") or "",
+                audio_url=item.get("audio_url") or "",
+                provider=item.get("provider", "unknown"),
+                file_size=item.get("file_size", 0),
+                error=item.get("error"),
+            )
+            for item in raw_results
+        ]
+
+        return StoryAudioGenerationResponse(audio_files=audio_models, success=True)
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to generate audio: {exc}")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
+@router.post("/generate-ai-audio", response_model=GenerateAIAudioResponse)
+async def generate_ai_audio(
+    request: GenerateAIAudioRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> GenerateAIAudioResponse:
+    """Generate AI audio for a single scene using WaveSpeed Minimax Speech 02 HD.
+
+    An unexpected failure is not turned into a 500: it is logged and
+    reported in-band as a response with ``success=False`` and the error text.
+
+    Raises:
+        HTTPException: 400 when the text is missing or blank; any
+            HTTPException raised inside the handler is propagated unchanged.
+    """
+    # Voice with its fallback, shared by the success and failure responses.
+    voice_id = request.voice_id or "Wise_Woman"
+
+    try:
+        user_id = require_authenticated_user(current_user)
+
+        narration = (request.text or "").strip()
+        if not narration:
+            raise HTTPException(status_code=400, detail="Text is required")
+
+        logger.info(
+            f"[StoryWriter] Generating AI audio for scene {request.scene_number} "
+            f"({request.scene_title}) for user {user_id}"
+        )
+
+        outcome = audio_service.generate_ai_audio(
+            scene_number=request.scene_number,
+            scene_title=request.scene_title,
+            text=narration,
+            user_id=user_id,
+            voice_id=voice_id,
+            speed=request.speed or 1.0,
+            volume=request.volume or 1.0,
+            pitch=request.pitch or 0.0,
+            emotion=request.emotion or "happy",
+        )
+
+        return GenerateAIAudioResponse(
+            scene_number=outcome.get("scene_number", request.scene_number),
+            scene_title=outcome.get("scene_title", request.scene_title),
+            audio_filename=outcome.get("audio_filename", ""),
+            audio_url=outcome.get("audio_url", ""),
+            provider=outcome.get("provider", "wavespeed"),
+            model=outcome.get("model", "minimax/speech-02-hd"),
+            voice_id=outcome.get("voice_id", voice_id),
+            # Fallback length is measured on the text as submitted (not stripped).
+            text_length=outcome.get("text_length", len(request.text)),
+            file_size=outcome.get("file_size", 0),
+            cost=outcome.get("cost", 0.0),
+            success=True,
+        )
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to generate AI audio: {exc}")
+        submitted_length = len(request.text) if request.text else 0
+        return GenerateAIAudioResponse(
+            scene_number=request.scene_number,
+            scene_title=request.scene_title,
+            audio_filename="",
+            audio_url="",
+            provider="wavespeed",
+            model="minimax/speech-02-hd",
+            voice_id=voice_id,
+            text_length=submitted_length,
+            file_size=0,
+            cost=0.0,
+            success=False,
+            error=str(exc),
+        )
+
+
+@router.get("/audio/{audio_filename}")
+async def serve_scene_audio(
+    audio_filename: str,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+):
+    """Serve a generated story scene audio file.
+
+    The filename is resolved against ``audio_service.output_dir`` through
+    ``resolve_media_file`` and the file is returned as ``audio/mpeg``.
+
+    Raises:
+        HTTPException: Propagated unchanged when raised inside the handler;
+            any other failure becomes a 500 carrying the error text.
+    """
+    # NOTE(review): the image route in this module authenticates with
+    # get_current_user_with_query_token, this one with get_current_user --
+    # confirm the difference is intentional for <audio> playback.
+    try:
+        require_authenticated_user(current_user)
+        resolved_path = resolve_media_file(audio_service.output_dir, audio_filename)
+        response = FileResponse(
+            path=str(resolved_path),
+            media_type="audio/mpeg",
+            filename=audio_filename,
+        )
+        return response
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to serve audio: {exc}")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
--- a/backend/api/story_writer/routes/story_content.py
+++ b/backend/api/story_writer/routes/story_content.py
@@ -0,0 +1,195 @@
+from typing import Any, Dict, List
+
+from fastapi import APIRouter, Depends, HTTPException
+from loguru import logger
+
+from middleware.auth_middleware import get_current_user
+from models.story_models import (
+    StoryStartRequest,
+    StoryContentResponse,
+    StoryScene,
+    StoryContinueRequest,
+    StoryContinueResponse,
+)
+from services.story_writer.story_service import StoryWriterService
+
+from ..utils.auth import require_authenticated_user
+
+
+# Module-level router and the story service instance shared by the
+# content-generation handlers below.
+router = APIRouter()
+story_service = StoryWriterService()
+
+
+@router.post("/generate-start", response_model=StoryContentResponse)
+async def generate_story_start(
+    request: StoryStartRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> StoryContentResponse:
+    """Generate the starting section of a story.
+
+    Validates the premise and outline, delegates to
+    ``story_service.generate_story_start`` and, for short stories, marks the
+    result complete once it reaches at least 900 words. A list outline is
+    flattened to text for the response.
+
+    Raises:
+        HTTPException: 400 when the premise or outline is missing/blank; 500
+            (carrying the error text) for any unexpected failure.
+            HTTPExceptions raised inside the handler are propagated unchanged.
+    """
+    try:
+        user_id = require_authenticated_user(current_user)
+
+        if not request.premise or not request.premise.strip():
+            raise HTTPException(status_code=400, detail="Premise is required")
+        if not request.outline or (isinstance(request.outline, str) and not request.outline.strip()):
+            raise HTTPException(status_code=400, detail="Outline is required")
+
+        logger.info(f"[StoryWriter] Generating story start for user {user_id}")
+
+        # A structured outline arrives as StoryScene models; convert it to dicts.
+        outline_data: Any = request.outline
+        if isinstance(outline_data, list) and outline_data and isinstance(outline_data[0], StoryScene):
+            outline_data = [scene.dict() for scene in outline_data]
+
+        # The attribute may be missing or explicitly None/empty; default to
+        # "Medium" so the ``.lower()`` call below cannot fail after the
+        # generation call has already run.
+        story_length = getattr(request, "story_length", None) or "Medium"
+        story_start = story_service.generate_story_start(
+            premise=request.premise,
+            outline=outline_data,
+            persona=request.persona,
+            story_setting=request.story_setting,
+            character_input=request.character_input,
+            plot_elements=request.plot_elements,
+            writing_style=request.writing_style,
+            story_tone=request.story_tone,
+            narrative_pov=request.narrative_pov,
+            audience_age_group=request.audience_age_group,
+            content_rating=request.content_rating,
+            ending_preference=request.ending_preference,
+            story_length=story_length,
+            user_id=user_id,
+        )
+
+        story_length_lower = story_length.lower()
+        is_short_story = "short" in story_length_lower or "1000" in story_length_lower
+        is_complete = False
+        if is_short_story:
+            # Short stories are expected in a single call; 900+ words counts as done.
+            word_count = len(story_start.split()) if story_start else 0
+            if word_count >= 900:
+                is_complete = True
+                logger.info(
+                    f"[StoryWriter] Short story generated with {word_count} words. Marking as complete."
+                )
+            else:
+                logger.warning(
+                    f"[StoryWriter] Short story generated with only {word_count} words. May need continuation."
+                )
+
+        # The response carries the outline as text, so flatten scene dicts.
+        outline_response = outline_data
+        if isinstance(outline_response, list):
+            outline_response = "\n".join(
+                f"Scene {scene.get('scene_number', i + 1)}: "
+                f"{scene.get('title', 'Untitled')}\n  {scene.get('description', '')}"
+                for i, scene in enumerate(outline_response)
+            )
+
+        return StoryContentResponse(
+            story=story_start,
+            premise=request.premise,
+            outline=str(outline_response),
+            is_complete=is_complete,
+            success=True,
+        )
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to generate story start: {exc}")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
+@router.post("/continue", response_model=StoryContinueResponse)
+async def continue_story(
+    request: StoryContinueRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> StoryContinueResponse:
+    """Continue writing a story.
+
+    Short stories are rejected (they are generated in one call). For other
+    lengths the target is 10000 words for "long" and 4500 otherwise, with a
+    5% buffer. If the story is already at or near the target the handler
+    returns the ``IAMDONE`` marker without calling the service; otherwise it
+    requests a continuation and appends ``IAMDONE`` once the combined text
+    reaches the target.
+
+    Raises:
+        HTTPException: 400 when the story text is missing/blank or the story
+            is a short story; 500 (carrying the error text) for any
+            unexpected failure. HTTPExceptions raised inside the handler are
+            propagated unchanged.
+    """
+    try:
+        user_id = require_authenticated_user(current_user)
+
+        if not request.story_text or not request.story_text.strip():
+            raise HTTPException(status_code=400, detail="Story text is required")
+
+        logger.info(f"[StoryWriter] Continuing story for user {user_id}")
+
+        # A structured outline arrives as StoryScene models; convert it to dicts.
+        outline_data: Any = request.outline
+        if isinstance(outline_data, list) and outline_data and isinstance(outline_data[0], StoryScene):
+            outline_data = [scene.dict() for scene in outline_data]
+
+        # The attribute may be missing or explicitly None/empty; default to
+        # "Medium" so the ``.lower()`` call below cannot fail.
+        story_length = getattr(request, "story_length", None) or "Medium"
+        story_length_lower = story_length.lower()
+        is_short_story = "short" in story_length_lower or "1000" in story_length_lower
+        if is_short_story:
+            logger.warning(
+                "[StoryWriter] Attempted to continue a short story. Short stories should be complete in one call."
+            )
+            raise HTTPException(
+                status_code=400,
+                detail="Short stories are generated in a single call and should be complete. "
+                "If the story is incomplete, please regenerate it from the beginning.",
+            )
+
+        current_word_count = len(request.story_text.split())
+        if "long" in story_length_lower or "10000" in story_length_lower:
+            target_total_words = 10000
+        else:
+            target_total_words = 4500
+        # Allow a 5% overshoot before forcing completion.
+        buffer_target = int(target_total_words * 1.05)
+
+        # Already past the buffer, or within 50 words above the target: stop here.
+        if current_word_count >= buffer_target or (
+            current_word_count >= target_total_words
+            and (current_word_count - target_total_words) < 50
+        ):
+            logger.info(
+                f"[StoryWriter] Word count ({current_word_count}) already at or near target ({target_total_words})."
+            )
+            return StoryContinueResponse(continuation="IAMDONE", is_complete=True, success=True)
+
+        continuation = story_service.continue_story(
+            premise=request.premise,
+            outline=outline_data,
+            story_text=request.story_text,
+            persona=request.persona,
+            story_setting=request.story_setting,
+            character_input=request.character_input,
+            plot_elements=request.plot_elements,
+            writing_style=request.writing_style,
+            story_tone=request.story_tone,
+            narrative_pov=request.narrative_pov,
+            audience_age_group=request.audience_age_group,
+            content_rating=request.content_rating,
+            ending_preference=request.ending_preference,
+            story_length=story_length,
+            user_id=user_id,
+        )
+
+        is_complete = "IAMDONE" in continuation.upper()
+        if not is_complete and continuation:
+            # The marker is known to be absent in this branch, so it can be
+            # appended without re-checking.
+            new_story_text = request.story_text + "\n\n" + continuation
+            new_word_count = len(new_story_text.split())
+            if new_word_count >= buffer_target:
+                logger.info(
+                    f"[StoryWriter] Word count ({new_word_count}) now exceeds buffer target ({buffer_target})."
+                )
+                continuation = continuation.rstrip() + "\n\nIAMDONE"
+                is_complete = True
+            elif new_word_count >= target_total_words and (
+                new_word_count - target_total_words
+            ) < 100:
+                logger.info(
+                    f"[StoryWriter] Word count ({new_word_count}) is at or very close to target ({target_total_words})."
+                )
+                continuation = continuation.rstrip() + "\n\nIAMDONE"
+                is_complete = True
+
+        return StoryContinueResponse(continuation=continuation, is_complete=is_complete, success=True)
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to continue story: {exc}")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
--- a/backend/api/story_writer/routes/story_setup.py
+++ b/backend/api/story_writer/routes/story_setup.py
@@ -0,0 +1,141 @@
+from typing import Any, Dict, List
+
+from fastapi import APIRouter, Depends, HTTPException
+from loguru import logger
+
+from middleware.auth_middleware import get_current_user
+from models.story_models import (
+    StorySetupGenerationRequest,
+    StorySetupGenerationResponse,
+    StorySetupOption,
+    StoryGenerationRequest,
+    StoryOutlineResponse,
+    StoryScene,
+    StoryStartRequest,
+    StoryPremiseResponse,
+)
+from services.story_writer.story_service import StoryWriterService
+
+from ..utils.auth import require_authenticated_user
+
+
+# Module-level router and the story service instance shared by the
+# setup/premise/outline handlers below.
+router = APIRouter()
+story_service = StoryWriterService()
+
+
+@router.post("/generate-setup", response_model=StorySetupGenerationResponse)
+async def generate_story_setup(
+    request: StorySetupGenerationRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> StorySetupGenerationResponse:
+    """Generate 3 story setup options from a user's story idea.
+
+    Delegates to ``story_service.generate_story_setup_options`` and wraps
+    each returned mapping in a ``StorySetupOption``.
+
+    Raises:
+        HTTPException: 400 when the story idea is missing or blank; 500
+            (carrying the error text) for any unexpected failure.
+            HTTPExceptions raised inside the handler are propagated unchanged.
+    """
+    try:
+        user_id = require_authenticated_user(current_user)
+
+        idea = request.story_idea
+        if not idea or not idea.strip():
+            raise HTTPException(status_code=400, detail="Story idea is required")
+
+        logger.info(f"[StoryWriter] Generating story setup options for user {user_id}")
+
+        raw_options = story_service.generate_story_setup_options(
+            story_idea=idea,
+            user_id=user_id,
+        )
+
+        setup_options = []
+        for raw_option in raw_options:
+            setup_options.append(StorySetupOption(**raw_option))
+
+        return StorySetupGenerationResponse(options=setup_options, success=True)
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to generate story setup options: {exc}")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
+@router.post("/generate-premise", response_model=StoryPremiseResponse)
+async def generate_premise(
+    request: StoryGenerationRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> StoryPremiseResponse:
+    """Generate a story premise.
+
+    Forwards the story parameters from the request to
+    ``story_service.generate_premise`` and returns the resulting premise.
+
+    Raises:
+        HTTPException: Propagated unchanged when raised inside the handler;
+            any other failure becomes a 500 carrying the error text.
+    """
+    try:
+        user_id = require_authenticated_user(current_user)
+        logger.info(f"[StoryWriter] Generating premise for user {user_id}")
+
+        # Collected first so the service call itself stays a one-liner.
+        premise_kwargs = {
+            "persona": request.persona,
+            "story_setting": request.story_setting,
+            "character_input": request.character_input,
+            "plot_elements": request.plot_elements,
+            "writing_style": request.writing_style,
+            "story_tone": request.story_tone,
+            "narrative_pov": request.narrative_pov,
+            "audience_age_group": request.audience_age_group,
+            "content_rating": request.content_rating,
+            "ending_preference": request.ending_preference,
+            "user_id": user_id,
+        }
+        generated_premise = story_service.generate_premise(**premise_kwargs)
+
+        return StoryPremiseResponse(premise=generated_premise, success=True)
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to generate premise: {exc}")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
+@router.post("/generate-outline", response_model=StoryOutlineResponse)
+async def generate_outline(
+    request: StoryStartRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+    use_structured: bool = True,
+) -> StoryOutlineResponse:
+    """Generate a story outline from a premise.
+
+    Delegates to ``story_service.generate_outline``. A list result is
+    returned as ``StoryScene`` models with ``is_structured=True``; any other
+    result is stringified and returned with ``is_structured=False``.
+
+    Args:
+        request: Story parameters, including the premise to outline.
+        current_user: Authenticated user injected by the auth dependency.
+        use_structured: Forwarded to the service as ``use_structured_output``.
+
+    Raises:
+        HTTPException: 400 when the premise is missing or blank; 500
+            (carrying the error text) for any unexpected failure.
+            HTTPExceptions raised inside the handler are propagated unchanged.
+    """
+    try:
+        user_id = require_authenticated_user(current_user)
+
+        if not request.premise or not request.premise.strip():
+            raise HTTPException(status_code=400, detail="Premise is required")
+
+        logger.info(
+            f"[StoryWriter] Generating outline for user {user_id} (structured={use_structured})"
+        )
+        # loguru formats with str.format-style braces, so printf-style "%s"
+        # placeholders would be logged literally; interpolate with f-strings.
+        logger.info(
+            "[StoryWriter] Outline params: "
+            f"audience_age_group={request.audience_age_group}, "
+            f"writing_style={request.writing_style}, "
+            f"story_tone={request.story_tone}"
+        )
+
+        outline = story_service.generate_outline(
+            premise=request.premise,
+            persona=request.persona,
+            story_setting=request.story_setting,
+            character_input=request.character_input,
+            plot_elements=request.plot_elements,
+            writing_style=request.writing_style,
+            story_tone=request.story_tone,
+            narrative_pov=request.narrative_pov,
+            audience_age_group=request.audience_age_group,
+            content_rating=request.content_rating,
+            ending_preference=request.ending_preference,
+            user_id=user_id,
+            use_structured_output=use_structured,
+        )
+
+        if isinstance(outline, list):
+            # Dict entries are promoted to StoryScene; other entries pass through.
+            scenes: List[StoryScene] = [
+                StoryScene(**scene) if isinstance(scene, dict) else scene for scene in outline
+            ]
+            return StoryOutlineResponse(outline=scenes, success=True, is_structured=True)
+
+        return StoryOutlineResponse(outline=str(outline), success=True, is_structured=False)
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to generate outline: {exc}")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
--- a/backend/api/story_writer/routes/story_tasks.py
+++ b/backend/api/story_writer/routes/story_tasks.py
@@ -0,0 +1,130 @@
+from typing import Any, Dict
+
+from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
+from loguru import logger
+
+from middleware.auth_middleware import get_current_user
+from models.story_models import (
+    StoryGenerationRequest,
+    TaskStatus,
+)
+from services.story_writer.story_service import StoryWriterService
+
+from ..cache_manager import cache_manager
+from ..task_manager import task_manager
+from ..utils.auth import require_authenticated_user
+
+
+# Module-level router and story service instance for the task endpoints below.
+router = APIRouter()
+story_service = StoryWriterService()
+
+
+@router.post("/generate-full", response_model=Dict[str, Any])
+async def generate_full_story(
+    request: StoryGenerationRequest,
+    background_tasks: BackgroundTasks,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+    max_iterations: int = 10,
+) -> Dict[str, Any]:
+    """Generate a complete story asynchronously.
+
+    On a cache hit a task is created, immediately marked completed with the
+    cached result, and its id is returned with ``cached=True``. Otherwise a
+    pending task is created and ``task_manager.execute_story_generation_task``
+    is scheduled as a background task.
+
+    Raises:
+        HTTPException: Propagated unchanged when raised inside the handler;
+            any other failure becomes a 500 carrying the error text.
+    """
+    try:
+        user_id = require_authenticated_user(current_user)
+
+        # The cache key is derived from the request fields only.
+        lookup_key = cache_manager.get_cache_key(request.dict())
+        cached_story = cache_manager.get_cached_result(lookup_key)
+
+        if not cached_story:
+            task_id = task_manager.create_task("story_generation")
+            request_data = {**request.dict(), "max_iterations": max_iterations}
+
+            background_tasks.add_task(
+                task_manager.execute_story_generation_task,
+                task_id=task_id,
+                request_data=request_data,
+                user_id=user_id,
+            )
+
+            logger.info(f"[StoryWriter] Created task {task_id} for full story generation (user {user_id})")
+            return {
+                "task_id": task_id,
+                "status": "pending",
+                "message": "Story generation started. Use /task/{task_id}/status to check progress.",
+            }
+
+        # Cache hit: expose the stored result through an already-completed task.
+        logger.info(f"[StoryWriter] Returning cached result for user {user_id}")
+        task_id = task_manager.create_task("story_generation")
+        task_manager.update_task_status(
+            task_id,
+            "completed",
+            progress=100.0,
+            result=cached_story,
+            message="Returned cached result",
+        )
+        return {"task_id": task_id, "cached": True}
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to start story generation: {exc}")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
+@router.get("/task/{task_id}/status", response_model=TaskStatus)
+async def get_task_status(
+    task_id: str,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> TaskStatus:
+    """Get the status of a story generation task.
+
+    Raises:
+        HTTPException: 404 when the task manager has no status for
+            ``task_id``; 500 (carrying the error text) for any unexpected
+            failure. HTTPExceptions raised inside the handler are propagated
+            unchanged.
+    """
+    try:
+        require_authenticated_user(current_user)
+
+        status_record = task_manager.get_task_status(task_id)
+        if status_record:
+            return TaskStatus(**status_record)
+
+        raise HTTPException(status_code=404, detail=f"Task {task_id} not found")
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to get task status: {exc}")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
+@router.get("/task/{task_id}/result")
+async def get_task_result(
+    task_id: str,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> Dict[str, Any]:
+    """Get the result of a completed story generation task.
+
+    A dict result is returned as a copy with ``success`` defaulted to True
+    and ``task_id`` set; any other result is wrapped under a ``result`` key.
+
+    Raises:
+        HTTPException: 404 when the task or its result is missing; 400 when
+            the task is not completed; 500 (carrying the error text) for any
+            unexpected failure. HTTPExceptions raised inside the handler are
+            propagated unchanged.
+    """
+    try:
+        require_authenticated_user(current_user)
+
+        status_record = task_manager.get_task_status(task_id)
+        if not status_record:
+            raise HTTPException(status_code=404, detail=f"Task {task_id} not found")
+
+        state = status_record["status"]
+        if state != "completed":
+            raise HTTPException(
+                status_code=400,
+                detail=f"Task {task_id} is not completed. Status: {state}",
+            )
+
+        result = status_record.get("result")
+        if not result:
+            raise HTTPException(status_code=404, detail=f"No result found for task {task_id}")
+
+        if not isinstance(result, dict):
+            return {"result": result, "success": True, "task_id": task_id}
+
+        # Work on a copy so the stored task result is not mutated.
+        payload = dict(result)
+        if "success" not in payload:
+            payload["success"] = True
+        payload["task_id"] = task_id
+        return payload
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to get task result: {exc}")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
--- a/backend/api/story_writer/routes/video_generation.py
+++ b/backend/api/story_writer/routes/video_generation.py
@@ -0,0 +1,511 @@
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
+from fastapi.responses import FileResponse
+from loguru import logger
+from pydantic import BaseModel
+
+from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
+from models.story_models import (
+    StoryVideoGenerationRequest,
+    StoryVideoGenerationResponse,
+    StoryVideoResult,
+    StoryScene,
+    StoryGenerationRequest,
+)
+from services.story_writer.video_generation_service import StoryVideoGenerationService
+from services.story_writer.image_generation_service import StoryImageGenerationService
+from services.story_writer.audio_generation_service import StoryAudioGenerationService
+from services.story_writer.story_service import StoryWriterService
+
+from ..task_manager import task_manager
+from ..utils.auth import require_authenticated_user
+from ..utils.hd_video import (
+    generate_hd_video_payload,
+    generate_hd_video_scene_payload,
+)
+from ..utils.media_utils import resolve_media_file
+
+
+# Router that the endpoint decorators in this module register their routes on.
+router = APIRouter()
+# Service objects are instantiated once, at import time, and shared by every
+# handler and background task defined in this module.
+video_service = StoryVideoGenerationService()
+image_service = StoryImageGenerationService()
+audio_service = StoryAudioGenerationService()
+story_service = StoryWriterService()
+
+
+class HDVideoRequest(BaseModel):
+    # Request body for POST /hd-video. Only ``prompt`` is required; ``provider``
+    # defaults to "huggingface" and every other field defaults to None.
+    prompt: str
+    provider: str = "huggingface"
+    model: Optional[str] = None
+    num_frames: Optional[int] = None
+    guidance_scale: Optional[float] = None
+    num_inference_steps: Optional[int] = None
+    negative_prompt: Optional[str] = None
+    seed: Optional[int] = None
+
+
+class HDVideoSceneRequest(BaseModel):
+    # Request body for POST /hd-video-scene: the scene to render plus the story
+    # context and the full scene list, followed by the same optional generation
+    # settings that HDVideoRequest accepts.
+    scene_number: int
+    scene_data: Dict[str, Any]
+    story_context: Dict[str, Any]
+    all_scenes: List[Dict[str, Any]]
+    provider: str = "huggingface"
+    model: Optional[str] = None
+    num_frames: Optional[int] = None
+    guidance_scale: Optional[float] = None
+    num_inference_steps: Optional[int] = None
+    negative_prompt: Optional[str] = None
+    seed: Optional[int] = None
+
+
+def _media_path_from_url(directory: Path, url: str) -> Path:
+    """Map a media URL onto *directory* using only its last path segment (query string dropped)."""
+    return directory / url.split("/")[-1].split("?")[0]
+
+
+@router.post("/generate-video", response_model=StoryVideoGenerationResponse)
+async def generate_story_video(
+    request: StoryVideoGenerationRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> StoryVideoGenerationResponse:
+    """Generate a video from story scenes, images, and audio.
+
+    For every scene the visual is the animated video when its file is present,
+    otherwise the static image; the audio is the AI audio when its file is
+    present, otherwise the regular audio. A scene lacking either a visual or an
+    audio file is skipped entirely, so the four lists handed to the video
+    service always have one entry per retained scene.
+
+    Raises:
+        HTTPException: 400 when no scenes are supplied, when the scene/image/audio
+            counts differ, or when no scene has usable assets; 500 on any other failure.
+    """
+    try:
+        user_id = require_authenticated_user(current_user)
+
+        if not request.scenes:
+            raise HTTPException(status_code=400, detail="At least one scene is required")
+
+        scene_count = len(request.scenes)
+        if scene_count != len(request.image_urls) or scene_count != len(request.audio_urls):
+            raise HTTPException(
+                status_code=400,
+                detail="Number of scenes, image URLs, and audio URLs must match",
+            )
+
+        logger.info(f"[StoryWriter] Generating video for {scene_count} scenes for user {user_id}")
+
+        scenes_data = [scene.dict() if isinstance(scene, StoryScene) else scene for scene in request.scenes]
+        video_paths: List[Optional[str]] = []  # Animated videos (preferred)
+        image_paths: List[Optional[str]] = []  # Static images (fallback)
+        audio_paths: List[str] = []
+        valid_scenes: List[Dict[str, Any]] = []
+
+        # Animated clips live under <four levels above this file>/story_videos/AI_Videos.
+        base_dir = Path(__file__).parent.parent.parent.parent
+        ai_video_dir = (base_dir / "story_videos" / "AI_Videos").resolve()
+
+        video_urls = request.video_urls or [None] * scene_count
+        ai_audio_urls = request.ai_audio_urls or [None] * scene_count
+
+        for idx, (scene, image_url, audio_url) in enumerate(zip(scenes_data, request.image_urls, request.audio_urls)):
+            # --- Visual: prefer the animated video, fall back to the static image ---
+            video_path: Optional[Path] = None
+            image_path: Optional[Path] = None
+
+            video_url = video_urls[idx] if idx < len(video_urls) else None
+            if video_url:
+                candidate = _media_path_from_url(ai_video_dir, video_url)
+                # is_file() rather than exists(): a URL ending in "/" maps to the
+                # directory itself, which exists but is not usable media.
+                if candidate.is_file():
+                    logger.info(f"[StoryWriter] Using animated video for scene {scene.get('scene_number', idx+1)}: {candidate.name}")
+                    video_path = candidate
+                else:
+                    logger.warning(f"[StoryWriter] Animated video not found: {candidate}, falling back to image")
+
+            if video_path is None:
+                candidate = _media_path_from_url(image_service.output_dir, image_url)
+                if not candidate.is_file():
+                    logger.warning(f"[StoryWriter] Image not found: {candidate} (from URL: {image_url})")
+                    continue
+                image_path = candidate
+
+            # --- Audio: prefer AI audio, fall back to the regular audio ---
+            audio_path: Optional[Path] = None
+
+            ai_audio_url = ai_audio_urls[idx] if idx < len(ai_audio_urls) else None
+            if ai_audio_url:
+                candidate = _media_path_from_url(audio_service.output_dir, ai_audio_url)
+                if candidate.is_file():
+                    logger.info(f"[StoryWriter] Using AI audio for scene {scene.get('scene_number', idx+1)}: {candidate.name}")
+                    audio_path = candidate
+                else:
+                    logger.warning(f"[StoryWriter] AI audio not found: {candidate}, falling back to free audio")
+
+            if audio_path is None:
+                candidate = _media_path_from_url(audio_service.output_dir, audio_url)
+                if not candidate.is_file():
+                    logger.warning(f"[StoryWriter] Audio not found: {candidate} (from URL: {audio_url})")
+                    continue
+                audio_path = candidate
+
+            # Record the scene only once both assets are known, and append to all
+            # four lists together so their indexes can never drift apart.
+            video_paths.append(str(video_path) if video_path is not None else None)
+            image_paths.append(str(image_path) if image_path is not None else None)
+            audio_paths.append(str(audio_path))
+            valid_scenes.append(scene)
+
+        if not valid_scenes:
+            raise HTTPException(status_code=400, detail="No valid video/image or audio files were found")
+
+        # NOTE(review): this call is made directly inside an async handler; if it
+        # blocks, it holds the event loop for its whole duration.
+        video_result = video_service.generate_story_video(
+            scenes=valid_scenes,
+            image_paths=image_paths,  # Can contain None for scenes with animated videos
+            video_paths=video_paths,  # Can contain None for scenes with static images
+            audio_paths=audio_paths,
+            user_id=user_id,
+            story_title=request.story_title or "Story",
+            fps=request.fps or 24,
+            transition_duration=request.transition_duration or 0.5,
+        )
+
+        video_model = StoryVideoResult(
+            video_filename=video_result.get("video_filename", ""),
+            video_url=video_result.get("video_url", ""),
+            duration=video_result.get("duration", 0.0),
+            fps=video_result.get("fps", 24),
+            file_size=video_result.get("file_size", 0),
+            num_scenes=video_result.get("num_scenes", 0),
+            error=video_result.get("error"),
+        )
+
+        return StoryVideoGenerationResponse(video=video_model, success=True)
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to generate video: {exc}")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
+@router.post("/generate-video-async", response_model=Dict[str, Any])
+async def generate_story_video_async(
+    request: StoryVideoGenerationRequest,
+    background_tasks: BackgroundTasks,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> Dict[str, Any]:
+    """
+    Queue story video generation as a background task and return its task id.
+
+    The request is validated up front (at least one scene; equal numbers of
+    scenes, image URLs and audio URLs) before a task is created. Progress is
+    published through the task manager, so the frontend can poll
+    /api/story/task/{task_id}/status for status messages.
+    """
+    try:
+        user_id = require_authenticated_user(current_user)
+
+        if not request.scenes:
+            raise HTTPException(status_code=400, detail="At least one scene is required")
+
+        expected = len(request.scenes)
+        counts_match = expected == len(request.image_urls) == len(request.audio_urls)
+        if not counts_match:
+            raise HTTPException(
+                status_code=400,
+                detail="Number of scenes, image URLs, and audio URLs must match",
+            )
+
+        new_task_id = task_manager.create_task("story_video_generation")
+        background_tasks.add_task(
+            _execute_video_generation_task,
+            task_id=new_task_id,
+            request=request,
+            user_id=user_id,
+        )
+        return {"task_id": new_task_id, "status": "pending", "message": "Video generation started"}
+    except HTTPException:
+        raise
+    except Exception as error:
+        logger.error(f"[StoryWriter] Failed to start async video generation: {error}")
+        raise HTTPException(status_code=500, detail=str(error))
+
+
+def _execute_video_generation_task(task_id: str, request: StoryVideoGenerationRequest, user_id: str):
+    """Background task to generate story video with progress mapped to task manager.
+
+    Scenes whose image or audio file is missing on disk are skipped. The outcome
+    is reported through ``task_manager`` ("completed" with the result, or
+    "failed" with the error text); nothing is returned and no exception escapes.
+
+    Args:
+        task_id: Task manager identifier to report progress against.
+        request: The validated video generation request.
+        user_id: Authenticated user the video is generated for.
+    """
+    # NOTE(review): unlike the synchronous /generate-video handler, this path does
+    # not look at request.video_urls or request.ai_audio_urls - confirm that is intended.
+    try:
+        task_manager.update_task_status(task_id, "processing", progress=2.0, message="Initializing video generation...")
+
+        scenes_data = [scene.dict() if isinstance(scene, StoryScene) else scene for scene in request.scenes]
+        image_paths: List[str] = []
+        audio_paths: List[str] = []
+        valid_scenes: List[Dict[str, Any]] = []
+
+        for scene, image_url, audio_url in zip(scenes_data, request.image_urls, request.audio_urls):
+            # Only the last URL path segment (without query string) names the file.
+            image_filename = image_url.split("/")[-1].split("?")[0]
+            audio_filename = audio_url.split("/")[-1].split("?")[0]
+            image_path = image_service.output_dir / image_filename
+            audio_path = audio_service.output_dir / audio_filename
+            # is_file() rather than exists(): an empty filename resolves to the
+            # output directory itself, which exists but is not usable media.
+            if not image_path.is_file():
+                logger.warning(f"[StoryWriter] Image not found: {image_path} (from URL: {image_url})")
+                continue
+            if not audio_path.is_file():
+                logger.warning(f"[StoryWriter] Audio not found: {audio_path} (from URL: {audio_url})")
+                continue
+            image_paths.append(str(image_path))
+            audio_paths.append(str(audio_path))
+            valid_scenes.append(scene)
+
+        # The three lists grow together, so one emptiness check covers them all.
+        if not valid_scenes:
+            raise RuntimeError("No valid or mismatched image/audio assets for video generation.")
+
+        def progress_callback(sub_progress: float, msg: str):
+            # Clamp the reported value to 0..100, then map it onto 5..95 overall.
+            overall = 5.0 + max(0.0, min(100.0, sub_progress)) * 0.9
+            task_manager.update_task_status(task_id, "processing", progress=overall, message=msg)
+
+        result = video_service.generate_story_video(
+            scenes=valid_scenes,
+            image_paths=image_paths,
+            audio_paths=audio_paths,
+            user_id=user_id,
+            story_title=request.story_title or "Story",
+            fps=request.fps or 24,
+            transition_duration=request.transition_duration or 0.5,
+            progress_callback=progress_callback,
+        )
+
+        task_manager.update_task_status(
+            task_id,
+            "completed",
+            progress=100.0,
+            message="Video generation complete!",
+            result={"video": result, "success": True},
+        )
+    except Exception as exc:
+        # loguru has no ``exc_info`` flag: passing it as a keyword records no
+        # traceback and makes loguru run str.format() on the message, which raises
+        # if the exception text contains braces. opt(exception=True) attaches the
+        # traceback, and the "{}" placeholder keeps the exception text out of the
+        # format template.
+        logger.opt(exception=True).error("[StoryWriter] Async video generation failed: {}", exc)
+        task_manager.update_task_status(task_id, "failed", error=str(exc), message=f"Video generation failed: {exc}")
+
+
+@router.post("/hd-video")
+async def generate_hd_video(
+    request: HDVideoRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> Dict[str, Any]:
+    """Generate an HD video from a text prompt for the authenticated user.
+
+    Delegates to ``generate_hd_video_payload``. An HTTPException raised there is
+    passed through unchanged; any other error becomes a 500 response.
+    """
+    try:
+        user_id = require_authenticated_user(current_user)
+        return generate_hd_video_payload(request, user_id)
+    except HTTPException:
+        raise
+    except Exception as exc:
+        # loguru has no ``exc_info`` flag: as a keyword it adds no traceback and
+        # triggers str.format() on the message, which raises when the exception
+        # text contains braces. opt(exception=True) is the loguru way to log the traceback.
+        logger.opt(exception=True).error("[StoryWriter] Failed to generate HD video: {}", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
+@router.post("/hd-video-scene")
+async def generate_hd_video_scene(
+    request: HDVideoSceneRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> Dict[str, Any]:
+    """Generate an HD video for a single story scene for the authenticated user.
+
+    Delegates to ``generate_hd_video_scene_payload``. An HTTPException raised
+    there is passed through unchanged; any other error becomes a 500 response.
+    """
+    try:
+        user_id = require_authenticated_user(current_user)
+        return generate_hd_video_scene_payload(request, user_id)
+    except HTTPException:
+        raise
+    except Exception as exc:
+        # loguru has no ``exc_info`` flag: as a keyword it adds no traceback and
+        # triggers str.format() on the message, which raises when the exception
+        # text contains braces. opt(exception=True) is the loguru way to log the traceback.
+        logger.opt(exception=True).error("[StoryWriter] Failed to generate HD video for scene: {}", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
+@router.post("/generate-complete-video", response_model=Dict[str, Any])
+async def generate_complete_story_video(
+    request: StoryGenerationRequest,
+    background_tasks: BackgroundTasks,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> Dict[str, Any]:
+    """Queue the complete story-to-video workflow as a background task.
+
+    A task is created, ``execute_complete_video_generation`` is scheduled with
+    the request converted to a plain dict, and the new task id is returned
+    immediately with status ``pending``.
+    """
+    try:
+        user_id = require_authenticated_user(current_user)
+        logger.info(f"[StoryWriter] Starting complete video generation for user {user_id}")
+
+        new_task_id = task_manager.create_task("complete_video_generation")
+        background_tasks.add_task(
+            execute_complete_video_generation,
+            task_id=new_task_id,
+            request_data=request.dict(),
+            user_id=user_id,
+        )
+
+        acknowledgement: Dict[str, Any] = {
+            "task_id": new_task_id,
+            "status": "pending",
+            "message": "Complete video generation started",
+        }
+        return acknowledgement
+
+    except HTTPException:
+        raise
+    except Exception as error:
+        logger.error(f"[StoryWriter] Failed to start complete video generation: {error}")
+        raise HTTPException(status_code=500, detail=str(error))
+
+
+def execute_complete_video_generation(
+    task_id: str,
+    request_data: Dict[str, Any],
+    user_id: str,
+):
+    """
+    Execute complete video generation workflow synchronously.
+
+    Stages, in order: premise, structured outline, per-scene images, per-scene
+    audio, then video composition. Runs in a background task and performs
+    blocking operations. Progress and the final result (or the failure) are
+    reported through ``task_manager``; nothing is returned and no exception escapes.
+
+    Args:
+        task_id: Task manager identifier to report progress against.
+        request_data: Story generation request as a plain dict.
+        user_id: Authenticated user the generation is performed for.
+    """
+    try:
+        task_manager.update_task_status(task_id, "processing", progress=5.0, message="Starting complete video generation...")
+
+        task_manager.update_task_status(task_id, "processing", progress=10.0, message="Generating story premise...")
+
+        # Premise and outline generation take the same story parameters, so they
+        # are collected once. A missing key raises KeyError here and is reported
+        # as a task failure by the handler at the bottom.
+        story_params = {
+            key: request_data[key]
+            for key in (
+                "persona",
+                "story_setting",
+                "character_input",
+                "plot_elements",
+                "writing_style",
+                "story_tone",
+                "narrative_pov",
+                "audience_age_group",
+                "content_rating",
+                "ending_preference",
+            )
+        }
+
+        premise = story_service.generate_premise(**story_params, user_id=user_id)
+
+        task_manager.update_task_status(task_id, "processing", progress=20.0, message="Generating structured outline with scenes...")
+        outline_scenes = story_service.generate_outline(
+            premise=premise,
+            **story_params,
+            user_id=user_id,
+            use_structured_output=True,
+        )
+
+        if not isinstance(outline_scenes, list):
+            raise RuntimeError("Failed to generate structured outline")
+
+        def stage_progress(base: float):
+            """Build a progress callback that reports ``base + sub_progress * 0.2`` overall."""
+
+            def report(sub_progress: float, message: str):
+                overall_progress = base + (sub_progress * 0.2)
+                task_manager.update_task_status(task_id, "processing", progress=overall_progress, message=message)
+
+            return report
+
+        task_manager.update_task_status(task_id, "processing", progress=30.0, message="Generating images for scenes...")
+        image_results = image_service.generate_scene_images(
+            scenes=outline_scenes,
+            user_id=user_id,
+            provider=request_data.get("image_provider"),
+            width=request_data.get("image_width", 1024),
+            height=request_data.get("image_height", 1024),
+            model=request_data.get("image_model"),
+            progress_callback=stage_progress(30.0),
+        )
+
+        task_manager.update_task_status(task_id, "processing", progress=50.0, message="Generating audio narration for scenes...")
+        audio_results = audio_service.generate_scene_audio_list(
+            scenes=outline_scenes,
+            user_id=user_id,
+            provider=request_data.get("audio_provider", "gtts"),
+            lang=request_data.get("audio_lang", "en"),
+            slow=request_data.get("audio_slow", False),
+            rate=request_data.get("audio_rate", 150),
+            progress_callback=stage_progress(50.0),
+        )
+
+        task_manager.update_task_status(task_id, "processing", progress=70.0, message="Preparing video assets...")
+
+        # Index the results by scene number once instead of rescanning both lists
+        # for every scene. setdefault keeps the first entry per scene number,
+        # which is what a front-to-back search would have found.
+        images_by_scene: Dict[Any, Any] = {}
+        for image_entry in image_results:
+            images_by_scene.setdefault(image_entry.get("scene_number"), image_entry)
+        audio_by_scene: Dict[Any, Any] = {}
+        for audio_entry in audio_results:
+            audio_by_scene.setdefault(audio_entry.get("scene_number"), audio_entry)
+
+        image_paths: List[str] = []
+        audio_paths: List[str] = []
+        valid_scenes: List[Dict[str, Any]] = []
+
+        for scene in outline_scenes:
+            scene_number = scene.get("scene_number", 0)
+            image_result = images_by_scene.get(scene_number)
+            audio_result = audio_by_scene.get(scene_number)
+
+            # A scene is usable only when both assets were produced without error.
+            if not image_result or not audio_result:
+                continue
+            if image_result.get("error") or audio_result.get("error"):
+                continue
+
+            image_path = image_result.get("image_path")
+            audio_path = audio_result.get("audio_path")
+            if image_path and audio_path:
+                image_paths.append(image_path)
+                audio_paths.append(audio_path)
+                valid_scenes.append(scene)
+
+        # The three lists grow together, so one emptiness check covers them all.
+        if not valid_scenes:
+            raise RuntimeError(
+                f"No valid images or audio files were generated. Images: {len(image_paths)}, Audio: {len(audio_paths)}"
+            )
+
+        task_manager.update_task_status(task_id, "processing", progress=75.0, message="Composing video from scenes...")
+
+        # ``or`` rather than a .get() default: the key is always present, so a
+        # default would never apply, and a None value cannot be sliced.
+        story_title = (request_data.get("story_setting") or "Story")[:50]
+
+        video_result = video_service.generate_story_video(
+            scenes=valid_scenes,
+            image_paths=image_paths,
+            audio_paths=audio_paths,
+            user_id=user_id,
+            story_title=story_title,
+            fps=request_data.get("video_fps", 24),
+            transition_duration=request_data.get("video_transition_duration", 0.5),
+            progress_callback=stage_progress(75.0),
+        )
+
+        result = {
+            "premise": premise,
+            "outline_scenes": outline_scenes,
+            "images": image_results,
+            "audio_files": audio_results,
+            "video": video_result,
+            "success": True,
+        }
+
+        task_manager.update_task_status(
+            task_id,
+            "completed",
+            progress=100.0,
+            message="Complete video generation finished!",
+            result=result,
+        )
+
+        logger.info(f"[StoryWriter] Complete video generation task {task_id} completed successfully")
+
+    except Exception as exc:
+        error_msg = str(exc)
+        # loguru has no ``exc_info`` flag: as a keyword it adds no traceback and
+        # triggers str.format() on the message, which raises when the error text
+        # contains braces - the task would then never be marked failed.
+        logger.opt(exception=True).error(
+            "[StoryWriter] Complete video generation task {} failed: {}", task_id, error_msg
+        )
+        task_manager.update_task_status(
+            task_id,
+            "failed",
+            error=error_msg,
+            message=f"Complete video generation failed: {error_msg}",
+        )
+
+
+@router.get("/videos/{video_filename}")
+async def serve_story_video(
+    video_filename: str,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+):
+    """Return a generated story video from the video service's output directory as an MP4 file response."""
+    try:
+        require_authenticated_user(current_user)
+        # resolve_media_file turns the requested name into a path under the output directory.
+        resolved_path = resolve_media_file(video_service.output_dir, video_filename)
+        response = FileResponse(
+            path=str(resolved_path),
+            media_type="video/mp4",
+            filename=video_filename,
+        )
+        return response
+    except HTTPException:
+        raise
+    except Exception as error:
+        logger.error(f"[StoryWriter] Failed to serve video: {error}")
+        raise HTTPException(status_code=500, detail=str(error))
+
+
--- a/backend/api/story_writer/utils/hd_video.py
+++ b/backend/api/story_writer/utils/hd_video.py
@@ -1,13 +1,11 @@
 from __future__ import annotations

-from typing import Any, Dict, Optional
+from typing import Any, Dict

 from fastapi import HTTPException
 from loguru import logger
 from uuid import uuid4

-from .media_utils import load_story_image_bytes
-

 def generate_hd_video_payload(request: Any, user_id: str) -> Dict[str, Any]:
    """Handles synchronous HD video generation."""
@@ -57,8 +55,8 @@ def generate_hd_video_payload(request: Any, user_id: str) -> Dict[str, Any]:

 def generate_hd_video_scene_payload(request: Any, user_id: str) -> Dict[str, Any]:
    """
-    Handles per-scene HD video generation including prompt enhancement,
-    subscription validation, and optional image conditioning.
+    Handles per-scene HD video generation including prompt enhancement
+    and subscription validation.
    """
    from services.database import get_db as get_db_validation
    from services.onboarding.api_key_manager import APIKeyManager
@@ -71,7 +69,6 @@ def generate_hd_video_scene_payload(request: Any, user_id: str) -> Dict[str, Any
    scene_number = request.scene_number
    logger.info(f"[StoryWriter] Generating HD video for scene {scene_number} for user {user_id}")

-    # Step 1: Validate API key
    hf_token = APIKeyManager().get_api_key("hf_token")
    if not hf_token:
        logger.error("[StoryWriter] Pre-flight: HF token not configured - blocking video generation")
@@ -83,7 +80,6 @@ def generate_hd_video_scene_payload(request: Any, user_id: str) -> Dict[str, Any
            },
        )

-    # Step 2: Subscription limits
    db_validation = next(get_db_validation())
    try:
        pricing_service = PricingService(db_validation)
@@ -93,7 +89,6 @@ def generate_hd_video_scene_payload(request: Any, user_id: str) -> Dict[str, Any
    finally:
        db_validation.close()

-    # Stage 1: Prompt enhancement
    enhanced_prompt = enhance_scene_prompt_for_video(
        current_scene=request.scene_data,
        story_context=request.story_context,
@@ -102,15 +97,6 @@ def generate_hd_video_scene_payload(request: Any, user_id: str) -> Dict[str, Any
    )
    logger.info(f"[StoryWriter] Generated enhanced prompt ({len(enhanced_prompt)} chars) for scene {scene_number}")

-    # Stage 2: Optional image reference
-    scene_image_bytes: Optional[bytes] = None
-    if getattr(request, "scene_image_url", None):
-        scene_image_bytes = load_story_image_bytes(request.scene_image_url)
-        if scene_image_bytes:
-            logger.info(f"[StoryWriter] Using scene image reference for scene {scene_number}")
-        else:
-            logger.warning(f"[StoryWriter] Scene image could not be loaded for scene {scene_number}, falling back to text-only video")
-
    kwargs: Dict[str, Any] = {}
    if getattr(request, "model", None):
        kwargs["model"] = request.model
@@ -129,7 +115,6 @@ def generate_hd_video_scene_payload(request: Any, user_id: str) -> Dict[str, Any
        prompt=enhanced_prompt,
        provider=getattr(request, "provider", None) or "huggingface",
        user_id=user_id,
-        input_image_bytes=scene_image_bytes,
        **kwargs,
    )

@@ -151,4 +136,3 @@ def generate_hd_video_scene_payload(request: Any, user_id: str) -> Dict[str, Any
        "model": getattr(request, "model", None) or "tencent/HunyuanVideo",
    }

-
--- a/backend/api/story_writer/utils/media_utils.py
+++ b/backend/api/story_writer/utils/media_utils.py
@@ -11,6 +11,8 @@ from loguru import logger
 BASE_DIR = Path(__file__).resolve().parents[3]  # backend/
 STORY_IMAGES_DIR = (BASE_DIR / "story_images").resolve()
 STORY_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
+STORY_AUDIO_DIR = (BASE_DIR / "story_audio").resolve()
+STORY_AUDIO_DIR.mkdir(parents=True, exist_ok=True)


 def load_story_image_bytes(image_url: str) -> Optional[bytes]:
@@ -48,6 +50,41 @@ def load_story_image_bytes(image_url: str) -> Optional[bytes]:
        return None


+def load_story_audio_bytes(audio_url: str) -> Optional[bytes]:
+    """
+    Resolve an authenticated story audio URL (e.g., /api/story/audio/<file>) to raw bytes.
+
+    Args:
+        audio_url: Absolute URL or bare path containing the ``/api/story/audio/`` prefix.
+
+    Returns:
+        The file contents, or None when the URL is empty or unsupported, the
+        resolved path falls outside STORY_AUDIO_DIR, the file is missing, or
+        reading fails.
+    """
+    if not audio_url:
+        return None
+
+    try:
+        parsed = urlparse(audio_url)
+        path = parsed.path if parsed.scheme else audio_url
+        prefix = "/api/story/audio/"
+        if prefix not in path:
+            logger.warning(f"[StoryWriter] Unsupported audio URL for video reference: {audio_url}")
+            return None
+
+        filename = path.split(prefix, 1)[1].split("?", 1)[0].strip()
+        if not filename:
+            return None
+
+        file_path = (STORY_AUDIO_DIR / filename).resolve()
+        # Compare path components rather than string prefixes: a startswith()
+        # check would also accept a sibling such as "<STORY_AUDIO_DIR>_other/x",
+        # reachable through "../" segments in the filename.
+        try:
+            file_path.relative_to(STORY_AUDIO_DIR)
+        except ValueError:
+            logger.error(f"[StoryWriter] Attempted path traversal when resolving audio: {audio_url}")
+            return None
+
+        # is_file() also rejects directories, which cannot be read as bytes.
+        if not file_path.is_file():
+            logger.warning(f"[StoryWriter] Referenced scene audio not found on disk: {file_path}")
+            return None
+
+        return file_path.read_bytes()
+    except Exception as exc:
+        logger.error(f"[StoryWriter] Failed to load reference audio for video gen: {exc}")
+        return None
+
+
 def resolve_media_file(base_dir: Path, filename: str) -> Path:
    """
    Returns a safe resolved path for a media file stored under base_dir.
@@ -62,8 +99,50 @@ def resolve_media_file(base_dir: Path, filename: str) -> Path:
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Access denied")

    if not resolved.exists():
+        alternate = _find_alternate_media_file(base_dir, filename)
+        if alternate:
+            logger.warning(
+                "[StoryWriter] Requested media file '%s' missing; serving closest match '%s'",
+                filename,
+                alternate.name,
+            )
+            return alternate
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"File not found: {filename}")

    return resolved


+def _find_alternate_media_file(base_dir: Path, filename: str) -> Optional[Path]:
+    """
+    Attempt to find the most recent media file that matches the original name prefix.
+
+    The prefix is everything before the last underscore of the file stem; the
+    candidates are the files directly inside ``base_dir`` named
+    ``<prefix>_<anything><suffix>``, and the one with the newest modification
+    time wins. This helps when files are regenerated with new UUID/hash suffixes
+    but the frontend still references an older filename.
+
+    Args:
+        base_dir: Directory to search (not searched recursively).
+        filename: The requested filename that could not be found.
+
+    Returns:
+        The newest matching file, or None when the name has no extension or no
+        underscore, nothing matches, or the search fails.
+    """
+    # Local import keeps this change self-contained within the function.
+    import glob
+
+    # NOTE(review): the match is made on the name prefix alone, so a different
+    # file sharing that prefix can be returned - confirm that is acceptable for
+    # every directory this is used with.
+    try:
+        base_dir = base_dir.resolve()
+    except Exception:
+        return None
+
+    stem = Path(filename).stem
+    suffix = Path(filename).suffix
+
+    if not suffix or "_" not in stem:
+        return None
+
+    prefix = stem.rsplit("_", 1)[0]
+    # The filename comes from the request: escape it so "*", "?" and "[" are
+    # matched literally and only the intended "*" acts as a wildcard.
+    pattern = f"{glob.escape(prefix)}_*{glob.escape(suffix)}"
+
+    try:
+        # max() with a default picks the newest file without sorting the whole
+        # list; stat() can still fail if a file disappears mid-search.
+        return max(
+            (p for p in base_dir.glob(pattern) if p.is_file()),
+            key=lambda p: p.stat().st_mtime,
+            default=None,
+        )
+    except Exception as exc:
+        logger.debug(f"[StoryWriter] Failed to search alternate media files for {filename}: {exc}")
+        return None
+
+
--- a/backend/api/subscription_api.py
+++ b/backend/api/subscription_api.py
@@ -4,6 +4,7 @@ Provides endpoints for subscription management and usage monitoring.
 """

 from fastapi import APIRouter, Depends, HTTPException, Query
+from pydantic import BaseModel
 from sqlalchemy.orm import Session
 from sqlalchemy import desc, func
 from typing import Dict, Any, Optional, List
@@ -116,6 +117,7 @@ async def get_subscription_plans(
                    "stability_calls": plan.stability_calls_limit,
                    "video_calls": getattr(plan, 'video_calls_limit', 0),
                    "image_edit_calls": getattr(plan, 'image_edit_calls_limit', 0),
+                    "audio_calls": getattr(plan, 'audio_calls_limit', 0),
                    "gemini_tokens": plan.gemini_tokens_limit,
                    "openai_tokens": plan.openai_tokens_limit,
                    "anthropic_tokens": plan.anthropic_tokens_limit,
@@ -134,7 +136,7 @@ async def get_subscription_plans(
    
    except (sqlite3.OperationalError, Exception) as e:
        error_str = str(e).lower()
-        if 'no such column' in error_str and ('exa_calls_limit' in error_str or 'video_calls_limit' in error_str or 'image_edit_calls_limit' in error_str):
+        if 'no such column' in error_str and ('exa_calls_limit' in error_str or 'video_calls_limit' in error_str or 'image_edit_calls_limit' in error_str or 'audio_calls_limit' in error_str):
            logger.warning("Missing column detected in subscription plans query, attempting schema fix...")
            try:
                import services.subscription.schema_utils as schema_utils
@@ -241,6 +243,7 @@ async def get_user_subscription(
                            "stability_calls": free_plan.stability_calls_limit,
                            "video_calls": getattr(free_plan, 'video_calls_limit', 0),
                            "image_edit_calls": getattr(free_plan, 'image_edit_calls_limit', 0),
+                            "audio_calls": getattr(free_plan, 'audio_calls_limit', 0),
                            "monthly_cost": free_plan.monthly_cost_limit
                        }
                    }
@@ -340,6 +343,7 @@ async def get_subscription_status(
                            "stability_calls": free_plan.stability_calls_limit,
                            "video_calls": getattr(free_plan, 'video_calls_limit', 0),
                            "image_edit_calls": getattr(free_plan, 'image_edit_calls_limit', 0),
+                            "audio_calls": getattr(free_plan, 'audio_calls_limit', 0),
                            "monthly_cost": free_plan.monthly_cost_limit
                        }
                    }
@@ -405,7 +409,7 @@ async def get_subscription_status(

    except (sqlite3.OperationalError, Exception) as e:
        error_str = str(e).lower()
-        if 'no such column' in error_str and ('exa_calls_limit' in error_str or 'video_calls_limit' in error_str or 'image_edit_calls_limit' in error_str):
+        if 'no such column' in error_str and ('exa_calls_limit' in error_str or 'video_calls_limit' in error_str or 'image_edit_calls_limit' in error_str or 'audio_calls_limit' in error_str):
            # Try to fix schema and retry once
            logger.warning("Missing column detected in subscription status query, attempting schema fix...")
            try:
@@ -499,6 +503,7 @@ async def get_subscription_status(
                                "stability_calls": plan.stability_calls_limit,
                                "video_calls": getattr(plan, 'video_calls_limit', 0),
                                "image_edit_calls": getattr(plan, 'image_edit_calls_limit', 0),
+                                "audio_calls": getattr(plan, 'audio_calls_limit', 0),
                                "monthly_cost": plan.monthly_cost_limit
                            }
                        }
@@ -988,7 +993,7 @@ async def get_dashboard_data(
    
    except (sqlite3.OperationalError, Exception) as e:
        error_str = str(e).lower()
-        if 'no such column' in error_str and ('exa_calls' in error_str or 'exa_cost' in error_str or 'video_calls' in error_str or 'video_cost' in error_str or 'image_edit_calls' in error_str or 'image_edit_cost' in error_str):
+        if 'no such column' in error_str and ('exa_calls' in error_str or 'exa_cost' in error_str or 'video_calls' in error_str or 'video_cost' in error_str or 'image_edit_calls' in error_str or 'image_edit_cost' in error_str or 'audio_calls' in error_str or 'audio_cost' in error_str):
            logger.warning("Missing column detected in dashboard query, attempting schema fix...")
            try:
                import services.subscription.schema_utils as schema_utils
@@ -1271,4 +1276,235 @@ async def get_usage_logs(
        raise
    except Exception as e:
        logger.error(f"Error getting usage logs: {e}", exc_info=True)
-        raise HTTPException(status_code=500, detail=f"Failed to get usage logs: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Failed to get usage logs: {str(e)}")
+
+
+class PreflightOperationRequest(BaseModel):
+    """One operation to validate and price in a pre-flight check request."""
+    provider: str  # lower-cased by the endpoint, then mapped to an APIProvider member
+    model: Optional[str] = None  # pricing lookup key; the endpoint estimates no cost when omitted
+    tokens_requested: Optional[int] = 0  # None is treated as 0 by the endpoint
+    operation_type: str  # echoed back in the per-operation result
+    actual_provider_name: Optional[str] = None  # reported provider name; falls back to `provider`
+
+
+class PreflightCheckRequest(BaseModel):
+    """Request body for the /preflight-check route: the batch of operations to evaluate together."""
+    operations: List[PreflightOperationRequest]  # if none maps to a known provider, the endpoint answers HTTP 400
+
+
+@router.post("/preflight-check")
+async def preflight_check(
+    request: PreflightCheckRequest,
+    db: Session = Depends(get_db),
+    current_user: Dict[str, Any] = Depends(get_current_user)
+) -> Dict[str, Any]:
+    """
+    Pre-flight check for operations with cost estimation.
+    
+    Maps each requested operation to an APIProvider (unknown providers are skipped),
+    validates the batch against subscription limits, estimates a cost per operation,
+    and reports current usage. `remaining` is None when the limit is not positive.
+    
+    Raises HTTPException: 401 (no user id), 400 (no valid operations), 500 (other errors).
+    NOTE(review): no caching is done in this handler; `cached` is always False.
+    """
+    try:
+        user_id = str(current_user.get('id', ''))
+        if not user_id:
+            raise HTTPException(status_code=401, detail="Invalid user ID in authentication token")
+        
+        # Ensure schema columns exist
+        try:
+            ensure_subscription_plan_columns(db)
+            ensure_usage_summaries_columns(db)
+        except Exception as schema_err:
+            logger.warning(f"Schema check failed: {schema_err}")
+        
+        pricing_service = PricingService(db)
+        
+        # Convert request operations to internal format; operation_models stays index-aligned
+        operations_to_validate, operation_models = [], []
+        for op in request.operations:
+            try:
+                # Map provider string to APIProvider enum
+                provider_str = op.provider.lower()
+                if provider_str == "huggingface":
+                    provider_enum = APIProvider.MISTRAL  # Maps to HuggingFace
+                elif provider_str == "video":
+                    provider_enum = APIProvider.VIDEO
+                elif provider_str == "image_edit":
+                    provider_enum = APIProvider.IMAGE_EDIT
+                elif provider_str == "stability":
+                    provider_enum = APIProvider.STABILITY
+                elif provider_str == "audio":
+                    provider_enum = APIProvider.AUDIO
+                else:
+                    try:
+                        provider_enum = APIProvider(provider_str)
+                    except ValueError:
+                        logger.warning(f"Unknown provider: {provider_str}, skipping")
+                        continue
+                
+                operations_to_validate.append({
+                    'provider': provider_enum,
+                    'tokens_requested': op.tokens_requested or 0,
+                    'actual_provider_name': op.actual_provider_name or op.provider,
+                    'operation_type': op.operation_type
+                })
+                operation_models.append(op.model)
+            except Exception as e:
+                logger.warning(f"Error processing operation {op.operation_type}: {e}")
+        
+        if not operations_to_validate:
+            raise HTTPException(status_code=400, detail="No valid operations provided")
+        
+        # Perform pre-flight validation
+        can_proceed, message, error_details = pricing_service.check_comprehensive_limits(
+            user_id=user_id,
+            operations=operations_to_validate
+        )
+        
+        # Get pricing and cost estimation for each operation
+        operation_results = []
+        total_cost = 0.0
+        
+        for op, model_name in zip(operations_to_validate, operation_models):
+            op_result = {
+                'provider': op['actual_provider_name'],
+                'operation_type': op['operation_type'],
+                'cost': 0.0,
+                'allowed': can_proceed,
+                'limit_info': None,
+                'message': None
+            }
+            
+            # Get pricing for this operation. model_name comes from the validated op itself:
+            # indexing request.operations here would drift once an unknown provider is skipped.
+            if model_name:
+                pricing_info = pricing_service.get_pricing_for_provider_model(
+                    op['provider'],
+                    model_name
+                )
+                
+                if pricing_info:
+                    # Determine cost based on operation type
+                    if op['provider'] in [APIProvider.VIDEO, APIProvider.IMAGE_EDIT, APIProvider.STABILITY]:
+                        cost = pricing_info.get('cost_per_request', 0.0) or pricing_info.get('cost_per_image', 0.0) or 0.0
+                    elif op['provider'] == APIProvider.AUDIO:
+                        # Audio pricing is per character (every character is 1 token)
+                        cost = (pricing_info.get('cost_per_input_token', 0.0) or 0.0) * (op['tokens_requested'] / 1000.0)
+                    elif op['tokens_requested'] > 0:
+                        # Token-based cost estimation (rough estimate)
+                        cost = (pricing_info.get('cost_per_input_token', 0.0) or 0.0) * (op['tokens_requested'] / 1000)
+                    else:
+                        cost = pricing_info.get('cost_per_request', 0.0) or 0.0
+                    
+                    op_result['cost'] = round(cost, 4)
+                    total_cost += cost
+                else:
+                    # Use default cost if pricing not found
+                    if op['provider'] == APIProvider.VIDEO:
+                        op_result['cost'] = 0.10  # Default video cost
+                        total_cost += 0.10
+                    elif op['provider'] == APIProvider.IMAGE_EDIT:
+                        op_result['cost'] = 0.05  # Default image edit cost
+                        total_cost += 0.05
+                    elif op['provider'] == APIProvider.STABILITY:
+                        op_result['cost'] = 0.04  # Default image generation cost
+                        total_cost += 0.04
+                    elif op['provider'] == APIProvider.AUDIO:
+                        # Default audio cost: $0.05 per 1,000 characters
+                        cost = (op['tokens_requested'] / 1000.0) * 0.05
+                        op_result['cost'] = round(cost, 4)
+                        total_cost += cost
+            
+            # Get limit information
+            limit_info = None
+            if error_details and not can_proceed:
+                usage_info = error_details.get('usage_info', {})
+                if usage_info:
+                    op_result['message'] = message
+                    limit_info = {
+                        'current_usage': usage_info.get('current_usage', 0),
+                        'limit': usage_info.get('limit', 0),
+                        'remaining': max(0, usage_info.get('limit', 0) - usage_info.get('current_usage', 0))
+                    }
+                    op_result['limit_info'] = limit_info
+            else:
+                # Get current usage for this provider
+                limits = pricing_service.get_user_limits(user_id)
+                if limits:
+                    usage_summary = db.query(UsageSummary).filter(
+                        UsageSummary.user_id == user_id,
+                        UsageSummary.billing_period == pricing_service.get_current_billing_period(user_id)
+                    ).first()
+                    
+                    if usage_summary:
+                        if op['provider'] == APIProvider.VIDEO:
+                            current = getattr(usage_summary, 'video_calls', 0) or 0
+                            limit = limits['limits'].get('video_calls', 0)
+                        elif op['provider'] == APIProvider.IMAGE_EDIT:
+                            current = getattr(usage_summary, 'image_edit_calls', 0) or 0
+                            limit = limits['limits'].get('image_edit_calls', 0)
+                        elif op['provider'] == APIProvider.STABILITY:
+                            current = getattr(usage_summary, 'stability_calls', 0) or 0
+                            limit = limits['limits'].get('stability_calls', 0)
+                        elif op['provider'] == APIProvider.AUDIO:
+                            current = getattr(usage_summary, 'audio_calls', 0) or 0
+                            limit = limits['limits'].get('audio_calls', 0)
+                        else:
+                            # For LLM providers, use token limits
+                            provider_key = op['provider'].value
+                            current_tokens = getattr(usage_summary, f"{provider_key}_tokens", 0) or 0
+                            limit = limits['limits'].get(f"{provider_key}_tokens", 0)
+                            current = current_tokens
+                        
+                        limit_info = {
+                            'current_usage': current,
+                            'limit': limit,
+                            'remaining': max(0, limit - current) if limit > 0 else None  # None, not inf: inf is not valid JSON
+                        }
+                        op_result['limit_info'] = limit_info
+            
+            operation_results.append(op_result)
+        
+        # Get overall usage summary
+        limits = pricing_service.get_user_limits(user_id)
+        usage_summary = None
+        if limits:
+            usage_summary = db.query(UsageSummary).filter(
+                UsageSummary.user_id == user_id,
+                UsageSummary.billing_period == pricing_service.get_current_billing_period(user_id)
+            ).first()
+        
+        response_data = {
+            'can_proceed': can_proceed,
+            'estimated_cost': round(total_cost, 4),
+            'operations': operation_results,
+            'total_cost': round(total_cost, 4),
+            'usage_summary': None,
+            'cached': False  # TODO: Track if result was cached
+        }
+        
+        if usage_summary and limits:
+            # For video generation, show video limits
+            video_current = getattr(usage_summary, 'video_calls', 0) or 0
+            video_limit = limits['limits'].get('video_calls', 0)
+            
+            response_data['usage_summary'] = {
+                'current_calls': video_current,
+                'limit': video_limit,
+                'remaining': max(0, video_limit - video_current) if video_limit > 0 else None  # None, not inf: inf is not valid JSON
+            }
+        
+        return {
+            "success": True,
+            "data": response_data
+        }
+    
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Error in pre-flight check: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Pre-flight check failed: {str(e)}")
--- a/backend/logging_config.py
+++ b/backend/logging_config.py
@@ -97,7 +97,14 @@ def setup_clean_logging():
        def video_generation_filter(record):
            msg = record.get("message", "")
            name = record.get("name", "")
-            return "[StoryVideoGeneration]" in msg or "services.story_writer.video_generation_service" in name
+            service = record.get("extra", {}).get("service")
+            msg_tags = ("[StoryVideoGeneration]", "[video_gen]")
+            module_names = (
+                "services.story_writer.video_generation_service",
+                "services.llm_providers.main_video_generation",
+            )
+            tagged = any(tag in msg for tag in msg_tags) or any(mod in name for mod in module_names)
+            return tagged or service == "video_generation_service"
        logger.add(
            sys.stdout.write,
            level="INFO",
--- a/backend/middleware/auth_middleware.py
+++ b/backend/middleware/auth_middleware.py
@@ -2,7 +2,7 @@

 import os
 from typing import Optional, Dict, Any
-from fastapi import HTTPException, Depends, status
+from fastapi import HTTPException, Depends, status, Request, Query
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 from loguru import logger
 from dotenv import load_dotenv
@@ -259,3 +259,63 @@ async def get_optional_user(
    except Exception as e:
        logger.warning(f"Optional authentication failed: {e}")
        return None
+
+async def get_current_user_with_query_token(
+    request: Request,
+    credentials: Optional[HTTPAuthorizationCredentials] = Depends(security)
+) -> Dict[str, Any]:
+    """Authenticate the caller from the Authorization header or a ``token`` query parameter.
+    
+    Media endpoints (audio, video, images) are requested by HTML elements such as
+    <audio> or <img>, which cannot send custom headers, so the token may be passed
+    in the URL instead. Header credentials take precedence when both are present.
+    
+    NOTE(review): URL tokens can leak via access logs and browser history, and this fallback
+    only runs if `security` does not itself reject header-less requests - confirm both.
+    
+    Args:
+        request: FastAPI request object; its ``token`` query parameter is the fallback
+        credentials: HTTP authorization credentials from the header, if any
+    
+    Returns:
+        The user value returned by ``clerk_auth.verify_token``
+    
+    Raises:
+        HTTPException: 401 when no token is supplied, the token fails verification,
+            or any unexpected error occurs during authentication
+    """
+    def unauthorized(detail: str) -> HTTPException:
+        # Every failure path answers with the same 401 + Bearer challenge
+        return HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail=detail,
+            headers={"WWW-Authenticate": "Bearer"},
+        )
+
+    try:
+        # Prefer the header; consult the query string only when no header was sent
+        if credentials:
+            bearer_token = credentials.credentials
+        else:
+            bearer_token = request.query_params.get("token")
+
+        if not bearer_token:
+            logger.warning("No credentials provided (neither header nor query parameter)")
+            raise unauthorized("Not authenticated")
+
+        # A falsy result from verify_token is treated as a rejected token
+        verified_user = await clerk_auth.verify_token(bearer_token)
+        if verified_user:
+            return verified_user
+
+        # Token verification failed (likely expired) - log at debug level to reduce noise
+        logger.debug("Token verification failed (likely expired token)")
+        raise unauthorized("Authentication failed")
+
+    except HTTPException:
+        # Deliberate 401s raised above pass through untouched
+        raise
+    except Exception as e:
+        # Anything unexpected is logged and reported as a generic 401
+        logger.error(f"Authentication error: {e}")
+        raise unauthorized("Authentication failed")
--- a/backend/models/story_models.py
+++ b/backend/models/story_models.py
@@ -207,6 +207,32 @@ class StoryImageGenerationResponse(BaseModel):
    task_id: Optional[str] = Field(None, description="Task ID for async operations")


+class RegenerateImageRequest(BaseModel):
+    """Request model for regenerating a single scene image from a caller-supplied prompt."""
+    scene_number: int = Field(..., description="Scene number to regenerate image for")
+    scene_title: str = Field(..., description="Scene title")
+    prompt: str = Field(..., description="Direct prompt to use for image generation (no AI prompt generation)")
+    provider: Optional[str] = Field(None, description="Image generation provider (gemini, huggingface, stability)")  # free-form string; the listed names are not enforced here
+    width: Optional[int] = Field(1024, description="Image width")  # no bounds declared on this model
+    height: Optional[int] = Field(1024, description="Image height")  # no bounds declared on this model
+    model: Optional[str] = Field(None, description="Model to use for image generation")
+
+
+class RegenerateImageResponse(BaseModel):
+    """Response model describing one regenerated scene image and how it was produced."""
+    scene_number: int = Field(..., description="Scene number")
+    scene_title: str = Field(..., description="Scene title")
+    image_filename: str = Field(..., description="Generated image filename")
+    image_url: str = Field(..., description="Image URL")
+    width: int = Field(..., description="Image width")
+    height: int = Field(..., description="Image height")
+    provider: str = Field(..., description="Provider used")
+    model: Optional[str] = Field(None, description="Model used")
+    seed: Optional[int] = Field(None, description="Seed used")
+    success: bool = Field(default=True, description="Whether the generation was successful")  # defaults to True when not set explicitly
+    error: Optional[str] = Field(None, description="Error message if generation failed")
+
+
 class StoryAudioGenerationRequest(BaseModel):
    """Request model for audio generation."""
    scenes: List[StoryScene] = Field(..., description="List of scenes to generate audio for")
@@ -234,11 +260,41 @@ class StoryAudioGenerationResponse(BaseModel):
    task_id: Optional[str] = Field(None, description="Task ID for async operations")


+class GenerateAIAudioRequest(BaseModel):
+    """Request model for generating AI audio (text-to-speech) for a single scene."""
+    scene_number: int = Field(..., description="Scene number to generate audio for")
+    scene_title: str = Field(..., description="Scene title")
+    text: str = Field(..., description="Text to convert to speech")  # no length limit declared on this model
+    voice_id: Optional[str] = Field("Wise_Woman", description="Voice ID for AI audio generation")
+    speed: Optional[float] = Field(1.0, description="Speech speed (0.5-2.0)")  # range is descriptive only; no ge/le declared
+    volume: Optional[float] = Field(1.0, description="Speech volume (0.1-10.0)")  # range is descriptive only; no ge/le declared
+    pitch: Optional[float] = Field(0.0, description="Speech pitch (-12 to 12)")  # range is descriptive only; no ge/le declared
+    emotion: Optional[str] = Field("happy", description="Emotion for speech")  # free-form string; allowed values are not listed here
+
+
+class GenerateAIAudioResponse(BaseModel):
+    """Response model describing one generated AI audio clip and how it was produced."""
+    scene_number: int = Field(..., description="Scene number")
+    scene_title: str = Field(..., description="Scene title")
+    audio_filename: str = Field(..., description="Generated audio filename")
+    audio_url: str = Field(..., description="Audio URL")
+    provider: str = Field(..., description="Provider used (wavespeed)")
+    model: str = Field(..., description="Model used (minimax/speech-02-hd)")
+    voice_id: str = Field(..., description="Voice ID used")
+    text_length: int = Field(..., description="Number of characters in text")
+    file_size: int = Field(..., description="Audio file size in bytes")
+    cost: float = Field(..., description="Cost of generation")  # currency/units not stated on this model - TODO confirm
+    success: bool = Field(default=True, description="Whether the generation was successful")  # defaults to True when not set explicitly
+    error: Optional[str] = Field(None, description="Error message if generation failed")
+
+
 class StoryVideoGenerationRequest(BaseModel):
    """Request model for video generation."""
    scenes: List[StoryScene] = Field(..., description="List of scenes to generate video for")
    image_urls: List[str] = Field(..., description="List of image URLs for each scene")
    audio_urls: List[str] = Field(..., description="List of audio URLs for each scene")
+    video_urls: Optional[List[Optional[str]]] = Field(None, description="Optional list of animated video URLs (preferred over images)")
+    ai_audio_urls: Optional[List[Optional[str]]] = Field(None, description="Optional list of AI audio URLs (preferred over free audio)")
    story_title: Optional[str] = Field(default="Story", description="Title of the story")
    fps: Optional[int] = Field(default=24, description="Frames per second for video")
    transition_duration: Optional[float] = Field(default=0.5, description="Duration of transitions between scenes")
@@ -260,3 +316,39 @@ class StoryVideoGenerationResponse(BaseModel):
    video: StoryVideoResult = Field(..., description="Generated video")
    success: bool = Field(default=True, description="Whether the generation was successful")
    task_id: Optional[str] = Field(None, description="Task ID for async operations")
+
+
+class AnimateSceneRequest(BaseModel):
+    """Request model for a per-scene animation preview generated from the scene's image."""
+    scene_number: int = Field(..., description="Scene number to animate")
+    scene_data: Dict[str, Any] = Field(..., description="Scene data payload")  # untyped dict; expected keys are not declared here
+    story_context: Dict[str, Any] = Field(..., description="Story-wide context used for prompts")  # untyped dict; expected keys are not declared here
+    image_url: str = Field(..., description="Relative URL to the generated scene image")
+    duration: int = Field(default=5, description="Animation duration (5 or 10 seconds)")  # "5 or 10" is not validated by this model
+
+
+class AnimateSceneVoiceoverRequest(AnimateSceneRequest):
+    """Request model for WaveSpeed InfiniteTalk animation; adds audio and output options to AnimateSceneRequest."""
+    audio_url: str = Field(..., description="Relative URL to the generated scene audio")
+    resolution: Optional[str] = Field("720p", description="Output resolution ('480p' or '720p')")  # the two values are not validated by this model
+    prompt: Optional[str] = Field(None, description="Optional positive prompt override")
+
+
+class AnimateSceneResponse(BaseModel):
+    """Response model describing one animated scene preview and how it was produced."""
+    success: bool = Field(default=True, description="Whether the animation succeeded")  # defaults to True when not set explicitly
+    scene_number: int = Field(..., description="Scene number animated")
+    video_filename: str = Field(..., description="Stored video filename")
+    video_url: str = Field(..., description="API URL to access the animated video")
+    duration: int = Field(..., description="Duration of the animation")
+    cost: float = Field(..., description="Cost billed for the animation")  # currency/units not stated on this model - TODO confirm
+    prompt_used: str = Field(..., description="Animation prompt passed to the model")
+    provider: str = Field(default="wavespeed", description="Underlying provider used")
+    prediction_id: Optional[str] = Field(None, description="WaveSpeed prediction ID for resume operations")
+
+
+class ResumeSceneAnimationRequest(BaseModel):
+    """Request model to resume a scene animation download from an existing prediction ID."""
+    prediction_id: str = Field(..., description="WaveSpeed prediction ID to resume from")
+    scene_number: int = Field(..., description="Scene number being resumed")
+    duration: int = Field(default=5, description="Animation duration (5 or 10 seconds)")  # "5 or 10" is not validated by this model
--- a/backend/models/subscription_models.py
+++ b/backend/models/subscription_models.py
@@ -37,6 +37,7 @@ class APIProvider(enum.Enum):
    EXA = "exa"
    VIDEO = "video"
    IMAGE_EDIT = "image_edit"
+    AUDIO = "audio"

 class BillingCycle(enum.Enum):
    MONTHLY = "monthly"
@@ -72,6 +73,7 @@ class SubscriptionPlan(Base):
    exa_calls_limit = Column(Integer, default=0)  # Exa neural search
    video_calls_limit = Column(Integer, default=0)  # AI video generation
    image_edit_calls_limit = Column(Integer, default=0)  # AI image editing
+    audio_calls_limit = Column(Integer, default=0)  # AI audio generation (text-to-speech)
    
    # Token Limits (for LLM providers)
    gemini_tokens_limit = Column(Integer, default=0)
@@ -191,6 +193,7 @@ class UsageSummary(Base):
    exa_calls = Column(Integer, default=0)
    video_calls = Column(Integer, default=0)  # AI video generation
    image_edit_calls = Column(Integer, default=0)  # AI image editing
+    audio_calls = Column(Integer, default=0)  # AI audio generation (text-to-speech)
    
    # Token Usage
    gemini_tokens = Column(Integer, default=0)
@@ -211,6 +214,7 @@ class UsageSummary(Base):
    exa_cost = Column(Float, default=0.0)
    video_cost = Column(Float, default=0.0)  # AI video generation
    image_edit_cost = Column(Float, default=0.0)  # AI image editing
+    audio_cost = Column(Float, default=0.0)  # AI audio generation (text-to-speech)
    
    # Totals
    total_calls = Column(Integer, default=0)
--- a/backend/services/llm_providers/main_audio_generation.py
+++ b/backend/services/llm_providers/main_audio_generation.py
@@ -0,0 +1,301 @@
+"""
+Main Audio Generation Service for ALwrity Backend.
+
+This service provides AI-powered text-to-speech functionality using WaveSpeed Minimax Speech 02 HD.
+"""
+
+from __future__ import annotations
+
+import sys
+from typing import Optional, Dict, Any
+from datetime import datetime
+from loguru import logger
+from fastapi import HTTPException
+
+from services.wavespeed.client import WaveSpeedClient
+from services.onboarding.api_key_manager import APIKeyManager
+from utils.logger_utils import get_service_logger
+
+logger = get_service_logger("audio_generation")
+
+
+class AudioGenerationResult:
+    """Plain container for one text-to-speech result: the audio bytes plus provider/model/voice metadata."""
+    
+    def __init__(
+        self,
+        audio_bytes: bytes,
+        provider: str,
+        model: str,
+        voice_id: str,
+        text_length: int,
+        file_size: int,
+    ) -> None:
+        self.audio_bytes = audio_bytes
+        self.provider = provider
+        self.model = model
+        self.voice_id = voice_id
+        self.text_length = text_length  # generate_audio passes the character count of the input text
+        self.file_size = file_size  # generate_audio passes len(audio_bytes)
+
+
+def generate_audio(
+    text: str,
+    voice_id: str = "Wise_Woman",
+    speed: float = 1.0,
+    volume: float = 1.0,
+    pitch: float = 0.0,
+    emotion: str = "happy",
+    user_id: Optional[str] = None,
+    **kwargs
+) -> AudioGenerationResult:
+    """
+    Generate audio using AI text-to-speech with subscription tracking.
+    
+    Limits are checked before the API call; usage tracking afterwards is best-effort (errors are logged and rolled back, not raised).
+    
+    Args:
+        text: Text to convert to speech (max 10000 characters)
+        voice_id: Voice ID (default: "Wise_Woman")
+        speed: Speech speed (0.5-2.0, default: 1.0)
+        volume: Speech volume (0.1-10.0, default: 1.0)
+        pitch: Speech pitch (-12 to 12, default: 0.0)
+        emotion: Emotion (default: "happy")
+        user_id: User ID for subscription checking (required)
+        **kwargs: Additional parameters (sample_rate, bitrate, format, etc.)
+        
+    Returns:
+        AudioGenerationResult: Generated audio result
+        
+    Raises:
+        RuntimeError: If user_id is missing or the subscription check itself fails.
+        HTTPException: 429 when subscription limits are exceeded, 502 when the WaveSpeed call fails, 500 for any other error.
+    """
+    try:
+        logger.info("[audio_gen] Starting audio generation")
+        logger.debug(f"[audio_gen] Text length: {len(text)} characters, voice: {voice_id}")
+        
+        # SUBSCRIPTION CHECK - Required and strict enforcement
+        if not user_id:
+            raise RuntimeError("user_id is required for subscription checking. Please provide Clerk user ID.")
+        
+        # Calculate cost based on character count (every character is 1 token)
+        # Pricing: $0.05 per 1,000 characters
+        character_count = len(text)
+        cost_per_1000_chars = 0.05
+        estimated_cost = (character_count / 1000.0) * cost_per_1000_chars
+        
+        try:
+            from services.database import get_db
+            from services.subscription import PricingService
+            from models.subscription_models import UsageSummary, APIProvider
+            
+            db = next(get_db())
+            try:
+                pricing_service = PricingService(db)
+                
+                # Check limits using sync method from pricing service (strict enforcement)
+                # Use AUDIO provider for audio generation
+                can_proceed, message, usage_info = pricing_service.check_usage_limits(
+                    user_id=user_id,
+                    provider=APIProvider.AUDIO,
+                    tokens_requested=character_count,  # Use character count as "tokens" for audio
+                    actual_provider_name="wavespeed"  # Actual provider is WaveSpeed
+                )
+                
+                if not can_proceed:
+                    logger.warning(f"[audio_gen] Subscription limit exceeded for user {user_id}: {message}")
+                    error_detail = {
+                        'error': message,
+                        'message': message,
+                        'provider': 'wavespeed',
+                        'usage_info': usage_info if usage_info else {}
+                    }
+                    raise HTTPException(status_code=429, detail=error_detail)
+                
+                # Get current usage for limit checking
+                current_period = pricing_service.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m")
+                usage = db.query(UsageSummary).filter(
+                    UsageSummary.user_id == user_id,
+                    UsageSummary.billing_period == current_period
+                ).first()
+            finally:
+                db.close()
+        except HTTPException:
+            raise
+        except RuntimeError:
+            raise
+        except Exception as sub_error:
+            logger.error(f"[audio_gen] Subscription check failed for user {user_id}: {sub_error}")
+            raise RuntimeError(f"Subscription check failed: {str(sub_error)}")
+        
+        # Generate audio using WaveSpeed
+        try:
+            client = WaveSpeedClient()
+            audio_bytes = client.generate_speech(
+                text=text,
+                voice_id=voice_id,
+                speed=speed,
+                volume=volume,
+                pitch=pitch,
+                emotion=emotion,
+                enable_sync_mode=True,
+                **kwargs
+            )
+            
+            logger.info(f"[audio_gen] ✅ API call successful, generated {len(audio_bytes)} bytes")
+        except HTTPException:
+            raise
+        except Exception as api_error:
+            logger.error(f"[audio_gen] Audio generation API failed: {api_error}")
+            raise HTTPException(
+                status_code=502,
+                detail={
+                    "error": "Audio generation failed",
+                    "message": str(api_error)
+                }
+            )
+        
+        # TRACK USAGE after successful API call
+        if audio_bytes:
+            logger.info(f"[audio_gen] ✅ API call successful, tracking usage for user {user_id}")
+            try:
+                db_track = next(get_db())
+                try:
+                    from models.subscription_models import UsageSummary, APIUsageLog, APIProvider
+                    from services.subscription import PricingService
+                    
+                    pricing = PricingService(db_track)
+                    current_period = pricing.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m")
+                    
+                    # Get or create usage summary
+                    summary = db_track.query(UsageSummary).filter(
+                        UsageSummary.user_id == user_id,
+                        UsageSummary.billing_period == current_period
+                    ).first()
+                    
+                    if not summary:
+                        summary = UsageSummary(
+                            user_id=user_id,
+                            billing_period=current_period
+                        )
+                        db_track.add(summary)
+                        db_track.flush()
+                    
+                    # Get current values before update
+                    current_calls_before = getattr(summary, "audio_calls", 0) or 0
+                    current_cost_before = getattr(summary, "audio_cost", 0.0) or 0.0
+                    
+                    # Update audio calls and cost
+                    new_calls = current_calls_before + 1
+                    new_cost = current_cost_before + estimated_cost
+                    
+                    # Direct SQL UPDATE; the import is aliased so it cannot rebind the `text` argument (still needed below)
+                    from sqlalchemy import text as sql_text
+                    update_query = sql_text("""
+                        UPDATE usage_summaries 
+                        SET audio_calls = :new_calls,
+                            audio_cost = :new_cost
+                        WHERE user_id = :user_id AND billing_period = :period
+                    """)
+                    db_track.execute(update_query, {
+                        'new_calls': new_calls,
+                        'new_cost': new_cost,
+                        'user_id': user_id,
+                        'period': current_period
+                    })
+                    
+                    # Update total cost
+                    summary.total_cost = (summary.total_cost or 0.0) + estimated_cost
+                    summary.total_calls = (summary.total_calls or 0) + 1
+                    summary.updated_at = datetime.utcnow()
+                    
+                    # Create usage log
+                    usage_log = APIUsageLog(
+                        user_id=user_id,
+                        provider=APIProvider.AUDIO,
+                        endpoint="/audio-generation/wavespeed",
+                        method="POST",
+                        model_used="minimax/speech-02-hd",
+                        tokens_input=character_count,
+                        tokens_output=0,
+                        tokens_total=character_count,
+                        cost_input=0.0,
+                        cost_output=0.0,
+                        cost_total=estimated_cost,
+                        response_time=0.0,
+                        status_code=200,
+                        request_size=len(text.encode("utf-8")),
+                        response_size=len(audio_bytes),
+                        billing_period=current_period,
+                    )
+                    db_track.add(usage_log)
+                    
+                    # Get plan details for unified log
+                    limits = pricing.get_user_limits(user_id)
+                    plan_name = limits.get('plan_name', 'unknown') if limits else 'unknown'
+                    tier = limits.get('tier', 'unknown') if limits else 'unknown'
+                    audio_limit = limits['limits'].get("audio_calls", 0) if limits else 0
+                    # Only show ∞ for Enterprise tier when limit is 0 (unlimited)
+                    audio_limit_display = audio_limit if (audio_limit > 0 or tier != 'enterprise') else '∞'
+                    
+                    # Get related stats for unified log
+                    current_image_calls = getattr(summary, "stability_calls", 0) or 0
+                    image_limit = limits['limits'].get("stability_calls", 0) if limits else 0
+                    current_image_edit_calls = getattr(summary, "image_edit_calls", 0) or 0
+                    image_edit_limit = limits['limits'].get("image_edit_calls", 0) if limits else 0
+                    current_video_calls = getattr(summary, "video_calls", 0) or 0
+                    video_limit = limits['limits'].get("video_calls", 0) if limits else 0
+                    
+                    db_track.commit()
+                    logger.info(f"[audio_gen] ✅ Successfully tracked usage: user {user_id} -> audio -> {new_calls} calls, ${estimated_cost:.4f}")
+                    
+                    # UNIFIED SUBSCRIPTION LOG - Shows before/after state in one message
+                    print(f"""
+[SUBSCRIPTION] Audio Generation
+├─ User: {user_id}
+├─ Plan: {plan_name} ({tier})
+├─ Provider: wavespeed
+├─ Actual Provider: wavespeed
+├─ Model: minimax/speech-02-hd
+├─ Voice: {voice_id}
+├─ Calls: {current_calls_before} → {new_calls} / {audio_limit_display}
+├─ Cost: ${current_cost_before:.4f} → ${new_cost:.4f}
+├─ Characters: {character_count}
+├─ Images: {current_image_calls} / {image_limit if image_limit > 0 else '∞'}
+├─ Image Editing: {current_image_edit_calls} / {image_edit_limit if image_edit_limit > 0 else '∞'}
+├─ Videos: {current_video_calls} / {video_limit if video_limit > 0 else '∞'}
+└─ Status: ✅ Allowed & Tracked
+""", flush=True)
+                    sys.stdout.flush()
+                except Exception as track_error:
+                    logger.error(f"[audio_gen] ❌ Error tracking usage (non-blocking): {track_error}", exc_info=True)
+                    db_track.rollback()
+                finally:
+                    db_track.close()
+            except Exception as usage_error:
+                logger.error(f"[audio_gen] ❌ Failed to track usage: {usage_error}", exc_info=True)
+        
+        return AudioGenerationResult(
+            audio_bytes=audio_bytes,
+            provider="wavespeed",
+            model="minimax/speech-02-hd",
+            voice_id=voice_id,
+            text_length=character_count,
+            file_size=len(audio_bytes),
+        )
+        
+    except HTTPException:
+        raise
+    except RuntimeError:
+        raise
+    except Exception as e:
+        logger.error(f"[audio_gen] Error generating audio: {e}")
+        raise HTTPException(
+            status_code=500,
+            detail={
+                "error": "Audio generation failed",
+                "message": str(e)
+            }
+        )
+
--- a/backend/services/llm_providers/main_text_generation.py
+++ b/backend/services/llm_providers/main_text_generation.py
@@ -515,6 +515,12 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
                        current_video_calls = getattr(summary, "video_calls", 0) or 0
                        video_limit = limits['limits'].get("video_calls", 0) if limits else 0
                        
+                        # Get audio stats for unified log
+                        current_audio_calls = getattr(summary, "audio_calls", 0) or 0
+                        audio_limit = limits['limits'].get("audio_calls", 0) if limits else 0
+                        # Only show ∞ for Enterprise tier when limit is 0 (unlimited)
+                        audio_limit_display = audio_limit if (audio_limit > 0 or tier != 'enterprise') else '∞'
+                        
                        # CRITICAL DEBUG: Print diagnostic info BEFORE commit (always visible, flushed immediately)
                        import sys
                        debug_msg = f"[DEBUG] BEFORE COMMIT - Record count: {record_count}, Raw SQL values: calls={current_calls_before}, tokens={current_tokens_before}, Provider: {provider_name}, Period: {current_period}, New calls will be: {new_calls}, New tokens will be: {new_tokens}"
@@ -571,6 +577,8 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
 ├─ Tokens: {current_tokens_before} → {new_tokens} / {token_limit if token_limit > 0 else '∞'}
 ├─ Images: {current_images_before} / {image_limit if image_limit > 0 else '∞'}
 ├─ Image Editing: {current_image_edit_calls} / {image_edit_limit if image_edit_limit > 0 else '∞'}
+├─ Videos: {current_video_calls} / {video_limit if video_limit > 0 else '∞'}
+├─ Audio: {current_audio_calls} / {audio_limit_display}
 └─ Status: ✅ Allowed & Tracked
 """)
                    except Exception as track_error:
@@ -819,6 +827,12 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
                                current_video_calls = getattr(summary, "video_calls", 0) or 0
                                video_limit = limits['limits'].get("video_calls", 0) if limits else 0
                                
+                                # Get audio stats for unified log
+                                current_audio_calls = getattr(summary, "audio_calls", 0) or 0
+                                audio_limit = limits['limits'].get("audio_calls", 0) if limits else 0
+                                # Only show ∞ for Enterprise tier when limit is 0 (unlimited)
+                                audio_limit_display = audio_limit if (audio_limit > 0 or tier != 'enterprise') else '∞'
+                                
                                # CRITICAL: Flush before commit to ensure changes are immediately visible to other sessions
                                db_track.flush()  # Flush to ensure changes are in DB (not just in transaction)
                                db_track.commit()  # Commit transaction to make changes visible to other sessions
@@ -838,6 +852,7 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
 ├─ Images: {current_images_before} / {image_limit if image_limit > 0 else '∞'}
 ├─ Image Editing: {current_image_edit_calls} / {image_edit_limit if image_edit_limit > 0 else '∞'}
 ├─ Videos: {current_video_calls} / {video_limit if video_limit > 0 else '∞'}
+├─ Audio: {current_audio_calls} / {audio_limit_display}
 └─ Status: ✅ Allowed & Tracked
 """)
                            except Exception as track_error:
--- a/backend/services/llm_providers/main_video_generation.py
+++ b/backend/services/llm_providers/main_video_generation.py
@@ -10,6 +10,7 @@ from __future__ import annotations
 import os
 import base64
 import io
+import sys
 from typing import Any, Dict, Optional, Union

 from fastapi import HTTPException
@@ -22,11 +23,11 @@ except ImportError:
    InferenceClient = None

 from ..onboarding.api_key_manager import APIKeyManager
+from services.subscription import PricingService
 from utils.logger_utils import get_service_logger

 logger = get_service_logger("video_generation_service")

-
 class VideoProviderNotImplemented(Exception):
    pass

@@ -48,44 +49,80 @@ def _get_api_key(provider: str) -> Optional[str]:
 def _coerce_video_bytes(output: Any) -> bytes:
    """
    Normalizes the different return shapes that huggingface_hub may emit for video tasks.
-    Depending on the provider/library version we may get:
-      - raw bytes
-      - an object with `.video` or `.bytes` attributes (plus optional `.save`)
-      - a dict containing a `video` key with bytes/base64 data
+    According to HF docs, text_to_video() should return bytes directly.
    """
-    data: Union[bytes, bytearray, memoryview, io.BufferedIOBase, None] = None
-
+    logger.debug(f"[video_gen] _coerce_video_bytes received type: {type(output)}")
+    
+    # Most common case: bytes directly
    if isinstance(output, (bytes, bytearray, memoryview)):
+        logger.debug(f"[video_gen] Output is bytes: {len(output)} bytes")
        return bytes(output)

+    # Handle file-like objects
+    if hasattr(output, "read"):
+        logger.debug("[video_gen] Output has read() method, reading...")
+        data = output.read()
+        if isinstance(data, (bytes, bytearray, memoryview)):
+            return bytes(data)
+        raise TypeError(f"File-like object returned non-bytes: {type(data)}")
+
    # Objects with direct attribute access
    if hasattr(output, "video"):
+        logger.debug("[video_gen] Output has 'video' attribute")
        data = getattr(output, "video")
-    elif hasattr(output, "bytes"):
+        if isinstance(data, (bytes, bytearray, memoryview)):
+            return bytes(data)
+        if hasattr(data, "read"):
+            return bytes(data.read())
+    
+    if hasattr(output, "bytes"):
+        logger.debug("[video_gen] Output has 'bytes' attribute")
        data = getattr(output, "bytes")
-    elif isinstance(output, dict) and "video" in output:
-        data = output["video"]
-    else:
-        data = output
+        if isinstance(data, (bytes, bytearray, memoryview)):
+            return bytes(data)
+        if hasattr(data, "read"):
+            return bytes(data.read())

-    # Handle file-like responses
-    if hasattr(data, "read"):
-        data = data.read()
+    # Dict handling - but this shouldn't happen with text_to_video()
+    if isinstance(output, dict):
+        logger.warning(f"[video_gen] Received dict output (unexpected): keys={list(output.keys())}")
+        # Try to get video key safely - use .get() to avoid KeyError
+        data = output.get("video")
+        if data is not None:
+            if isinstance(data, (bytes, bytearray, memoryview)):
+                return bytes(data)
+            if hasattr(data, "read"):
+                return bytes(data.read())
+        # Try other common keys
+        for key in ["data", "content", "file", "result", "output"]:
+            data = output.get(key)
+            if data is not None:
+                if isinstance(data, (bytes, bytearray, memoryview)):
+                    return bytes(data)
+                if hasattr(data, "read"):
+                    return bytes(data.read())
+        raise TypeError(f"Dict output has no recognized video key. Keys: {list(output.keys())}")

-    if isinstance(data, (bytes, bytearray, memoryview)):
-        return bytes(data)
-
-    if isinstance(data, str):
-        # Expecting data URI or raw base64 string
-        if data.startswith("data:"):
-            _, encoded = data.split(",", 1)
+    # String handling (base64)
+    if isinstance(output, str):
+        logger.debug("[video_gen] Output is string, attempting base64 decode")
+        if output.startswith("data:"):
+            _, encoded = output.split(",", 1)
            return base64.b64decode(encoded)
        try:
-            return base64.b64decode(data)
+            return base64.b64decode(output)
        except Exception as exc:
            raise TypeError(f"Unable to decode string video payload: {exc}") from exc

-    raise TypeError(f"Unsupported video payload type: {type(data)}")
+    # Fallback: try to use output directly
+    logger.warning(f"[video_gen] Unexpected output type: {type(output)}, attempting direct conversion")
+    try:
+        if hasattr(output, "__bytes__"):
+            return bytes(output)
+    except Exception:
+        pass
+
+    raise TypeError(f"Unsupported video payload type: {type(output)}. Output: {str(output)[:200]}")


 def _generate_with_huggingface(
@@ -96,7 +133,6 @@ def _generate_with_huggingface(
    negative_prompt: Optional[str] = None,
    seed: Optional[int] = None,
    model: str = "tencent/HunyuanVideo",
-    input_image_bytes: Optional[bytes] = None,
 ) -> bytes:
    """
    Generates video bytes using Hugging Face's InferenceClient.
@@ -109,7 +145,6 @@ def _generate_with_huggingface(
        raise RuntimeError("HF token not configured. Set an hf_token in APIKeyManager.")

    client = InferenceClient(
-        model=model,
        provider="fal-ai",
        token=token,
    )
@@ -126,26 +161,25 @@ def _generate_with_huggingface(
        params["seed"] = seed

    logger.info(
-        "[video_gen] HuggingFace request model=%s frames=%s steps=%s mode=%s",
+        "[video_gen] HuggingFace request model=%s frames=%s steps=%s mode=text-to-video",
        model,
        num_frames,
        num_inference_steps,
-        "image-to-video" if input_image_bytes else "text-to-video",
    )

    try:
-        call_kwargs = {**params, "model": model}
-        if input_image_bytes:
-            video_output = client.image_to_video(
-                image=input_image_bytes,
-                prompt=prompt,
-                **call_kwargs,
-            )
-        else:
-            video_output = client.text_to_video(
-                prompt,
-                **call_kwargs,
-            )
+        logger.info("[video_gen] Calling client.text_to_video()...")
+        video_output = client.text_to_video(
+            prompt=prompt,
+            model=model,
+            **params,
+        )
+        
+        logger.info(f"[video_gen] text_to_video() returned type: {type(video_output)}")
+        if isinstance(video_output, dict):
+            logger.info(f"[video_gen] Dict keys: {list(video_output.keys())}")
+        elif hasattr(video_output, "__dict__"):
+            logger.info(f"[video_gen] Object attributes: {dir(video_output)}")

        video_bytes = _coerce_video_bytes(video_output)

@@ -158,6 +192,15 @@ def _generate_with_huggingface(
        logger.info(f"[video_gen] Successfully generated video: {len(video_bytes)} bytes")
        return video_bytes

+    except KeyError as e:
+        error_msg = str(e)
+        logger.error(f"[video_gen] HF KeyError: {error_msg}", exc_info=True)
+        logger.error(f"[video_gen] This suggests the API response format is unexpected. Check logs above for response type.")
+        raise HTTPException(status_code=502, detail={
+            "error": f"Hugging Face API returned unexpected response format: {error_msg}",
+            "error_type": "KeyError",
+            "hint": "The API response may have changed. Check server logs for details."
+        })
    except Exception as e:
        error_msg = str(e)
        error_type = type(e).__name__
@@ -179,7 +222,6 @@ def ai_video_generate(
    prompt: str,
    provider: str = "huggingface",
    user_id: Optional[str] = None,
-    input_image_bytes: Optional[bytes] = None,
    **kwargs,
 ) -> bytes:
    """
@@ -187,7 +229,6 @@ def ai_video_generate(

    - provider: 'huggingface' (default), 'gemini' (veo3 stub), 'openai' (sora stub)
    - kwargs: num_frames, guidance_scale, num_inference_steps, negative_prompt, seed, model
-    - input_image_bytes: optional bytes for image-to-video flows (uses image as motion anchor)

    Returns raw video bytes (mp4/webm depending on provider).
    """
@@ -200,7 +241,6 @@ def ai_video_generate(
    # PRE-FLIGHT VALIDATION: Validate video generation before API call
    # MUST happen BEFORE any API calls - return immediately if validation fails
    from services.database import get_db
-    from services.subscription import PricingService
    from services.subscription.preflight_validator import validate_video_generation_operations
    from fastapi import HTTPException
    
@@ -227,7 +267,6 @@ def ai_video_generate(
        if provider == "huggingface":
            video_bytes = _generate_with_huggingface(
                prompt=prompt,
-                input_image_bytes=input_image_bytes,
                **kwargs,
            )
        elif provider == "gemini":
@@ -237,112 +276,14 @@ def ai_video_generate(
        else:
            raise RuntimeError(f"Unknown video provider: {provider}")
        
-        # Track usage AFTER successful generation
-        db_track = next(get_db())
-        try:
-            from models.subscription_models import APIProvider, UsageSummary, APIUsageLog
-            from datetime import datetime
-            from services.subscription import PricingService
-            
-            # Create pricing service for tracking (uses same DB session)
-            pricing_service_track = PricingService(db_track)
-            
-            # Get current billing period
-            current_period = pricing_service_track.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m")
-            
-            # Get or create usage summary
-            usage_summary = db_track.query(UsageSummary).filter(
-                UsageSummary.user_id == user_id,
-                UsageSummary.billing_period == current_period
-            ).first()
-            
-            if not usage_summary:
-                usage_summary = UsageSummary(
-                    user_id=user_id,
-                    billing_period=current_period
-                )
-                db_track.add(usage_summary)
-                db_track.commit()
-            
-            # Calculate cost using pricing service
-            cost_info = pricing_service_track.get_pricing_for_provider_model(
-                APIProvider.VIDEO,
-                model_name
-            )
-            cost_per_video = cost_info.get('cost_per_request', 0.10) if cost_info else 0.10
-            
-            # Get "before" state for unified log
-            current_video_calls_before = getattr(usage_summary, 'video_calls', 0) or 0
-            current_video_cost = getattr(usage_summary, 'video_cost', 0.0) or 0.0
-            
-            # Increment video_calls and track cost
-            new_video_calls = current_video_calls_before + 1
-            usage_summary.video_calls = new_video_calls
-            usage_summary.video_cost = current_video_cost + cost_per_video
-            usage_summary.total_calls = (usage_summary.total_calls or 0) + 1
-            usage_summary.total_cost = (usage_summary.total_cost or 0.0) + cost_per_video
-            
-            # Get plan details for unified log (before commit, in case commit fails)
-            limits = pricing_service_track.get_user_limits(user_id)
-            plan_name = limits.get('plan_name', 'unknown') if limits else 'unknown'
-            tier = limits.get('tier', 'unknown') if limits else 'unknown'
-            video_limit = limits['limits'].get("video_calls", 0) if limits else 0
-            
-            # Get image and image editing stats for unified log
-            current_image_calls = getattr(usage_summary, "stability_calls", 0) or 0
-            image_limit = limits['limits'].get("stability_calls", 0) if limits else 0
-            current_image_edit_calls = getattr(usage_summary, "image_edit_calls", 0) or 0
-            image_edit_limit = limits['limits'].get("image_edit_calls", 0) if limits else 0
-            
-            # Create usage log entry for audit trail
-            usage_log = APIUsageLog(
-                user_id=user_id,
-                provider=APIProvider.VIDEO,
-                endpoint=f"/video-generation/{provider}",
-                method="POST",
-                model_used=model_name,
-                tokens_input=0,
-                tokens_output=0,
-                tokens_total=0,
-                cost_input=0.0,
-                cost_output=0.0,
-                cost_total=cost_per_video,
-                response_time=0.0,  # Could track actual time if needed
-                status_code=200,
-                request_size=len(prompt.encode('utf-8')),
-                response_size=len(video_bytes),
-                billing_period=current_period
-            )
-            db_track.add(usage_log)
-            
-            db_track.commit()
-            logger.info(f"[video_gen] ✅ Successfully tracked usage: user {user_id} -> 1 video call, ${cost_per_video:.4f} cost")
-            
-            # UNIFIED SUBSCRIPTION LOG - Shows before/after state in one message
-            # Flush immediately to ensure it's visible in console/logs
-            import sys
-            log_message = f"""
-[SUBSCRIPTION] Video Generation
-├─ User: {user_id}
-├─ Plan: {plan_name} ({tier})
-├─ Provider: video
-├─ Actual Provider: {provider}
-├─ Model: {model_name or 'default'}
-├─ Calls: {current_video_calls_before} → {new_video_calls} / {video_limit if video_limit > 0 else '∞'}
-├─ Images: {current_image_calls} / {image_limit if image_limit > 0 else '∞'}
-├─ Image Editing: {current_image_edit_calls} / {image_edit_limit if image_edit_limit > 0 else '∞'}
-└─ Status: ✅ Allowed & Tracked
-"""
-            print(log_message, flush=True)
-            sys.stdout.flush()
-            
-        except Exception as track_error:
-            logger.error(f"[video_gen] Error tracking usage: {track_error}", exc_info=True)
-            db_track.rollback()
-            # Don't fail video generation if tracking fails - video is already generated
-        finally:
-            db_track.close()
-        
+        track_video_usage(
+            user_id=user_id,
+            provider=provider,
+            model_name=model_name,
+            prompt=prompt,
+            video_bytes=video_bytes,
+        )
+
        return video_bytes
        
    except HTTPException:
@@ -353,3 +294,139 @@ def ai_video_generate(
        raise HTTPException(status_code=500, detail={"error": str(e)})


+def track_video_usage(
+    *,
+    user_id: str,
+    provider: str,
+    model_name: str,
+    prompt: str,
+    video_bytes: bytes,
+    cost_override: Optional[float] = None,
+) -> Optional[Dict[str, Any]]:
+    """
+    Track subscription usage for any video generation (text-to-video or image-to-video).
+
+    Increments the video call/cost counters on the user's UsageSummary for the
+    current billing period (creating the row if needed), records an APIUsageLog
+    entry, and logs a unified subscription summary. Tracking is best-effort:
+    any error is logged and rolled back instead of being raised.
+
+    Parameters:
+        user_id (str): User whose usage is recorded.
+        provider (str): Actual provider name; appears in the logged endpoint and summary.
+        model_name (str): Model used; also the key for the pricing lookup.
+        prompt (str): Generation prompt; only its UTF-8 byte length is stored.
+        video_bytes (bytes): Generated video; only its length is stored.
+        cost_override (Optional[float]): Cost to record instead of the looked-up price.
+
+    Returns:
+        Optional[Dict[str, Any]]: Previous/current call counts, the video limit
+        (raw and display form) and cost figures, or None when tracking failed.
+    """
+    from datetime import datetime
+
+    from models.subscription_models import APIProvider, APIUsageLog, UsageSummary
+    from services.database import get_db
+
+    db_track = next(get_db())
+    try:
+        logger.info(f"[video_gen] Starting usage tracking for user={user_id}, provider={provider}, model={model_name}")
+        pricing_service_track = PricingService(db_track)
+        current_period = pricing_service_track.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m")
+        logger.debug(f"[video_gen] Billing period: {current_period}")
+
+        usage_summary = (
+            db_track.query(UsageSummary)
+            .filter(
+                UsageSummary.user_id == user_id,
+                UsageSummary.billing_period == current_period,
+            )
+            .first()
+        )
+
+        if not usage_summary:
+            logger.debug(f"[video_gen] Creating new UsageSummary for user={user_id}, period={current_period}")
+            usage_summary = UsageSummary(
+                user_id=user_id,
+                billing_period=current_period,
+            )
+            db_track.add(usage_summary)
+            db_track.commit()
+            db_track.refresh(usage_summary)
+        else:
+            logger.debug(f"[video_gen] Found existing UsageSummary: video_calls={getattr(usage_summary, 'video_calls', 0)}")
+
+        # Price resolution order: explicit override, then the pricing table
+        # entry for this model, then a flat $0.10 fallback.
+        cost_info = pricing_service_track.get_pricing_for_provider_model(
+            APIProvider.VIDEO,
+            model_name,
+        )
+        default_cost = 0.10
+        if cost_info and cost_info.get("cost_per_request") is not None:
+            default_cost = cost_info["cost_per_request"]
+        cost_per_video = cost_override if cost_override is not None else default_cost
+        logger.debug(f"[video_gen] Cost per video: ${cost_per_video} (override={cost_override}, default={default_cost})")
+
+        current_video_calls_before = getattr(usage_summary, "video_calls", 0) or 0
+        current_video_cost = getattr(usage_summary, "video_cost", 0.0) or 0.0
+        usage_summary.video_calls = current_video_calls_before + 1
+        usage_summary.video_cost = current_video_cost + cost_per_video
+        usage_summary.total_calls = (usage_summary.total_calls or 0) + 1
+        usage_summary.total_cost = (usage_summary.total_cost or 0.0) + cost_per_video
+        # Ensure the object is in the session
+        db_track.add(usage_summary)
+        logger.debug(f"[video_gen] Updated usage_summary: video_calls={current_video_calls_before} → {usage_summary.video_calls}")
+
+        # Everything read from `limits` is display-only. Read it defensively so
+        # a missing or partial limits payload cannot raise here and roll back
+        # the usage counters updated above.
+        limits = pricing_service_track.get_user_limits(user_id) or {}
+        limit_values = limits.get("limits") or {}
+        plan_name = limits.get("plan_name", "unknown")
+        tier = limits.get("tier", "unknown")
+        video_limit = limit_values.get("video_calls", 0) or 0
+        current_image_calls = getattr(usage_summary, "stability_calls", 0) or 0
+        image_limit = limit_values.get("stability_calls", 0) or 0
+        current_image_edit_calls = getattr(usage_summary, "image_edit_calls", 0) or 0
+        image_edit_limit = limit_values.get("image_edit_calls", 0) or 0
+        current_audio_calls = getattr(usage_summary, "audio_calls", 0) or 0
+        audio_limit = limit_values.get("audio_calls", 0) or 0
+        # Only show ∞ for Enterprise tier when limit is 0 (unlimited); on other
+        # tiers a 0 limit means the feature is not included in the plan.
+        is_enterprise = tier == 'enterprise'
+        audio_limit_display = audio_limit if (audio_limit > 0 or not is_enterprise) else '∞'
+        video_limit_display = video_limit if (video_limit > 0 or not is_enterprise) else '∞'
+
+        usage_log = APIUsageLog(
+            user_id=user_id,
+            provider=APIProvider.VIDEO,
+            endpoint=f"/video-generation/{provider}",
+            method="POST",
+            model_used=model_name,
+            tokens_input=0,
+            tokens_output=0,
+            tokens_total=0,
+            cost_input=0.0,
+            cost_output=0.0,
+            cost_total=cost_per_video,
+            response_time=0.0,
+            status_code=200,
+            request_size=len(prompt.encode("utf-8")),
+            response_size=len(video_bytes),
+            billing_period=current_period,
+        )
+        db_track.add(usage_log)
+        # commit() flushes pending changes itself, so no explicit flush() is needed.
+        logger.debug("[video_gen] Committing usage tracking changes...")
+        db_track.commit()
+        db_track.refresh(usage_summary)
+        logger.debug(f"[video_gen] Commit successful. Final video_calls: {usage_summary.video_calls}, video_cost: {usage_summary.video_cost}")
+
+        log_message = f"""
+[SUBSCRIPTION] Video Generation
+├─ User: {user_id}
+├─ Plan: {plan_name} ({tier})
+├─ Provider: video
+├─ Actual Provider: {provider}
+├─ Model: {model_name or 'default'}
+├─ Calls: {current_video_calls_before} → {usage_summary.video_calls} / {video_limit_display}
+├─ Images: {current_image_calls} / {image_limit if image_limit > 0 else '∞'}
+├─ Image Editing: {current_image_edit_calls} / {image_edit_limit if image_edit_limit > 0 else '∞'}
+├─ Audio: {current_audio_calls} / {audio_limit_display}
+└─ Status: ✅ Allowed & Tracked
+"""
+        logger.info(log_message)
+        return {
+            "previous_calls": current_video_calls_before,
+            "current_calls": usage_summary.video_calls,
+            "video_limit": video_limit,
+            "video_limit_display": video_limit_display,
+            "cost_per_video": cost_per_video,
+            "total_video_cost": usage_summary.video_cost,
+        }
+    except Exception as track_error:
+        # Best-effort: the caller already holds the generated video, so a
+        # tracking failure is logged (traceback once) and swallowed.
+        logger.error(f"[video_gen] Error tracking usage: {track_error}", exc_info=True)
+        logger.error(f"[video_gen] Exception type: {type(track_error).__name__}")
+        db_track.rollback()
+        return None
+    finally:
+        db_track.close()
+
+
--- a/backend/services/onboarding/api_key_manager.py
+++ b/backend/services/onboarding/api_key_manager.py
@@ -414,7 +414,8 @@ class APIKeyManager:
            'SERPER_API_KEY',
            'METAPHOR_API_KEY',
            'FIRECRAWL_API_KEY',
-            'STABILITY_API_KEY'
+            'STABILITY_API_KEY',
+            'WAVESPEED_API_KEY',
        ]
        
        for provider in providers:
--- a/backend/services/story_writer/audio_generation_service.py
+++ b/backend/services/story_writer/audio_generation_service.py
@@ -288,4 +288,90 @@ class StoryAudioGenerationService:
        
        logger.info(f"[StoryAudioGeneration] Generated {len(audio_results)} audio files out of {total_scenes} scenes")
        return audio_results
+    
+    def generate_ai_audio(
+        self,
+        scene_number: int,
+        scene_title: str,
+        text: str,
+        user_id: str,
+        voice_id: str = "Wise_Woman",
+        speed: float = 1.0,
+        volume: float = 1.0,
+        pitch: float = 0.0,
+        emotion: str = "happy",
+    ) -> Dict[str, Any]:
+        """
+        Synthesize speech for one scene via main_audio_generation and save it to disk.
+
+        Parameters:
+            scene_number (int): Scene number.
+            scene_title (str): Scene title.
+            text (str): Text to convert to speech; stripped before being sent.
+            user_id (str): Clerk user ID for subscription checking.
+            voice_id (str): Voice ID for AI audio generation (default: "Wise_Woman").
+            speed (float): Speech speed (0.5-2.0, default: 1.0).
+            volume (float): Speech volume (0.1-10.0, default: 1.0).
+            pitch (float): Speech pitch (-12 to 12, default: 0.0).
+            emotion (str): Emotion for speech (default: "happy").
+
+        Returns:
+            Dict[str, Any]: Scene info, saved file path/name/URL, provider details
+            reported by the generator, and an estimated cost.
+
+        Raises:
+            ValueError: If text is empty or whitespace-only.
+            HTTPException: Propagated unchanged from the generation call.
+            RuntimeError: Wraps any other failure during generation or saving.
+        """
+        if not (text and text.strip()):
+            raise ValueError(f"Scene {scene_number} ({scene_title}) requires non-empty text")
+
+        try:
+            logger.info(f"[StoryAudioGeneration] Generating AI audio for scene {scene_number}: {scene_title}")
+            logger.debug(f"[StoryAudioGeneration] Text length: {len(text)} characters, voice: {voice_id}")
+
+            # Deferred import, kept inside the try so an import failure is
+            # reported through the same RuntimeError path as other errors.
+            from services.llm_providers.main_audio_generation import generate_audio
+
+            synthesis_kwargs = {
+                "text": text.strip(),
+                "voice_id": voice_id,
+                "speed": speed,
+                "volume": volume,
+                "pitch": pitch,
+                "emotion": emotion,
+                "user_id": user_id,
+            }
+            speech = generate_audio(**synthesis_kwargs)
+
+            # Persist the returned bytes under the service's output directory.
+            filename = self._generate_audio_filename(scene_number, scene_title)
+            destination = self.output_dir / filename
+            with open(destination, "wb") as audio_file:
+                audio_file.write(speech.audio_bytes)
+
+            logger.info(f"[StoryAudioGeneration] Saved AI audio to: {destination} ({speech.file_size} bytes)")
+
+            # Cost reported back in the response: flat rate per 1,000 characters.
+            rate_per_1000_chars = 0.05
+            estimated_cost = (speech.text_length / 1000.0) * rate_per_1000_chars
+
+            metadata = {
+                "scene_number": scene_number,
+                "scene_title": scene_title,
+                "audio_path": str(destination),
+                "audio_filename": filename,
+                "audio_url": f"/api/story/audio/{filename}",
+                "provider": speech.provider,
+                "model": speech.model,
+                "voice_id": speech.voice_id,
+                "text_length": speech.text_length,
+                "file_size": speech.file_size,
+                "cost": estimated_cost,
+            }
+            return metadata
+
+        except HTTPException:
+            # Let HTTP errors (e.g., 429 subscription limit) reach the caller as-is.
+            raise
+        except Exception as e:
+            logger.error(f"[StoryAudioGeneration] Error generating AI audio for scene {scene_number}: {e}")
+            raise RuntimeError(f"Failed to generate AI audio for scene {scene_number}: {str(e)}") from e

--- a/backend/services/story_writer/image_generation_service.py
+++ b/backend/services/story_writer/image_generation_service.py
@@ -193,4 +193,82 @@ class StoryImageGenerationService:
        
        logger.info(f"[StoryImageGeneration] Generated {len(image_results)} images out of {total_scenes} scenes")
        return image_results
+    
+    def regenerate_scene_image(
+        self,
+        scene_number: int,
+        scene_title: str,
+        prompt: str,
+        user_id: str,
+        provider: Optional[str] = None,
+        width: int = 1024,
+        height: int = 1024,
+        model: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """
+        Re-create the image for one scene from a caller-supplied prompt (no AI prompt generation).
+
+        Parameters:
+            scene_number (int): Scene number.
+            scene_title (str): Scene title.
+            prompt (str): Direct prompt to use for image generation; stripped before use.
+            user_id (str): Clerk user ID for subscription checking.
+            provider (str, optional): Image generation provider (gemini, huggingface, stability).
+            width (int): Image width (default: 1024).
+            height (int): Image height (default: 1024).
+            model (str, optional): Model to use for image generation.
+
+        Returns:
+            Dict[str, Any]: Scene info, saved file path/name/URL, and the
+            dimensions, provider, model and seed reported by the generator.
+
+        Raises:
+            ValueError: If prompt is empty or whitespace-only.
+            HTTPException: Propagated unchanged from the generation call.
+            RuntimeError: Wraps any other failure during generation or saving.
+        """
+        if not (prompt and prompt.strip()):
+            raise ValueError(f"Scene {scene_number} ({scene_title}) requires a non-empty prompt")
+
+        try:
+            logger.info(f"[StoryImageGeneration] Regenerating image for scene {scene_number}: {scene_title}")
+            logger.debug(f"[StoryImageGeneration] Using direct prompt: {prompt[:100]}...")
+
+            # The prompt goes to main_image_generation as given; only the
+            # rendering options are assembled here.
+            generated: ImageGenerationResult = generate_image(
+                prompt=prompt.strip(),
+                options=dict(provider=provider, width=width, height=height, model=model),
+                user_id=user_id
+            )
+
+            # Persist the returned bytes under the service's output directory.
+            filename = self._generate_image_filename(scene_number, scene_title)
+            destination = self.output_dir / filename
+            with open(destination, "wb") as image_file:
+                image_file.write(generated.image_bytes)
+
+            logger.info(f"[StoryImageGeneration] Saved regenerated image to: {destination}")
+
+            metadata = {
+                "scene_number": scene_number,
+                "scene_title": scene_title,
+                "image_path": str(destination),
+                "image_filename": filename,
+                "image_url": f"/api/story/images/{filename}",
+                "width": generated.width,
+                "height": generated.height,
+                "provider": generated.provider,
+                "model": generated.model,
+                "seed": generated.seed,
+            }
+            return metadata
+
+        except HTTPException:
+            # Let HTTP errors (e.g., 429 subscription limit) reach the caller as-is.
+            raise
+        except Exception as e:
+            logger.error(f"[StoryImageGeneration] Error regenerating image for scene {scene_number}: {e}")
+            raise RuntimeError(f"Failed to regenerate image for scene {scene_number}: {str(e)}") from e

--- a/backend/services/story_writer/video_generation_service.py
+++ b/backend/services/story_writer/video_generation_service.py
@@ -220,35 +220,41 @@ class StoryVideoGenerationService:
    def generate_story_video(
        self,
        scenes: List[Dict[str, Any]],
-        image_paths: List[str],
+        image_paths: List[Optional[str]],
        audio_paths: List[str],
        user_id: str,
        story_title: str = "Story",
        fps: int = 24,
        transition_duration: float = 0.5,
-        progress_callback: Optional[callable] = None
+        progress_callback: Optional[callable] = None,
+        video_paths: Optional[List[Optional[str]]] = None
    ) -> Dict[str, Any]:
        """
        Generate a complete story video from multiple scenes.
        
        Parameters:
            scenes (List[Dict[str, Any]]): List of scene data.
-            image_paths (List[str]): List of image file paths for each scene.
+            image_paths (List[Optional[str]]): List of image file paths (None if scene has animated video).
            audio_paths (List[str]): List of audio file paths for each scene.
            user_id (str): Clerk user ID for subscription checking.
            story_title (str): Title of the story (default: "Story").
            fps (int): Frames per second for video (default: 24).
            transition_duration (float): Duration of transitions between scenes in seconds (default: 0.5).
            progress_callback (callable, optional): Callback function for progress updates.
+            video_paths (Optional[List[Optional[str]]]): List of animated video file paths (None if scene has static image).
        
        Returns:
            Dict[str, Any]: Video metadata including file path, URL, and story info.
        """
-        if not scenes or not image_paths or not audio_paths:
-            raise ValueError("Scenes, image paths, and audio paths are required")
+        if not scenes or not audio_paths:
+            raise ValueError("Scenes and audio paths are required")
        
-        if len(scenes) != len(image_paths) or len(scenes) != len(audio_paths):
-            raise ValueError("Number of scenes, image paths, and audio paths must match")
+        if len(scenes) != len(audio_paths):
+            raise ValueError("Number of scenes and audio paths must match")
+        
+        video_paths = video_paths or [None] * len(scenes)
+        if len(video_paths) != len(scenes):
+            video_paths = video_paths + [None] * (len(scenes) - len(video_paths))
        
        try:
            logger.info(f"[StoryVideoGeneration] Generating story video for {len(scenes)} scenes")
@@ -293,36 +299,59 @@ class StoryVideoGenerationService:
            scene_clips = []
            total_duration = 0.0
            
-            for idx, (scene, image_path, audio_path) in enumerate(zip(scenes, image_paths, audio_paths)):
+            # Import VideoFileClip for animated videos
+            try:
+                from moviepy import VideoFileClip
+            except ImportError:
+                VideoFileClip = None
+            
+            for idx, (scene, image_path, audio_path, video_path) in enumerate(zip(scenes, image_paths, audio_paths, video_paths)):
                try:
                    scene_number = scene.get("scene_number", idx + 1)
                    scene_title = scene.get("title", "Untitled")
                    
                    logger.info(f"[StoryVideoGeneration] Processing scene {scene_number}/{len(scenes)}: {scene_title}")
                    
-                    # Load image and audio
-                    image_file = Path(image_path)
                    audio_file = Path(audio_path)
-                    
-                    if not image_file.exists():
-                        logger.warning(f"[StoryVideoGeneration] Image not found: {image_path}, skipping scene {scene_number}")
-                        continue
                    if not audio_file.exists():
                        logger.warning(f"[StoryVideoGeneration] Audio not found: {audio_path}, skipping scene {scene_number}")
                        continue
                    
-                    # Load audio to get duration
+                    # Load audio
                    audio_clip = AudioFileClip(str(audio_file))
                    audio_duration = audio_clip.duration
                    
-                    # Create image clip (MoviePy v2: use with_* API)
-                    image_clip = ImageClip(str(image_file)).with_duration(audio_duration)
-                    image_clip = image_clip.with_fps(fps)
+                    # Prefer animated video if available
+                    if video_path and Path(video_path).exists():
+                        logger.info(f"[StoryVideoGeneration] Using animated video for scene {scene_number}: {video_path}")
+                        # Load animated video
+                        if VideoFileClip is None:
+                            raise RuntimeError("VideoFileClip not available - MoviePy may not be fully installed")
+                        video_clip = VideoFileClip(str(video_path))
+                        # Replace audio with the preferred audio (AI or free)
+                        video_clip = video_clip.with_audio(audio_clip)
+                        # Match duration to audio if needed
+                        if video_clip.duration > audio_duration:
+                            video_clip = video_clip.subclip(0, audio_duration)
+                        elif video_clip.duration < audio_duration:
+                            # Loop the video if it's shorter than audio
+                            loops_needed = int(audio_duration / video_clip.duration) + 1
+                            video_clip = concatenate_videoclips([video_clip] * loops_needed).subclip(0, audio_duration)
+                            video_clip = video_clip.with_audio(audio_clip)
+                    elif image_path and Path(image_path).exists():
+                        # Fall back to static image
+                        logger.info(f"[StoryVideoGeneration] Using static image for scene {scene_number}: {image_path}")
+                        image_file = Path(image_path)
+                        # Create image clip (MoviePy v2: use with_* API)
+                        image_clip = ImageClip(str(image_file)).with_duration(audio_duration)
+                        image_clip = image_clip.with_fps(fps)
+                        # Set audio to image clip
+                        video_clip = image_clip.with_audio(audio_clip)
+                    else:
+                        logger.warning(f"[StoryVideoGeneration] No video or image found for scene {scene_number}, skipping")
+                        continue
                    
-                    # Set audio to image clip
-                    video_clip = image_clip.with_audio(audio_clip)
                    scene_clips.append(video_clip)
-                    
                    total_duration += audio_duration
                    
                    # Call progress callback if provided
--- a/backend/services/subscription/monitoring_middleware.py
+++ b/backend/services/subscription/monitoring_middleware.py
@@ -19,10 +19,18 @@ import re

 from models.api_monitoring import APIRequest, APIEndpointStats, SystemHealth, CachePerformance
 from models.subscription_models import APIProvider
-from services.database import get_db
 from .usage_tracking_service import UsageTrackingService
 from .pricing_service import PricingService

+
+def _get_db_session():
+    """
+    Get a database session from get_db().
+
+    The import happens at call time rather than module level so the helper
+    survives hot reloads: Uvicorn's reloader can sometimes clear
+    module-level imports.
+    """
+    from services.database import get_db
+
+    session_source = get_db()
+    return next(session_source)
+
 class DatabaseAPIMonitor:
    """Database-backed API monitoring with usage tracking and subscription management."""
    
@@ -145,8 +153,9 @@ async def check_usage_limits_middleware(request: Request, user_id: str, request_
    except Exception:
        pass
    
+    db = None
    try:
-        db = next(get_db())
+        db = _get_db_session()
        api_monitor = DatabaseAPIMonitor()
        
        # Detect if this is an API call that should be rate limited
@@ -203,14 +212,15 @@ async def check_usage_limits_middleware(request: Request, user_id: str, request_
        # Don't block requests if usage checking fails
        return None
    finally:
-        db.close()
+        if db is not None:
+            db.close()

 async def monitoring_middleware(request: Request, call_next):
    """Enhanced FastAPI middleware for monitoring API calls with usage tracking."""
    start_time = time.time()
    
    # Get database session
-    db = next(get_db())
+    db = _get_db_session()
    
    # Extract request details - Enhanced user identification
    user_id = None
@@ -340,8 +350,9 @@ async def monitoring_middleware(request: Request, call_next):

 async def get_monitoring_stats(minutes: int = 5) -> Dict[str, Any]:
    """Get current monitoring statistics."""
-    db = next(get_db())
+    db = None
    try:
+        db = _get_db_session()
        # Placeholder to match old API; heavy stats handled elsewhere
        return {
            'timestamp': datetime.utcnow().isoformat(),
@@ -354,12 +365,14 @@ async def get_monitoring_stats(minutes: int = 5) -> Dict[str, Any]:
            'system_health': {'status': 'healthy', 'error_rate': 0.0}
        }
    finally:
-        db.close()
+        if db is not None:
+            db.close()

 async def get_lightweight_stats() -> Dict[str, Any]:
    """Get lightweight stats for dashboard header."""
-    db = next(get_db())
+    db = None
    try:
+        db = _get_db_session()
        # Minimal viable placeholder values
        now = datetime.utcnow()
        return {
@@ -371,4 +384,5 @@ async def get_lightweight_stats() -> Dict[str, Any]:
            'timestamp': now.isoformat()
        }
    finally:
-        db.close()
+        if db is not None:
+            db.close()
--- a/backend/services/subscription/preflight_validator.py
+++ b/backend/services/subscription/preflight_validator.py
@@ -420,3 +420,54 @@ def validate_video_generation_operations(
                'message': f"Failed to validate video generation: {str(e)}"
            }
        )
+
+
+def validate_scene_animation_operation(
+    pricing_service: PricingService,
+    user_id: str,
+) -> None:
+    """
+    Validate the per-scene animation workflow before API calls.
+
+    Asks the pricing service whether a single video-provider operation
+    (tagged 'wavespeed' / 'scene_animation') is within the user's limits.
+
+    Raises:
+        HTTPException: 429 when the limit check rejects the operation,
+            500 when the check itself fails unexpectedly.
+    """
+    try:
+        scene_animation_op = {
+            'provider': APIProvider.VIDEO,
+            'tokens_requested': 0,
+            'actual_provider_name': 'wavespeed',
+            'operation_type': 'scene_animation',
+        }
+        allowed, reason, details = pricing_service.check_comprehensive_limits(
+            user_id=user_id,
+            operations=[scene_animation_op],
+        )
+
+        if allowed:
+            logger.info(f"[Pre-flight Validator] ✅ Scene animation validated for user {user_id}")
+            return
+
+        logger.error(f"[Pre-flight Validator] Scene animation blocked for user {user_id}: {reason}")
+        usage_info = details.get('usage_info', {}) if details else {}
+        if usage_info:
+            provider = usage_info.get('provider', 'video')
+        else:
+            provider = 'video'
+        raise HTTPException(
+            status_code=429,
+            detail={
+                'error': reason,
+                'message': reason,
+                'provider': provider,
+                # Fall back to the raw error details when no usage info was supplied.
+                'usage_info': usage_info or details,
+            }
+        )
+
+    except HTTPException:
+        # The 429 raised above must not be converted into a 500 below.
+        raise
+    except Exception as e:
+        logger.error(f"[Pre-flight Validator] Error validating scene animation: {e}", exc_info=True)
+        failure = f"Failed to validate scene animation: {str(e)}"
+        raise HTTPException(
+            status_code=500,
+            detail={
+                'error': failure,
+                'message': failure,
+            },
+        )
--- a/backend/services/subscription/pricing_service.py
+++ b/backend/services/subscription/pricing_service.py
@@ -307,6 +307,41 @@ class PricingService:
                "model_name": "default",
                "cost_per_request": 0.10,  # $0.10 per video generation (estimated)
                "description": "AI Video Generation default pricing"
+            },
+            {
+                "provider": APIProvider.VIDEO,
+                "model_name": "kling-v2.5-turbo-std-5s",
+                "cost_per_request": 0.21,
+                "description": "WaveSpeed Kling v2.5 Turbo Std Image-to-Video (5 seconds)"
+            },
+            {
+                "provider": APIProvider.VIDEO,
+                "model_name": "kling-v2.5-turbo-std-10s",
+                "cost_per_request": 0.42,
+                "description": "WaveSpeed Kling v2.5 Turbo Std Image-to-Video (10 seconds)"
+            },
+            {
+                "provider": APIProvider.VIDEO,
+                "model_name": "wavespeed-ai/infinitetalk",
+                "cost_per_request": 0.30,
+                "description": "WaveSpeed InfiniteTalk (image + audio to talking avatar video)"
+            },
+            # Audio Generation Pricing (Minimax Speech 02 HD via WaveSpeed)
+            {
+                "provider": APIProvider.AUDIO,
+                "model_name": "minimax/speech-02-hd",
+                "cost_per_input_token": 0.00005,  # $0.05 per 1,000 characters (every character is 1 token)
+                "cost_per_output_token": 0.0,  # No output tokens for audio
+                "cost_per_request": 0.0,  # Pricing is per character, not per request
+                "description": "AI Audio Generation (Text-to-Speech) - Minimax Speech 02 HD via WaveSpeed"
+            },
+            {
+                "provider": APIProvider.AUDIO,
+                "model_name": "default",
+                "cost_per_input_token": 0.00005,  # $0.05 per 1,000 characters default
+                "cost_per_output_token": 0.0,
+                "cost_per_request": 0.0,
+                "description": "AI Audio Generation default pricing"
            }
        ]
        
@@ -358,6 +393,7 @@ class PricingService:
                "exa_calls_limit": 100,
                "video_calls_limit": 0,  # No video generation for free tier
                "image_edit_calls_limit": 10,  # 10 AI image editing calls/month
+                "audio_calls_limit": 20,  # 20 AI audio generation calls/month
                "gemini_tokens_limit": 100000,
                "monthly_cost_limit": 0.0,
                "features": ["basic_content_generation", "limited_research"],
@@ -381,6 +417,7 @@ class PricingService:
                "exa_calls_limit": 500,
                "video_calls_limit": 20,  # 20 videos/month for basic plan
                "image_edit_calls_limit": 30,  # 30 AI image editing calls/month
+                "audio_calls_limit": 50,  # 50 AI audio generation calls/month
                "gemini_tokens_limit": 20000,  # Increased from 5000 for better stability
                "openai_tokens_limit": 20000,  # Increased from 5000 for better stability
                "anthropic_tokens_limit": 20000,  # Increased from 5000 for better stability
@@ -406,6 +443,7 @@ class PricingService:
                "exa_calls_limit": 2000,
                "video_calls_limit": 50,  # 50 videos/month for pro plan
                "image_edit_calls_limit": 100,  # 100 AI image editing calls/month
+                "audio_calls_limit": 200,  # 200 AI audio generation calls/month
                "gemini_tokens_limit": 5000000,
                "openai_tokens_limit": 2500000,
                "anthropic_tokens_limit": 1000000,
@@ -431,6 +469,7 @@ class PricingService:
                "exa_calls_limit": 0,  # Unlimited
                "video_calls_limit": 0,  # Unlimited for enterprise
                "image_edit_calls_limit": 0,  # Unlimited image editing for enterprise
+                "audio_calls_limit": 0,  # Unlimited audio generation for enterprise
                "gemini_tokens_limit": 0,
                "openai_tokens_limit": 0,
                "anthropic_tokens_limit": 0,
@@ -651,6 +690,7 @@ class PricingService:
                'stability_calls': plan.stability_calls_limit,
                'video_calls': getattr(plan, 'video_calls_limit', 0),  # Support missing column
                'image_edit_calls': getattr(plan, 'image_edit_calls_limit', 0),  # Support missing column
+                'audio_calls': getattr(plan, 'audio_calls_limit', 0),  # Support missing column
                # Token limits
                'gemini_tokens': plan.gemini_tokens_limit,
                'openai_tokens': plan.openai_tokens_limit,
--- a/backend/services/subscription/schema_utils.py
+++ b/backend/services/subscription/schema_utils.py
@@ -31,6 +31,7 @@ def ensure_subscription_plan_columns(db: Session) -> None:
            "exa_calls_limit": "INTEGER DEFAULT 0",
            "video_calls_limit": "INTEGER DEFAULT 0",
            "image_edit_calls_limit": "INTEGER DEFAULT 0",
+            "audio_calls_limit": "INTEGER DEFAULT 0",
        }

        for col_name, ddl in required_columns.items():
@@ -84,6 +85,8 @@ def ensure_usage_summaries_columns(db: Session) -> None:
            "video_cost": "REAL DEFAULT 0.0",
            "image_edit_calls": "INTEGER DEFAULT 0",
            "image_edit_cost": "REAL DEFAULT 0.0",
+            "audio_calls": "INTEGER DEFAULT 0",
+            "audio_cost": "REAL DEFAULT 0.0",
        }

        for col_name, ddl in required_columns.items():
--- a/backend/services/wavespeed/init.py
+++ b/backend/services/wavespeed/init.py
@@ -0,0 +1 @@
+
--- a/backend/services/wavespeed/client.py
+++ b/backend/services/wavespeed/client.py
@@ -0,0 +1,471 @@
+from __future__ import annotations
+
+import json
+import time
+from typing import Any, Dict, Optional
+
+import requests
+from fastapi import HTTPException
+from requests import exceptions as requests_exceptions
+
+from services.onboarding.api_key_manager import APIKeyManager
+from utils.logger_utils import get_service_logger
+
+logger = get_service_logger("wavespeed.client")
+
+
+class WaveSpeedClient:
+    """
+    Thin HTTP client for the WaveSpeed AI API.
+    Handles authentication, submission, and polling helpers.
+    """
+
+    BASE_URL = "https://api.wavespeed.ai/api/v3"
+
+    def __init__(self, api_key: Optional[str] = None):
+        """
+        Resolve the WaveSpeed API key for this client.
+
+        Args:
+            api_key: Explicit key. When falsy, the key stored under
+                "wavespeed" in ``APIKeyManager`` is used instead.
+
+        Raises:
+            RuntimeError: If neither source yields a key.
+        """
+        key_store = APIKeyManager()
+        self.api_key = api_key if api_key else key_store.get_api_key("wavespeed")
+        if self.api_key:
+            return
+        raise RuntimeError("WAVESPEED_API_KEY is not configured. Please add it to your environment.")
+
+    def _headers(self) -> Dict[str, str]:
+        """Return the JSON content-type header plus the bearer-token authorization header."""
+        bearer_token = f"Bearer {self.api_key}"
+        request_headers: Dict[str, str] = {"Content-Type": "application/json"}
+        request_headers["Authorization"] = bearer_token
+        return request_headers
+
+    def submit_image_to_video(
+        self,
+        model_path: str,
+        payload: Dict[str, Any],
+        timeout: int = 30,
+    ) -> str:
+        """
+        Submit an image-to-video generation request.
+
+        Args:
+            model_path: Model route appended to ``BASE_URL``.
+            payload: JSON-serialisable request body, sent unchanged.
+            timeout: Timeout in seconds for the submission request.
+
+        Returns:
+            The prediction ID for polling.
+
+        Raises:
+            HTTPException: 504 when the submission times out; 502 on any other
+                transport failure, a non-200 status, a non-JSON body, or a
+                response without a prediction id.
+        """
+        url = f"{self.BASE_URL}/{model_path}"
+        logger.info(f"[WaveSpeed] Submitting request to {url}")
+        # Map transport errors to HTTPException the same way polling does, so
+        # callers never see raw ``requests`` exceptions from this client.
+        try:
+            response = requests.post(url, headers=self._headers(), json=payload, timeout=timeout)
+        except requests_exceptions.Timeout as exc:
+            raise HTTPException(
+                status_code=504,
+                detail={
+                    "error": "WaveSpeed image-to-video submission timed out",
+                    "exception": str(exc),
+                },
+            ) from exc
+        except requests_exceptions.RequestException as exc:
+            raise HTTPException(
+                status_code=502,
+                detail={
+                    "error": "WaveSpeed image-to-video submission request failed",
+                    "exception": str(exc),
+                },
+            ) from exc
+
+        if response.status_code != 200:
+            logger.error(f"[WaveSpeed] Submission failed: {response.status_code} {response.text}")
+            raise HTTPException(
+                status_code=502,
+                detail={
+                    "error": "WaveSpeed image-to-video submission failed",
+                    "status_code": response.status_code,
+                    "response": response.text,
+                },
+            )
+
+        # A 200 with a non-JSON or oddly shaped body is treated like a missing id.
+        try:
+            body = response.json()
+        except ValueError:
+            body = None
+        data = body.get("data") if isinstance(body, dict) else None
+        if not isinstance(data, dict) or "id" not in data:
+            logger.error(f"[WaveSpeed] Unexpected submission response: {response.text}")
+            raise HTTPException(
+                status_code=502,
+                detail={"error": "WaveSpeed response missing prediction id"},
+            )
+
+        prediction_id = data["id"]
+        logger.info(f"[WaveSpeed] Submitted request: {prediction_id}")
+        return prediction_id
+
+    def get_prediction_result(self, prediction_id: str, timeout: int = 120) -> Dict[str, Any]:
+        """
+        Retrieve the latest status/result payload for a prediction.
+
+        Args:
+            prediction_id: ID returned when the job was submitted.
+            timeout: Timeout in seconds for the single polling request.
+
+        Returns:
+            The ``data`` object of the polling response.
+
+        Raises:
+            HTTPException: 504 when the request times out; 502 on other
+                transport errors, a non-200 status, or an empty ``data`` field.
+        """
+        endpoint = f"{self.BASE_URL}/predictions/{prediction_id}/result"
+        auth_header = {"Authorization": f"Bearer {self.api_key}"}
+
+        def transport_failure(status: int, message: str, cause: Exception) -> HTTPException:
+            # Both transport failures share one detail shape and are resumable.
+            return HTTPException(
+                status_code=status,
+                detail={
+                    "error": message,
+                    "prediction_id": prediction_id,
+                    "resume_available": True,
+                    "exception": str(cause),
+                },
+            )
+
+        try:
+            reply = requests.get(endpoint, headers=auth_header, timeout=timeout)
+        except requests_exceptions.Timeout as exc:
+            raise transport_failure(504, "WaveSpeed polling request timed out", exc) from exc
+        except requests_exceptions.RequestException as exc:
+            raise transport_failure(502, "WaveSpeed polling request failed", exc) from exc
+
+        if reply.status_code == 200:
+            payload = reply.json().get("data")
+            if payload:
+                return payload
+            raise HTTPException(status_code=502, detail={"error": "WaveSpeed polling response missing data"})
+
+        logger.error(f"[WaveSpeed] Polling failed: {reply.status_code} {reply.text}")
+        raise HTTPException(
+            status_code=502,
+            detail={
+                "error": "WaveSpeed prediction polling failed",
+                "status_code": reply.status_code,
+                "response": reply.text,
+            },
+        )
+
+    def poll_until_complete(
+        self,
+        prediction_id: str,
+        timeout_seconds: int = 240,
+        interval_seconds: float = 1.0,
+    ) -> Dict[str, Any]:
+        """
+        Poll WaveSpeed until the job completes, fails, or times out.
+
+        Args:
+            prediction_id: ID of the prediction to watch.
+            timeout_seconds: Overall polling budget, checked after each poll.
+            interval_seconds: Pause between consecutive polls.
+
+        Returns:
+            The result payload once its status is "completed".
+
+        Raises:
+            HTTPException: Re-raised polling errors, with ``prediction_id`` and
+                ``resume_available`` added when the detail is a dict; 502 when
+                the job reports "failed"; 504 when the budget is exhausted.
+        """
+        # Monotonic clock: the deadline must not move when the wall clock is adjusted.
+        started_at = time.monotonic()
+        while True:
+            try:
+                result = self.get_prediction_result(prediction_id)
+            except HTTPException as exc:
+                detail = exc.detail or {}
+                if isinstance(detail, dict):
+                    detail.setdefault("prediction_id", prediction_id)
+                    detail.setdefault("resume_available", True)
+                    detail.setdefault("error", "WaveSpeed polling failed")
+                raise HTTPException(status_code=exc.status_code, detail=detail) from exc
+            status = result.get("status")
+            if status == "completed":
+                logger.info(f"[WaveSpeed] Prediction {prediction_id} completed.")
+                return result
+            if status == "failed":
+                logger.error(f"[WaveSpeed] Prediction {prediction_id} failed: {result.get('error')}")
+                raise HTTPException(
+                    status_code=502,
+                    detail={
+                        "error": "WaveSpeed animation failed",
+                        "prediction_id": prediction_id,
+                        "details": result.get("error"),
+                    },
+                )
+
+            elapsed = time.monotonic() - started_at
+            if elapsed > timeout_seconds:
+                logger.error(f"[WaveSpeed] Prediction {prediction_id} timed out after {timeout_seconds}s")
+                raise HTTPException(
+                    status_code=504,
+                    detail={
+                        "error": "WaveSpeed animation timed out",
+                        "prediction_id": prediction_id,
+                        "details": result,
+                    },
+                )
+
+            logger.debug(f"[WaveSpeed] Prediction {prediction_id} status={status}. Waiting...")
+            time.sleep(interval_seconds)
+
+    def optimize_prompt(
+        self,
+        text: str,
+        mode: str = "image",
+        style: str = "default",
+        image: Optional[str] = None,
+        enable_sync_mode: bool = True,
+        timeout: int = 30,
+    ) -> str:
+        """
+        Optimize a prompt using WaveSpeed prompt optimizer.
+
+        Args:
+            text: The prompt text to optimize
+            mode: "image" or "video" (default: "image")
+            style: "default", "artistic", "photographic", "technical", "anime", "realistic" (default: "default")
+            image: Base64-encoded image for context (optional)
+            enable_sync_mode: If True, wait for result and return it directly (default: True)
+            timeout: Request timeout in seconds (default: 30)
+
+        Returns:
+            Optimized prompt text
+
+        Raises:
+            HTTPException: 502 when the submission is rejected, no outputs are
+                returned, the output shape is not recognized, or a linked
+                output cannot be downloaded. Polling errors from
+                ``poll_until_complete`` propagate unchanged in async mode.
+        """
+        model_path = "wavespeed-ai/prompt-optimizer"
+        url = f"{self.BASE_URL}/{model_path}"
+
+        payload = {
+            "text": text,
+            "mode": mode,
+            "style": style,
+            "enable_sync_mode": enable_sync_mode,
+        }
+        if image:
+            payload["image"] = image
+
+        logger.info(f"[WaveSpeed] Optimizing prompt via {url} (mode={mode}, style={style})")
+        response = requests.post(url, headers=self._headers(), json=payload, timeout=timeout)
+
+        if response.status_code != 200:
+            logger.error(f"[WaveSpeed] Prompt optimization failed: {response.status_code} {response.text}")
+            raise HTTPException(
+                status_code=502,
+                detail={
+                    "error": "WaveSpeed prompt optimization failed",
+                    "status_code": response.status_code,
+                    "response": response.text,
+                },
+            )
+
+        response_json = response.json()
+        # Some responses wrap the payload in "data"; others return it at top level.
+        data = response_json.get("data") or response_json
+
+        if enable_sync_mode:
+            # Sync mode: the result is expected directly in the submission response.
+            outputs = data.get("outputs") or []
+            if not outputs:
+                logger.error(f"[WaveSpeed] No outputs in sync mode response: {response.text}")
+        else:
+            # Async mode: the submission only yields a prediction id to poll.
+            prediction_id = data.get("id")
+            if not prediction_id:
+                logger.error(f"[WaveSpeed] No prediction ID in async response: {response.text}")
+                raise HTTPException(
+                    status_code=502,
+                    detail="WaveSpeed response missing prediction id for async mode",
+                )
+            result = self.poll_until_complete(prediction_id, timeout_seconds=60, interval_seconds=0.5)
+            outputs = result.get("outputs") or []
+
+        if not outputs:
+            raise HTTPException(status_code=502, detail="WaveSpeed prompt optimizer returned no outputs")
+
+        optimized_prompt = self._resolve_prompt_output(outputs, timeout)
+        if not optimized_prompt:
+            logger.error(f"[WaveSpeed] Could not extract optimized prompt from outputs: {outputs}")
+            raise HTTPException(
+                status_code=502,
+                detail="WaveSpeed prompt optimizer output format not recognized",
+            )
+
+        logger.info(f"[WaveSpeed] Prompt optimized successfully (length: {len(optimized_prompt)} chars)")
+        return optimized_prompt
+
+    def _resolve_prompt_output(self, outputs: Any, timeout: int) -> Optional[str]:
+        """
+        Turn the first optimizer output into prompt text.
+
+        The first output may be the text itself, a URL whose body is the text,
+        or a dict carrying it under "text", "prompt" or "output". Returns None
+        when the shape is not recognized. Raises HTTPException (502) when a
+        linked output cannot be downloaded.
+        """
+        if not isinstance(outputs, list) or not outputs:
+            return None
+
+        first_output = outputs[0]
+        if isinstance(first_output, dict):
+            return first_output.get("text") or first_output.get("prompt") or first_output.get("output")
+        if not isinstance(first_output, str):
+            return None
+        if not first_output.startswith(("http://", "https://")):
+            # Already the optimized text.
+            return first_output
+
+        logger.info(f"[WaveSpeed] Fetching optimized prompt from URL: {first_output}")
+        url_response = requests.get(first_output, timeout=timeout)
+        if url_response.status_code != 200:
+            logger.error(f"[WaveSpeed] Failed to fetch prompt from URL: {url_response.status_code}")
+            raise HTTPException(
+                status_code=502,
+                detail="Failed to fetch optimized prompt from WaveSpeed URL",
+            )
+        return url_response.text.strip()
+    
+    def generate_speech(
+        self,
+        text: str,
+        voice_id: str,
+        speed: float = 1.0,
+        volume: float = 1.0,
+        pitch: float = 0.0,
+        emotion: str = "happy",
+        enable_sync_mode: bool = True,
+        timeout: int = 60,
+        **kwargs
+    ) -> bytes:
+        """
+        Generate speech audio using Minimax Speech 02 HD via WaveSpeed.
+
+        Args:
+            text: Text to convert to speech (max 10000 characters)
+            voice_id: Voice ID (e.g., "Wise_Woman", "Friendly_Person", etc.)
+            speed: Speech speed (0.5-2.0, default: 1.0)
+            volume: Speech volume (0.1-10.0, default: 1.0)
+            pitch: Speech pitch (-12 to 12, default: 0.0)
+            emotion: Emotion ("happy", "sad", "angry", etc., default: "happy")
+            enable_sync_mode: If True, wait for result and return it directly (default: True)
+            timeout: Request timeout in seconds (default: 60)
+            **kwargs: Additional parameters (sample_rate, bitrate, format, etc.).
+                Only the recognized optional keys are forwarded; any other
+                keyword is ignored.
+
+        Returns:
+            bytes: Generated audio bytes
+
+        Raises:
+            HTTPException: 502 when the submission is rejected, no outputs are
+                returned, the output is not an http(s) URL, or the audio
+                download fails. Polling errors from ``poll_until_complete``
+                propagate unchanged in async mode.
+        """
+        model_path = "minimax/speech-02-hd"
+        url = f"{self.BASE_URL}/{model_path}"
+
+        payload = {
+            "text": text,
+            "voice_id": voice_id,
+            "speed": speed,
+            "volume": volume,
+            "pitch": pitch,
+            "emotion": emotion,
+            "enable_sync_mode": enable_sync_mode,
+        }
+
+        # Forward only the optional parameters this method knows about.
+        optional_params = (
+            "english_normalization",
+            "sample_rate",
+            "bitrate",
+            "channel",
+            "format",
+            "language_boost",
+        )
+        payload.update({param: kwargs[param] for param in optional_params if param in kwargs})
+
+        logger.info(f"[WaveSpeed] Generating speech via {url} (voice={voice_id}, text_length={len(text)})")
+        response = requests.post(url, headers=self._headers(), json=payload, timeout=timeout)
+
+        if response.status_code != 200:
+            logger.error(f"[WaveSpeed] Speech generation failed: {response.status_code} {response.text}")
+            raise HTTPException(
+                status_code=502,
+                detail={
+                    "error": "WaveSpeed speech generation failed",
+                    "status_code": response.status_code,
+                    "response": response.text,
+                },
+            )
+
+        response_json = response.json()
+        # Some responses wrap the payload in "data"; others return it at top level.
+        data = response_json.get("data") or response_json
+
+        if enable_sync_mode:
+            # Sync mode: the result is expected directly in the submission response.
+            outputs = data.get("outputs") or []
+            if not outputs:
+                logger.error(f"[WaveSpeed] No outputs in sync mode response: {response.text}")
+        else:
+            # Async mode: the submission only yields a prediction id to poll.
+            prediction_id = data.get("id")
+            if not prediction_id:
+                logger.error(f"[WaveSpeed] No prediction ID in async response: {response.text}")
+                raise HTTPException(
+                    status_code=502,
+                    detail="WaveSpeed response missing prediction id for async mode",
+                )
+            result = self.poll_until_complete(prediction_id, timeout_seconds=120, interval_seconds=0.5)
+            outputs = result.get("outputs") or []
+
+        if not outputs:
+            raise HTTPException(status_code=502, detail="WaveSpeed speech generator returned no outputs")
+
+        return self._download_audio_output(outputs, timeout)
+
+    def _download_audio_output(self, outputs: Any, timeout: int) -> bytes:
+        """
+        Locate the audio URL in the first output and download its bytes.
+
+        The first output may be the URL itself or a dict carrying it under
+        "url" or "output". Raises HTTPException (502) when no http(s) URL is
+        found or the download does not return 200.
+        """
+        audio_url = None
+        if isinstance(outputs, list) and outputs:
+            first_output = outputs[0]
+            if isinstance(first_output, str):
+                audio_url = first_output
+            elif isinstance(first_output, dict):
+                audio_url = first_output.get("url") or first_output.get("output")
+
+        # The isinstance guard keeps a non-string "url" value from crashing startswith.
+        if not isinstance(audio_url, str) or not audio_url.startswith(("http://", "https://")):
+            logger.error(f"[WaveSpeed] Invalid audio URL in outputs: {outputs}")
+            raise HTTPException(
+                status_code=502,
+                detail="WaveSpeed speech generator output format not recognized",
+            )
+
+        logger.info(f"[WaveSpeed] Fetching audio from URL: {audio_url}")
+        audio_response = requests.get(audio_url, timeout=timeout)
+        if audio_response.status_code != 200:
+            logger.error(f"[WaveSpeed] Failed to fetch audio from URL: {audio_response.status_code}")
+            raise HTTPException(
+                status_code=502,
+                detail="Failed to fetch generated audio from WaveSpeed URL",
+            )
+
+        audio_bytes = audio_response.content
+        logger.info(f"[WaveSpeed] Speech generated successfully (size: {len(audio_bytes)} bytes)")
+        return audio_bytes
+
--- a/backend/services/wavespeed/infinitetalk.py
+++ b/backend/services/wavespeed/infinitetalk.py
@@ -0,0 +1,122 @@
+from __future__ import annotations
+
+import base64
+from typing import Any, Dict, Optional
+
+import requests
+from fastapi import HTTPException
+from loguru import logger
+
+from .client import WaveSpeedClient
+from .kling_animation import generate_animation_prompt
+
+# Model route handed to WaveSpeedClient.submit_image_to_video for InfiniteTalk jobs.
+INFINITALK_MODEL_PATH = "wavespeed-ai/infinitetalk"
+# Model identifier reported to callers in the result's "model_name" field.
+INFINITALK_MODEL_NAME = "wavespeed-ai/infinitetalk"
+# Flat cost reported for every generated clip; it is not scaled by the clip's duration.
+INFINITALK_DEFAULT_COST = 0.30  # $0.30 per 5 seconds at 720p tier
+# Size caps checked on the raw bytes before they are base64-encoded into the payload.
+MAX_IMAGE_BYTES = 10 * 1024 * 1024  # 10MB
+MAX_AUDIO_BYTES = 50 * 1024 * 1024  # 50MB safety cap
+
+
+def _as_data_uri(content_bytes: bytes, mime_type: str) -> str:
+    """Wrap raw bytes in a base64 ``data:`` URI tagged with ``mime_type``."""
+    b64_text = str(base64.b64encode(content_bytes), "utf-8")
+    return "data:{};base64,{}".format(mime_type, b64_text)
+
+
+def animate_scene_with_voiceover(
+    *,
+    image_bytes: bytes,
+    audio_bytes: bytes,
+    scene_data: Dict[str, Any],
+    story_context: Dict[str, Any],
+    user_id: str,
+    resolution: str = "720p",
+    prompt_override: Optional[str] = None,
+    image_mime: str = "image/png",
+    audio_mime: str = "audio/mpeg",
+    client: Optional[WaveSpeedClient] = None,
+) -> Dict[str, Any]:
+    """
+    Animate a scene image with narration audio using WaveSpeed InfiniteTalk.
+
+    Args:
+        image_bytes: Raw scene image; must be non-empty and at most MAX_IMAGE_BYTES.
+        audio_bytes: Raw narration audio; must be non-empty and at most MAX_AUDIO_BYTES.
+        scene_data: Scene fields used to build the animation prompt and the log line.
+        story_context: Story-level fields used to build the animation prompt.
+        user_id: Forwarded to prompt generation and included in the log line.
+        resolution: Either "480p" or "720p".
+        prompt_override: When truthy, used instead of a generated prompt.
+        image_mime: MIME type written into the image data URI.
+        audio_mime: MIME type written into the audio data URI.
+        client: Optional pre-built client; a new WaveSpeedClient is created otherwise.
+
+    Returns:
+        Dict with video bytes, prompt used, duration, model name, cost,
+        provider, source video URL and prediction id.
+
+    Raises:
+        HTTPException: 404 when image or audio bytes are missing; 400 when a
+            size limit or the resolution check fails; 502 when no outputs are
+            returned or the video download fails. Polling errors are re-raised
+            with the prediction id attached.
+    """
+
+    if not image_bytes:
+        raise HTTPException(status_code=404, detail="Scene image bytes missing for animation.")
+    if not audio_bytes:
+        raise HTTPException(status_code=404, detail="Scene audio bytes missing for animation.")
+
+    if len(image_bytes) > MAX_IMAGE_BYTES:
+        raise HTTPException(
+            status_code=400,
+            detail="Scene image exceeds 10MB limit required by WaveSpeed InfiniteTalk.",
+        )
+    if len(audio_bytes) > MAX_AUDIO_BYTES:
+        raise HTTPException(
+            status_code=400,
+            detail="Scene audio exceeds 50MB limit allowed for InfiniteTalk requests.",
+        )
+
+    if resolution not in {"480p", "720p"}:
+        raise HTTPException(status_code=400, detail="Resolution must be '480p' or '720p'.")
+
+    animation_prompt = prompt_override or generate_animation_prompt(scene_data, story_context, user_id)
+
+    payload = {
+        "image": _as_data_uri(image_bytes, image_mime),
+        "audio": _as_data_uri(audio_bytes, audio_mime),
+        "resolution": resolution,
+    }
+    if animation_prompt:
+        payload["prompt"] = animation_prompt
+
+    client = client or WaveSpeedClient()
+    prediction_id = client.submit_image_to_video(INFINITALK_MODEL_PATH, payload, timeout=60)
+
+    try:
+        result = client.poll_until_complete(prediction_id, timeout_seconds=600, interval_seconds=1.0)
+    except HTTPException as exc:
+        # Enrich the error in place so the caller can resume this prediction later.
+        detail = exc.detail or {}
+        if isinstance(detail, dict):
+            detail.setdefault("prediction_id", prediction_id)
+            detail.setdefault("resume_available", True)
+        raise
+
+    outputs = result.get("outputs") or []
+    if not outputs:
+        raise HTTPException(status_code=502, detail="WaveSpeed InfiniteTalk completed but returned no outputs.")
+
+    video_url = outputs[0]
+    video_response = requests.get(video_url, timeout=180)
+    if video_response.status_code != 200:
+        raise HTTPException(
+            status_code=502,
+            detail={
+                "error": "Failed to download InfiniteTalk video",
+                "status_code": video_response.status_code,
+                "response": video_response.text[:200],
+            },
+        )
+
+    metadata = result.get("metadata") or {}
+    duration = metadata.get("duration_seconds") or metadata.get("duration") or 0
+
+    # This module's logger is loguru, which interpolates with str.format-style
+    # "{}" placeholders; "%s" placeholders would be logged verbatim.
+    logger.info(
+        "[InfiniteTalk] Generated talking avatar video user={} scene={} resolution={} size={} bytes",
+        user_id,
+        scene_data.get("scene_number"),
+        resolution,
+        len(video_response.content),
+    )
+
+    return {
+        "video_bytes": video_response.content,
+        "prompt": animation_prompt,
+        "duration": duration or 5,  # fall back to 5 seconds when metadata carries no duration
+        "model_name": INFINITALK_MODEL_NAME,
+        "cost": INFINITALK_DEFAULT_COST,
+        "provider": "wavespeed",
+        "source_video_url": video_url,
+        "prediction_id": prediction_id,
+    }
+
+
--- a/backend/services/wavespeed/kling_animation.py
+++ b/backend/services/wavespeed/kling_animation.py
@@ -0,0 +1,360 @@
+from __future__ import annotations
+
+import base64
+import json
+from typing import Any, Dict, Optional
+
+import requests
+from fastapi import HTTPException
+
+from services.llm_providers.main_text_generation import llm_text_gen
+from utils.logger_utils import get_service_logger
+
+from .client import WaveSpeedClient
+
+# imghdr is optional: when it cannot be imported the name is bound to None and
+# _detect_image_mime relies on its own header checks instead.
+try:
+    import imghdr
+except ModuleNotFoundError:  # Python 3.13 removed imghdr
+    imghdr = None
+
+logger = get_service_logger("wavespeed.kling_animation")
+
+# NOTE(review): presumably the WaveSpeed route used to submit Kling jobs — confirm in animate_scene_image.
+KLING_MODEL_PATH = "kwaivgi/kling-v2.5-turbo-std/image-to-video"
+# NOTE(review): presumably the model names reported for 5-second and 10-second clips — confirm in animate_scene_image.
+KLING_MODEL_5S = "kling-v2.5-turbo-std-5s"
+KLING_MODEL_10S = "kling-v2.5-turbo-std-10s"
+# Largest image accepted for animation, in bytes.
+MAX_IMAGE_BYTES = 10 * 1024 * 1024  # 10 MB limit per docs
+
+
+def _detect_image_mime(image_bytes: bytes) -> str:
+    """
+    Guess the MIME type of an image from its leading bytes.
+
+    Uses ``imghdr`` when it is available, then falls back to checking the
+    PNG, JPEG and GIF magic numbers directly.
+
+    Returns:
+        "image/jpeg", "image/png" or "image/gif"; "image/png" when the format
+        is not recognized (including empty input).
+    """
+    if imghdr:
+        detected = imghdr.what(None, h=image_bytes)
+        known_types = {"jpeg": "image/jpeg", "png": "image/png", "gif": "image/gif"}
+        if detected in known_types:
+            return known_types[detected]
+
+    header = image_bytes[:8]
+    if header.startswith(b"\x89PNG"):
+        return "image/png"
+    if header.startswith(b"\xff\xd8"):
+        return "image/jpeg"
+    # Match the two real GIF signatures rather than any payload starting with "GIF".
+    if header.startswith((b"GIF87a", b"GIF89a")):
+        return "image/gif"
+
+    # Unknown format: default to PNG.
+    return "image/png"
+
+
+def _build_fallback_prompt(scene_data: Dict[str, Any], story_context: Dict[str, Any]) -> str:
+    """
+    Compose a deterministic animation prompt from scene and story fields.
+
+    Missing or empty fields fall back to generic wording; the description is
+    capped at 220 characters and the image prompt at 200.
+    """
+    scene_title = (scene_data.get("title") or "Scene").strip()
+    scene_description = (scene_data.get("description") or "").strip()
+    illustration_prompt = (scene_data.get("image_prompt") or "").strip()
+    mood = (story_context.get("story_tone") or "story").strip()
+    location = (story_context.get("story_setting") or "the scene").strip()
+
+    sentences = [f"{scene_title} cinematic motion shot."]
+    if scene_description:
+        sentences.append(scene_description[:220])
+    sentences.append(f"Camera glides with subtle parallax over {location}.")
+    sentences.append(f"Maintain a {mood} mood with natural lighting accents.")
+    if illustration_prompt:
+        sentences.append(f"Honor the original illustration details: {illustration_prompt[:200]}.")
+    sentences.append("5-second sequence, gentle push-in, flowing cloth and atmospheric particles.")
+
+    return " ".join(sentences).strip()
+
+
+def _load_llm_json_response(response_text: Any) -> Dict[str, Any]:
+    """
+    Coerce an llm_text_gen response into a parsed object.
+
+    Dicts are returned unchanged and strings are parsed as JSON; any other
+    type raises ValueError.
+    """
+    if isinstance(response_text, str):
+        return json.loads(response_text)
+    if not isinstance(response_text, dict):
+        raise ValueError(f"Unexpected response type: {type(response_text)}")
+    return response_text
+
+
+def _generate_text_prompt(
+    *,
+    prompt: str,
+    system_prompt: str,
+    user_id: str,
+    fallback_prompt: str,
+) -> str:
+    """
+    Ask the LLM for a plain-text prompt when structured JSON parsing fails.
+
+    HTTP 429 errors are re-raised; every other failure yields
+    ``fallback_prompt``. Dict responses are searched for a non-blank
+    "animation_prompt", "prompt" or "text" string before being serialized
+    whole; an empty result also yields ``fallback_prompt``.
+    """
+    try:
+        llm_reply = llm_text_gen(
+            prompt=prompt.strip(),
+            system_prompt=system_prompt,
+            user_id=user_id,
+        )
+    except HTTPException as exc:
+        if exc.status_code != 429:
+            logger.warning(
+                "[AnimateScene] Text-mode prompt generation failed (%s). Using deterministic fallback.",
+                exc.detail,
+            )
+            return fallback_prompt
+        raise
+    except Exception as exc:
+        logger.error(
+            "[AnimateScene] Unexpected error generating text prompt: %s",
+            exc,
+            exc_info=True,
+        )
+        return fallback_prompt
+
+    if not isinstance(llm_reply, dict):
+        raw_text = str(llm_reply)
+    else:
+        for field in ("animation_prompt", "prompt", "text"):
+            value = llm_reply.get(field)
+            if isinstance(value, str) and value.strip():
+                return value.strip()
+        # No usable field: as a last resort, stringify the whole dict.
+        raw_text = json.dumps(llm_reply, ensure_ascii=False)
+
+    stripped = raw_text.strip()
+    return stripped if stripped else fallback_prompt
+
+
+def generate_animation_prompt(
+    scene_data: Dict[str, Any],
+    story_context: Dict[str, Any],
+    user_id: str,
+) -> str:
+    """
+    Generate an animation-focused prompt using llm_text_gen, falling back to a deterministic prompt if LLM fails.
+
+    Args:
+        scene_data: Scene fields ("title", "description", "image_prompt").
+        story_context: Story fields ("story_tone", "story_setting").
+        user_id: Forwarded to llm_text_gen.
+
+    Returns:
+        The structured "animation_prompt" when the LLM returns one; otherwise
+        the result of a plain-text retry; otherwise the deterministic fallback.
+
+    Raises:
+        HTTPException: Only for status 429, which is passed through to the caller.
+    """
+    fallback_prompt = _build_fallback_prompt(scene_data, story_context)
+    system_prompt = (
+        "You are an expert cinematic animation director. "
+        "You transform static illustrated scenes into short cinematic motion clips. "
+        "Describe motion, camera behavior, atmosphere, and pacing."
+    )
+
+    # "or ''" keeps a stored None from being rendered as the literal "None" in the prompt.
+    description = scene_data.get("description") or ""
+    image_prompt = scene_data.get("image_prompt") or ""
+    title = scene_data.get("title") or ""
+    tone = story_context.get("story_tone") or ""
+    setting = story_context.get("story_setting") or ""
+
+    prompt = f"""
+Create a concise animation prompt (2-3 sentences) for a 5-second cinematic clip.
+
+Scene Title: {title}
+Description: {description}
+Existing Image Prompt: {image_prompt}
+Story Tone: {tone}
+Setting: {setting}
+
+Focus on:
+- Motion of characters/objects
+- Camera movement (pan, zoom, dolly, orbit)
+- Atmosphere, lighting, and emotion
+- Timing cues appropriate for a {tone or "story"} scene
+
+Respond with JSON: {{"animation_prompt": "<prompt>"}}
+"""
+
+    def retry_as_plain_text() -> str:
+        # Second attempt without a JSON schema; it has its own fallback handling.
+        return _generate_text_prompt(
+            prompt=prompt,
+            system_prompt=system_prompt,
+            user_id=user_id,
+            fallback_prompt=fallback_prompt,
+        )
+
+    try:
+        response = llm_text_gen(
+            prompt=prompt.strip(),
+            system_prompt=system_prompt,
+            user_id=user_id,
+            json_struct={
+                "type": "object",
+                "properties": {
+                    "animation_prompt": {
+                        "type": "string",
+                        "description": "A cinematic motion prompt for the WaveSpeed image-to-video model.",
+                    }
+                },
+                "required": ["animation_prompt"],
+            },
+        )
+        structured = _load_llm_json_response(response)
+        animation_prompt = structured.get("animation_prompt")
+        if not animation_prompt or not isinstance(animation_prompt, str):
+            raise ValueError("Missing animation_prompt in structured response")
+        cleaned_prompt = animation_prompt.strip()
+        if not cleaned_prompt:
+            raise ValueError("animation_prompt is empty after trimming")
+        return cleaned_prompt
+    except HTTPException as exc:
+        if exc.status_code == 429:
+            raise
+        logger.warning(
+            "[AnimateScene] Structured LLM prompt generation failed (%s). Falling back to text parsing.",
+            exc.detail,
+        )
+        return retry_as_plain_text()
+    except (json.JSONDecodeError, ValueError, KeyError) as exc:
+        logger.warning(
+            "[AnimateScene] Failed to parse structured animation prompt (%s). Falling back to text parsing.",
+            exc,
+        )
+        return retry_as_plain_text()
+    except Exception as exc:
+        logger.error(
+            "[AnimateScene] Unexpected error generating animation prompt: %s",
+            exc,
+            exc_info=True,
+        )
+        return fallback_prompt
+
+
+def animate_scene_image(
+    *,
+    image_bytes: bytes,
+    scene_data: Dict[str, Any],
+    story_context: Dict[str, Any],
+    user_id: str,
+    duration: int = 5,
+    guidance_scale: float = 0.5,
+    negative_prompt: Optional[str] = None,
+    client: Optional[WaveSpeedClient] = None,
+) -> Dict[str, Any]:
+    """
+    Animate a scene image using WaveSpeed Kling v2.5 Turbo Std.
+    Returns dict with video bytes, prompt used, model name, duration, and cost.
+    """
+    if duration not in (5, 10):
+        raise HTTPException(status_code=400, detail="Duration must be 5 or 10 seconds for scene animation.")
+
+    if len(image_bytes) > MAX_IMAGE_BYTES:
+        raise HTTPException(
+            status_code=400,
+            detail="Scene image exceeds 10MB limit required by WaveSpeed."
+        )
+
+    guidance_scale = max(0.0, min(1.0, guidance_scale))
+    animation_prompt = generate_animation_prompt(scene_data, story_context, user_id)
+    image_b64 = base64.b64encode(image_bytes).decode("utf-8")
+
+    payload = {
+        "duration": duration,
+        "guidance_scale": guidance_scale,
+        "image": image_b64,
+        "prompt": animation_prompt,
+    }
+    if negative_prompt:
+        payload["negative_prompt"] = negative_prompt.strip()
+
+    client = client or WaveSpeedClient()
+    prediction_id = client.submit_image_to_video(KLING_MODEL_PATH, payload)
+    try:
+        result = client.poll_until_complete(prediction_id, timeout_seconds=240, interval_seconds=1.0)
+    except HTTPException as exc:
+        detail = exc.detail or {}
+        if isinstance(detail, dict):
+            detail.setdefault("prediction_id", prediction_id)
+            detail.setdefault("resume_available", True)
+            detail.setdefault("message", "WaveSpeed request is still processing. Use resume endpoint to fetch the video once ready.")
+        raise HTTPException(status_code=exc.status_code, detail=detail)
+
+    outputs = result.get("outputs") or []
+    if not outputs:
+        raise HTTPException(status_code=502, detail="WaveSpeed completed but returned no outputs.")
+
+    video_url = outputs[0]
+    video_response = requests.get(video_url, timeout=60)
+    if video_response.status_code != 200:
+        raise HTTPException(
+            status_code=502,
+            detail={
+                "error": "Failed to download animation video",
+                "status_code": video_response.status_code,
+                "response": video_response.text[:200],
+            },
+        )
+
+    model_name = KLING_MODEL_5S if duration == 5 else KLING_MODEL_10S
+    cost = 0.21 if duration == 5 else 0.42
+
+    return {
+        "video_bytes": video_response.content,
+        "prompt": animation_prompt,
+        "duration": duration,
+        "model_name": model_name,
+        "cost": cost,
+        "provider": "wavespeed",
+        "source_video_url": video_url,
+        "prediction_id": prediction_id,
+    }
+
+
+def resume_scene_animation(
+    *,
+    prediction_id: str,
+    duration: int,
+    user_id: str,
+    client: Optional[WaveSpeedClient] = None,
+) -> Dict[str, Any]:
+    """
+    Resume a previously submitted animation by fetching the completed result.
+    """
+    if duration not in (5, 10):
+        raise HTTPException(status_code=400, detail="Duration must be 5 or 10 seconds for scene animation.")
+
+    client = client or WaveSpeedClient()
+    result = client.get_prediction_result(prediction_id, timeout=120)
+    status = result.get("status")
+    if status != "completed":
+        raise HTTPException(
+            status_code=409,
+            detail={
+                "error": "WaveSpeed prediction is not completed yet",
+                "prediction_id": prediction_id,
+                "status": status,
+            },
+        )
+
+    outputs = result.get("outputs") or []
+    if not outputs:
+        raise HTTPException(status_code=502, detail="WaveSpeed completed but returned no outputs.")
+
+    video_url = outputs[0]
+    video_response = requests.get(video_url, timeout=120)
+    if video_response.status_code != 200:
+        raise HTTPException(
+            status_code=502,
+            detail={
+                "error": "Failed to download animation video during resume",
+                "status_code": video_response.status_code,
+                "response": video_response.text[:200],
+                "prediction_id": prediction_id,
+            },
+        )
+
+    animation_prompt = result.get("prompt") or ""
+    model_name = KLING_MODEL_5S if duration == 5 else KLING_MODEL_10S
+    cost = 0.21 if duration == 5 else 0.42
+
+    logger.info("[AnimateScene] Resumed download for prediction=%s", prediction_id)
+
+    return {
+        "video_bytes": video_response.content,
+        "prompt": animation_prompt,
+        "duration": duration,
+        "model_name": model_name,
+        "cost": cost,
+        "provider": "wavespeed",
+        "source_video_url": video_url,
+        "prediction_id": prediction_id,
+    }
+
--- a/backend/story_audio/scene_1_Welcome_to_the_Cloud_Kitchen___9ddc13cd.mp3
+++ b/backend/story_audio/scene_1_Welcome_to_the_Cloud_Kitchen___9ddc13cd.mp3
--- a/backend/story_audio/scene_2_The_Star_Recipe_Begins_68356250.mp3
+++ b/backend/story_audio/scene_2_The_Star_Recipe_Begins_68356250.mp3
--- a/backend/story_audio/scene_2_The_Star_Recipe_Begins_ed9941a3.mp3
+++ b/backend/story_audio/scene_2_The_Star_Recipe_Begins_ed9941a3.mp3
--- a/backend/story_audio/scene_3_Gathering_Sparkling_Space_Dust_d8174f84.mp3
+++ b/backend/story_audio/scene_3_Gathering_Sparkling_Space_Dust_d8174f84.mp3
--- a/backend/story_audio/scene_4_Collecting_Wishes_c38d9001.mp3
+++ b/backend/story_audio/scene_4_Collecting_Wishes_c38d9001.mp3
--- a/backend/story_audio/scene_5_The_Gravity_Mixer_e6255f00.mp3
+++ b/backend/story_audio/scene_5_The_Gravity_Mixer_e6255f00.mp3
--- a/backend/story_audio/scene_6_The_Glowing_Mixture_c0163e9c.mp3
+++ b/backend/story_audio/scene_6_The_Glowing_Mixture_c0163e9c.mp3
--- a/backend/story_audio/scene_7_A_New_Star_Is_Born_c3f3f2c4.mp3
+++ b/backend/story_audio/scene_7_A_New_Star_Is_Born_c3f3f2c4.mp3
--- a/backend/story_images/scene_1_Welcome_to_the_Cloud_Kitchen___ae6436d9.png
+++ b/backend/story_images/scene_1_Welcome_to_the_Cloud_Kitchen___ae6436d9.png
--- a/docs/LINKEDIN_WRITER_MULTIMEDIA_REVAMP.md
+++ b/docs/LINKEDIN_WRITER_MULTIMEDIA_REVAMP.md
@@ -0,0 +1,658 @@
+# LinkedIn Writer: Multimedia Content Revamp
+
+## Executive Summary
+
+This document outlines the comprehensive revamp of ALwrity's LinkedIn Writer to transform it from a text-only content tool into a complete multimedia content creation platform. By integrating video generation, avatar creation, image generation, and voice cloning, LinkedIn Writer will enable users to create engaging, professional multimedia content that drives higher engagement on LinkedIn.
+
+---
+
+## Current State Analysis
+
+### Existing LinkedIn Writer Features
+
+**Current Capabilities**:
+- Text content generation (posts, articles)
+- Writing style optimization for LinkedIn
+- Fact checking and credibility features
+- Engagement optimization
+- Brand voice consistency
+- Industry-specific content
+
+**Current Limitations**:
+- Text-only content (no video)
+- Basic image generation (limited integration)
+- No audio/video narration
+- No avatar/personal branding videos
+- Limited multimedia options
+- No video post creation
+
+**Location**: 
+- Backend: `backend/api/linkedin_writer/`
+- Frontend: `frontend/src/components/LinkedInWriter/`
+
+---
+
+## Proposed Enhancements
+
+### 1. Video Content Creation
+
+#### 1.1 LinkedIn Video Posts
+
+**Feature**: Generate professional video posts for LinkedIn
+
+**Use Cases**:
+- Thought leadership videos
+- Product announcements
+- Company updates
+- Industry insights
+- Personal brand building
+- Educational content
+
+**Implementation**:
+
+**Backend**: `backend/api/linkedin_writer/video_generation.py` (NEW)
+```python
+@router.post("/generate-video-post")
+async def generate_linkedin_video_post(
+    request: LinkedInVideoPostRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> LinkedInVideoPostResponse:
+    """
+    Generate LinkedIn video post with synchronized audio.
+    Uses WAN 2.5 for professional video generation.
+    """
+    # 1. Generate video script from text content
+    # 2. Generate audio narration (persona voice if available)
+    # 3. Generate video with WAN 2.5
+    # 4. Optimize for LinkedIn (aspect ratio, duration)
+    # 5. Return video URL and metadata
+    pass
+```
+
+**Video Specifications for LinkedIn**:
+- **Aspect Ratio**: 16:9 (landscape), 9:16 (vertical), or 1:1 (square)
+- **Duration**: 15 seconds to 10 minutes
+- **Resolution**: 720p minimum, 1080p recommended
+- **Format**: MP4
+- **Audio**: Synchronized narration, background music optional
+
+**UI Component**: `frontend/src/components/LinkedInWriter/VideoPostCreator.tsx` (NEW)
+
+**Features**:
+- Text-to-video conversion
+- Script editor with timing
+- Video preview
+- Resolution selection
+- Duration control
+- Cost estimation
+
+---
+
+#### 1.2 Avatar-Based Video Posts
+
+**Feature**: Create video posts with user's avatar (from persona system)
+
+**Use Cases**:
+- Personal branding videos
+- Consistent presence across posts
+- Professional video messages
+- Thought leadership content
+
+**Implementation**:
+
+**Integration with Persona System**:
+```python
+def generate_avatar_video_post(
+    user_id: str,
+    text_content: str,
+    use_persona_avatar: bool = True,
+) -> bytes:
+    """
+    Generate LinkedIn video post with user's avatar.
+    Uses Hunyuan Avatar or InfiniteTalk based on duration.
+    """
+    # 1. Get user's persona
+    persona = get_persona(user_id)
+    
+    # 2. Generate audio with persona voice
+    audio = generate_audio_with_persona_voice(text_content, persona)
+    
+    # 3. Generate video with persona avatar
+    if duration <= 120:  # 2 minutes
+        video = generate_with_hunyuan_avatar(persona.avatar_id, audio)
+    else:  # Longer content
+        video = generate_with_infinitetalk(persona.avatar_id, audio)
+    
+    return video
+```
+
+**UI Component**: `frontend/src/components/LinkedInWriter/AvatarVideoCreator.tsx` (NEW)
+
+---
+
+### 2. Enhanced Image Generation
+
+#### 2.1 LinkedIn-Optimized Images
+
+**Feature**: Generate professional images for LinkedIn posts
+
+**Current State**: Basic image generation exists but limited
+
+**Enhancements**:
+- LinkedIn-specific image sizes
+- Professional style optimization
+- Brand consistency
+- Multiple image options for A/B testing
+
+**Implementation**:
+
+**Backend**: `backend/api/linkedin_writer/image_generation.py` (ENHANCED)
+```python
+@router.post("/generate-post-image")
+async def generate_linkedin_post_image(
+    request: LinkedInImageRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> LinkedInImageResponse:
+    """
+    Generate LinkedIn-optimized image for post.
+    Uses Ideogram V3 Turbo for photorealistic images.
+    """
+    # 1. Analyze post content for image context
+    # 2. Generate image prompt
+    # 3. Generate image with Ideogram
+    # 4. Optimize for LinkedIn (size, format)
+    # 5. Return image URL
+    pass
+```
+
+**Image Specifications**:
+- **Sizes**: 
+  - Post image: 1200x627px (1.91:1)
+  - Article cover: 1200x627px
+  - Carousel: 1080x1080px (1:1)
+- **Format**: JPG or PNG
+- **Style**: Professional, clean, brand-consistent
+
+**UI Component**: `frontend/src/components/LinkedInWriter/ImageGenerator.tsx` (ENHANCED)
+
+---
+
+#### 2.2 Image-to-Video Conversion
+
+**Feature**: Animate static images into video posts
+
+**Use Cases**:
+- Product showcases
+- Before/after animations
+- Infographic animations
+- Portfolio presentations
+
+**Implementation**:
+
+**Backend Integration**:
+```python
+@router.post("/animate-image")
+async def animate_linkedin_image(
+    request: LinkedInImageAnimationRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> LinkedInVideoResponse:
+    """
+    Convert LinkedIn post image to animated video.
+    Uses WAN 2.5 image-to-video.
+    """
+    # 1. Get uploaded image
+    # 2. Generate animation prompt
+    # 3. Use WAN 2.5 image-to-video
+    # 4. Add audio narration if provided
+    # 5. Return video
+    pass
+```
+
+---
+
+### 3. Audio Content Integration
+
+#### 3.1 Audio Narration for Posts
+
+**Feature**: Add professional audio narration to LinkedIn posts
+
+**Use Cases**:
+- Audio versions of posts (accessibility)
+- Podcast-style content
+- Voice-over for videos
+- Multilingual content
+
+**Implementation**:
+
+**Backend**: `backend/api/linkedin_writer/audio_generation.py` (NEW)
+```python
+@router.post("/generate-audio-narration")
+async def generate_linkedin_audio(
+    request: LinkedInAudioRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> LinkedInAudioResponse:
+    """
+    Generate audio narration for LinkedIn post.
+    Uses persona voice if available.
+    """
+    # 1. Get user's persona
+    # 2. Generate audio with persona voice
+    # 3. Optimize for LinkedIn (duration, format)
+    # 4. Return audio URL
+    pass
+```
+
+**Audio Specifications**:
+- **Format**: MP3
+- **Duration**: Up to 10 minutes
+- **Quality**: 128kbps minimum
+- **Voice**: Persona voice (if trained) or professional TTS
+
+---
+
+### 4. Complete Multimedia Post Creation
+
+#### 4.1 Unified Multimedia Post Creator
+
+**Feature**: Create LinkedIn posts with text, image, video, and audio
+
+**UI Component**: `frontend/src/components/LinkedInWriter/MultimediaPostCreator.tsx` (NEW)
+
+**Workflow**:
+```
+1. User writes post content
+   ↓
+2. System suggests multimedia options:
+   ├─ Generate image
+   ├─ Create video
+   ├─ Add audio narration
+   └─ Animate image
+   ↓
+3. User selects options
+   ↓
+4. System generates multimedia content
+   ↓
+5. User previews and edits
+   ↓
+6. User publishes to LinkedIn
+```
+
+**Features**:
+- Text editor with formatting
+- Image generator with preview
+- Video creator with script editor
+- Audio narrator with voice selection
+- Cost estimation for each option
+- Preview before generation
+- Batch generation for multiple posts
+
+---
+
+## Implementation Phases
+
+### Phase 1: Video Post Creation (Week 1-3)
+
+**Priority**: HIGH - Most engaging content type
+
+**Tasks**:
+1. ✅ Create video generation endpoint
+2. ✅ Integrate WAN 2.5 for LinkedIn videos
+3. ✅ Add video post creator UI
+4. ✅ Implement script editor
+5. ✅ Add video preview
+6. ✅ Optimize for LinkedIn specs
+7. ✅ Add cost estimation
+8. ✅ Integrate with persona voice
+9. ✅ Testing and optimization
+
+**Files to Create**:
+- `backend/api/linkedin_writer/video_generation.py`
+- `frontend/src/components/LinkedInWriter/VideoPostCreator.tsx`
+- `frontend/src/components/LinkedInWriter/VideoPreview.tsx`
+
+**Files to Modify**:
+- `backend/api/linkedin_writer/router.py`
+- `frontend/src/components/LinkedInWriter/LinkedInWriter.tsx`
+- `frontend/src/services/linkedinWriterApi.ts`
+
+**Success Criteria**:
+- Users can create video posts
+- Videos optimized for LinkedIn
+- Cost tracking accurate
+- Good video quality
+- Persona voice integration works
+
+---
+
+### Phase 2: Enhanced Image Generation (Week 4-5)
+
+**Priority**: MEDIUM - Improves existing feature
+
+**Tasks**:
+1. ✅ Enhance image generation endpoint
+2. ✅ Integrate Ideogram V3 Turbo
+3. ✅ Add LinkedIn-specific image sizes
+4. ✅ Improve image generation UI
+5. ✅ Add image-to-video conversion
+6. ✅ Add multiple image options
+7. ✅ Brand consistency features
+8. ✅ Testing and optimization
+
+**Files to Create**:
+- `frontend/src/components/LinkedInWriter/ImageGenerator.tsx` (existing component, enhanced in place)
+- `frontend/src/components/LinkedInWriter/ImageToVideoConverter.tsx`
+
+**Files to Modify**:
+- `backend/api/linkedin_writer/image_generation.py`
+- `frontend/src/components/LinkedInWriter/LinkedInWriter.tsx`
+
+**Success Criteria**:
+- High-quality LinkedIn images
+- Multiple image options
+- Image-to-video works
+- Cost-effective
+
+---
+
+### Phase 3: Avatar Video Integration (Week 6-7)
+
+**Priority**: HIGH - Personal branding differentiator
+
+**Tasks**:
+1. ✅ Integrate Hunyuan Avatar
+2. ✅ Integrate InfiniteTalk
+3. ✅ Create avatar video creator UI
+4. ✅ Add persona avatar integration
+5. ✅ Add video duration controls
+6. ✅ Add preview and editing
+7. ✅ Testing and optimization
+
+**Files to Create**:
+- `backend/api/linkedin_writer/avatar_video.py`
+- `frontend/src/components/LinkedInWriter/AvatarVideoCreator.tsx`
+
+**Files to Modify**:
+- `backend/api/linkedin_writer/router.py`
+- `frontend/src/components/LinkedInWriter/LinkedInWriter.tsx`
+
+**Success Criteria**:
+- Avatar videos work well
+- Persona integration seamless
+- Good video quality
+- Cost tracking accurate
+
+---
+
+### Phase 4: Audio & Multimedia Integration (Week 8-9)
+
+**Priority**: MEDIUM - Complete multimedia suite
+
+**Tasks**:
+1. ✅ Create audio generation endpoint
+2. ✅ Integrate persona voice
+3. ✅ Create unified multimedia creator
+4. ✅ Add batch generation
+5. ✅ Add cost optimization
+6. ✅ Add analytics
+7. ✅ Testing and polish
+
+**Files to Create**:
+- `backend/api/linkedin_writer/audio_generation.py`
+- `frontend/src/components/LinkedInWriter/MultimediaPostCreator.tsx`
+- `frontend/src/components/LinkedInWriter/AudioNarrator.tsx`
+
+**Success Criteria**:
+- Complete multimedia workflow
+- All features integrated
+- Good user experience
+- Cost-effective
+
+---
+
+## Cost Management
+
+### Video Generation Costs
+
+**WAN 2.5 Text-to-Video**:
+- 480p: $0.05/second
+- 720p: $0.10/second
+- 1080p: $0.15/second
+
+**LinkedIn Video Optimization**:
+- Default: 720p (good quality, cost-effective)
+- Premium: 1080p (best quality)
+- Typical post: 30-60 seconds = $3-6 at 720p, $4.50-9 at 1080p
+
+**Avatar Videos**:
+- Hunyuan Avatar: $0.15-0.30 per 5 seconds
+- InfiniteTalk: $0.15-0.30 per 5 seconds (up to 10 minutes)
+- Typical post: 60 seconds = $1.80-3.60
+
+### Image Generation Costs
+
+**Ideogram V3 Turbo**: ~$0.04-0.08 per image
+**Multiple Options**: 3-5 images = $0.12-0.40
+
+### Audio Generation Costs
+
+**Persona Voice**: $0.02 per minute
+**Typical Post**: 2-3 minutes = $0.04-0.06
+
+### Cost Optimization Strategies
+
+1. **Pre-Flight Validation**: Check costs before generation
+2. **Resolution Selection**: Default to cost-effective options
+3. **Batch Discounts**: Lower cost for multiple posts
+4. **Usage Limits**: Per-tier limits to prevent waste
+5. **Cost Estimates**: Show costs before generation
+
+---
+
+## LinkedIn Platform Optimization
+
+### Video Best Practices
+
+**LinkedIn Video Specifications**:
+- **Maximum Duration**: 10 minutes
+- **Recommended Duration**: 15-90 seconds for posts
+- **Aspect Ratios**: 
+  - 16:9 (landscape) - best for desktop
+  - 9:16 (vertical) - best for mobile
+  - 1:1 (square) - works for both
+- **Resolution**: 720p minimum, 1080p recommended
+- **File Size**: Up to 5GB
+- **Format**: MP4 (H.264 codec)
+
+**Optimization Features**:
+- Auto-optimize for LinkedIn
+- Aspect ratio selection
+- Duration recommendations
+- Thumbnail generation
+- Caption/subtitle support
+
+### Image Best Practices
+
+**LinkedIn Image Specifications**:
+- **Post Image**: 1200x627px (1.91:1)
+- **Article Cover**: 1200x627px
+- **Carousel**: 1080x1080px (1:1)
+- **Profile Banner**: 1584x396px
+- **Format**: JPG or PNG
+- **File Size**: Up to 5MB
+
+**Optimization Features**:
+- Auto-resize for LinkedIn
+- Format optimization
+- Compression for web
+- Multiple size options
+
+---
+
+## User Experience Flow
+
+### Enhanced LinkedIn Writer Workflow
+
+```
+1. User opens LinkedIn Writer
+   ↓
+2. User selects content type:
+   ├─ Text Post
+   ├─ Video Post
+   ├─ Image Post
+   ├─ Carousel Post
+   └─ Article
+   ↓
+3. User writes content (or AI generates)
+   ↓
+4. System suggests multimedia options:
+   ├─ Generate professional image
+   ├─ Create video with narration
+   ├─ Add audio version
+   └─ Create avatar video
+   ↓
+5. User selects multimedia options
+   ↓
+6. System shows cost estimate
+   ↓
+7. User approves and generates
+   ↓
+8. User previews content
+   ↓
+9. User edits if needed
+   ↓
+10. User publishes to LinkedIn
+```
+
+### Multimedia Post Creator UI
+
+**Layout**:
+```
+┌─────────────────────────────────────┐
+│  LinkedIn Multimedia Post Creator   │
+├─────────────────────────────────────┤
+│                                     │
+│  [Text Editor]                      │
+│  ┌─────────────────────────────┐  │
+│  │ Write your post content...    │  │
+│  │                               │  │
+│  └─────────────────────────────┘  │
+│                                     │
+│  [Multimedia Options]              │
+│  ┌──────┐ ┌──────┐ ┌──────┐       │
+│  │ Image│ │Video │ │Audio │       │
+│  │  $0.1│ │ $3.00│ │ $0.05│       │
+│  └──────┘ └──────┘ └──────┘       │
+│                                     │
+│  [Preview]                          │
+│  ┌─────────────────────────────┐  │
+│  │ [Generated Content Preview] │  │
+│  └─────────────────────────────┘  │
+│                                     │
+│  [Cost Summary]                     │
+│  Total: $3.15                       │
+│                                     │
+│  [Generate] [Preview] [Publish]    │
+└─────────────────────────────────────┘
+```
+
+---
+
+## Integration Points
+
+### Persona System Integration
+
+**Voice Integration**:
+- Use persona voice for video narration
+- Use persona voice for audio posts
+- Consistent brand voice across content
+
+**Avatar Integration**:
+- Use persona avatar for video posts
+- Consistent visual presence
+- Professional branding
+
+### Story Writer Integration
+
+**Shared Services**:
+- Video generation (WAN 2.5)
+- Voice cloning (Minimax)
+- Avatar generation (Hunyuan/InfiniteTalk)
+- Image generation (Ideogram)
+
+**Code Reuse**:
+- Share video generation service
+- Share audio generation service
+- Share image generation service
+- Unified cost tracking
+
+---
+
+## Success Metrics
+
+### Engagement Metrics
+- Video post engagement vs. text posts (target: 3x higher)
+- Image post engagement vs. text posts (target: 2x higher)
+- Multimedia post reach vs. text posts (target: 2.5x higher)
+
+### Adoption Metrics
+- Video post creation rate (target: >30% of users)
+- Image generation usage (target: >60% of users)
+- Avatar video usage (target: >20% of Pro users)
+
+### Quality Metrics
+- Video quality satisfaction (target: >4.5/5)
+- Image quality satisfaction (target: >4.5/5)
+- User satisfaction with multimedia features (target: >4.5/5)
+
+### Business Metrics
+- Premium tier conversion (multimedia as differentiator)
+- User retention (multimedia users vs. text-only)
+- Content generation volume (multimedia users create more)
+
+---
+
+## Risk Mitigation
+
+| Risk | Mitigation |
+|------|------------|
+| High costs | Pre-flight validation, tier-based limits, cost estimates |
+| Quality issues | Quality checks, preview before generation, regeneration option |
+| LinkedIn API changes | Monitor LinkedIn updates, adapt quickly |
+| User confusion | Clear UI, tooltips, tutorials, documentation |
+| Performance issues | Optimize generation, queue system, background processing |
+
+---
+
+## Competitive Advantage
+
+### Unique Features
+1. **Complete Multimedia Suite**: Text + Image + Video + Audio in one tool
+2. **Persona Integration**: Consistent brand voice and avatar
+3. **LinkedIn Optimization**: Platform-specific optimizations
+4. **Cost-Effective**: More affordable than competitors
+5. **AI-Powered**: Automated content generation
+
+### Market Position
+- **vs. Canva**: More AI-powered, integrated with content generation
+- **vs. Loom**: More features, LinkedIn-optimized, persona integration
+- **vs. Descript**: More affordable, LinkedIn-focused, persona integration
+
+---
+
+## Next Steps
+
+1. **Week 1**: Set up WaveSpeed API access for LinkedIn videos
+2. **Week 1-2**: Implement video post generation
+3. **Week 2-3**: Create video post creator UI
+4. **Week 4-5**: Enhance image generation
+5. **Week 6-7**: Integrate avatar videos
+6. **Week 8**: Add audio narration
+7. **Week 8-9**: Create unified multimedia creator
+8. **Week 9**: Testing, optimization, and polish
+
+---
+
+*Document Version: 1.0*  
+*Last Updated: January 2025*  
+*Priority: HIGH - LinkedIn Engagement Driver*
+
--- a/docs/PERSONA_VOICE_AVATAR_HYPERPERSONALIZATION.md
+++ b/docs/PERSONA_VOICE_AVATAR_HYPERPERSONALIZATION.md
@@ -0,0 +1,615 @@
+# Persona System: Voice Cloning & Avatar Hyper-Personalization
+
+## Executive Summary
+
+This document outlines the integration of voice cloning and AI avatar capabilities into ALwrity's Persona System to enable true hyper-personalization. Users will train their voice and create their avatar during onboarding, then use these across all content generation (LinkedIn, Blog, Story Writer, etc.) for consistent brand identity.
+
+---
+
+## Vision: AI Hyper-Personalization
+
+**Goal**: Every piece of content generated by ALwrity should feel authentically "you" - not just in writing style, but in voice and visual presence.
+
+**Current State**: Persona system handles writing style only  
+**Target State**: Persona system handles writing style + voice + avatar = complete brand identity
+
+---
+
+## Current Persona System Analysis
+
+### Existing Capabilities
+- **Writing Style Analysis**: Tone, voice, complexity, engagement level
+- **Platform Adaptation**: LinkedIn, Facebook, Blog optimizations
+- **Content Characteristics**: Sentence structure, vocabulary, patterns
+- **Onboarding Integration**: Automatically generated from onboarding data
+
+### Current Limitations
+- No voice/personality in audio content
+- No visual representation
+- Limited to text-based personalization
+- Cannot create video content with user's presence
+
+### Persona System Architecture
+**Location**: `backend/services/persona_analysis_service.py`
+
+**Current Flow**:
+1. User completes onboarding (6 steps)
+2. System analyzes website content and writing style
+3. Core persona generated
+4. Platform-specific adaptations created
+5. Persona saved to database
+
+**Database Model**: `backend/models/persona_models.py` - `WritingPersona` table
+
+---
+
+## Proposed Enhancements
+
+### 1. Voice Cloning Integration
+
+#### 1.1 Voice Training During Onboarding
+
+**Integration Point**: Onboarding Step 6 (Persona Generation)
+
+**New Onboarding Flow**:
+```
+Step 1-5: Existing onboarding steps
+Step 6: Persona Generation
+  ├─ Writing Style Analysis (existing)
+  ├─ Voice Training (NEW)
+  │   ├─ Audio sample upload (1-3 minutes)
+  │   ├─ Voice clone training (~2-5 minutes)
+  │   └─ Voice preview and approval
+  └─ Avatar Creation (NEW)
+      ├─ Photo upload
+      ├─ Avatar generation
+      └─ Avatar preview and approval
+```
+
+**Implementation**:
+
+**Backend**: `backend/services/persona/voice_persona_service.py` (NEW)
+```python
+class VoicePersonaService:
+    """
+    Manages voice cloning for persona system.
+    Integrates with Minimax voice clone API.
+    """
+    
+    def train_voice_from_audio(
+        self,
+        user_id: str,
+        audio_file_path: str,
+        persona_id: int,
+    ) -> Dict[str, Any]:
+        """
+        Train voice clone from user's audio sample.
+        Links voice to persona.
+        """
+        # 1. Validate audio file (format, length, quality)
+        # 2. Upload to Minimax
+        # 3. Train voice clone
+        # 4. Store voice_id in persona
+        # 5. Return training status
+        pass
+    
+    def generate_audio_with_persona_voice(
+        self,
+        text: str,
+        persona_id: int,
+        emotion: str = "neutral",
+        speed: float = 1.0,
+    ) -> bytes:
+        """
+        Generate audio using persona's cloned voice.
+        """
+        # 1. Get voice_id from persona
+        # 2. Call Minimax voice generation
+        # 3. Return audio bytes
+        pass
+```
+
+**Database Schema Update**: `backend/models/persona_models.py`
+```python
+class WritingPersona(Base):
+    # Existing fields...
+    
+    # NEW: Voice cloning fields
+    voice_id: Optional[str] = Column(String(255), nullable=True)
+    voice_training_status: Optional[str] = Column(String(50), nullable=True)  # 'not_trained', 'training', 'ready', 'failed'
+    voice_training_audio_url: Optional[str] = Column(String(500), nullable=True)
+    voice_trained_at: Optional[datetime] = Column(DateTime, nullable=True)
+    
+    # NEW: Avatar fields
+    avatar_id: Optional[str] = Column(String(255), nullable=True)
+    avatar_image_url: Optional[str] = Column(String(500), nullable=True)
+    avatar_training_status: Optional[str] = Column(String(50), nullable=True)
+    avatar_created_at: Optional[datetime] = Column(DateTime, nullable=True)
+```
+
+**Frontend**: `frontend/src/components/Onboarding/PersonaGenerationStep.tsx` (NEW)
+```typescript
+interface PersonaGenerationStepProps {
+  onboardingData: OnboardingData;
+  onComplete: (persona: Persona) => void;
+}
+
+const PersonaGenerationStep: React.FC<PersonaGenerationStepProps> = ({
+  onboardingData,
+  onComplete,
+}) => {
+  // 1. Show writing style analysis progress
+  // 2. Show voice training section
+  // 3. Show avatar creation section
+  // 4. Preview complete persona
+  // 5. Allow approval/modification
+};
+```
+
+#### 1.2 Voice Usage Across Platform
+
+**Integration Points**:
+- **Story Writer**: Use persona voice for audio narration
+- **LinkedIn**: Voice-over for video posts
+- **Blog**: Audio narration for blog posts
+- **Email**: Personalized voice messages
+- **Social Media**: Video content with user's voice
+
+**Implementation Pattern**:
+```python
+# In any content generation service
+def generate_content_with_persona(user_id: str, content_type: str):
+    # 1. Get user's persona
+    persona = get_persona(user_id)
+    
+    # 2. Generate text content (existing)
+    text_content = generate_text(persona)
+    
+    # 3. Generate audio with persona voice (NEW)
+    if persona.voice_id and persona.voice_training_status == 'ready':
+        audio_content = voice_service.generate_audio_with_persona_voice(
+            text=text_content,
+            persona_id=persona.id,
+        )
+    
+    # 4. Generate video with persona avatar (NEW)
+    if persona.avatar_id:
+        video_content = avatar_service.generate_video_with_persona_avatar(
+            text=text_content,
+            audio=audio_content,
+            persona_id=persona.id,
+        )
+    
+    return {
+        'text': text_content,
+        'audio': audio_content,
+        'video': video_content,
+    }
+```
+
+---
+
+### 2. Avatar Creation Integration
+
+#### 2.1 Avatar Training During Onboarding
+
+**Integration Point**: Onboarding Step 6 (Persona Generation)
+
+**Avatar Options**:
+1. **Hunyuan Avatar**: Talking avatar from photo + audio
+2. **InfiniteTalk**: Long-form avatar videos
+3. **Custom Avatar**: User's photo as avatar base
+
+**Implementation**:
+
+**Backend**: `backend/services/persona/avatar_persona_service.py` (NEW)
+```python
+class AvatarPersonaService:
+    """
+    Manages avatar creation for persona system.
+    Integrates with WaveSpeed Hunyuan Avatar and InfiniteTalk.
+    """
+    
+    def create_avatar_from_photo(
+        self,
+        user_id: str,
+        photo_file_path: str,
+        persona_id: int,
+    ) -> Dict[str, Any]:
+        """
+        Create avatar from user's photo.
+        Uses Hunyuan Avatar for initial creation.
+        """
+        # 1. Validate photo (format, size, quality)
+        # 2. Upload to WaveSpeed
+        # 3. Create avatar
+        # 4. Store avatar_id in persona
+        # 5. Return avatar preview
+        pass
+    
+    def generate_video_with_persona_avatar(
+        self,
+        text: str,
+        audio_bytes: bytes,
+        persona_id: int,
+        duration: int = 60,  # seconds
+    ) -> bytes:
+        """
+        Generate video with persona's avatar speaking.
+        Uses InfiniteTalk for long-form, Hunyuan for short.
+        """
+        # 1. Get avatar_id from persona
+        # 2. Get voice_id from persona (for audio)
+        # 3. Call WaveSpeed API
+        # 4. Return video bytes
+        pass
+```
+
+#### 2.2 Avatar Usage Across Platform
+
+**Use Cases**:
+- **LinkedIn Video Posts**: User's avatar presenting content
+- **Story Writer**: Avatar narrating story scenes
+- **Blog Videos**: Avatar explaining blog content
+- **Email Campaigns**: Personalized video messages
+- **Social Media**: Consistent avatar across platforms
+
+---
+
+### 3. Enhanced Persona Management
+
+#### 3.1 Persona Dashboard
+
+**New UI Component**: `frontend/src/components/Persona/PersonaDashboard.tsx`
+
+**Features**:
+- Persona overview (writing style, voice, avatar)
+- Voice training status and preview
+- Avatar preview and management
+- Usage statistics (where persona is used)
+- Edit/update options
+
+#### 3.2 Persona Settings
+
+**New UI Component**: `frontend/src/components/Persona/PersonaSettings.tsx`
+
+**Settings**:
+- Voice parameters (emotion, speed, tone)
+- Avatar appearance (clothing, background, style)
+- Platform-specific adaptations
+- Content type preferences
+
+---
+
+## Implementation Phases
+
+### Phase 1: Voice Cloning Integration (Week 1-3)
+
+**Priority**: HIGH - Core hyper-personalization feature
+
+**Tasks**:
+1. ✅ Create `VoicePersonaService`
+2. ✅ Integrate Minimax voice clone API
+3. ✅ Add voice fields to `WritingPersona` model
+4. ✅ Update onboarding Step 6 with voice training
+5. ✅ Create voice training UI component
+6. ✅ Add voice preview and testing
+7. ✅ Integrate voice into Story Writer
+8. ✅ Add voice usage tracking
+9. ✅ Update persona dashboard
+10. ✅ Testing and optimization
+
+**Files to Create**:
+- `backend/services/persona/voice_persona_service.py`
+- `frontend/src/components/Onboarding/VoiceTrainingSection.tsx`
+- `frontend/src/components/Persona/VoiceManagement.tsx`
+
+**Files to Modify**:
+- `backend/models/persona_models.py`
+- `backend/services/persona_analysis_service.py`
+- `backend/api/onboarding_utils/` (onboarding routes)
+- `frontend/src/components/Onboarding/PersonaGenerationStep.tsx`
+- `backend/services/story_writer/audio_generation_service.py`
+
+**Success Criteria**:
+- Users can train voice during onboarding
+- Voice used automatically in Story Writer
+- Voice quality significantly better than gTTS
+- Voice linked to persona
+- Cost tracking accurate
+
+---
+
+### Phase 2: Avatar Creation Integration (Week 4-6)
+
+**Priority**: HIGH - Visual personalization
+
+**Tasks**:
+1. ✅ Create `AvatarPersonaService`
+2. ✅ Integrate Hunyuan Avatar API
+3. ✅ Add avatar fields to `WritingPersona` model
+4. ✅ Update onboarding Step 6 with avatar creation
+5. ✅ Create avatar creation UI component
+6. ✅ Add avatar preview and testing
+7. ✅ Integrate avatar into content generation
+8. ✅ Add avatar usage tracking
+9. ✅ Update persona dashboard
+10. ✅ Testing and optimization
+
+**Files to Create**:
+- `backend/services/persona/avatar_persona_service.py`
+- `frontend/src/components/Onboarding/AvatarCreationSection.tsx`
+- `frontend/src/components/Persona/AvatarManagement.tsx`
+
+**Files to Modify**:
+- `backend/models/persona_models.py`
+- `backend/services/persona_analysis_service.py`
+- `frontend/src/components/Onboarding/PersonaGenerationStep.tsx`
+- `backend/services/story_writer/video_generation_service.py`
+
+**Success Criteria**:
+- Users can create avatar during onboarding
+- Avatar used in video content generation
+- Avatar quality good
+- Avatar linked to persona
+- Cost tracking accurate
+
+---
+
+### Phase 3: Cross-Platform Integration (Week 7-8)
+
+**Priority**: MEDIUM - Complete hyper-personalization
+
+**Tasks**:
+1. ✅ Integrate persona voice into LinkedIn Writer
+2. ✅ Integrate persona avatar into LinkedIn Writer
+3. ✅ Integrate persona voice into Blog Writer
+4. ✅ Integrate persona avatar into Blog Writer
+5. ✅ Add persona usage analytics
+6. ✅ Update all content generation services
+7. ✅ Create persona usage dashboard
+8. ✅ Documentation and user guides
+
+**Success Criteria**:
+- Persona voice/avatar used across all platforms
+- Consistent brand identity
+- Good user experience
+- Analytics working
+
+---
+
+## Cost Management
+
+### Voice Cloning Costs
+
+**One-Time Training**: $0.75 per voice
+**Per-Minute Generation**: $0.02 per minute
+
+**Cost Optimization**:
+- Train voice once during onboarding (included in Pro/Enterprise)
+- Free tier: gTTS only
+- Basic tier: Voice training available ($0.75 one-time)
+- Pro/Enterprise: Voice training included
+
+### Avatar Creation Costs
+
+**Hunyuan Avatar**: $0.15-0.30 per 5 seconds
+**InfiniteTalk**: $0.15-0.30 per 5 seconds (up to 10 minutes)
+
+**Cost Optimization**:
+- Avatar creation: One-time during onboarding
+- Video generation: Pay-per-use
+- Default to shorter videos (5 seconds)
+- Allow longer videos for premium users
+
+### Subscription Integration
+
+**Update Subscription Tiers**:
+- **Free**: Writing persona only, no voice/avatar
+- **Basic**: Writing persona + voice training ($0.75 one-time)
+- **Pro**: Writing persona + voice + avatar creation included
+- **Enterprise**: All features + unlimited usage
+
+---
+
+## User Experience Flow
+
+### Onboarding Flow (Enhanced)
+
+```
+Step 1-5: Existing onboarding steps
+         ↓
+Step 6: Persona Generation
+         ├─ Writing Style Analysis
+         │   └─ [Progress: Analyzing your writing style...]
+         │
+         ├─ Voice Training (NEW)
+         │   ├─ Upload audio sample (1-3 minutes)
+         │   ├─ [Training your voice...] (~2-5 minutes)
+         │   ├─ Preview generated voice
+         │   └─ Approve or retrain
+         │
+         └─ Avatar Creation (NEW)
+             ├─ Upload photo
+             ├─ [Creating your avatar...] (~1-2 minutes)
+             ├─ Preview avatar
+             └─ Approve or recreate
+         ↓
+Step 7: Persona Preview
+         ├─ Writing Style Summary
+         ├─ Voice Preview
+         ├─ Avatar Preview
+         └─ Approve Complete Persona
+```
+
+### Content Generation Flow (Enhanced)
+
+```
+User creates content (LinkedIn/Blog/Story)
+         ↓
+System loads user's persona
+         ├─ Writing style → Text generation
+         ├─ Voice ID → Audio generation (if available)
+         └─ Avatar ID → Video generation (if available)
+         ↓
+Content generated with full personalization
+         ├─ Text matches writing style
+         ├─ Audio uses user's voice
+         └─ Video shows user's avatar
+```
+
+---
+
+## Technical Architecture
+
+### Backend Services
+
+```
+backend/services/
+├── persona/
+│   ├── __init__.py
+│   ├── voice_persona_service.py      # NEW: Voice cloning
+│   ├── avatar_persona_service.py     # NEW: Avatar creation
+│   └── persona_analysis_service.py    # Enhanced
+├── minimax/
+│   └── voice_clone.py                 # Shared with Story Writer
+└── wavespeed/
+    └── avatar_generation.py           # Shared with Story Writer
+```
+
+### Frontend Components
+
+```
+frontend/src/components/
+├── Onboarding/
+│   ├── PersonaGenerationStep.tsx       # Enhanced
+│   ├── VoiceTrainingSection.tsx       # NEW
+│   └── AvatarCreationSection.tsx       # NEW
+└── Persona/
+    ├── PersonaDashboard.tsx            # NEW
+    ├── VoiceManagement.tsx             # NEW
+    ├── AvatarManagement.tsx            # NEW
+    └── PersonaSettings.tsx             # NEW
+```
+
+### Database Schema
+
+```sql
+-- Enhanced WritingPersona table
+ALTER TABLE writing_persona ADD COLUMN voice_id VARCHAR(255);
+ALTER TABLE writing_persona ADD COLUMN voice_training_status VARCHAR(50);
+ALTER TABLE writing_persona ADD COLUMN voice_training_audio_url VARCHAR(500);
+ALTER TABLE writing_persona ADD COLUMN voice_trained_at TIMESTAMP;
+
+ALTER TABLE writing_persona ADD COLUMN avatar_id VARCHAR(255);
+ALTER TABLE writing_persona ADD COLUMN avatar_image_url VARCHAR(500);
+ALTER TABLE writing_persona ADD COLUMN avatar_training_status VARCHAR(50);
+ALTER TABLE writing_persona ADD COLUMN avatar_created_at TIMESTAMP;
+```
+
+---
+
+## Integration with Existing Systems
+
+### Story Writer Integration
+
+**Location**: `backend/services/story_writer/audio_generation_service.py`
+
+**Enhancement**:
+```python
+def generate_scene_audio(
+    self,
+    scene: Dict[str, Any],
+    user_id: str,
+    use_persona_voice: bool = True,  # NEW: Use persona voice
+) -> Dict[str, Any]:
+    if use_persona_voice:
+        # Get user's persona
+        persona = get_persona(user_id)
+        if persona.voice_id and persona.voice_training_status == 'ready':
+            # Use persona voice
+            return self._generate_with_persona_voice(scene, persona)
+    
+    # Fallback to default provider
+    return self._generate_with_gtts(scene)
+```
+
+### LinkedIn Writer Integration
+
+**Enhancement**: Add video generation with persona avatar
+- LinkedIn video posts with user's avatar
+- Voice-over with user's voice
+- Consistent brand presence
+
+### Blog Writer Integration
+
+**Enhancement**: Add audio/video options
+- Audio narration with persona voice
+- Video explanations with persona avatar
+- Enhanced blog content
+
+---
+
+## Success Metrics
+
+### Adoption Metrics
+- Voice training completion rate (target: >60% of Pro users)
+- Avatar creation completion rate (target: >50% of Pro users)
+- Persona usage across platforms (target: >80% of content uses persona)
+
+### Quality Metrics
+- Voice quality satisfaction (target: >4.5/5)
+- Avatar quality satisfaction (target: >4.5/5)
+- Brand consistency score (target: >90%)
+
+### Business Metrics
+- User retention (persona users vs. non-persona)
+- Content engagement (persona content vs. generic)
+- Premium tier conversion (persona as differentiator)
+
+---
+
+## Risk Mitigation
+
+| Risk | Mitigation |
+|------|------------|
+| Voice training failure | Quality checks, clear error messages, retry option |
+| Avatar quality issues | Preview before approval, regeneration option |
+| Cost concerns | Clear pricing, tier-based access, cost estimates |
+| User privacy | Secure storage, opt-in consent, data encryption |
+| API reliability | Fallback options, retry logic, error handling |
+
+---
+
+## Privacy & Security
+
+### Data Storage
+- Voice samples: Encrypted storage, deleted after training
+- Avatar photos: Encrypted storage, user can delete
+- Voice/Avatar IDs: Secure API keys, no raw data stored
+
+### User Control
+- Users can delete voice/avatar anytime
+- Users can retrain voice/avatar
+- Users can opt-out of voice/avatar features
+- Clear privacy policy
+
+---
+
+## Next Steps
+
+1. **Week 1**: Set up Minimax API access
+2. **Week 1-2**: Implement voice persona service
+3. **Week 2-3**: Integrate voice training into onboarding
+4. **Week 3-4**: Integrate into Story Writer
+5. **Week 4-5**: Set up WaveSpeed avatar API
+6. **Week 5-6**: Implement avatar persona service
+7. **Week 6-7**: Integrate avatar creation into onboarding
+8. **Week 7-8**: Cross-platform integration
+
+---
+
+*Document Version: 1.0*  
+*Last Updated: January 2025*  
+*Priority: HIGH - Core Hyper-Personalization Feature*
+
--- a/docs/STORY_WRITER_VIDEO_ENHANCEMENT.md
+++ b/docs/STORY_WRITER_VIDEO_ENHANCEMENT.md
@@ -0,0 +1,834 @@
+# Story Writer Video Generation Enhancement Plan
+
+## Executive Summary
+
+This document outlines the immediate enhancement plan for ALwrity's Story Writer: make WaveSpeed AI models the primary video provider in place of the unreliable HuggingFace integration (which remains available as an automatic fallback), and add professional voice cloning alongside the basic gTTS audio (which remains the free default). This provides immediate value to users while solving current technical issues.
+
+---
+
+## Current State Analysis
+
+### Current Video Generation
+- **Provider**: HuggingFace (tencent/HunyuanVideo via fal-ai)
+- **Issues**: 
+  - Unreliable API responses
+  - Limited quality control
+  - No audio synchronization
+  - Single provider dependency
+  - Poor error handling
+
+### Current Audio Generation
+- **Provider**: gTTS (Google Text-to-Speech)
+- **Limitations**:
+  - Robotic, non-natural voice
+  - No brand voice consistency
+  - Limited language options
+  - No emotion control
+  - Cannot clone user's voice
+
+### Current Story Writer Workflow
+1. User creates story outline with scenes
+2. Each scene has `audio_narration` text
+3. Audio generated via gTTS per scene
+4. Video generated via HuggingFace per scene
+5. Videos compiled into final story video
+
+**Location**: `backend/api/story_writer/` and `frontend/src/components/StoryWriter/`
+
+---
+
+## Proposed Enhancements
+
+### Core Principles
+
+**Provider Abstraction**: 
+- Users should NOT see provider names (HuggingFace, WaveSpeed, etc.)
+- All provider routing/switching happens automatically in the background
+- Users only see user-friendly options like "Standard Quality" or "Premium Quality"
+- System automatically selects best available provider based on user's subscription and credits
+
+**Preserve Existing Options**:
+- gTTS remains available as free fallback when credits run out
+- HuggingFace remains available as fallback option
+- All existing functionality preserved
+- New features are additions, not replacements
+
+**Cost Transparency**:
+- All buttons show cost information in tooltips
+- Users make informed decisions before generating
+- No surprise costs
+
+---
+
+### 1. Provider-Agnostic Video Generation System
+
+#### 1.1 Smart Provider Routing
+
+**Backend Implementation** (`backend/services/llm_providers/main_video_generation.py`):
+
+```python
+def ai_video_generate(
+    prompt: str,
+    quality: str = "standard",  # "standard" (480p), "high" (720p), "premium" (1080p)
+    duration: int = 5,
+    audio_file_path: Optional[str] = None,
+    *,  # user_id is required, so it must be keyword-only to follow defaulted parameters
+    user_id: str,
+    **kwargs,
+) -> bytes:
+    """
+    Unified video generation entry point.
+    Automatically routes to best available provider:
+    - WaveSpeed WAN 2.5 (primary, if credits available)
+    - HuggingFace (fallback, if WaveSpeed unavailable)
+    
+    Users never see provider names - only quality options.
+    """
+    # 1. Check user subscription and credits
+    # 2. Select best available provider automatically
+    # 3. Route to appropriate provider function
+    # 4. Handle fallbacks transparently
+    pass
+
+def _select_video_provider(
+    user_id: str,
+    quality: str,
+    pricing_service: PricingService,
+) -> Tuple[str, str]:
+    """
+    Automatically select best video provider.
+    Returns: (provider_name, model_name)
+    
+    Selection logic:
+    1. Check user credits/subscription
+    2. Prefer WaveSpeed if available and credits sufficient
+    3. Fallback to HuggingFace if WaveSpeed unavailable
+    4. Return error if no providers available
+    """
+    # Implementation details...
+```
+
+**Key Features**:
+- Automatic provider selection (users don't choose)
+- Seamless fallback between providers
+- Quality-based options (Standard/High/Premium) instead of provider names
+- Cost-aware routing (uses cheapest available option)
+- Transparent error handling
+
+**Quality Mapping**:
+- **Standard Quality** (480p): $0.05/second - Uses WaveSpeed 480p or HuggingFace
+- **High Quality** (720p): $0.10/second - Uses WaveSpeed 720p
+- **Premium Quality** (1080p): $0.15/second - Uses WaveSpeed 1080p
+
+**Cost Optimization**:
+- Default to Standard Quality (480p) for cost-effectiveness
+- Allow upgrade to High/Premium for final export
+- Pre-flight validation prevents waste
+- Automatic fallback to free options when credits exhausted
+
+---
+
+### 2. Enhanced Audio Generation with Voice Cloning
+
+#### 2.1 User-Friendly Voice Selection
+
+**Key Principle**: Users choose between "AI Clone Voice" or "Default Voice" (gTTS) - no provider names shown.
+
+**Backend Implementation** (`backend/services/story_writer/audio_generation_service.py`):
+
+```python
+class StoryAudioGenerationService:
+    def generate_scene_audio(
+        self,
+        scene: Dict[str, Any],
+        user_id: str,
+        use_ai_voice: bool = False,  # User's choice: AI Clone or Default
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """
+        Generate audio with automatic provider selection.
+        
+        If use_ai_voice=True:
+            - Try persona voice clone (if trained)
+            - Try Minimax voice clone (if credits available)
+            - Fallback to gTTS if no credits
+        
+        If use_ai_voice=False:
+            - Use gTTS (always free, always available)
+        """
+        if use_ai_voice:
+            # Try AI voice options
+            if self._has_persona_voice(user_id):
+                return self._generate_with_persona_voice(scene, user_id)
+            elif self._has_credits_for_voice_clone(user_id):
+                return self._generate_with_minimax_voice_clone(scene, user_id)
+            else:
+                # Fallback to gTTS with notification
+                logger.info(f"Credits exhausted, falling back to gTTS for user {user_id}")
+                return self._generate_with_gtts(scene, **kwargs)
+        else:
+            # User explicitly chose default voice
+            return self._generate_with_gtts(scene, **kwargs)
+```
+
+**Voice Options in Story Setup**:
+- **Default Voice (gTTS)**: Free, always available, robotic but functional
+- **AI Clone Voice**: Natural, human-like, requires credits ($0.02/minute)
+
+**Cost Considerations**:
+- Voice training: One-time cost (~$0.75) - only if user wants to train custom voice
+- Voice generation: ~$0.02 per minute (only when AI Clone Voice selected)
+- gTTS: Always free, always available as fallback
+- Automatic fallback to gTTS when credits exhausted (with user notification)
+
+---
+
+### 3. Enhanced Story Setup UI
+
+#### 3.1 Video Generation Settings (Provider-Agnostic)
+
+**Location**: `frontend/src/components/StoryWriter/Phases/StorySetup/GenerationSettingsSection.tsx`
+
+**User-Friendly Settings** (No Provider Names):
+```typescript
+interface VideoGenerationSettings {
+  // Quality selection (NOT provider selection)
+  videoQuality: 'standard' | 'high' | 'premium';  // Maps to 480p/720p/1080p
+  
+  // Duration
+  videoDuration: 5 | 10;  // seconds
+  
+  // Cost estimation (shown in tooltip)
+  estimatedCostPerScene: number;
+  totalEstimatedCost: number;
+  
+  // Provider routing happens automatically in backend
+  // Users never see "WaveSpeed" or "HuggingFace"
+}
+```
+
+**UI Components**:
+- Quality selector: "Standard" / "High" / "Premium" (with cost in tooltip)
+- Duration selector: 5s (default) / 10s (premium)
+- Cost tooltip: Shows estimated cost per scene and total
+- Pre-flight validation warnings
+- **No provider selector** - routing is automatic
+
+**Tooltip Example**:
+```
+Standard Quality (480p)
+├─ Cost: $0.25 per scene (5 seconds)
+├─ Quality: Good for previews and testing
+└─ Provider: Automatically selected based on credits
+```
+
+#### 3.2 Audio Generation Settings (Simple Choice)
+
+**New Settings**:
+```typescript
+interface AudioGenerationSettings {
+  // Simple user choice - no provider names
+  voiceType: 'default' | 'ai_clone';  // "Default Voice" or "AI Clone Voice"
+  
+  // Only shown if ai_clone selected
+  voiceTrainingStatus: 'not_trained' | 'training' | 'ready' | 'failed';
+  
+  // Existing gTTS settings (preserved)
+  audioLang: string;
+  audioSlow: boolean;
+  audioRate: number;
+}
+```
+
+**UI Components**:
+- **Voice Type Selector**: 
+  - "Default Voice (gTTS)" - Free, always available
+  - "AI Clone Voice" - Natural, $0.02/minute (with cost tooltip)
+- Voice training section (only if AI Clone Voice selected)
+- Existing gTTS settings (preserved for Default Voice)
+- Cost per minute display in tooltip
+
+**Tooltip for "AI Clone Voice"**:
+```
+AI Clone Voice
+├─ Cost: $0.02 per minute
+├─ Quality: Natural, human-like narration
+├─ Fallback: Automatically uses Default Voice if credits exhausted
+└─ Training: One-time $0.75 to train your custom voice (optional)
+```
+
+**Tooltip for "Default Voice"**:
+```
+Default Voice (gTTS)
+├─ Cost: Free
+├─ Quality: Standard text-to-speech
+└─ Always Available: Works even when credits exhausted
+```
+
+---
+
+### 4. New "Animate Scene" Feature in Outline Phase
+
+#### 4.1 Per-Scene Animation Preview
+
+**Location**: `frontend/src/components/StoryWriter/Phases/StoryOutline.tsx`
+
+**Feature**: Add "Animate Scene" hover option alongside existing scene actions
+
+**Implementation**:
+- Add to `OutlineHoverActions` component
+- Appears on hover over scene cards
+- Only generates for single scene (never bulk)
+- Uses cheapest option (480p/Standard Quality) to give users a feel
+- Shows cost in tooltip before generation
+
+**UI Component**:
+```typescript
+// In OutlineHoverActions.tsx
+const sceneHoverActions = [
+  // Existing actions...
+  {
+    icon: <PlayArrowIcon />,
+    label: 'Animate Scene',
+    action: 'animate-scene',
+    tooltip: `Animate this scene with video\nCost: ~$0.25 (5 seconds, Standard Quality)\nPreview only - uses cheapest option`,
+    onClick: handleAnimateScene,
+  },
+];
+```
+
+**Backend Endpoint**:
+```python
+@router.post("/animate-scene-preview")
+async def animate_scene_preview(
+    request: SceneAnimationRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> SceneAnimationResponse:
+    """
+    Generate preview animation for a single scene.
+    Always uses cheapest option (480p/Standard Quality).
+    Per-scene only - never bulk generation.
+    """
+    # 1. Validate single scene only
+    # 2. Use Standard Quality (480p) - cheapest option
+    # 3. Generate video with automatic provider routing
+    # 4. Return preview video URL
+    pass
+```
+
+**Cost Management**:
+- Always uses Standard Quality (480p) - $0.25 per scene
+- Pre-flight validation before generation
+- Clear cost display in tooltip
+- Per-scene only prevents bulk waste
+
+---
+
+### 5. New "Animate Story with VoiceOver" Button in Writing Phase
+
+#### 5.1 Complete Story Animation
+
+**Location**: `frontend/src/components/StoryWriter/Phases/StoryWriting.tsx`
+
+**Feature**: New button alongside existing HuggingFace video options
+
+**Implementation**:
+- Add button in Writing phase toolbar
+- Generates complete animated story with synchronized voiceover
+- Uses user's voice preference from Setup (AI Clone or Default)
+- Shows comprehensive cost breakdown in tooltip
+- Pre-flight validation before generation
+
+**UI Component**:
+```typescript
+<Button
+  variant="contained"
+  startIcon={<SmartDisplayIcon />}
+  onClick={handleAnimateStoryWithVoiceOver}
+  disabled={!state.storyContent || isGenerating}
+  title={`Animate Story with VoiceOver\n\nCost Breakdown:\n- Video: $${videoCost} (${scenes.length} scenes × $${costPerScene})\n- Audio: $${audioCost} (${totalAudioMinutes} minutes)\n- Total: $${totalCost}\n\nQuality: ${state.videoQuality}\nVoice: ${state.voiceType === 'ai_clone' ? 'AI Clone' : 'Default'}`}
+>
+  Animate Story with VoiceOver
+</Button>
+```
+
+**Backend Endpoint**:
+```python
+@router.post("/animate-story-with-voiceover")
+async def animate_story_with_voiceover(
+    request: StoryAnimationRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+) -> StoryAnimationResponse:
+    """
+    Generate complete animated story with synchronized voiceover.
+    Uses user's quality and voice preferences from Setup.
+    """
+    # 1. Pre-flight validation (cost, credits, limits)
+    # 2. Generate audio for all scenes (using user's voice preference)
+    # 3. Generate videos for all scenes (using user's quality preference)
+    # 4. Synchronize audio with video
+    # 5. Compile into final story video
+    # 6. Return video URL and cost breakdown
+    pass
+```
+
+**Cost Tooltip Example**:
+```
+Animate Story with VoiceOver
+
+Cost Breakdown:
+├─ Video (Standard Quality): $2.50
+│  └─ 10 scenes × $0.25 per scene
+├─ Audio (AI Clone Voice): $1.00
+│  └─ 50 minutes total × $0.02/minute
+└─ Total: $3.50
+
+Settings:
+├─ Quality: Standard (480p)
+├─ Voice: AI Clone Voice
+└─ Duration: 5 seconds per scene
+
+⚠️ This will use $3.50 of your monthly credits
+```
+
+---
+
+## Implementation Phases
+
+### Phase 1: Provider-Agnostic Video System (Week 1-2)
+
+**Priority**: HIGH - Solves immediate HuggingFace issues with provider abstraction
+
+**Tasks**:
+1. ✅ Create WaveSpeed API client (`backend/services/wavespeed/client.py`)
+2. ✅ Add WAN 2.5 text-to-video function
+3. ✅ Implement smart provider routing in `main_video_generation.py`
+4. ✅ Add quality-based selection (Standard/High/Premium)
+5. ✅ Preserve HuggingFace as fallback option
+6. ✅ Update `hd_video.py` with provider routing
+7. ✅ Add pre-flight cost validation
+8. ✅ Update frontend with quality selector (remove provider names)
+9. ✅ Add cost tooltips to all buttons
+10. ✅ Update subscription limits
+11. ✅ Testing and error handling
+
+**Files to Modify**:
+- `backend/services/llm_providers/main_video_generation.py` (add routing logic)
+- `backend/api/story_writer/utils/hd_video.py` (use quality-based API)
+- `backend/api/story_writer/routes/video_generation.py`
+- `frontend/src/components/StoryWriter/Phases/StorySetup/GenerationSettingsSection.tsx` (quality selector)
+- `frontend/src/components/StoryWriter/components/HdVideoSection.tsx`
+- `backend/services/subscription/pricing_service.py`
+
+**Success Criteria**:
+- Video generation works reliably with automatic provider routing
+- Users see quality options, not provider names
+- HuggingFace preserved as fallback
+- Cost tracking accurate
+- Pre-flight validation prevents waste
+- Error messages clear and actionable
+
+---
+
+### Phase 2: Voice Cloning Integration (Week 3-4)
+
+**Priority**: MEDIUM - Enhances audio quality with simple user choice
+
+**Tasks**:
+1. ✅ Create Minimax API client (`backend/services/minimax/voice_clone.py`)
+2. ✅ Add voice training endpoint
+3. ✅ Add voice generation endpoint
+4. ✅ Update `audio_generation_service.py` with "AI Clone" vs "Default" logic
+5. ✅ Preserve gTTS as always-available fallback
+6. ✅ Add automatic fallback when credits exhausted
+7. ✅ Update Story Setup with simple voice type selector
+8. ✅ Add cost tooltips to voice options
+9. ✅ Add voice preview and testing (if AI Clone selected)
+10. ✅ Ensure gTTS always works even when credits exhausted
+
+**Files to Create**:
+- `backend/services/minimax/voice_clone.py`
+- `backend/services/story_writer/voice_management_service.py`
+
+**Files to Modify**:
+- `backend/services/story_writer/audio_generation_service.py` (add voice type logic)
+- `frontend/src/components/StoryWriter/Phases/StorySetup/GenerationSettingsSection.tsx` (voice type selector)
+- `backend/models/story_models.py` (add voice type field)
+
+**Success Criteria**:
+- Users see simple choice: "Default Voice" or "AI Clone Voice"
+- gTTS always available as fallback
+- Automatic fallback when credits exhausted
+- Cost tracking accurate
+- Voice quality significantly better than gTTS when AI Clone used
+
+---
+
+### Phase 3: New Features - Animate Scene & Animate Story (Week 5-6)
+
+**Priority**: MEDIUM - Add preview and complete animation features
+
+**Tasks**:
+1. ✅ Add "Animate Scene" hover option in Outline phase
+2. ✅ Implement per-scene animation preview (cheapest option only)
+3. ✅ Add "Animate Story with VoiceOver" button in Writing phase
+4. ✅ Implement complete story animation with voiceover
+5. ✅ Add comprehensive cost tooltips to all buttons
+6. ✅ Add pre-flight validation for all animation features
+7. ✅ Ensure per-scene only (no bulk generation in Outline)
+8. ✅ Update documentation
+9. ✅ User testing and feedback
+
+**Files to Create**:
+- `backend/api/story_writer/routes/scene_animation.py` (new endpoint)
+- `frontend/src/components/StoryWriter/components/AnimateSceneButton.tsx`
+
+**Files to Modify**:
+- `frontend/src/components/StoryWriter/Phases/StoryOutlineParts/OutlineHoverActions.tsx` (add Animate Scene)
+- `frontend/src/components/StoryWriter/Phases/StoryWriting.tsx` (add Animate Story button)
+- `backend/api/story_writer/routes/video_generation.py` (add story animation endpoint)
+
+**Success Criteria**:
+- "Animate Scene" works in Outline (per-scene, cheapest option)
+- "Animate Story with VoiceOver" works in Writing phase
+- All buttons show cost in tooltips
+- Pre-flight validation prevents waste
+- Good user experience
+
+---
+
+### Phase 4: Integration & Optimization (Week 7-8)
+
+**Priority**: MEDIUM - Polish and optimize
+
+**Tasks**:
+1. ✅ Integrate audio with video (synchronized videos)
+2. ✅ Improve error handling and retry logic
+3. ✅ Add progress indicators
+4. ✅ Optimize cost calculations
+5. ✅ Add usage analytics
+6. ✅ Update documentation
+7. ✅ User testing and feedback
+
+**Success Criteria**:
+- Smooth end-to-end workflow
+- Cost-effective for users
+- Reliable generation
+- Excellent user experience
+- All features work seamlessly together
+
+---
+
+## Cost Management & Prevention of Waste
+
+### Pre-Flight Validation
+
+**Implementation**: `backend/services/subscription/preflight_validator.py`
+
+**Checks Before Generation**:
+1. User has sufficient subscription tier
+2. Estimated cost within monthly budget
+3. Video generation limit not exceeded
+4. Audio generation limit not exceeded
+5. Total story cost reasonable (<$5 for typical story)
+
+**Validation Flow**:
+```python
+def validate_story_generation(
+    pricing_service: PricingService,
+    user_id: str,
+    num_scenes: int,
+    video_resolution: str,
+    video_duration: int,
+    use_voice_clone: bool,
+) -> Tuple[bool, str, Dict[str, Any]]:
+    """
+    Pre-flight validation before story generation.
+    Returns: (allowed, message, cost_breakdown)
+    """
+    # Calculate estimated costs
+    video_cost_per_scene = get_wavespeed_cost(video_resolution, video_duration)
+    audio_cost_per_scene = get_voice_clone_cost() if use_voice_clone else 0.0
+    
+    total_estimated_cost = (video_cost_per_scene + audio_cost_per_scene) * num_scenes
+    
+    # Check limits
+    limits = pricing_service.get_user_limits(user_id)
+    current_usage = pricing_service.get_current_usage(user_id)
+    
+    # Validation logic...
+    return (allowed, message, cost_breakdown)
+```
+
+### Cost Estimation Display
+
+**Frontend Implementation**:
+- Real-time cost calculator in Story Setup
+- Per-scene cost breakdown
+- Total story cost estimate
+- Monthly budget remaining
+- Warning if approaching limits
+
+**UI Example**:
+```
+Video Generation Cost Estimate:
+├─ Resolution: 720p ($0.10/second)
+├─ Duration: 5 seconds per scene
+├─ Scenes: 10
+└─ Total: $5.00
+
+Audio Generation Cost Estimate:
+├─ Provider: Voice Clone ($0.02/minute)
+├─ Average: 30 seconds per scene
+├─ Scenes: 10
+└─ Total: $0.10
+
+Total Estimated Cost: $5.10
+Monthly Budget Remaining: $44.90
+```
+
+### Usage Tracking
+
+**Enhanced Tracking**:
+- Track video generation per scene
+- Track audio generation per scene
+- Track total story cost
+- Alert users approaching limits
+- Provide cost breakdown in analytics
+
+---
+
+## Pricing Integration
+
+### WaveSpeed WAN 2.5 Pricing
+
+**Add to `pricing_service.py`**:
+```python
+# WaveSpeed WAN 2.5 Text-to-Video
+{
+    "provider": APIProvider.VIDEO,  # Or new WAVESPEED provider
+    "model_name": "wan-2.5-480p",
+    "cost_per_second": 0.05,
+    "description": "WaveSpeed WAN 2.5 Text-to-Video (480p)"
+},
+{
+    "provider": APIProvider.VIDEO,
+    "model_name": "wan-2.5-720p",
+    "cost_per_second": 0.10,
+    "description": "WaveSpeed WAN 2.5 Text-to-Video (720p)"
+},
+{
+    "provider": APIProvider.VIDEO,
+    "model_name": "wan-2.5-1080p",
+    "cost_per_second": 0.15,
+    "description": "WaveSpeed WAN 2.5 Text-to-Video (1080p)"
+}
+```
+
+### Minimax Voice Clone Pricing
+
+**Add to `pricing_service.py`**:
+```python
+# Minimax Voice Clone
+{
+    "provider": APIProvider.AUDIO,  # New provider type
+    "model_name": "minimax-voice-clone-train",
+    "cost_per_request": 0.75,  # One-time training cost
+    "description": "Minimax Voice Clone Training"
+},
+{
+    "provider": APIProvider.AUDIO,
+    "model_name": "minimax-voice-clone-generate",
+    "cost_per_minute": 0.02,  # Per minute of generated audio
+    "description": "Minimax Voice Clone Generation"
+}
+```
+
+### Subscription Tier Limits
+
+**Update subscription limits**:
+- **Free**: 3 stories/month, 480p only, gTTS only
+- **Basic**: 10 stories/month, up to 720p, voice clone available
+- **Pro**: 50 stories/month, up to 1080p, voice clone included
+- **Enterprise**: Unlimited, all features
+
+---
+
+## Technical Architecture
+
+### Backend Services
+
+```
+backend/services/
+├── wavespeed/
+│   ├── __init__.py
+│   ├── client.py              # WaveSpeed API client
+│   ├── wan25_video.py        # WAN 2.5 video generation
+│   └── models.py              # Request/response models
+├── minimax/
+│   ├── __init__.py
+│   ├── client.py              # Minimax API client
+│   ├── voice_clone.py         # Voice cloning service
+│   └── models.py
+└── story_writer/
+    ├── audio_generation_service.py  # Updated with voice clone
+    └── video_generation_service.py   # Updated with WaveSpeed
+```
+
+### Frontend Components
+
+```
+frontend/src/components/StoryWriter/
+├── Phases/StorySetup/
+│   └── GenerationSettingsSection.tsx  # Enhanced with new settings
+├── components/
+│   ├── HdVideoSection.tsx              # Updated for WaveSpeed
+│   ├── VoiceTrainingSection.tsx        # NEW: Voice training UI
+│   └── CostEstimationDisplay.tsx        # NEW: Cost calculator
+└── hooks/
+    └── useStoryGenerationCost.ts        # NEW: Cost calculation hook
+```
+
+---
+
+## Error Handling & User Experience
+
+### Error Scenarios
+
+1. **WaveSpeed API Failure**:
+   - Retry with exponential backoff (3 attempts)
+   - Fallback to HuggingFace if available
+   - Clear error message with cost refund notice
+
+2. **Voice Clone Training Failure**:
+   - Provide specific error (audio quality, length, format)
+   - Suggest improvements
+   - Allow retry with different audio
+
+3. **Cost Limit Exceeded**:
+   - Pre-flight validation prevents this
+   - Show upgrade prompt
+   - Suggest reducing scenes/resolution
+
+4. **Audio/Video Mismatch**:
+   - Validate audio length matches video duration
+   - Auto-trim or extend audio
+   - Warn user before generation
+
+### User Feedback
+
+- Progress indicators for all operations
+- Clear cost breakdowns
+- Quality previews before final generation
+- Regeneration options with cost tracking
+- Usage analytics dashboard
+
+---
+
+## Testing Plan
+
+### Unit Tests
+- WaveSpeed API client
+- Voice clone service
+- Cost calculation
+- Pre-flight validation
+
+### Integration Tests
+- End-to-end story generation
+- Audio + video synchronization
+- Error handling and fallbacks
+- Subscription limit enforcement
+
+### User Acceptance Tests
+- Story generation workflow
+- Voice training process
+- Cost estimation accuracy
+- Error recovery
+
+---
+
+## Success Metrics
+
+### Technical Metrics
+- Video generation success rate >95%
+- Audio generation success rate >98%
+- Average generation time per scene <30s
+- API error rate <2%
+
+### Business Metrics
+- User satisfaction with video quality
+- Cost per story (target: <$5 for 10-scene story)
+- Voice clone adoption rate
+- Story completion rate
+
+### User Experience Metrics
+- Time to generate story
+- Error recovery time
+- User understanding of costs
+- Feature discovery rate
+
+---
+
+## Provider Management Strategy
+
+### Always-Available Options
+- **gTTS**: Always available, always free, works even when credits exhausted
+- **HuggingFace**: Preserved as fallback option, works when WaveSpeed unavailable
+
+### Automatic Provider Routing
+- **Primary**: WaveSpeed WAN 2.5 (when credits available)
+- **Fallback**: HuggingFace (when WaveSpeed unavailable or credits exhausted)
+- **Audio Fallback**: gTTS (always available, always free)
+
+### User Experience
+- Users never see provider names
+- System automatically selects best available option
+- Seamless fallback when credits exhausted
+- Clear notifications when fallback occurs
+- No user intervention required
+
+### No Deprecation
+- **HuggingFace**: Kept as permanent fallback option
+- **gTTS**: Kept as permanent free option
+- All existing functionality preserved
+- New features are additions, not replacements
+
+---
+
+## Next Steps
+
+1. **Week 1**: Set up WaveSpeed API access and credentials
+2. **Week 1**: Implement provider-agnostic routing system
+3. **Week 2**: Integrate into Story Writer with quality-based UI
+4. **Week 3**: Implement voice cloning with simple "AI Clone" vs "Default" choice
+5. **Week 4**: Add voice training UI (only if AI Clone selected)
+6. **Week 5**: Add "Animate Scene" hover option in Outline
+7. **Week 6**: Add "Animate Story with VoiceOver" button in Writing
+8. **Week 7-8**: Testing, optimization, and polish
+
+## Key Design Principles
+
+1. **Provider Abstraction**: Users never see provider names - only quality/voice options
+2. **Preserve Existing**: gTTS and HuggingFace remain available as fallbacks
+3. **Cost Transparency**: All buttons show costs in tooltips
+4. **Automatic Fallback**: System automatically uses free options when credits exhausted
+5. **Per-Scene Only**: Outline phase only allows per-scene generation (no bulk)
+6. **User-Friendly**: Simple choices like "Standard Quality" not "WaveSpeed 480p"
+
+---
+
+## Risk Mitigation
+
+| Risk | Mitigation |
+|------|------------|
+| WaveSpeed API changes | Version pinning, abstraction layer |
+| Cost overruns | Strict pre-flight validation |
+| Voice quality issues | Quality checks, fallback options |
+| User confusion | Clear UI, tooltips, documentation |
+| Integration complexity | Phased rollout, extensive testing |
+
+---
+
+*Document Version: 1.0*  
+*Last Updated: January 2025*  
+*Priority: HIGH - Immediate Implementation*
+
--- a/docs/WAVESPEED_AI_FEATURE_PROPOSAL.md
+++ b/docs/WAVESPEED_AI_FEATURE_PROPOSAL.md
@@ -0,0 +1,516 @@
+# WaveSpeed AI Models Integration: Feature Proposal for ALwrity
+
+## Executive Summary
+
+This document outlines strategic feature enhancements for ALwrity's AI digital marketing platform by integrating advanced AI models from WaveSpeed.ai. These integrations will expand ALwrity's content creation capabilities from text-based content to comprehensive multimedia marketing solutions, positioning ALwrity as a complete end-to-end marketing content platform.
+
+---
+
+## Current ALwrity Capabilities
+
+### Existing Features
+- **Text Content Generation**: Blog posts, LinkedIn content, Facebook posts
+- **SEO Dashboard**: Comprehensive SEO analysis and optimization
+- **Content Strategy**: AI-powered persona development and content calendars
+- **Story Writer**: Multi-phase story generation with basic video/image/audio
+- **Image Generation**: Stability AI, Gemini, HuggingFace (text-to-image)
+- **Video Generation**: Basic text-to-video via HuggingFace (tencent/HunyuanVideo)
+
+### Current Limitations
+- Limited video quality options (single provider)
+- No audio-synchronized video generation
+- No avatar/lipsync capabilities
+- Basic image generation (no advanced creative options)
+- No voice cloning for personalized audio
+- Limited multilingual video content support
+
+---
+
+## Proposed New Features from WaveSpeed Models
+
+### 1. **Advanced Video Content Creation Suite**
+
+#### 1.1 Alibaba WAN 2.5 Text-to-Video
+**Model**: `alibaba/wan-2.5/text-to-video`
+
+**Capabilities**:
+- Generate 480p/720p/1080p videos from text prompts
+- Synchronized audio/voiceover generation
+- Automatic lip-sync for generated speech
+- Multilingual support (including Chinese)
+- Up to 10 seconds duration
+- 6 aspect ratio/size options
+- Custom audio upload support (3-30 seconds, wav/mp3, ≤15MB)
+
+**ALwrity Marketing Use Cases**:
+- **Product Demo Videos**: Create professional product demonstration videos from product descriptions
+- **Social Media Shorts**: Generate engaging short-form video content for TikTok, Instagram Reels, YouTube Shorts
+- **Educational Content**: Transform blog posts into video tutorials with synchronized narration
+- **Promotional Videos**: Create marketing videos with custom voiceovers for campaigns
+- **Multilingual Marketing**: Generate video content in multiple languages for global campaigns
+- **LinkedIn Video Posts**: Professional video content optimized for LinkedIn engagement
+
+**Integration Points**:
+- Extend existing Story Writer video generation
+- New "Video Content Creator" module in main dashboard
+- Integration with Blog Writer to convert articles to videos
+- Social media content calendar with video suggestions
+
+**Pricing Alignment**:
+- 480p: $0.05/second
+- 720p: $0.10/second  
+- 1080p: $0.15/second
+- More affordable than Google Veo3, making it accessible for solopreneurs
+
+---
+
+#### 1.2 Alibaba WAN 2.5 Image-to-Video
+**Model**: `alibaba/wan-2.5/image-to-video`
+
+**Capabilities**:
+- Convert static images to dynamic videos
+- Add synchronized audio/voiceover
+- Maintain image consistency while adding motion
+- Same resolution and duration options as text-to-video
+
+**ALwrity Marketing Use Cases**:
+- **Product Showcase**: Animate product images for e-commerce
+- **Portfolio Enhancement**: Transform static portfolio images into dynamic presentations
+- **Social Media Content**: Repurpose existing images into engaging video content
+- **Email Marketing**: Create animated product images for email campaigns
+- **Website Hero Videos**: Convert hero images into dynamic background videos
+- **Before/After Animations**: Create engaging transformation videos
+
+**Integration Points**:
+- Connect with existing image generation service
+- "Animate Image" feature in image gallery
+- Bulk image-to-video conversion for content libraries
+- Integration with LinkedIn image posts
+
+---
+
+### 2. **AI Avatar & Personalization Suite**
+
+#### 2.1 Hunyuan Avatar - Audio-Driven Talking Avatars
+**Model**: `wavespeed-ai/hunyuan-avatar`
+
+**Capabilities**:
+- Create talking/singing avatars from single image + audio
+- 480p/720p resolution
+- Up to 120 seconds duration
+- Character consistency preservation
+- Emotion-controllable animations
+- Multi-character dialogue support
+- High-fidelity lip-sync
+
+**ALwrity Marketing Use Cases**:
+- **Personal Branding**: Create personalized video messages from founder/CEO photos
+- **Customer Service Videos**: Generate FAQ videos with company spokesperson avatar
+- **Training Content**: Create educational videos with consistent instructor avatar
+- **Product Explainer Videos**: Use product images or brand mascots as talking avatars
+- **Multilingual Content**: Generate videos in multiple languages using same avatar
+- **Email Personalization**: Create personalized video messages for email campaigns
+- **Social Media**: Consistent brand spokesperson across all video content
+
+**Integration Points**:
+- New "Avatar Studio" module
+- Integration with persona system for brand voice consistency
+- Connect with voice cloning for complete personalization
+- LinkedIn personal branding features
+
+**Pricing**: Starts at $0.15/5 seconds
+
+---
+
+#### 2.2 InfiniteTalk - Long-Form Avatar Lipsync
+**Model**: `wavespeed-ai/infinitetalk`
+
+**Capabilities**:
+- Audio-driven avatar lipsync (image-to-video)
+- Up to 10 minutes duration
+- 480p/720p resolution
+- Precise lip synchronization
+- Full-body coherence (head, face, body movements)
+- Identity preservation across the full video length (up to the 10-minute limit)
+- Instruction following (text prompts for scene/pose control)
+
+**ALwrity Marketing Use Cases**:
+- **Long-Form Content**: Create extended video content (tutorials, webinars, courses)
+- **Podcast-to-Video**: Convert audio podcasts into video format with host avatar
+- **Webinar Creation**: Generate webinar content with consistent presenter
+- **Course Content**: Create educational course videos with instructor avatar
+- **Interview Videos**: Transform audio interviews into video format
+- **Thought Leadership**: Extended video content for LinkedIn and YouTube
+- **Brand Storytelling**: Long-form brand narrative videos
+
+**Integration Points**:
+- Extended content creation for Story Writer
+- Podcast-to-video conversion tool
+- Course content generation module
+- YouTube content creation workflow
+
+**Pricing**:
+- 480p: $0.15/5 seconds
+- 720p: $0.30/5 seconds
+- Billing capped at 600 seconds (10 minutes)
+
+---
+
+### 3. **Advanced Image Generation**
+
+#### 3.1 Ideogram V3 Turbo - Photorealistic Image Generation
+**Model**: `ideogram-ai/ideogram-v3-turbo`
+
+**Capabilities**:
+- High-quality photorealistic image generation
+- Creative and styled image creation
+- Consistent style maintenance
+- Advanced prompt understanding
+
+**ALwrity Marketing Use Cases**:
+- **Social Media Visuals**: Create unique, brand-consistent images for social posts
+- **Blog Post Images**: Generate custom featured images for blog articles
+- **Ad Creative**: Create diverse ad visuals for A/B testing
+- **Email Campaign Images**: Custom visuals for email marketing
+- **Website Graphics**: Generate hero images, banners, and graphics
+- **Product Mockups**: Create product visualization images
+- **Brand Assets**: Consistent visual style across all marketing materials
+
+**Integration Points**:
+- Enhance existing image generation service
+- LinkedIn image generation (already partially implemented)
+- Blog Writer image suggestions
+- Social media content calendar with image previews
+
+---
+
+#### 3.2 Qwen Image - Text-to-Image
+**Model**: `wavespeed-ai/qwen-image/text-to-image`
+
+**Capabilities**:
+- High-quality text-to-image generation
+- Diverse style options
+- Fast generation times
+
+**ALwrity Marketing Use Cases**:
+- **Rapid Visual Creation**: Quick image generation for time-sensitive campaigns
+- **A/B Testing**: Generate multiple image variations for testing
+- **Content Library**: Build library of marketing visuals
+- **Brand Consistency**: Maintain visual style across content
+
+**Integration Points**:
+- Alternative image generation provider
+- Bulk image generation for content calendars
+- Integration with content strategy module
+
+---
+
+### 4. **Voice Cloning & Audio Personalization**
+
+#### 4.1 Minimax Voice Clone
+**Model**: `minimax/voice-clone`
+
+**Capabilities**:
+- Clone voices from audio samples
+- Generate personalized voiceovers
+- Maintain voice characteristics
+- Multilingual voice generation
+
+**ALwrity Marketing Use Cases**:
+- **Brand Voice Consistency**: Use founder/CEO voice across all video content
+- **Personalized Marketing**: Create personalized video messages with customer's name
+- **Multilingual Content**: Generate voiceovers in multiple languages with same voice
+- **Podcast Production**: Create consistent podcast host voice
+- **Video Narration**: Professional voiceovers for all video content
+- **Email Audio**: Add personalized audio messages to email campaigns
+- **Social Media**: Consistent voice across all video content
+
+**Integration Points**:
+- Connect with Hunyuan Avatar and InfiniteTalk for complete avatar solution
+- Integration with WAN 2.5 for synchronized audio
+- Voice library management system
+- Brand voice consistency across all content
+
+---
+
+## Strategic Feature Prioritization
+
+### Phase 1: High-Impact, Quick Wins (3-4 months)
+1. **Alibaba WAN 2.5 Text-to-Video** - Expands video capabilities significantly
+2. **Ideogram V3 Turbo** - Enhances existing image generation
+3. **Alibaba WAN 2.5 Image-to-Video** - Repurposes existing image assets
+
+**Rationale**: These features build on existing capabilities, require minimal new UI, and provide immediate value to users.
+
+---
+
+### Phase 2: Personalization & Engagement (4-6 months)
+4. **Hunyuan Avatar** - Enables personalized video content
+5. **Minimax Voice Clone** - Completes personalization suite
+6. **Qwen Image** - Additional image generation option
+
+**Rationale**: These features differentiate ALwrity by enabling true personalization, which is critical for modern marketing.
+
+---
+
+### Phase 3: Long-Form Content (6-8 months)
+7. **InfiniteTalk** - Enables extended video content creation
+
+**Rationale**: This feature opens new content types (courses, webinars) and requires more complex UI/workflow.
+
+---
+
+## Integration Architecture
+
+### Backend Integration
+```
+backend/
+├── services/
+│   ├── llm_providers/
+│   │   ├── wavespeed_video_generation.py  # WAN 2.5 text/image-to-video
+│   │   ├── wavespeed_avatar_generation.py # Hunyuan Avatar, InfiniteTalk
+│   │   ├── wavespeed_image_generation.py  # Ideogram, Qwen
+│   │   └── minimax_voice_clone.py         # Voice cloning
+│   └── wavespeed/
+│       ├── client.py                      # WaveSpeed API client
+│       ├── models.py                      # Model configurations
+│       └── pricing.py                     # Cost tracking
+```
+
+### Frontend Integration
+```
+frontend/src/
+├── components/
+│   ├── VideoCreator/
+│   │   ├── TextToVideoSection.tsx
+│   │   ├── ImageToVideoSection.tsx
+│   │   └── VideoPreview.tsx
+│   ├── AvatarStudio/
+│   │   ├── AvatarCreator.tsx
+│   │   ├── VoiceUpload.tsx
+│   │   └── AvatarPreview.tsx
+│   └── VoiceCloning/
+│       ├── VoiceTrainer.tsx
+│       └── VoiceLibrary.tsx
+```
+
+---
+
+## Business Value & Competitive Advantages
+
+### For Solopreneurs
+1. **Cost Efficiency**: More affordable than Google Veo3, making professional video accessible
+2. **Time Savings**: Automated video creation eliminates need for video production teams
+3. **Multilingual Support**: Reach global audiences without translation teams
+4. **Personalization at Scale**: Create personalized content without manual effort
+5. **Content Repurposing**: Transform existing content (images, audio) into new formats
+
+### For ALwrity Platform
+1. **Market Differentiation**: Complete multimedia content creation platform
+2. **Increased User Engagement**: Video content drives higher engagement
+3. **Premium Feature Upsell**: Advanced video features for higher-tier plans
+4. **Platform Stickiness**: Users create more content types, increasing retention
+5. **Competitive Moat**: Comprehensive AI content suite unmatched by competitors
+
+---
+
+## Marketing Use Case Examples
+
+### Use Case 1: Blog-to-Video Conversion
+**Scenario**: User creates a blog post about "10 SEO Tips" and wants to convert it to video.
+
+**Workflow**:
+1. User selects blog post in ALwrity
+2. Clicks "Create Video" button
+3. ALwrity uses WAN 2.5 to generate video with synchronized narration
+4. User can add custom audio or use AI-generated voice
+5. Video is optimized for social media platforms
+6. Automatically added to content calendar
+
+**Value**: Single piece of content becomes multi-format, maximizing reach.
+
+---
+
+### Use Case 2: Personalized Email Campaign
+**Scenario**: User wants to send personalized video messages to email subscribers.
+
+**Workflow**:
+1. User uploads their photo and records voice sample
+2. ALwrity creates voice clone and avatar
+3. User writes email campaign message
+4. ALwrity generates personalized video for each recipient using Hunyuan Avatar
+5. Videos are embedded in email campaign
+6. Analytics track video engagement
+
+**Value**: Personalized video emails have 3x higher open rates than text-only.
+
+---
+
+### Use Case 3: Multilingual Marketing Campaign
+**Scenario**: User wants to launch a product in multiple countries.
+
+**Workflow**:
+1. User creates video script in English
+2. ALwrity translates script to target languages
+3. Uses WAN 2.5 to generate videos in each language with native voice
+4. Creates social media posts for each market
+5. Schedules content for optimal times in each timezone
+
+**Value**: Global reach without hiring multilingual teams.
+
+---
+
+### Use Case 4: Course Content Creation
+**Scenario**: User wants to create an online course with video lessons.
+
+**Workflow**:
+1. User uploads course outline and instructor photo
+2. Records audio narration for each lesson
+3. ALwrity uses InfiniteTalk to create 10-minute video lessons
+4. Generates course thumbnails using Ideogram
+5. Creates course landing page with video previews
+6. Automatically uploads to course platform
+
+**Value**: Professional course content without video production costs.
+
+---
+
+## Technical Considerations
+
+### API Integration
+- WaveSpeed provides REST API endpoints
+- Need to handle async job processing (videos take time to generate)
+- Implement polling or webhook system for job status
+- Error handling and retry logic for failed generations
+
+### Storage & CDN
+- Video files are large (need efficient storage)
+- CDN integration for fast video delivery
+- Compression and optimization for web delivery
+- Thumbnail generation for video previews
+
+### Subscription & Usage Tracking
+- Track video generation usage per user
+- Implement rate limiting based on subscription tier
+- Cost tracking for WaveSpeed API calls
+- Usage analytics dashboard
+
+### Performance Optimization
+- Queue system for video generation jobs
+- Background processing for long-running tasks
+- Caching for frequently used avatars/voices
+- Progressive loading for video previews
+
+---
+
+## Pricing Strategy Integration
+
+### Subscription Tier Enhancements
+- **Free Tier**: Limited video generation (e.g., 5 videos/month, 480p only)
+- **Basic Tier**: Standard video features (20 videos/month, up to 720p)
+- **Pro Tier**: Advanced features (50 videos/month, 1080p, avatar features)
+- **Enterprise Tier**: Unlimited video generation, all features, custom voice cloning
+
+### Usage-Based Add-ons
+- Additional video generation credits
+- Premium avatar features
+- Extended video duration
+- Custom voice cloning training
+
+---
+
+## Success Metrics
+
+### User Engagement
+- Video content creation rate
+- Average videos per user per month
+- Video engagement rates (views, shares)
+- User retention (video creators vs. text-only)
+
+### Business Metrics
+- Revenue from premium video features
+- Average revenue per user (ARPU) increase
+- Customer lifetime value (LTV) improvement
+- Churn rate reduction
+
+### Content Performance
+- Video content performance vs. text content
+- Social media engagement rates
+- Conversion rates from video content
+- SEO performance of video-embedded content
+
+---
+
+## Implementation Roadmap
+
+### Q1 2025: Foundation
+- WaveSpeed API integration
+- WAN 2.5 text-to-video implementation
+- Basic video generation UI
+- Usage tracking and billing
+
+### Q2 2025: Enhancement
+- WAN 2.5 image-to-video
+- Ideogram image generation
+- Advanced video settings UI
+- Video library and management
+
+### Q3 2025: Personalization
+- Hunyuan Avatar integration
+- Voice cloning (Minimax) integration
+- Avatar studio UI
+- Voice library management
+
+### Q4 2025: Advanced Features
+- InfiniteTalk for long-form content
+- Qwen image generation
+- Complete multimedia workflow
+- Advanced analytics and optimization
+
+---
+
+## Risk Mitigation
+
+### Technical Risks
+- **API Reliability**: Implement retry logic and fallback providers
+- **Cost Overruns**: Strict usage limits and pre-flight validation
+- **Performance Issues**: Queue system and background processing
+- **Storage Costs**: Efficient compression and CDN optimization
+
+### Business Risks
+- **Market Adoption**: Gradual rollout with user education
+- **Competition**: Focus on unique value (personalization, integration)
+- **Pricing Pressure**: Value-based pricing with clear ROI
+- **User Experience**: Extensive testing and feedback loops
+
+---
+
+## Conclusion
+
+Integrating WaveSpeed AI models into ALwrity transforms the platform from a text-focused content tool into a comprehensive multimedia marketing solution. These features align perfectly with ALwrity's mission to democratize professional marketing capabilities for solopreneurs.
+
+The proposed features enable:
+- **Complete Content Lifecycle**: From text to video to personalized multimedia
+- **Cost-Effective Production**: Professional content without expensive production teams
+- **Scalable Personalization**: Personalized content at scale
+- **Global Reach**: Multilingual content creation
+- **Competitive Advantage**: Unique feature set in the market
+
+By implementing these features in a phased approach, ALwrity can deliver immediate value while building toward a comprehensive multimedia content platform that serves as the complete marketing solution for independent entrepreneurs.
+
+---
+
+## Next Steps
+
+1. **Technical Feasibility Review**: Evaluate WaveSpeed API documentation and integration requirements
+2. **Cost Analysis**: Calculate infrastructure and API costs for each feature
+3. **User Research**: Survey existing users on video content needs and priorities
+4. **Prototype Development**: Build MVP for highest-priority feature (WAN 2.5 text-to-video)
+5. **Partnership Discussion**: Engage with WaveSpeed for partnership and pricing negotiations
+
+---
+
+*Document Version: 1.0*  
+*Last Updated: January 2025*  
+*Author: ALwrity Product Team*
+
--- a/docs/WAVESPEED_AI_FEATURE_SUMMARY.md
+++ b/docs/WAVESPEED_AI_FEATURE_SUMMARY.md
@@ -0,0 +1,165 @@
+# WaveSpeed AI Integration: Executive Summary
+
+## Quick Overview
+
+This document summarizes how WaveSpeed AI models can enhance ALwrity's digital marketing platform with advanced video, avatar, image, and voice capabilities.
+
+---
+
+## 🎯 Key Features to Add
+
+### 1. **Professional Video Creation**
+- **WAN 2.5 Text-to-Video**: Create 480p/720p/1080p videos from text with synchronized audio
+- **WAN 2.5 Image-to-Video**: Animate static images into dynamic videos
+- **Use Cases**: Product demos, social media shorts, blog-to-video conversion, multilingual marketing
+
+### 2. **AI Avatar & Personalization**
+- **Hunyuan Avatar**: Create talking avatars from photos + audio (up to 2 minutes)
+- **InfiniteTalk**: Long-form avatar videos with perfect lip-sync (up to 10 minutes)
+- **Use Cases**: Personal branding, customer service videos, course content, personalized email campaigns
+
+### 3. **Advanced Image Generation**
+- **Ideogram V3 Turbo**: Photorealistic, creative image generation
+- **Qwen Image**: Fast, high-quality text-to-image
+- **Use Cases**: Social media visuals, ad creatives, blog images, brand assets
+
+### 4. **Voice Cloning**
+- **Minimax Voice Clone**: Clone voices for consistent brand audio
+- **Use Cases**: Brand voice consistency, multilingual content, personalized marketing
+
+---
+
+## 💰 Pricing Comparison
+
+| Feature | WaveSpeed Pricing | Current ALwrity | Benefit |
+|---------|------------------|-----------------|---------|
+| Text-to-Video (1080p) | $0.15/second | HuggingFace only | More affordable than Veo3 |
+| Avatar Videos | $0.15-0.30/5s | Not available | New capability |
+| Long-Form Video | $0.15-0.30/5s | Not available | Up to 10 minutes |
+| Voice Cloning | TBD | Not available | New capability |
+
+---
+
+## 🚀 Implementation Priority
+
+### Phase 1 (Q1 2025) - Quick Wins
+1. ✅ WAN 2.5 Text-to-Video - Expands video capabilities
+2. ✅ WAN 2.5 Image-to-Video - Repurposes existing images
+3. ✅ Ideogram Image Generation - Enhances image quality
+
+### Phase 2 (Q2-Q3 2025) - Personalization
+4. ✅ Hunyuan Avatar - Personalized video content
+5. ✅ Voice Cloning - Brand voice consistency
+
+### Phase 3 (Q4 2025) - Advanced
+6. ✅ InfiniteTalk - Long-form content creation
+7. ✅ Qwen Image - Additional image option
+
+---
+
+## 📊 Business Value
+
+### For Users (Solopreneurs)
+- **Save Money**: No need for video production teams
+- **Save Time**: Automated video creation
+- **Scale Globally**: Multilingual content without translation teams
+- **Personalize**: Create personalized content at scale
+- **Repurpose**: Transform existing content into new formats
+
+### For ALwrity
+- **Differentiation**: Complete multimedia platform
+- **Engagement**: Video drives 3x higher engagement
+- **Revenue**: Premium features for higher-tier plans
+- **Retention**: More content types = higher stickiness
+- **Competitive Edge**: Unmatched AI content suite
+
+---
+
+## 🎬 Real-World Use Cases
+
+### Use Case 1: Blog-to-Video
+**Problem**: User has great blog post but wants video version  
+**Solution**: One-click conversion using WAN 2.5  
+**Result**: Single content piece becomes multi-format
+
+### Use Case 2: Personalized Email Campaign
+**Problem**: User wants personalized video messages  
+**Solution**: Hunyuan Avatar + Voice Clone  
+**Result**: 3x higher email open rates
+
+### Use Case 3: Multilingual Launch
+**Problem**: Launching product in multiple countries  
+**Solution**: WAN 2.5 with multilingual support  
+**Result**: Global reach without translation teams
+
+### Use Case 4: Online Course Creation
+**Problem**: Need professional course videos  
+**Solution**: InfiniteTalk for long-form content  
+**Result**: Professional course without production costs
+
+---
+
+## 🔧 Technical Requirements
+
+### Backend
+- WaveSpeed API client integration
+- Async job processing (videos take time)
+- Usage tracking and billing
+- Storage and CDN for video files
+
+### Frontend
+- Video creation UI components
+- Avatar studio interface
+- Voice cloning interface
+- Video library and management
+
+### Infrastructure
+- Video storage (large files)
+- CDN for fast delivery
+- Queue system for background jobs
+- Cost monitoring and limits
+
+---
+
+## 📈 Success Metrics
+
+- **User Engagement**: Video creation rate, videos per user
+- **Business**: Revenue from premium features, ARPU increase
+- **Content**: Video engagement rates, conversion rates
+- **Retention**: Video creators vs. text-only users
+
+---
+
+## ⚠️ Risks & Mitigation
+
+| Risk | Mitigation |
+|------|------------|
+| API Reliability | Retry logic, fallback providers |
+| Cost Overruns | Strict usage limits, pre-flight validation |
+| Performance | Queue system, background processing |
+| Adoption | Gradual rollout, user education |
+
+---
+
+## ✅ Next Steps
+
+1. **Review**: Technical feasibility and API documentation
+2. **Analyze**: Cost structure and infrastructure needs
+3. **Research**: User needs and priorities
+4. **Prototype**: MVP for WAN 2.5 text-to-video
+5. **Partner**: Engage WaveSpeed for pricing/partnership
+
+---
+
+## 📝 Key Takeaways
+
+1. **Complete Multimedia Platform**: Transform ALwrity from text-focused to full multimedia
+2. **Cost-Effective**: More affordable than competitors (Veo3, etc.)
+3. **Personalization**: Unique avatar and voice cloning capabilities
+4. **Scalability**: Multilingual and automated content creation
+5. **Competitive Advantage**: Unmatched feature set in the market
+
+---
+
+*For detailed implementation plan, see `WAVESPEED_AI_FEATURE_PROPOSAL.md`*
+
--- a/docs/WAVESPEED_IMPLEMENTATION_ROADMAP.md
+++ b/docs/WAVESPEED_IMPLEMENTATION_ROADMAP.md
@@ -0,0 +1,335 @@
+# WaveSpeed AI Integration: Complete Implementation Roadmap
+
+## Overview
+
+This document provides a unified roadmap for implementing WaveSpeed AI models across ALwrity's platform. It consolidates the three focused implementation plans:
+
+1. **Story Writer Video Enhancement** - Immediate value; replaces HuggingFace
+2. **Persona Voice & Avatar Hyper-Personalization** - Core differentiator
+3. **LinkedIn Writer Multimedia Revamp** - Engagement driver
+
+---
+
+## Implementation Priority Matrix
+
+| Feature | Priority | Timeline | Impact | Effort |
+|---------|----------|----------|--------|--------|
+| Story Writer: WaveSpeed Video | **HIGH** | Week 1-2 | Immediate value, solves current issues | Medium |
+| Story Writer: Voice Cloning | **HIGH** | Week 3-4 | Significant quality improvement | Medium |
+| Persona: Voice Training | **HIGH** | Week 1-3 | Core hyper-personalization | High |
+| Persona: Avatar Creation | **HIGH** | Week 4-6 | Visual personalization | High |
+| LinkedIn: Video Posts | **HIGH** | Week 1-3 | Engagement driver | Medium |
+| LinkedIn: Avatar Videos | **HIGH** | Week 6-7 | Personal branding | Medium |
+| LinkedIn: Enhanced Images | **MEDIUM** | Week 4-5 | Quality improvement | Low |
+| LinkedIn: Audio Narration | **MEDIUM** | Week 8-9 | Complete suite | Low |
+
+---
+
+## Phased Implementation Plan
+
+### Phase 1: Foundation (Weeks 1-4)
+**Goal**: Replace HuggingFace, add voice cloning to Story Writer
+
+**Deliverables**:
+- ✅ WaveSpeed WAN 2.5 video generation
+- ✅ Minimax voice cloning
+- ✅ Story Writer video enhancement
+- ✅ Story Writer audio enhancement
+- ✅ Cost management and validation
+
+**Success Criteria**:
+- Story Writer videos work reliably
+- Voice quality significantly improved
+- Cost tracking accurate
+- User satisfaction improved
+
+---
+
+### Phase 2: Hyper-Personalization (Weeks 1-6)
+**Goal**: Integrate voice and avatar into Persona System
+
+**Deliverables**:
+- ✅ Voice training in onboarding
+- ✅ Avatar creation in onboarding
+- ✅ Persona voice integration
+- ✅ Persona avatar integration
+- ✅ Persona dashboard enhancements
+
+**Success Criteria**:
+- Users can train voice/avatar during onboarding
+- Persona voice/avatar used across platform
+- Brand consistency achieved
+- High adoption rate (>60% of Pro users)
+
+---
+
+### Phase 3: LinkedIn Multimedia (Weeks 1-9)
+**Goal**: Transform LinkedIn Writer into multimedia platform
+
+**Deliverables**:
+- ✅ Video post generation
+- ✅ Avatar video posts
+- ✅ Enhanced image generation
+- ✅ Audio narration
+- ✅ Unified multimedia creator
+
+**Success Criteria**:
+- Users can create multimedia LinkedIn posts
+- Engagement rates improved (3x target)
+- High-quality content generation
+- Cost-effective for users
+
+---
+
+## Shared Infrastructure
+
+### Common Services
+
+**WaveSpeed API Client** (`backend/services/wavespeed/`):
+- Shared across Story Writer, LinkedIn, Persona
+- Unified error handling
+- Cost tracking
+- Rate limiting
+
+**Voice Cloning Service** (`backend/services/minimax/`):
+- Shared across Story Writer, LinkedIn, Persona
+- Voice library management
+- Training queue
+- Usage tracking
+
+**Avatar Service** (`backend/services/wavespeed/avatar/`):
+- Shared across LinkedIn, Persona
+- Avatar library management
+- Generation queue
+- Usage tracking
+
+### Cost Management
+
+**Unified Cost Tracking**:
+- Pre-flight validation across all features
+- Real-time cost estimation
+- Usage limits per tier
+- Cost optimization recommendations
+
+**Subscription Integration**:
+- Unified pricing service
+- Tier-based feature access
+- Usage tracking and alerts
+- Cost breakdown analytics
+
+---
+
+## Resource Allocation
+
+### Development Team
+
+**Backend Developers** (2-3):
+- Week 1-2: WaveSpeed integration
+- Week 3-4: Voice cloning integration
+- Week 5-6: Avatar integration
+- Week 7-9: LinkedIn multimedia
+
+**Frontend Developers** (2):
+- Week 1-2: Story Writer UI updates
+- Week 3-4: Voice training UI
+- Week 5-6: Avatar creation UI
+- Week 7-9: LinkedIn multimedia UI
+
+**QA/Testing** (1):
+- Continuous testing throughout
+- User acceptance testing
+- Performance testing
+- Cost validation testing
+
+### Timeline Summary
+
+```
+Month 1 (Weeks 1-4):
+├─ Story Writer: WaveSpeed + Voice Cloning
+└─ Persona: Voice Training
+
+Month 2 (Weeks 5-8):
+├─ Persona: Avatar Creation
+├─ LinkedIn: Video Posts
+└─ LinkedIn: Enhanced Images
+
+Month 3 (Weeks 9-12):
+├─ LinkedIn: Avatar Videos
+├─ LinkedIn: Audio Narration
+└─ Complete Integration & Polish
+```
+
+---
+
+## Cost Management Strategy
+
+### Pre-Flight Validation
+
+**Implementation**: Unified validation service
+
+**Checks**:
+1. User subscription tier
+2. Feature availability
+3. Usage limits
+4. Cost estimates
+5. Budget remaining
+
+**Benefits**:
+- Prevents wasted API calls
+- Clear user feedback
+- Cost transparency
+- Better user experience
+
+### Cost Optimization
+
+**Strategies**:
+1. **Default to Cost-Effective Options**: 480p/720p default, 1080p premium
+2. **Batch Processing**: Lower costs for multiple items
+3. **Caching**: Reuse generated content when possible
+4. **Smart Defaults**: Optimize settings automatically
+5. **Usage Limits**: Per-tier limits prevent overuse
+
+### Pricing Transparency
+
+**User-Facing**:
+- Real-time cost estimates
+- Per-feature cost breakdown
+- Monthly budget tracking
+- Cost optimization suggestions
+
+---
+
+## Success Metrics
+
+### Technical Metrics
+- API success rate >95%
+- Average generation time <30s
+- Error rate <2%
+- Cost accuracy >99%
+
+### User Metrics
+- Feature adoption rate >50%
+- User satisfaction >4.5/5
+- Content quality >4.5/5
+- Retention improvement >20%
+
+### Business Metrics
+- Premium tier conversion +30%
+- User engagement +200%
+- Content generation volume +150%
+- Average cost per user <$10/month
+
+---
+
+## Risk Management
+
+### Technical Risks
+
+| Risk | Probability | Impact | Mitigation |
+|------|------------|--------|------------|
+| API reliability | Medium | High | Retry logic, fallbacks |
+| Cost overruns | Medium | High | Pre-flight validation |
+| Quality issues | Low | Medium | Quality checks, previews |
+| Performance | Low | Medium | Queue system, optimization |
+
+### Business Risks
+
+| Risk | Probability | Impact | Mitigation |
+|------|------------|--------|------------|
+| Low adoption | Medium | Medium | User education, tutorials |
+| High costs | Low | High | Tier limits, cost estimates |
+| User confusion | Medium | Low | Clear UI, documentation |
+| Competition | Low | Medium | Unique features, quality |
+
+---
+
+## Dependencies
+
+### External Dependencies
+- WaveSpeed API access and credentials
+- Minimax API access and credentials
+- API documentation and support
+- Pricing agreements
+
+### Internal Dependencies
+- Persona system (existing)
+- Subscription system (existing)
+- Story Writer (existing)
+- LinkedIn Writer (existing)
+- Cost tracking infrastructure
+
+---
+
+## Next Steps
+
+### Immediate (Week 1)
+1. ✅ Secure WaveSpeed API access
+2. ✅ Secure Minimax API access
+3. ✅ Review API documentation
+4. ✅ Set up development environment
+5. ✅ Create project plan and assign tasks
+
+### Short-term (Weeks 2-4)
+1. ✅ Implement WaveSpeed video generation
+2. ✅ Implement voice cloning
+3. ✅ Update Story Writer
+4. ✅ Testing and optimization
+
+### Medium-term (Weeks 5-8)
+1. ✅ Implement persona voice/avatar
+2. ✅ Implement LinkedIn video posts
+3. ✅ Testing and optimization
+
+### Long-term (Weeks 9-12)
+1. ✅ Complete LinkedIn multimedia suite
+2. ✅ Full integration testing
+3. ✅ User acceptance testing
+4. ✅ Documentation and launch
+
+---
+
+## Documentation
+
+### For Developers
+- API integration guides
+- Service architecture docs
+- Testing procedures
+- Deployment guides
+
+### For Users
+- Feature guides
+- Video tutorials
+- Best practices
+- FAQ and troubleshooting
+
+### For Business
+- Cost analysis
+- ROI projections
+- Success metrics
+- Competitive analysis
+
+---
+
+## Conclusion
+
+This roadmap provides a comprehensive plan for integrating WaveSpeed AI models into ALwrity, transforming it from a text-focused platform into a complete multimedia content creation suite. The phased approach ensures:
+
+1. **Immediate Value**: Story Writer improvements solve current issues
+2. **Core Differentiation**: Persona hyper-personalization sets ALwrity apart
+3. **Engagement Growth**: LinkedIn multimedia drives user engagement
+4. **Cost Effectiveness**: Careful cost management prevents waste
+5. **Scalable Foundation**: Shared infrastructure supports future growth
+
+**Key Success Factors**:
+- Phased implementation reduces risk
+- Cost management prevents waste
+- User education ensures adoption
+- Quality focus ensures satisfaction
+- Integration creates competitive advantage
+
+---
+
+*Document Version: 1.0*  
+*Last Updated: January 2025*  
+*Status: Ready for Implementation*
+
--- a/frontend/src/components/StoryWriter/Phases/StoryExport.tsx
+++ b/frontend/src/components/StoryWriter/Phases/StoryExport.tsx
@@ -95,25 +95,38 @@ const StoryExport: React.FC<StoryExportProps> = ({ state }) => {

    try {
      // Prepare image and audio URLs in scene order
-      const imageUrls: string[] = [];
+      const imageUrls: (string | null)[] = [];
      const audioUrls: string[] = [];
      const scenes = state.outlineScenes;

+      const videoUrls: (string | null)[] = [];
+
      for (const scene of scenes) {
        const sceneNumber = scene.scene_number || scenes.indexOf(scene) + 1;
        const imageUrl = state.sceneImages?.get(sceneNumber);
        const audioUrl = state.sceneAudio?.get(sceneNumber);
+        const animatedVideoUrl = state.sceneAnimatedVideos?.get(sceneNumber);

-        if (imageUrl && audioUrl) {
-          imageUrls.push(imageUrl);
-          audioUrls.push(audioUrl);
-        } else {
-          throw new Error(`Missing image or audio for scene ${sceneNumber}`);
+        if (!audioUrl) {
+          throw new Error(`Missing audio for scene ${sceneNumber}`);
        }
+
+        // Prefer animated video if available, otherwise use image
+        if (animatedVideoUrl) {
+          videoUrls.push(animatedVideoUrl);
+          imageUrls.push(null);
+        } else if (imageUrl) {
+          videoUrls.push(null);
+          imageUrls.push(imageUrl);
+        } else {
+          throw new Error(`Missing image or animated video for scene ${sceneNumber}`);
+        }
+
+        audioUrls.push(audioUrl);
      }

      if (imageUrls.length !== scenes.length || audioUrls.length !== scenes.length) {
-        throw new Error('Number of images and audio files must match number of scenes');
+        throw new Error('Number of images/videos and audio files must match number of scenes');
      }

      // Start async video generation
@@ -121,6 +134,8 @@ const StoryExport: React.FC<StoryExportProps> = ({ state }) => {
        scenes: scenes,
        image_urls: imageUrls,
        audio_urls: audioUrls,
+        video_urls: videoUrls.length > 0 ? videoUrls : undefined,
+        ai_audio_urls: undefined, // TODO: Track AI audio separately in state
        story_title: state.storySetting || 'Story',
        fps: state.videoFps,
        transition_duration: state.videoTransitionDuration,
@@ -147,7 +162,11 @@ const StoryExport: React.FC<StoryExportProps> = ({ state }) => {
          state.setStoryVideo(videoUrl);
          // fetch blob for authenticated preview
          const blobUrl = await fetchMediaBlobUrl(videoUrl);
-          setVideoBlobUrl(blobUrl);
+          if (blobUrl) {
+            setVideoBlobUrl(blobUrl);
+          } else {
+            setVideoBlobUrl(null);
+          }
          setVideoProgress(100);
          setVideoMessage('Video generation complete');
        state.setError(null);
@@ -175,6 +194,9 @@ const StoryExport: React.FC<StoryExportProps> = ({ state }) => {
  const handleDownloadVideo = async () => {
    if (state.storyVideo) {
      const blobUrl = await fetchMediaBlobUrl(state.storyVideo);
+      if (!blobUrl) {
+        return;
+      }
      const a = document.createElement('a');
      a.href = blobUrl;
      a.download = `story-video-${Date.now()}.mp4`;
--- a/frontend/src/components/StoryWriter/Phases/StoryOutline.tsx
+++ b/frontend/src/components/StoryWriter/Phases/StoryOutline.tsx
@@ -14,9 +14,9 @@ import GlobalStyles from '@mui/material/GlobalStyles';
 import ImageIcon from '@mui/icons-material/Image';
 import VolumeUpIcon from '@mui/icons-material/VolumeUp';
 import { motion, AnimatePresence } from 'framer-motion';
-import { useStoryWriterState } from '../../../hooks/useStoryWriterState';
+import { useStoryWriterState, SceneAnimationResume } from '../../../hooks/useStoryWriterState';
 import { storyWriterApi } from '../../../services/storyWriterApi';
-import { aiApiClient } from '../../../api/client';
+import { aiApiClient, triggerSubscriptionError } from '../../../api/client';
 import OutlineHoverActions from './StoryOutlineParts/OutlineHoverActions';
 import EditSectionModal from './StoryOutlineParts/EditSectionModal';
 import { leftPageVariants, rightPageVariants } from './StoryOutlineParts/pageVariants';
@@ -48,7 +48,9 @@ const StoryOutline: React.FC<StoryOutlineProps> = ({ state, onNext }) => {
  const [imageLoadError, setImageLoadError] = useState<Set<number>>(new Set());
  const [imageBlobUrls, setImageBlobUrls] = useState<Map<number, string>>(new Map());
  const [audioBlobUrls, setAudioBlobUrls] = useState<Map<number, string>>(new Map());
+  const [videoBlobUrls, setVideoBlobUrls] = useState<Map<number, string>>(new Map());
  const [audioLoadError, setAudioLoadError] = useState<Set<number>>(new Set());
+  const [hasVideoLoadError, setVideoLoadError] = useState<Set<number>>(new Set());
  const [outlineToastOpen, setOutlineToastOpen] = useState(false);
  const lastToastSceneCount = useRef<number | null>(null);
  const [isEditModalOpen, setIsEditModalOpen] = useState(false);
@@ -66,15 +68,182 @@ const StoryOutline: React.FC<StoryOutlineProps> = ({ state, onNext }) => {
  const [isKeyEventsModalOpen, setIsKeyEventsModalOpen] = useState(false);
  const [isTitleModalOpen, setIsTitleModalOpen] = useState(false);
  const [titleDraft, setTitleDraft] = useState('');
+  const [animatingSceneNumber, setAnimatingSceneNumber] = useState<number | null>(null);
  
  // Use state from hook instead of local state
  const sceneImages = state.sceneImages || new Map<number, string>();
  const sceneAudio = state.sceneAudio || new Map<number, string>();
+  const sceneAnimatedVideos = state.sceneAnimatedVideos || new Map<number, string>();
+  const sceneAnimationResumables = state.sceneAnimationResumables || new Map<number, SceneAnimationResume>();
+
+  // Store the animated video URL for a scene, then invalidate that scene's cached
+  // blob URL and load-error flag so the new video is fetched on the next load.
+  const updateSceneAnimatedVideo = (sceneNumber: number, videoUrl: string) => {
+    const updatedVideos = new Map(state.sceneAnimatedVideos || []);
+    updatedVideos.set(sceneNumber, videoUrl);
+    state.setSceneAnimatedVideos(updatedVideos);
+    // Release the stale object URL and forget it so the scene's blob is rebuilt.
+    setVideoBlobUrls((cached) => {
+      const remaining = new Map(cached);
+      const staleBlobUrl = remaining.get(sceneNumber);
+      if (staleBlobUrl) URL.revokeObjectURL(staleBlobUrl);
+      remaining.delete(sceneNumber);
+      return remaining;
+    });
+    // A previous load failure no longer applies to the new video.
+    setVideoLoadError((failed) => {
+      const remaining = new Set(failed);
+      remaining.delete(sceneNumber);
+      return remaining;
+    });
+  };
+
+  /**
+   * Animate the current scene together with its narration audio.
+   *
+   * Requires both a generated image and narration audio for the scene. Starts an async
+   * backend task, polls its status every 2 seconds, and stores the resulting video URL
+   * once the task completes. Polling gives up after a fixed deadline so that a task
+   * which never reaches 'completed' or 'failed' cannot keep the scene marked as
+   * animating indefinitely. Failures are surfaced through setError.
+   */
+  const handleAnimateSceneWithVoiceover = async () => {
+    if (!hasScenes || !currentScene) {
+      setError('Please generate your outline before animating scenes.');
+      return;
+    }
+
+    const sceneNumber = currentScene.scene_number || currentSceneIndex + 1;
+    const sceneImageRelativeUrl = state.sceneImages?.get(sceneNumber);
+    const sceneAudioRelativeUrl = state.sceneAudio?.get(sceneNumber);
+
+    if (!sceneImageRelativeUrl) {
+      setError('Please generate an image for this scene before animating it.');
+      return;
+    }
+
+    if (!sceneAudioRelativeUrl) {
+      setError('Please generate narration audio for this scene before animating with voiceover.');
+      return;
+    }
+
+    // InfiniteTalk can take up to 10 minutes, so allow some headroom beyond that
+    // before treating the task as stuck.
+    const POLL_INTERVAL_MS = 2000;
+    const POLL_TIMEOUT_MS = 15 * 60 * 1000;
+
+    setAnimatingSceneNumber(sceneNumber);
+    setError(null);
+    // Drop any stale resume record for this scene before starting a new run.
+    updateSceneAnimationResume(sceneNumber, undefined);
+
+    const storyContextPayload = createStoryContextPayload();
+
+    try {
+      console.info('[Outline] Animate scene with voiceover requested', {
+        sceneNumber,
+        image: sceneImageRelativeUrl,
+        audio: sceneAudioRelativeUrl,
+      });
+
+      // Start async task
+      const startResponse = await storyWriterApi.animateSceneVoiceover({
+        scene_number: sceneNumber,
+        scene_data: currentScene,
+        story_context: storyContextPayload,
+        image_url: sceneImageRelativeUrl,
+        audio_url: sceneAudioRelativeUrl,
+        resolution: '720p',
+      });
+
+      // Poll for completion, bounded by the deadline computed here.
+      const taskId = startResponse.task_id;
+      const pollDeadline = Date.now() + POLL_TIMEOUT_MS;
+      let done = false;
+      while (!done) {
+        if (Date.now() >= pollDeadline) {
+          throw new Error('Timed out waiting for the scene animation to finish. Please try again.');
+        }
+        await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
+        const status = await storyWriterApi.getTaskStatus(taskId);
+        if (status.status === 'completed') {
+          done = true;
+          const result = await storyWriterApi.getTaskResult(taskId);
+          // The result can be either the AnimateSceneResponse directly or wrapped in a result field
+          const animationResult = (result as any).result || result;
+          const videoUrl = animationResult.video_url;
+          const cost = animationResult.cost || 0;
+          if (videoUrl) {
+            updateSceneAnimatedVideo(sceneNumber, videoUrl);
+            console.info('[Outline] Animate with voiceover completed', {
+              sceneNumber,
+              video: videoUrl,
+              cost: cost,
+            });
+          } else {
+            throw new Error('Video URL not found in result');
+          }
+        } else if (status.status === 'failed') {
+          throw new Error(status.error || 'InfiniteTalk animation failed');
+        }
+        // Any other status (e.g. 'pending' or 'processing') keeps polling.
+      }
+    } catch (err: any) {
+      const detail = err?.response?.data?.detail;
+      // NOTE(review): `handled` is presumably true when the subscription flow already
+      // surfaced this error to the user — confirm against triggerSubscriptionError.
+      const handled = await triggerSubscriptionError(err);
+      const message = extractDetailMessage(detail, err?.message || 'Failed to animate scene with voiceover.');
+      setError(message);
+      if (!handled) {
+        console.error('[Outline] Animate scene with voiceover failed', err);
+      }
+    } finally {
+      // Always clear the animating marker, including on timeout or failure.
+      setAnimatingSceneNumber(null);
+    }
+  };
+
+  // Set the resumable-animation record for a scene, or clear it when `info` is omitted.
+  // The map is copied before being changed; an empty map is stored as null.
+  const updateSceneAnimationResume = (sceneNumber: number, info?: SceneAnimationResume) => {
+    const resumables = new Map(
+      state.sceneAnimationResumables || new Map<number, SceneAnimationResume>()
+    );
+    if (!info) {
+      resumables.delete(sceneNumber);
+    } else {
+      resumables.set(sceneNumber, info);
+    }
+    state.setSceneAnimationResumables(resumables.size > 0 ? resumables : null);
+  };
+
+  // Pick a human-readable message out of an error `detail` payload.
+  // A non-empty string is returned as-is; for an object the first of `message`,
+  // `error`, `detail` that is a string wins; anything else yields `fallback`.
+  const extractDetailMessage = (detail: any, fallback: string): string => {
+    if (!detail) return fallback;
+    if (typeof detail === 'string') return detail;
+    if (typeof detail !== 'object') return fallback;
+    const firstText = [detail.message, detail.error, detail.detail].find(
+      (candidate) => typeof candidate === 'string'
+    );
+    return typeof firstText === 'string' ? firstText : fallback;
+  };
+
+  // If an error `detail` payload reports a resumable prediction (`resume_available`
+  // and `prediction_id` both truthy), save a resume record for the scene and return
+  // the message to show; otherwise return null and change nothing.
+  const captureResumeOpportunity = (
+    sceneNumber: number,
+    duration: 5 | 10,
+    detail: any
+  ): string | null => {
+    const resumable =
+      detail && typeof detail === 'object' && detail.resume_available && detail.prediction_id;
+    if (!resumable) {
+      return null;
+    }
+
+    // Prefer the payload's own wording, falling back to a generic explanation.
+    let message =
+      'WaveSpeed is still finalizing this animation. Click Resume to download without extra cost.';
+    if (typeof detail.message === 'string') {
+      message = detail.message;
+    } else if (typeof detail.error === 'string') {
+      message = detail.error;
+    }
+
+    updateSceneAnimationResume(sceneNumber, {
+      predictionId: detail.prediction_id,
+      duration,
+      message,
+      createdAt: new Date().toISOString(),
+    });
+    return message;
+  };

  const scenes = state.outlineScenes || [];
  const sceneCount = scenes.length;
  const hasScenes = state.isOutlineStructured && scenes.length > 0;
  const hasOutlineScenes = Boolean(state.outlineScenes && state.outlineScenes.length > 0);
+  const resumableScenesArray = Array.from(sceneAnimationResumables.entries());
+  const resumableSummaryMessage =
+    resumableScenesArray.length === 0
+      ? null
+      : resumableScenesArray.length === 1
+      ? resumableScenesArray[0][1]?.message ||
+        `Scene ${resumableScenesArray[0][0]} animation is ready to resume without extra cost.`
+      : `Scenes ${resumableScenesArray.map(([scene]) => scene).join(', ')} have WaveSpeed animations ready to resume without extra cost. Open each scene and click Resume Animation.`;

  // removed old accordion renderer (unused)

@@ -98,10 +267,14 @@ const StoryOutline: React.FC<StoryOutlineProps> = ({ state, onNext }) => {
  
  // Get the current scene's image URL
  const currentSceneNumber = currentScene?.scene_number || currentSceneIndex + 1;
+  const currentSceneResumeInfo = sceneAnimationResumables.get(currentSceneNumber) || null;
+  const canAnimateCurrentScene = !animatingSceneNumber && !currentSceneResumeInfo;
+  const isCurrentSceneAnimating = animatingSceneNumber === currentSceneNumber;
  const currentSceneImageUrl = sceneImages.get(currentSceneNumber);
  const hasImageLoadError = imageLoadError.has(currentSceneNumber);
  const currentSceneAudioUrl = sceneAudio.get(currentSceneNumber);
  const hasAudioLoadError = audioLoadError.has(currentSceneNumber);
+  const hasAudioForScene = Boolean(currentSceneAudioUrl);
  
  // Fetch image as blob with authentication
  useEffect(() => {
@@ -128,8 +301,12 @@ const StoryOutline: React.FC<StoryOutlineProps> = ({ state, onNext }) => {
          next.set(currentSceneNumber, blobUrl);
          return next;
        });
-      } catch (err) {
-        console.error('Failed to load image:', err);
+      } catch (err: any) {
+        // Only log non-404 errors (404 means file doesn't exist, which is acceptable)
+        if (err?.response?.status !== 404) {
+          console.error('Failed to load image:', err);
+        }
+        // Mark as error to prevent retries
        setImageLoadError((prev) => new Set(prev).add(currentSceneNumber));
      }
    };
@@ -137,6 +314,47 @@ const StoryOutline: React.FC<StoryOutlineProps> = ({ state, onNext }) => {
    loadImage();
  }, [currentSceneNumber, currentSceneImageUrl, hasImageLoadError]);
  
+  // Load the current scene's animated video through the authenticated API client and
+  // cache it as a blob URL. Skipped when the scene has no video, a previous load
+  // failed, or a blob URL is already cached for the scene.
+  useEffect(() => {
+    const relativeVideoUrl = sceneAnimatedVideos.get(currentSceneNumber);
+    const alreadySettled =
+      hasVideoLoadError.has(currentSceneNumber) || videoBlobUrls.has(currentSceneNumber);
+    if (!relativeVideoUrl || alreadySettled) {
+      return;
+    }
+    
+    const fetchVideoBlob = async () => {
+      try {
+        // Drop any query string (e.g. a token); the request is authenticated by aiApiClient instead.
+        const [pathOnly] = relativeVideoUrl.split('?');
+        // aiApiClient adds the base URL, so only a leading slash is needed here.
+        const requestPath = pathOnly.startsWith('/') ? pathOnly : `/${pathOnly}`;
+        const { data: blob } = await aiApiClient.get(requestPath, {
+          responseType: 'blob',
+        });
+        const objectUrl = URL.createObjectURL(blob);
+        
+        setVideoBlobUrls((cached) => new Map(cached).set(currentSceneNumber, objectUrl));
+      } catch (err: any) {
+        // A 404 just means the file does not exist, which is acceptable; log everything else.
+        const isMissingFile = err?.response?.status === 404;
+        if (!isMissingFile) {
+          console.error('Failed to load video:', err);
+        }
+        // Remember the failure so this scene is not retried.
+        setVideoLoadError((failed) => new Set(failed).add(currentSceneNumber));
+      }
+    };
+    
+    fetchVideoBlob();
+  }, [currentSceneNumber, sceneAnimatedVideos, hasVideoLoadError, videoBlobUrls]);
+
  // Cleanup blob URLs when component unmounts or scenes change
  useEffect(() => {
    return () => {
@@ -147,13 +365,36 @@ const StoryOutline: React.FC<StoryOutlineProps> = ({ state, onNext }) => {
      audioBlobUrls.forEach((blobUrl) => {
        URL.revokeObjectURL(blobUrl);
      });
+      videoBlobUrls.forEach((blobUrl) => {
+        URL.revokeObjectURL(blobUrl);
+      });
    };
  }, []);
  
  const currentSceneImageFullUrl = imageBlobUrls.get(currentSceneNumber) || null;
  const currentSceneAudioFullUrl = audioBlobUrls.get(currentSceneNumber) || null;
+  const resolvedSceneAudioUrl =
+    currentSceneAudioFullUrl ||
+    (currentSceneAudioUrl ? storyWriterApi.getAudioUrl(currentSceneAudioUrl) : null);
+  const currentSceneAnimatedVideoUrl = videoBlobUrls.get(currentSceneNumber) || null;
  
-  // Reset image load error when scene changes
+  // Snapshot the story fields from hook state using the snake_case keys that the
+  // scene animation requests send as `story_context`.
+  const createStoryContextPayload = () => {
+    const {
+      persona,
+      storySetting,
+      characters,
+      plotElements,
+      writingStyle,
+      storyTone,
+      narrativePOV,
+      audienceAgeGroup,
+      contentRating,
+      storyLength,
+      premise,
+      outline,
+      storyContent,
+    } = state;
+    return {
+      persona,
+      story_setting: storySetting,
+      characters,
+      plot_elements: plotElements,
+      writing_style: writingStyle,
+      story_tone: storyTone,
+      narrative_pov: narrativePOV,
+      audience_age_group: audienceAgeGroup,
+      content_rating: contentRating,
+      story_length: storyLength,
+      premise,
+      outline,
+      story_content: storyContent,
+    };
+  };
+
+  // Reset image/audio/video load errors when scene changes (to allow retry for new scene)
  useEffect(() => {
    setImageLoadError((prev) => {
      const next = new Set(prev);
@@ -165,6 +406,11 @@ const StoryOutline: React.FC<StoryOutlineProps> = ({ state, onNext }) => {
      next.delete(currentSceneNumber);
      return next;
    });
+    setVideoLoadError((prev) => {
+      const next = new Set(prev);
+      next.delete(currentSceneNumber);
+      return next;
+    });
  }, [currentSceneNumber]);

  useEffect(() => {
@@ -192,9 +438,20 @@ const StoryOutline: React.FC<StoryOutlineProps> = ({ state, onNext }) => {

    const loadAudio = async () => {
      try {
-        const audioPath = currentSceneAudioUrl.startsWith('/')
-          ? currentSceneAudioUrl
-          : `/${currentSceneAudioUrl}`;
+        // Remove query parameters (token) from URL if present, we'll use authenticated request instead
+        const cleanUrl = currentSceneAudioUrl.split('?')[0];
+        // Normalize path - ensure it starts with /api/story/audio/
+        let audioPath = cleanUrl.startsWith('/')
+          ? cleanUrl
+          : `/${cleanUrl}`;
+        
+        // If path doesn't include /api/story/audio/, add it
+        if (!audioPath.includes('/api/story/audio/')) {
+          // Extract filename from path
+          const filename = cleanUrl.split('/').pop() || cleanUrl;
+          audioPath = `/api/story/audio/${filename}`;
+        }
+        
        const response = await aiApiClient.get(audioPath, {
          responseType: 'blob',
        });
@@ -210,8 +467,19 @@ const StoryOutline: React.FC<StoryOutlineProps> = ({ state, onNext }) => {
          next.set(currentSceneNumber, blobUrl);
          return next;
        });
-      } catch (err) {
-        console.error('Failed to load audio:', err);
+      } catch (err: any) {
+        // Only log non-404 errors (404 means file doesn't exist, which is acceptable)
+        if (err?.response?.status !== 404) {
+          console.error(`Failed to load audio for scene ${currentSceneNumber}:`, err);
+          console.error(`Audio URL was: ${currentSceneAudioUrl}`);
+          
+          // If auth error, log more details
+          if (err?.response?.status === 401) {
+            console.error(`Authentication failed for audio file. Make sure auth token is set.`);
+          }
+        }
+        
+        // Mark as error to prevent retries
        setAudioLoadError((prev) => new Set(prev).add(currentSceneNumber));
      }
    };
@@ -444,6 +712,104 @@ const StoryOutline: React.FC<StoryOutlineProps> = ({ state, onNext }) => {
    }
  };

+  /**
+   * Animate the current scene's image via storyWriterApi.animateScene.
+   *
+   * Requires a generated image for the scene. On success the returned video URL is
+   * stored for the scene and any resume record is cleared. On failure the message
+   * shown is the resume message when the error detail describes a resumable
+   * prediction, otherwise the message extracted from the error.
+   */
+  const handleAnimateScene = async () => {
+    if (!hasScenes || !currentScene) {
+      setError('Please generate your outline before animating scenes.');
+      return;
+    }
+
+    const sceneNumber = currentScene.scene_number || currentSceneIndex + 1;
+    const sceneImageRelativeUrl = state.sceneImages?.get(sceneNumber);
+    if (!sceneImageRelativeUrl) {
+      setError('Please generate an image for this scene before animating it.');
+      return;
+    }
+
+    setAnimatingSceneNumber(sceneNumber);
+    setError(null);
+    // Drop any stale resume record for this scene before starting a new run.
+    updateSceneAnimationResume(sceneNumber, undefined);
+
+    const storyContextPayload = createStoryContextPayload();
+
+    const animationDuration: 5 | 10 = 5;
+
+    try {
+      // Log the duration actually sent, not a separately hard-coded value.
+      console.info(
+        `[Outline] Animate scene requested`,
+        { sceneNumber, duration: animationDuration, image: sceneImageRelativeUrl }
+      );
+      const response = await storyWriterApi.animateScene({
+        scene_number: sceneNumber,
+        scene_data: currentScene,
+        story_context: storyContextPayload,
+        image_url: sceneImageRelativeUrl,
+        duration: animationDuration,
+      });
+
+      updateSceneAnimatedVideo(sceneNumber, response.video_url);
+      updateSceneAnimationResume(sceneNumber, undefined);
+      console.info(
+        `[Outline] Animate scene completed`,
+        {
+          sceneNumber,
+          video: response.video_url,
+          cost: response.cost,
+          prediction: response.prediction_id || 'n/a',
+        }
+      );
+    } catch (err: any) {
+      const detail = err?.response?.data?.detail;
+      const resumeMessage = captureResumeOpportunity(sceneNumber, animationDuration, detail);
+      // NOTE(review): `handled` is presumably true when the subscription flow already
+      // surfaced this error to the user — confirm against triggerSubscriptionError.
+      const handled = await triggerSubscriptionError(err);
+      const message = resumeMessage || extractDetailMessage(detail, err?.message || 'Failed to animate scene.');
+      setError(message);
+      // Log only genuine failures: skip when the run is resumable or was already handled.
+      if (!resumeMessage && !handled) {
+        console.error('[Outline] Animate scene failed', err);
+      }
+    } finally {
+      setAnimatingSceneNumber(null);
+    }
+  };
+
+  // Resume a previously started animation for a scene using its saved prediction id.
+  // On success the video URL is stored and the scene's resume record is cleared; on
+  // failure the error message is shown and the error is always logged.
+  const handleResumeSceneAnimation = async (
+    sceneNumber: number,
+    resumeInfo: SceneAnimationResume
+  ) => {
+    setAnimatingSceneNumber(sceneNumber);
+    setError(null);
+
+    try {
+      const { predictionId, duration } = resumeInfo;
+      console.info('[Outline] Resume scene requested', { sceneNumber, prediction: predictionId });
+
+      const resumed = await storyWriterApi.resumeAnimateScene({
+        prediction_id: predictionId,
+        scene_number: sceneNumber,
+        duration,
+      });
+
+      updateSceneAnimatedVideo(sceneNumber, resumed.video_url);
+      updateSceneAnimationResume(sceneNumber, undefined);
+
+      const completionLog = {
+        sceneNumber,
+        video: resumed.video_url,
+        cost: resumed.cost,
+        // Fall back to the id we resumed with when the response omits one.
+        prediction: resumed.prediction_id || predictionId,
+      };
+      console.info('[Outline] Resume scene completed', completionLog);
+    } catch (err: any) {
+      const detail = err?.response?.data?.detail;
+      setError(extractDetailMessage(detail, err.message || 'Failed to resume animation.'));
+      await triggerSubscriptionError(err);
+      console.error('[Outline] Resume scene failed', err);
+    } finally {
+      setAnimatingSceneNumber(null);
+    }
+  };
+
  const handleRegenerateCurrentSceneImage = async () => {
    if (!hasScenes || !currentScene) return;
    setIsRegeneratingSceneImage(true);
@@ -532,6 +898,12 @@ const StoryOutline: React.FC<StoryOutlineProps> = ({ state, onNext }) => {
        </Alert>
      )}

+      {resumableSummaryMessage && (
+        <Alert severity="info" sx={{ mb: 3 }}>
+          {resumableSummaryMessage}
+        </Alert>
+      )}
+
      {!state.premise && (
        <Alert severity="warning" sx={{ mb: 3 }}>
          Please generate a premise first in the Setup phase.
@@ -552,17 +924,24 @@ const StoryOutline: React.FC<StoryOutlineProps> = ({ state, onNext }) => {
            imageUrl={currentSceneImageFullUrl}
            onImageError={() => setImageLoadError((prev) => new Set(prev).add(currentSceneNumber))}
            narrationEnabled={!!state.enableNarration}
-            audioUrl={
-              currentSceneAudioFullUrl || (state.sceneAudio && state.sceneAudio.get(currentSceneNumber)
-                ? storyWriterApi.getAudioUrl(state.sceneAudio.get(currentSceneNumber) || '')
-                : null)
-            }
+            audioUrl={resolvedSceneAudioUrl || null}
+            hasAudio={hasAudioForScene}
            onOpenImageModal={openImageModal}
            onOpenAudioModal={openAudioModal}
            onOpenCharactersModal={openCharactersModal}
            onOpenKeyEventsModal={openKeyEventsModal}
            onOpenTitleModal={openTitleModal}
            onOpenEditModal={openEditModal}
+            onAnimateScene={canAnimateCurrentScene ? handleAnimateScene : undefined}
+            onAnimateWithVoiceover={hasAudioForScene ? handleAnimateSceneWithVoiceover : undefined}
+            onResumeScene={
+              currentSceneResumeInfo && !animatingSceneNumber
+                ? () => handleResumeSceneAnimation(currentSceneNumber, currentSceneResumeInfo)
+                : undefined
+            }
+            resumeInfo={currentSceneResumeInfo}
+            isAnimatingScene={isCurrentSceneAnimating}
+            animatedVideoUrl={currentSceneAnimatedVideoUrl}
          />
          <OutlineActionsBar
            isGenerating={isGenerating}
@@ -617,6 +996,50 @@ const StoryOutline: React.FC<StoryOutlineProps> = ({ state, onNext }) => {
          (state.setOutlineScenes as any)(updated);
          setIsImageModalOpen(false);
        }}
+        onRegenerate={async (prompt: string) => {
+          if (!hasScenes || !currentScene) return; // nothing to regenerate without a current scene
+          setIsRegeneratingSceneImage(true);
+          try {
+            const sceneNum = currentScene.scene_number || currentSceneIndex + 1; // falsy scene_number falls back to the 1-based index
+            const sceneTitle = currentScene.title || `Scene ${sceneNum}`;
+            // Regenerate with the trimmed prompt and the image settings currently held in state.
+            const resp = await storyWriterApi.regenerateSceneImage({
+              scene_number: sceneNum,
+              scene_title: sceneTitle,
+              prompt: prompt.trim(),
+              provider: state.imageProvider || undefined,
+              width: state.imageWidth,
+              height: state.imageHeight,
+              model: state.imageModel || undefined,
+            });
+            // A response without image_url is treated as a failure even if success is set.
+            if (resp.success && resp.image_url) {
+              const nextMap = new Map(state.sceneImages || []); // copy so the setter receives a new Map
+              nextMap.set(sceneNum, resp.image_url);
+              state.setSceneImages(nextMap);
+              
+              // Update the scene with the new prompt if generation was successful
+              const updated = [...scenes];
+              updated[currentSceneIndex] = { ...updated[currentSceneIndex], image_prompt: prompt.trim() };
+              (state.setOutlineScenes as any)(updated);
+              setImagePromptDraft(prompt.trim());
+              
+              // Close the modal after successful regeneration
+              setIsImageModalOpen(false);
+            } else {
+              throw new Error(resp.error || 'Failed to regenerate image');
+            }
+          } catch (err: any) {
+            console.error('Failed to regenerate scene image:', err);
+            throw err; // Re-throw to be handled by modal
+          } finally {
+            setIsRegeneratingSceneImage(false); // reset the busy flag on success and failure alike
+          }
+        }}
+        imageProvider={state.imageProvider}
+        imageWidth={state.imageWidth}
+        imageHeight={state.imageHeight}
+        imageModel={state.imageModel}
      />
      <AudioScriptModal
        open={isAudioModalOpen}
@@ -644,6 +1067,94 @@ const StoryOutline: React.FC<StoryOutlineProps> = ({ state, onNext }) => {
            ? storyWriterApi.getAudioUrl(state.sceneAudio.get(currentSceneNumber) || '')
            : currentSceneAudioFullUrl) || null
        }
+        onGenerateAI={async (params: {
+          text: string;
+          voice_id?: string;
+          speed?: number;
+          volume?: number;
+          pitch?: number;
+          emotion?: string;
+        }) => {
+          if (!hasScenes || !currentScene) return; // no current scene to attach audio to
+          setIsRegeneratingSceneAudio(true);
+          try {
+            const sceneNum = currentScene.scene_number || currentSceneIndex + 1; // falsy scene_number falls back to the 1-based index
+            const sceneTitle = currentScene.title || `Scene ${sceneNum}`;
+            // Missing values fall back to the literals below; an empty voice_id or emotion also falls back (||).
+            const resp = await storyWriterApi.generateAIAudio({
+              scene_number: sceneNum,
+              scene_title: sceneTitle,
+              text: params.text.trim(),
+              voice_id: params.voice_id || 'Wise_Woman',
+              speed: params.speed !== undefined ? params.speed : 1.0,
+              volume: params.volume !== undefined ? params.volume : 1.0,
+              pitch: params.pitch !== undefined ? params.pitch : 0.0,
+              emotion: params.emotion || 'happy',
+            });
+            // A response without audio_url is treated as a failure even if success is set.
+            if (resp.success && resp.audio_url) {
+              const nextMap = new Map(state.sceneAudio || []); // copy so the setter receives a new Map
+              nextMap.set(sceneNum, resp.audio_url);
+              state.setSceneAudio(nextMap);
+              
+              // Update the scene with the new audio_narration if generation was successful
+              const updated = [...scenes];
+              updated[currentSceneIndex] = { ...updated[currentSceneIndex], audio_narration: params.text.trim() };
+              (state.setOutlineScenes as any)(updated);
+              setAudioScriptDraft(params.text.trim());
+              
+              // Close the modal after successful generation
+              setIsAudioModalOpen(false);
+            } else {
+              throw new Error(resp.error || 'Failed to generate AI audio');
+            }
+          } catch (err: any) {
+            console.error('Failed to generate AI audio:', err);
+            throw err; // Re-throw to be handled by modal
+          } finally {
+            setIsRegeneratingSceneAudio(false); // reset the busy flag on success and failure alike
+          }
+        }}
+        onGenerateFree={async (text: string) => {
+          if (!hasScenes || !currentScene) return; // no current scene to attach audio to
+          setIsRegeneratingSceneAudio(true);
+          try {
+            const sceneNum = currentScene.scene_number || currentSceneIndex + 1; // falsy scene_number falls back to the 1-based index
+            const sceneTitle = currentScene.title || `Scene ${sceneNum}`;
+            // Provider settings come from state; falsy values fall back to gtts / en / false / 150.
+            const resp = await storyWriterApi.generateFreeAudio({
+              scene_number: sceneNum,
+              scene_title: sceneTitle,
+              text: text.trim(),
+              provider: state.audioProvider || 'gtts',
+              lang: state.audioLang || 'en',
+              slow: state.audioSlow || false,
+              rate: state.audioRate || 150,
+            });
+            // A response without audio_url is treated as a failure even if success is set.
+            if (resp.success && resp.audio_url) {
+              const nextMap = new Map(state.sceneAudio || []); // copy so the setter receives a new Map
+              nextMap.set(sceneNum, resp.audio_url);
+              state.setSceneAudio(nextMap);
+              
+              // Update the scene with the new audio_narration if generation was successful
+              const updated = [...scenes];
+              updated[currentSceneIndex] = { ...updated[currentSceneIndex], audio_narration: text.trim() };
+              (state.setOutlineScenes as any)(updated);
+              setAudioScriptDraft(text.trim());
+              
+              // Close the modal after successful generation
+              setIsAudioModalOpen(false);
+            } else {
+              throw new Error(resp.error || 'Failed to generate free audio');
+            }
+          } catch (err: any) {
+            console.error('Failed to generate free audio:', err);
+            throw err; // Re-throw to be handled by modal
+          } finally {
+            setIsRegeneratingSceneAudio(false); // reset the busy flag on success and failure alike
+          }
+        }}
      />
      <CharactersModal
        open={isCharactersModalOpen}
--- a/frontend/src/components/StoryWriter/Phases/StoryOutlineParts/AudioScriptModal.tsx
+++ b/frontend/src/components/StoryWriter/Phases/StoryOutlineParts/AudioScriptModal.tsx
@@ -1,5 +1,14 @@
 import React from 'react';
-import { Box, Button, Dialog, DialogActions, DialogContent, DialogTitle, TextField } from '@mui/material';
+import { 
+  Box, Button, Dialog, DialogActions, DialogContent, DialogTitle, 
+  TextField, Divider, CircularProgress, Typography, Tooltip, IconButton,
+  Slider, FormControl, InputLabel, Select, MenuItem, FormHelperText,
+  ToggleButtonGroup, ToggleButton 
+} from '@mui/material';
+import VolumeUpIcon from '@mui/icons-material/VolumeUp';
+import SmartToyIcon from '@mui/icons-material/SmartToy';
+import InfoOutlinedIcon from '@mui/icons-material/InfoOutlined';
+import { OperationButton } from '../../../shared/OperationButton';

 interface AudioScriptModalProps {
  open: boolean;
@@ -18,14 +27,114 @@ interface AudioScriptModalProps {
  onChangeSlow: (v: boolean) => void;
  onChangeRate: (v: number) => void;
  audioUrl?: string | null;
+  // audio generation callbacks - now with full parameters
+  onGenerateAI?: (params: {
+    text: string;
+    voice_id?: string;
+    speed?: number;
+    volume?: number;
+    pitch?: number;
+    emotion?: string;
+  }) => Promise<void>;
+  onGenerateFree?: (text: string) => Promise<void>;
 }

+// Preset voices for the Voice dropdown (WaveSpeed Minimax): `value` is what gets sent as voice_id; label/description are display-only.
+const AVAILABLE_VOICES = [
+  { value: 'Wise_Woman', label: 'Wise Woman', description: 'Warm, authoritative female voice' },
+  { value: 'Friendly_Person', label: 'Friendly Person', description: 'Approachable and conversational' },
+  { value: 'Inspirational_girl', label: 'Inspirational Girl', description: 'Energetic and motivating' },
+  { value: 'Deep_Voice_Man', label: 'Deep Voice Man', description: 'Rich, deep male voice' },
+  { value: 'Calm_Woman', label: 'Calm Woman', description: 'Peaceful and soothing' },
+  { value: 'Casual_Guy', label: 'Casual Guy', description: 'Relaxed and informal' },
+  { value: 'Lively_Girl', label: 'Lively Girl', description: 'Vibrant and enthusiastic' },
+  { value: 'Patient_Man', label: 'Patient Man', description: 'Steady and reassuring' },
+  { value: 'Young_Knight', label: 'Young Knight', description: 'Brave and confident' },
+  { value: 'Determined_Man', label: 'Determined Man', description: 'Strong and resolute' },
+  { value: 'Lovely_Girl', label: 'Lovely Girl', description: 'Sweet and charming' },
+  { value: 'Decent_Boy', label: 'Decent Boy', description: 'Polite and well-mannered' },
+  { value: 'Imposing_Manner', label: 'Imposing Manner', description: 'Commanding and powerful' },
+  { value: 'Elegant_Man', label: 'Elegant Man', description: 'Sophisticated and refined' },
+  { value: 'Abbess', label: 'Abbess', description: 'Dignified and wise' },
+  { value: 'Sweet_Girl_2', label: 'Sweet Girl 2', description: 'Gentle and kind' },
+  { value: 'Exuberant_Girl', label: 'Exuberant Girl', description: 'Joyful and energetic' },
+];
+
+const EMOTIONS = [ // emotion presets; the "(default)" tag must match the modal's initial `emotion` state ('happy')
+  { value: 'happy', label: 'Happy', description: 'Cheerful and upbeat tone (default)' },
+  { value: 'sad', label: 'Sad', description: 'Melancholic and somber tone' },
+  { value: 'angry', label: 'Angry', description: 'Intense and forceful tone' },
+  { value: 'fear', label: 'Fear', description: 'Anxious and nervous tone' },
+  { value: 'surprised', label: 'Surprised', description: 'Astonished and amazed tone' },
+  { value: 'neutral', label: 'Neutral', description: 'Calm and balanced tone' },
+];
+
 const AudioScriptModal: React.FC<AudioScriptModalProps> = ({
  open, sceneNumber, value, onChange, onClose, onSave,
  audioProvider, audioLang, audioSlow, audioRate,
  onChangeProvider, onChangeLang, onChangeSlow, onChangeRate,
  audioUrl,
+  onGenerateAI,
+  onGenerateFree,
 }) => {
+  const [isGeneratingAI, setIsGeneratingAI] = React.useState(false);
+  const [isGeneratingFree, setIsGeneratingFree] = React.useState(false);
+  const [generateError, setGenerateError] = React.useState<string | null>(null);
+  
+  // Audio type toggle - default to 'free'
+  const [audioType, setAudioType] = React.useState<'free' | 'ai'>('free');
+  
+  // AI Audio generation parameters with intelligent defaults
+  const [voiceId, setVoiceId] = React.useState<string>('Wise_Woman');
+  const [customVoiceId, setCustomVoiceId] = React.useState<string>('');
+  const [useCustomVoice, setUseCustomVoice] = React.useState<boolean>(false);
+  const [emotion, setEmotion] = React.useState<string>('happy');
+  const [speed, setSpeed] = React.useState<number>(1.0);
+  const [volume, setVolume] = React.useState<number>(1.0);
+  const [pitch, setPitch] = React.useState<number>(0.0);
+
+  const handleGenerateAI = async () => {
+    // Generate AI narration from the current script; failures are shown inline via generateError.
+    const script = value.trim();
+    const selectedVoice = useCustomVoice ? customVoiceId.trim() : voiceId;
+    if (!onGenerateAI || !script) return;
+    if (!selectedVoice) {
+      // A blank custom ID would otherwise be silently replaced by the caller's default voice.
+      setGenerateError('Please enter a custom voice ID or choose a system voice.');
+      return;
+    }
+    setIsGeneratingAI(true);
+    setGenerateError(null);
+    try {
+      await onGenerateAI({ text: script, voice_id: selectedVoice, emotion, speed, volume, pitch });
+    } catch (err: any) {
+      // `detail` may be a structured object; only a string is safe to render as a React child.
+      const detail = err?.response?.data?.detail;
+      const detailText = typeof detail === 'string' ? detail : detail?.message;
+      const fallback = err?.message || 'Failed to generate AI audio';
+      setGenerateError(typeof detailText === 'string' && detailText ? detailText : fallback);
+    } finally {
+      setIsGeneratingAI(false);
+    }
+  };
+
+  const handleGenerateFree = async () => {
+    // Generate narration with the free TTS provider; failures are shown inline via generateError.
+    const script = value.trim();
+    if (!onGenerateFree || !script) return;
+    setIsGeneratingFree(true);
+    setGenerateError(null);
+    try {
+      await onGenerateFree(script);
+    } catch (err: any) {
+      // `detail` may be a structured object; only a string is safe to render as a React child.
+      const detail = err?.response?.data?.detail;
+      const detailText = typeof detail === 'string' ? detail : detail?.message;
+      setGenerateError((typeof detailText === 'string' && detailText) || err?.message || 'Failed to generate free audio');
+    } finally {
+      setIsGeneratingFree(false);
+    }
+  };
  return (
    <Dialog
      open={open}
@@ -42,14 +151,43 @@ const AudioScriptModal: React.FC<AudioScriptModalProps> = ({
      }}
    >
      <DialogTitle>Edit Audio Narration Script (Scene {sceneNumber})</DialogTitle>
-      <DialogContent dividers sx={{ color: '#2C2416' }}>
+      <DialogContent dividers sx={{ color: '#2C2416', bgcolor: '#fff' }}>
        <Box
          sx={{
            display: 'flex',
            flexDirection: 'column',
-            gap: 2,
-            '& .MuiFormLabel-root': { color: '#6b5846' },
-            '& .MuiInputBase-root': { color: '#2C2416' },
+            gap: 3,
+            pt: 1,
+            '& .MuiFormLabel-root': { color: '#5D4037', fontWeight: 500 },
+            '& .MuiInputBase-root': { 
+              color: '#2C2416',
+              bgcolor: '#fff',
+              '& .MuiOutlinedInput-notchedOutline': {
+                borderColor: 'rgba(0, 0, 0, 0.23)',
+              },
+              '&:hover .MuiOutlinedInput-notchedOutline': {
+                borderColor: 'rgba(0, 0, 0, 0.87)',
+              },
+              '&.Mui-focused .MuiOutlinedInput-notchedOutline': {
+                borderColor: 'primary.main',
+                borderWidth: '2px',
+              },
+            },
+            '& .MuiInputBase-input': {
+              color: '#2C2416',
+            },
+            '& textarea': {
+              color: '#2C2416',
+            },
+            '& .MuiSelect-select': {
+              color: '#2C2416',
+            },
+            '& .MuiFormHelperText-root': {
+              color: 'rgba(0, 0, 0, 0.6)',
+            },
+            '& .MuiMenuItem-root': {
+              color: '#2C2416',
+            },
          }}
        >
          {audioUrl ? (
@@ -73,40 +211,387 @@ const AudioScriptModal: React.FC<AudioScriptModalProps> = ({
            multiline
            minRows={6}
            fullWidth
+            placeholder="Enter the narration text for this scene..."
+            sx={{
+              '& .MuiInputBase-input': {
+                color: '#2C2416',
+              },
+            }}
          />
-          <Box sx={{ display: 'grid', gridTemplateColumns: { xs: '1fr', md: '1fr 1fr' }, gap: 2 }}>
-            <TextField
-              select
-              label="Audio Provider"
-              value={audioProvider}
-              onChange={(e) => onChangeProvider(e.target.value)}
-              SelectProps={{ native: true }}
-            >
-              <option value="gtts">gTTS</option>
-              <option value="pyttsx3">pyttsx3</option>
-            </TextField>
-            <TextField
-              label="Language (e.g., en, hi)"
-              value={audioLang}
-              onChange={(e) => onChangeLang(e.target.value)}
-            />
-            <TextField
-              select
-              label="Slow (gTTS)"
-              value={audioSlow ? 'true' : 'false'}
-              onChange={(e) => onChangeSlow(e.target.value === 'true')}
-              SelectProps={{ native: true }}
-            >
-              <option value="false">Normal</option>
-              <option value="true">Slow</option>
-            </TextField>
-            <TextField
-              type="number"
-              label="Rate (pyttsx3)"
-              value={audioRate}
-              onChange={(e) => onChangeRate(Number(e.target.value))}
-              inputProps={{ min: 50, max: 300, step: 10 }}
-            />
+          
+          {generateError && (
+            <Box sx={{ color: 'error.main', fontSize: '0.875rem', mt: -1 }}>
+              {generateError}
+            </Box>
+          )}
+
+          <Divider sx={{ my: 1 }} />
+
+          {/* Audio Type Toggle */}
+          <Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
+            <Box>
+              <Typography variant="subtitle2" sx={{ mb: 1.5, fontWeight: 600, color: '#5D4037' }}>
+                Audio Type
+              </Typography>
+              <ToggleButtonGroup
+                value={audioType}
+                exclusive
+                onChange={(_, newValue) => {
+                  if (newValue !== null) {
+                    setAudioType(newValue);
+                    setGenerateError(null);
+                  }
+                }}
+                aria-label="audio type"
+                fullWidth
+                sx={{
+                  '& .MuiToggleButton-root': {
+                    textTransform: 'none',
+                    borderColor: 'rgba(0, 0, 0, 0.23)',
+                    color: '#5D4037',
+                    '&.Mui-selected': {
+                      backgroundColor: 'primary.main',
+                      color: '#fff',
+                      '&:hover': {
+                        backgroundColor: 'primary.dark',
+                      },
+                    },
+                    '&:hover': {
+                      backgroundColor: 'rgba(0, 0, 0, 0.04)',
+                    },
+                  },
+                }}
+              >
+                <ToggleButton value="free" aria-label="free audio">
+                  <VolumeUpIcon sx={{ mr: 1 }} />
+                  Free Audio (gTTS)
+                </ToggleButton>
+                <ToggleButton value="ai" aria-label="ai audio">
+                  <SmartToyIcon sx={{ mr: 1 }} />
+                  AI Audio (Minimax)
+                </ToggleButton>
+              </ToggleButtonGroup>
+            </Box>
+
+            {/* Generate Button - Context aware based on audio type */}
+            <Box sx={{ display: 'flex', gap: 2, flexWrap: 'wrap' }}>
+              {audioType === 'ai' && onGenerateAI && (
+                <OperationButton
+                  operation={{
+                    provider: 'audio',
+                    model: 'minimax/speech-02-hd',
+                    tokens_requested: value.trim().length, // Every character is 1 token
+                    operation_type: 'audio_generation',
+                    actual_provider_name: 'wavespeed',
+                  }}
+                  label="Generate AI Audio"
+                  variant="contained"
+                  size="medium"
+                  startIcon={<SmartToyIcon />}
+                  showCost={true}
+                  checkOnHover={true}
+                  checkOnMount={false}
+                  onClick={handleGenerateAI}
+                  disabled={isGeneratingAI || isGeneratingFree || !value.trim()}
+                  loading={isGeneratingAI}
+                  sx={{ flex: 1, minWidth: '200px' }}
+                />
+              )}
+
+              {audioType === 'free' && onGenerateFree && (
+                <Button
+                  variant="contained"
+                  size="medium"
+                  startIcon={isGeneratingFree ? <CircularProgress size={16} /> : <VolumeUpIcon />}
+                  onClick={handleGenerateFree}
+                  disabled={isGeneratingAI || isGeneratingFree || !value.trim()}
+                  sx={{ flex: 1, minWidth: '200px' }}
+                >
+                  {isGeneratingFree ? 'Generating...' : 'Generate Free Audio (gTTS)'}
+                </Button>
+              )}
+            </Box>
+
+            <Divider sx={{ my: 1 }} />
+
+            {/* Settings - Conditionally shown based on audio type */}
+            {audioType === 'ai' && (
+              <Box>
+                <Typography variant="subtitle2" sx={{ mb: 2, fontWeight: 600, color: '#5D4037' }}>
+                  AI Audio Generation Settings
+                </Typography>
+            <Box sx={{ display: 'grid', gridTemplateColumns: { xs: '1fr', md: '1fr 1fr' }, gap: 2 }}>
+              {/* Voice Selection */}
+              <FormControl fullWidth>
+                <InputLabel>Voice</InputLabel>
+                <Select
+                  value={useCustomVoice ? 'custom' : voiceId}
+                  onChange={(e) => {
+                    if (e.target.value === 'custom') {
+                      setUseCustomVoice(true);
+                    } else {
+                      setUseCustomVoice(false);
+                      setVoiceId(e.target.value);
+                    }
+                  }}
+                  label="Voice"
+                  renderValue={(value) => {
+                    if (value === 'custom') {
+                      return customVoiceId || 'Custom Voice ID';
+                    }
+                    const voice = AVAILABLE_VOICES.find(v => v.value === value);
+                    return voice ? voice.label : value;
+                  }}
+                >
+                  {AVAILABLE_VOICES.map((voice) => (
+                    <MenuItem key={voice.value} value={voice.value}>
+                      <Box>
+                        <Typography variant="body2" sx={{ fontWeight: 500 }}>
+                          {voice.label}
+                        </Typography>
+                        <Typography variant="caption" sx={{ color: 'text.secondary' }}>
+                          {voice.description}
+                        </Typography>
+                      </Box>
+                    </MenuItem>
+                  ))}
+                  <MenuItem value="custom">
+                    <Box>
+                      <Typography variant="body2" sx={{ fontWeight: 500, fontStyle: 'italic' }}>
+                        Custom Voice ID...
+                      </Typography>
+                      <Typography variant="caption" sx={{ color: 'text.secondary' }}>
+                        Use a voice ID from voice cloning
+                      </Typography>
+                    </Box>
+                  </MenuItem>
+                </Select>
+                <FormHelperText>
+                  <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
+                    Choose a voice that matches your story's tone
+                    <Tooltip
+                      title={
+                        <Box sx={{ p: 0.5 }}>
+                          <Typography variant="caption" sx={{ display: 'block', mb: 0.5 }}>
+                            Current Voice ID: {voiceId}
+                          </Typography>
+                          <Typography variant="caption" sx={{ display: 'block', mb: 0.5 }}>
+                            You can use system voices above or enter a custom voice ID from voice cloning.
+                          </Typography>
+                          <Typography variant="caption" sx={{ display: 'block' }}>
+                            Learn more:{' '}
+                            <a
+                              href="https://wavespeed.ai/models/minimax/voice-clone"
+                              target="_blank"
+                              rel="noopener noreferrer"
+                              style={{ color: '#90caf9' }}
+                            >
+                              Voice Cloning Guide
+                            </a>
+                          </Typography>
+                        </Box>
+                      }
+                      arrow
+                      placement="top"
+                    >
+                      <InfoOutlinedIcon sx={{ fontSize: '0.875rem', color: 'text.secondary', cursor: 'help' }} />
+                    </Tooltip>
+                  </Box>
+                </FormHelperText>
+              </FormControl>
+              
+              {/* Custom Voice ID Input (shown when custom voice is selected) */}
+              {useCustomVoice && (
+                <TextField
+                  fullWidth
+                  label="Custom Voice ID"
+                  value={customVoiceId}
+                  onChange={(e) => setCustomVoiceId(e.target.value)}
+                  helperText="Enter your custom voice ID from voice cloning"
+                  placeholder="your-custom-voice-id"
+                />
+              )}
+
+              {/* Emotion Selection */}
+              <FormControl fullWidth>
+                <InputLabel>Emotion</InputLabel>
+                <Select
+                  value={emotion}
+                  onChange={(e) => setEmotion(e.target.value)}
+                  label="Emotion"
+                >
+                  {EMOTIONS.map((em) => (
+                    <MenuItem key={em.value} value={em.value}>
+                      <Box>
+                        <Typography variant="body2">{em.label}</Typography>
+                        <Typography variant="caption" sx={{ color: 'text.secondary' }}>
+                          {em.description}
+                        </Typography>
+                      </Box>
+                    </MenuItem>
+                  ))}
+                </Select>
+                <FormHelperText>
+                  Select the emotional tone for the narration
+                </FormHelperText>
+              </FormControl>
+
+              {/* Speed Slider */}
+              <Box>
+                <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1 }}>
+                  <Typography variant="body2" sx={{ minWidth: '60px' }}>
+                    Speed
+                  </Typography>
+                  <Slider
+                    value={speed}
+                    onChange={(_, newValue) => setSpeed(newValue as number)}
+                    min={0.5}
+                    max={2.0}
+                    step={0.1}
+                    valueLabelDisplay="auto"
+                    valueLabelFormat={(value) => `${value}x`}
+                    sx={{ flex: 1 }}
+                  />
+                  <Typography variant="body2" sx={{ minWidth: '40px', textAlign: 'right' }}>
+                    {speed.toFixed(1)}x
+                  </Typography>
+                </Box>
+                <FormHelperText>
+                  <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
+                    Speech speed (0.5x = slow, 1.0x = normal, 2.0x = fast)
+                    <Tooltip
+                      title="Adjust how fast the narration speaks. 1.0 is normal speed, suitable for most content."
+                      arrow
+                      placement="top"
+                    >
+                      <InfoOutlinedIcon sx={{ fontSize: '0.875rem', color: 'text.secondary', cursor: 'help' }} />
+                    </Tooltip>
+                  </Box>
+                </FormHelperText>
+              </Box>
+
+              {/* Volume Slider */}
+              <Box>
+                <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1 }}>
+                  <Typography variant="body2" sx={{ minWidth: '60px' }}>
+                    Volume
+                  </Typography>
+                  <Slider
+                    value={volume}
+                    onChange={(_, newValue) => setVolume(newValue as number)}
+                    min={0.1}
+                    max={10.0}
+                    step={0.1}
+                    valueLabelDisplay="auto"
+                    valueLabelFormat={(value) => `${value.toFixed(1)}`}
+                    sx={{ flex: 1 }}
+                  />
+                  <Typography variant="body2" sx={{ minWidth: '40px', textAlign: 'right' }}>
+                    {volume.toFixed(1)}
+                  </Typography>
+                </Box>
+                <FormHelperText>
+                  <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
+                    Audio volume level (0.1 = quiet, 1.0 = normal, 10.0 = loud)
+                    <Tooltip
+                      title="Control the loudness of the audio. 1.0 is standard volume. Increase for emphasis, decrease for subtlety."
+                      arrow
+                      placement="top"
+                    >
+                      <InfoOutlinedIcon sx={{ fontSize: '0.875rem', color: 'text.secondary', cursor: 'help' }} />
+                    </Tooltip>
+                  </Box>
+                </FormHelperText>
+              </Box>
+
+              {/* Pitch Slider */}
+              <Box sx={{ gridColumn: { xs: '1', md: '1 / -1' } }}>
+                <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1 }}>
+                  <Typography variant="body2" sx={{ minWidth: '60px' }}>
+                    Pitch
+                  </Typography>
+                  <Slider
+                    value={pitch}
+                    onChange={(_, newValue) => setPitch(newValue as number)}
+                    min={-12}
+                    max={12}
+                    step={1}
+                    valueLabelDisplay="auto"
+                    valueLabelFormat={(value) => `${value > 0 ? '+' : ''}${value}`}
+                    marks={[
+                      { value: -12, label: '-12' },
+                      { value: 0, label: '0' },
+                      { value: 12, label: '+12' },
+                    ]}
+                    sx={{ flex: 1 }}
+                  />
+                  <Typography variant="body2" sx={{ minWidth: '50px', textAlign: 'right' }}>
+                    {pitch > 0 ? '+' : ''}{pitch}
+                  </Typography>
+                </Box>
+                <FormHelperText>
+                  <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
+                    Voice pitch adjustment (-12 = lower, 0 = normal, +12 = higher)
+                    <Tooltip
+                      title="Adjust the pitch of the voice. Negative values make the voice deeper, positive values make it higher. 0 keeps the natural voice pitch."
+                      arrow
+                      placement="top"
+                    >
+                      <InfoOutlinedIcon sx={{ fontSize: '0.875rem', color: 'text.secondary', cursor: 'help' }} />
+                    </Tooltip>
+                  </Box>
+                </FormHelperText>
+              </Box>
+            </Box>
+              </Box>
+            )}
+
+            {audioType === 'free' && (
+              <Box>
+                <Typography variant="subtitle2" sx={{ mb: 2, fontWeight: 600, color: '#5D4037' }}>
+                  Free Audio (gTTS) Settings
+                </Typography>
+                <Box sx={{ display: 'grid', gridTemplateColumns: { xs: '1fr', md: '1fr 1fr' }, gap: 2 }}>
+                  <TextField
+                    select
+                    label="Audio Provider"
+                    value={audioProvider}
+                    onChange={(e) => onChangeProvider(e.target.value)}
+                    SelectProps={{ native: true }}
+                    helperText="Text-to-speech engine for free audio generation"
+                  >
+                    <option value="gtts">gTTS (Google Text-to-Speech)</option>
+                    <option value="pyttsx3">pyttsx3 (Offline)</option>
+                  </TextField>
+                  <TextField
+                    label="Language"
+                    value={audioLang}
+                    onChange={(e) => onChangeLang(e.target.value)}
+                    helperText="Language code (e.g., en for English, hi for Hindi)"
+                    placeholder="en"
+                  />
+                  <TextField
+                    select
+                    label="Speech Speed (gTTS)"
+                    value={audioSlow ? 'true' : 'false'}
+                    onChange={(e) => onChangeSlow(e.target.value === 'true')}
+                    SelectProps={{ native: true }}
+                    helperText="Whether to speak slowly (useful for clarity)"
+                  >
+                    <option value="false">Normal Speed</option>
+                    <option value="true">Slow Speed</option>
+                  </TextField>
+                  <TextField
+                    type="number"
+                    label="Speech Rate (pyttsx3)"
+                    value={audioRate}
+                    onChange={(e) => onChangeRate(Number(e.target.value))}
+                    inputProps={{ min: 50, max: 300, step: 10 }}
+                    helperText="Words per minute (50-300, default: 150)"
+                  />
+                </Box>
+              </Box>
+            )}
          </Box>
        </Box>
      </DialogContent>
--- a/frontend/src/components/StoryWriter/Phases/StoryOutlineParts/BookPages.tsx
+++ b/frontend/src/components/StoryWriter/Phases/StoryOutlineParts/BookPages.tsx
@@ -1,12 +1,17 @@
 import React from 'react';
-import { Box, Typography, Tooltip, Chip } from '@mui/material';
+import { Box, Typography, Tooltip, Chip, CircularProgress } from '@mui/material';
 import { motion, AnimatePresence } from 'framer-motion';
 import OutlineHoverActions from './OutlineHoverActions';
 import EditNoteIcon from '@mui/icons-material/EditNote';
 import VolumeUpIcon from '@mui/icons-material/VolumeUp';
 import TipsAndUpdatesIcon from '@mui/icons-material/TipsAndUpdates';
+import PlayArrowIcon from '@mui/icons-material/PlayArrow';
+import GraphicEqIcon from '@mui/icons-material/GraphicEq';
+import ReplayIcon from '@mui/icons-material/Replay';
+import { OperationButton } from '../../../shared/OperationButton';
 import { leftPageVariants, rightPageVariants } from './pageVariants';
-import { storyWriterApi, StoryScene } from '../../../../services/storyWriterApi';
+import { StoryScene } from '../../../../services/storyWriterApi';
+import type { SceneAnimationResume } from '../../../../hooks/useStoryWriterState';

 const MotionBox = motion(Box);

@@ -33,12 +38,19 @@ interface BookPagesProps {

  narrationEnabled: boolean;
  audioUrl: string | null;
+  hasAudio: boolean;
  onOpenImageModal: () => void;
  onOpenAudioModal: () => void;
  onOpenCharactersModal: () => void;
  onOpenKeyEventsModal: () => void;
  onOpenTitleModal: () => void;
  onOpenEditModal: () => void;
+  onAnimateScene?: () => void;
+  onResumeScene?: () => void;
+  onAnimateWithVoiceover?: () => void;
+  isAnimatingScene?: boolean;
+  animatedVideoUrl?: string | null;
+  resumeInfo?: SceneAnimationResume | null;
 }

 const BookPages: React.FC<BookPagesProps> = ({
@@ -56,12 +68,22 @@ const BookPages: React.FC<BookPagesProps> = ({
  onOpenImageModal,
  onOpenAudioModal,
  audioUrl,
+  hasAudio,
  onOpenCharactersModal,
  onOpenKeyEventsModal,
  onOpenTitleModal,
  onOpenEditModal,
+  onAnimateScene,
+  onResumeScene,
+  onAnimateWithVoiceover,
+  isAnimatingScene,
+  animatedVideoUrl,
+  resumeInfo,
 }) => {
  const currentSceneNumber = currentScene?.scene_number || currentSceneIndex + 1;
+  const showAnimatedVideo = Boolean(animatedVideoUrl);
+  const hasImage = Boolean(imageUrl);
+  const hasMedia = showAnimatedVideo || hasImage;

  return (
    <Box sx={{ mb: 4, display: 'flex', justifyContent: 'center' }}>
@@ -213,13 +235,43 @@ const BookPages: React.FC<BookPagesProps> = ({
                  overflowY: 'auto',
                  mt: 3,
                  display: 'grid',
-                  gridTemplateRows: imageUrl ? 'auto 1fr auto auto' : 'auto auto auto 1fr',
+                  gridTemplateRows: hasMedia ? 'auto 1fr auto auto' : 'auto auto auto 1fr',
                  alignContent: 'start',
                  gap: 3,
                }}
              >
                <Box sx={{ position: 'relative', '&:hover .left-image-actions': { opacity: 1, pointerEvents: 'auto' } }}>
-                  {imageUrl ? (
+                  {showAnimatedVideo ? (
+                    <Box
+                      sx={{
+                        width: '100%',
+                        borderRadius: '12px',
+                        overflow: 'hidden',
+                        boxShadow: '0 12px 24px rgba(0, 0, 0, 0.2)',
+                        border: '3px solid rgba(120, 90, 60, 0.25)',
+                        backgroundColor: '#000',
+                      }}
+                    >
+                      <Box
+                        component="video"
+                        src={animatedVideoUrl ?? undefined}
+                        poster={imageUrl ?? undefined}
+                        autoPlay
+                        muted
+                        loop
+                        controls
+                        playsInline
+                        sx={{
+                          width: '100%',
+                          height: 'auto',
+                          display: 'block',
+                          minHeight: '300px',
+                          maxHeight: '500px',
+                          objectFit: 'cover',
+                        }}
+                      />
+                    </Box>
+                  ) : hasImage ? (
                    <>
                       {/* Removed 'Scene Illustration' heading for cleaner look */}
                      <Box
@@ -239,7 +291,7 @@ const BookPages: React.FC<BookPagesProps> = ({
                      >
                        <Box
                          component="img"
-                          src={imageUrl}
+                          src={imageUrl || undefined}
                          alt={currentScene?.title || `Scene ${currentSceneNumber} illustration`}
                          sx={{
                            width: '100%',
@@ -258,11 +310,13 @@ const BookPages: React.FC<BookPagesProps> = ({
                            top: 8,
                            right: 8,
                            display: 'flex',
+                            flexDirection: 'column',
                            gap: 1,
                            opacity: 0,
                            pointerEvents: 'none',
                            transition: 'opacity 0.2s ease',
-                             zIndex: 5,
+                            zIndex: 5,
+                            alignItems: 'flex-end',
                          }}
                        >
                          <Tooltip title="Edit scene image prompt">
@@ -286,6 +340,152 @@ const BookPages: React.FC<BookPagesProps> = ({
                              <EditNoteIcon />
                            </Box>
                          </Tooltip>
+
+                          {hasImage && onAnimateScene && (
+                            <Box
+                              onClick={(e) => {
+                                e.stopPropagation();
+                              }}
+                              sx={{ display: 'inline-flex', pointerEvents: 'auto' }}
+                            >
+                              <OperationButton
+                                operation={{
+                                  provider: 'video',
+                                  model: 'kling-v2.5-turbo-std-5s',
+                                  operation_type: 'scene_animation',
+                                  actual_provider_name: 'wavespeed',
+                                }}
+                                label="Animate Scene"
+                                variant="contained"
+                                size="small"
+                                startIcon={<PlayArrowIcon />}
+                                showCost
+                                checkOnHover
+                                checkOnMount={false}
+                                onClick={onAnimateScene}
+                                disabled={isAnimatingScene}
+                                sx={{
+                                  minWidth: 'auto',
+                                  padding: '8px',
+                                  width: 40,
+                                  height: 40,
+                                  borderRadius: '50%',
+                                  background: 'linear-gradient(135deg, #1f8a70 0%, #32d9c8 100%)',
+                                  boxShadow: '0 8px 16px rgba(31,138,112,0.35)',
+                                  color: 'white',
+                                  '&:hover': {
+                                    background: 'linear-gradient(135deg, #1a7a60 0%, #2dc9b8 100%)',
+                                  },
+                                  '& .MuiButton-startIcon': {
+                                    margin: 0,
+                                  },
+                                  '& .MuiButton-label': {
+                                    display: 'none',
+                                  },
+                                }}
+                                tooltipPlacement="left"
+                              />
+                            </Box>
+                          )}
+
+                          {hasImage && hasAudio && onAnimateWithVoiceover && (
+                            <Box
+                              onClick={(e) => {
+                                e.stopPropagation();
+                              }}
+                              sx={{ display: 'inline-flex', pointerEvents: 'auto' }}
+                            >
+                              <OperationButton
+                                operation={{
+                                  provider: 'video',
+                                  model: 'wavespeed-ai/infinitetalk',
+                                  operation_type: 'scene_animation_voiceover',
+                                  actual_provider_name: 'wavespeed',
+                                }}
+                                label="Animate with Voiceover"
+                                variant="contained"
+                                size="small"
+                                startIcon={<GraphicEqIcon />}
+                                showCost
+                                checkOnHover
+                                checkOnMount={false}
+                                onClick={onAnimateWithVoiceover}
+                                disabled={isAnimatingScene}
+                                sx={{
+                                  minWidth: 'auto',
+                                  padding: '8px',
+                                  width: 40,
+                                  height: 40,
+                                  borderRadius: '50%',
+                                  background: 'linear-gradient(135deg, #733dd9 0%, #bb86fc 100%)',
+                                  boxShadow: '0 8px 16px rgba(115,61,217,0.35)',
+                                  color: 'white',
+                                  '&:hover': {
+                                    background: 'linear-gradient(135deg, #6030ba 0%, #a974f1 100%)',
+                                  },
+                                  '& .MuiButton-startIcon': {
+                                    margin: 0,
+                                  },
+                                  '& .MuiButton-label': {
+                                    display: 'none',
+                                  },
+                                }}
+                                tooltipPlacement="left"
+                              />
+                            </Box>
+                          )}
+
+                          {resumeInfo && onResumeScene && (
+                            <Tooltip
+                              title={resumeInfo.message || 'Resume animation download (no extra cost)'}
+                              placement="left"
+                            >
+                              <Box
+                                onClick={(e) => {
+                                  e.stopPropagation();
+                                }}
+                                sx={{ display: 'inline-flex', pointerEvents: 'auto' }}
+                              >
+                                <OperationButton
+                                  operation={{
+                                    provider: 'video',
+                                    model: 'kling-v2.5-turbo-std-resume',
+                                    operation_type: 'scene_animation_resume',
+                                    actual_provider_name: 'wavespeed',
+                                  }}
+                                  label="Resume Animation"
+                                  variant="contained"
+                                  size="small"
+                                  startIcon={<ReplayIcon />}
+                                  showCost={false}
+                                  checkOnHover={false}
+                                  checkOnMount={false}
+                                  onClick={onResumeScene}
+                                  disabled={isAnimatingScene}
+                                  sx={{
+                                    minWidth: 'auto',
+                                    padding: '8px',
+                                    width: 40,
+                                    height: 40,
+                                    borderRadius: '50%',
+                                    background: 'linear-gradient(135deg, #b35c1e 0%, #f5a623 100%)',
+                                    boxShadow: '0 8px 16px rgba(179,92,30,0.35)',
+                                    color: 'white',
+                                    '&:hover': {
+                                      background: 'linear-gradient(135deg, #9c511a 0%, #e1911c 100%)',
+                                    },
+                                    '& .MuiButton-startIcon': {
+                                      margin: 0,
+                                    },
+                                    '& .MuiButton-label': {
+                                      display: 'none',
+                                    },
+                                  }}
+                                  tooltipPlacement="left"
+                                />
+                              </Box>
+                            </Tooltip>
+                          )}
                        </Box>
                      </Box>
                    </>
@@ -325,6 +525,27 @@ const BookPages: React.FC<BookPagesProps> = ({
                      </Box>
                    </>
                  )}
+                  {isAnimatingScene && (
+                    <Box
+                      sx={{
+                        position: 'absolute',
+                        inset: 0,
+                        display: 'flex',
+                        flexDirection: 'column',
+                        alignItems: 'center',
+                        justifyContent: 'center',
+                        backdropFilter: 'blur(2px)',
+                        backgroundColor: 'rgba(0,0,0,0.35)',
+                        borderRadius: '12px',
+                        color: '#fff',
+                        gap: 1,
+                        zIndex: 6,
+                      }}
+                    >
+                      <CircularProgress color="inherit" size={36} />
+                      <Typography variant="body2">Animating scene...</Typography>
+                    </Box>
+                  )}
                </Box>

                {/* Audio chip moved to right page */}
@@ -375,7 +596,10 @@ const BookPages: React.FC<BookPagesProps> = ({
                '&:hover .chip-actions': { opacity: 1, pointerEvents: 'auto' },
              }}
            >
-              <OutlineHoverActions onEdit={onOpenEditModal} onImprove={onOpenEditModal} />
+              <OutlineHoverActions
+                onEdit={onOpenEditModal}
+                onImprove={onOpenEditModal}
+              />
              <Box sx={{ flex: 1, overflowY: 'auto', pt: { xs: 1, md: 2 } }}>
                <Box className="chip-actions" sx={{ display: 'flex', gap: 1, flexWrap: 'wrap', mb: 1.5, opacity: 0, pointerEvents: 'none', transition: 'opacity 0.2s ease' }}>
                  <Chip
--- a/frontend/src/components/StoryWriter/Phases/StoryOutlineParts/ImageEditModal.tsx
+++ b/frontend/src/components/StoryWriter/Phases/StoryOutlineParts/ImageEditModal.tsx
@@ -1,5 +1,9 @@
 import React from 'react';
-import { Box, Button, Dialog, DialogActions, DialogContent, DialogTitle, TextField } from '@mui/material';
+import { Box, Button, Dialog, DialogActions, DialogContent, DialogTitle, TextField, Divider, CircularProgress } from '@mui/material';
+import { OperationButton } from '../../../shared/OperationButton';
+import AutoFixHighIcon from '@mui/icons-material/AutoFixHigh';
+import RefreshIcon from '@mui/icons-material/Refresh';
+import { storyWriterApi } from '../../../../services/storyWriterApi';

 interface ImageEditModalProps {
  open: boolean;
@@ -8,9 +12,82 @@ interface ImageEditModalProps {
  onChange: (v: string) => void;
  onClose: () => void;
  onSave: () => void;
+  onRegenerate?: (prompt: string) => Promise<void>;
+  imageProvider?: string | null;
+  imageWidth?: number;
+  imageHeight?: number;
+  imageModel?: string | null;
 }

-const ImageEditModal: React.FC<ImageEditModalProps> = ({ open, sceneNumber, value, onChange, onClose, onSave }) => {
+const ImageEditModal: React.FC<ImageEditModalProps> = ({ 
+  open, 
+  sceneNumber, 
+  value, 
+  onChange, 
+  onClose, 
+  onSave,
+  onRegenerate,
+  imageProvider,
+  imageWidth = 1024,
+  imageHeight = 1024,
+  imageModel,
+}) => {
+  const [isRegenerating, setIsRegenerating] = React.useState(false);
+  const [regenerateError, setRegenerateError] = React.useState<string | null>(null);
+  const [isOptimizing, setIsOptimizing] = React.useState(false);
+  const [optimizeError, setOptimizeError] = React.useState<string | null>(null);
+
+  // Prefer a string `detail` from the HTTP response, then the error's own message.
+  // Non-string details are skipped: the message is rendered as a React child below.
+  const describeError = (err: any, fallback: string): string => {
+    const detail = err?.response?.data?.detail;
+    return (typeof detail === 'string' && detail) || err?.message || fallback;
+  };
+
+  // Re-run image generation with the trimmed prompt via the parent's callback.
+  const handleRegenerate = async () => {
+    if (!onRegenerate || !value.trim()) return;
+    setIsRegenerating(true);
+    setRegenerateError(null);
+    setOptimizeError(null); // banner shows `regenerateError || optimizeError`; drop stale text
+    try {
+      await onRegenerate(value.trim());
+    } catch (err: any) {
+      setRegenerateError(describeError(err, 'Failed to regenerate image'));
+    } finally {
+      setIsRegenerating(false);
+    }
+  };
+
+  // Ask the backend to rewrite the prompt; on success push it through onChange.
+  const handleOptimize = async () => {
+    if (!value.trim()) return;
+    setIsOptimizing(true);
+    setOptimizeError(null);
+    setRegenerateError(null); // otherwise an old regenerate error would mask this run's result
+    try {
+      const response = await storyWriterApi.optimizePrompt({
+        text: value.trim(),
+        mode: 'image', // Default to image mode for scene image prompts
+        style: 'default', // Could be made configurable in the future
+      });
+      if (!response.success || !response.optimized_prompt) {
+        throw new Error('Optimization returned no result');
+      }
+      onChange(response.optimized_prompt);
+    } catch (err: any) {
+      setOptimizeError(describeError(err, 'Failed to optimize prompt'));
+      console.error('Failed to optimize prompt:', err);
+    } finally {
+      setIsOptimizing(false);
+    }
+  };
+
+  // Determine the model for cost estimation
+  // Default to FLUX.1-Krea-dev for HuggingFace, or stability model
+  const modelForEstimation = imageModel || (imageProvider === 'stability' ? 'stable-diffusion' : 'black-forest-labs/FLUX.1-Krea-dev');
+  const providerForEstimation = imageProvider || 'huggingface';
+
  return (
    <Dialog
      open={open}
@@ -44,7 +121,54 @@ const ImageEditModal: React.FC<ImageEditModalProps> = ({ open, sceneNumber, valu
            multiline
            minRows={5}
            fullWidth
+            placeholder="Enter a detailed description of the scene image..."
          />
+          
+          {(regenerateError || optimizeError) && (
+            <Box sx={{ color: 'error.main', fontSize: '0.875rem', mt: -1 }}>
+              {regenerateError || optimizeError}
+            </Box>
+          )}
+
+          <Divider sx={{ my: 1 }} />
+
+          <Box sx={{ display: 'flex', gap: 2, flexWrap: 'wrap' }}>
+            {/* AI Prompt Optimizer */}
+            <Button
+              variant="outlined"
+              size="medium"
+              startIcon={isOptimizing ? <CircularProgress size={16} /> : <AutoFixHighIcon />}
+              onClick={handleOptimize}
+              disabled={isOptimizing || !value.trim() || isRegenerating}
+              sx={{ flex: 1, minWidth: '200px' }}
+            >
+              {isOptimizing ? 'Optimizing...' : 'AI Prompt Optimizer'}
+            </Button>
+
+            {/* Regenerate Scene - Active with cost estimation */}
+            {onRegenerate && (
+              <OperationButton
+                operation={{
+                  provider: 'stability',
+                  model: modelForEstimation,
+                  tokens_requested: 0,
+                  operation_type: 'image_generation',
+                  actual_provider_name: providerForEstimation,
+                }}
+                label="Regenerate Scene"
+                variant="contained"
+                size="medium"
+                startIcon={<RefreshIcon />}
+                showCost={true}
+                checkOnHover={true}
+                checkOnMount={false}
+                onClick={handleRegenerate}
+                disabled={isRegenerating || !value.trim()}
+                loading={isRegenerating}
+                sx={{ flex: 1, minWidth: '200px' }}
+              />
+            )}
+          </Box>
        </Box>
      </DialogContent>
      <DialogActions>
--- a/frontend/src/components/StoryWriter/Phases/StoryOutlineParts/OutlineHoverActions.tsx
+++ b/frontend/src/components/StoryWriter/Phases/StoryOutlineParts/OutlineHoverActions.tsx
@@ -8,7 +8,10 @@ interface OutlineHoverActionsProps {
  onImprove: () => void;
 }

-const OutlineHoverActions: React.FC<OutlineHoverActionsProps> = ({ onEdit, onImprove }) => {
+const OutlineHoverActions: React.FC<OutlineHoverActionsProps> = ({
+  onEdit,
+  onImprove,
+}) => {
  return (
    <Box
      className="outline-actions"
--- a/frontend/src/components/StoryWriter/Phases/StoryWriting.tsx
+++ b/frontend/src/components/StoryWriter/Phases/StoryWriting.tsx
@@ -13,6 +13,7 @@ import { useStoryWriterState } from '../../../hooks/useStoryWriterState';
 import { storyWriterApi } from '../../../services/storyWriterApi';
 import { triggerSubscriptionError } from '../../../api/client';
 import { aiApiClient } from '../../../api/client';
+import { fetchMediaBlobUrl } from '../../../utils/fetchMediaBlobUrl';
 import { MultimediaSection } from '../components/MultimediaSection';

 const MotionBox = motion(Box);
@@ -123,10 +124,13 @@ const StoryWriting: React.FC<StoryWritingProps> = ({ state, onNext }) => {
  const [pageDirection, setPageDirection] = useState(0);
  const [imageLoadError, setImageLoadError] = useState<Set<number>>(new Set());
  const [imageBlobUrls, setImageBlobUrls] = useState<Map<number, string>>(new Map());
+  const [videoBlobUrls, setVideoBlobUrls] = useState<Map<number, string>>(new Map());
+  const [videoLoadError, setVideoLoadError] = useState<Set<number>>(new Set());

  // Get scenes and images from state
  const scenes = state.outlineScenes || [];
  const sceneImages = state.sceneImages || new Map<number, string>();
+  const sceneAnimatedVideos = state.sceneAnimatedVideos || new Map<number, string>();
  const hasScenes = state.isOutlineStructured && scenes.length > 0;
  
  // Split story content into sections mapped to scenes
@@ -201,6 +205,10 @@ const StoryWriting: React.FC<StoryWritingProps> = ({ state, onNext }) => {
  }, []);

  const currentSceneImageFullUrl = imageBlobUrls.get(currentSceneNumber) || null;
+  const currentSceneAnimatedVideoUrl = sceneAnimatedVideos.get(currentSceneNumber) || null;
+  const currentSceneAnimatedVideoBlobUrl = videoBlobUrls.get(currentSceneNumber) || null;
+  const hasVideoLoadError = videoLoadError.has(currentSceneNumber);
+  const showAnimatedVideo = Boolean(currentSceneAnimatedVideoBlobUrl);

  // Reset image load error when page changes
  useEffect(() => {
@@ -211,6 +219,60 @@ const StoryWriting: React.FC<StoryWritingProps> = ({ state, onNext }) => {
    });
  }, [currentSceneNumber]);

+  // Lazily fetch the animated clip for the visible scene and cache it as a blob URL.
+  useEffect(() => {
+    if (!currentSceneAnimatedVideoUrl || hasVideoLoadError || currentSceneAnimatedVideoBlobUrl) {
+      return;
+    }
+
+    let cancelled = false;
+    const markFailed = () => setVideoLoadError((prev) => new Set(prev).add(currentSceneNumber));
+    const loadVideo = async () => {
+      try {
+        const videoPath = currentSceneAnimatedVideoUrl.startsWith('/')
+          ? currentSceneAnimatedVideoUrl
+          : `/${currentSceneAnimatedVideoUrl}`;
+        const blobUrl = await fetchMediaBlobUrl(videoPath);
+        if (!blobUrl) {
+          markFailed();
+          return;
+        }
+        if (cancelled) {
+          // Effect was torn down mid-fetch: this URL will never be stored, so free it.
+          URL.revokeObjectURL(blobUrl);
+          return;
+        }
+
+        setVideoBlobUrls((prev) => {
+          const next = new Map(prev);
+          const existing = next.get(currentSceneNumber);
+          if (existing) {
+            URL.revokeObjectURL(existing);
+          }
+          next.set(currentSceneNumber, blobUrl);
+          return next;
+        });
+      } catch (err) {
+        console.warn('Failed to load animated video:', err);
+        markFailed();
+      }
+    };
+
+    loadVideo();
+
+    return () => {
+      cancelled = true;
+    };
+  }, [currentSceneNumber, currentSceneAnimatedVideoUrl, currentSceneAnimatedVideoBlobUrl, hasVideoLoadError]);
+
+  // Keep the latest map in a ref so the unmount cleanup below can see every URL.
+  const videoBlobUrlsRef = React.useRef(videoBlobUrls);
+  videoBlobUrlsRef.current = videoBlobUrls;
+  useEffect(() => {
+    // Revoke only on unmount; a cleanup keyed on the map revoked URLs still in use.
+    return () => videoBlobUrlsRef.current.forEach((blob) => URL.revokeObjectURL(blob));
+  }, []);
+
  useEffect(() => {
    if (storySections.length > 0) {
      setCurrentPageIndex(0);
@@ -502,7 +564,37 @@ const StoryWriting: React.FC<StoryWritingProps> = ({ state, onNext }) => {
                        },
                      }}
                    >
-                      {currentSceneImageFullUrl ? (
+                      {showAnimatedVideo ? (
+                        <Box
+                          sx={{
+                            width: '100%',
+                            borderRadius: '12px',
+                            overflow: 'hidden',
+                            boxShadow: '0 8px 20px rgba(0, 0, 0, 0.18), 0 4px 8px rgba(0, 0, 0, 0.12)',
+                            border: '3px solid rgba(120, 90, 60, 0.25)',
+                            backgroundColor: '#000',
+                          }}
+                        >
+                          <Box
+                            component="video"
+                            src={currentSceneAnimatedVideoBlobUrl ?? undefined}
+                            poster={currentSceneImageFullUrl ?? undefined}
+                            autoPlay
+                            muted
+                            loop
+                            controls
+                            playsInline
+                            sx={{
+                              width: '100%',
+                              height: 'auto',
+                              display: 'block',
+                              minHeight: '300px',
+                              maxHeight: '500px',
+                              objectFit: 'cover',
+                            }}
+                          />
+                        </Box>
+                      ) : currentSceneImageFullUrl ? (
                        <Box
                          sx={{
                            width: '100%',
--- a/frontend/src/components/StoryWriter/StoryWriter.tsx
+++ b/frontend/src/components/StoryWriter/StoryWriter.tsx
@@ -123,23 +123,38 @@ export const StoryWriter: React.FC = () => {
    setIsGeneratingVideo(true);

    try {
-      const imageUrls: string[] = [];
+      const imageUrls: (string | null)[] = [];
      const audioUrls: string[] = [];
      const scenes = state.outlineScenes;

+      const videoUrls: (string | null)[] = [];
+
      for (const scene of scenes) {
        const sceneNumber = scene.scene_number || scenes.indexOf(scene) + 1;
        const imageUrl = state.sceneImages?.get(sceneNumber);
        const audioUrl = state.sceneAudio?.get(sceneNumber);
+        const animatedVideoUrl = state.sceneAnimatedVideos?.get(sceneNumber);

-        if (imageUrl && audioUrl) {
-          imageUrls.push(imageUrl);
-          audioUrls.push(audioUrl);
+        if (!audioUrl) {
+          continue; // Skip scenes without audio
        }
+
+        // Prefer animated video if available, otherwise use image
+        if (animatedVideoUrl) {
+          videoUrls.push(animatedVideoUrl);
+          imageUrls.push(null);
+        } else if (imageUrl) {
+          videoUrls.push(null);
+          imageUrls.push(imageUrl);
+        } else {
+          continue; // Skip scenes without image or video
+        }
+
+        audioUrls.push(audioUrl);
      }

      if (imageUrls.length !== scenes.length || audioUrls.length !== scenes.length) {
-        throw new Error('Number of images and audio files must match number of scenes');
+        throw new Error('Number of images/videos and audio files must match number of scenes');
      }

      // Switch to async flow so UI can poll progress messages
@@ -147,6 +162,8 @@ export const StoryWriter: React.FC = () => {
        scenes: scenes,
        image_urls: imageUrls,
        audio_urls: audioUrls,
+        video_urls: videoUrls.length > 0 ? videoUrls : undefined,
+        ai_audio_urls: undefined, // TODO: Track AI audio separately in state
        story_title: state.storySetting || 'Story',
        fps: state.videoFps,
        transition_duration: state.videoTransitionDuration,
--- a/frontend/src/components/StoryWriter/components/AudioPlayerList.tsx
+++ b/frontend/src/components/StoryWriter/components/AudioPlayerList.tsx
@@ -29,14 +29,30 @@ export const AudioPlayerList: React.FC<AudioPlayerListProps> = ({ scenes, sceneA
      for (const [sceneNumber, audioPath] of entries) {
        if (!audioPath) continue;
        try {
-          const normalizedPath = audioPath.startsWith('/') ? audioPath : `/${audioPath}`;
+          // Normalize path - ensure it starts with /api/story/audio/
+          let normalizedPath = audioPath.startsWith('/') ? audioPath : `/${audioPath}`;
+          
+          // If path doesn't include /api/story/audio/, add it
+          if (!normalizedPath.includes('/api/story/audio/')) {
+            // Extract filename from path
+            const filename = audioPath.split('/').pop() || audioPath;
+            normalizedPath = `/api/story/audio/${filename}`;
+          }
+          
          const response = await aiApiClient.get(normalizedPath, {
            responseType: 'blob',
          });
          const blobUrl = URL.createObjectURL(response.data);
          blobEntries.push([sceneNumber, blobUrl]);
-        } catch (err) {
-          console.error('Failed to load audio blob:', err);
+        } catch (err: any) {
+          console.error(`Failed to load audio blob for scene ${sceneNumber}:`, err);
+          console.error(`Audio path was: ${audioPath}`);
+          console.error(`Normalized path would be: ${audioPath.startsWith('/') ? audioPath : `/${audioPath}`}`);
+          
+          // If auth error, log more details
+          if (err?.response?.status === 401) {
+            console.error(`Authentication failed for audio file. Make sure auth token is set.`);
+          }
        }
      }

@@ -87,13 +103,19 @@ export const AudioPlayerList: React.FC<AudioPlayerListProps> = ({ scenes, sceneA
              <Typography variant="subtitle2" sx={{ mb: 1, fontWeight: 600, color: '#1A1611' }}>
                Scene {sceneNumber}: {scene.title || `Scene ${sceneNumber}`}
              </Typography>
-              <audio
-                controls
-                src={blobUrl ? blobUrl : storyWriterApi.getAudioUrl(audioUrl)}
-                style={{ width: '100%' }}
-              >
-                Your browser does not support the audio element.
-              </audio>
+              {blobUrl ? (
+                <audio
+                  controls
+                  src={blobUrl}
+                  style={{ width: '100%' }}
+                >
+                  Your browser does not support the audio element.
+                </audio>
+              ) : (
+                <Typography variant="body2" sx={{ color: 'text.secondary', fontStyle: 'italic' }}>
+                  Loading audio...
+                </Typography>
+              )}
            </Box>
          );
        })}
--- a/frontend/src/components/StoryWriter/components/HdVideoSection.tsx
+++ b/frontend/src/components/StoryWriter/components/HdVideoSection.tsx
@@ -2,15 +2,14 @@ import React, { useState, useRef } from 'react';
 import {
  Box,
  Typography,
-  Button,
  Alert,
  LinearProgress,
-  Tooltip,
 } from '@mui/material';
 import SmartDisplayIcon from '@mui/icons-material/SmartDisplay';
 import { useStoryWriterState } from '../../../hooks/useStoryWriterState';
 import { storyWriterApi } from '../../../services/storyWriterApi';
 import { triggerSubscriptionError } from '../../../api/client';
+import { OperationButton } from '../../shared/OperationButton';
 import SceneVideoApproval from './SceneVideoApproval';

 // Simple logger for frontend
@@ -94,14 +93,11 @@ export const HdVideoSection: React.FC<HdVideoSectionProps> = ({ state, onError }
      setHdVideoMessage(`Generating HD video for Scene ${sceneNumber}...`);

      try {
-        const sceneImageUrl = state.sceneImages?.get(sceneNumber);
-
        const result = await storyWriterApi.generateHdVideoScene({
          scene_number: sceneNumber,
          scene_data: scene,
          story_context: storyContext,
          all_scenes: scenes,
-          scene_image_url: sceneImageUrl,
          provider: 'huggingface',
          model: 'tencent/HunyuanVideo',
          num_frames: 50,
@@ -240,14 +236,11 @@ export const HdVideoSection: React.FC<HdVideoSectionProps> = ({ state, onError }
        story_content: state.storyContent || '',
      };

-      const sceneImageUrl = state.sceneImages?.get(sceneNumber);
-
      const result = await storyWriterApi.generateHdVideoScene({
        scene_number: sceneNumber,
        scene_data: scene,
        story_context: storyContext,
        all_scenes: scenes,
-        scene_image_url: sceneImageUrl,
        provider: 'huggingface',
        model: 'tencent/HunyuanVideo',
        num_frames: 50,
@@ -303,45 +296,30 @@ export const HdVideoSection: React.FC<HdVideoSectionProps> = ({ state, onError }
  return (
    <>
      <Box sx={{ mt: 1, display: 'flex', flexDirection: 'column', gap: 1 }}>
-        <Tooltip
-          title={
-            <Box sx={{ p: 1 }}>
-              <Typography variant="body2" sx={{ mb: 1, fontWeight: 600 }}>
-                Generate HD Animation with AI
-              </Typography>
-              <Typography variant="caption" sx={{ display: 'block', mb: 1 }}>
-                Upgrade this storyboard into a high‑definition AI animation using Hugging Face text‑to‑video models.
-                Your draft was generated affordably (images + narration). This premium option uses an AI model to render motion.
-              </Typography>
-              <Typography variant="caption" sx={{ display: 'block', mb: 0.5, fontWeight: 600 }}>
-                Recommended models:
-              </Typography>
-              <Typography variant="caption" component="div" sx={{ display: 'block', mb: 1 }}>
-                • tencent/HunyuanVideo<br />
-                • Lightricks/LTX-Video<br />
-                • Lightricks/LTX-Video-0.9.8-13B-distilled
-              </Typography>
-              <Typography variant="caption" sx={{ display: 'block', fontStyle: 'italic' }}>
-                This will generate HD videos for each scene one at a time. You'll review and approve each scene before the next one is generated.
-              </Typography>
-            </Box>
-          }
-          arrow
-          placement="top"
-        >
-          <span style={{ display: 'inline-flex' }}>
-            <Button
-              variant="contained"
-              startIcon={<SmartDisplayIcon />}
-              onClick={handleGenerateHdVideo}
-              disabled={isGeneratingHdVideo || state.hdVideoGenerationStatus === 'awaiting_approval'}
-            >
-              {isGeneratingHdVideo || state.hdVideoGenerationStatus === 'awaiting_approval' 
-                ? 'Generating HD Animation...' 
-                : 'Generate HD Animation with AI'}
-            </Button>
-          </span>
-        </Tooltip>
+        <OperationButton
+          operation={{
+            provider: 'video',
+            model: 'tencent/HunyuanVideo',
+            tokens_requested: 0,
+            operation_type: 'video_generation',
+            actual_provider_name: 'huggingface',
+          }}
+          label="Generate HD Animation with AI"
+          variant="contained"
+          startIcon={<SmartDisplayIcon />}
+          showCost={true}
+          checkOnHover={true}
+          checkOnMount={false}
+          onClick={handleGenerateHdVideo}
+          disabled={isGeneratingHdVideo || state.hdVideoGenerationStatus === 'awaiting_approval'}
+          loading={isGeneratingHdVideo || state.hdVideoGenerationStatus === 'awaiting_approval'}
+          tooltipPlacement="top"
+          buttonProps={{
+            children: isGeneratingHdVideo || state.hdVideoGenerationStatus === 'awaiting_approval'
+              ? 'Generating HD Animation...'
+              : undefined,
+          }}
+        />
        
        {(isGeneratingHdVideo || state.hdVideoGenerationStatus === 'generating' || state.hdVideoGenerationStatus === 'awaiting_approval') && (
          <Box sx={{ mt: 2, p: 2, backgroundColor: '#FAF9F6', borderRadius: 1, border: '1px solid #E0DCD4' }}>
--- a/frontend/src/components/StoryWriter/components/VideoSection.tsx
+++ b/frontend/src/components/StoryWriter/components/VideoSection.tsx
@@ -40,7 +40,19 @@ export const VideoSection: React.FC<VideoSectionProps> = ({ state, error, onErro
  // Load video blob URL when storyVideo changes
  useEffect(() => {
    if (state.storyVideo) {
-      fetchMediaBlobUrl(state.storyVideo).then(setVideoBlobUrl);
+      fetchMediaBlobUrl(state.storyVideo)
+        .then((blobUrl) => {
+          if (blobUrl) {
+            setVideoBlobUrl(blobUrl);
+          } else {
+            // File not found - clear the blob URL
+            setVideoBlobUrl(null);
+          }
+        })
+        .catch((err) => {
+          console.warn('Failed to load video blob:', err);
+          setVideoBlobUrl(null);
+        });
    } else {
      if (videoBlobUrl) {
        URL.revokeObjectURL(videoBlobUrl);
@@ -76,31 +88,50 @@ export const VideoSection: React.FC<VideoSectionProps> = ({ state, error, onErro
    setVideoMessage('');

    try {
-      const imageUrls: string[] = [];
+      const imageUrls: (string | null)[] = [];
      const audioUrls: string[] = [];
      const scenes = state.outlineScenes;

+      const videoUrls: (string | null)[] = [];
+      const aiAudioUrls: (string | null)[] = [];
+
      for (const scene of scenes) {
        const sceneNumber = scene.scene_number || scenes.indexOf(scene) + 1;
        const imageUrl = state.sceneImages?.get(sceneNumber);
        const audioUrl = state.sceneAudio?.get(sceneNumber);
+        const animatedVideoUrl = state.sceneAnimatedVideos?.get(sceneNumber);

-        if (imageUrl && audioUrl) {
-          imageUrls.push(imageUrl);
-          audioUrls.push(audioUrl);
-        } else {
-          throw new Error(`Missing image or audio for scene ${sceneNumber}`);
+        if (!audioUrl) {
+          throw new Error(`Missing audio for scene ${sceneNumber}`);
        }
+
+        // Prefer animated video if available, otherwise use image
+        if (animatedVideoUrl) {
+          videoUrls.push(animatedVideoUrl);
+          imageUrls.push(null);
+        } else if (imageUrl) {
+          videoUrls.push(null);
+          imageUrls.push(imageUrl);
+        } else {
+          throw new Error(`Missing image or animated video for scene ${sceneNumber}`);
+        }
+
+        audioUrls.push(audioUrl);
+        // AI audio detection: check if URL contains 'ai' or 'wavespeed' (can be enhanced later)
+        // For now, pass null and backend will use available audio
+        aiAudioUrls.push(null);
      }

      if (imageUrls.length !== scenes.length || audioUrls.length !== scenes.length) {
-        throw new Error('Number of images and audio files must match number of scenes');
+        throw new Error('Number of images/videos and audio files must match number of scenes');
      }

      const start = await storyWriterApi.generateStoryVideoAsync({
        scenes: scenes,
        image_urls: imageUrls,
        audio_urls: audioUrls,
+        video_urls: videoUrls.length > 0 ? videoUrls : undefined,
+        ai_audio_urls: undefined, // TODO: Track AI audio separately in state
        story_title: state.storySetting || 'Story',
        fps: state.videoFps,
        transition_duration: state.videoTransitionDuration,
@@ -122,7 +153,9 @@ export const VideoSection: React.FC<VideoSectionProps> = ({ state, error, onErro
          if (!finalUrl) throw new Error('Video URL not found in result');
          state.setStoryVideo(finalUrl);
          const blobUrl = await fetchMediaBlobUrl(finalUrl);
-          setVideoBlobUrl(blobUrl);
+          if (blobUrl) {
+            setVideoBlobUrl(blobUrl);
+          }
          setVideoProgress(100);
          setVideoMessage('Video generation complete');
          state.setError(null);
@@ -160,6 +193,10 @@ export const VideoSection: React.FC<VideoSectionProps> = ({ state, error, onErro
  const handleDownloadVideo = async () => {
    if (state.storyVideo) {
      const blobUrl = await fetchMediaBlobUrl(state.storyVideo);
+      if (!blobUrl) {
+        // File not found - skip download
+        return;
+      }
      const a = document.createElement('a');
      a.href = blobUrl;
      a.download = `story-video-${Date.now()}.mp4`;
--- a/frontend/src/components/shared/OperationButton.tsx
+++ b/frontend/src/components/shared/OperationButton.tsx
@@ -0,0 +1,273 @@
+import React, { useMemo } from 'react';
+import {
+  Button,
+  ButtonProps,
+  Tooltip,
+  Box,
+  Typography,
+  CircularProgress,
+} from '@mui/material';
+import WarningIcon from '@mui/icons-material/Warning';
+import { SxProps, Theme } from '@mui/material/styles';
+import { usePreflightCheck, UsePreflightCheckOptions } from '../../hooks/usePreflightCheck';
+import { PreflightOperation } from '../../services/billingService';
+
+export interface OperationButtonProps {
+  // Operation definition
+  operation: PreflightOperation;
+  
+  // Button configuration
+  label: string; // Base label (e.g., "Generate HD Video")
+  variant?: 'contained' | 'outlined' | 'text';
+  size?: 'small' | 'medium' | 'large';
+  color?: 'primary' | 'secondary' | 'success' | 'error';
+  startIcon?: React.ReactNode;
+  endIcon?: React.ReactNode;
+  
+  // Pre-flight check behavior
+  showCost?: boolean; // Show cost in label (default: true)
+  checkOnHover?: boolean; // Check on hover (default: true)
+  checkOnMount?: boolean; // Check on mount (default: false)
+  
+  // Callbacks
+  onClick: () => void;
+  onPreflightResult?: (canProceed: boolean) => void;
+  
+  // Customization
+  disabled?: boolean; // Additional disabled state
+  loading?: boolean; // Loading state override
+  tooltipPlacement?: 'top' | 'bottom' | 'left' | 'right';
+  
+  // Styling
+  sx?: SxProps<Theme>;
+  fullWidth?: boolean;
+  
+  // Additional button props
+  buttonProps?: Partial<ButtonProps>;
+}
+
+/**
+ * Reusable button component with pre-flight check and cost estimation.
+ * 
+ * Features:
+ * - Shows estimated cost in button label
+ * - Performs pre-flight check on hover (debounced)
+ * - Shows detailed tooltip with limits/remaining quota
+ * - Disables button with messaging if blocked
+ */
+export const OperationButton: React.FC<OperationButtonProps> = ({
+  operation,
+  label,
+  variant = 'contained',
+  size = 'medium',
+  color = 'primary',
+  startIcon,
+  endIcon,
+  showCost = true,
+  checkOnHover = true,
+  checkOnMount = false,
+  onClick,
+  onPreflightResult,
+  disabled: externalDisabled = false,
+  loading: externalLoading = false,
+  tooltipPlacement = 'top',
+  sx,
+  fullWidth = false,
+  buttonProps = {},
+}) => {
+  const preflightOptions: UsePreflightCheckOptions = {
+    operation,
+    enabled: checkOnHover || checkOnMount,
+    debounceMs: 300,
+    cacheTtl: 5000,
+  };
+
+  const {
+    canProceed,
+    estimatedCost,
+    limitInfo,
+    loading: preflightLoading,
+    error: preflightError,
+    checkOnHover: triggerCheckOnHover,
+    checkNow: triggerCheckNow,
+  } = usePreflightCheck(preflightOptions);
+
+  // Check on mount if requested
+  React.useEffect(() => {
+    if (checkOnMount) {
+      triggerCheckNow();
+    }
+  }, [checkOnMount, triggerCheckNow]);
+
+  // Notify parent of pre-flight result changes
+  React.useEffect(() => {
+    if (onPreflightResult) {
+      onPreflightResult(canProceed);
+    }
+  }, [canProceed, onPreflightResult]);
+
+  // Format cost as currency
+  const formattedCost = useMemo(() => {
+    if (!showCost || estimatedCost === 0) {
+      return null;
+    }
+    return new Intl.NumberFormat('en-US', {
+      style: 'currency',
+      currency: 'USD',
+      minimumFractionDigits: 2,
+      maximumFractionDigits: 2,
+    }).format(estimatedCost);
+  }, [estimatedCost, showCost]);
+
+  // Build button label with cost
+  const buttonLabel = useMemo(() => {
+    if (formattedCost) {
+      return `${label} ${formattedCost}`;
+    }
+    return label;
+  }, [label, formattedCost]);
+
+  // Determine if button should be disabled
+  const isDisabled = useMemo(() => {
+    return externalDisabled || externalLoading || preflightLoading || !canProceed;
+  }, [externalDisabled, externalLoading, preflightLoading, canProceed]);
+
+  // Build tooltip content. While a check is in flight only a progress note is shown;
+  // otherwise the quota summary (when available) is followed by the pre-flight error,
+  // rendered once, so a blocked user sees both the usage numbers and the reason.
+  const tooltipContent = useMemo(() => {
+    const content: React.ReactNode[] = [];
+
+    if (preflightLoading) {
+      content.push(
+        <Typography key="loading" variant="body2" sx={{ mb: 1 }}>
+          Checking limits...
+        </Typography>
+      );
+      // Results from a previous check are not shown while a new one is running.
+      return <Box sx={{ p: 0.5 }}>{content}</Box>;
+    }
+
+    // Quota summary: a limit of 0 (or an infinite remainder) is displayed as
+    // "Unlimited", without a remaining count.
+    if (limitInfo) {
+      const { current_usage, limit, remaining } = limitInfo;
+      const isUnlimited = limit === 0 || remaining === Infinity;
+
+      content.push(
+        <Box key="limits" sx={{ mb: 1 }}>
+          <Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5 }}>
+            {canProceed ? '✅ Operation Allowed' : '❌ Operation Blocked'}
+          </Typography>
+          <Typography variant="caption" sx={{ display: 'block' }}>
+            {isUnlimited
+              ? `Usage: ${current_usage} / Unlimited`
+              : `Usage: ${current_usage} / ${limit} (${remaining} remaining)`}
+          </Typography>
+          {formattedCost && (
+            <Typography variant="caption" sx={{ display: 'block', mt: 0.5, fontWeight: 600 }}>
+              Estimated Cost: {formattedCost}
+            </Typography>
+          )}
+        </Box>
+      );
+    }
+
+    // The error is pushed exactly once and after the summary; putting it in an
+    // exclusive branch would hide the usage numbers whenever the operation is blocked.
+    if (preflightError) {
+      content.push(
+        <Typography key="error" variant="body2" sx={{ color: 'error.main', fontWeight: 600 }}>
+          {preflightError}
+        </Typography>
+      );
+    }
+
+    return content.length > 0 ? <Box sx={{ p: 0.5 }}>{content}</Box> : null;
+  }, [canProceed, formattedCost, limitInfo, preflightError, preflightLoading]);
+
+  // Handle hover
+  const handleMouseEnter = () => {
+    if (checkOnHover) {
+      triggerCheckOnHover();
+    }
+  };
+
+  // Handle click
+  const handleClick = () => {
+    if (!isDisabled && canProceed) {
+      onClick();
+    }
+  };
+
+  // Determine button color based on state
+  const buttonColor = useMemo(() => {
+    if (!canProceed) {
+      return 'error';
+    }
+    return color;
+  }, [canProceed, color]);
+
+  // Determine if we should show loading spinner
+  const showLoading = externalLoading || (preflightLoading && checkOnMount);
+
+  // Custom label override for loading state
+  const displayLabel = useMemo(() => {
+    if (externalLoading && buttonProps?.children) {
+      return buttonProps.children;
+    }
+    if (showLoading && !externalLoading) {
+      return 'Checking...';
+    }
+    if (!canProceed && preflightError) {
+      return preflightError;
+    }
+    return buttonLabel;
+  }, [externalLoading, showLoading, canProceed, preflightError, buttonLabel, buttonProps?.children]);
+
+  // Build button with icon
+  const button = (
+    <Button
+      variant={variant}
+      size={size}
+      color={buttonColor}
+      startIcon={
+        showLoading ? (
+          <CircularProgress size={16} color="inherit" />
+        ) : !canProceed ? (
+          <WarningIcon fontSize="small" />
+        ) : (
+          startIcon
+        )
+      }
+      endIcon={endIcon}
+      onClick={handleClick}
+      disabled={isDisabled}
+      fullWidth={fullWidth}
+      onMouseEnter={handleMouseEnter}
+      sx={sx}
+      {...buttonProps}
+    >
+      {displayLabel}
+    </Button>
+  );
+
+  // Wrap with tooltip if we have content
+  if (tooltipContent || checkOnHover) {
+    return (
+      <Tooltip
+        title={tooltipContent || 'Hover to check limits'}
+        arrow
+        placement={tooltipPlacement}
+        onOpen={handleMouseEnter}
+      >
+        <span style={{ display: 'inline-flex' }}>
+          {button}
+        </span>
+      </Tooltip>
+    );
+  }
+
+  return button;
+};
+
--- a/frontend/src/hooks/usePreflightCheck.ts
+++ b/frontend/src/hooks/usePreflightCheck.ts
@@ -0,0 +1,257 @@
+import { useState, useCallback, useRef, useEffect } from 'react';
+import {
+  checkPreflight,
+  PreflightOperation,
+  PreflightCheckResponse,
+  PreflightLimitInfo,
+} from '../services/billingService';
+
+export interface UsePreflightCheckOptions {
+  operation: PreflightOperation;
+  enabled?: boolean; // Master switch: when false, checkOnHover and checkNow do nothing (default: true)
+  debounceMs?: number; // Debounce delay (default: 300ms)
+  cacheTtl?: number; // Cache TTL in ms (default: 5000ms)
+}
+
+export interface UsePreflightCheckResult {
+  canProceed: boolean;
+  estimatedCost: number;
+  limitInfo: PreflightLimitInfo | null;
+  loading: boolean;
+  error: string | null;
+  checkOnHover: () => void;
+  checkNow: () => void; // Immediate check
+  reset: () => void;
+}
+
+interface CacheEntry {
+  data: PreflightCheckResponse;
+  timestamp: number;
+}
+
+/**
+ * React hook for pre-flight checking operations with cost estimation.
+ * 
+ * Features:
+ * - Debounced hover checks (300ms default)
+ * - In-memory caching (5s default TTL)
+ * - Stale results are ignored after unmount or a newer check (the HTTP request itself is not aborted)
+ */
+export const usePreflightCheck = (
+  options: UsePreflightCheckOptions
+): UsePreflightCheckResult => {
+  const {
+    operation,
+    enabled = true,
+    debounceMs = 300,
+    cacheTtl = 5000,
+  } = options;
+
+  const [canProceed, setCanProceed] = useState<boolean>(true);
+  const [estimatedCost, setEstimatedCost] = useState<number>(0);
+  const [limitInfo, setLimitInfo] = useState<PreflightLimitInfo | null>(null);
+  const [loading, setLoading] = useState<boolean>(false);
+  const [error, setError] = useState<string | null>(null);
+
+  // Cache for pre-flight check results
+  const cacheRef = useRef<Map<string, CacheEntry>>(new Map());
+  
+  // Debounce timer ref
+  const debounceTimerRef = useRef<NodeJS.Timeout | null>(null);
+  
+  // Abort controller used as a staleness flag: results of superseded checks are discarded
+  const abortControllerRef = useRef<AbortController | null>(null);
+
+  // Generate cache key from operation
+  const getCacheKey = useCallback(() => {
+    return JSON.stringify(operation);
+  }, [operation]);
+
+  // Check if cached result is still valid
+  const getCachedResult = useCallback((): PreflightCheckResponse | null => {
+    const cacheKey = getCacheKey();
+    const cached = cacheRef.current.get(cacheKey);
+    
+    if (cached) {
+      const age = Date.now() - cached.timestamp;
+      if (age < cacheTtl) {
+        return cached.data;
+      }
+      // Cache expired, remove it
+      cacheRef.current.delete(cacheKey);
+    }
+    
+    return null;
+  }, [getCacheKey, cacheTtl]);
+
+  // Store result in cache
+  const setCache = useCallback((data: PreflightCheckResponse) => {
+    const cacheKey = getCacheKey();
+    cacheRef.current.set(cacheKey, {
+      data,
+      timestamp: Date.now(),
+    });
+  }, [getCacheKey]);
+
+  // Perform actual pre-flight check
+  const performCheck = useCallback(async (): Promise<void> => {
+    if (!enabled) {
+      return;
+    }
+
+    // Check cache first
+    const cached = getCachedResult();
+    if (cached) {
+      updateState(cached);
+      return;
+    }
+
+    // Cancel any in-flight request
+    if (abortControllerRef.current) {
+      abortControllerRef.current.abort();
+    }
+
+    // Create new abort controller
+    abortControllerRef.current = new AbortController();
+    const currentAbortController = abortControllerRef.current;
+
+    setLoading(true);
+    setError(null);
+
+    try {
+      const response = await checkPreflight(operation);
+      
+      // Check if request was cancelled
+      if (currentAbortController.signal.aborted) {
+        return;
+      }
+
+      // Cache the result
+      setCache(response);
+
+      // Update state
+      updateState(response);
+    } catch (err: any) {
+      // Check if request was cancelled
+      if (currentAbortController.signal.aborted) {
+        return;
+      }
+
+      const errorMessage = err?.message || 'Pre-flight check failed';
+      setError(errorMessage);
+      setCanProceed(false);
+      setEstimatedCost(0);
+      setLimitInfo(null);
+    } finally {
+      if (!currentAbortController.signal.aborted) {
+        setLoading(false);
+      }
+    }
+  }, [operation, enabled, getCachedResult, setCache]);
+
+  // Update state from response
+  const updateState = useCallback((response: PreflightCheckResponse) => {
+    setCanProceed(response.can_proceed);
+    setEstimatedCost(response.estimated_cost);
+    
+    // Get limit info from first operation (for single operation checks)
+    const firstOp = response.operations[0];
+    if (firstOp) {
+      setLimitInfo(firstOp.limit_info);
+      if (!response.can_proceed && firstOp.message) {
+        setError(firstOp.message);
+      } else {
+        setError(null);
+      }
+    } else {
+      setLimitInfo(null);
+    }
+  }, []);
+
+  // Debounced check for hover events
+  const checkOnHover = useCallback(() => {
+    if (!enabled) {
+      return;
+    }
+
+    // Clear existing timer
+    if (debounceTimerRef.current) {
+      clearTimeout(debounceTimerRef.current);
+    }
+
+    // Check cache first (no debounce for cache hits)
+    const cached = getCachedResult();
+    if (cached) {
+      updateState(cached);
+      return;
+    }
+
+    // Debounce the actual API call
+    debounceTimerRef.current = setTimeout(() => {
+      performCheck();
+    }, debounceMs);
+  }, [enabled, debounceMs, getCachedResult, updateState, performCheck]);
+
+  // Immediate check (no debounce)
+  const checkNow = useCallback(() => {
+    if (!enabled) {
+      return;
+    }
+
+    // Clear any pending debounced check
+    if (debounceTimerRef.current) {
+      clearTimeout(debounceTimerRef.current);
+      debounceTimerRef.current = null;
+    }
+
+    performCheck();
+  }, [enabled, performCheck]);
+
+  // Reset state
+  const reset = useCallback(() => {
+    setCanProceed(true);
+    setEstimatedCost(0);
+    setLimitInfo(null);
+    setLoading(false);
+    setError(null);
+
+    // Clear debounce timer
+    if (debounceTimerRef.current) {
+      clearTimeout(debounceTimerRef.current);
+      debounceTimerRef.current = null;
+    }
+
+    // Cancel any in-flight request
+    if (abortControllerRef.current) {
+      abortControllerRef.current.abort();
+      abortControllerRef.current = null;
+    }
+  }, []);
+
+  // Cleanup on unmount
+  useEffect(() => {
+    return () => {
+      // Clear debounce timer
+      if (debounceTimerRef.current) {
+        clearTimeout(debounceTimerRef.current);
+      }
+
+      // Cancel any in-flight request
+      if (abortControllerRef.current) {
+        abortControllerRef.current.abort();
+      }
+    };
+  }, []);
+
+  return {
+    canProceed,
+    estimatedCost,
+    limitInfo,
+    loading,
+    error,
+    checkOnHover,
+    checkNow,
+    reset,
+  };
+};
+
--- a/frontend/src/hooks/useStoryWriterState.ts
+++ b/frontend/src/hooks/useStoryWriterState.ts
@@ -7,6 +7,13 @@ import {
  StoryFullGenerationResponse,
 } from '../services/storyWriterApi';

+export interface SceneAnimationResume {
+  predictionId: string;
+  duration: 5 | 10;
+  message?: string;
+  createdAt?: string;
+}
+
 export interface StoryWriterState {
  // Story parameters (Setup phase)
  persona: string;
@@ -52,6 +59,8 @@ export interface StoryWriterState {
  sceneAudio: Map<number, string> | null; // Generated audio URLs by scene number
  storyVideo: string | null; // Generated video URL
  sceneHdVideos: Map<number, string> | null; // Approved HD video URLs by scene number
+  sceneAnimatedVideos: Map<number, string> | null; // Animated scene preview videos
+  sceneAnimationResumables: Map<number, SceneAnimationResume> | null; // Pending resume info per scene
  hdVideoGenerationStatus: 'idle' | 'generating' | 'awaiting_approval' | 'completed' | 'paused';
  currentHdSceneIndex: number; // Which scene is currently being generated/reviewed

@@ -104,6 +113,8 @@ const DEFAULT_STATE: Partial<StoryWriterState> = {
  sceneAudio: null,
  storyVideo: null,
  sceneHdVideos: null,
+  sceneAnimatedVideos: null,
+  sceneAnimationResumables: null,
  hdVideoGenerationStatus: 'idle',
  currentHdSceneIndex: 0,
  currentTaskId: null,
@@ -148,6 +159,8 @@ export const useStoryWriterState = () => {
          sceneImages: parsed.sceneImages ? new Map(parsed.sceneImages) : null,
          sceneAudio: parsed.sceneAudio ? new Map(parsed.sceneAudio) : null,
          sceneHdVideos: parsed.sceneHdVideos ? new Map(parsed.sceneHdVideos) : null,
+          sceneAnimatedVideos: parsed.sceneAnimatedVideos ? new Map(parsed.sceneAnimatedVideos) : null,
+          sceneAnimationResumables: parsed.sceneAnimationResumables ? new Map(parsed.sceneAnimationResumables) : null,
        };
        
        return restoredState as StoryWriterState;
@@ -193,6 +206,12 @@ export const useStoryWriterState = () => {
        sceneImages: persistableState.sceneImages ? Array.from(persistableState.sceneImages.entries()) : null,
        sceneAudio: persistableState.sceneAudio ? Array.from(persistableState.sceneAudio.entries()) : null,
        sceneHdVideos: persistableState.sceneHdVideos ? Array.from(persistableState.sceneHdVideos.entries()) : null,
+        sceneAnimatedVideos: persistableState.sceneAnimatedVideos
+          ? Array.from(persistableState.sceneAnimatedVideos.entries())
+          : null,
+        sceneAnimationResumables: persistableState.sceneAnimationResumables
+          ? Array.from(persistableState.sceneAnimationResumables.entries())
+          : null,
      };
      
      localStorage.setItem('story_writer_state', JSON.stringify(serializableState));
@@ -337,6 +356,14 @@ export const useStoryWriterState = () => {
    setState((prev) => ({ ...prev, sceneImages: images }));
  }, []);

+  const setSceneAnimatedVideos = useCallback((videos: Map<number, string> | null) => {
+    setState((prev) => ({ ...prev, sceneAnimatedVideos: videos }));
+  }, []);
+
+  const setSceneAnimationResumables = useCallback((resumables: Map<number, SceneAnimationResume> | null) => {
+    setState((prev) => ({ ...prev, sceneAnimationResumables: resumables }));
+  }, []);
+
  const setSceneAudio = useCallback((audio: Map<number, string> | null) => {
    setState((prev) => ({ ...prev, sceneAudio: audio }));
  }, []);
@@ -471,6 +498,8 @@ export const useStoryWriterState = () => {
    setSceneAudio,
    setStoryVideo,
    setSceneHdVideos,
+    setSceneAnimatedVideos,
+    setSceneAnimationResumables,
    setHdVideoGenerationStatus,
    setCurrentHdSceneIndex,
    setCurrentTaskId,
--- a/frontend/src/services/billingService.ts
+++ b/frontend/src/services/billingService.ts
@@ -587,6 +587,127 @@ export const formatCurrency = (amount: number): string => {
  }).format(amount);
 };

+// Pre-flight check interfaces
+export interface PreflightOperation {
+  provider: string;
+  model?: string;
+  tokens_requested?: number;
+  operation_type: string;
+  actual_provider_name?: string;
+}
+
+export interface PreflightLimitInfo {
+  current_usage: number;
+  limit: number;
+  remaining: number;
+}
+
+export interface PreflightOperationResult {
+  provider: string;
+  operation_type: string;
+  cost: number;
+  allowed: boolean;
+  limit_info: PreflightLimitInfo | null;
+  message: string | null;
+}
+
+export interface PreflightCheckResponse {
+  can_proceed: boolean;
+  estimated_cost: number;
+  operations: PreflightOperationResult[];
+  total_cost: number;
+  usage_summary: {
+    current_calls: number;
+    limit: number;
+    remaining: number;
+  } | null;
+  cached: boolean;
+}
+
+/**
+ * Check pre-flight validation for a single operation.
+ * Returns cost estimation, limits check, and usage information.
+ * Never rejects: failures yield a blocking response whose `message` is always a string.
+ */
+export const checkPreflight = async (
+  operation: PreflightOperation
+): Promise<PreflightCheckResponse> => {
+  try {
+    const response = await billingAPI.post<{ success: boolean; data: PreflightCheckResponse }>(
+      '/preflight-check',
+      { operations: [operation] }
+    );
+
+    if (!response.data.success) {
+      throw new Error('Pre-flight check failed');
+    }
+
+    return response.data.data;
+  } catch (error: any) {
+    console.error('[BillingService] Pre-flight check error:', error);
+    // `detail` may be a structured object rather than text; surface only a string so it is safe to render.
+    const detail = error?.response?.data?.detail;
+    const text = typeof detail === 'string' ? detail : detail?.message;
+    return {
+      can_proceed: false,
+      estimated_cost: 0,
+      operations: [{
+        provider: operation.provider,
+        operation_type: operation.operation_type,
+        cost: 0,
+        allowed: false,
+        limit_info: null,
+        message: typeof text === 'string' && text ? text : 'Pre-flight check failed'
+      }],
+      total_cost: 0,
+      usage_summary: null,
+      cached: false
+    };
+  }
+};
+
+/**
+ * Check pre-flight validation for multiple operations in a single request.
+ * Useful for pages with many buttons to reduce API calls.
+ * Never rejects: failures yield a blocking response whose per-operation `message` is always a string.
+ */
+export const checkPreflightBatch = async (
+  operations: PreflightOperation[]
+): Promise<PreflightCheckResponse> => {
+  try {
+    const response = await billingAPI.post<{ success: boolean; data: PreflightCheckResponse }>(
+      '/preflight-check',
+      { operations }
+    );
+
+    if (!response.data.success) {
+      throw new Error('Pre-flight check failed');
+    }
+
+    return response.data.data;
+  } catch (error: any) {
+    console.error('[BillingService] Pre-flight batch check error:', error);
+    // `detail` may be a structured object rather than text; surface only a string so it is safe to render.
+    const detail = error?.response?.data?.detail;
+    const text = typeof detail === 'string' ? detail : detail?.message;
+    return {
+      can_proceed: false,
+      estimated_cost: 0,
+      operations: operations.map(op => ({
+        provider: op.provider,
+        operation_type: op.operation_type,
+        cost: 0,
+        allowed: false,
+        limit_info: null,
+        message: typeof text === 'string' && text ? text : 'Pre-flight check failed'
+      })),
+      total_cost: 0,
+      usage_summary: null,
+      cached: false
+    };
+  }
+};
+
 export const formatNumber = (num: number): string => {
  return new Intl.NumberFormat('en-US').format(num);
 };
--- a/frontend/src/services/storyWriterApi.ts
+++ b/frontend/src/services/storyWriterApi.ts
@@ -204,8 +204,10 @@ export interface StoryAudioGenerationResponse {

 export interface StoryVideoGenerationRequest {
  scenes: StoryScene[];
-  image_urls: string[];
+  image_urls: (string | null)[];
  audio_urls: string[];
+  video_urls?: (string | null)[] | null;
+  ai_audio_urls?: (string | null)[] | null;
  story_title?: string;
  fps?: number;
  transition_duration?: number;
@@ -227,6 +229,38 @@ export interface StoryVideoGenerationResponse {
  task_id?: string;
 }

+/**
+ * Request body for `StoryWriterApi.animateScene`, which posts it to
+ * "/api/story/animate-scene-preview" to animate a single scene image.
+ */
+export interface AnimateSceneRequest {
+  scene_number: number;
+  scene_data: StoryScene;
+  story_context: Record<string, any>;
+  // Image to animate
+  image_url: string;
+  // Only 5 or 10 is accepted — presumably seconds of video; TODO confirm with the backend
+  duration?: 5 | 10;
+}
+
+/**
+ * Request body for `StoryWriterApi.animateSceneVoiceover`
+ * ("/api/story/animate-scene-voiceover"): every AnimateSceneRequest field plus
+ * an audio URL and optional rendering settings.
+ */
+export interface AnimateSceneVoiceoverRequest extends AnimateSceneRequest {
+  // Presumably the voiceover track the animation is driven by — confirm with the backend
+  audio_url: string;
+  resolution?: '480p' | '720p';
+  prompt?: string;
+}
+
+/**
+ * Response body typed for `StoryWriterApi.animateScene` and
+ * `StoryWriterApi.resumeAnimateScene`.
+ */
+export interface AnimateSceneResponse {
+  success: boolean;
+  scene_number: number;
+  video_filename: string;
+  video_url: string;
+  duration: number;
+  cost: number;
+  prompt_used: string;
+  provider: string;
+  // Presumably the id accepted by ResumeAnimateSceneRequest.prediction_id — confirm
+  prediction_id?: string;
+}
+
+/**
+ * Request body for `StoryWriterApi.resumeAnimateScene`
+ * ("/api/story/animate-scene-resume"), which resumes a timed-out scene
+ * animation download using the prediction id.
+ */
+export interface ResumeAnimateSceneRequest {
+  prediction_id: string;
+  scene_number: number;
+  // Only 5 or 10 is accepted — presumably seconds of video; TODO confirm with the backend
+  duration?: 5 | 10;
+}
+
 class StoryWriterApi {
  /**
   * Generate 3 story setup options from a user's story idea
@@ -373,20 +407,63 @@ class StoryWriterApi {
    return response.data;
  }

+  /**
+   * Turn one scene image into a short preview video.
+   *
+   * @param request Scene, story context and source image to animate.
+   * @returns The response body of POST /api/story/animate-scene-preview.
+   */
+  async animateScene(request: AnimateSceneRequest): Promise<AnimateSceneResponse> {
+    const endpoint = "/api/story/animate-scene-preview";
+    const { data } = await aiApiClient.post<AnimateSceneResponse>(endpoint, request);
+    return data;
+  }
+
+  /**
+   * Start a WaveSpeed InfiniteTalk animation of a scene image with voiceover.
+   *
+   * The work is asynchronous: InfiniteTalk can take up to 10 minutes, so the
+   * call resolves with a task_id that the caller polls.
+   *
+   * @param request Scene animation request including the audio URL.
+   * @returns The response body of POST /api/story/animate-scene-voiceover.
+   */
+  async animateSceneVoiceover(request: AnimateSceneVoiceoverRequest): Promise<{ task_id: string; status: string; message: string }> {
+    // Local alias mirroring the declared return type, used only to type the POST
+    type VoiceoverTask = { task_id: string; status: string; message: string };
+    const endpoint = "/api/story/animate-scene-voiceover";
+    const { data } = await aiApiClient.post<VoiceoverTask>(endpoint, request);
+    return data;
+  }
+
+  /**
+   * Pick up a scene animation whose download timed out, identified by its
+   * prediction id.
+   *
+   * @param request Prediction id and scene number of the animation to resume.
+   * @returns The response body of POST /api/story/animate-scene-resume.
+   */
+  async resumeAnimateScene(request: ResumeAnimateSceneRequest): Promise<AnimateSceneResponse> {
+    const endpoint = "/api/story/animate-scene-resume";
+    const { data } = await aiApiClient.post<AnimateSceneResponse>(endpoint, request);
+    return data;
+  }
+
+  /**
+   * Resolve a media path against the API client's base URL.
+   *
+   * @param path Relative path, or a full http(s) URL.
+   * @returns `path` unchanged when it is empty or already starts with
+   *   http:// or https://; otherwise the client's baseURL (minus one trailing
+   *   slash) joined to `path` with exactly one slash between them.
+   */
+  private buildAbsoluteUrl(path: string): string {
+    // Empty input and fully-qualified http(s) URLs are returned untouched
+    if (!path || ['http://', 'https://'].some((scheme) => path.startsWith(scheme))) {
+      return path;
+    }
+    let origin = aiApiClient.defaults.baseURL || '';
+    if (origin.endsWith('/')) {
+      origin = origin.slice(0, -1); // drop a single trailing slash
+    }
+    const suffix = path.startsWith('/') ? path : '/' + path;
+    return origin + suffix;
+  }
+
  /**
   * Get image URL for a scene image
   */
  getImageUrl(imageUrl: string): string {
-    // If imageUrl is already a full URL, return it as-is
-    if (imageUrl.startsWith('http://') || imageUrl.startsWith('https://')) {
-      return imageUrl;
+    return this.buildAbsoluteUrl(imageUrl);
    }
-    // Otherwise, prepend the base URL
-    const baseURL = aiApiClient.defaults.baseURL || '';
-    // Remove trailing slash from baseURL if present, and leading slash from imageUrl if present
-    const cleanBaseURL = baseURL.endsWith('/') ? baseURL.slice(0, -1) : baseURL;
-    const cleanImageUrl = imageUrl.startsWith('/') ? imageUrl : `/${imageUrl}`;
-    return `${cleanBaseURL}${cleanImageUrl}`;
+
+  /**
+   * Convert any relative media URL to absolute
+   */
+  getMediaUrl(path: string): string {
+    return this.buildAbsoluteUrl(path);
  }

  /**
@@ -400,6 +477,165 @@ class StoryWriterApi {
    return response.data;
  }

+  /**
+   * Send a prompt to the WaveSpeed prompt optimizer.
+   *
+   * @param request Prompt text plus optional mode ('image' or 'video'), style
+   *   and image.
+   * @returns The response body of POST /api/story/optimize-prompt.
+   */
+  async optimizePrompt(request: {
+    text: string;
+    mode?: 'image' | 'video';
+    style?: 'default' | 'artistic' | 'photographic' | 'technical' | 'anime' | 'realistic';
+    image?: string;
+  }): Promise<{ optimized_prompt: string; success: boolean }> {
+    // Local alias mirroring the declared return type, used only to type the POST
+    type OptimizedPrompt = { optimized_prompt: string; success: boolean };
+    const endpoint = "/api/story/optimize-prompt";
+    const { data } = await aiApiClient.post<OptimizedPrompt>(endpoint, request);
+    return data;
+  }
+
+  /**
+   * Re-create the image for one scene from an explicit prompt; no AI prompt
+   * generation step is involved.
+   *
+   * @param request Scene identity, the prompt to render and optional provider,
+   *   model and size overrides.
+   * @returns The response body of POST /api/story/regenerate-images.
+   */
+  async regenerateSceneImage(request: {
+    scene_number: number;
+    scene_title: string;
+    prompt: string;
+    provider?: string;
+    width?: number;
+    height?: number;
+    model?: string;
+  }): Promise<{
+    scene_number: number;
+    scene_title: string;
+    image_filename: string;
+    image_url: string;
+    width: number;
+    height: number;
+    provider: string;
+    model?: string;
+    seed?: number;
+    success: boolean;
+    error?: string;
+  }> {
+    // Local alias mirroring the declared return type, used only to type the POST
+    type RegeneratedImage = {
+      scene_number: number;
+      scene_title: string;
+      image_filename: string;
+      image_url: string;
+      width: number;
+      height: number;
+      provider: string;
+      model?: string;
+      seed?: number;
+      success: boolean;
+      error?: string;
+    };
+    const endpoint = "/api/story/regenerate-images";
+    const { data } = await aiApiClient.post<RegeneratedImage>(endpoint, request);
+    return data;
+  }
+
+  /**
+   * Produce AI narration for one scene with WaveSpeed Minimax Speech 02 HD.
+   *
+   * @param request Scene identity, the text to speak and optional voice
+   *   settings (voice_id, speed, volume, pitch, emotion).
+   * @returns The response body of POST /api/story/generate-ai-audio.
+   */
+  async generateAIAudio(request: {
+    scene_number: number;
+    scene_title: string;
+    text: string;
+    voice_id?: string;
+    speed?: number;
+    volume?: number;
+    pitch?: number;
+    emotion?: string;
+  }): Promise<{
+    scene_number: number;
+    scene_title: string;
+    audio_filename: string;
+    audio_url: string;
+    provider: string;
+    model: string;
+    voice_id: string;
+    text_length: number;
+    file_size: number;
+    cost: number;
+    success: boolean;
+    error?: string;
+  }> {
+    // Local alias mirroring the declared return type, used only to type the POST
+    type GeneratedAudio = {
+      scene_number: number;
+      scene_title: string;
+      audio_filename: string;
+      audio_url: string;
+      provider: string;
+      model: string;
+      voice_id: string;
+      text_length: number;
+      file_size: number;
+      cost: number;
+      success: boolean;
+      error?: string;
+    };
+    const endpoint = "/api/story/generate-ai-audio";
+    const { data } = await aiApiClient.post<GeneratedAudio>(endpoint, request);
+    return data;
+  }
+
+  /**
+   * Generate free audio for a single scene (gTTS unless another provider is given).
+   *
+   * Wraps the scene in a one-element `scenes` array and posts it to
+   * "/api/story/generate-audio", then unwraps the first returned audio file.
+   *
+   * @param request Scene number/title, narration text and optional TTS settings.
+   *   Falsy settings fall back to provider 'gtts', lang 'en', slow false, rate 150.
+   * @returns The first entry of `audio_files`. `success` is false when that entry
+   *   carries an `error`, so the flag never contradicts the error field.
+   * @throws Error when the response is unsuccessful or contains no audio file.
+   */
+  async generateFreeAudio(request: {
+    scene_number: number;
+    scene_title: string;
+    text: string;
+    provider?: string;
+    lang?: string;
+    slow?: boolean;
+    rate?: number;
+  }): Promise<{
+    scene_number: number;
+    scene_title: string;
+    audio_filename: string;
+    audio_url: string;
+    provider: string;
+    file_size: number;
+    success: boolean;
+    error?: string;
+  }> {
+    // Use existing generateSceneAudio endpoint but for a single scene
+    const response = await aiApiClient.post<StoryAudioGenerationResponse>(
+      "/api/story/generate-audio",
+      {
+        scenes: [{
+          scene_number: request.scene_number,
+          title: request.scene_title,
+          audio_narration: request.text,
+        }],
+        provider: request.provider || 'gtts',
+        lang: request.lang || 'en',
+        slow: request.slow || false,
+        rate: request.rate || 150,
+      }
+    );
+    const result = response.data;
+    const audio = result.audio_files?.[0];
+    if (!result.success || !audio) {
+      throw new Error(audio?.error || 'Failed to generate audio');
+    }
+    return {
+      scene_number: audio.scene_number,
+      scene_title: audio.scene_title,
+      audio_filename: audio.audio_filename,
+      audio_url: audio.audio_url,
+      provider: audio.provider,
+      file_size: audio.file_size,
+      // A batch can succeed overall while this scene's entry reports an error;
+      // do not claim success in that case.
+      success: !audio.error,
+      error: audio.error,
+    };
+  }
+
  /**
   * Get audio URL for a scene audio file
   */
@@ -496,7 +732,6 @@ class StoryWriterApi {
    scene_data: StoryScene;
    story_context: Record<string, any>;
    all_scenes: StoryScene[];
-    scene_image_url?: string;
    provider?: string;
    model?: string;
    num_frames?: number;
--- a/frontend/src/utils/fetchMediaBlobUrl.ts
+++ b/frontend/src/utils/fetchMediaBlobUrl.ts
@@ -1,9 +1,19 @@
 import { aiApiClient } from "../api/client";

-export async function fetchMediaBlobUrl(pathOrUrl: string): Promise<string> {
-  const rel = pathOrUrl.startsWith("/") ? pathOrUrl : `/${pathOrUrl}`;
-  const res = await aiApiClient.get(rel, { responseType: "blob" });
-  return URL.createObjectURL(res.data);
+/**
+ * Download a media file through the API client and expose it as a blob URL.
+ *
+ * @param pathOrUrl Relative path (a leading slash is added when missing) or an
+ *   absolute http(s) URL, which is requested as-is.
+ * @returns An object URL for the downloaded blob, or null when the server
+ *   answers 404.
+ * @throws Any non-404 request error, unchanged.
+ */
+export async function fetchMediaBlobUrl(pathOrUrl: string): Promise<string | null> {
+  try {
+    // Absolute URLs must not get a leading slash: "/https://host/x" would be
+    // resolved against the client's baseURL and could never match a file.
+    // NOTE(review): absolute URLs still go through aiApiClient, so whatever
+    // headers it attaches are sent to that host — confirm only trusted hosts
+    // are passed in.
+    const isAbsolute = /^https?:\/\//i.test(pathOrUrl);
+    const target = isAbsolute || pathOrUrl.startsWith("/") ? pathOrUrl : `/${pathOrUrl}`;
+    const res = await aiApiClient.get(target, { responseType: "blob" });
+    return URL.createObjectURL(res.data);
+  } catch (err: any) {
+    // Only a 404 is handled gracefully - file might not exist or was regenerated
+    if (err?.response?.status === 404) {
+      console.warn(`Media file not found (404): ${pathOrUrl}`);
+      return null;
+    }
+    // Re-throw other errors
+    throw err;
+  }
 }


--- a/preflight-check-cost-estimation.plan.md
+++ b/preflight-check-cost-estimation.plan.md
@@ -0,0 +1,490 @@
+# Pre-flight Check with Cost Estimation and Button Enhancement Plan
+
+## Overview
+Implement a reusable pre-flight check system that shows estimated costs on buttons and validates operations on hover. This will provide users with cost transparency and prevent unnecessary API calls by showing if operations are allowed before execution.
+
+## Goals
+1. Show estimated cost on buttons (e.g., "Generate HD Video $0.21")
+2. Perform pre-flight check on hover (debounced to avoid performance issues)
+3. Show detailed information (allowed/blocked, limits, remaining quota)
+4. Disable buttons with appropriate messaging if limits exceeded
+5. Common/reusable solution across all ALwrity tools (blog writer, story, linkedin, etc.)
+6. Performance optimized (caching, debouncing, batching)
+7. Foundation for billing dashboard insights about operation costs
+
+## Current State Analysis
+
+### Backend Existing Capabilities
+- **Pre-flight validation**: `preflight_validator.py` has functions like `validate_video_generation_operations`, `validate_image_generation_operations`
+- **Limit checking**: `pricing_service.py` has `check_comprehensive_limits()` and `check_usage_limits()`
+- **Pricing lookup**: `get_pricing_for_provider_model()` returns cost information
+- **Caching**: `_limits_cache` with TTL to reduce DB reads
+- **Operation validation**: Supports multi-operation workflows with token estimation
+
+### Frontend Existing Capabilities
+- **Billing service**: `billingService.ts` has API client for subscription endpoints
+- **Subscription hooks**: `useSubscriptionGuard`, `useSubscription` for subscription state
+- **Button components**: Various buttons but no cost/pre-flight integration
+- **Usage dashboard**: Shows usage but not per-operation costs
+
+### Gaps
+- No lightweight endpoint for cost estimation + pre-flight check
+- No reusable button component with cost/pre-flight integration
+- No debouncing/throttling for hover-based checks
+- No consistent UX pattern across tools
+
+## Implementation Plan
+
+### Phase 1: Backend API Endpoint
+
+#### 1.1 Create Pre-flight Check Endpoint
+**File**: `backend/api/subscription_api.py`
+
+**Endpoint**: `POST /api/subscription/preflight-check`
+
+**Purpose**: Lightweight endpoint that:
+- Accepts operation definitions (provider, model, tokens_requested, operation_type)
+- Returns cost estimation, limits check result, usage info
+- Uses caching to minimize DB load
+- Fast response (< 100ms with cache hit)
+
+**Request Format**:
+```json
+{
+  "operations": [
+    {
+      "provider": "video",
+      "model": "tencent/HunyuanVideo",
+      "tokens_requested": 0,
+      "operation_type": "video_generation",
+      "actual_provider_name": "huggingface"
+    }
+  ]
+}
+```
+
+**Response Format**:
+```json
+{
+  "success": true,
+  "data": {
+    "can_proceed": true,
+    "estimated_cost": 0.21,
+    "operations": [
+      {
+        "provider": "video",
+        "operation_type": "video_generation",
+        "cost": 0.21,
+        "allowed": true,
+        "limit_info": {
+          "current_usage": 5,
+          "limit": 100,
+          "remaining": 95
+        },
+        "message": null
+      }
+    ],
+    "total_cost": 0.21,
+    "usage_summary": {
+      "current_calls": 5,
+      "limit": 100,
+      "remaining": 95
+    },
+    "cached": false
+  }
+}
+```
+
+**Implementation Details**:
+- Use `PricingService.check_comprehensive_limits()` for validation
+- Use `PricingService.get_pricing_for_provider_model()` for cost
+- Leverage existing `_limits_cache` (5-second TTL)
+- Return structured error if blocked with user-friendly message
+
+#### 1.2 Batch Pre-flight Check Endpoint (Optional, for performance)
+**Endpoint**: `POST /api/subscription/preflight-check-batch`
+
+**Purpose**: Check multiple operations at once for pages with many buttons
+
+**Performance Considerations**:
+- Single DB query for all operations
+- Batch cache lookups
+- Return results in order matching request
+
+### Phase 2: Frontend Service Layer
+
+#### 2.1 Extend Billing Service
+**File**: `frontend/src/services/billingService.ts`
+
+**New Functions**:
+```typescript
+interface PreflightOperation {
+  provider: string;
+  model?: string;
+  tokens_requested?: number;
+  operation_type: string;
+  actual_provider_name?: string;
+}
+
+interface PreflightCheckResponse {
+  can_proceed: boolean;
+  estimated_cost: number;
+  operations: Array<{
+    provider: string;
+    operation_type: string;
+    cost: number;
+    allowed: boolean;
+    limit_info: {
+      current_usage: number;
+      limit: number;
+      remaining: number;
+    } | null;
+    message: string | null;
+  }>;
+  total_cost: number;
+  usage_summary: {
+    current_calls: number;
+    limit: number;
+    remaining: number;
+  } | null;
+  cached: boolean;
+}
+
+// Single operation check
+export const checkPreflight = async (
+  operation: PreflightOperation
+): Promise<PreflightCheckResponse>
+
+// Batch operations check (for pages with many buttons)
+export const checkPreflightBatch = async (
+  operations: PreflightOperation[]
+): Promise<PreflightCheckResponse>
+```
+
+**Implementation Details**:
+- Use axios with request cancellation support
+- Add request debouncing wrapper
+- Handle errors gracefully (show cached result if available)
+- Return structured error messages for UI display
+
+#### 2.2 Create Pre-flight Check Hook
+**File**: `frontend/src/hooks/usePreflightCheck.ts`
+
+**Purpose**: Reusable React hook that:
+- Manages pre-flight check state (loading, error, result)
+- Debounces hover events (300ms delay)
+- Caches results per operation (5-second TTL)
+- Provides easy-to-use API for components
+
+**API**:
+```typescript
+interface UsePreflightCheckOptions {
+  operation: PreflightOperation;
+  enabled?: boolean; // Whether to perform check on hover
+  debounceMs?: number; // Debounce delay (default: 300ms)
+  cacheTtl?: number; // Cache TTL in ms (default: 5000ms)
+}
+
+interface UsePreflightCheckResult {
+  canProceed: boolean;
+  estimatedCost: number;
+  limitInfo: {
+    current: number;
+    limit: number;
+    remaining: number;
+  } | null;
+  loading: boolean;
+  error: string | null;
+  checkOnHover: () => void;
+  checkNow: () => void; // Immediate check
+  reset: () => void;
+}
+
+export const usePreflightCheck = (
+  options: UsePreflightCheckOptions
+): UsePreflightCheckResult
+```
+
+**Implementation Details**:
+- Use `useState` for state management
+- Use `useCallback` for memoized handlers
+- Use `useRef` for debounce timers and cache
+- Implement request cancellation on unmount
+
+### Phase 3: Reusable Button Component
+
+#### 3.1 Create Enhanced Operation Button Component
+**File**: `frontend/src/components/shared/OperationButton.tsx`
+
+**Purpose**: Reusable button component that:
+- Shows estimated cost in button label
+- Performs pre-flight check on hover
+- Shows detailed tooltip with limits/remaining quota
+- Disables button with messaging if blocked
+- Supports all operation types (video, image, image_edit, text generation, etc.)
+
+**Props**:
+```typescript
+interface OperationButtonProps {
+  // Operation definition
+  operation: PreflightOperation;
+  
+  // Button configuration
+  label: string; // Base label (e.g., "Generate HD Video")
+  variant?: 'contained' | 'outlined' | 'text';
+  size?: 'small' | 'medium' | 'large';
+  color?: 'primary' | 'secondary' | 'success' | 'error';
+  startIcon?: React.ReactNode;
+  endIcon?: React.ReactNode;
+  
+  // Pre-flight check behavior
+  showCost?: boolean; // Show cost in label (default: true)
+  checkOnHover?: boolean; // Check on hover (default: true)
+  checkOnMount?: boolean; // Check on mount (default: false)
+  
+  // Callbacks
+  onClick: () => void;
+  onPreflightResult?: (result: PreflightCheckResponse) => void;
+  
+  // Customization
+  disabled?: boolean; // Additional disabled state
+  loading?: boolean; // Loading state override
+  tooltipPlacement?: 'top' | 'bottom' | 'left' | 'right';
+  
+  // Styling
+  sx?: SxProps<Theme>;
+  fullWidth?: boolean;
+}
+```
+
+**Features**:
+- Cost display: "Generate HD Video $0.21" or "Generate HD Video" if cost unavailable
+- Tooltip on hover shows:
+  - Operation allowed/blocked status
+  - Current usage / limit / remaining
+  - Estimated cost breakdown
+  - Message if blocked (e.g., "You've reached your video generation limit. Upgrade your plan for more videos.")
+- Button disabled if:
+  - `disabled` prop is true
+  - `loading` prop is true
+  - Pre-flight check returned `can_proceed: false`
+- Button styling:
+  - Normal: standard button
+  - Blocked: grayed out with warning icon
+  - Loading: spinner with disabled state
+
+**Implementation Details**:
+- Use Material-UI `Button` and `Tooltip` components
+- Integrate with `usePreflightCheck` hook
+- Format cost as currency (e.g., "$0.21" or "$0.00" if free)
+- Handle edge cases (no subscription, no limits, etc.)
+
+#### 3.2 Create Operation Type Mappings
+**File**: `frontend/src/utils/operationTypes.ts`
+
+**Purpose**: Centralized configuration for operation types:
+- Default models per operation type
+- Display names
+- Icons
+- Default token estimates
+
+```typescript
+export const OPERATION_TYPES = {
+  video_generation: {
+    provider: 'video',
+    defaultModel: 'tencent/HunyuanVideo',
+    displayName: 'Video Generation',
+    icon: VideoLibraryIcon,
+    defaultTokens: 0,
+  },
+  image_generation: {
+    provider: 'stability',
+    defaultModel: 'stability-ai/stable-diffusion-xl',
+    displayName: 'Image Generation',
+    icon: ImageIcon,
+    defaultTokens: 0,
+  },
+  image_editing: {
+    provider: 'image_edit',
+    defaultModel: 'Qwen/Qwen-Image-Edit',
+    displayName: 'Image Editing',
+    icon: EditIcon,
+    defaultTokens: 0,
+  },
+  // ... more operation types
+} as const;
+```
+
+### Phase 4: Integration Across Tools
+
+#### 4.1 Story Writer Integration
+**Files**: 
+- `frontend/src/components/StoryWriter/components/HdVideoSection.tsx`
+- `frontend/src/components/StoryWriter/components/VideoSection.tsx`
+- `frontend/src/components/StoryWriter/components/MultimediaToolbar.tsx`
+
+**Changes**:
+- Replace existing buttons with `OperationButton`
+- Configure with appropriate operation type
+- Pass existing `onClick` handlers
+
+**Example**:
+```tsx
+<OperationButton
+  operation={{
+    provider: 'video',
+    model: 'tencent/HunyuanVideo',
+    tokens_requested: 0,
+    operation_type: 'video_generation',
+    actual_provider_name: 'huggingface',
+  }}
+  label="Generate HD Animation"
+  showCost={true}
+  checkOnHover={true}
+  onClick={handleGenerateHdVideo}
+  disabled={isGeneratingHdVideo || state.hdVideoGenerationStatus === 'awaiting_approval'}
+  loading={isGeneratingHdVideo}
+/>
+```
+
+#### 4.2 Blog Writer Integration
+**Files**: Various blog writer components with generation buttons
+
+**Changes**: Similar to Story Writer - replace buttons with `OperationButton`
+
+#### 4.3 LinkedIn Writer Integration
+**Files**: LinkedIn writer components
+
+**Changes**: Similar pattern
+
+### Phase 5: Performance Optimization
+
+#### 5.1 Caching Strategy
+**Backend**:
+- Use existing `_limits_cache` (5-second TTL)
+- Cache pre-flight check results per user:operation combination
+- Invalidate cache on usage updates
+
+**Frontend**:
+- In-memory cache per hook instance (5-second TTL)
+- Share cache across components using React Context
+- Clear cache on subscription changes
+
+#### 5.2 Debouncing/Throttling
+**Frontend**:
+- Debounce hover events (300ms delay)
+- Throttle batch requests (max 1 request per 500ms)
+- Cancel in-flight requests on unmount/hover exit
+
+#### 5.3 Request Batching
+**Frontend**:
+- For pages with many buttons (e.g., story export with multiple operations)
+- Batch multiple operations into single request
+- Use `checkPreflightBatch` API
+
+#### 5.4 Lazy Loading
+**Frontend**:
+- Only check on hover (not on mount)
+- Optional: Check on mount for primary buttons only
+- Defer checks for secondary/tertiary buttons
+
+### Phase 6: Billing Dashboard Integration (Future)
+
+#### 6.1 Operation Cost Tracking
+**Backend**: 
+- Track operation costs in `APIUsageLog` (already exists)
+- Add operation_type field to logs (already exists)
+
+#### 6.2 Cost Insights
+**Frontend**:
+- Add operation cost breakdown to billing dashboard
+- Show most expensive operations
+- Show cost trends per operation type
+- Add filters by operation type
+
+## Performance Considerations
+
+### Potential Bottlenecks
+1. **Many buttons on one page**: Hovering over each button could trigger its own request
+   - **Solution**: Batch requests, debounce, cache aggressively
+
+2. **Rapid hover in/out**: Multiple requests for same operation
+   - **Solution**: Debounce (300ms), cancel in-flight requests
+
+3. **Backend DB load**: Each check queries subscription/usage tables
+   - **Solution**: Use existing cache (5-second TTL), optimize queries
+
+4. **Frontend render performance**: Many tooltips updating
+   - **Solution**: Virtualize if needed, optimize re-renders with React.memo
+
+### Performance Targets
+- Pre-flight check API: < 100ms with cache hit, < 300ms without cache
+- Frontend hover response: < 50ms (debounced)
+- Batch check (10 operations): < 500ms
+- Tooltip render: < 16ms (60fps)
+
+## Testing Strategy
+
+### Unit Tests
+- `usePreflightCheck` hook: debouncing, caching, error handling
+- `OperationButton` component: cost display, tooltip, disabled states
+- Billing service: API calls, error handling
+
+### Integration Tests
+- Pre-flight check endpoint: validation, cost calculation, caching
+- Button hover behavior: tooltip display, disabled states
+
+### E2E Tests
+- User hovers over button, sees cost and limits
+- User blocked by limits, sees appropriate messaging
+- User clicks button, operation executes (or fails with clear error)
+
+## Migration Strategy
+
+### Phase 1: Backend (Week 1)
+1. Create pre-flight check endpoint
+2. Add unit tests
+3. Deploy and monitor performance
+
+### Phase 2: Frontend Core (Week 2)
+1. Extend billing service
+2. Create `usePreflightCheck` hook
+3. Create `OperationButton` component
+4. Add unit tests
+
+### Phase 3: Integration (Week 3)
+1. Integrate into Story Writer (highest priority - most buttons)
+2. Test thoroughly
+3. Iterate based on feedback
+
+### Phase 4: Rollout (Week 4+)
+1. Integrate into Blog Writer
+2. Integrate into LinkedIn Writer
+3. Integrate into other tools
+4. Monitor performance and user feedback
+
+## Success Metrics
+
+1. **User Experience**:
+   - Reduced confusion about operation costs
+   - Fewer failed operations due to limits
+   - Increased clarity about remaining quota
+
+2. **Performance**:
+   - < 100ms API response time (with cache)
+   - < 1% increase in backend DB load
+   - No noticeable UI lag on pages with many buttons
+
+3. **Adoption**:
+   - All major operation buttons using new component
+   - Consistent UX across all tools
+
+## Future Enhancements
+
+1. **Cost estimation for multi-operation workflows**: Estimate total cost for complex operations
+2. **Usage predictions**: Show projected usage if user continues current pattern
+3. **Cost optimization suggestions**: Suggest cheaper alternatives
+4. **Batch operation approval**: Show total cost and allow approval for multiple operations
+5. **Cost alerts**: Warn users approaching cost limits
+6. **Operation history**: Show recent operations and their costs in tooltip
+