Save local changes (GSC/Bing integrations) before merging PR #354

2026-02-13 13:11:27 +05:30
parent 43e66835ac
commit 08a1f4a1d8
144 changed files with 8310 additions and 2748 deletions
--- a/backend/api/assets_serving.py
+++ b/backend/api/assets_serving.py
@@ -0,0 +1,52 @@
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import FileResponse
+import os
+from pathlib import Path
+from services.database import WORKSPACE_DIR, get_user_db_path
+
+router = APIRouter(prefix="/api/assets", tags=["Assets Serving"])
+
+@router.get("/{user_id}/avatars/{filename}")
+async def serve_avatar(user_id: str, filename: str):
+    """
+    Serve avatar images directly.
+    Public endpoint relying on unguessable filenames.
+    """
+    # Sanitize user_id (simple check to prevent directory traversal)
+    safe_user_id = "".join(c for c in user_id if c.isalnum() or c in ('-', '_'))
+    if safe_user_id != user_id:
+        raise HTTPException(status_code=400, detail="Invalid user ID")
+    
+    # Sanitize filename
+    safe_filename = os.path.basename(filename)
+    
+    # Construct path
+    # workspace/workspace_{user_id}/assets/avatars/{filename}
+    file_path = Path(WORKSPACE_DIR) / f"workspace_{safe_user_id}" / "assets" / "avatars" / safe_filename
+    
+    if not file_path.exists():
+        raise HTTPException(status_code=404, detail="Asset not found")
+        
+    return FileResponse(file_path)
+
+@router.get("/{user_id}/voice_samples/{filename}")
+async def serve_voice_sample(user_id: str, filename: str):
+    """
+    Serve voice sample audio files directly.
+    """
+    # Sanitize user_id
+    safe_user_id = "".join(c for c in user_id if c.isalnum() or c in ('-', '_'))
+    if safe_user_id != user_id:
+        raise HTTPException(status_code=400, detail="Invalid user ID")
+    
+    # Sanitize filename
+    safe_filename = os.path.basename(filename)
+    
+    # Construct path
+    # workspace/workspace_{user_id}/assets/voice_samples/{filename}
+    file_path = Path(WORKSPACE_DIR) / f"workspace_{safe_user_id}" / "assets" / "voice_samples" / safe_filename
+    
+    if not file_path.exists():
+        raise HTTPException(status_code=404, detail="Asset not found")
+        
+    return FileResponse(file_path)
--- a/backend/api/onboarding_utils/docs/BRAND_AVATAR_API.md
+++ b/backend/api/onboarding_utils/docs/BRAND_AVATAR_API.md
@@ -0,0 +1,97 @@
+# Brand Avatar API Documentation
+
+## Overview
+The Brand Avatar API provides endpoints for generating, varying, and enhancing brand avatars using WaveSpeed AI.
+
+**Base URL**: `/api/onboarding/assets`
+
+## Endpoints
+
+### 1. Generate Avatar
+Generate a new brand avatar from a text prompt.
+
+- **URL**: `/generate-avatar`
+- **Method**: `POST`
+- **Body** (`application/json`):
+  ```json
+  {
+    "prompt": "A professional tech entrepreneur, studio lighting",
+    "style_preset": "Cinematic",
+    "aspect_ratio": "1:1",
+    "model": "ideogram-v3-turbo",
+    "provider": "wavespeed"
+  }
+  ```
+- **Response**:
+  ```json
+  {
+    "success": true,
+    "image_url": "/api/assets/{user_id}/avatars/{filename}.png",
+    "image_base64": "...",
+    "asset_id": 123
+  }
+  ```
+
+### 2. Create Variation
+Create a variation of an existing avatar/image.
+
+- **URL**: `/create-variation`
+- **Method**: `POST`
+- **Content-Type**: `multipart/form-data`
+- **Form Data**:
+  - `prompt` (text): Description of the variation (e.g., "same person but smiling")
+  - `file` (file): The reference image file
+- **Response**:
+  ```json
+  {
+    "success": true,
+    "image_url": "/api/assets/{user_id}/avatars/{filename}.png",
+    "image_base64": "...",
+    "asset_id": 124
+  }
+  ```
+
+### 3. Enhance Avatar
+Upscale and enhance an existing avatar image.
+
+- **URL**: `/enhance-avatar`
+- **Method**: `POST`
+- **Content-Type**: `multipart/form-data`
+- **Form Data**:
+  - `file` (file): The image file to enhance
+- **Response**:
+  ```json
+  {
+    "success": true,
+    "image_url": "/api/assets/{user_id}/avatars/{filename}.png",
+    "image_base64": "...",
+    "asset_id": 125
+  }
+  ```
+
+### 4. Enhance Prompt
+Optimize a simple prompt into a detailed, high-quality prompt using WaveSpeed.
+
+- **URL**: `/enhance-prompt`
+- **Method**: `POST`
+- **Body**:
+  ```json
+  {
+    "prompt": "man in suit"
+  }
+  ```
+- **Response**:
+  ```json
+  {
+    "success": true,
+    "original_prompt": "man in suit",
+    "optimized_prompt": "A professional portrait of a man in a tailored navy blue suit, confident expression, studio lighting, 4k resolution..."
+  }
+  ```
+
+## Providers
+- **Default Provider**: `wavespeed`
+- **Models**:
+  - Generation: `ideogram-v3-turbo` (default), `qwen-image`
+  - Editing/Variation: `qwen-edit-plus` (default)
+  - Enhancement: `nano-banana-pro-edit-ultra` (4K upscale)
--- a/backend/api/onboarding_utils/onboarding_completion_service.py
+++ b/backend/api/onboarding_utils/onboarding_completion_service.py
@@ -100,6 +100,8 @@ class OnboardingCompletionService:
            except Exception as e:
                logger.warning(f"Failed to schedule website analysis task creation for user {user_id}: {e}")

+
+
            # Schedule onboarding full-site SEO audit (non-blocking) ~10 minutes after completion
            try:
                from services.database import SessionLocal
--- a/backend/api/onboarding_utils/step4_asset_routes.py
+++ b/backend/api/onboarding_utils/step4_asset_routes.py
@@ -10,22 +10,36 @@ from sqlalchemy.orm import Session
 from pydantic import BaseModel
 from loguru import logger
 from .step4_persona_routes import _extract_user_id
+from middleware.auth_middleware import get_current_user
 import base64
 import os
 from pathlib import Path
 from utils.file_storage import save_file_safely, generate_unique_filename
 from services.database import get_db, WORKSPACE_DIR
 from utils.asset_tracker import save_asset_to_library
+from models.content_asset_models import ContentAsset, AssetType, AssetSource
+from sqlalchemy import desc

 from services.llm_providers.main_image_generation import (
    generate_image_with_provider,
    enhance_image_prompt,
-    generate_image_variation
+    generate_image_variation,
+    generate_image_enhance
 )
+from services.llm_providers.main_audio_generation import clone_voice, qwen3_voice_clone, cosyvoice_voice_clone, qwen3_voice_design
+import asyncio
+import random
+import string

-router = APIRouter()
+router = APIRouter(prefix="/onboarding/assets")

 # --- Models ---
+class VoiceDesignRequest(BaseModel):
+    user_id: Optional[str] = None
+    text: str
+    voice_description: str
+    language: str = "auto"
+
 class AvatarPromptRequest(BaseModel):
    user_id: Optional[str] = None
    prompt: str
@@ -34,6 +48,9 @@ class AvatarPromptRequest(BaseModel):
    negative_prompt: Optional[str] = None
    num_inference_steps: int = 30
    guidance_scale: float = 7.5
+    model: Optional[str] = None
+    rendering_speed: Optional[str] = None
+    provider: Optional[str] = None

 class AvatarEnhanceRequest(BaseModel):
    user_id: Optional[str] = None
@@ -47,14 +64,108 @@ class VoiceCloneRequest(BaseModel):

 # --- Routes ---

+@router.get("/latest-avatar")
+async def get_latest_avatar(
+    current_user: Dict[str, Any] = Depends(get_current_user),
+    db: Session = Depends(get_db)
+):
+    """Get the latest generated brand avatar for the user."""
+    try:
+        user_id = _extract_user_id(current_user)
+        
+        # Search for assets that are either:
+        # 1. Saved with source_module=BRAND_AVATAR_GENERATOR (new)
+        # 2. Saved with source_module=STORY_WRITER but have metadata category='brand_avatar' (legacy)
+        
+        # Fetch candidates (limit to recent 20 to avoid performance issues)
+        candidates = db.query(ContentAsset).filter(
+            ContentAsset.user_id == user_id,
+            ContentAsset.asset_type == AssetType.IMAGE,
+            ContentAsset.source_module.in_([
+                AssetSource.BRAND_AVATAR_GENERATOR, 
+                AssetSource.STORY_WRITER
+            ])
+        ).order_by(desc(ContentAsset.created_at)).limit(50).all()
+        
+        asset = None
+        for candidate in candidates:
+            # Check for direct match (new assets)
+            if candidate.source_module == AssetSource.BRAND_AVATAR_GENERATOR:
+                asset = candidate
+                break
+            
+            # Check for legacy match (metadata category)
+            if candidate.source_module == AssetSource.STORY_WRITER:
+                meta = candidate.asset_metadata or {}
+                if meta.get('category') == 'brand_avatar':
+                    asset = candidate
+                    break
+        
+        if not asset:
+            return {"success": False, "message": "No avatar found"}
+            
+        # Fallback to metadata prompt if main column is empty (legacy support)
+        prompt = asset.prompt
+        if not prompt and asset.asset_metadata:
+            prompt = asset.asset_metadata.get('prompt', '')
+            
+        return {
+            "success": True,
+            "image_url": asset.file_url,
+            "prompt": prompt,
+            "asset_id": asset.id,
+            "provider": asset.provider
+        }
+    except Exception as e:
+        logger.error(f"Failed to fetch latest avatar: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.get("/latest-voice-clone")
+async def get_latest_voice_clone(
+    current_user: Dict[str, Any] = Depends(get_current_user),
+    db: Session = Depends(get_db)
+):
+    """Get the latest generated voice clone for the user."""
+    try:
+        user_id = _extract_user_id(current_user)
+        
+        # Fetch latest voice clone asset
+        asset = db.query(ContentAsset).filter(
+            ContentAsset.user_id == user_id,
+            ContentAsset.asset_type == AssetType.AUDIO,
+            ContentAsset.source_module == AssetSource.VOICE_CLONER
+        ).order_by(desc(ContentAsset.created_at)).first()
+        
+        if not asset:
+            # Try to find legacy assets or assets that might have been saved differently
+            # For example, voice designs might be saved as VOICE_CLONER too?
+            # Or check for 'voice_design' logic if needed, but 'voice_cloner' is primary
+            return {"success": False, "message": "No voice clone found"}
+            
+        meta = asset.asset_metadata or {}
+        
+        return {
+            "success": True,
+            "custom_voice_id": meta.get("custom_voice_id"),
+            "preview_audio_url": meta.get("preview_url") or asset.file_url,
+            "asset_id": asset.id,
+            "voice_name": meta.get("voice_name"),
+            "engine": meta.get("engine")
+        }
+    except Exception as e:
+        logger.error(f"Failed to fetch latest voice clone: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
@router.post("/generate-avatar")
 async def generate_avatar(
    request: AvatarPromptRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db)
 ):
    """Generate a brand avatar using available image providers."""
    try:
-        user_id = _extract_user_id(request.user_id)
+        user_id = _extract_user_id(current_user)
        
        logger.info(f"Generating avatar for user {user_id} with prompt: {request.prompt}")
        
@@ -66,6 +177,9 @@ async def generate_avatar(
            num_inference_steps=request.num_inference_steps,
            guidance_scale=request.guidance_scale,
            style_preset=request.style_preset,
+            model=request.model,
+            rendering_speed=request.rendering_speed,
+            provider=request.provider,
            user_id=user_id
        )
        
@@ -78,42 +192,66 @@ async def generate_avatar(
        
        image_data = result.get("image_base64")
        if not image_data and result.get("image_url"):
-            # TODO: Download image from URL if needed, or just store URL
-            pass
+            try:
+                import httpx
+                async with httpx.AsyncClient() as client:
+                    response = await client.get(result["image_url"], timeout=30.0)
+                    response.raise_for_status()
+                    image_data = response.content
+            except ImportError:
+                # Fallback to requests if httpx is not installed
+                import requests
+                response = requests.get(result["image_url"], timeout=30.0)
+                response.raise_for_status()
+                image_data = response.content
+            except Exception as e:
+                logger.error(f"Failed to download image from URL: {e}")
+                raise HTTPException(status_code=500, detail=f"Failed to download generated image: {str(e)}")
            
        if image_data:
            # Decode if needed (usually it's already base64 string)
            # Save file
            filename = generate_unique_filename("avatar", "png")
-            file_path = save_file_safely(
-                base64.b64decode(image_data) if isinstance(image_data, str) else image_data,
-                user_id,
-                "avatars",
+            # If image_data is bytes (from URL download), pass it directly
+            # If it's base64 string (from API), decode it
+            content_to_save = base64.b64decode(image_data) if isinstance(image_data, str) else image_data
+            
+            # Construct user assets directory
+            user_assets_dir = Path(WORKSPACE_DIR) / f"workspace_{user_id}" / "assets" / "avatars"
+            
+            saved_path, error = save_file_safely(
+                content_to_save,
+                user_assets_dir,
                filename
            )
            
+            if error or not saved_path:
+                raise HTTPException(status_code=500, detail=f"Failed to save image file: {error}")
+            
+            # Construct public URL
+            image_url = f"/api/assets/{user_id}/avatars/{filename}"
+            
            # Save to Asset Library
            asset_id = save_asset_to_library(
                db=db,
                user_id=user_id,
-                file_path=file_path,
                asset_type="image",
-                category="brand_avatar",
-                meta_data={
-                    "prompt": request.prompt,
+                source_module="brand_avatar_generator",
+                filename=filename,
+                file_url=image_url,
+                file_path=str(saved_path),
+                prompt=request.prompt,
+                asset_metadata={
                    "provider": result.get("provider", "unknown"),
-                    "style": request.style_preset
+                    "style": request.style_preset,
+                    "category": "brand_avatar"
                }
            )
            
-            # Construct public URL (this depends on your static file serving setup)
-            # Assuming /api/assets/{user_id}/avatars/{filename}
-            image_url = f"/api/assets/{user_id}/avatars/{filename}"
-            
            return {
                "success": True,
                "image_url": image_url,
-                "image_base64": image_data, # Optional: return base64 for immediate display
+                "image_base64": image_data if isinstance(image_data, str) else base64.b64encode(image_data).decode('utf-8'),
                "asset_id": asset_id
            }
            
@@ -126,14 +264,15 @@ async def generate_avatar(

@router.post("/enhance-prompt")
 async def enhance_prompt_route(
-    request: AvatarEnhanceRequest
+    request: AvatarEnhanceRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user)
 ):
    """Enhance a simple prompt into a detailed midjourney-style prompt."""
    try:
-        user_id = _extract_user_id(request.user_id)
+        user_id = _extract_user_id(current_user)
        logger.info(f"Enhancing prompt for user {user_id}: {request.prompt}")
        
-        enhanced_prompt = await enhance_image_prompt(request.prompt)
+        enhanced_prompt = await enhance_image_prompt(request.prompt, user_id=user_id)
        
        return {
            "success": True,
@@ -145,52 +284,347 @@ async def enhance_prompt_route(
        raise HTTPException(status_code=500, detail=str(e))


+@router.post("/create-variation")
+async def create_variation_route(
+    prompt: str = Form(...),
+    file: UploadFile = File(...),
+    user_id: Optional[str] = Form(None), # Ignored in favor of authenticated user
+    current_user: Dict[str, Any] = Depends(get_current_user)
+):
+    """Generate a variation of an existing avatar."""
+    try:
+        user_id = _extract_user_id(current_user)
+        logger.info(f"Creating variation for user {user_id} with prompt: {prompt}")
+        
+        # Read file
+        file_content = await file.read()
+        
+        result = await generate_image_variation(
+            image=file_content,
+            prompt=prompt,
+            user_id=user_id
+        )
+        
+        if not result.get("success"):
+            raise HTTPException(status_code=500, detail=result.get("error", "Variation generation failed"))
+            
+        # Save result
+        image_data = result.get("image_base64")
+        if image_data:
+            filename = generate_unique_filename("avatar_variation", "png")
+            content_to_save = base64.b64decode(image_data)
+            
+            # Construct user assets directory
+            user_assets_dir = Path(WORKSPACE_DIR) / f"workspace_{user_id}" / "assets" / "avatars"
+            
+            saved_path, error = save_file_safely(
+                content_to_save,
+                user_assets_dir,
+                filename
+            )
+            
+            if error or not saved_path:
+                raise HTTPException(status_code=500, detail=f"Failed to save variation file: {error}")
+            
+            # Construct public URL
+            image_url = f"/api/assets/{user_id}/avatars/{filename}"
+            
+            # Save to Asset Library
+            asset_id = save_asset_to_library(
+                db=next(get_db()),
+                user_id=user_id,
+                asset_type="image",
+                source_module="brand_avatar_variation",
+                filename=filename,
+                file_url=image_url,
+                file_path=str(saved_path),
+                asset_metadata={
+                    "prompt": prompt,
+                    "provider": "wavespeed",
+                    "original_filename": file.filename,
+                    "category": "brand_avatar_variation"
+                }
+            )
+            
+            return {
+                "success": True,
+                "image_url": image_url,
+                "image_base64": image_data,
+                "asset_id": asset_id
+            }
+            
+        return {"success": False, "error": "No image data returned"}
+        
+    except Exception as e:
+        logger.error(f"Variation generation failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/enhance-avatar")
+async def enhance_avatar_route(
+    file: UploadFile = File(...),
+    user_id: Optional[str] = Form(None), # Ignored in favor of authenticated user
+    current_user: Dict[str, Any] = Depends(get_current_user)
+):
+    """Enhance/Upscale an existing avatar."""
+    try:
+        user_id = _extract_user_id(current_user)
+        logger.info(f"Enhancing avatar for user {user_id}")
+        
+        # Read file
+        file_content = await file.read()
+        
+        result = await generate_image_enhance(
+            image=file_content,
+            user_id=user_id
+        )
+        
+        if not result.get("success"):
+            raise HTTPException(status_code=500, detail=result.get("error", "Enhancement failed"))
+            
+        # Save result
+        image_data = result.get("image_base64")
+        if image_data:
+            filename = generate_unique_filename("avatar_enhanced", "png")
+            content_to_save = base64.b64decode(image_data)
+            
+            # Construct user assets directory
+            user_assets_dir = Path(WORKSPACE_DIR) / f"workspace_{user_id}" / "assets" / "avatars"
+            
+            saved_path, error = save_file_safely(
+                content_to_save,
+                user_assets_dir,
+                filename
+            )
+            
+            if error or not saved_path:
+                raise HTTPException(status_code=500, detail=f"Failed to save enhanced file: {error}")
+            
+            # Construct public URL
+            image_url = f"/api/assets/{user_id}/avatars/{filename}"
+            
+            # Save to Asset Library
+            asset_id = save_asset_to_library(
+                db=next(get_db()),
+                user_id=user_id,
+                asset_type="image",
+                source_module="brand_avatar_enhancer",
+                filename=filename,
+                file_url=image_url,
+                file_path=str(saved_path),
+                asset_metadata={
+                    "provider": "wavespeed",
+                    "category": "brand_avatar_enhanced",
+                    "original_filename": file.filename
+                }
+            )
+            
+            return {
+                "success": True,
+                "image_url": image_url,
+                "image_base64": image_data,
+                "asset_id": asset_id
+            }
+            
+        return {"success": False, "error": "No image data returned"}
+        
+    except Exception as e:
+        logger.error(f"Avatar enhancement failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
@router.post("/create-voice-clone")
 async def create_voice_clone(
    voice_name: str = Form(...),
    description: str = Form(None),
    engine: str = Form("qwen3"),
    file: UploadFile = File(...),
-    user_id: Optional[str] = Form(None),
+    user_id: Optional[str] = Form(None), # Ignored in favor of authenticated user
+    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db)
 ):
    """Create a voice clone from an audio file."""
    try:
-        user_id = _extract_user_id(user_id)
-        logger.info(f"Creating voice clone '{voice_name}' for user {user_id}")
+        user_id = _extract_user_id(current_user)
+        logger.info(f"Creating voice clone '{voice_name}' (engine={engine}) for user {user_id}")
        
        # 1. Save uploaded audio file
        file_content = await file.read()
        filename = generate_unique_filename("voice_sample", Path(file.filename).suffix.lstrip("."))
-        file_path = save_file_safely(file_content, user_id, "voice_samples", filename)
        
-        # 2. Call Voice Cloning API (Placeholder for actual implementation)
-        # TODO: Integrate with Minimax or CosyVoice API
-        # For now, we simulate success
+        user_voice_dir = Path(WORKSPACE_DIR) / f"workspace_{user_id}" / "assets" / "voice_samples"
+        saved_path, error = save_file_safely(file_content, user_voice_dir, filename)
        
-        # 3. Save to Asset Library
+        if error or not saved_path:
+             raise HTTPException(status_code=500, detail=f"Failed to save voice sample: {error}")
+             
+        file_path = str(saved_path)
+        
+        # 2. Call Voice Cloning API
+        preview_audio_bytes = None
+        custom_voice_id = None
+        
+        loop = asyncio.get_event_loop()
+        
+        # Default preview text
+        preview_text = "Hello! This is a preview of my cloned voice using AI technology. I hope you like it!"
+        
+        if engine.lower() == "minimax":
+            # Generate valid voice ID for Minimax (alphanumeric, starts with letter, 8+ chars)
+            random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=8))
+            custom_voice_id = f"vc_{random_suffix}"
+            
+            logger.info(f"Cloning voice with Minimax, ID: {custom_voice_id}")
+            
+            # Run blocking call in executor
+            result = await loop.run_in_executor(
+                None,
+                lambda: clone_voice(
+                    audio_bytes=file_content,
+                    custom_voice_id=custom_voice_id,
+                    text=preview_text,
+                    user_id=user_id
+                )
+            )
+            preview_audio_bytes = result.preview_audio_bytes
+            
+        elif engine.lower() == "cosyvoice":
+            logger.info("Cloning voice with CosyVoice")
+            result = await loop.run_in_executor(
+                None,
+                lambda: cosyvoice_voice_clone(
+                    audio_bytes=file_content,
+                    text=preview_text,
+                    user_id=user_id
+                )
+            )
+            preview_audio_bytes = result.preview_audio_bytes
+            # CosyVoice doesn't persist ID on provider side, but we need one for DB
+            asset_uuid = ''.join(random.choices(string.ascii_letters + string.digits, k=8))
+            custom_voice_id = f"vc_cosy_{asset_uuid}"
+            
+        else: # qwen3 (default)
+            logger.info("Cloning voice with Qwen3")
+            result = await loop.run_in_executor(
+                None,
+                lambda: qwen3_voice_clone(
+                    audio_bytes=file_content,
+                    text=preview_text,
+                    user_id=user_id
+                )
+            )
+            preview_audio_bytes = result.preview_audio_bytes
+            # Qwen3 doesn't persist ID on provider side
+            asset_uuid = ''.join(random.choices(string.ascii_letters + string.digits, k=8))
+            custom_voice_id = f"vc_qwen_{asset_uuid}"
+
+        # 3. Save Preview Audio (if generated)
+        preview_url = None
+        if preview_audio_bytes:
+            preview_filename = f"preview_{filename}"
+            # Ensure it ends with .wav
+            if not preview_filename.endswith(".wav"):
+                preview_filename = str(Path(preview_filename).with_suffix('.wav'))
+            
+            user_voice_dir = Path(WORKSPACE_DIR) / f"workspace_{user_id}" / "assets" / "voice_samples"
+            saved_preview_path, error = save_file_safely(preview_audio_bytes, user_voice_dir, preview_filename)
+            
+            if not error and saved_preview_path:
+                preview_url = f"/api/assets/{user_id}/voice_samples/{preview_filename}"
+            
+        # 4. Save to Asset Library
        asset_id = save_asset_to_library(
            db=db,
            user_id=user_id,
            file_path=file_path,
            asset_type="audio",
-            category="voice_clone",
-            meta_data={
+            source_module="voice_cloner",
+            filename=filename,
+            file_url=f"/api/assets/{user_id}/voice_samples/{filename}",
+            asset_metadata={
                "voice_name": voice_name,
                "engine": engine,
                "description": description,
-                "original_filename": file.filename
+                "original_filename": file.filename,
+                "custom_voice_id": custom_voice_id,
+                "preview_url": preview_url,
+                "category": "voice_clone"
            }
        )
        
        return {
            "success": True,
-            "custom_voice_id": f"vc_{asset_id}", # Mock ID
-            "preview_audio_url": f"/api/assets/{user_id}/voice_samples/{filename}",
+            "custom_voice_id": custom_voice_id,
+            "preview_audio_url": preview_url or f"/api/assets/{user_id}/voice_samples/{filename}",
            "asset_id": asset_id,
-            "message": "Voice clone created successfully (simulated)"
+            "message": "Voice clone created successfully"
        }
        
    except Exception as e:
        logger.error(f"Voice cloning failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/create-voice-design")
+async def create_voice_design(
+    request: VoiceDesignRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+    db: Session = Depends(get_db)
+):
+    """Create a voice from text description (Voice Design)."""
+    try:
+        user_id = _extract_user_id(current_user)
+        logger.info(f"Designing voice for user {user_id}")
+        
+        loop = asyncio.get_event_loop()
+        
+        result = await loop.run_in_executor(
+            None,
+            lambda: qwen3_voice_design(
+                text=request.text,
+                voice_description=request.voice_description,
+                language=request.language,
+                user_id=user_id
+            )
+        )
+        
+        # Save the result to a temporary file
+        filename = generate_unique_filename("voice_design_preview", "wav")
+        user_voice_dir = Path(WORKSPACE_DIR) / f"workspace_{user_id}" / "assets" / "voice_samples"
+        saved_path, error = save_file_safely(result.preview_audio_bytes, user_voice_dir, filename)
+        
+        if error or not saved_path:
+             raise HTTPException(status_code=500, detail=f"Failed to save voice design: {error}")
+             
+        # Generate URL
+        preview_url = f"/api/assets/{user_id}/voice_samples/{filename}"
+        
+        # Save to Asset Library
+        asset_id = save_asset_to_library(
+            db=db,
+            user_id=user_id,
+            file_path=str(saved_path),
+            asset_type="audio",
+            source_module="voice_cloner",
+            filename=filename,
+            file_url=preview_url,
+            asset_metadata={
+                "voice_description": request.voice_description,
+                "text": request.text,
+                "language": request.language,
+                "engine": "qwen3-design",
+                "category": "voice_design",
+                "preview_url": preview_url
+            }
+        )
+        
+        return {
+            "success": True,
+            "preview_audio_url": preview_url,
+            "asset_id": asset_id,
+            "message": "Voice generated successfully"
+        }
+        
+    except Exception as e:
+        logger.error(f"Voice design failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
--- a/backend/api/podcast/handlers/audio.py
+++ b/backend/api/podcast/handlers/audio.py
@@ -4,10 +4,10 @@ Podcast Audio Handlers
 Audio generation, combining, and serving endpoints.
 """

-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
 from fastapi.responses import FileResponse
 from sqlalchemy.orm import Session
-from typing import Dict, Any
+from typing import Dict, Any, Optional
 from pathlib import Path
 from urllib.parse import urlparse
 import tempfile
@@ -31,6 +31,83 @@ from ..models import (
 router = APIRouter()


+@router.post("/audio/upload")
+async def upload_podcast_audio(
+    file: UploadFile = File(...),
+    project_id: Optional[str] = Form(None),
+    current_user: Dict[str, Any] = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """
+    Upload an audio file (voice sample) for a podcast project.
+    Returns the audio URL for use in video generation.
+    """
+    user_id = require_authenticated_user(current_user)
+    
+    # Validate file type
+    if not file.content_type or not file.content_type.startswith('audio/'):
+        # Allow octet-stream if extension is audio
+        allowed_exts = ['.mp3', '.wav', '.m4a', '.aac']
+        file_ext = Path(file.filename).suffix.lower()
+        if file_ext not in allowed_exts and file.content_type != 'application/octet-stream':
+             raise HTTPException(status_code=400, detail="File must be an audio file")
+    
+    # Validate file size (max 20MB)
+    file_content = await file.read()
+    if len(file_content) > 20 * 1024 * 1024:
+        raise HTTPException(status_code=400, detail="Audio file size must be less than 20MB")
+    
+    try:
+        # Generate filename
+        file_ext = Path(file.filename).suffix or '.mp3'
+        unique_id = str(uuid.uuid4())[:8]
+        audio_filename = f"audio_{project_id or 'temp'}_{unique_id}{file_ext}"
+        audio_path = PODCAST_AUDIO_DIR / audio_filename
+        
+        # Save file
+        with open(audio_path, "wb") as f:
+            f.write(file_content)
+        
+        logger.info(f"[Podcast] Audio uploaded: {audio_path}")
+        
+        # Create audio URL
+        audio_url = f"/api/podcast/audio/{audio_filename}"
+        
+        # Save to asset library if project_id provided
+        if project_id:
+            try:
+                save_asset_to_library(
+                    db=db,
+                    user_id=user_id,
+                    asset_type="audio",
+                    source_module="podcast_maker",
+                    filename=audio_filename,
+                    file_url=audio_url,
+                    file_path=str(audio_path),
+                    file_size=len(file_content),
+                    mime_type=file.content_type,
+                    title=f"Uploaded Audio - {project_id}",
+                    description="Uploaded podcast audio/voice sample",
+                    tags=["podcast", "audio", "upload", project_id],
+                    asset_metadata={
+                        "project_id": project_id,
+                        "type": "uploaded_audio",
+                        "status": "completed",
+                    },
+                )
+            except Exception as e:
+                logger.warning(f"[Podcast] Failed to save audio asset: {e}")
+        
+        return {
+            "audio_url": audio_url,
+            "audio_filename": audio_filename,
+            "message": "Audio uploaded successfully"
+        }
+    except Exception as exc:
+        logger.error(f"[Podcast] Audio upload failed: {exc}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Audio upload failed: {str(exc)}")
+
+
@router.post("/audio", response_model=PodcastAudioResponse)
 async def generate_podcast_audio(
    request: PodcastAudioRequest,
--- a/backend/api/podcast/utils.py
+++ b/backend/api/podcast/utils.py
@@ -10,6 +10,7 @@ from fastapi import HTTPException
 from loguru import logger

 from .constants import PODCAST_AUDIO_DIR, PODCAST_IMAGES_DIR
+from utils.media_utils import load_media_bytes


 def load_podcast_audio_bytes(audio_url: str) -> bytes:
@@ -54,49 +55,23 @@ def load_podcast_audio_bytes(audio_url: str) -> bytes:


 def load_podcast_image_bytes(image_url: str) -> bytes:
-    """Load podcast image bytes from URL. Only handles /api/podcast/images/ URLs."""
+    """Load podcast image bytes from URL. Uses centralized media loader."""
    if not image_url:
        raise HTTPException(status_code=400, detail="Image URL is required")
    
    logger.info(f"[Podcast] Loading image from URL: {image_url}")
    
    try:
-        parsed = urlparse(image_url)
-        path = parsed.path if parsed.scheme else image_url
+        # REUSE: Use centralized media loader which handles cross-module lookups
+        image_bytes = load_media_bytes(image_url)
        
-        # Only handle /api/podcast/images/ URLs
-        prefix = "/api/podcast/images/"
-        if prefix not in path:
-            logger.error(f"[Podcast] Unsupported image URL format: {image_url}")
-            raise HTTPException(
-                status_code=400, 
-                detail=f"Unsupported image URL format: {image_url}. Only /api/podcast/images/ URLs are supported."
-            )
-        
-        filename = path.split(prefix, 1)[1].split("?", 1)[0].strip()
-        if not filename:
-            logger.error(f"[Podcast] Could not extract filename from URL: {image_url}")
-            raise HTTPException(status_code=400, detail=f"Could not extract filename from URL: {image_url}")
-        
-        logger.info(f"[Podcast] Extracted filename: {filename}")
-        logger.info(f"[Podcast] PODCAST_IMAGES_DIR: {PODCAST_IMAGES_DIR}")
-        
-        # Podcast images are stored in podcast_images directory
-        image_path = (PODCAST_IMAGES_DIR / filename).resolve()
-        logger.info(f"[Podcast] Resolved image path: {image_path}")
-        
-        # Security check: ensure path is within PODCAST_IMAGES_DIR
-        if not str(image_path).startswith(str(PODCAST_IMAGES_DIR)):
-            logger.error(f"[Podcast] Attempted path traversal when resolving image: {image_url} -> {image_path}")
-            raise HTTPException(status_code=403, detail="Invalid image path")
-        
-        if not image_path.exists():
-            logger.error(f"[Podcast] Image file not found: {image_path}")
-            raise HTTPException(status_code=404, detail=f"Image file not found: {filename}")
-        
-        image_bytes = image_path.read_bytes()
-        logger.info(f"[Podcast] ✅ Successfully loaded image: {len(image_bytes)} bytes from {image_path}")
+        if not image_bytes:
+            logger.error(f"[Podcast] Image file not found for URL: {image_url}")
+            raise HTTPException(status_code=404, detail=f"Image file not found: {image_url}")
+            
+        logger.info(f"[Podcast] ✅ Successfully loaded image: {len(image_bytes)} bytes")
        return image_bytes
+        
    except HTTPException:
        raise
    except Exception as exc:
--- a/backend/api/subscription/routes/preflight.py
+++ b/backend/api/subscription/routes/preflight.py
@@ -56,6 +56,8 @@ async def preflight_check(
                    provider_enum = APIProvider.MISTRAL  # Maps to HuggingFace
                elif provider_str == "video":
                    provider_enum = APIProvider.VIDEO
+                elif provider_str == "fal-ai" or provider_str == "fal":
+                    provider_enum = APIProvider.VIDEO # Map fal-ai to VIDEO as it's primarily used for media gen
                elif provider_str == "image_edit":
                    provider_enum = APIProvider.IMAGE_EDIT
                elif provider_str == "stability":
--- a/backend/api/video_studio/init.py
+++ b/backend/api/video_studio/init.py
@@ -0,0 +1 @@
+# Video Studio API Module
--- a/backend/api/video_studio/handlers/avatar.py
+++ b/backend/api/video_studio/handlers/avatar.py
@@ -0,0 +1,173 @@
+from fastapi import APIRouter, UploadFile, File, Form, BackgroundTasks, HTTPException, Depends
+from fastapi.responses import FileResponse
+from typing import Optional, Dict, Any
+import shutil
+import os
+from pathlib import Path
+from services.wavespeed.infinitetalk import animate_scene_with_voiceover
+from ..task_manager import task_manager
+from middleware.auth_middleware import get_current_user
+from loguru import logger
+from services.database import get_engine_for_user
+from sqlalchemy.orm import sessionmaker
+from utils.asset_tracker import save_asset_to_library
+
+router = APIRouter()
+
+# Define storage directory
+UPLOAD_DIR = Path("backend/data/video_studio/uploads")
+UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
+
+def _process_avatar_generation(task_id: str, image_path: Path, audio_path: Path, user_id: str, resolution: str, model: str):
+    """
+    Background task to process avatar generation using shared InfiniteTalk service.
+    """
+    try:
+        task_manager.update_task(task_id, "processing", user_id=user_id)
+        
+        # Read file bytes
+        with open(image_path, "rb") as f:
+            image_bytes = f.read()
+        with open(audio_path, "rb") as f:
+            audio_bytes = f.read()
+            
+        # Dummy scene data required by the service (used for prompt generation)
+        scene_data = {
+            "title": "Test Persona", 
+            "description": "A talking avatar video generated via Video Studio."
+        }
+        story_context = {}
+        
+        # Call the common interface function
+        logger.info(f"[VideoStudio] Starting InfiniteTalk generation for task {task_id}")
+        result = animate_scene_with_voiceover(
+            image_bytes=image_bytes,
+            audio_bytes=audio_bytes,
+            scene_data=scene_data,
+            story_context=story_context,
+            user_id=user_id,
+            resolution=resolution
+        )
+        
+        # Save the resulting video bytes to a file
+        video_filename = f"video_{task_id}.mp4"
+        video_path = UPLOAD_DIR / video_filename
+        with open(video_path, "wb") as f:
+            f.write(result["video_bytes"])
+            
+        # Prepare result for frontend (remove raw bytes)
+        result.pop("video_bytes", None)
+        
+        # Add local download URL
+        video_url = f"/api/video-studio/download/{video_filename}"
+        result["video_url"] = video_url
+        
+        # Save asset to library
+        try:
+            engine = get_engine_for_user(user_id)
+            SessionLocal = sessionmaker(bind=engine)
+            db = SessionLocal()
+            try:
+                save_asset_to_library(
+                    db=db,
+                    user_id=user_id,
+                    asset_type="video",
+                    source_module="video_studio",
+                    filename=video_filename,
+                    file_url=video_url,
+                    file_path=str(video_path),
+                    file_size=video_path.stat().st_size,
+                    mime_type="video/mp4",
+                    title=f"Avatar Video {task_id}",
+                    description=f"Generated avatar video using {model}",
+                    model=model,
+                    cost=result.get("cost", 0.0),
+                    generation_time=result.get("generation_time", 0.0)
+                )
+            finally:
+                db.close()
+        except Exception as e:
+            logger.error(f"[VideoStudio] Failed to save asset to library: {e}")
+
+        logger.info(f"[VideoStudio] Task {task_id} completed successfully")
+        task_manager.update_task(task_id, "completed", result=result, user_id=user_id)
+        
+    except Exception as e:
+        logger.error(f"[VideoStudio] Avatar generation failed for task {task_id}: {e}", exc_info=True)
+        task_manager.update_task(task_id, "failed", error=str(e), user_id=user_id)
+    finally:
+        # Cleanup temp upload files
+        try:
+            if image_path.exists(): image_path.unlink()
+            if audio_path.exists(): audio_path.unlink()
+        except Exception as e:
+            logger.warning(f"[VideoStudio] Failed to cleanup temp files: {e}")
+
+@router.post("/avatar/create-async")
+async def create_avatar_video(
+    background_tasks: BackgroundTasks,
+    image: UploadFile = File(...),
+    audio: UploadFile = File(...),
+    resolution: str = Form("720p"),
+    model: str = Form("infinitetalk"),
+    current_user: dict = Depends(get_current_user)
+):
+    """
+    Create a talking avatar video using InfiniteTalk (WaveSpeed).
+    Directly uses the common backend service without Podcast Maker dependencies.
+    """
+    user_id = current_user.get("id", "anonymous")
+    
+    # Validate file types roughly
+    if not image.content_type.startswith("image/"):
+        raise HTTPException(status_code=400, detail="Invalid image file type")
+    
+    task_id = task_manager.create_task("avatar_generation", user_id=user_id)
+    
+    # Generate temp paths
+    image_ext = Path(image.filename).suffix or ".png"
+    audio_ext = Path(audio.filename).suffix or ".mp3"
+    image_path = UPLOAD_DIR / f"img_{task_id}{image_ext}"
+    audio_path = UPLOAD_DIR / f"aud_{task_id}{audio_ext}"
+    
+    try:
+        # Save uploaded files
+        with open(image_path, "wb") as f:
+            shutil.copyfileobj(image.file, f)
+        with open(audio_path, "wb") as f:
+            shutil.copyfileobj(audio.file, f)
+            
+        # Start background task
+        background_tasks.add_task(
+            _process_avatar_generation,
+            task_id,
+            image_path,
+            audio_path,
+            user_id,
+            resolution,
+            model
+        )
+        
+        return {"task_id": task_id, "status": "pending", "message": "Video generation started successfully."}
+        
+    except Exception as e:
+        # Cleanup if immediate failure
+        if image_path.exists(): image_path.unlink()
+        if audio_path.exists(): audio_path.unlink()
+        logger.error(f"[VideoStudio] Failed to start generation: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to start generation: {str(e)}")
+
+@router.get("/task/{task_id}")
+async def get_task_status(task_id: str, current_user: dict = Depends(get_current_user)):
+    user_id = current_user.get("id", "anonymous")
+    task = task_manager.get_task(task_id, user_id=user_id)
+    if not task:
+        raise HTTPException(status_code=404, detail="Task not found")
+    return task
+
+@router.get("/download/{filename}")
+async def download_video(filename: str):
+    file_path = UPLOAD_DIR / filename
+    if not file_path.exists():
+        raise HTTPException(status_code=404, detail="File not found")
+    return FileResponse(file_path)
--- a/backend/api/video_studio/router.py
+++ b/backend/api/video_studio/router.py
@@ -0,0 +1,6 @@
+from fastapi import APIRouter
+from .handlers import avatar
+
+router = APIRouter(prefix="/api/video-studio", tags=["Video Studio"])
+
+router.include_router(avatar.router)
--- a/backend/api/video_studio/task_manager.py
+++ b/backend/api/video_studio/task_manager.py
@@ -0,0 +1,126 @@
+from typing import Dict, Any, Optional
+import uuid
+from datetime import datetime
+from loguru import logger
+from sqlalchemy.orm import sessionmaker
+from services.database import get_engine_for_user
+from models.video_models import VideoGenerationTask, VideoTaskStatus, Base
+
+class TaskManager:
+    def __init__(self):
+        pass
+
+    def create_task(self, task_type: str, user_id: str, request_data: Optional[Dict] = None) -> str:
+        """Create a new persistent task."""
+        task_id = str(uuid.uuid4())
+        
+        try:
+            engine = get_engine_for_user(user_id)
+            # Ensure table exists
+            Base.metadata.create_all(bind=engine)
+            
+            SessionLocal = sessionmaker(bind=engine)
+            db = SessionLocal()
+            
+            try:
+                task = VideoGenerationTask(
+                    task_id=task_id,
+                    user_id=user_id,
+                    status=VideoTaskStatus.PENDING,
+                    request_data=request_data
+                )
+                db.add(task)
+                db.commit()
+                logger.info(f"[VideoStudio] Created persistent task {task_id} for user {user_id}")
+                return task_id
+            finally:
+                db.close()
+        except Exception as e:
+            logger.error(f"[VideoStudio] Failed to create task: {e}")
+            raise
+
+    def update_task(self, task_id: str, status: str, result: Optional[Dict] = None, error: Optional[str] = None, user_id: str = None, progress: float = None, message: str = None):
+        """Update an existing task."""
+        if not user_id:
+            logger.error(f"[VideoStudio] Cannot update task {task_id} without user_id")
+            return
+
+        try:
+            engine = get_engine_for_user(user_id)
+            SessionLocal = sessionmaker(bind=engine)
+            db = SessionLocal()
+            
+            try:
+                task = db.query(VideoGenerationTask).filter(VideoGenerationTask.task_id == task_id).first()
+                if not task:
+                    logger.warning(f"[VideoStudio] Task {task_id} not found in DB for update")
+                    return
+                
+                # Map string status to Enum
+                try:
+                    # Handle case-insensitive status mapping
+                    status_upper = status.upper()
+                    if status_upper == "RUNNING": 
+                        status_upper = "PROCESSING"
+                    enum_status = VideoTaskStatus[status_upper]
+                except KeyError:
+                    logger.warning(f"[VideoStudio] Invalid status {status}, defaulting to PROCESSING")
+                    enum_status = VideoTaskStatus.PROCESSING
+                
+                task.status = enum_status
+                task.updated_at = datetime.utcnow()
+                
+                if result:
+                    task.result = result
+                if error:
+                    task.error = error
+                if progress is not None:
+                    task.progress = progress
+                if message:
+                    task.message = message
+                
+                db.commit()
+                logger.debug(f"[VideoStudio] Updated task {task_id} to {status}")
+            finally:
+                db.close()
+        except Exception as e:
+            logger.error(f"[VideoStudio] Failed to update task {task_id}: {e}")
+
+    def get_task(self, task_id: str, user_id: str = None) -> Optional[Dict[str, Any]]:
+        """Retrieve task status."""
+        if not user_id:
+            logger.error(f"[VideoStudio] Cannot get task {task_id} without user_id")
+            return None
+
+        try:
+            engine = get_engine_for_user(user_id)
+            SessionLocal = sessionmaker(bind=engine)
+            db = SessionLocal()
+            
+            try:
+                task = db.query(VideoGenerationTask).filter(VideoGenerationTask.task_id == task_id).first()
+                if not task:
+                    return None
+                
+                # Map internal status to frontend status
+                status_val = task.status.value
+                if status_val == "processing":
+                    status_val = "running"
+
+                return {
+                    "task_id": task.task_id,
+                    "status": status_val,
+                    "result": task.result,
+                    "error": task.error,
+                    "progress": task.progress,
+                    "message": task.message,
+                    "created_at": task.created_at,
+                    "updated_at": task.updated_at
+                }
+            finally:
+                db.close()
+        except Exception as e:
+            logger.error(f"[VideoStudio] Failed to get task {task_id}: {e}")
+            return None
+
+task_manager = TaskManager()
--- a/backend/api/youtube/handlers/images.py
+++ b/backend/api/youtube/handlers/images.py
@@ -17,6 +17,7 @@ from services.subscription.preflight_validator import validate_image_generation_
 from services.llm_providers.main_image_generation import generate_image, generate_character_image
 from utils.asset_tracker import save_asset_to_library
 from utils.logger_utils import get_service_logger
+from utils.media_utils import load_media_bytes
 from ..task_manager import task_manager

 router = APIRouter(tags=["youtube-image"])
@@ -59,36 +60,15 @@ def require_authenticated_user(current_user: Dict[str, Any]) -> str:

 def _load_base_avatar_bytes(avatar_url: str) -> Optional[bytes]:
    """Load base avatar bytes for character consistency."""
-    try:
-        # Handle different avatar URL formats
-        if avatar_url.startswith("/api/youtube/avatars/"):
-            # YouTube avatar
-            filename = avatar_url.split("/")[-1].split("?")[0]
-            avatar_path = YOUTUBE_AVATARS_DIR / filename
-        elif avatar_url.startswith("/api/podcast/avatars/"):
-            # Podcast avatar (cross-module usage)
-            filename = avatar_url.split("/")[-1].split("?")[0]
-            from pathlib import Path
-            podcast_avatars_dir = Path(__file__).parent.parent.parent.parent / "podcast_avatars"
-            avatar_path = podcast_avatars_dir / filename
-        else:
-            # Try to extract filename and check YouTube avatars first
-            filename = avatar_url.split("/")[-1].split("?")[0]
-            avatar_path = YOUTUBE_AVATARS_DIR / filename
-            if not avatar_path.exists():
-                # Fallback to podcast avatars
-                podcast_avatars_dir = Path(__file__).parent.parent.parent.parent / "podcast_avatars"
-                avatar_path = podcast_avatars_dir / filename
-
-        if not avatar_path.exists() or not avatar_path.is_file():
-            logger.warning(f"[YouTube] Avatar file not found: {avatar_path}")
-            return None
-
-        logger.info(f"[YouTube] Successfully loaded avatar: {avatar_path}")
-        return avatar_path.read_bytes()
-    except Exception as e:
-        logger.error(f"[YouTube] Error loading avatar from {avatar_url}: {e}")
-        return None
+    # REUSE: Use centralized media loader
+    avatar_bytes = load_media_bytes(avatar_url)
+    
+    if avatar_bytes:
+        logger.info(f"[YouTube] Successfully loaded avatar from: {avatar_url}")
+        return avatar_bytes
+    
+    logger.warning(f"[YouTube] Avatar file not found for URL: {avatar_url}")
+    return None


 def _save_scene_image(image_bytes: bytes, scene_id: str) -> Dict[str, str]: