Save local changes (GSC/Bing integrations) before merging PR #354

This commit is contained in:
ajaysi
2026-02-13 13:11:27 +05:30
parent 43e66835ac
commit 08a1f4a1d8
144 changed files with 8310 additions and 2748 deletions

View File

@@ -0,0 +1,52 @@
from fastapi import APIRouter, HTTPException
from fastapi.responses import FileResponse
import os
from pathlib import Path
from services.database import WORKSPACE_DIR, get_user_db_path
router = APIRouter(prefix="/api/assets", tags=["Assets Serving"])
@router.get("/{user_id}/avatars/{filename}")
async def serve_avatar(user_id: str, filename: str):
"""
Serve avatar images directly.
Public endpoint relying on unguessable filenames.
"""
# Sanitize user_id (simple check to prevent directory traversal)
safe_user_id = "".join(c for c in user_id if c.isalnum() or c in ('-', '_'))
if safe_user_id != user_id:
raise HTTPException(status_code=400, detail="Invalid user ID")
# Sanitize filename
safe_filename = os.path.basename(filename)
# Construct path
# workspace/workspace_{user_id}/assets/avatars/{filename}
file_path = Path(WORKSPACE_DIR) / f"workspace_{safe_user_id}" / "assets" / "avatars" / safe_filename
if not file_path.exists():
raise HTTPException(status_code=404, detail="Asset not found")
return FileResponse(file_path)
@router.get("/{user_id}/voice_samples/{filename}")
async def serve_voice_sample(user_id: str, filename: str):
"""
Serve voice sample audio files directly.
"""
# Sanitize user_id
safe_user_id = "".join(c for c in user_id if c.isalnum() or c in ('-', '_'))
if safe_user_id != user_id:
raise HTTPException(status_code=400, detail="Invalid user ID")
# Sanitize filename
safe_filename = os.path.basename(filename)
# Construct path
# workspace/workspace_{user_id}/assets/voice_samples/{filename}
file_path = Path(WORKSPACE_DIR) / f"workspace_{safe_user_id}" / "assets" / "voice_samples" / safe_filename
if not file_path.exists():
raise HTTPException(status_code=404, detail="Asset not found")
return FileResponse(file_path)

View File

@@ -0,0 +1,97 @@
# Brand Avatar API Documentation
## Overview
The Brand Avatar API provides endpoints for generating, varying, and enhancing brand avatars using WaveSpeed AI.
**Base URL**: `/api/onboarding/assets`
## Endpoints
### 1. Generate Avatar
Generate a new brand avatar from a text prompt.
- **URL**: `/generate-avatar`
- **Method**: `POST`
- **Body** (`application/json`):
```json
{
"prompt": "A professional tech entrepreneur, studio lighting",
"style_preset": "Cinematic",
"aspect_ratio": "1:1",
"model": "ideogram-v3-turbo",
"provider": "wavespeed"
}
```
- **Response**:
```json
{
"success": true,
"image_url": "/api/assets/{user_id}/avatars/{filename}.png",
"image_base64": "...",
"asset_id": 123
}
```
### 2. Create Variation
Create a variation of an existing avatar/image.
- **URL**: `/create-variation`
- **Method**: `POST`
- **Content-Type**: `multipart/form-data`
- **Form Data**:
- `prompt` (text): Description of the variation (e.g., "same person but smiling")
- `file` (file): The reference image file
- **Response**:
```json
{
"success": true,
"image_url": "/api/assets/{user_id}/avatars/{filename}.png",
"image_base64": "...",
"asset_id": 124
}
```
### 3. Enhance Avatar
Upscale and enhance an existing avatar image.
- **URL**: `/enhance-avatar`
- **Method**: `POST`
- **Content-Type**: `multipart/form-data`
- **Form Data**:
- `file` (file): The image file to enhance
- **Response**:
```json
{
"success": true,
"image_url": "/api/assets/{user_id}/avatars/{filename}.png",
"image_base64": "...",
"asset_id": 125
}
```
### 4. Enhance Prompt
Optimize a simple prompt into a detailed, high-quality prompt using WaveSpeed.
- **URL**: `/enhance-prompt`
- **Method**: `POST`
- **Body**:
```json
{
"prompt": "man in suit"
}
```
- **Response**:
```json
{
"success": true,
"original_prompt": "man in suit",
"optimized_prompt": "A professional portrait of a man in a tailored navy blue suit, confident expression, studio lighting, 4k resolution..."
}
```
## Providers
- **Default Provider**: `wavespeed`
- **Models**:
- Generation: `ideogram-v3-turbo` (default), `qwen-image`
- Editing/Variation: `qwen-edit-plus` (default)
- Enhancement: `nano-banana-pro-edit-ultra` (4K upscale)

View File

@@ -100,6 +100,8 @@ class OnboardingCompletionService:
except Exception as e:
logger.warning(f"Failed to schedule website analysis task creation for user {user_id}: {e}")
# Schedule onboarding full-site SEO audit (non-blocking) ~10 minutes after completion
try:
from services.database import SessionLocal

View File

@@ -10,22 +10,36 @@ from sqlalchemy.orm import Session
from pydantic import BaseModel
from loguru import logger
from .step4_persona_routes import _extract_user_id
from middleware.auth_middleware import get_current_user
import base64
import os
from pathlib import Path
from utils.file_storage import save_file_safely, generate_unique_filename
from services.database import get_db, WORKSPACE_DIR
from utils.asset_tracker import save_asset_to_library
from models.content_asset_models import ContentAsset, AssetType, AssetSource
from sqlalchemy import desc
from services.llm_providers.main_image_generation import (
generate_image_with_provider,
enhance_image_prompt,
generate_image_variation
generate_image_variation,
generate_image_enhance
)
from services.llm_providers.main_audio_generation import clone_voice, qwen3_voice_clone, cosyvoice_voice_clone, qwen3_voice_design
import asyncio
import random
import string
router = APIRouter()
router = APIRouter(prefix="/onboarding/assets")
# --- Models ---
class VoiceDesignRequest(BaseModel):
user_id: Optional[str] = None
text: str
voice_description: str
language: str = "auto"
class AvatarPromptRequest(BaseModel):
user_id: Optional[str] = None
prompt: str
@@ -34,6 +48,9 @@ class AvatarPromptRequest(BaseModel):
negative_prompt: Optional[str] = None
num_inference_steps: int = 30
guidance_scale: float = 7.5
model: Optional[str] = None
rendering_speed: Optional[str] = None
provider: Optional[str] = None
class AvatarEnhanceRequest(BaseModel):
user_id: Optional[str] = None
@@ -47,14 +64,108 @@ class VoiceCloneRequest(BaseModel):
# --- Routes ---
@router.get("/latest-avatar")
async def get_latest_avatar(
current_user: Dict[str, Any] = Depends(get_current_user),
db: Session = Depends(get_db)
):
"""Get the latest generated brand avatar for the user."""
try:
user_id = _extract_user_id(current_user)
# Search for assets that are either:
# 1. Saved with source_module=BRAND_AVATAR_GENERATOR (new)
# 2. Saved with source_module=STORY_WRITER but have metadata category='brand_avatar' (legacy)
# Fetch candidates (limit to recent 20 to avoid performance issues)
candidates = db.query(ContentAsset).filter(
ContentAsset.user_id == user_id,
ContentAsset.asset_type == AssetType.IMAGE,
ContentAsset.source_module.in_([
AssetSource.BRAND_AVATAR_GENERATOR,
AssetSource.STORY_WRITER
])
).order_by(desc(ContentAsset.created_at)).limit(50).all()
asset = None
for candidate in candidates:
# Check for direct match (new assets)
if candidate.source_module == AssetSource.BRAND_AVATAR_GENERATOR:
asset = candidate
break
# Check for legacy match (metadata category)
if candidate.source_module == AssetSource.STORY_WRITER:
meta = candidate.asset_metadata or {}
if meta.get('category') == 'brand_avatar':
asset = candidate
break
if not asset:
return {"success": False, "message": "No avatar found"}
# Fallback to metadata prompt if main column is empty (legacy support)
prompt = asset.prompt
if not prompt and asset.asset_metadata:
prompt = asset.asset_metadata.get('prompt', '')
return {
"success": True,
"image_url": asset.file_url,
"prompt": prompt,
"asset_id": asset.id,
"provider": asset.provider
}
except Exception as e:
logger.error(f"Failed to fetch latest avatar: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/latest-voice-clone")
async def get_latest_voice_clone(
current_user: Dict[str, Any] = Depends(get_current_user),
db: Session = Depends(get_db)
):
"""Get the latest generated voice clone for the user."""
try:
user_id = _extract_user_id(current_user)
# Fetch latest voice clone asset
asset = db.query(ContentAsset).filter(
ContentAsset.user_id == user_id,
ContentAsset.asset_type == AssetType.AUDIO,
ContentAsset.source_module == AssetSource.VOICE_CLONER
).order_by(desc(ContentAsset.created_at)).first()
if not asset:
# Try to find legacy assets or assets that might have been saved differently
# For example, voice designs might be saved as VOICE_CLONER too?
# Or check for 'voice_design' logic if needed, but 'voice_cloner' is primary
return {"success": False, "message": "No voice clone found"}
meta = asset.asset_metadata or {}
return {
"success": True,
"custom_voice_id": meta.get("custom_voice_id"),
"preview_audio_url": meta.get("preview_url") or asset.file_url,
"asset_id": asset.id,
"voice_name": meta.get("voice_name"),
"engine": meta.get("engine")
}
except Exception as e:
logger.error(f"Failed to fetch latest voice clone: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/generate-avatar")
async def generate_avatar(
request: AvatarPromptRequest,
current_user: Dict[str, Any] = Depends(get_current_user),
db: Session = Depends(get_db)
):
"""Generate a brand avatar using available image providers."""
try:
user_id = _extract_user_id(request.user_id)
user_id = _extract_user_id(current_user)
logger.info(f"Generating avatar for user {user_id} with prompt: {request.prompt}")
@@ -66,6 +177,9 @@ async def generate_avatar(
num_inference_steps=request.num_inference_steps,
guidance_scale=request.guidance_scale,
style_preset=request.style_preset,
model=request.model,
rendering_speed=request.rendering_speed,
provider=request.provider,
user_id=user_id
)
@@ -78,42 +192,66 @@ async def generate_avatar(
image_data = result.get("image_base64")
if not image_data and result.get("image_url"):
# TODO: Download image from URL if needed, or just store URL
pass
try:
import httpx
async with httpx.AsyncClient() as client:
response = await client.get(result["image_url"], timeout=30.0)
response.raise_for_status()
image_data = response.content
except ImportError:
# Fallback to requests if httpx is not installed
import requests
response = requests.get(result["image_url"], timeout=30.0)
response.raise_for_status()
image_data = response.content
except Exception as e:
logger.error(f"Failed to download image from URL: {e}")
raise HTTPException(status_code=500, detail=f"Failed to download generated image: {str(e)}")
if image_data:
# Decode if needed (usually it's already base64 string)
# Save file
filename = generate_unique_filename("avatar", "png")
file_path = save_file_safely(
base64.b64decode(image_data) if isinstance(image_data, str) else image_data,
user_id,
"avatars",
# If image_data is bytes (from URL download), pass it directly
# If it's base64 string (from API), decode it
content_to_save = base64.b64decode(image_data) if isinstance(image_data, str) else image_data
# Construct user assets directory
user_assets_dir = Path(WORKSPACE_DIR) / f"workspace_{user_id}" / "assets" / "avatars"
saved_path, error = save_file_safely(
content_to_save,
user_assets_dir,
filename
)
if error or not saved_path:
raise HTTPException(status_code=500, detail=f"Failed to save image file: {error}")
# Construct public URL
image_url = f"/api/assets/{user_id}/avatars/{filename}"
# Save to Asset Library
asset_id = save_asset_to_library(
db=db,
user_id=user_id,
file_path=file_path,
asset_type="image",
category="brand_avatar",
meta_data={
"prompt": request.prompt,
source_module="brand_avatar_generator",
filename=filename,
file_url=image_url,
file_path=str(saved_path),
prompt=request.prompt,
asset_metadata={
"provider": result.get("provider", "unknown"),
"style": request.style_preset
"style": request.style_preset,
"category": "brand_avatar"
}
)
# Construct public URL (this depends on your static file serving setup)
# Assuming /api/assets/{user_id}/avatars/{filename}
image_url = f"/api/assets/{user_id}/avatars/{filename}"
return {
"success": True,
"image_url": image_url,
"image_base64": image_data, # Optional: return base64 for immediate display
"image_base64": image_data if isinstance(image_data, str) else base64.b64encode(image_data).decode('utf-8'),
"asset_id": asset_id
}
@@ -126,14 +264,15 @@ async def generate_avatar(
@router.post("/enhance-prompt")
async def enhance_prompt_route(
request: AvatarEnhanceRequest
request: AvatarEnhanceRequest,
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""Enhance a simple prompt into a detailed midjourney-style prompt."""
try:
user_id = _extract_user_id(request.user_id)
user_id = _extract_user_id(current_user)
logger.info(f"Enhancing prompt for user {user_id}: {request.prompt}")
enhanced_prompt = await enhance_image_prompt(request.prompt)
enhanced_prompt = await enhance_image_prompt(request.prompt, user_id=user_id)
return {
"success": True,
@@ -145,52 +284,347 @@ async def enhance_prompt_route(
raise HTTPException(status_code=500, detail=str(e))
@router.post("/create-variation")
async def create_variation_route(
prompt: str = Form(...),
file: UploadFile = File(...),
user_id: Optional[str] = Form(None), # Ignored in favor of authenticated user
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""Generate a variation of an existing avatar."""
try:
user_id = _extract_user_id(current_user)
logger.info(f"Creating variation for user {user_id} with prompt: {prompt}")
# Read file
file_content = await file.read()
result = await generate_image_variation(
image=file_content,
prompt=prompt,
user_id=user_id
)
if not result.get("success"):
raise HTTPException(status_code=500, detail=result.get("error", "Variation generation failed"))
# Save result
image_data = result.get("image_base64")
if image_data:
filename = generate_unique_filename("avatar_variation", "png")
content_to_save = base64.b64decode(image_data)
# Construct user assets directory
user_assets_dir = Path(WORKSPACE_DIR) / f"workspace_{user_id}" / "assets" / "avatars"
saved_path, error = save_file_safely(
content_to_save,
user_assets_dir,
filename
)
if error or not saved_path:
raise HTTPException(status_code=500, detail=f"Failed to save variation file: {error}")
# Construct public URL
image_url = f"/api/assets/{user_id}/avatars/{filename}"
# Save to Asset Library
asset_id = save_asset_to_library(
db=next(get_db()),
user_id=user_id,
asset_type="image",
source_module="brand_avatar_variation",
filename=filename,
file_url=image_url,
file_path=str(saved_path),
asset_metadata={
"prompt": prompt,
"provider": "wavespeed",
"original_filename": file.filename,
"category": "brand_avatar_variation"
}
)
return {
"success": True,
"image_url": image_url,
"image_base64": image_data,
"asset_id": asset_id
}
return {"success": False, "error": "No image data returned"}
except Exception as e:
logger.error(f"Variation generation failed: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/enhance-avatar")
async def enhance_avatar_route(
file: UploadFile = File(...),
user_id: Optional[str] = Form(None), # Ignored in favor of authenticated user
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""Enhance/Upscale an existing avatar."""
try:
user_id = _extract_user_id(current_user)
logger.info(f"Enhancing avatar for user {user_id}")
# Read file
file_content = await file.read()
result = await generate_image_enhance(
image=file_content,
user_id=user_id
)
if not result.get("success"):
raise HTTPException(status_code=500, detail=result.get("error", "Enhancement failed"))
# Save result
image_data = result.get("image_base64")
if image_data:
filename = generate_unique_filename("avatar_enhanced", "png")
content_to_save = base64.b64decode(image_data)
# Construct user assets directory
user_assets_dir = Path(WORKSPACE_DIR) / f"workspace_{user_id}" / "assets" / "avatars"
saved_path, error = save_file_safely(
content_to_save,
user_assets_dir,
filename
)
if error or not saved_path:
raise HTTPException(status_code=500, detail=f"Failed to save enhanced file: {error}")
# Construct public URL
image_url = f"/api/assets/{user_id}/avatars/{filename}"
# Save to Asset Library
asset_id = save_asset_to_library(
db=next(get_db()),
user_id=user_id,
asset_type="image",
source_module="brand_avatar_enhancer",
filename=filename,
file_url=image_url,
file_path=str(saved_path),
asset_metadata={
"provider": "wavespeed",
"category": "brand_avatar_enhanced",
"original_filename": file.filename
}
)
return {
"success": True,
"image_url": image_url,
"image_base64": image_data,
"asset_id": asset_id
}
return {"success": False, "error": "No image data returned"}
except Exception as e:
logger.error(f"Avatar enhancement failed: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/create-voice-clone")
async def create_voice_clone(
voice_name: str = Form(...),
description: str = Form(None),
engine: str = Form("qwen3"),
file: UploadFile = File(...),
user_id: Optional[str] = Form(None),
user_id: Optional[str] = Form(None), # Ignored in favor of authenticated user
current_user: Dict[str, Any] = Depends(get_current_user),
db: Session = Depends(get_db)
):
"""Create a voice clone from an audio file."""
try:
user_id = _extract_user_id(user_id)
logger.info(f"Creating voice clone '{voice_name}' for user {user_id}")
user_id = _extract_user_id(current_user)
logger.info(f"Creating voice clone '{voice_name}' (engine={engine}) for user {user_id}")
# 1. Save uploaded audio file
file_content = await file.read()
filename = generate_unique_filename("voice_sample", Path(file.filename).suffix.lstrip("."))
file_path = save_file_safely(file_content, user_id, "voice_samples", filename)
# 2. Call Voice Cloning API (Placeholder for actual implementation)
# TODO: Integrate with Minimax or CosyVoice API
# For now, we simulate success
user_voice_dir = Path(WORKSPACE_DIR) / f"workspace_{user_id}" / "assets" / "voice_samples"
saved_path, error = save_file_safely(file_content, user_voice_dir, filename)
# 3. Save to Asset Library
if error or not saved_path:
raise HTTPException(status_code=500, detail=f"Failed to save voice sample: {error}")
file_path = str(saved_path)
# 2. Call Voice Cloning API
preview_audio_bytes = None
custom_voice_id = None
loop = asyncio.get_event_loop()
# Default preview text
preview_text = "Hello! This is a preview of my cloned voice using AI technology. I hope you like it!"
if engine.lower() == "minimax":
# Generate valid voice ID for Minimax (alphanumeric, starts with letter, 8+ chars)
random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=8))
custom_voice_id = f"vc_{random_suffix}"
logger.info(f"Cloning voice with Minimax, ID: {custom_voice_id}")
# Run blocking call in executor
result = await loop.run_in_executor(
None,
lambda: clone_voice(
audio_bytes=file_content,
custom_voice_id=custom_voice_id,
text=preview_text,
user_id=user_id
)
)
preview_audio_bytes = result.preview_audio_bytes
elif engine.lower() == "cosyvoice":
logger.info("Cloning voice with CosyVoice")
result = await loop.run_in_executor(
None,
lambda: cosyvoice_voice_clone(
audio_bytes=file_content,
text=preview_text,
user_id=user_id
)
)
preview_audio_bytes = result.preview_audio_bytes
# CosyVoice doesn't persist ID on provider side, but we need one for DB
asset_uuid = ''.join(random.choices(string.ascii_letters + string.digits, k=8))
custom_voice_id = f"vc_cosy_{asset_uuid}"
else: # qwen3 (default)
logger.info("Cloning voice with Qwen3")
result = await loop.run_in_executor(
None,
lambda: qwen3_voice_clone(
audio_bytes=file_content,
text=preview_text,
user_id=user_id
)
)
preview_audio_bytes = result.preview_audio_bytes
# Qwen3 doesn't persist ID on provider side
asset_uuid = ''.join(random.choices(string.ascii_letters + string.digits, k=8))
custom_voice_id = f"vc_qwen_{asset_uuid}"
# 3. Save Preview Audio (if generated)
preview_url = None
if preview_audio_bytes:
preview_filename = f"preview_{filename}"
# Ensure it ends with .wav
if not preview_filename.endswith(".wav"):
preview_filename = str(Path(preview_filename).with_suffix('.wav'))
user_voice_dir = Path(WORKSPACE_DIR) / f"workspace_{user_id}" / "assets" / "voice_samples"
saved_preview_path, error = save_file_safely(preview_audio_bytes, user_voice_dir, preview_filename)
if not error and saved_preview_path:
preview_url = f"/api/assets/{user_id}/voice_samples/{preview_filename}"
# 4. Save to Asset Library
asset_id = save_asset_to_library(
db=db,
user_id=user_id,
file_path=file_path,
asset_type="audio",
category="voice_clone",
meta_data={
source_module="voice_cloner",
filename=filename,
file_url=f"/api/assets/{user_id}/voice_samples/{filename}",
asset_metadata={
"voice_name": voice_name,
"engine": engine,
"description": description,
"original_filename": file.filename
"original_filename": file.filename,
"custom_voice_id": custom_voice_id,
"preview_url": preview_url,
"category": "voice_clone"
}
)
return {
"success": True,
"custom_voice_id": f"vc_{asset_id}", # Mock ID
"preview_audio_url": f"/api/assets/{user_id}/voice_samples/{filename}",
"custom_voice_id": custom_voice_id,
"preview_audio_url": preview_url or f"/api/assets/{user_id}/voice_samples/{filename}",
"asset_id": asset_id,
"message": "Voice clone created successfully (simulated)"
"message": "Voice clone created successfully"
}
except Exception as e:
logger.error(f"Voice cloning failed: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/create-voice-design")
async def create_voice_design(
request: VoiceDesignRequest,
current_user: Dict[str, Any] = Depends(get_current_user),
db: Session = Depends(get_db)
):
"""Create a voice from text description (Voice Design)."""
try:
user_id = _extract_user_id(current_user)
logger.info(f"Designing voice for user {user_id}")
loop = asyncio.get_event_loop()
result = await loop.run_in_executor(
None,
lambda: qwen3_voice_design(
text=request.text,
voice_description=request.voice_description,
language=request.language,
user_id=user_id
)
)
# Save the result to a temporary file
filename = generate_unique_filename("voice_design_preview", "wav")
user_voice_dir = Path(WORKSPACE_DIR) / f"workspace_{user_id}" / "assets" / "voice_samples"
saved_path, error = save_file_safely(result.preview_audio_bytes, user_voice_dir, filename)
if error or not saved_path:
raise HTTPException(status_code=500, detail=f"Failed to save voice design: {error}")
# Generate URL
preview_url = f"/api/assets/{user_id}/voice_samples/{filename}"
# Save to Asset Library
asset_id = save_asset_to_library(
db=db,
user_id=user_id,
file_path=str(saved_path),
asset_type="audio",
source_module="voice_cloner",
filename=filename,
file_url=preview_url,
asset_metadata={
"voice_description": request.voice_description,
"text": request.text,
"language": request.language,
"engine": "qwen3-design",
"category": "voice_design",
"preview_url": preview_url
}
)
return {
"success": True,
"preview_audio_url": preview_url,
"asset_id": asset_id,
"message": "Voice generated successfully"
}
except Exception as e:
logger.error(f"Voice design failed: {e}")
raise HTTPException(status_code=500, detail=str(e))

View File

@@ -4,10 +4,10 @@ Podcast Audio Handlers
Audio generation, combining, and serving endpoints.
"""
from fastapi import APIRouter, Depends, HTTPException
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from typing import Dict, Any
from typing import Dict, Any, Optional
from pathlib import Path
from urllib.parse import urlparse
import tempfile
@@ -31,6 +31,83 @@ from ..models import (
router = APIRouter()
@router.post("/audio/upload")
async def upload_podcast_audio(
file: UploadFile = File(...),
project_id: Optional[str] = Form(None),
current_user: Dict[str, Any] = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""
Upload an audio file (voice sample) for a podcast project.
Returns the audio URL for use in video generation.
"""
user_id = require_authenticated_user(current_user)
# Validate file type
if not file.content_type or not file.content_type.startswith('audio/'):
# Allow octet-stream if extension is audio
allowed_exts = ['.mp3', '.wav', '.m4a', '.aac']
file_ext = Path(file.filename).suffix.lower()
if file_ext not in allowed_exts and file.content_type != 'application/octet-stream':
raise HTTPException(status_code=400, detail="File must be an audio file")
# Validate file size (max 20MB)
file_content = await file.read()
if len(file_content) > 20 * 1024 * 1024:
raise HTTPException(status_code=400, detail="Audio file size must be less than 20MB")
try:
# Generate filename
file_ext = Path(file.filename).suffix or '.mp3'
unique_id = str(uuid.uuid4())[:8]
audio_filename = f"audio_{project_id or 'temp'}_{unique_id}{file_ext}"
audio_path = PODCAST_AUDIO_DIR / audio_filename
# Save file
with open(audio_path, "wb") as f:
f.write(file_content)
logger.info(f"[Podcast] Audio uploaded: {audio_path}")
# Create audio URL
audio_url = f"/api/podcast/audio/{audio_filename}"
# Save to asset library if project_id provided
if project_id:
try:
save_asset_to_library(
db=db,
user_id=user_id,
asset_type="audio",
source_module="podcast_maker",
filename=audio_filename,
file_url=audio_url,
file_path=str(audio_path),
file_size=len(file_content),
mime_type=file.content_type,
title=f"Uploaded Audio - {project_id}",
description="Uploaded podcast audio/voice sample",
tags=["podcast", "audio", "upload", project_id],
asset_metadata={
"project_id": project_id,
"type": "uploaded_audio",
"status": "completed",
},
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save audio asset: {e}")
return {
"audio_url": audio_url,
"audio_filename": audio_filename,
"message": "Audio uploaded successfully"
}
except Exception as exc:
logger.error(f"[Podcast] Audio upload failed: {exc}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Audio upload failed: {str(exc)}")
@router.post("/audio", response_model=PodcastAudioResponse)
async def generate_podcast_audio(
request: PodcastAudioRequest,

View File

@@ -10,6 +10,7 @@ from fastapi import HTTPException
from loguru import logger
from .constants import PODCAST_AUDIO_DIR, PODCAST_IMAGES_DIR
from utils.media_utils import load_media_bytes
def load_podcast_audio_bytes(audio_url: str) -> bytes:
@@ -54,49 +55,23 @@ def load_podcast_audio_bytes(audio_url: str) -> bytes:
def load_podcast_image_bytes(image_url: str) -> bytes:
"""Load podcast image bytes from URL. Only handles /api/podcast/images/ URLs."""
"""Load podcast image bytes from URL. Uses centralized media loader."""
if not image_url:
raise HTTPException(status_code=400, detail="Image URL is required")
logger.info(f"[Podcast] Loading image from URL: {image_url}")
try:
parsed = urlparse(image_url)
path = parsed.path if parsed.scheme else image_url
# REUSE: Use centralized media loader which handles cross-module lookups
image_bytes = load_media_bytes(image_url)
# Only handle /api/podcast/images/ URLs
prefix = "/api/podcast/images/"
if prefix not in path:
logger.error(f"[Podcast] Unsupported image URL format: {image_url}")
raise HTTPException(
status_code=400,
detail=f"Unsupported image URL format: {image_url}. Only /api/podcast/images/ URLs are supported."
)
filename = path.split(prefix, 1)[1].split("?", 1)[0].strip()
if not filename:
logger.error(f"[Podcast] Could not extract filename from URL: {image_url}")
raise HTTPException(status_code=400, detail=f"Could not extract filename from URL: {image_url}")
logger.info(f"[Podcast] Extracted filename: {filename}")
logger.info(f"[Podcast] PODCAST_IMAGES_DIR: {PODCAST_IMAGES_DIR}")
# Podcast images are stored in podcast_images directory
image_path = (PODCAST_IMAGES_DIR / filename).resolve()
logger.info(f"[Podcast] Resolved image path: {image_path}")
# Security check: ensure path is within PODCAST_IMAGES_DIR
if not str(image_path).startswith(str(PODCAST_IMAGES_DIR)):
logger.error(f"[Podcast] Attempted path traversal when resolving image: {image_url} -> {image_path}")
raise HTTPException(status_code=403, detail="Invalid image path")
if not image_path.exists():
logger.error(f"[Podcast] Image file not found: {image_path}")
raise HTTPException(status_code=404, detail=f"Image file not found: {filename}")
image_bytes = image_path.read_bytes()
logger.info(f"[Podcast] ✅ Successfully loaded image: {len(image_bytes)} bytes from {image_path}")
if not image_bytes:
logger.error(f"[Podcast] Image file not found for URL: {image_url}")
raise HTTPException(status_code=404, detail=f"Image file not found: {image_url}")
logger.info(f"[Podcast] ✅ Successfully loaded image: {len(image_bytes)} bytes")
return image_bytes
except HTTPException:
raise
except Exception as exc:

View File

@@ -56,6 +56,8 @@ async def preflight_check(
provider_enum = APIProvider.MISTRAL # Maps to HuggingFace
elif provider_str == "video":
provider_enum = APIProvider.VIDEO
elif provider_str == "fal-ai" or provider_str == "fal":
provider_enum = APIProvider.VIDEO # Map fal-ai to VIDEO as it's primarily used for media gen
elif provider_str == "image_edit":
provider_enum = APIProvider.IMAGE_EDIT
elif provider_str == "stability":

View File

@@ -0,0 +1 @@
# Video Studio API Module

View File

@@ -0,0 +1,173 @@
from fastapi import APIRouter, UploadFile, File, Form, BackgroundTasks, HTTPException, Depends
from fastapi.responses import FileResponse
from typing import Optional, Dict, Any
import shutil
import os
from pathlib import Path
from services.wavespeed.infinitetalk import animate_scene_with_voiceover
from ..task_manager import task_manager
from middleware.auth_middleware import get_current_user
from loguru import logger
from services.database import get_engine_for_user
from sqlalchemy.orm import sessionmaker
from utils.asset_tracker import save_asset_to_library
router = APIRouter()
# Define storage directory
UPLOAD_DIR = Path("backend/data/video_studio/uploads")
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
def _process_avatar_generation(task_id: str, image_path: Path, audio_path: Path, user_id: str, resolution: str, model: str):
"""
Background task to process avatar generation using shared InfiniteTalk service.
"""
try:
task_manager.update_task(task_id, "processing", user_id=user_id)
# Read file bytes
with open(image_path, "rb") as f:
image_bytes = f.read()
with open(audio_path, "rb") as f:
audio_bytes = f.read()
# Dummy scene data required by the service (used for prompt generation)
scene_data = {
"title": "Test Persona",
"description": "A talking avatar video generated via Video Studio."
}
story_context = {}
# Call the common interface function
logger.info(f"[VideoStudio] Starting InfiniteTalk generation for task {task_id}")
result = animate_scene_with_voiceover(
image_bytes=image_bytes,
audio_bytes=audio_bytes,
scene_data=scene_data,
story_context=story_context,
user_id=user_id,
resolution=resolution
)
# Save the resulting video bytes to a file
video_filename = f"video_{task_id}.mp4"
video_path = UPLOAD_DIR / video_filename
with open(video_path, "wb") as f:
f.write(result["video_bytes"])
# Prepare result for frontend (remove raw bytes)
result.pop("video_bytes", None)
# Add local download URL
video_url = f"/api/video-studio/download/{video_filename}"
result["video_url"] = video_url
# Save asset to library
try:
engine = get_engine_for_user(user_id)
SessionLocal = sessionmaker(bind=engine)
db = SessionLocal()
try:
save_asset_to_library(
db=db,
user_id=user_id,
asset_type="video",
source_module="video_studio",
filename=video_filename,
file_url=video_url,
file_path=str(video_path),
file_size=video_path.stat().st_size,
mime_type="video/mp4",
title=f"Avatar Video {task_id}",
description=f"Generated avatar video using {model}",
model=model,
cost=result.get("cost", 0.0),
generation_time=result.get("generation_time", 0.0)
)
finally:
db.close()
except Exception as e:
logger.error(f"[VideoStudio] Failed to save asset to library: {e}")
logger.info(f"[VideoStudio] Task {task_id} completed successfully")
task_manager.update_task(task_id, "completed", result=result, user_id=user_id)
except Exception as e:
logger.error(f"[VideoStudio] Avatar generation failed for task {task_id}: {e}", exc_info=True)
task_manager.update_task(task_id, "failed", error=str(e), user_id=user_id)
finally:
# Cleanup temp upload files
try:
if image_path.exists(): image_path.unlink()
if audio_path.exists(): audio_path.unlink()
except Exception as e:
logger.warning(f"[VideoStudio] Failed to cleanup temp files: {e}")
@router.post("/avatar/create-async")
async def create_avatar_video(
background_tasks: BackgroundTasks,
image: UploadFile = File(...),
audio: UploadFile = File(...),
resolution: str = Form("720p"),
model: str = Form("infinitetalk"),
current_user: dict = Depends(get_current_user)
):
"""
Create a talking avatar video using InfiniteTalk (WaveSpeed).
Directly uses the common backend service without Podcast Maker dependencies.
"""
user_id = current_user.get("id", "anonymous")
# Validate file types roughly
if not image.content_type.startswith("image/"):
raise HTTPException(status_code=400, detail="Invalid image file type")
task_id = task_manager.create_task("avatar_generation", user_id=user_id)
# Generate temp paths
image_ext = Path(image.filename).suffix or ".png"
audio_ext = Path(audio.filename).suffix or ".mp3"
image_path = UPLOAD_DIR / f"img_{task_id}{image_ext}"
audio_path = UPLOAD_DIR / f"aud_{task_id}{audio_ext}"
try:
# Save uploaded files
with open(image_path, "wb") as f:
shutil.copyfileobj(image.file, f)
with open(audio_path, "wb") as f:
shutil.copyfileobj(audio.file, f)
# Start background task
background_tasks.add_task(
_process_avatar_generation,
task_id,
image_path,
audio_path,
user_id,
resolution,
model
)
return {"task_id": task_id, "status": "pending", "message": "Video generation started successfully."}
except Exception as e:
# Cleanup if immediate failure
if image_path.exists(): image_path.unlink()
if audio_path.exists(): audio_path.unlink()
logger.error(f"[VideoStudio] Failed to start generation: {e}")
raise HTTPException(status_code=500, detail=f"Failed to start generation: {str(e)}")
@router.get("/task/{task_id}")
async def get_task_status(task_id: str, current_user: dict = Depends(get_current_user)):
user_id = current_user.get("id", "anonymous")
task = task_manager.get_task(task_id, user_id=user_id)
if not task:
raise HTTPException(status_code=404, detail="Task not found")
return task
@router.get("/download/{filename}")
async def download_video(filename: str):
file_path = UPLOAD_DIR / filename
if not file_path.exists():
raise HTTPException(status_code=404, detail="File not found")
return FileResponse(file_path)

View File

@@ -0,0 +1,6 @@
from fastapi import APIRouter
from .handlers import avatar
router = APIRouter(prefix="/api/video-studio", tags=["Video Studio"])
router.include_router(avatar.router)

View File

@@ -0,0 +1,126 @@
from typing import Dict, Any, Optional
import uuid
from datetime import datetime
from loguru import logger
from sqlalchemy.orm import sessionmaker
from services.database import get_engine_for_user
from models.video_models import VideoGenerationTask, VideoTaskStatus, Base
class TaskManager:
def __init__(self):
pass
def create_task(self, task_type: str, user_id: str, request_data: Optional[Dict] = None) -> str:
"""Create a new persistent task."""
task_id = str(uuid.uuid4())
try:
engine = get_engine_for_user(user_id)
# Ensure table exists
Base.metadata.create_all(bind=engine)
SessionLocal = sessionmaker(bind=engine)
db = SessionLocal()
try:
task = VideoGenerationTask(
task_id=task_id,
user_id=user_id,
status=VideoTaskStatus.PENDING,
request_data=request_data
)
db.add(task)
db.commit()
logger.info(f"[VideoStudio] Created persistent task {task_id} for user {user_id}")
return task_id
finally:
db.close()
except Exception as e:
logger.error(f"[VideoStudio] Failed to create task: {e}")
raise
def update_task(self, task_id: str, status: str, result: Optional[Dict] = None, error: Optional[str] = None, user_id: str = None, progress: float = None, message: str = None):
"""Update an existing task."""
if not user_id:
logger.error(f"[VideoStudio] Cannot update task {task_id} without user_id")
return
try:
engine = get_engine_for_user(user_id)
SessionLocal = sessionmaker(bind=engine)
db = SessionLocal()
try:
task = db.query(VideoGenerationTask).filter(VideoGenerationTask.task_id == task_id).first()
if not task:
logger.warning(f"[VideoStudio] Task {task_id} not found in DB for update")
return
# Map string status to Enum
try:
# Handle case-insensitive status mapping
status_upper = status.upper()
if status_upper == "RUNNING":
status_upper = "PROCESSING"
enum_status = VideoTaskStatus[status_upper]
except KeyError:
logger.warning(f"[VideoStudio] Invalid status {status}, defaulting to PROCESSING")
enum_status = VideoTaskStatus.PROCESSING
task.status = enum_status
task.updated_at = datetime.utcnow()
if result:
task.result = result
if error:
task.error = error
if progress is not None:
task.progress = progress
if message:
task.message = message
db.commit()
logger.debug(f"[VideoStudio] Updated task {task_id} to {status}")
finally:
db.close()
except Exception as e:
logger.error(f"[VideoStudio] Failed to update task {task_id}: {e}")
def get_task(self, task_id: str, user_id: str = None) -> Optional[Dict[str, Any]]:
"""Retrieve task status."""
if not user_id:
logger.error(f"[VideoStudio] Cannot get task {task_id} without user_id")
return None
try:
engine = get_engine_for_user(user_id)
SessionLocal = sessionmaker(bind=engine)
db = SessionLocal()
try:
task = db.query(VideoGenerationTask).filter(VideoGenerationTask.task_id == task_id).first()
if not task:
return None
# Map internal status to frontend status
status_val = task.status.value
if status_val == "processing":
status_val = "running"
return {
"task_id": task.task_id,
"status": status_val,
"result": task.result,
"error": task.error,
"progress": task.progress,
"message": task.message,
"created_at": task.created_at,
"updated_at": task.updated_at
}
finally:
db.close()
except Exception as e:
logger.error(f"[VideoStudio] Failed to get task {task_id}: {e}")
return None
task_manager = TaskManager()

View File

@@ -17,6 +17,7 @@ from services.subscription.preflight_validator import validate_image_generation_
from services.llm_providers.main_image_generation import generate_image, generate_character_image
from utils.asset_tracker import save_asset_to_library
from utils.logger_utils import get_service_logger
from utils.media_utils import load_media_bytes
from ..task_manager import task_manager
router = APIRouter(tags=["youtube-image"])
@@ -59,36 +60,15 @@ def require_authenticated_user(current_user: Dict[str, Any]) -> str:
def _load_base_avatar_bytes(avatar_url: str) -> Optional[bytes]:
"""Load base avatar bytes for character consistency."""
try:
# Handle different avatar URL formats
if avatar_url.startswith("/api/youtube/avatars/"):
# YouTube avatar
filename = avatar_url.split("/")[-1].split("?")[0]
avatar_path = YOUTUBE_AVATARS_DIR / filename
elif avatar_url.startswith("/api/podcast/avatars/"):
# Podcast avatar (cross-module usage)
filename = avatar_url.split("/")[-1].split("?")[0]
from pathlib import Path
podcast_avatars_dir = Path(__file__).parent.parent.parent.parent / "podcast_avatars"
avatar_path = podcast_avatars_dir / filename
else:
# Try to extract filename and check YouTube avatars first
filename = avatar_url.split("/")[-1].split("?")[0]
avatar_path = YOUTUBE_AVATARS_DIR / filename
if not avatar_path.exists():
# Fallback to podcast avatars
podcast_avatars_dir = Path(__file__).parent.parent.parent.parent / "podcast_avatars"
avatar_path = podcast_avatars_dir / filename
if not avatar_path.exists() or not avatar_path.is_file():
logger.warning(f"[YouTube] Avatar file not found: {avatar_path}")
return None
logger.info(f"[YouTube] Successfully loaded avatar: {avatar_path}")
return avatar_path.read_bytes()
except Exception as e:
logger.error(f"[YouTube] Error loading avatar from {avatar_url}: {e}")
return None
# REUSE: Use centralized media loader
avatar_bytes = load_media_bytes(avatar_url)
if avatar_bytes:
logger.info(f"[YouTube] Successfully loaded avatar from: {avatar_url}")
return avatar_bytes
logger.warning(f"[YouTube] Avatar file not found for URL: {avatar_url}")
return None
def _save_scene_image(image_bytes: bytes, scene_id: str) -> Dict[str, str]: