feat: voice clone audio generation + podcast workspace architecture
- Voice clone integration: When user selects voice clone in Write phase, backend uses their uploaded voice sample + scene script text to generate audio via qwen3/minimax/cosyvoice voice clone APIs - Multi-tenant workspace storage: All podcast assets (audio, video, images, charts) now use workspace-specific directories per user - Chart preview improvements: Card-based B-Roll charts UI with thumbnails, takeaway text, and action buttons; public endpoint for image serving - Voice clone caching: In-memory LRU cache for voice samples (avoids re-downloading per scene); frontend caches voice clone metadata - Thread pool for voice clone: Audio generation uses ThreadPoolExecutor to avoid blocking the FastAPI event loop - Auto-detect voice clone IDs (vc_*, MY_VOICE_CLONE) to route correctly - DB fallback for voice sample URL: Fetches from ContentAsset if not passed - Fixed API URL resolution for chart previews - Fixed GlassyCard DOM warnings for motion props - Fixed ScriptGenerationProgressView syntax error - Fixed usePodcastWorkflow scriptData reference
This commit is contained in:
@@ -10,22 +10,32 @@ from loguru import logger
|
||||
from services.story_writer.audio_generation_service import StoryAudioGenerationService
|
||||
|
||||
# Directory paths
|
||||
# router.py is at: backend/api/podcast/router.py
|
||||
# parents[0] = backend/api/podcast/
|
||||
# parents[1] = backend/api/
|
||||
# parents[2] = backend/
|
||||
# parents[3] = root/
|
||||
ROOT_DIR = Path(__file__).resolve().parents[3] # root/
|
||||
DATA_MEDIA_DIR = ROOT_DIR / "data" / "media"
|
||||
# Find root by looking for 'data' or 'backend' folder
|
||||
def _find_root() -> Path:
|
||||
"""Find project root by searching up for data directory."""
|
||||
current = Path(__file__).resolve()
|
||||
for _ in range(10): # max 10 levels up
|
||||
if (current / "data").exists() and (current / "data" / "media").exists():
|
||||
return current
|
||||
if (current / "backend").exists():
|
||||
return current / "backend"
|
||||
parent = current.parent
|
||||
if parent == current:
|
||||
break
|
||||
current = parent
|
||||
# Fallback: assume backend is root
|
||||
return Path(__file__).resolve().parents[1]
|
||||
|
||||
PODCAST_AUDIO_DIR = (DATA_MEDIA_DIR / "podcast_audio").resolve()
|
||||
PODCAST_IMAGES_DIR = (DATA_MEDIA_DIR / "podcast_images").resolve()
|
||||
PODCAST_VIDEOS_DIR = (DATA_MEDIA_DIR / "podcast_videos").resolve()
|
||||
ROOT_DIR = _find_root()
|
||||
|
||||
# Video subdirectory
|
||||
# Video subdirectory (relative to workspace media dir)
|
||||
AI_VIDEO_SUBDIR = Path("AI_Videos")
|
||||
|
||||
MediaType = Literal["audio", "image", "video"]
|
||||
# Legacy constants - DEPRECATED, use get_podcast_media_dir() instead
|
||||
# Kept for backward compatibility with some handlers
|
||||
PODCAST_AVATARS_SUBDIR = Path("avatars")
|
||||
|
||||
MediaType = Literal["audio", "image", "video", "chart"]
|
||||
|
||||
|
||||
def _sanitize_user_id(user_id: str) -> str:
|
||||
@@ -38,21 +48,31 @@ def get_podcast_media_dir(
|
||||
*,
|
||||
ensure_exists: bool = False,
|
||||
) -> Path:
|
||||
"""Resolve podcast media directory (tenant workspace first, legacy global fallback)."""
|
||||
"""
|
||||
Resolve podcast media directory (workspace-only for multi-tenant isolation).
|
||||
|
||||
Always requires user_id for tenant isolation. Falls back to default workspace
|
||||
only if no user_id provided (for backward compat in development).
|
||||
"""
|
||||
media_subdir = {
|
||||
"audio": "podcast_audio",
|
||||
"image": "podcast_images",
|
||||
"video": "podcast_videos",
|
||||
"chart": "podcast_charts",
|
||||
}[media_type]
|
||||
|
||||
if user_id:
|
||||
sanitized = _sanitize_user_id(user_id)
|
||||
tenant_media_dir = ROOT_DIR / "workspace" / f"workspace_{sanitized}" / "media" / media_subdir
|
||||
resolved_dir = tenant_media_dir.resolve()
|
||||
resolved_dir = (
|
||||
ROOT_DIR / "workspace" / f"workspace_{sanitized}" / "media" / media_subdir
|
||||
).resolve()
|
||||
else:
|
||||
resolved_dir = (DATA_MEDIA_DIR / media_subdir).resolve()
|
||||
# Development fallback: use a default workspace
|
||||
resolved_dir = (
|
||||
ROOT_DIR / "workspace" / "workspace_alwrity" / "media" / media_subdir
|
||||
).resolve()
|
||||
|
||||
logger.debug(f"[Podcast] get_podcast_media_dir: type={media_type}, user_id={user_id}, sanitized={user_id and _sanitize_user_id(user_id)}, resolved={resolved_dir}")
|
||||
logger.warning(f"[Podcast] get_podcast_media_dir: type={media_type}, user_id={user_id}, resolved={resolved_dir}")
|
||||
|
||||
if ensure_exists:
|
||||
resolved_dir.mkdir(parents=True, exist_ok=True)
|
||||
@@ -61,14 +81,11 @@ def get_podcast_media_dir(
|
||||
|
||||
|
||||
def get_podcast_media_read_dirs(media_type: MediaType, user_id: str | None = None) -> list[Path]:
|
||||
"""Return ordered directories to search (tenant path first, then legacy global path)."""
|
||||
dirs: list[Path] = []
|
||||
if user_id:
|
||||
dirs.append(get_podcast_media_dir(media_type, user_id))
|
||||
logger.debug(f"[Podcast] get_podcast_media_read_dirs: added user dir for {user_id}")
|
||||
dirs.append(get_podcast_media_dir(media_type, None))
|
||||
logger.debug(f"[Podcast] get_podcast_media_read_dirs: dirs={dirs}")
|
||||
return dirs
|
||||
"""
|
||||
Return directories to search for podcast media.
|
||||
Now workspace-only (no legacy fallback).
|
||||
"""
|
||||
return [get_podcast_media_dir(media_type, user_id)]
|
||||
|
||||
|
||||
def get_podcast_audio_service(user_id: str | None = None) -> StoryAudioGenerationService:
|
||||
|
||||
@@ -20,7 +20,7 @@ from services.podcast_bible_service import PodcastBibleService
|
||||
from utils.asset_tracker import save_asset_to_library
|
||||
from loguru import logger
|
||||
import os
|
||||
from ..constants import PODCAST_IMAGES_DIR
|
||||
from ..constants import get_podcast_media_dir
|
||||
from ..models import (
|
||||
PodcastAnalyzeRequest,
|
||||
PodcastAnalyzeResponse,
|
||||
@@ -247,7 +247,8 @@ async def analyze_podcast_idea(
|
||||
if image_result and image_result.image_bytes:
|
||||
img_id = str(uuid.uuid4())[:8]
|
||||
filename = f"presenter_podcast_{user_id}_{img_id}.png"
|
||||
avatars_dir = PODCAST_IMAGES_DIR / "avatars"
|
||||
images_dir = get_podcast_media_dir("image", user_id, ensure_exists=True)
|
||||
avatars_dir = images_dir / "avatars"
|
||||
avatars_dir.mkdir(parents=True, exist_ok=True)
|
||||
output_path = avatars_dir / filename
|
||||
|
||||
|
||||
@@ -12,7 +12,15 @@ from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
import tempfile
|
||||
import uuid
|
||||
import hashlib
|
||||
import time
|
||||
import shutil
|
||||
import requests
|
||||
import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from services.database import get_db
|
||||
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
|
||||
@@ -31,6 +39,124 @@ from ..models import (
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Thread pool for CPU/IO-intensive voice clone operations
|
||||
_audio_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="podcast_audio")
|
||||
|
||||
# In-memory LRU cache for voice samples (per user) to avoid re-downloading
|
||||
_voice_sample_cache: dict[str, tuple[float, bytes]] = {}
|
||||
_VOICE_SAMPLE_CACHE_TTL = 1800 # 30 minutes
|
||||
|
||||
|
||||
def _get_cached_voice_sample(cache_key: str) -> Optional[bytes]:
|
||||
"""Get voice sample bytes from in-memory cache if fresh."""
|
||||
if cache_key in _voice_sample_cache:
|
||||
ts, data = _voice_sample_cache[cache_key]
|
||||
if time.time() - ts < _VOICE_SAMPLE_CACHE_TTL:
|
||||
logger.debug(f"[Podcast] Voice sample cache hit for {cache_key[:16]}...")
|
||||
return data
|
||||
del _voice_sample_cache[cache_key]
|
||||
return None
|
||||
|
||||
|
||||
def _cache_voice_sample(cache_key: str, data: bytes) -> None:
|
||||
"""Store voice sample bytes in in-memory cache."""
|
||||
# Evict oldest entries if cache grows too large
|
||||
if len(_voice_sample_cache) > 50:
|
||||
oldest_key = min(_voice_sample_cache, key=lambda k: _voice_sample_cache[k][0])
|
||||
del _voice_sample_cache[oldest_key]
|
||||
_voice_sample_cache[cache_key] = (time.time(), data)
|
||||
|
||||
|
||||
def _get_latest_voice_sample_url(user_id: str, db) -> Optional[str]:
|
||||
"""Get the latest voice sample URL for a user from their voice clone assets."""
|
||||
try:
|
||||
from models.content_asset_models import ContentAsset, AssetType, AssetSource
|
||||
from sqlalchemy import desc
|
||||
|
||||
asset = db.query(ContentAsset).filter(
|
||||
ContentAsset.user_id == user_id,
|
||||
ContentAsset.asset_type == AssetType.AUDIO,
|
||||
ContentAsset.source_module == AssetSource.VOICE_CLONER,
|
||||
).order_by(desc(ContentAsset.created_at)).first()
|
||||
|
||||
if asset and asset.file_url:
|
||||
logger.info(f"[Podcast] Found voice sample for user {user_id}: {asset.file_url}")
|
||||
return asset.file_url
|
||||
|
||||
logger.warning(f"[Podcast] No voice sample asset found for user {user_id}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"[Podcast] Error fetching voice sample URL: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def _fetch_voice_sample(voice_sample_url: str, user_id: str) -> Optional[bytes]:
|
||||
"""Fetch voice sample audio bytes from URL, with caching."""
|
||||
cache_key = hashlib.md5(f"{user_id}:{voice_sample_url}".encode()).hexdigest()
|
||||
|
||||
# Check in-memory cache first
|
||||
cached = _get_cached_voice_sample(cache_key)
|
||||
if cached is not None:
|
||||
return cached
|
||||
|
||||
try:
|
||||
from utils.media_utils import resolve_media_path
|
||||
|
||||
# Try resolving as a local workspace path first (fastest)
|
||||
if "/api/assets/" in voice_sample_url:
|
||||
# Resolve user workspace path directly
|
||||
sanitized_uid = "".join(c for c in user_id if c.isalnum() or c in ("-", "_"))
|
||||
from api.podcast.constants import ROOT_DIR
|
||||
parts = voice_sample_url.split("/")
|
||||
# Expected: /api/assets/{user_id}/voice_samples/{filename}
|
||||
try:
|
||||
idx = parts.index("voice_samples")
|
||||
filename = parts[idx + 1].split("?")[0]
|
||||
local_path = ROOT_DIR / "workspace" / f"workspace_{sanitized_uid}" / "assets" / "voice_samples" / filename
|
||||
if local_path.exists():
|
||||
data = local_path.read_bytes()
|
||||
_cache_voice_sample(cache_key, data)
|
||||
logger.info(f"[Podcast] Voice sample loaded from workspace: {local_path}")
|
||||
return data
|
||||
except (ValueError, IndexError):
|
||||
pass
|
||||
|
||||
# Fall back to media utils resolver
|
||||
local_path = resolve_media_path(voice_sample_url)
|
||||
if local_path and local_path.exists():
|
||||
data = local_path.read_bytes()
|
||||
_cache_voice_sample(cache_key, data)
|
||||
return data
|
||||
|
||||
# Try resolving as a podcast audio file
|
||||
if "/api/podcast/audio/" in voice_sample_url:
|
||||
filename = voice_sample_url.split("/api/podcast/audio/")[-1].split("?")[0]
|
||||
try:
|
||||
audio_dir = get_podcast_media_dir("audio", user_id)
|
||||
local_path = audio_dir / filename
|
||||
if local_path.exists():
|
||||
data = local_path.read_bytes()
|
||||
_cache_voice_sample(cache_key, data)
|
||||
return data
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Try direct HTTP fetch as fallback
|
||||
if voice_sample_url.startswith("http"):
|
||||
logger.info(f"[Podcast] Fetching voice sample via HTTP: {voice_sample_url[:80]}...")
|
||||
resp = requests.get(voice_sample_url, timeout=30)
|
||||
if resp.status_code == 200:
|
||||
data = resp.content
|
||||
_cache_voice_sample(cache_key, data)
|
||||
logger.info(f"[Podcast] Voice sample fetched via HTTP ({len(data)} bytes)")
|
||||
return data
|
||||
|
||||
logger.warning(f"[Podcast] Could not fetch voice sample from: {voice_sample_url}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"[Podcast] Error fetching voice sample: {e}")
|
||||
return None
|
||||
|
||||
|
||||
@router.post("/audio/upload")
|
||||
async def upload_podcast_audio(
|
||||
@@ -125,35 +251,176 @@ async def generate_podcast_audio(
|
||||
raise HTTPException(status_code=400, detail="Text is required")
|
||||
|
||||
try:
|
||||
audio_service = get_podcast_audio_service(user_id)
|
||||
logger.warning(f"[Podcast] Generating audio with service dir: {audio_service.output_dir}")
|
||||
result: StoryAudioResult = audio_service.generate_ai_audio(
|
||||
scene_number=0,
|
||||
scene_title=request.scene_title,
|
||||
text=request.text.strip(),
|
||||
user_id=user_id,
|
||||
voice_id=request.voice_id or "Wise_Woman",
|
||||
custom_voice_id=request.custom_voice_id,
|
||||
speed=request.speed or 1.0, # Normal speed (was 0.9, but too slow - causing duration issues)
|
||||
volume=request.volume or 1.0,
|
||||
pitch=request.pitch or 0.0, # Normal pitch (0.0 = neutral)
|
||||
emotion=request.emotion or "neutral",
|
||||
english_normalization=request.english_normalization or False,
|
||||
sample_rate=request.sample_rate,
|
||||
bitrate=request.bitrate,
|
||||
channel=request.channel,
|
||||
format=request.format,
|
||||
language_boost=request.language_boost,
|
||||
enable_sync_mode=request.enable_sync_mode,
|
||||
# Determine if we should use voice clone path
|
||||
# Voice clone is used when: explicitly requested, OR when voice_id/custom_voice_id indicates a clone
|
||||
# (cloned voice IDs start with "vc_" or match the placeholder "MY_VOICE_CLONE")
|
||||
_vid = request.voice_id or ""
|
||||
_cvid = request.custom_voice_id or ""
|
||||
is_voice_clone = request.use_voice_clone or (
|
||||
_cvid.startswith("vc_") or _cvid == "MY_VOICE_CLONE"
|
||||
) or (
|
||||
_vid.startswith("vc_") or _vid == "MY_VOICE_CLONE"
|
||||
)
|
||||
|
||||
# Override URL to use podcast endpoint instead of story endpoint
|
||||
if result.get("audio_url") and "/api/story/audio/" in result.get("audio_url", ""):
|
||||
audio_filename = result.get("audio_filename", "")
|
||||
result["audio_url"] = f"/api/podcast/audio/{audio_filename}"
|
||||
|
||||
logger.warning(f"[Podcast] Audio generated - path: {result.get('audio_path')}, url: {result.get('audio_url')}")
|
||||
# If voice_id is a clone ID, normalize it to use Wise_Woman for TTS fallback
|
||||
effective_voice_id = _vid if not (_vid.startswith("vc_") or _vid == "MY_VOICE_CLONE") else "Wise_Woman"
|
||||
|
||||
logger.warning(f"[Podcast] Audio request: use_voice_clone={request.use_voice_clone}, voice_id={request.voice_id}, custom_voice_id={request.custom_voice_id}, is_voice_clone={is_voice_clone}, voice_sample_url={request.voice_sample_url}, voice_clone_engine={request.voice_clone_engine}")
|
||||
|
||||
# Voice clone path: use user's voice sample with scene text as reference
|
||||
if is_voice_clone:
|
||||
# If no voice_sample_url provided, try to fetch it from the user's latest voice clone
|
||||
voice_sample_url = request.voice_sample_url
|
||||
if not voice_sample_url:
|
||||
try:
|
||||
voice_sample_url = _get_latest_voice_sample_url(user_id, db)
|
||||
logger.warning(f"[Podcast] DB fallback voice sample URL for user {user_id}: {voice_sample_url}")
|
||||
except Exception as e:
|
||||
logger.warning(f"[Podcast] Could not fetch voice sample URL: {e}")
|
||||
|
||||
if voice_sample_url:
|
||||
from services.llm_providers.main_audio_generation import qwen3_voice_clone, cosyvoice_voice_clone
|
||||
|
||||
engine = (request.voice_clone_engine or "qwen3").lower()
|
||||
logger.warning(f"[Podcast] 🔊 Voice clone path: engine={engine}, scene='{request.scene_title}', voice_sample_url={voice_sample_url[:80]}...")
|
||||
|
||||
# Download voice sample from URL (with caching)
|
||||
logger.warning(f"[Podcast] Fetching voice sample from: {voice_sample_url}")
|
||||
try:
|
||||
voice_sample_bytes = _fetch_voice_sample(voice_sample_url, user_id)
|
||||
except Exception as fetch_err:
|
||||
logger.error(f"[Podcast] ❌ Failed to fetch voice sample: {fetch_err}", exc_info=True)
|
||||
raise HTTPException(status_code=400, detail=f"Could not fetch voice sample: {str(fetch_err)}")
|
||||
logger.warning(f"[Podcast] Voice sample fetch result: {len(voice_sample_bytes) if voice_sample_bytes else 0} bytes")
|
||||
if not voice_sample_bytes:
|
||||
raise HTTPException(status_code=400, detail=f"Could not fetch voice sample from {voice_sample_url}")
|
||||
|
||||
scene_text = request.text.strip()
|
||||
if len(scene_text) > 4000:
|
||||
scene_text = scene_text[:4000]
|
||||
|
||||
# Run voice clone in thread pool to avoid blocking the event loop
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
try:
|
||||
if engine == "minimax":
|
||||
from services.llm_providers.main_audio_generation import clone_voice
|
||||
import random
|
||||
import string
|
||||
random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=8))
|
||||
custom_vid = request.custom_voice_id or f"vc_{random_suffix}"
|
||||
|
||||
result_obj = await loop.run_in_executor(
|
||||
_audio_executor,
|
||||
lambda cv=custom_vid: clone_voice(
|
||||
audio_bytes=voice_sample_bytes,
|
||||
custom_voice_id=cv,
|
||||
text=scene_text,
|
||||
user_id=user_id,
|
||||
),
|
||||
)
|
||||
audio_bytes = result_obj.preview_audio_bytes
|
||||
provider = "minimax"
|
||||
model = "minimax/voice-clone"
|
||||
elif engine == "cosyvoice":
|
||||
result_obj = await loop.run_in_executor(
|
||||
_audio_executor,
|
||||
lambda: cosyvoice_voice_clone(
|
||||
audio_bytes=voice_sample_bytes,
|
||||
text=scene_text,
|
||||
user_id=user_id,
|
||||
),
|
||||
)
|
||||
audio_bytes = result_obj.preview_audio_bytes
|
||||
provider = "wavespeed-ai"
|
||||
model = "wavespeed-ai/cosyvoice-tts/voice-clone"
|
||||
else:
|
||||
result_obj = await loop.run_in_executor(
|
||||
_audio_executor,
|
||||
lambda: qwen3_voice_clone(
|
||||
audio_bytes=voice_sample_bytes,
|
||||
text=scene_text,
|
||||
user_id=user_id,
|
||||
),
|
||||
)
|
||||
audio_bytes = result_obj.preview_audio_bytes
|
||||
provider = "wavespeed-ai"
|
||||
model = "wavespeed-ai/qwen3-tts/voice-clone"
|
||||
|
||||
logger.warning(f"[Podcast] 🔊 Voice clone result: {len(audio_bytes) if audio_bytes else 0} bytes, provider={provider}")
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as clone_err:
|
||||
logger.error(f"[Podcast] ❌ Voice clone failed: {clone_err}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail=f"Voice clone generation failed: {str(clone_err)}")
|
||||
|
||||
# Save audio bytes to file
|
||||
audio_service = get_podcast_audio_service(user_id)
|
||||
audio_filename = f"scene_{request.scene_id}_{uuid.uuid4().hex[:8]}.mp3"
|
||||
audio_path = audio_service.output_dir / audio_filename
|
||||
|
||||
with open(audio_path, "wb") as f:
|
||||
f.write(audio_bytes)
|
||||
|
||||
file_size = len(audio_bytes)
|
||||
audio_url = f"/api/podcast/audio/{audio_filename}"
|
||||
cost = max(0.005, 0.005 * (len(scene_text) / 100.0))
|
||||
|
||||
result = {
|
||||
"audio_path": str(audio_path),
|
||||
"audio_filename": audio_filename,
|
||||
"audio_url": audio_url,
|
||||
"file_size": file_size,
|
||||
"provider": provider,
|
||||
"model": model,
|
||||
"cost": cost,
|
||||
"scene_number": 0,
|
||||
"scene_title": request.scene_title,
|
||||
}
|
||||
|
||||
else:
|
||||
# Standard TTS path - but NOT if custom_voice_id is a clone ID
|
||||
# Clone IDs (vc_*, MY_VOICE_CLONE) are not valid for minimax TTS
|
||||
if is_voice_clone:
|
||||
logger.warning(f"[Podcast] ⚠️ Voice clone detected but no voice sample available - falling back to standard TTS with voice_id={effective_voice_id}")
|
||||
effective_custom_voice_id = request.custom_voice_id
|
||||
if effective_custom_voice_id and (
|
||||
effective_custom_voice_id.startswith("vc_") or
|
||||
effective_custom_voice_id == "MY_VOICE_CLONE"
|
||||
):
|
||||
logger.warning(f"[Podcast] Ignoring clone ID '{effective_custom_voice_id}' in standard TTS path - no voice sample URL available")
|
||||
effective_custom_voice_id = None
|
||||
|
||||
audio_service = get_podcast_audio_service(user_id)
|
||||
logger.warning(f"[Podcast] Standard TTS path: voice_id={effective_voice_id}, custom_voice_id={effective_custom_voice_id}")
|
||||
result: StoryAudioResult = audio_service.generate_ai_audio(
|
||||
scene_number=0,
|
||||
scene_title=request.scene_title,
|
||||
text=request.text.strip(),
|
||||
user_id=user_id,
|
||||
voice_id=effective_voice_id,
|
||||
custom_voice_id=effective_custom_voice_id,
|
||||
speed=request.speed or 1.0, # Normal speed (was 0.9, but too slow - causing duration issues)
|
||||
volume=request.volume or 1.0,
|
||||
pitch=request.pitch or 0.0, # Normal pitch (0.0 = neutral)
|
||||
emotion=request.emotion or "neutral",
|
||||
english_normalization=request.english_normalization or False,
|
||||
sample_rate=request.sample_rate,
|
||||
bitrate=request.bitrate,
|
||||
channel=request.channel,
|
||||
format=request.format,
|
||||
language_boost=request.language_boost,
|
||||
enable_sync_mode=request.enable_sync_mode,
|
||||
)
|
||||
|
||||
# Override URL to use podcast endpoint instead of story endpoint
|
||||
if result.get("audio_url") and "/api/story/audio/" in result.get("audio_url", ""):
|
||||
audio_filename = result.get("audio_filename", "")
|
||||
result["audio_url"] = f"/api/podcast/audio/{audio_filename}"
|
||||
|
||||
logger.warning(f"[Podcast] Audio generated - path: {result.get('audio_path')}, url: {result.get('audio_url')}")
|
||||
except Exception as exc:
|
||||
logger.error(f"[Podcast] ❌ Audio generation failed: {exc}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail=f"Audio generation failed: {exc}")
|
||||
|
||||
# Save to asset library (podcast module)
|
||||
|
||||
@@ -19,15 +19,18 @@ from services.llm_providers.main_image_generation import generate_image
|
||||
from services.llm_providers.main_image_editing import edit_image
|
||||
from utils.asset_tracker import save_asset_to_library
|
||||
from loguru import logger
|
||||
from ..constants import PODCAST_IMAGES_DIR
|
||||
from ..constants import get_podcast_media_dir, PODCAST_AVATARS_SUBDIR
|
||||
from ..presenter_personas import choose_persona_id, get_persona
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Avatar subdirectory
|
||||
AVATAR_SUBDIR = "avatars"
|
||||
PODCAST_AVATARS_DIR = PODCAST_IMAGES_DIR / AVATAR_SUBDIR
|
||||
PODCAST_AVATARS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
AVATAR_SUBDIR = PODCAST_AVATARS_SUBDIR
|
||||
|
||||
|
||||
def _get_podcast_avatars_dir(user_id: str) -> Path:
|
||||
"""Get podcast avatars directory for a user (workspace-aware)."""
|
||||
return get_podcast_media_dir("image", user_id, ensure_exists=True) / AVATAR_SUBDIR
|
||||
|
||||
|
||||
@router.post("/avatar/upload")
|
||||
@@ -57,7 +60,8 @@ async def upload_podcast_avatar(
|
||||
file_ext = Path(file.filename).suffix or '.png'
|
||||
unique_id = str(uuid.uuid4())[:8]
|
||||
avatar_filename = f"avatar_{project_id or 'temp'}_{unique_id}{file_ext}"
|
||||
avatar_path = PODCAST_AVATARS_DIR / avatar_filename
|
||||
avatars_dir = _get_podcast_avatars_dir(user_id)
|
||||
avatar_path = avatars_dir / avatar_filename
|
||||
|
||||
# Save file
|
||||
with open(avatar_path, "wb") as f:
|
||||
@@ -163,7 +167,8 @@ async def make_avatar_presentable(
|
||||
# Save transformed avatar
|
||||
unique_id = str(uuid.uuid4())[:8]
|
||||
transformed_filename = f"presenter_transformed_{project_id or 'temp'}_{unique_id}.png"
|
||||
transformed_path = PODCAST_AVATARS_DIR / transformed_filename
|
||||
avatars_dir = _get_podcast_avatars_dir(user_id)
|
||||
transformed_path = avatars_dir / transformed_filename
|
||||
|
||||
with open(transformed_path, "wb") as f:
|
||||
f.write(result.image_bytes)
|
||||
@@ -345,7 +350,8 @@ async def generate_podcast_presenters(
|
||||
# Save avatar
|
||||
unique_id = str(uuid.uuid4())[:8]
|
||||
avatar_filename = f"presenter_{project_id or 'temp'}_{i+1}_{unique_id}.png"
|
||||
avatar_path = PODCAST_AVATARS_DIR / avatar_filename
|
||||
avatars_dir = _get_podcast_avatars_dir(user_id)
|
||||
avatar_path = avatars_dir / avatar_filename
|
||||
|
||||
with open(avatar_path, "wb") as f:
|
||||
f.write(result.image_bytes)
|
||||
|
||||
@@ -191,8 +191,11 @@ async def generate_chart_preview(
|
||||
"""
|
||||
user_id = require_authenticated_user(current_user)
|
||||
|
||||
# Debug logging
|
||||
logger.warning(f"[Broll] Chart preview request: type={request.chart_type}, title={request.title}, chart_data keys={list(request.chart_data.keys())}, user_id={user_id}")
|
||||
|
||||
try:
|
||||
broll_service = get_broll_service()
|
||||
broll_service = get_broll_service(user_id=user_id)
|
||||
chart_id = uuid.uuid4().hex[:8]
|
||||
|
||||
preview_path = broll_service.generate_chart_preview(
|
||||
@@ -203,11 +206,17 @@ async def generate_chart_preview(
|
||||
chart_id=chart_id,
|
||||
)
|
||||
|
||||
# If chart generation failed (empty path), return a placeholder instead of 500
|
||||
if not preview_path:
|
||||
raise HTTPException(status_code=500, detail="Failed to generate chart preview")
|
||||
# Return a fallback response so frontend doesn't crash
|
||||
logger.warning(f"[Broll] Chart preview skipped - invalid data for type: {request.chart_type}")
|
||||
return ChartPreviewResponse(
|
||||
preview_url="",
|
||||
chart_id=chart_id,
|
||||
)
|
||||
|
||||
preview_filename = Path(preview_path).name
|
||||
preview_url = f"/api/podcast/broll/preview/{chart_id}/{preview_filename}"
|
||||
preview_url = f"/api/podcast/preview/{chart_id}/{preview_filename}"
|
||||
|
||||
return ChartPreviewResponse(
|
||||
preview_url=preview_url,
|
||||
@@ -324,17 +333,29 @@ async def compose_broll_videos(
|
||||
async def serve_chart_preview(
|
||||
chart_id: str,
|
||||
filename: str,
|
||||
current_user: Dict[str, Any] = Depends(get_current_user),
|
||||
user_id: Optional[str] = None,
|
||||
):
|
||||
"""Serve chart preview PNG files."""
|
||||
user_id = require_authenticated_user(current_user)
|
||||
"""
|
||||
Serve chart preview PNG files.
|
||||
|
||||
broll_service = get_broll_service()
|
||||
- user_id passed as query param for multi-tenant workspace resolution
|
||||
- endpoint is public (no auth) to allow direct image loading in browser
|
||||
"""
|
||||
# Validate filename to prevent directory traversal
|
||||
if ".." in filename or "/" in filename or "\\" in filename:
|
||||
raise HTTPException(status_code=400, detail="Invalid filename")
|
||||
|
||||
logger.warning(f"[Broll] serve_chart_preview: chart_id={chart_id}, filename={filename}, user_id={user_id}")
|
||||
|
||||
broll_service = get_broll_service(user_id=user_id)
|
||||
expected_filename = broll_service.get_chart_preview_filename(chart_id)
|
||||
if filename != expected_filename:
|
||||
raise HTTPException(status_code=404, detail="Chart preview not found")
|
||||
|
||||
file_path = broll_service.get_output_path(filename)
|
||||
# Use expected_filename to get the correct path
|
||||
file_path = broll_service.get_output_path(expected_filename)
|
||||
|
||||
logger.warning(f"[Broll] serve_chart_preview: resolved path={file_path}, exists={file_path.exists()}")
|
||||
|
||||
if not file_path.exists():
|
||||
raise HTTPException(status_code=404, detail="Chart preview not found")
|
||||
@@ -342,7 +363,7 @@ async def serve_chart_preview(
|
||||
return FileResponse(
|
||||
path=str(file_path),
|
||||
media_type="image/png",
|
||||
filename=filename,
|
||||
filename=expected_filename,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ from api.story_writer.utils.auth import require_authenticated_user
|
||||
from services.llm_providers.main_image_generation import generate_image, generate_character_image
|
||||
from utils.asset_tracker import save_asset_to_library
|
||||
from loguru import logger
|
||||
from ..constants import PODCAST_IMAGES_DIR
|
||||
from ..constants import get_podcast_media_dir
|
||||
from ..models import PodcastImageRequest, PodcastImageResponse
|
||||
|
||||
router = APIRouter()
|
||||
@@ -377,14 +377,14 @@ async def generate_podcast_scene_image(
|
||||
user_id=user_id
|
||||
)
|
||||
|
||||
# Save image to podcast images directory
|
||||
PODCAST_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
|
||||
# Save image to podcast images directory (workspace-aware)
|
||||
images_dir = get_podcast_media_dir("image", user_id, ensure_exists=True)
|
||||
|
||||
# Generate filename
|
||||
clean_title = "".join(c if c.isalnum() or c in ('-', '_') else '_' for c in request.scene_title[:30])
|
||||
unique_id = str(uuid.uuid4())[:8]
|
||||
image_filename = f"scene_{request.scene_id}_{clean_title}_{unique_id}.png"
|
||||
image_path = PODCAST_IMAGES_DIR / image_filename
|
||||
image_path = images_dir / image_filename
|
||||
|
||||
# Save image
|
||||
with open(image_path, "wb") as f:
|
||||
@@ -470,16 +470,17 @@ async def serve_podcast_image(
|
||||
Query parameter is useful for HTML elements like <img> that cannot send custom headers.
|
||||
Supports subdirectories like avatars/
|
||||
"""
|
||||
require_authenticated_user(current_user)
|
||||
user_id = require_authenticated_user(current_user)
|
||||
|
||||
# Security check: ensure path doesn't contain path traversal or absolute paths
|
||||
if ".." in path or path.startswith("/"):
|
||||
raise HTTPException(status_code=400, detail="Invalid path")
|
||||
|
||||
image_path = (PODCAST_IMAGES_DIR / path).resolve()
|
||||
images_dir = get_podcast_media_dir("image", user_id)
|
||||
image_path = (images_dir / path).resolve()
|
||||
|
||||
# Security check: ensure resolved path is within PODCAST_IMAGES_DIR
|
||||
if not str(image_path).startswith(str(PODCAST_IMAGES_DIR)):
|
||||
# Security check: ensure resolved path is within images_dir
|
||||
if not str(image_path).startswith(str(images_dir)):
|
||||
raise HTTPException(status_code=403, detail="Access denied")
|
||||
|
||||
if not image_path.exists():
|
||||
|
||||
@@ -11,6 +11,7 @@ from typing import Optional, Dict, Any
|
||||
from services.database import get_db
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from services.podcast_service import PodcastService
|
||||
from loguru import logger
|
||||
from ..models import (
|
||||
PodcastProjectResponse,
|
||||
CreateProjectRequest,
|
||||
@@ -106,14 +107,21 @@ async def update_project(
|
||||
current_user: Dict[str, Any] = Depends(get_current_user),
|
||||
):
|
||||
"""Update a podcast project state."""
|
||||
import time
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
user_id = current_user.get("user_id") or current_user.get("id")
|
||||
if not user_id:
|
||||
logger.error(f"[Podcast] update_project: No user_id found in current_user: {current_user}")
|
||||
raise HTTPException(status_code=401, detail="User ID not found")
|
||||
|
||||
logger.warning(f"[Podcast] update_project: project_id={project_id}, user_id={user_id}")
|
||||
logger.warning(f"[Podcast] update_project: request data: {request.model_dump()}")
|
||||
# Get only field names being updated (not full data to avoid console flooding)
|
||||
request_dict = request.model_dump(exclude_none=True)
|
||||
updated_fields = list(request_dict.keys())
|
||||
|
||||
logger.warning(f"[Podcast] ===== UPDATE_PROJECT_START =====")
|
||||
logger.warning(f"[Podcast] project_id={project_id}, user_id={user_id}, fields={updated_fields}")
|
||||
|
||||
service = PodcastService(db)
|
||||
|
||||
@@ -140,10 +148,15 @@ async def update_project(
|
||||
updates = request.model_dump(exclude_unset=True)
|
||||
project = service.update_project(user_id, project_id, **updates)
|
||||
|
||||
duration_ms = int((time.time() - start_time) * 1000)
|
||||
logger.warning(f"[Podcast] ===== UPDATE_PROJECT_END (took {duration_ms}ms) =====")
|
||||
|
||||
return PodcastProjectResponse.model_validate(project)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
duration_ms = int((time.time() - start_time) * 1000)
|
||||
logger.error(f"[Podcast] ===== UPDATE_PROJECT_ERROR (took {duration_ms}ms): {str(e)} =====")
|
||||
raise HTTPException(status_code=500, detail=f"Error updating project: {str(e)}")
|
||||
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ from typing import Dict, Any, List
|
||||
from types import SimpleNamespace
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
@@ -138,10 +139,12 @@ async def podcast_research_exa(
|
||||
Run podcast research via Exa and then use LLM to extract deep insights.
|
||||
Uses Podcast Bible and Analysis context for hyper-personalization.
|
||||
"""
|
||||
start_time = time.time()
|
||||
user_id = require_authenticated_user(current_user)
|
||||
logger.warning(f"[Podcast Research] ========== REQUEST START ==========")
|
||||
logger.warning(f"[Podcast Research] User: {user_id}, Topic: {request.topic[:80]}...")
|
||||
logger.warning(f"[Podcast Research] Queries count: {len(request.queries) if request.queries else 0}")
|
||||
|
||||
# Log only essential info, not full request data
|
||||
logger.warning(f"[Podcast Research] ===== RESEARCH_START =====")
|
||||
logger.warning(f"[Podcast Research] user={user_id}, topic='{request.topic[:50]}...', queries={len(request.queries) if request.queries else 0}")
|
||||
|
||||
|
||||
queries = [q.strip() for q in request.queries if q and q.strip()]
|
||||
@@ -424,6 +427,10 @@ QUALITY STANDARDS:
|
||||
include_avatar_phase=True,
|
||||
)
|
||||
|
||||
duration_ms = int((time.time() - start_time) * 1000)
|
||||
logger.warning(f"[Podcast Research] ===== RESEARCH_END (took {duration_ms}ms) =====")
|
||||
logger.warning(f"[Podcast Research] sources={len(sources_payload)}, insights={len(key_insights)}, summary_len={len(summary)}")
|
||||
|
||||
return PodcastExaResearchResponse(
|
||||
sources=sources_payload,
|
||||
search_queries=result.get("search_queries", queries) if isinstance(result, dict) else queries,
|
||||
|
||||
@@ -9,6 +9,7 @@ from typing import Dict, Any, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
@@ -60,11 +61,11 @@ async def generate_podcast_script(
|
||||
Generate a podcast script outline (scenes + lines) using podcast-oriented prompting.
|
||||
"""
|
||||
user_id = require_authenticated_user(current_user)
|
||||
logger.warning(f"[ScriptGen] ========== SCRIPT GENERATION START ==========")
|
||||
logger.warning(f"[ScriptGen] Topic: {request.idea[:60]}...")
|
||||
logger.warning(f"[ScriptGen] Duration: {request.duration_minutes} min, Speakers: {request.speakers}")
|
||||
start_time = time.time()
|
||||
logger.warning(f"[ScriptGen] ===== SCRIPT_GEN_START =====")
|
||||
logger.warning(f"[ScriptGen] user={user_id}, topic='{request.idea[:50]}...', duration={request.duration_minutes}min, speakers={request.speakers}")
|
||||
podcast_mode = (request.podcast_mode or "video_only").strip().lower()
|
||||
logger.warning(f"[ScriptGen] Has research: {bool(request.research)}, Has bible: {bool(request.bible)}, Has analysis: {bool(request.analysis)}, Mode: {podcast_mode}")
|
||||
logger.warning(f"[ScriptGen] research={bool(request.research)}, bible={bool(request.bible)}, analysis={bool(request.analysis)}, mode={podcast_mode}")
|
||||
research_fact_cards = request.research.get("factCards", []) if request.research else []
|
||||
|
||||
# Build comprehensive research context for higher-quality scripts
|
||||
@@ -399,5 +400,8 @@ COST OPTIMIZATION:
|
||||
logger.warning(f"[ScriptGen] Script generated: {len(scenes)} scenes, {total_lines_output}/{total_lines_input} lines")
|
||||
if dropped_empty_lines > 0:
|
||||
logger.warning(f"[ScriptGen] Dropped {dropped_empty_lines} empty lines")
|
||||
|
||||
duration_ms = int((time.time() - start_time) * 1000)
|
||||
logger.warning(f"[ScriptGen] ===== SCRIPT_GEN_END (took {duration_ms}ms) =====")
|
||||
|
||||
return PodcastScriptResponse(scenes=scenes)
|
||||
|
||||
@@ -223,6 +223,9 @@ class PodcastAudioRequest(BaseModel):
|
||||
text: str
|
||||
voice_id: Optional[str] = "Wise_Woman"
|
||||
custom_voice_id: Optional[str] = None # Voice clone ID for custom voice
|
||||
use_voice_clone: Optional[bool] = False # If True, use voice clone with voice_sample_url
|
||||
voice_sample_url: Optional[str] = None # URL to user's voice sample for cloning
|
||||
voice_clone_engine: Optional[str] = None # Engine: "qwen3", "minimax", "cosyvoice"
|
||||
speed: Optional[float] = 1.0
|
||||
volume: Optional[float] = 1.0
|
||||
pitch: Optional[float] = 0.0
|
||||
|
||||
Reference in New Issue
Block a user