feat: voice clone audio generation + podcast workspace architecture

- Voice clone integration: When user selects voice clone in Write phase,
  backend uses their uploaded voice sample + scene script text to generate
  audio via qwen3/minimax/cosyvoice voice clone APIs
- Multi-tenant workspace storage: All podcast assets (audio, video, images,
  charts) now use workspace-specific directories per user
- Chart preview improvements: Card-based B-Roll charts UI with thumbnails,
  takeaway text, and action buttons; public endpoint for image serving
- Voice clone caching: In-memory LRU cache for voice samples (avoids
  re-downloading per scene); frontend caches voice clone metadata
- Thread pool for voice clone: Audio generation uses ThreadPoolExecutor to
  avoid blocking the FastAPI event loop
- Auto-detect voice clone IDs (vc_*, MY_VOICE_CLONE) to route correctly
- DB fallback for voice sample URL: Fetches from ContentAsset if not passed
- Fixed API URL resolution for chart previews
- Fixed GlassyCard DOM warnings for motion props
- Fixed ScriptGenerationProgressView syntax error
- Fixed usePodcastWorkflow scriptData reference
This commit is contained in:
ajaysi
2026-04-21 19:38:50 +05:30
parent 7637babd7d
commit 91b2f996fd
33 changed files with 1642 additions and 457 deletions

View File

@@ -10,22 +10,32 @@ from loguru import logger
from services.story_writer.audio_generation_service import StoryAudioGenerationService
# Directory paths
# router.py is at: backend/api/podcast/router.py
# parents[0] = backend/api/podcast/
# parents[1] = backend/api/
# parents[2] = backend/
# parents[3] = root/
ROOT_DIR = Path(__file__).resolve().parents[3] # root/
DATA_MEDIA_DIR = ROOT_DIR / "data" / "media"
# Find root by looking for 'data' or 'backend' folder
def _find_root() -> Path:
"""Find project root by searching up for data directory."""
current = Path(__file__).resolve()
for _ in range(10): # max 10 levels up
if (current / "data").exists() and (current / "data" / "media").exists():
return current
if (current / "backend").exists():
return current / "backend"
parent = current.parent
if parent == current:
break
current = parent
# Fallback: assume backend is root
return Path(__file__).resolve().parents[1]
PODCAST_AUDIO_DIR = (DATA_MEDIA_DIR / "podcast_audio").resolve()
PODCAST_IMAGES_DIR = (DATA_MEDIA_DIR / "podcast_images").resolve()
PODCAST_VIDEOS_DIR = (DATA_MEDIA_DIR / "podcast_videos").resolve()
ROOT_DIR = _find_root()
# Video subdirectory
# Video subdirectory (relative to workspace media dir)
AI_VIDEO_SUBDIR = Path("AI_Videos")
MediaType = Literal["audio", "image", "video"]
# Legacy constants - DEPRECATED, use get_podcast_media_dir() instead
# Kept for backward compatibility with some handlers
PODCAST_AVATARS_SUBDIR = Path("avatars")
MediaType = Literal["audio", "image", "video", "chart"]
def _sanitize_user_id(user_id: str) -> str:
@@ -38,21 +48,31 @@ def get_podcast_media_dir(
*,
ensure_exists: bool = False,
) -> Path:
"""Resolve podcast media directory (tenant workspace first, legacy global fallback)."""
"""
Resolve podcast media directory (workspace-only for multi-tenant isolation).
Always requires user_id for tenant isolation. Falls back to default workspace
only if no user_id provided (for backward compat in development).
"""
media_subdir = {
"audio": "podcast_audio",
"image": "podcast_images",
"video": "podcast_videos",
"chart": "podcast_charts",
}[media_type]
if user_id:
sanitized = _sanitize_user_id(user_id)
tenant_media_dir = ROOT_DIR / "workspace" / f"workspace_{sanitized}" / "media" / media_subdir
resolved_dir = tenant_media_dir.resolve()
resolved_dir = (
ROOT_DIR / "workspace" / f"workspace_{sanitized}" / "media" / media_subdir
).resolve()
else:
resolved_dir = (DATA_MEDIA_DIR / media_subdir).resolve()
# Development fallback: use a default workspace
resolved_dir = (
ROOT_DIR / "workspace" / "workspace_alwrity" / "media" / media_subdir
).resolve()
logger.debug(f"[Podcast] get_podcast_media_dir: type={media_type}, user_id={user_id}, sanitized={user_id and _sanitize_user_id(user_id)}, resolved={resolved_dir}")
logger.warning(f"[Podcast] get_podcast_media_dir: type={media_type}, user_id={user_id}, resolved={resolved_dir}")
if ensure_exists:
resolved_dir.mkdir(parents=True, exist_ok=True)
@@ -61,14 +81,11 @@ def get_podcast_media_dir(
def get_podcast_media_read_dirs(media_type: MediaType, user_id: str | None = None) -> list[Path]:
"""Return ordered directories to search (tenant path first, then legacy global path)."""
dirs: list[Path] = []
if user_id:
dirs.append(get_podcast_media_dir(media_type, user_id))
logger.debug(f"[Podcast] get_podcast_media_read_dirs: added user dir for {user_id}")
dirs.append(get_podcast_media_dir(media_type, None))
logger.debug(f"[Podcast] get_podcast_media_read_dirs: dirs={dirs}")
return dirs
"""
Return directories to search for podcast media.
Now workspace-only (no legacy fallback).
"""
return [get_podcast_media_dir(media_type, user_id)]
def get_podcast_audio_service(user_id: str | None = None) -> StoryAudioGenerationService:

View File

@@ -20,7 +20,7 @@ from services.podcast_bible_service import PodcastBibleService
from utils.asset_tracker import save_asset_to_library
from loguru import logger
import os
from ..constants import PODCAST_IMAGES_DIR
from ..constants import get_podcast_media_dir
from ..models import (
PodcastAnalyzeRequest,
PodcastAnalyzeResponse,
@@ -247,7 +247,8 @@ async def analyze_podcast_idea(
if image_result and image_result.image_bytes:
img_id = str(uuid.uuid4())[:8]
filename = f"presenter_podcast_{user_id}_{img_id}.png"
avatars_dir = PODCAST_IMAGES_DIR / "avatars"
images_dir = get_podcast_media_dir("image", user_id, ensure_exists=True)
avatars_dir = images_dir / "avatars"
avatars_dir.mkdir(parents=True, exist_ok=True)
output_path = avatars_dir / filename

View File

@@ -12,7 +12,15 @@ from pathlib import Path
from urllib.parse import urlparse
import tempfile
import uuid
import hashlib
import time
import shutil
import requests
import asyncio
from concurrent.futures import ThreadPoolExecutor
import asyncio
from concurrent.futures import ThreadPoolExecutor
from services.database import get_db
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
@@ -31,6 +39,124 @@ from ..models import (
router = APIRouter()
# Thread pool for CPU/IO-intensive voice clone operations
_audio_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="podcast_audio")
# In-memory LRU cache for voice samples (per user) to avoid re-downloading
_voice_sample_cache: dict[str, tuple[float, bytes]] = {}
_VOICE_SAMPLE_CACHE_TTL = 1800 # 30 minutes
def _get_cached_voice_sample(cache_key: str) -> Optional[bytes]:
"""Get voice sample bytes from in-memory cache if fresh."""
if cache_key in _voice_sample_cache:
ts, data = _voice_sample_cache[cache_key]
if time.time() - ts < _VOICE_SAMPLE_CACHE_TTL:
logger.debug(f"[Podcast] Voice sample cache hit for {cache_key[:16]}...")
return data
del _voice_sample_cache[cache_key]
return None
def _cache_voice_sample(cache_key: str, data: bytes) -> None:
"""Store voice sample bytes in in-memory cache."""
# Evict oldest entries if cache grows too large
if len(_voice_sample_cache) > 50:
oldest_key = min(_voice_sample_cache, key=lambda k: _voice_sample_cache[k][0])
del _voice_sample_cache[oldest_key]
_voice_sample_cache[cache_key] = (time.time(), data)
def _get_latest_voice_sample_url(user_id: str, db) -> Optional[str]:
"""Get the latest voice sample URL for a user from their voice clone assets."""
try:
from models.content_asset_models import ContentAsset, AssetType, AssetSource
from sqlalchemy import desc
asset = db.query(ContentAsset).filter(
ContentAsset.user_id == user_id,
ContentAsset.asset_type == AssetType.AUDIO,
ContentAsset.source_module == AssetSource.VOICE_CLONER,
).order_by(desc(ContentAsset.created_at)).first()
if asset and asset.file_url:
logger.info(f"[Podcast] Found voice sample for user {user_id}: {asset.file_url}")
return asset.file_url
logger.warning(f"[Podcast] No voice sample asset found for user {user_id}")
return None
except Exception as e:
logger.error(f"[Podcast] Error fetching voice sample URL: {e}")
return None
def _fetch_voice_sample(voice_sample_url: str, user_id: str) -> Optional[bytes]:
"""Fetch voice sample audio bytes from URL, with caching."""
cache_key = hashlib.md5(f"{user_id}:{voice_sample_url}".encode()).hexdigest()
# Check in-memory cache first
cached = _get_cached_voice_sample(cache_key)
if cached is not None:
return cached
try:
from utils.media_utils import resolve_media_path
# Try resolving as a local workspace path first (fastest)
if "/api/assets/" in voice_sample_url:
# Resolve user workspace path directly
sanitized_uid = "".join(c for c in user_id if c.isalnum() or c in ("-", "_"))
from api.podcast.constants import ROOT_DIR
parts = voice_sample_url.split("/")
# Expected: /api/assets/{user_id}/voice_samples/{filename}
try:
idx = parts.index("voice_samples")
filename = parts[idx + 1].split("?")[0]
local_path = ROOT_DIR / "workspace" / f"workspace_{sanitized_uid}" / "assets" / "voice_samples" / filename
if local_path.exists():
data = local_path.read_bytes()
_cache_voice_sample(cache_key, data)
logger.info(f"[Podcast] Voice sample loaded from workspace: {local_path}")
return data
except (ValueError, IndexError):
pass
# Fall back to media utils resolver
local_path = resolve_media_path(voice_sample_url)
if local_path and local_path.exists():
data = local_path.read_bytes()
_cache_voice_sample(cache_key, data)
return data
# Try resolving as a podcast audio file
if "/api/podcast/audio/" in voice_sample_url:
filename = voice_sample_url.split("/api/podcast/audio/")[-1].split("?")[0]
try:
audio_dir = get_podcast_media_dir("audio", user_id)
local_path = audio_dir / filename
if local_path.exists():
data = local_path.read_bytes()
_cache_voice_sample(cache_key, data)
return data
except Exception:
pass
# Try direct HTTP fetch as fallback
if voice_sample_url.startswith("http"):
logger.info(f"[Podcast] Fetching voice sample via HTTP: {voice_sample_url[:80]}...")
resp = requests.get(voice_sample_url, timeout=30)
if resp.status_code == 200:
data = resp.content
_cache_voice_sample(cache_key, data)
logger.info(f"[Podcast] Voice sample fetched via HTTP ({len(data)} bytes)")
return data
logger.warning(f"[Podcast] Could not fetch voice sample from: {voice_sample_url}")
return None
except Exception as e:
logger.error(f"[Podcast] Error fetching voice sample: {e}")
return None
@router.post("/audio/upload")
async def upload_podcast_audio(
@@ -125,35 +251,176 @@ async def generate_podcast_audio(
raise HTTPException(status_code=400, detail="Text is required")
try:
audio_service = get_podcast_audio_service(user_id)
logger.warning(f"[Podcast] Generating audio with service dir: {audio_service.output_dir}")
result: StoryAudioResult = audio_service.generate_ai_audio(
scene_number=0,
scene_title=request.scene_title,
text=request.text.strip(),
user_id=user_id,
voice_id=request.voice_id or "Wise_Woman",
custom_voice_id=request.custom_voice_id,
speed=request.speed or 1.0, # Normal speed (was 0.9, but too slow - causing duration issues)
volume=request.volume or 1.0,
pitch=request.pitch or 0.0, # Normal pitch (0.0 = neutral)
emotion=request.emotion or "neutral",
english_normalization=request.english_normalization or False,
sample_rate=request.sample_rate,
bitrate=request.bitrate,
channel=request.channel,
format=request.format,
language_boost=request.language_boost,
enable_sync_mode=request.enable_sync_mode,
# Determine if we should use voice clone path
# Voice clone is used when: explicitly requested, OR when voice_id/custom_voice_id indicates a clone
# (cloned voice IDs start with "vc_" or match the placeholder "MY_VOICE_CLONE")
_vid = request.voice_id or ""
_cvid = request.custom_voice_id or ""
is_voice_clone = request.use_voice_clone or (
_cvid.startswith("vc_") or _cvid == "MY_VOICE_CLONE"
) or (
_vid.startswith("vc_") or _vid == "MY_VOICE_CLONE"
)
# Override URL to use podcast endpoint instead of story endpoint
if result.get("audio_url") and "/api/story/audio/" in result.get("audio_url", ""):
audio_filename = result.get("audio_filename", "")
result["audio_url"] = f"/api/podcast/audio/{audio_filename}"
logger.warning(f"[Podcast] Audio generated - path: {result.get('audio_path')}, url: {result.get('audio_url')}")
# If voice_id is a clone ID, normalize it to use Wise_Woman for TTS fallback
effective_voice_id = _vid if not (_vid.startswith("vc_") or _vid == "MY_VOICE_CLONE") else "Wise_Woman"
logger.warning(f"[Podcast] Audio request: use_voice_clone={request.use_voice_clone}, voice_id={request.voice_id}, custom_voice_id={request.custom_voice_id}, is_voice_clone={is_voice_clone}, voice_sample_url={request.voice_sample_url}, voice_clone_engine={request.voice_clone_engine}")
# Voice clone path: use user's voice sample with scene text as reference
if is_voice_clone:
# If no voice_sample_url provided, try to fetch it from the user's latest voice clone
voice_sample_url = request.voice_sample_url
if not voice_sample_url:
try:
voice_sample_url = _get_latest_voice_sample_url(user_id, db)
logger.warning(f"[Podcast] DB fallback voice sample URL for user {user_id}: {voice_sample_url}")
except Exception as e:
logger.warning(f"[Podcast] Could not fetch voice sample URL: {e}")
if voice_sample_url:
from services.llm_providers.main_audio_generation import qwen3_voice_clone, cosyvoice_voice_clone
engine = (request.voice_clone_engine or "qwen3").lower()
logger.warning(f"[Podcast] 🔊 Voice clone path: engine={engine}, scene='{request.scene_title}', voice_sample_url={voice_sample_url[:80]}...")
# Download voice sample from URL (with caching)
logger.warning(f"[Podcast] Fetching voice sample from: {voice_sample_url}")
try:
voice_sample_bytes = _fetch_voice_sample(voice_sample_url, user_id)
except Exception as fetch_err:
logger.error(f"[Podcast] ❌ Failed to fetch voice sample: {fetch_err}", exc_info=True)
raise HTTPException(status_code=400, detail=f"Could not fetch voice sample: {str(fetch_err)}")
logger.warning(f"[Podcast] Voice sample fetch result: {len(voice_sample_bytes) if voice_sample_bytes else 0} bytes")
if not voice_sample_bytes:
raise HTTPException(status_code=400, detail=f"Could not fetch voice sample from {voice_sample_url}")
scene_text = request.text.strip()
if len(scene_text) > 4000:
scene_text = scene_text[:4000]
# Run voice clone in thread pool to avoid blocking the event loop
loop = asyncio.get_event_loop()
try:
if engine == "minimax":
from services.llm_providers.main_audio_generation import clone_voice
import random
import string
random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=8))
custom_vid = request.custom_voice_id or f"vc_{random_suffix}"
result_obj = await loop.run_in_executor(
_audio_executor,
lambda cv=custom_vid: clone_voice(
audio_bytes=voice_sample_bytes,
custom_voice_id=cv,
text=scene_text,
user_id=user_id,
),
)
audio_bytes = result_obj.preview_audio_bytes
provider = "minimax"
model = "minimax/voice-clone"
elif engine == "cosyvoice":
result_obj = await loop.run_in_executor(
_audio_executor,
lambda: cosyvoice_voice_clone(
audio_bytes=voice_sample_bytes,
text=scene_text,
user_id=user_id,
),
)
audio_bytes = result_obj.preview_audio_bytes
provider = "wavespeed-ai"
model = "wavespeed-ai/cosyvoice-tts/voice-clone"
else:
result_obj = await loop.run_in_executor(
_audio_executor,
lambda: qwen3_voice_clone(
audio_bytes=voice_sample_bytes,
text=scene_text,
user_id=user_id,
),
)
audio_bytes = result_obj.preview_audio_bytes
provider = "wavespeed-ai"
model = "wavespeed-ai/qwen3-tts/voice-clone"
logger.warning(f"[Podcast] 🔊 Voice clone result: {len(audio_bytes) if audio_bytes else 0} bytes, provider={provider}")
except HTTPException:
raise
except Exception as clone_err:
logger.error(f"[Podcast] ❌ Voice clone failed: {clone_err}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Voice clone generation failed: {str(clone_err)}")
# Save audio bytes to file
audio_service = get_podcast_audio_service(user_id)
audio_filename = f"scene_{request.scene_id}_{uuid.uuid4().hex[:8]}.mp3"
audio_path = audio_service.output_dir / audio_filename
with open(audio_path, "wb") as f:
f.write(audio_bytes)
file_size = len(audio_bytes)
audio_url = f"/api/podcast/audio/{audio_filename}"
cost = max(0.005, 0.005 * (len(scene_text) / 100.0))
result = {
"audio_path": str(audio_path),
"audio_filename": audio_filename,
"audio_url": audio_url,
"file_size": file_size,
"provider": provider,
"model": model,
"cost": cost,
"scene_number": 0,
"scene_title": request.scene_title,
}
else:
# Standard TTS path - but NOT if custom_voice_id is a clone ID
# Clone IDs (vc_*, MY_VOICE_CLONE) are not valid for minimax TTS
if is_voice_clone:
logger.warning(f"[Podcast] ⚠️ Voice clone detected but no voice sample available - falling back to standard TTS with voice_id={effective_voice_id}")
effective_custom_voice_id = request.custom_voice_id
if effective_custom_voice_id and (
effective_custom_voice_id.startswith("vc_") or
effective_custom_voice_id == "MY_VOICE_CLONE"
):
logger.warning(f"[Podcast] Ignoring clone ID '{effective_custom_voice_id}' in standard TTS path - no voice sample URL available")
effective_custom_voice_id = None
audio_service = get_podcast_audio_service(user_id)
logger.warning(f"[Podcast] Standard TTS path: voice_id={effective_voice_id}, custom_voice_id={effective_custom_voice_id}")
result: StoryAudioResult = audio_service.generate_ai_audio(
scene_number=0,
scene_title=request.scene_title,
text=request.text.strip(),
user_id=user_id,
voice_id=effective_voice_id,
custom_voice_id=effective_custom_voice_id,
speed=request.speed or 1.0, # Normal speed (was 0.9, but too slow - causing duration issues)
volume=request.volume or 1.0,
pitch=request.pitch or 0.0, # Normal pitch (0.0 = neutral)
emotion=request.emotion or "neutral",
english_normalization=request.english_normalization or False,
sample_rate=request.sample_rate,
bitrate=request.bitrate,
channel=request.channel,
format=request.format,
language_boost=request.language_boost,
enable_sync_mode=request.enable_sync_mode,
)
# Override URL to use podcast endpoint instead of story endpoint
if result.get("audio_url") and "/api/story/audio/" in result.get("audio_url", ""):
audio_filename = result.get("audio_filename", "")
result["audio_url"] = f"/api/podcast/audio/{audio_filename}"
logger.warning(f"[Podcast] Audio generated - path: {result.get('audio_path')}, url: {result.get('audio_url')}")
except Exception as exc:
logger.error(f"[Podcast] ❌ Audio generation failed: {exc}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Audio generation failed: {exc}")
# Save to asset library (podcast module)

View File

@@ -19,15 +19,18 @@ from services.llm_providers.main_image_generation import generate_image
from services.llm_providers.main_image_editing import edit_image
from utils.asset_tracker import save_asset_to_library
from loguru import logger
from ..constants import PODCAST_IMAGES_DIR
from ..constants import get_podcast_media_dir, PODCAST_AVATARS_SUBDIR
from ..presenter_personas import choose_persona_id, get_persona
router = APIRouter()
# Avatar subdirectory
AVATAR_SUBDIR = "avatars"
PODCAST_AVATARS_DIR = PODCAST_IMAGES_DIR / AVATAR_SUBDIR
PODCAST_AVATARS_DIR.mkdir(parents=True, exist_ok=True)
AVATAR_SUBDIR = PODCAST_AVATARS_SUBDIR
def _get_podcast_avatars_dir(user_id: str) -> Path:
"""Get podcast avatars directory for a user (workspace-aware)."""
return get_podcast_media_dir("image", user_id, ensure_exists=True) / AVATAR_SUBDIR
@router.post("/avatar/upload")
@@ -57,7 +60,8 @@ async def upload_podcast_avatar(
file_ext = Path(file.filename).suffix or '.png'
unique_id = str(uuid.uuid4())[:8]
avatar_filename = f"avatar_{project_id or 'temp'}_{unique_id}{file_ext}"
avatar_path = PODCAST_AVATARS_DIR / avatar_filename
avatars_dir = _get_podcast_avatars_dir(user_id)
avatar_path = avatars_dir / avatar_filename
# Save file
with open(avatar_path, "wb") as f:
@@ -163,7 +167,8 @@ async def make_avatar_presentable(
# Save transformed avatar
unique_id = str(uuid.uuid4())[:8]
transformed_filename = f"presenter_transformed_{project_id or 'temp'}_{unique_id}.png"
transformed_path = PODCAST_AVATARS_DIR / transformed_filename
avatars_dir = _get_podcast_avatars_dir(user_id)
transformed_path = avatars_dir / transformed_filename
with open(transformed_path, "wb") as f:
f.write(result.image_bytes)
@@ -345,7 +350,8 @@ async def generate_podcast_presenters(
# Save avatar
unique_id = str(uuid.uuid4())[:8]
avatar_filename = f"presenter_{project_id or 'temp'}_{i+1}_{unique_id}.png"
avatar_path = PODCAST_AVATARS_DIR / avatar_filename
avatars_dir = _get_podcast_avatars_dir(user_id)
avatar_path = avatars_dir / avatar_filename
with open(avatar_path, "wb") as f:
f.write(result.image_bytes)

View File

@@ -191,8 +191,11 @@ async def generate_chart_preview(
"""
user_id = require_authenticated_user(current_user)
# Debug logging
logger.warning(f"[Broll] Chart preview request: type={request.chart_type}, title={request.title}, chart_data keys={list(request.chart_data.keys())}, user_id={user_id}")
try:
broll_service = get_broll_service()
broll_service = get_broll_service(user_id=user_id)
chart_id = uuid.uuid4().hex[:8]
preview_path = broll_service.generate_chart_preview(
@@ -203,11 +206,17 @@ async def generate_chart_preview(
chart_id=chart_id,
)
# If chart generation failed (empty path), return a placeholder instead of 500
if not preview_path:
raise HTTPException(status_code=500, detail="Failed to generate chart preview")
# Return a fallback response so frontend doesn't crash
logger.warning(f"[Broll] Chart preview skipped - invalid data for type: {request.chart_type}")
return ChartPreviewResponse(
preview_url="",
chart_id=chart_id,
)
preview_filename = Path(preview_path).name
preview_url = f"/api/podcast/broll/preview/{chart_id}/{preview_filename}"
preview_url = f"/api/podcast/preview/{chart_id}/{preview_filename}"
return ChartPreviewResponse(
preview_url=preview_url,
@@ -324,17 +333,29 @@ async def compose_broll_videos(
async def serve_chart_preview(
chart_id: str,
filename: str,
current_user: Dict[str, Any] = Depends(get_current_user),
user_id: Optional[str] = None,
):
"""Serve chart preview PNG files."""
user_id = require_authenticated_user(current_user)
"""
Serve chart preview PNG files.
broll_service = get_broll_service()
- user_id passed as query param for multi-tenant workspace resolution
- endpoint is public (no auth) to allow direct image loading in browser
"""
# Validate filename to prevent directory traversal
if ".." in filename or "/" in filename or "\\" in filename:
raise HTTPException(status_code=400, detail="Invalid filename")
logger.warning(f"[Broll] serve_chart_preview: chart_id={chart_id}, filename={filename}, user_id={user_id}")
broll_service = get_broll_service(user_id=user_id)
expected_filename = broll_service.get_chart_preview_filename(chart_id)
if filename != expected_filename:
raise HTTPException(status_code=404, detail="Chart preview not found")
file_path = broll_service.get_output_path(filename)
# Use expected_filename to get the correct path
file_path = broll_service.get_output_path(expected_filename)
logger.warning(f"[Broll] serve_chart_preview: resolved path={file_path}, exists={file_path.exists()}")
if not file_path.exists():
raise HTTPException(status_code=404, detail="Chart preview not found")
@@ -342,7 +363,7 @@ async def serve_chart_preview(
return FileResponse(
path=str(file_path),
media_type="image/png",
filename=filename,
filename=expected_filename,
)

View File

@@ -17,7 +17,7 @@ from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_image_generation import generate_image, generate_character_image
from utils.asset_tracker import save_asset_to_library
from loguru import logger
from ..constants import PODCAST_IMAGES_DIR
from ..constants import get_podcast_media_dir
from ..models import PodcastImageRequest, PodcastImageResponse
router = APIRouter()
@@ -377,14 +377,14 @@ async def generate_podcast_scene_image(
user_id=user_id
)
# Save image to podcast images directory
PODCAST_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
# Save image to podcast images directory (workspace-aware)
images_dir = get_podcast_media_dir("image", user_id, ensure_exists=True)
# Generate filename
clean_title = "".join(c if c.isalnum() or c in ('-', '_') else '_' for c in request.scene_title[:30])
unique_id = str(uuid.uuid4())[:8]
image_filename = f"scene_{request.scene_id}_{clean_title}_{unique_id}.png"
image_path = PODCAST_IMAGES_DIR / image_filename
image_path = images_dir / image_filename
# Save image
with open(image_path, "wb") as f:
@@ -470,16 +470,17 @@ async def serve_podcast_image(
Query parameter is useful for HTML elements like <img> that cannot send custom headers.
Supports subdirectories like avatars/
"""
require_authenticated_user(current_user)
user_id = require_authenticated_user(current_user)
# Security check: ensure path doesn't contain path traversal or absolute paths
if ".." in path or path.startswith("/"):
raise HTTPException(status_code=400, detail="Invalid path")
image_path = (PODCAST_IMAGES_DIR / path).resolve()
images_dir = get_podcast_media_dir("image", user_id)
image_path = (images_dir / path).resolve()
# Security check: ensure resolved path is within PODCAST_IMAGES_DIR
if not str(image_path).startswith(str(PODCAST_IMAGES_DIR)):
# Security check: ensure resolved path is within images_dir
if not str(image_path).startswith(str(images_dir)):
raise HTTPException(status_code=403, detail="Access denied")
if not image_path.exists():

View File

@@ -11,6 +11,7 @@ from typing import Optional, Dict, Any
from services.database import get_db
from middleware.auth_middleware import get_current_user
from services.podcast_service import PodcastService
from loguru import logger
from ..models import (
PodcastProjectResponse,
CreateProjectRequest,
@@ -106,14 +107,21 @@ async def update_project(
current_user: Dict[str, Any] = Depends(get_current_user),
):
"""Update a podcast project state."""
import time
start_time = time.time()
try:
user_id = current_user.get("user_id") or current_user.get("id")
if not user_id:
logger.error(f"[Podcast] update_project: No user_id found in current_user: {current_user}")
raise HTTPException(status_code=401, detail="User ID not found")
logger.warning(f"[Podcast] update_project: project_id={project_id}, user_id={user_id}")
logger.warning(f"[Podcast] update_project: request data: {request.model_dump()}")
# Get only field names being updated (not full data to avoid console flooding)
request_dict = request.model_dump(exclude_none=True)
updated_fields = list(request_dict.keys())
logger.warning(f"[Podcast] ===== UPDATE_PROJECT_START =====")
logger.warning(f"[Podcast] project_id={project_id}, user_id={user_id}, fields={updated_fields}")
service = PodcastService(db)
@@ -140,10 +148,15 @@ async def update_project(
updates = request.model_dump(exclude_unset=True)
project = service.update_project(user_id, project_id, **updates)
duration_ms = int((time.time() - start_time) * 1000)
logger.warning(f"[Podcast] ===== UPDATE_PROJECT_END (took {duration_ms}ms) =====")
return PodcastProjectResponse.model_validate(project)
except HTTPException:
raise
except Exception as e:
duration_ms = int((time.time() - start_time) * 1000)
logger.error(f"[Podcast] ===== UPDATE_PROJECT_ERROR (took {duration_ms}ms): {str(e)} =====")
raise HTTPException(status_code=500, detail=f"Error updating project: {str(e)}")

View File

@@ -9,6 +9,7 @@ from typing import Dict, Any, List
from types import SimpleNamespace
import json
import re
import time
from datetime import datetime, timezone
from sqlalchemy.orm import Session
@@ -138,10 +139,12 @@ async def podcast_research_exa(
Run podcast research via Exa and then use LLM to extract deep insights.
Uses Podcast Bible and Analysis context for hyper-personalization.
"""
start_time = time.time()
user_id = require_authenticated_user(current_user)
logger.warning(f"[Podcast Research] ========== REQUEST START ==========")
logger.warning(f"[Podcast Research] User: {user_id}, Topic: {request.topic[:80]}...")
logger.warning(f"[Podcast Research] Queries count: {len(request.queries) if request.queries else 0}")
# Log only essential info, not full request data
logger.warning(f"[Podcast Research] ===== RESEARCH_START =====")
logger.warning(f"[Podcast Research] user={user_id}, topic='{request.topic[:50]}...', queries={len(request.queries) if request.queries else 0}")
queries = [q.strip() for q in request.queries if q and q.strip()]
@@ -424,6 +427,10 @@ QUALITY STANDARDS:
include_avatar_phase=True,
)
duration_ms = int((time.time() - start_time) * 1000)
logger.warning(f"[Podcast Research] ===== RESEARCH_END (took {duration_ms}ms) =====")
logger.warning(f"[Podcast Research] sources={len(sources_payload)}, insights={len(key_insights)}, summary_len={len(summary)}")
return PodcastExaResearchResponse(
sources=sources_payload,
search_queries=result.get("search_queries", queries) if isinstance(result, dict) else queries,

View File

@@ -9,6 +9,7 @@ from typing import Dict, Any, Optional
from pydantic import BaseModel, Field
import json
import re
import time
from middleware.auth_middleware import get_current_user
from api.story_writer.utils.auth import require_authenticated_user
@@ -60,11 +61,11 @@ async def generate_podcast_script(
Generate a podcast script outline (scenes + lines) using podcast-oriented prompting.
"""
user_id = require_authenticated_user(current_user)
logger.warning(f"[ScriptGen] ========== SCRIPT GENERATION START ==========")
logger.warning(f"[ScriptGen] Topic: {request.idea[:60]}...")
logger.warning(f"[ScriptGen] Duration: {request.duration_minutes} min, Speakers: {request.speakers}")
start_time = time.time()
logger.warning(f"[ScriptGen] ===== SCRIPT_GEN_START =====")
logger.warning(f"[ScriptGen] user={user_id}, topic='{request.idea[:50]}...', duration={request.duration_minutes}min, speakers={request.speakers}")
podcast_mode = (request.podcast_mode or "video_only").strip().lower()
logger.warning(f"[ScriptGen] Has research: {bool(request.research)}, Has bible: {bool(request.bible)}, Has analysis: {bool(request.analysis)}, Mode: {podcast_mode}")
logger.warning(f"[ScriptGen] research={bool(request.research)}, bible={bool(request.bible)}, analysis={bool(request.analysis)}, mode={podcast_mode}")
research_fact_cards = request.research.get("factCards", []) if request.research else []
# Build comprehensive research context for higher-quality scripts
@@ -399,5 +400,8 @@ COST OPTIMIZATION:
logger.warning(f"[ScriptGen] Script generated: {len(scenes)} scenes, {total_lines_output}/{total_lines_input} lines")
if dropped_empty_lines > 0:
logger.warning(f"[ScriptGen] Dropped {dropped_empty_lines} empty lines")
duration_ms = int((time.time() - start_time) * 1000)
logger.warning(f"[ScriptGen] ===== SCRIPT_GEN_END (took {duration_ms}ms) =====")
return PodcastScriptResponse(scenes=scenes)

View File

@@ -223,6 +223,9 @@ class PodcastAudioRequest(BaseModel):
text: str
voice_id: Optional[str] = "Wise_Woman"
custom_voice_id: Optional[str] = None # Voice clone ID for custom voice
use_voice_clone: Optional[bool] = False # If True, use voice clone with voice_sample_url
voice_sample_url: Optional[str] = None # URL to user's voice sample for cloning
voice_clone_engine: Optional[str] = None # Engine: "qwen3", "minimax", "cosyvoice"
speed: Optional[float] = 1.0
volume: Optional[float] = 1.0
pitch: Optional[float] = 0.0