Files
ALwrity/backend/api/podcast/handlers/audio.py
ajaysi e59c77b221 feat: Improve podcast maker UX and fix bugs
Frontend:
- Add progress modals with educational content for analysis and voice cloning
- Improve tab navigation in AnalysisPanel (combine Titles, Hook, CTA into one tab)
- Fix tab styling to make inactive tabs visible
- Fix avatar 'Make Presentable' not updating preview (blob URL handling)
- Improve mobile responsiveness for avatar tabs
- Clean up verbose console logging (AnalysisPanel, demoMode, RobustCamera)
- Add sequential progress messages instead of cycling

Backend:
- Fix 'Depends object has no attribute get' error in auth and image editing
- Use get_session_for_user instead of get_db outside FastAPI DI context
- Reduce WARNING logs to DEBUG in audio handler
- Add proper emphasis boolean handling in script generation
- Add missing fields to PodcastScene and PodcastSceneLine models
- Fix voice cloning cost estimate display issue
2026-04-07 16:28:11 +05:30

400 lines
17 KiB
Python

"""
Podcast Audio Handlers
Audio generation, combining, and serving endpoints.
"""
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from typing import Dict, Any, Optional
from pathlib import Path
from urllib.parse import urlparse
import tempfile
import uuid
import shutil
from services.database import get_db
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
from api.story_writer.utils.auth import require_authenticated_user
from utils.asset_tracker import save_asset_to_library
from models.story_models import StoryAudioResult
from loguru import logger
from ..constants import get_podcast_audio_service, get_podcast_media_dir
from ..utils import _resolve_podcast_media_file
from ..models import (
PodcastAudioRequest,
PodcastAudioResponse,
PodcastCombineAudioRequest,
PodcastCombineAudioResponse,
)
# Router that collects the podcast audio endpoints declared in this module
# (upload, generate, combine, serve). Where it is mounted is decided elsewhere.
router = APIRouter()
@router.post("/audio/upload")
async def upload_podcast_audio(
    file: UploadFile = File(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Upload an audio file (voice sample) for a podcast project.

    Returns the audio URL for use in video generation.

    Args:
        file: The uploaded audio file. Accepted when its content type starts
            with ``audio/`` or, failing that, when its extension is one of
            ``.mp3``, ``.wav``, ``.m4a``, ``.aac``.
        project_id: Optional project identifier. When given it is embedded
            (sanitised) in the stored filename and the upload is also
            recorded in the asset library.
        current_user: Authenticated user payload from the auth dependency.
        db: Database session used for the asset-library record.

    Returns:
        A dict with ``audio_url``, ``audio_filename`` and ``message``.

    Raises:
        HTTPException: 400 if the file is not audio or exceeds 20MB;
            500 if storing the file fails.
    """
    user_id = require_authenticated_user(current_user)

    # Clients may omit the filename; treat that as "no extension" instead of
    # letting Path(None) raise a TypeError.
    original_ext = Path(file.filename or "").suffix

    # Validate file type
    content_type = file.content_type or ""
    if not content_type.startswith("audio/"):
        # Without an audio/* content type (e.g. application/octet-stream) the
        # upload is only accepted when the extension identifies it as audio.
        allowed_exts = {".mp3", ".wav", ".m4a", ".aac"}
        if original_ext.lower() not in allowed_exts:
            raise HTTPException(status_code=400, detail="File must be an audio file")

    # Validate file size (max 20MB)
    file_content = await file.read()
    if len(file_content) > 20 * 1024 * 1024:
        raise HTTPException(status_code=400, detail="Audio file size must be less than 20MB")

    try:
        # Generate filename. Both project_id and the extension are
        # client-controlled, so restrict them to characters that are safe in a
        # single path segment and that the serving endpoint will accept.
        ext_body = "".join(ch for ch in original_ext[1:] if ch.isalnum())
        file_ext = f".{ext_body}" if ext_body else ".mp3"
        if project_id:
            name_part = "".join(
                ch if ch.isalnum() or ch in "-_" else "_" for ch in project_id
            )
        else:
            name_part = "temp"
        unique_id = str(uuid.uuid4())[:8]
        audio_filename = f"audio_{name_part}_{unique_id}{file_ext}"

        audio_base_dir = get_podcast_media_dir("audio", user_id, ensure_exists=True)
        audio_path = audio_base_dir / audio_filename

        # Save file
        with open(audio_path, "wb") as f:
            f.write(file_content)
        logger.info(f"[Podcast] Audio uploaded: {audio_path}")

        # Create audio URL
        audio_url = f"/api/podcast/audio/{audio_filename}"

        # Save to asset library if project_id provided
        if project_id:
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="audio",
                    source_module="podcast_maker",
                    filename=audio_filename,
                    file_url=audio_url,
                    file_path=str(audio_path),
                    file_size=len(file_content),
                    mime_type=file.content_type,
                    title=f"Uploaded Audio - {project_id}",
                    description="Uploaded podcast audio/voice sample",
                    tags=["podcast", "audio", "upload", project_id],
                    asset_metadata={
                        "project_id": project_id,
                        "type": "uploaded_audio",
                        "status": "completed",
                    },
                )
            except Exception as e:
                # Best effort: the upload itself succeeded, so a failed
                # library record must not fail the request.
                logger.warning(f"[Podcast] Failed to save audio asset: {e}")

        return {
            "audio_url": audio_url,
            "audio_filename": audio_filename,
            "message": "Audio uploaded successfully"
        }
    except Exception as exc:
        # loguru attaches the traceback via logger.exception(); the stdlib
        # style exc_info=True keyword is not honoured by loguru.
        logger.exception(f"[Podcast] Audio upload failed: {exc}")
        raise HTTPException(status_code=500, detail=f"Audio upload failed: {str(exc)}") from exc
@router.post("/audio", response_model=PodcastAudioResponse)
async def generate_podcast_audio(
    request: PodcastAudioRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate AI audio for a podcast scene using shared audio service.

    Args:
        request: Scene text plus voice settings. Unset or falsy settings fall
            back to the defaults applied below.
        current_user: Authenticated user payload from the auth dependency.
        db: Database session used for the asset-library record.

    Returns:
        PodcastAudioResponse describing the generated audio file.

    Raises:
        HTTPException: 400 if the text is empty; 500 if generation fails.
            An HTTPException raised by the audio service is passed through
            unchanged.
    """
    user_id = require_authenticated_user(current_user)

    if not request.text or not request.text.strip():
        raise HTTPException(status_code=400, detail="Text is required")

    try:
        audio_service = get_podcast_audio_service(user_id)
        # Routine diagnostics: DEBUG, in line with serve_podcast_audio.
        logger.debug(f"[Podcast] Generating audio with service dir: {audio_service.output_dir}")

        result: StoryAudioResult = audio_service.generate_ai_audio(
            scene_number=0,
            scene_title=request.scene_title,
            text=request.text.strip(),
            user_id=user_id,
            voice_id=request.voice_id or "Wise_Woman",
            custom_voice_id=request.custom_voice_id,
            speed=request.speed or 1.0,  # Normal speed (was 0.9, but too slow - causing duration issues)
            volume=request.volume or 1.0,
            pitch=request.pitch or 0.0,  # Normal pitch (0.0 = neutral)
            emotion=request.emotion or "neutral",
            english_normalization=request.english_normalization or False,
            sample_rate=request.sample_rate,
            bitrate=request.bitrate,
            channel=request.channel,
            format=request.format,
            language_boost=request.language_boost,
            enable_sync_mode=request.enable_sync_mode,
        )

        # Override URL to use podcast endpoint instead of story endpoint
        if result.get("audio_url") and "/api/story/audio/" in result.get("audio_url", ""):
            audio_filename = result.get("audio_filename", "")
            result["audio_url"] = f"/api/podcast/audio/{audio_filename}"

        logger.debug(f"[Podcast] Audio generated - path: {result.get('audio_path')}, url: {result.get('audio_url')}")
    except HTTPException:
        # Keep the status code chosen further down the stack instead of
        # flattening it into a generic 500.
        raise
    except Exception as exc:
        logger.exception(f"[Podcast] Audio generation failed: {exc}")
        raise HTTPException(status_code=500, detail=f"Audio generation failed: {exc}") from exc

    # Save to asset library (podcast module)
    try:
        if result.get("audio_url"):
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="audio",
                source_module="podcast_maker",
                filename=result.get("audio_filename", ""),
                file_url=result.get("audio_url", ""),
                file_path=result.get("audio_path"),
                file_size=result.get("file_size"),
                mime_type="audio/mpeg",
                title=f"{request.scene_title} - Podcast",
                description="Podcast scene narration",
                tags=["podcast", "audio", request.scene_id],
                provider=result.get("provider"),
                model=result.get("model"),
                cost=result.get("cost"),
                asset_metadata={
                    "scene_id": request.scene_id,
                    "scene_title": request.scene_title,
                    "status": "completed",
                },
            )
    except Exception as e:
        # Best effort: the audio exists, so a failed library record must not
        # fail the request.
        logger.warning(f"[Podcast] Failed to save audio asset: {e}")

    return PodcastAudioResponse(
        scene_id=request.scene_id,
        scene_title=request.scene_title,
        audio_filename=result.get("audio_filename", ""),
        audio_url=result.get("audio_url", ""),
        provider=result.get("provider", "wavespeed"),
        model=result.get("model", "minimax/speech-02-hd"),
        voice_id=result.get("voice_id", request.voice_id or "Wise_Woman"),
        text_length=result.get("text_length", len(request.text)),
        file_size=result.get("file_size", 0),
        cost=result.get("cost", 0.0),
    )
def _resolve_scene_audio_path(audio_url: str, user_id: Any) -> Optional[Path]:
    """Map a scene audio URL to an existing file on disk.

    Only local ``/api/podcast/audio/<file>`` URLs (and the legacy
    ``/api/story/audio/<file>`` form) are accepted. Anything else that is not
    an external URL is logged and skipped by returning ``None``.

    Raises:
        HTTPException: 400 when ``audio_url`` is an external (http...) URL.
    """
    if audio_url.startswith("http"):
        # External URL - would need to download
        logger.error(f"[Podcast] External URLs not supported: {audio_url}")
        raise HTTPException(
            status_code=400,
            detail="External URLs not supported. Please use local file paths."
        )

    if "/api/podcast/audio/" in audio_url and audio_url.startswith("/api/"):
        filename = audio_url.split("/api/podcast/audio/", 1)[1].split("?", 1)[0].strip()
    elif "/api/story/audio/" in audio_url and audio_url.startswith("/api/"):
        # Convert story audio URLs to podcast audio (they're in the same directory now)
        filename = audio_url.split("/api/story/audio/", 1)[1].split("?", 1)[0].strip()
        logger.info(f"[Podcast] Converting story audio URL to podcast: {audio_url} -> {filename}")
    else:
        # Includes bare filesystem paths: the value comes straight from the
        # request body, so it must never be opened as an arbitrary path.
        logger.error(f"[Podcast] Unsupported audio URL format: {audio_url}. Expected /api/podcast/audio/ or /api/story/audio/ URLs.")
        return None

    if not filename:
        logger.error(f"[Podcast] Could not extract filename from URL: {audio_url}")
        return None

    # Same traversal guard that serve_podcast_audio applies to its filename.
    if ".." in filename or "/" in filename or "\\" in filename:
        logger.error(f"[Podcast] Rejected unsafe audio filename: {filename} (from URL: {audio_url})")
        return None

    # Podcast audio files are stored in podcast_audio directory
    audio_path = _resolve_podcast_media_file(filename, "audio", user_id)
    if not audio_path or not audio_path.exists():
        logger.error(f"[Podcast] Audio file not found: {audio_path} (from URL: {audio_url})")
        return None
    return audio_path


@router.post("/combine-audio", response_model=PodcastCombineAudioResponse)
async def combine_podcast_audio(
    request: PodcastCombineAudioRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Combine multiple scene audio files into a single podcast audio file.

    Scene audio that cannot be resolved or loaded is logged and skipped; the
    request only fails when no clip at all could be loaded.

    Args:
        request: Project id plus parallel lists of scene ids and audio URLs.
        current_user: Authenticated user payload from the auth dependency.
        db: Database session used for the asset-library record.

    Returns:
        PodcastCombineAudioResponse with the combined file's URL, filename,
        total duration, size and the number of requested scenes.

    Raises:
        HTTPException: 400 for missing/mismatched inputs, external URLs, or
            when no valid audio could be loaded; 500 when MoviePy is missing
            or the combination fails.
    """
    user_id = require_authenticated_user(current_user)

    if not request.scene_ids or not request.scene_audio_urls:
        raise HTTPException(status_code=400, detail="Scene IDs and audio URLs are required")
    if len(request.scene_ids) != len(request.scene_audio_urls):
        raise HTTPException(status_code=400, detail="Scene IDs and audio URLs count must match")

    try:
        # Import moviepy for audio concatenation
        try:
            from moviepy import AudioFileClip, concatenate_audioclips
        except ImportError as exc:
            logger.error("[Podcast] MoviePy not available for audio combination")
            raise HTTPException(
                status_code=500,
                detail="Audio combination requires MoviePy. Please install: pip install moviepy"
            ) from exc

        audio_clips = []
        combined_audio = None
        total_duration = 0.0
        url_count = len(request.scene_audio_urls)

        try:
            # Log incoming request for debugging
            logger.info(f"[Podcast] Combining audio: {url_count} URLs received")
            for idx, url in enumerate(request.scene_audio_urls):
                logger.info(f"[Podcast] URL {idx+1}: {url}")

            # Load each audio file from the user's podcast audio directory
            for idx, audio_url in enumerate(request.scene_audio_urls):
                try:
                    audio_path = _resolve_scene_audio_path(audio_url, user_id)
                    if audio_path is None:
                        continue

                    # Load audio clip
                    audio_clip = AudioFileClip(str(audio_path))
                    audio_clips.append(audio_clip)
                    total_duration += audio_clip.duration
                    logger.info(f"[Podcast] Loaded audio {idx+1}/{url_count}: {audio_path.name} ({audio_clip.duration:.2f}s)")
                except HTTPException:
                    raise
                except Exception as e:
                    # Continue with other audio files
                    logger.exception(f"[Podcast] Failed to load audio {idx+1}: {e}")
                    continue

            if not audio_clips:
                raise HTTPException(status_code=400, detail="No valid audio files found to combine")

            # Concatenate all audio clips
            logger.info(f"[Podcast] Combining {len(audio_clips)} audio clips (total duration: {total_duration:.2f}s)")
            combined_audio = concatenate_audioclips(audio_clips)

            # Generate output filename. project_id is client-controlled, so
            # keep only characters that are safe in a single path segment.
            safe_project_id = "".join(
                ch if ch.isalnum() or ch in "-_" else "_"
                for ch in str(request.project_id)
            )
            output_filename = f"podcast_combined_{safe_project_id}_{uuid.uuid4().hex[:8]}.mp3"
            audio_base_dir = get_podcast_media_dir("audio", user_id, ensure_exists=True)
            output_path = audio_base_dir / output_filename

            # Write combined audio file
            combined_audio.write_audiofile(
                str(output_path),
                codec="mp3",
                bitrate="192k",
                logger=None,  # Suppress moviepy logging
            )
        finally:
            # Release clip resources on every path, not only on success.
            for clip in audio_clips:
                try:
                    clip.close()
                except Exception as e:
                    logger.warning(f"[Podcast] Failed to close audio clip: {e}")
            if combined_audio is not None:
                try:
                    combined_audio.close()
                except Exception as e:
                    logger.warning(f"[Podcast] Failed to close combined audio clip: {e}")

        file_size = output_path.stat().st_size
        audio_url = f"/api/podcast/audio/{output_filename}"
        logger.info(f"[Podcast] Combined audio saved: {output_path} ({file_size} bytes)")

        # Save to asset library
        try:
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="audio",
                source_module="podcast_maker",
                filename=output_filename,
                file_url=audio_url,
                file_path=str(output_path),
                file_size=file_size,
                mime_type="audio/mpeg",
                title=f"Combined Podcast - {request.project_id}",
                description=f"Combined podcast audio from {len(request.scene_ids)} scenes",
                tags=["podcast", "audio", "combined", request.project_id],
                asset_metadata={
                    "project_id": request.project_id,
                    "scene_ids": request.scene_ids,
                    "scene_count": len(request.scene_ids),
                    "total_duration": total_duration,
                    "status": "completed",
                },
            )
        except Exception as e:
            # Best effort: the combined file exists, so a failed library
            # record must not fail the request.
            logger.warning(f"[Podcast] Failed to save combined audio asset: {e}")

        return PodcastCombineAudioResponse(
            combined_audio_url=audio_url,
            combined_audio_filename=output_filename,
            total_duration=total_duration,
            file_size=file_size,
            scene_count=len(request.scene_ids),
        )
    except HTTPException:
        raise
    except Exception as exc:
        # loguru attaches the traceback via logger.exception(); the stdlib
        # style exc_info=True keyword is not honoured by loguru.
        logger.exception(f"[Podcast] Audio combination failed: {exc}")
        raise HTTPException(status_code=500, detail=f"Audio combination failed: {exc}") from exc
@router.get("/audio/{filename}")
async def serve_podcast_audio(
    filename: str,
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve generated podcast scene audio files.

    Supports authentication via Authorization header or token query parameter.
    Query parameter is useful for HTML elements like <audio> that cannot send custom headers.

    Args:
        filename: Bare audio filename (no directory components).
        current_user: Authenticated user payload from the auth dependency.

    Returns:
        FileResponse streaming the file with a Content-Type chosen from its
        extension (``audio/mpeg`` when the extension is not recognised).

    Raises:
        HTTPException: 400 if the filename contains path components;
            404 if the resolved file does not exist.
    """
    # Security check: ensure filename doesn't contain path traversal
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    user_id = require_authenticated_user(current_user)
    logger.debug(f"[Podcast] serve_podcast_audio called: user_id={user_id}, filename={filename}")

    audio_path = _resolve_podcast_media_file(filename, "audio", user_id)
    logger.debug(f"[Podcast] Resolved audio path: {audio_path}")

    # Answer with an explicit 404 rather than letting the response fail while
    # streaming a file that is not there.
    if not audio_path or not Path(audio_path).is_file():
        raise HTTPException(status_code=404, detail="Audio file not found")

    # Uploaded files keep their original extension, so the content type must
    # follow the extension instead of always claiming MP3.
    media_types = {
        ".mp3": "audio/mpeg",
        ".wav": "audio/wav",
        ".m4a": "audio/mp4",
        ".aac": "audio/aac",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
        ".webm": "audio/webm",
    }
    media_type = media_types.get(Path(filename).suffix.lower(), "audio/mpeg")
    return FileResponse(audio_path, media_type=media_type)