Base code

Kunthawat Greethong
2026-01-08 22:39:53 +07:00
parent 697115c61a
commit c35fa52117
2169 changed files with 626670 additions and 0 deletions


@@ -0,0 +1,6 @@
"""
Podcast API Handlers
Handler modules for different podcast operations.
"""


@@ -0,0 +1,96 @@
"""
Podcast Analysis Handlers
Analysis endpoint for podcast ideas.
"""
from fastapi import APIRouter, Depends, HTTPException
from typing import Dict, Any
import json
from middleware.auth_middleware import get_current_user
from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_text_generation import llm_text_gen
from loguru import logger
from ..models import PodcastAnalyzeRequest, PodcastAnalyzeResponse
router = APIRouter()
@router.post("/analyze", response_model=PodcastAnalyzeResponse)
async def analyze_podcast_idea(
request: PodcastAnalyzeRequest,
current_user: Dict[str, Any] = Depends(get_current_user),
):
"""
Analyze a podcast idea and return podcast-oriented outlines, keywords, and titles.
This uses the shared LLM provider but with a podcast-specific prompt (not story format).
"""
user_id = require_authenticated_user(current_user)
prompt = f"""
You are an expert podcast producer. Given a podcast idea, craft concise podcast-ready assets
that sound like episode plans (not fiction stories).
Podcast Idea: "{request.idea}"
Duration: ~{request.duration} minutes
Speakers: {request.speakers} (host + optional guest)
Return JSON with:
- audience: short target audience description
- content_type: podcast style/format
- top_keywords: 5 podcast-relevant keywords/phrases
- suggested_outlines: 2 items, each with title (<=60 chars) and 4-6 short segments (bullet-friendly, factual)
- title_suggestions: 3 concise episode titles (no cliffhanger storytelling)
- exa_suggested_config: suggested Exa search options to power research (keep conservative defaults to control cost), with:
- exa_search_type: "auto" | "neural" | "keyword" (prefer "auto" unless clearly news-heavy)
- exa_category: one of ["research paper","news","company","github","tweet","personal site","pdf","financial report","linkedin profile"]
- exa_include_domains: up to 3 reputable domains to prioritize (optional)
- exa_exclude_domains: up to 3 domains to avoid (optional)
- max_sources: 6-10
- include_statistics: boolean (true if topic needs fresh stats)
- date_range: one of ["last_month","last_3_months","last_year","all_time"] (pick recent if time-sensitive)
Requirements:
- Keep language factual, actionable, and suited for spoken audio.
- Avoid narrative fiction tone; focus on insights, hooks, objections, and takeaways.
- Prefer 2024-2025 context when relevant.
"""
try:
raw = llm_text_gen(prompt=prompt, user_id=user_id, json_struct=None)
except HTTPException:
# Re-raise HTTPExceptions (e.g., 429 subscription limit) - preserve error details
raise
except Exception as exc:
logger.error(f"[Podcast Analyze] Analysis failed for user {user_id}: {exc}")
raise HTTPException(status_code=500, detail=f"Analysis failed: {exc}")
# Normalize response (accept dict or JSON string)
if isinstance(raw, str):
try:
data = json.loads(raw)
except json.JSONDecodeError:
raise HTTPException(status_code=500, detail="LLM returned non-JSON output")
elif isinstance(raw, dict):
data = raw
else:
raise HTTPException(status_code=500, detail="Unexpected LLM response format")
audience = data.get("audience") or "Growth-focused professionals"
content_type = data.get("content_type") or "Interview + insights"
top_keywords = data.get("top_keywords") or []
suggested_outlines = data.get("suggested_outlines") or []
title_suggestions = data.get("title_suggestions") or []
exa_suggested_config = data.get("exa_suggested_config") or None
return PodcastAnalyzeResponse(
audience=audience,
content_type=content_type,
top_keywords=top_keywords,
suggested_outlines=suggested_outlines,
title_suggestions=title_suggestions,
exa_suggested_config=exa_suggested_config,
)


@@ -0,0 +1,324 @@
"""
Podcast Audio Handlers
Audio generation, combining, and serving endpoints.
"""
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from typing import Dict, Any
from pathlib import Path
from urllib.parse import urlparse
import tempfile
import uuid
import shutil
from services.database import get_db
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
from api.story_writer.utils.auth import require_authenticated_user
from utils.asset_tracker import save_asset_to_library
from models.story_models import StoryAudioResult
from loguru import logger
from ..constants import PODCAST_AUDIO_DIR, audio_service
from ..models import (
PodcastAudioRequest,
PodcastAudioResponse,
PodcastCombineAudioRequest,
PodcastCombineAudioResponse,
)
router = APIRouter()
@router.post("/audio", response_model=PodcastAudioResponse)
async def generate_podcast_audio(
request: PodcastAudioRequest,
current_user: Dict[str, Any] = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""
Generate AI audio for a podcast scene using the shared audio service.
"""
user_id = require_authenticated_user(current_user)
if not request.text or not request.text.strip():
raise HTTPException(status_code=400, detail="Text is required")
try:
result: StoryAudioResult = audio_service.generate_ai_audio(
scene_number=0,
scene_title=request.scene_title,
text=request.text.strip(),
user_id=user_id,
voice_id=request.voice_id or "Wise_Woman",
speed=request.speed or 1.0,  # Default to normal speed (0.9 was too slow and skewed scene durations)
volume=request.volume or 1.0,
pitch=request.pitch or 0.0, # Normal pitch (0.0 = neutral)
emotion=request.emotion or "neutral",
english_normalization=request.english_normalization or False,
sample_rate=request.sample_rate,
bitrate=request.bitrate,
channel=request.channel,
format=request.format,
language_boost=request.language_boost,
enable_sync_mode=request.enable_sync_mode,
)
# Override URL to use podcast endpoint instead of story endpoint
if result.get("audio_url") and "/api/story/audio/" in result.get("audio_url", ""):
audio_filename = result.get("audio_filename", "")
result["audio_url"] = f"/api/podcast/audio/{audio_filename}"
except Exception as exc:
raise HTTPException(status_code=500, detail=f"Audio generation failed: {exc}")
# Save to asset library (podcast module)
try:
if result.get("audio_url"):
save_asset_to_library(
db=db,
user_id=user_id,
asset_type="audio",
source_module="podcast_maker",
filename=result.get("audio_filename", ""),
file_url=result.get("audio_url", ""),
file_path=result.get("audio_path"),
file_size=result.get("file_size"),
mime_type="audio/mpeg",
title=f"{request.scene_title} - Podcast",
description="Podcast scene narration",
tags=["podcast", "audio", request.scene_id],
provider=result.get("provider"),
model=result.get("model"),
cost=result.get("cost"),
asset_metadata={
"scene_id": request.scene_id,
"scene_title": request.scene_title,
"status": "completed",
},
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save audio asset: {e}")
return PodcastAudioResponse(
scene_id=request.scene_id,
scene_title=request.scene_title,
audio_filename=result.get("audio_filename", ""),
audio_url=result.get("audio_url", ""),
provider=result.get("provider", "wavespeed"),
model=result.get("model", "minimax/speech-02-hd"),
voice_id=result.get("voice_id", request.voice_id or "Wise_Woman"),
text_length=result.get("text_length", len(request.text)),
file_size=result.get("file_size", 0),
cost=result.get("cost", 0.0),
)
@router.post("/combine-audio", response_model=PodcastCombineAudioResponse)
async def combine_podcast_audio(
request: PodcastCombineAudioRequest,
current_user: Dict[str, Any] = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""
Combine multiple scene audio files into a single podcast audio file.
"""
user_id = require_authenticated_user(current_user)
if not request.scene_ids or not request.scene_audio_urls:
raise HTTPException(status_code=400, detail="Scene IDs and audio URLs are required")
if len(request.scene_ids) != len(request.scene_audio_urls):
raise HTTPException(status_code=400, detail="Scene IDs and audio URLs count must match")
try:
# Import moviepy for audio concatenation
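# Note: the top-level import below assumes MoviePy 2.x; MoviePy 1.x exposes these
# names via "from moviepy.editor import AudioFileClip, concatenate_audioclips".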
try:
from moviepy import AudioFileClip, concatenate_audioclips
except ImportError:
logger.error("[Podcast] MoviePy not available for audio combination")
raise HTTPException(
status_code=500,
detail="Audio combination requires MoviePy. Please install: pip install moviepy"
)
# Create temporary directory for audio processing
temp_dir = Path(tempfile.gettempdir()) / f"podcast_combine_{uuid.uuid4().hex[:8]}"
temp_dir.mkdir(parents=True, exist_ok=True)
audio_clips = []
total_duration = 0.0
try:
# Log incoming request for debugging
logger.info(f"[Podcast] Combining audio: {len(request.scene_audio_urls)} URLs received")
for idx, url in enumerate(request.scene_audio_urls):
logger.info(f"[Podcast] URL {idx+1}: {url}")
# Download and load each audio file from podcast_audio directory
for idx, audio_url in enumerate(request.scene_audio_urls):
try:
# Normalize audio URL - handle both absolute and relative paths
if audio_url.startswith("http"):
# External URL - would need to download
logger.error(f"[Podcast] External URLs not supported: {audio_url}")
raise HTTPException(
status_code=400,
detail=f"External URLs not supported. Please use local file paths."
)
# Handle relative paths - only /api/podcast/audio/... URLs are supported
audio_path = None
if audio_url.startswith("/api/"):
# Extract filename from URL
parsed = urlparse(audio_url)
path = parsed.path if parsed.scheme else audio_url
# Handle both /api/podcast/audio/ and /api/story/audio/ URLs (for backward compatibility)
if "/api/podcast/audio/" in path:
filename = path.split("/api/podcast/audio/", 1)[1].split("?", 1)[0].strip()
elif "/api/story/audio/" in path:
# Convert story audio URLs to podcast audio (they're in the same directory now)
filename = path.split("/api/story/audio/", 1)[1].split("?", 1)[0].strip()
logger.info(f"[Podcast] Converting story audio URL to podcast: {audio_url} -> {filename}")
else:
logger.error(f"[Podcast] Unsupported audio URL format: {audio_url}. Expected /api/podcast/audio/ or /api/story/audio/ URLs.")
continue
if not filename:
logger.error(f"[Podcast] Could not extract filename from URL: {audio_url}")
continue
# Podcast audio files are stored in podcast_audio directory
audio_path = (PODCAST_AUDIO_DIR / filename).resolve()
# Security check: ensure path is within PODCAST_AUDIO_DIR
if not str(audio_path).startswith(str(PODCAST_AUDIO_DIR)):
logger.error(f"[Podcast] Attempted path traversal when resolving audio: {audio_url}")
continue
else:
logger.warning(f"[Podcast] Non-API URL format, treating as direct path: {audio_url}")
audio_path = Path(audio_url)
if not audio_path or not audio_path.exists():
logger.error(f"[Podcast] Audio file not found: {audio_path} (from URL: {audio_url})")
continue
# Load audio clip
audio_clip = AudioFileClip(str(audio_path))
audio_clips.append(audio_clip)
total_duration += audio_clip.duration
logger.info(f"[Podcast] Loaded audio {idx+1}/{len(request.scene_audio_urls)}: {audio_path.name} ({audio_clip.duration:.2f}s)")
except HTTPException:
raise
except Exception as e:
logger.error(f"[Podcast] Failed to load audio {idx+1}: {e}", exc_info=True)
# Continue with other audio files
continue
if not audio_clips:
raise HTTPException(status_code=400, detail="No valid audio files found to combine")
# Concatenate all audio clips
logger.info(f"[Podcast] Combining {len(audio_clips)} audio clips (total duration: {total_duration:.2f}s)")
combined_audio = concatenate_audioclips(audio_clips)
# Generate output filename
output_filename = f"podcast_combined_{request.project_id}_{uuid.uuid4().hex[:8]}.mp3"
output_path = PODCAST_AUDIO_DIR / output_filename
# Write combined audio file
combined_audio.write_audiofile(
str(output_path),
codec="mp3",
bitrate="192k",
logger=None, # Suppress moviepy logging
)
# Close audio clips to free resources
for clip in audio_clips:
clip.close()
combined_audio.close()
file_size = output_path.stat().st_size
audio_url = f"/api/podcast/audio/{output_filename}"
logger.info(f"[Podcast] Combined audio saved: {output_path} ({file_size} bytes)")
# Save to asset library
try:
save_asset_to_library(
db=db,
user_id=user_id,
asset_type="audio",
source_module="podcast_maker",
filename=output_filename,
file_url=audio_url,
file_path=str(output_path),
file_size=file_size,
mime_type="audio/mpeg",
title=f"Combined Podcast - {request.project_id}",
description=f"Combined podcast audio from {len(request.scene_ids)} scenes",
tags=["podcast", "audio", "combined", request.project_id],
asset_metadata={
"project_id": request.project_id,
"scene_ids": request.scene_ids,
"scene_count": len(request.scene_ids),
"total_duration": total_duration,
"status": "completed",
},
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save combined audio asset: {e}")
return PodcastCombineAudioResponse(
combined_audio_url=audio_url,
combined_audio_filename=output_filename,
total_duration=total_duration,
file_size=file_size,
scene_count=len(request.scene_ids),
)
finally:
# Cleanup temporary directory
try:
if temp_dir.exists():
shutil.rmtree(temp_dir)
except Exception as e:
logger.warning(f"[Podcast] Failed to cleanup temp directory: {e}")
except HTTPException:
raise
except Exception as exc:
logger.error(f"[Podcast] Audio combination failed: {exc}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Audio combination failed: {exc}")
@router.get("/audio/{filename}")
async def serve_podcast_audio(
filename: str,
current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
"""Serve generated podcast scene audio files.
Supports authentication via Authorization header or token query parameter.
Query parameter is useful for HTML elements like <audio> that cannot send custom headers.
"""
require_authenticated_user(current_user)
# Security check: ensure filename doesn't contain path traversal
if ".." in filename or "/" in filename or "\\" in filename:
raise HTTPException(status_code=400, detail="Invalid filename")
audio_path = (PODCAST_AUDIO_DIR / filename).resolve()
# Security check: ensure path is within PODCAST_AUDIO_DIR
if not str(audio_path).startswith(str(PODCAST_AUDIO_DIR)):
raise HTTPException(status_code=403, detail="Access denied")
if not audio_path.exists():
raise HTTPException(status_code=404, detail="Audio file not found")
return FileResponse(audio_path, media_type="audio/mpeg")


@@ -0,0 +1,381 @@
"""
Podcast Avatar Handlers
Avatar upload and presenter generation endpoints.
"""
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from typing import Dict, Any, List, Optional
from pathlib import Path
import uuid
import hashlib
from services.database import get_db
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_image_generation import generate_image
from services.llm_providers.main_image_editing import edit_image
from utils.asset_tracker import save_asset_to_library
from loguru import logger
from ..constants import PODCAST_IMAGES_DIR
from ..presenter_personas import choose_persona_id, get_persona
router = APIRouter()
# Avatar subdirectory
AVATAR_SUBDIR = "avatars"
PODCAST_AVATARS_DIR = PODCAST_IMAGES_DIR / AVATAR_SUBDIR
PODCAST_AVATARS_DIR.mkdir(parents=True, exist_ok=True)
@router.post("/avatar/upload")
async def upload_podcast_avatar(
file: UploadFile = File(...),
project_id: Optional[str] = Form(None),
current_user: Dict[str, Any] = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""
Upload a presenter avatar image for a podcast project.
Returns the avatar URL for use in scene image generation.
"""
user_id = require_authenticated_user(current_user)
# Validate file type
if not file.content_type or not file.content_type.startswith('image/'):
raise HTTPException(status_code=400, detail="File must be an image")
# Validate file size (max 5MB)
file_content = await file.read()
if len(file_content) > 5 * 1024 * 1024:
raise HTTPException(status_code=400, detail="Image file size must be less than 5MB")
try:
# Generate filename
file_ext = Path(file.filename or "").suffix or '.png'
unique_id = str(uuid.uuid4())[:8]
avatar_filename = f"avatar_{project_id or 'temp'}_{unique_id}{file_ext}"
avatar_path = PODCAST_AVATARS_DIR / avatar_filename
# Save file
with open(avatar_path, "wb") as f:
f.write(file_content)
logger.info(f"[Podcast] Avatar uploaded: {avatar_path}")
# Create avatar URL
avatar_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{avatar_filename}"
# Save to asset library if project_id provided
if project_id:
try:
save_asset_to_library(
db=db,
user_id=user_id,
asset_type="image",
source_module="podcast_maker",
filename=avatar_filename,
file_url=avatar_url,
file_path=str(avatar_path),
file_size=len(file_content),
mime_type=file.content_type,
title=f"Podcast Presenter Avatar - {project_id}",
description="Podcast presenter avatar image",
tags=["podcast", "avatar", project_id],
asset_metadata={
"project_id": project_id,
"type": "presenter_avatar",
"status": "completed",
},
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save avatar asset: {e}")
return {
"avatar_url": avatar_url,
"avatar_filename": avatar_filename,
"message": "Avatar uploaded successfully"
}
except Exception as exc:
logger.error(f"[Podcast] Avatar upload failed: {exc}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Avatar upload failed: {str(exc)}")
@router.post("/avatar/make-presentable")
async def make_avatar_presentable(
avatar_url: str = Form(...),
project_id: Optional[str] = Form(None),
current_user: Dict[str, Any] = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""
Transform an uploaded avatar image into a podcast-appropriate presenter.
Uses AI image editing to convert the uploaded photo into a professional podcast presenter.
"""
user_id = require_authenticated_user(current_user)
try:
# Load the uploaded avatar image
from ..utils import load_podcast_image_bytes
avatar_bytes = load_podcast_image_bytes(avatar_url)
logger.info(f"[Podcast] Transforming avatar to podcast presenter for project {project_id}")
# Create transformation prompt based on WaveSpeed AI recommendations
# Transform the uploaded image into a professional podcast presenter
transformation_prompt = """Transform this image into a professional podcast presenter:
- Half-length portrait format, looking at camera
- Professional attire (white shirt and light gray blazer or business casual)
- Confident, friendly, engaging expression
- Soft studio lighting, plain light-gray or neutral background
- Professional podcast host appearance, suitable for video generation
- Clean composition, center-focused for avatar overlay
- Maintain the person's appearance and identity while making it podcast-appropriate
- Ultra realistic, 4k quality, professional photography style"""
# Transform the image using image editing
image_options = {
"provider": None, # Auto-select provider
"model": None, # Use default model
}
result = edit_image(
input_image_bytes=avatar_bytes,
prompt=transformation_prompt,
options=image_options,
user_id=user_id
)
# Save transformed avatar
unique_id = str(uuid.uuid4())[:8]
transformed_filename = f"presenter_transformed_{project_id or 'temp'}_{unique_id}.png"
transformed_path = PODCAST_AVATARS_DIR / transformed_filename
with open(transformed_path, "wb") as f:
f.write(result.image_bytes)
transformed_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{transformed_filename}"
logger.info(f"[Podcast] Transformed avatar saved to: {transformed_path}")
# Save to asset library
if project_id:
try:
save_asset_to_library(
db=db,
user_id=user_id,
asset_type="image",
source_module="podcast_maker",
filename=transformed_filename,
file_url=transformed_url,
file_path=str(transformed_path),
file_size=len(result.image_bytes),
mime_type="image/png",
title=f"Podcast Presenter (Transformed) - {project_id}",
description="AI-transformed podcast presenter avatar from uploaded photo",
prompt=transformation_prompt,
tags=["podcast", "avatar", "presenter", "transformed", project_id],
provider=result.provider,
model=result.model,
asset_metadata={
"project_id": project_id,
"type": "transformed_presenter",
"original_avatar_url": avatar_url,
"status": "completed",
},
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save transformed avatar asset: {e}")
return {
"avatar_url": transformed_url,
"avatar_filename": transformed_filename,
"message": "Avatar transformed into podcast presenter successfully"
}
except Exception as exc:
logger.error(f"[Podcast] Avatar transformation failed: {exc}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Avatar transformation failed: {str(exc)}")
@router.post("/avatar/generate")
async def generate_podcast_presenters(
speakers: int = Form(...),
project_id: Optional[str] = Form(None),
audience: Optional[str] = Form(None),
content_type: Optional[str] = Form(None),
top_keywords: Optional[str] = Form(None), # JSON string array
persona_id: Optional[str] = Form(None),
current_user: Dict[str, Any] = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""
Generate presenter avatar images based on number of speakers and AI analysis insights.
Uses analysis data (audience, content_type, keywords) to create more relevant presenters.
Returns list of avatar URLs.
Based on WaveSpeed AI recommendations for professional podcast presenters.
"""
user_id = require_authenticated_user(current_user)
if speakers < 1 or speakers > 2:
raise HTTPException(status_code=400, detail="Speakers must be between 1 and 2")
try:
# Parse keywords if provided
keywords_list = []
if top_keywords:
try:
import json
keywords_list = json.loads(top_keywords) if isinstance(top_keywords, str) else top_keywords
except Exception:
keywords_list = []
# Choose persona (market-fit + style) using analysis if not explicitly provided.
# Do not infer sensitive traits (like ethnicity); personas represent market + style only.
selected_persona_id = persona_id or choose_persona_id(
audience=audience,
content_type=content_type,
top_keywords=keywords_list,
)
persona = get_persona(selected_persona_id)
generated_avatars = []
for i in range(speakers):
# Generate presenter-specific prompt based on WaveSpeed AI recommendations
# Enhanced with analysis insights for more relevant presenter appearance
gender = "female" if i == 0 else "male" # First speaker female, second male
# Build context-aware prompt using analysis insights + persona preset
prompt_parts = [
f"Half-length portrait of a professional podcast presenter ({gender}, 25-35 years old)",
"photo-realistic, professional photography",
]
if persona:
prompt_parts.append(persona.prompt)
# Use content_type to influence attire/style
if content_type:
content_lower = content_type.lower()
if "business" in content_lower or "corporate" in content_lower:
prompt_parts.append("business professional attire (white shirt and light gray blazer)")
elif "casual" in content_lower or "conversational" in content_lower:
prompt_parts.append("business casual attire (smart casual, approachable)")
elif "tech" in content_lower or "technology" in content_lower:
prompt_parts.append("modern professional attire (tech-forward, contemporary style)")
else:
prompt_parts.append("professional attire (white shirt and light gray blazer or business casual)")
else:
prompt_parts.append("professional attire (white shirt and light gray blazer or business casual)")
# Use audience to influence expression and style
if audience:
audience_lower = audience.lower()
if "young" in audience_lower or "millennial" in audience_lower or "gen z" in audience_lower:
prompt_parts.append("modern, energetic, approachable expression")
elif "executive" in audience_lower or "professional" in audience_lower or "business" in audience_lower:
prompt_parts.append("confident, authoritative, professional expression")
else:
prompt_parts.append("confident, friendly, engaging expression")
else:
prompt_parts.append("confident, friendly expression")
# Add keywords context if available (for visual style hints)
if keywords_list and len(keywords_list) > 0:
# Extract visual-relevant keywords
visual_keywords = [k for k in keywords_list[:3] if any(word in k.lower() for word in ["tech", "business", "creative", "modern", "professional"])]
if visual_keywords:
prompt_parts.append(f"context: {', '.join(visual_keywords[:2])}")
# Technical requirements
prompt_parts.extend([
"looking at camera",
"soft studio lighting, plain light-gray or neutral background",
"ultra realistic, 4k quality, 85mm lens, f/2.8",
"professional podcast host appearance, suitable for video generation",
"clean composition, center-focused for avatar overlay"
])
prompt = ", ".join(prompt_parts)
logger.info(f"[Podcast] Generating presenter {i+1}/{speakers} for project {project_id}")
# Generate image
# Use a deterministic seed per (project_id, speaker_number, persona_id) to keep presenter identity stable.
# Note: determinism may vary by provider/model, but seed improves consistency substantially.
seed_source = f"{project_id or 'temp'}|speaker={i+1}|persona={selected_persona_id}"
seed = int(hashlib.sha256(seed_source.encode("utf-8")).hexdigest()[:8], 16)
image_options = {
"provider": None, # Auto-select provider
"width": 1024,
"height": 1024,
"seed": seed,
}
result = generate_image(
prompt=prompt,
options=image_options,
user_id=user_id
)
# Save avatar
unique_id = str(uuid.uuid4())[:8]
avatar_filename = f"presenter_{project_id or 'temp'}_{i+1}_{unique_id}.png"
avatar_path = PODCAST_AVATARS_DIR / avatar_filename
with open(avatar_path, "wb") as f:
f.write(result.image_bytes)
avatar_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{avatar_filename}"
# Save to asset library
if project_id:
try:
save_asset_to_library(
db=db,
user_id=user_id,
asset_type="image",
source_module="podcast_maker",
filename=avatar_filename,
file_url=avatar_url,
file_path=str(avatar_path),
file_size=len(result.image_bytes),
mime_type="image/png",
title=f"Podcast Presenter {i+1} - {project_id}",
description=f"Generated podcast presenter avatar for speaker {i+1}",
prompt=prompt,
tags=["podcast", "avatar", "presenter", project_id],
provider=result.provider,
model=result.model,
asset_metadata={
"project_id": project_id,
"speaker_number": i + 1,
"type": "generated_presenter",
"status": "completed",
"persona_id": selected_persona_id,
"seed": seed,
},
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save presenter asset: {e}")
generated_avatars.append({
"avatar_url": avatar_url,
"avatar_filename": avatar_filename,
"speaker_number": i + 1,
"prompt": prompt, # Include the prompt used for generation
"persona_id": selected_persona_id,
"seed": seed,
})
return {
"avatars": generated_avatars,
"message": f"Generated {speakers} presenter avatar(s)",
"persona_id": selected_persona_id,
}
except Exception as exc:
logger.error(f"[Podcast] Presenter generation failed: {exc}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Presenter generation failed: {str(exc)}")


@@ -0,0 +1,399 @@
"""
Podcast Image Handlers
Image generation and serving endpoints.
"""
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from typing import Dict, Any
from pathlib import Path
import uuid
from services.database import get_db
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_image_generation import generate_image, generate_character_image
from utils.asset_tracker import save_asset_to_library
from loguru import logger
from ..constants import PODCAST_IMAGES_DIR
from ..models import PodcastImageRequest, PodcastImageResponse
router = APIRouter()
@router.post("/image", response_model=PodcastImageResponse)
async def generate_podcast_scene_image(
request: PodcastImageRequest,
current_user: Dict[str, Any] = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""
Generate an AI image for a podcast scene.
Creates a professional, podcast-appropriate image based on scene title and content.
"""
user_id = require_authenticated_user(current_user)
if not request.scene_title:
raise HTTPException(status_code=400, detail="Scene title is required")
try:
# PRE-FLIGHT VALIDATION: Check subscription limits before any API calls
from services.subscription import PricingService
from services.subscription.preflight_validator import validate_image_generation_operations
from fastapi import HTTPException as FastAPIHTTPException
pricing_service = PricingService(db)
try:
# Raises HTTPException immediately if validation fails
validate_image_generation_operations(
pricing_service=pricing_service,
user_id=user_id,
num_images=1
)
logger.info(f"[Podcast] ✅ Pre-flight validation passed for user {user_id}")
except FastAPIHTTPException as http_ex:
logger.error(f"[Podcast] ❌ Pre-flight validation failed for user {user_id}: {http_ex.detail}")
raise
# If base avatar is provided, create scene-specific variation
# Otherwise, generate from scratch
logger.info(f"[Podcast] Image generation request for scene {request.scene_id}")
logger.info(f"[Podcast] base_avatar_url={request.base_avatar_url}")
logger.info(f"[Podcast] custom_prompt={request.custom_prompt}")
logger.info(f"[Podcast] style={request.style}, rendering_speed={request.rendering_speed}, aspect_ratio={request.aspect_ratio}")
if request.base_avatar_url:
# Load base avatar image for reference
from ..utils import load_podcast_image_bytes
try:
logger.info(f"[Podcast] Attempting to load base avatar from: {request.base_avatar_url}")
base_avatar_bytes = load_podcast_image_bytes(request.base_avatar_url)
logger.info(f"[Podcast] ✅ Successfully loaded base avatar ({len(base_avatar_bytes)} bytes) for scene {request.scene_id}")
except Exception as e:
logger.error(f"[Podcast] ❌ Failed to load base avatar from {request.base_avatar_url}: {e}", exc_info=True)
# If base avatar fails to load, we cannot maintain character consistency
# Raise an error instead of falling back to standard generation
raise HTTPException(
status_code=500,
detail={
"error": "Failed to load base avatar",
"message": f"Could not load the base avatar image for character consistency: {str(e)}. Please ensure the avatar image is accessible.",
},
)
else:
logger.info(f"[Podcast] No base avatar URL provided, will generate from scratch")
base_avatar_bytes = None
# Build optimized prompt for scene image generation
# When base avatar is provided, use Ideogram Character to maintain consistency
# Otherwise, generate from scratch with podcast-optimized prompt
image_prompt = "" # Initialize prompt variable
if base_avatar_bytes:
# Use Ideogram Character API for consistent character generation
# Use custom prompt if provided, otherwise build scene-specific prompt
if request.custom_prompt:
# User provided custom prompt - use it directly
image_prompt = request.custom_prompt
logger.info(f"[Podcast] Using custom prompt from user for scene {request.scene_id}")
else:
# Build scene-specific prompt that respects the base avatar
prompt_parts = []
# Scene context (primary focus)
if request.scene_title:
prompt_parts.append(f"Scene: {request.scene_title}")
# Scene content insights for visual context
if request.scene_content:
content_preview = request.scene_content[:200].replace("\n", " ").strip()
# Extract visualizable themes
visual_keywords = []
content_lower = content_preview.lower()
if any(word in content_lower for word in ["data", "statistics", "numbers", "chart", "graph"]):
visual_keywords.append("data visualization background")
if any(word in content_lower for word in ["technology", "tech", "digital", "ai", "software"]):
visual_keywords.append("modern tech studio setting")
if any(word in content_lower for word in ["business", "growth", "strategy", "market"]):
visual_keywords.append("professional business studio")
if visual_keywords:
prompt_parts.append(", ".join(visual_keywords))
# Podcast theme context
if request.idea:
idea_preview = request.idea[:60].strip()
prompt_parts.append(f"Topic: {idea_preview}")
# Studio setting (maintains podcast aesthetic)
prompt_parts.extend([
"Professional podcast recording studio",
"Modern microphone setup",
"Clean background, professional lighting",
"16:9 aspect ratio, video-optimized composition"
])
image_prompt = ", ".join(prompt_parts)
logger.info(f"[Podcast] Using Ideogram Character for scene {request.scene_id} with base avatar")
logger.info(f"[Podcast] Scene prompt: {image_prompt[:150]}...")
# Use centralized character image generation with subscription checks and tracking
# Use custom settings if provided, otherwise use defaults
style = request.style or "Realistic" # Default to Realistic for professional podcast presenters
rendering_speed = request.rendering_speed or "Quality" # Default to Quality for podcast videos
# Calculate aspect ratio from custom setting or dimensions
if request.aspect_ratio:
aspect_ratio = request.aspect_ratio
else:
aspect_ratio_map = {
(1024, 1024): "1:1",
(1920, 1080): "16:9",
(1080, 1920): "9:16",
(1280, 960): "4:3",
(960, 1280): "3:4",
}
aspect_ratio = aspect_ratio_map.get((request.width, request.height), "16:9")
logger.info(f"[Podcast] Ideogram Character settings: style={style}, rendering_speed={rendering_speed}, aspect_ratio={aspect_ratio}")
try:
image_bytes = generate_character_image(
prompt=image_prompt,
reference_image_bytes=base_avatar_bytes,
user_id=user_id,
style=style,
aspect_ratio=aspect_ratio,
rendering_speed=rendering_speed,
timeout=None, # No timeout - poll until WaveSpeed says it's done or failed
)
# Create result object compatible with ImageGenerationResult
from services.llm_providers.image_generation.base import ImageGenerationResult
result = ImageGenerationResult(
image_bytes=image_bytes,
provider="wavespeed",
model="ideogram-ai/ideogram-character",
width=request.width,
height=request.height,
)
logger.info(f"[Podcast] ✅ Successfully generated character-consistent scene image")
except HTTPException as http_err:
# Re-raise HTTPExceptions from wavespeed client as-is
logger.error(f"[Podcast] ❌ Ideogram Character HTTPException: {http_err.status_code} - {http_err.detail}")
raise
except Exception as char_error:
error_msg = str(char_error)
error_type = type(char_error).__name__
logger.error(f"[Podcast] ❌ Ideogram Character failed: {error_type}: {error_msg}", exc_info=True)
# If Ideogram Character fails, we should NOT fall back to standard generation
# because that would lose character consistency. Instead, raise an error.
# However, if it's a timeout/connection issue, we can provide a helpful message.
error_msg_lower = error_msg.lower()
if "timeout" in error_msg_lower or "connection" in error_msg_lower or "504" in error_msg:
raise HTTPException(
status_code=504,
detail={
"error": "Image generation service unavailable",
"message": "The character-consistent image generation service is currently unavailable. Please try again in a few moments. If the problem persists, the service may be experiencing high load.",
"retry_recommended": True,
},
)
else:
raise HTTPException(
status_code=502,
detail={
"error": "Character-consistent image generation failed",
"message": f"Failed to generate image with character consistency: {error_msg}",
"retry_recommended": True,
},
)
# CRITICAL: any failure above (loading the base avatar or Ideogram Character generation)
# has already raised, so this branch is only reached when no base_avatar_url was provided.
if not base_avatar_bytes:
logger.info(f"[Podcast] No base avatar provided - generating standard image from scratch")
# Standard generation from scratch (no base avatar provided)
prompt_parts = []
# Core podcast studio elements
prompt_parts.extend([
"Professional podcast recording studio",
"Modern podcast setup with high-quality microphone",
"Clean, minimalist background suitable for video",
"Professional studio lighting with soft, even illumination",
"Podcast host environment, professional and inviting"
])
# Scene-specific context
if request.scene_title:
prompt_parts.append(f"Scene theme: {request.scene_title}")
# Content context for visual relevance
if request.scene_content:
content_preview = request.scene_content[:150].replace("\n", " ").strip()
visual_keywords = []
content_lower = content_preview.lower()
if any(word in content_lower for word in ["data", "statistics", "numbers", "chart", "graph"]):
visual_keywords.append("data visualization elements")
if any(word in content_lower for word in ["technology", "tech", "digital", "ai", "software"]):
visual_keywords.append("modern technology aesthetic")
if any(word in content_lower for word in ["business", "growth", "strategy", "market"]):
visual_keywords.append("professional business environment")
if visual_keywords:
prompt_parts.append(", ".join(visual_keywords))
# Podcast theme context
if request.idea:
idea_preview = request.idea[:80].strip()
prompt_parts.append(f"Podcast topic context: {idea_preview}")
# Technical requirements for video generation
prompt_parts.extend([
"16:9 aspect ratio optimized for video",
"Center-focused composition for talking avatar overlay",
"Neutral color palette with professional tones",
"High resolution, sharp focus, professional photography quality",
"No text, no logos, no distracting elements",
"Suitable for InfiniteTalk video generation with animated avatar"
])
# Style constraints
prompt_parts.extend([
"Realistic photography style, not illustration or cartoon",
"Professional broadcast quality",
"Warm, inviting atmosphere",
"Clean composition with breathing room for avatar placement"
])
image_prompt = ", ".join(prompt_parts)
logger.info(f"[Podcast] Generating image for scene {request.scene_id}: {request.scene_title}")
# Generate image using main_image_generation service
image_options = {
"provider": None, # Auto-select provider
"width": request.width,
"height": request.height,
}
result = generate_image(
prompt=image_prompt,
options=image_options,
user_id=user_id
)
# Save image to podcast images directory
PODCAST_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
# Generate filename
clean_title = "".join(c if c.isalnum() or c in ('-', '_') else '_' for c in request.scene_title[:30])
unique_id = str(uuid.uuid4())[:8]
image_filename = f"scene_{request.scene_id}_{clean_title}_{unique_id}.png"
image_path = PODCAST_IMAGES_DIR / image_filename
# Save image
with open(image_path, "wb") as f:
f.write(result.image_bytes)
logger.info(f"[Podcast] Saved image to: {image_path}")
# Create image URL (served via API endpoint)
image_url = f"/api/podcast/images/{image_filename}"
# Estimate cost (rough estimate: ~$0.04 per image for most providers, ~$0.10 for Ideogram Character)
# Note: Actual usage tracking is handled by centralized generate_image()/generate_character_image() functions
cost = 0.10 if result.provider == "wavespeed" and result.model == "ideogram-ai/ideogram-character" else 0.04
# Save to asset library
try:
save_asset_to_library(
db=db,
user_id=user_id,
asset_type="image",
source_module="podcast_maker",
filename=image_filename,
file_url=image_url,
file_path=str(image_path),
file_size=len(result.image_bytes),
mime_type="image/png",
title=f"{request.scene_title} - Podcast Scene",
description=f"Podcast scene image: {request.scene_title}",
prompt=image_prompt,
tags=["podcast", "scene", request.scene_id],
provider=result.provider,
model=result.model,
asset_metadata={
"scene_id": request.scene_id,
"scene_title": request.scene_title,
"status": "completed",
},
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save image asset: {e}")
return PodcastImageResponse(
scene_id=request.scene_id,
scene_title=request.scene_title,
image_filename=image_filename,
image_url=image_url,
width=result.width,
height=result.height,
provider=result.provider,
model=result.model,
cost=cost,
)
except HTTPException:
# Re-raise HTTPExceptions as-is (they already have proper error details)
raise
except Exception as exc:
# Log the full exception for debugging
error_msg = str(exc)
error_type = type(exc).__name__
logger.error(f"[Podcast] Image generation failed: {error_type}: {error_msg}", exc_info=True)
# Create a safe error detail
raise HTTPException(
status_code=500,
detail={
"error": "Image generation failed",
"message": error_msg,
"type": error_type,
}
)
@router.get("/images/{path:path}")
async def serve_podcast_image(
path: str, # Changed from filename to path to support subdirectories
current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
"""Serve generated podcast scene images and avatars.
Supports authentication via Authorization header or token query parameter.
Query parameter is useful for HTML elements like <img> that cannot send custom headers.
Supports subdirectories like avatars/
"""
require_authenticated_user(current_user)
# Security check: ensure path doesn't contain path traversal or absolute paths
if ".." in path or path.startswith("/"):
raise HTTPException(status_code=400, detail="Invalid path")
image_path = (PODCAST_IMAGES_DIR / path).resolve()
# Security check: ensure resolved path is within PODCAST_IMAGES_DIR
if not str(image_path).startswith(str(PODCAST_IMAGES_DIR)):
raise HTTPException(status_code=403, detail="Access denied")
if not image_path.exists():
raise HTTPException(status_code=404, detail="Image not found")
return FileResponse(image_path, media_type="image/png")


@@ -0,0 +1,203 @@
"""
Podcast Project Handlers
CRUD operations for podcast projects.
"""
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.orm import Session
from typing import Optional, Dict, Any
from services.database import get_db
from middleware.auth_middleware import get_current_user
from services.podcast_service import PodcastService
from ..models import (
PodcastProjectResponse,
CreateProjectRequest,
UpdateProjectRequest,
PodcastProjectListResponse,
)
router = APIRouter()
@router.post("/projects", response_model=PodcastProjectResponse, status_code=201)
async def create_project(
request: CreateProjectRequest,
db: Session = Depends(get_db),
current_user: Dict[str, Any] = Depends(get_current_user),
):
"""Create a new podcast project."""
try:
user_id = current_user.get("user_id") or current_user.get("id")
if not user_id:
raise HTTPException(status_code=401, detail="User ID not found")
service = PodcastService(db)
# Check if project_id already exists for this user
existing = service.get_project(user_id, request.project_id)
if existing:
raise HTTPException(status_code=400, detail="Project ID already exists")
project = service.create_project(
user_id=user_id,
project_id=request.project_id,
idea=request.idea,
duration=request.duration,
speakers=request.speakers,
budget_cap=request.budget_cap,
)
return PodcastProjectResponse.model_validate(project)
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error creating project: {str(e)}")
@router.get("/projects/{project_id}", response_model=PodcastProjectResponse)
async def get_project(
project_id: str,
db: Session = Depends(get_db),
current_user: Dict[str, Any] = Depends(get_current_user),
):
"""Get a podcast project by ID."""
try:
user_id = current_user.get("user_id") or current_user.get("id")
if not user_id:
raise HTTPException(status_code=401, detail="User ID not found")
service = PodcastService(db)
project = service.get_project(user_id, project_id)
if not project:
raise HTTPException(status_code=404, detail="Project not found")
return PodcastProjectResponse.model_validate(project)
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error fetching project: {str(e)}")
@router.put("/projects/{project_id}", response_model=PodcastProjectResponse)
async def update_project(
project_id: str,
request: UpdateProjectRequest,
db: Session = Depends(get_db),
current_user: Dict[str, Any] = Depends(get_current_user),
):
"""Update a podcast project state."""
try:
user_id = current_user.get("user_id") or current_user.get("id")
if not user_id:
raise HTTPException(status_code=401, detail="User ID not found")
service = PodcastService(db)
# Convert request to dict, keeping only fields the client explicitly set
updates = request.model_dump(exclude_unset=True)
project = service.update_project(user_id, project_id, **updates)
if not project:
raise HTTPException(status_code=404, detail="Project not found")
return PodcastProjectResponse.model_validate(project)
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error updating project: {str(e)}")
@router.get("/projects", response_model=PodcastProjectListResponse)
async def list_projects(
status: Optional[str] = Query(None, description="Filter by status"),
favorites_only: bool = Query(False, description="Only favorites"),
limit: int = Query(50, ge=1, le=200),
offset: int = Query(0, ge=0),
order_by: str = Query("updated_at", description="Order by: updated_at or created_at"),
db: Session = Depends(get_db),
current_user: Dict[str, Any] = Depends(get_current_user),
):
"""List user's podcast projects."""
try:
user_id = current_user.get("user_id") or current_user.get("id")
if not user_id:
raise HTTPException(status_code=401, detail="User ID not found")
if order_by not in ["updated_at", "created_at"]:
raise HTTPException(status_code=400, detail="order_by must be 'updated_at' or 'created_at'")
service = PodcastService(db)
projects, total = service.list_projects(
user_id=user_id,
status=status,
favorites_only=favorites_only,
limit=limit,
offset=offset,
order_by=order_by,
)
return PodcastProjectListResponse(
projects=[PodcastProjectResponse.model_validate(p) for p in projects],
total=total,
limit=limit,
offset=offset,
)
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error listing projects: {str(e)}")
@router.delete("/projects/{project_id}", status_code=204)
async def delete_project(
project_id: str,
db: Session = Depends(get_db),
current_user: Dict[str, Any] = Depends(get_current_user),
):
"""Delete a podcast project."""
try:
user_id = current_user.get("user_id") or current_user.get("id")
if not user_id:
raise HTTPException(status_code=401, detail="User ID not found")
service = PodcastService(db)
deleted = service.delete_project(user_id, project_id)
if not deleted:
raise HTTPException(status_code=404, detail="Project not found")
return None
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error deleting project: {str(e)}")
@router.post("/projects/{project_id}/favorite", response_model=PodcastProjectResponse)
async def toggle_favorite(
project_id: str,
db: Session = Depends(get_db),
current_user: Dict[str, Any] = Depends(get_current_user),
):
"""Toggle favorite status of a project."""
try:
user_id = current_user.get("user_id") or current_user.get("id")
if not user_id:
raise HTTPException(status_code=401, detail="User ID not found")
service = PodcastService(db)
project = service.toggle_favorite(user_id, project_id)
if not project:
raise HTTPException(status_code=404, detail="Project not found")
return PodcastProjectResponse.model_validate(project)
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error toggling favorite: {str(e)}")


@@ -0,0 +1,99 @@
"""
Podcast Research Handlers
Research endpoints using Exa provider.
"""
from fastapi import APIRouter, Depends, HTTPException
from typing import Dict, Any
from types import SimpleNamespace
from middleware.auth_middleware import get_current_user
from api.story_writer.utils.auth import require_authenticated_user
from services.blog_writer.research.exa_provider import ExaResearchProvider
from loguru import logger
from ..models import (
PodcastExaResearchRequest,
PodcastExaResearchResponse,
PodcastExaSource,
PodcastExaConfig,
)
router = APIRouter()
@router.post("/research/exa", response_model=PodcastExaResearchResponse)
async def podcast_research_exa(
request: PodcastExaResearchRequest,
current_user: Dict[str, Any] = Depends(get_current_user),
):
"""
Run podcast research directly via Exa (no blog writer pipeline).
"""
user_id = require_authenticated_user(current_user)
queries = [q.strip() for q in request.queries if q and q.strip()]
if not queries:
raise HTTPException(status_code=400, detail="At least one query is required for research.")
exa_cfg = request.exa_config or PodcastExaConfig()
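# SimpleNamespace stands in for the blog-writer research config object that
# ExaResearchProvider.search() reads attribute-style; only the fields this handler
# needs are populated, with conservative defaults.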
cfg = SimpleNamespace(
exa_search_type=exa_cfg.exa_search_type or "auto",
exa_category=exa_cfg.exa_category,
exa_include_domains=exa_cfg.exa_include_domains or [],
exa_exclude_domains=exa_cfg.exa_exclude_domains or [],
max_sources=exa_cfg.max_sources or 8,
source_types=[],
)
provider = ExaResearchProvider()
prompt = request.topic
try:
result = await provider.search(
prompt=prompt,
topic=request.topic,
industry="",
target_audience="",
config=cfg,
user_id=user_id,
)
except Exception as exc:
logger.error(f"[Podcast Exa Research] Failed for user {user_id}: {exc}")
raise HTTPException(status_code=500, detail=f"Exa research failed: {exc}")
# Track usage if available
try:
cost_total = 0.0
if isinstance(result, dict):
cost_total = result.get("cost", {}).get("total", 0.005) if result.get("cost") else 0.005
provider.track_exa_usage(user_id, cost_total)
except Exception as track_err:
logger.warning(f"[Podcast Exa Research] Failed to track usage: {track_err}")
sources_payload = []
if isinstance(result, dict):
for src in result.get("sources", []) or []:
try:
sources_payload.append(PodcastExaSource(**src))
except Exception:
sources_payload.append(PodcastExaSource(**{
"title": src.get("title", ""),
"url": src.get("url", ""),
"excerpt": src.get("excerpt", ""),
"published_at": src.get("published_at"),
"highlights": src.get("highlights"),
"summary": src.get("summary"),
"source_type": src.get("source_type"),
"index": src.get("index"),
}))
return PodcastExaResearchResponse(
sources=sources_payload,
search_queries=result.get("search_queries", queries) if isinstance(result, dict) else queries,
cost=result.get("cost") if isinstance(result, dict) else None,
search_type=result.get("search_type") if isinstance(result, dict) else None,
provider=result.get("provider", "exa") if isinstance(result, dict) else "exa",
content=result.get("content") if isinstance(result, dict) else None,
)


@@ -0,0 +1,142 @@
"""
Podcast Script Handlers
Script generation endpoint.
"""
from fastapi import APIRouter, Depends, HTTPException
from typing import Dict, Any
import json
from middleware.auth_middleware import get_current_user
from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_text_generation import llm_text_gen
from loguru import logger
from ..models import (
PodcastScriptRequest,
PodcastScriptResponse,
PodcastScene,
PodcastSceneLine,
)
router = APIRouter()
@router.post("/script", response_model=PodcastScriptResponse)
async def generate_podcast_script(
request: PodcastScriptRequest,
current_user: Dict[str, Any] = Depends(get_current_user),
):
"""
Generate a podcast script outline (scenes + lines) using podcast-oriented prompting.
"""
user_id = require_authenticated_user(current_user)
# Build comprehensive research context for higher-quality scripts
research_context = ""
if request.research:
try:
key_insights = request.research.get("keyword_analysis", {}).get("key_insights") or []
fact_cards = request.research.get("factCards", []) or []
mapped_angles = request.research.get("mappedAngles", []) or []
sources = request.research.get("sources", []) or []
top_facts = [f.get("quote", "") for f in fact_cards[:5] if f.get("quote")]
angles_summary = [
f"{a.get('title', '')}: {a.get('why', '')}" for a in mapped_angles[:3] if a.get("title") or a.get("why")
]
top_sources = [s.get("url") for s in sources[:3] if s.get("url")]
research_parts = []
if key_insights:
research_parts.append(f"Key Insights: {', '.join(key_insights[:5])}")
if top_facts:
research_parts.append(f"Key Facts: {', '.join(top_facts)}")
if angles_summary:
research_parts.append(f"Research Angles: {' | '.join(angles_summary)}")
if top_sources:
research_parts.append(f"Top Sources: {', '.join(top_sources)}")
research_context = "\n".join(research_parts)
except Exception as exc:
logger.warning(f"Failed to parse research context: {exc}")
research_context = ""
prompt = f"""You are an expert podcast script planner. Create natural, conversational podcast scenes.
Podcast Idea: "{request.idea}"
Duration: ~{request.duration_minutes} minutes
Speakers: {request.speakers} (Host + optional Guest)
{f"RESEARCH CONTEXT:\n{research_context}\n" if research_context else ""}
Return JSON with:
- scenes: array of scenes. Each scene has:
- id: string
- title: short scene title (<= 60 chars)
- duration: duration in seconds (evenly split across total duration)
- emotion: string (one of: "neutral", "happy", "excited", "serious", "curious", "confident")
- lines: array of {{"speaker": "...", "text": "...", "emphasis": boolean}}
* Write natural, conversational dialogue
* Each line can be a sentence or a few sentences that flow together
* Use plain text only - no markdown formatting (no asterisks, underscores, etc.)
* Mark "emphasis": true for key statistics or important points
Guidelines:
- Write for spoken delivery: conversational, natural, with contractions
- Use research insights naturally - weave statistics into dialogue, don't just list them
- Vary emotion per scene based on content
- Ensure scenes match target duration: aim for ~2.5 words per second of audio
- Keep it engaging and informative, like a real podcast conversation
"""
try:
raw = llm_text_gen(prompt=prompt, user_id=user_id, json_struct=None)
except Exception as exc:
raise HTTPException(status_code=500, detail=f"Script generation failed: {exc}")
if isinstance(raw, str):
try:
data = json.loads(raw)
except json.JSONDecodeError:
raise HTTPException(status_code=500, detail="LLM returned non-JSON output")
elif isinstance(raw, dict):
data = raw
else:
raise HTTPException(status_code=500, detail="Unexpected LLM response format")
scenes_data = data.get("scenes") or []
if not isinstance(scenes_data, list):
raise HTTPException(status_code=500, detail="LLM response missing scenes array")
valid_emotions = {"neutral", "happy", "excited", "serious", "curious", "confident"}
# Normalize scenes
scenes: list[PodcastScene] = []
for idx, scene in enumerate(scenes_data):
title = scene.get("title") or f"Scene {idx + 1}"
duration = int(scene.get("duration") or max(30, (request.duration_minutes * 60) // max(1, len(scenes_data))))
emotion = scene.get("emotion") or "neutral"
if emotion not in valid_emotions:
emotion = "neutral"
lines_raw = scene.get("lines") or []
lines: list[PodcastSceneLine] = []
for line in lines_raw:
speaker = line.get("speaker") or ("Host" if len(lines) % request.speakers == 0 else "Guest")
text = line.get("text") or ""
emphasis = line.get("emphasis", False)
if text:
lines.append(PodcastSceneLine(speaker=speaker, text=text, emphasis=emphasis))
scenes.append(
PodcastScene(
id=scene.get("id") or f"scene-{idx + 1}",
title=title,
duration=duration,
lines=lines,
approved=False,
emotion=emotion,
)
)
return PodcastScriptResponse(scenes=scenes)


@@ -0,0 +1,585 @@
"""
Podcast Video Handlers
Video generation and serving endpoints.
"""
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks, Request
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from typing import Dict, Any, Optional
from pathlib import Path
from urllib.parse import quote
import re
import json
from concurrent.futures import ThreadPoolExecutor
from services.database import get_db
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
from api.story_writer.utils.auth import require_authenticated_user
from services.wavespeed.infinitetalk import animate_scene_with_voiceover
from services.podcast.video_combination_service import PodcastVideoCombinationService
from services.llm_providers.main_video_generation import track_video_usage
from services.subscription import PricingService
from services.subscription.preflight_validator import validate_scene_animation_operation
from api.story_writer.task_manager import task_manager
from loguru import logger
from ..constants import AI_VIDEO_SUBDIR, PODCAST_VIDEOS_DIR
from ..utils import load_podcast_audio_bytes, load_podcast_image_bytes
from services.podcast_service import PodcastService
from ..models import (
PodcastVideoGenerationRequest,
PodcastVideoGenerationResponse,
PodcastCombineVideosRequest,
PodcastCombineVideosResponse,
)
router = APIRouter()
# Thread pool executor for CPU-intensive video operations
# This prevents blocking the FastAPI event loop
_video_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="podcast_video")
def _extract_error_message(exc: Exception) -> str:
"""
Extract user-friendly error message from exception.
Handles HTTPException with nested error details from WaveSpeed API.
"""
if isinstance(exc, HTTPException):
detail = exc.detail
# If detail is a dict (from WaveSpeed client)
if isinstance(detail, dict):
# Try to extract message from nested response JSON
response_str = detail.get("response", "")
if response_str:
try:
response_json = json.loads(response_str)
if isinstance(response_json, dict) and "message" in response_json:
return response_json["message"]
except (json.JSONDecodeError, TypeError):
pass
# Fall back to error field
if "error" in detail:
return detail["error"]
# If detail is a string
elif isinstance(detail, str):
return detail
# For other exceptions, use string representation
error_str = str(exc)
# Try to extract meaningful message from HTTPException string format
# Format: "502: {'error': '...', 'response': '{"message":"..."}'}"
if "Insufficient credits" in error_str or "insufficient credits" in error_str.lower():
return "Insufficient WaveSpeed credits. Please top up your account."
# Try to extract JSON message from string
try:
# Look for JSON-like structures in the error string
json_match = re.search(r'"message"\s*:\s*"([^"]+)"', error_str)
if json_match:
return json_match.group(1)
except Exception:
pass
return error_str
def _execute_podcast_video_task(
task_id: str,
request: PodcastVideoGenerationRequest,
user_id: str,
image_bytes: bytes,
audio_bytes: bytes,
auth_token: Optional[str] = None,
mask_image_bytes: Optional[bytes] = None,
):
"""Background task to generate InfiniteTalk video for podcast scene."""
try:
task_manager.update_task_status(
task_id, "processing", progress=5.0, message="Submitting to WaveSpeed InfiniteTalk..."
)
# Extract scene number from scene_id
scene_number_match = re.search(r'\d+', request.scene_id)
scene_number = int(scene_number_match.group()) if scene_number_match else 0
# Prepare scene data for animation
scene_data = {
"scene_number": scene_number,
"title": request.scene_title,
"scene_id": request.scene_id,
}
story_context = {
"project_id": request.project_id,
"type": "podcast",
}
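        # Submit the scene image + voiceover audio to WaveSpeed InfiniteTalk; seed defaults to -1 when not supplied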
animation_result = animate_scene_with_voiceover(
image_bytes=image_bytes,
audio_bytes=audio_bytes,
scene_data=scene_data,
story_context=story_context,
user_id=user_id,
resolution=request.resolution or "720p",
prompt_override=request.prompt,
mask_image_bytes=mask_image_bytes,
seed=request.seed if request.seed is not None else -1,
image_mime="image/png",
audio_mime="audio/mpeg",
)
task_manager.update_task_status(
task_id, "processing", progress=80.0, message="Saving video file..."
)
# Use podcast-specific video directory
ai_video_dir = PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR
ai_video_dir.mkdir(parents=True, exist_ok=True)
video_service = PodcastVideoCombinationService(output_dir=str(PODCAST_VIDEOS_DIR / "Final_Videos"))
save_result = video_service.save_scene_video(
video_bytes=animation_result["video_bytes"],
scene_number=scene_number,
user_id=user_id,
)
video_filename = save_result["video_filename"]
video_url = f"/api/podcast/videos/{video_filename}"
if auth_token:
video_url = f"{video_url}?token={quote(auth_token)}"
logger.info(
f"[Podcast] Video saved: filename={video_filename}, url={video_url}, scene={request.scene_id}"
)
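        # Track usage and cost for this generation, passing the provider-reported cost as an override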
usage_info = track_video_usage(
user_id=user_id,
provider=animation_result["provider"],
model_name=animation_result["model_name"],
prompt=animation_result["prompt"],
video_bytes=animation_result["video_bytes"],
cost_override=animation_result["cost"],
)
result_data = {
"video_url": video_url,
"video_filename": video_filename,
"cost": animation_result["cost"],
"duration": animation_result["duration"],
"provider": animation_result["provider"],
"model": animation_result["model_name"],
}
logger.info(
f"[Podcast] Updating task status to completed: task_id={task_id}, result={result_data}"
)
task_manager.update_task_status(
task_id,
"completed",
progress=100.0,
message="Video generation complete!",
result=result_data,
)
# Verify the task status was updated correctly
updated_status = task_manager.get_task_status(task_id)
logger.info(
f"[Podcast] Task status after update: task_id={task_id}, status={updated_status.get('status') if updated_status else 'None'}, has_result={bool(updated_status.get('result') if updated_status else False)}, video_url={updated_status.get('result', {}).get('video_url') if updated_status else 'N/A'}"
)
logger.info(
f"[Podcast] Video generation completed for project {request.project_id}, scene {request.scene_id}"
)
except Exception as exc:
# Use logger.exception to avoid KeyError when exception message contains curly braces
logger.exception(f"[Podcast] Video generation failed for project {request.project_id}, scene {request.scene_id}")
# Extract user-friendly error message from exception
error_msg = _extract_error_message(exc)
task_manager.update_task_status(
task_id, "failed", error=error_msg, message=f"Video generation failed: {error_msg}"
)
@router.post("/render/video", response_model=PodcastVideoGenerationResponse)
async def generate_podcast_video(
request_obj: Request,
request: PodcastVideoGenerationRequest,
background_tasks: BackgroundTasks,
current_user: Dict[str, Any] = Depends(get_current_user),
):
"""
Generate video for a podcast scene using WaveSpeed InfiniteTalk (avatar image + audio).
Returns task_id for polling since InfiniteTalk can take up to 10 minutes.
"""
user_id = require_authenticated_user(current_user)
logger.info(
f"[Podcast] Starting video generation for project {request.project_id}, scene {request.scene_id}"
)
# Load audio bytes
audio_bytes = load_podcast_audio_bytes(request.audio_url)
# Validate resolution
if request.resolution not in {"480p", "720p"}:
raise HTTPException(status_code=400, detail="Resolution must be '480p' or '720p'.")
# Load image bytes (scene image is required for video generation)
if request.avatar_image_url:
image_bytes = load_podcast_image_bytes(request.avatar_image_url)
else:
# Scene-specific image should be generated before video generation
raise HTTPException(
status_code=400,
detail="Scene image is required for video generation. Please generate images for scenes first.",
)
mask_image_bytes = None
if request.mask_image_url:
try:
mask_image_bytes = load_podcast_image_bytes(request.mask_image_url)
except Exception as e:
logger.error(f"[Podcast] Failed to load mask image: {e}")
raise HTTPException(
status_code=400,
detail="Failed to load mask image for video generation.",
)
# Validate subscription limits
db = next(get_db())
try:
pricing_service = PricingService(db)
validate_scene_animation_operation(pricing_service=pricing_service, user_id=user_id)
finally:
db.close()
# Extract token for authenticated URL building
auth_token = None
auth_header = request_obj.headers.get("Authorization")
if auth_header and auth_header.startswith("Bearer "):
auth_token = auth_header.replace("Bearer ", "").strip()
# Create async task
task_id = task_manager.create_task("podcast_video_generation")
background_tasks.add_task(
_execute_podcast_video_task,
task_id=task_id,
request=request,
user_id=user_id,
image_bytes=image_bytes,
audio_bytes=audio_bytes,
auth_token=auth_token,
mask_image_bytes=mask_image_bytes,
)
return PodcastVideoGenerationResponse(
task_id=task_id,
status="pending",
message="Video generation started. This may take up to 10 minutes.",
)
@router.get("/videos/{filename}")
async def serve_podcast_video(
filename: str,
current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
"""Serve generated podcast scene video files.
Supports authentication via Authorization header or token query parameter.
Query parameter is useful for HTML elements like <video> that cannot send custom headers.
"""
require_authenticated_user(current_user)
# Security check: ensure filename doesn't contain path traversal
if ".." in filename or "/" in filename or "\\" in filename:
raise HTTPException(status_code=400, detail="Invalid filename")
# Look for video in podcast_videos directory (including AI_Videos subdirectory)
video_path = None
possible_paths = [
PODCAST_VIDEOS_DIR / filename,
PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR / filename,
]
for path in possible_paths:
resolved_path = path.resolve()
# Security check: ensure path is within PODCAST_VIDEOS_DIR
if str(resolved_path).startswith(str(PODCAST_VIDEOS_DIR)) and resolved_path.exists():
video_path = resolved_path
break
if not video_path:
raise HTTPException(status_code=404, detail="Video file not found")
return FileResponse(video_path, media_type="video/mp4")
@router.get("/videos")
async def list_podcast_videos(
project_id: Optional[str] = None,
current_user: Dict[str, Any] = Depends(get_current_user),
):
"""
List existing video files for the current user, optionally filtered by project.
Returns videos mapped to scene numbers for easy matching.
"""
try:
user_id = require_authenticated_user(current_user)
logger.info(f"[Podcast] Listing videos for user_id={user_id}, project_id={project_id}")
# Look in podcast_videos/AI_Videos directory
ai_video_dir = PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR
ai_video_dir.mkdir(parents=True, exist_ok=True)
videos = []
if ai_video_dir.exists():
# Pattern: scene_{scene_number}_{user_id}_{timestamp}.mp4
# Extract user_id from current user (same logic as save_scene_video)
clean_user_id = "".join(c if c.isalnum() or c in ('-', '_') else '_' for c in user_id[:16])
logger.info(f"[Podcast] Looking for videos with clean_user_id={clean_user_id} in {ai_video_dir}")
# Map scene_number -> (most recent video info)
scene_video_map: Dict[int, Dict[str, Any]] = {}
all_files = list(ai_video_dir.glob("*.mp4"))
logger.info(f"[Podcast] Found {len(all_files)} MP4 files in directory")
for video_file in all_files:
filename = video_file.name
# Match pattern: scene_{number}_{user_id}_{hash}.mp4
# Use greedy match for user_id and match hash as "anything except underscore before .mp4"
match = re.match(r"scene_(\d+)_(.+)_([^_]+)\.mp4", filename)
if match:
scene_number = int(match.group(1))
file_user_id = match.group(2)
hash_part = match.group(3)
# Only include videos for this user
if file_user_id == clean_user_id:
video_url = f"/api/podcast/videos/{filename}"
file_mtime = video_file.stat().st_mtime
# Keep the most recent video for each scene
if scene_number not in scene_video_map or file_mtime > scene_video_map[scene_number]["mtime"]:
scene_video_map[scene_number] = {
"scene_number": scene_number,
"filename": filename,
"video_url": video_url,
"file_size": video_file.stat().st_size,
"mtime": file_mtime,
}
# Convert map to list and sort by scene number
videos = list(scene_video_map.values())
videos.sort(key=lambda v: v["scene_number"])
logger.info(f"[Podcast] Returning {len(videos)} videos for user: {[v['scene_number'] for v in videos]}")
else:
logger.warning(f"[Podcast] Video directory does not exist: {ai_video_dir}")
return {"videos": videos}
    except Exception:
        logger.exception("[Podcast] Error listing videos")
        return {"videos": []}
@router.post("/render/combine-videos", response_model=PodcastCombineVideosResponse)
async def combine_podcast_videos(
request_obj: Request,
request: PodcastCombineVideosRequest,
current_user: Dict[str, Any] = Depends(get_current_user),
):
"""
Combine all scene videos into a single final podcast video.
Returns task_id for polling.
"""
user_id = require_authenticated_user(current_user)
logger.info(f"[Podcast] Combining {len(request.scene_video_urls)} scene videos for project {request.project_id}")
if not request.scene_video_urls:
raise HTTPException(status_code=400, detail="No scene videos provided")
# Create async task
task_id = task_manager.create_task("podcast_combine_videos")
# Extract token for authenticated URL building
auth_token = None
auth_header = request_obj.headers.get("Authorization")
if auth_header and auth_header.startswith("Bearer "):
auth_token = auth_header.replace("Bearer ", "").strip()
# Run video combination in thread pool executor to prevent blocking event loop
# Submit directly to executor - this runs in a background thread and doesn't block
# The executor handles the thread pool management automatically
def handle_task_completion(future):
"""Callback to handle task completion and log errors."""
try:
future.result() # This will raise if there was an exception
except Exception as e:
logger.error(f"[Podcast] Error in video combination task: {e}", exc_info=True)
# Submit to executor - returns immediately, task runs in background thread
future = _video_executor.submit(
_execute_combine_videos_task,
task_id,
request.project_id,
request.scene_video_urls,
request.podcast_title,
user_id,
auth_token,
)
# Add callback to log errors without blocking
future.add_done_callback(handle_task_completion)
return PodcastCombineVideosResponse(
task_id=task_id,
status="pending",
message="Video combination started. This may take a few minutes.",
)
def _execute_combine_videos_task(
task_id: str,
project_id: str,
scene_video_urls: list[str],
podcast_title: str,
user_id: str,
auth_token: Optional[str] = None,
):
"""Background task to combine scene videos into final podcast."""
try:
task_manager.update_task_status(
task_id, "processing", progress=10.0, message="Preparing scene videos..."
)
# Convert scene video URLs to local file paths
scene_video_paths = []
for video_url in scene_video_urls:
# Extract filename from URL (e.g., /api/podcast/videos/scene_1_user_xxx.mp4)
filename = video_url.split("/")[-1].split("?")[0] # Remove query params
video_path = PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR / filename
if not video_path.exists():
logger.warning(f"[Podcast] Scene video not found: {video_path}")
continue
scene_video_paths.append(str(video_path))
if not scene_video_paths:
raise ValueError("No valid scene videos found to combine")
logger.info(f"[Podcast] Found {len(scene_video_paths)} scene videos to combine")
task_manager.update_task_status(
task_id, "processing", progress=30.0, message="Combining videos..."
)
# Use dedicated PodcastVideoCombinationService
final_videos_dir = PODCAST_VIDEOS_DIR / "Final_Videos"
final_videos_dir.mkdir(parents=True, exist_ok=True)
video_service = PodcastVideoCombinationService(output_dir=str(final_videos_dir))
# Progress callback for task updates
def progress_callback(progress: float, message: str):
task_manager.update_task_status(
task_id, "processing", progress=progress, message=message
)
task_manager.update_task_status(
task_id, "processing", progress=50.0, message="Combining videos..."
)
# Combine videos using dedicated podcast service
result = video_service.combine_videos(
video_paths=scene_video_paths,
podcast_title=podcast_title,
fps=30,
progress_callback=progress_callback,
)
video_filename = Path(result["video_path"]).name
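        # Expose the combined file through the authenticated final-videos endpoint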
video_url = f"/api/podcast/final-videos/{video_filename}"
if auth_token:
video_url = f"{video_url}?token={quote(auth_token)}"
logger.info(f"[Podcast] Final video combined: {video_filename}")
result_data = {
"video_url": video_url,
"video_filename": video_filename,
"duration": result.get("duration", 0),
"file_size": result.get("file_size", 0),
}
task_manager.update_task_status(
task_id,
"completed",
progress=100.0,
message="Podcast video ready!",
result=result_data,
)
# Save final video URL to project for persistence across reloads
# Do this quickly and synchronously - database operations are fast
try:
from services.database import SessionLocal
db = SessionLocal()
try:
service = PodcastService(db)
service.update_project(user_id, project_id, final_video_url=video_url)
db.commit()
logger.info(f"[Podcast] Saved final video URL to project {project_id}: {video_url}")
finally:
db.close()
except Exception as e:
logger.warning(f"[Podcast] Failed to save final video URL to project: {e}")
# Don't fail the task if project update fails - video is still available via task result
logger.info(f"[Podcast] Task {task_id} marked as completed successfully")
    except Exception as e:
        # Avoid interpolating the exception into the message - curly braces in it can break loguru formatting
        logger.exception("[Podcast] Failed to combine videos")
error_msg = _extract_error_message(e)
task_manager.update_task_status(
task_id,
"failed",
progress=0.0,
message=f"Video combination failed: {error_msg}",
error=str(error_msg),
)
logger.error(f"[Podcast] Task {task_id} marked as failed: {error_msg}")
@router.get("/final-videos/{filename}")
async def serve_final_podcast_video(
filename: str,
current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
"""Serve the final combined podcast video with authentication."""
    require_authenticated_user(current_user)
    # Security check: reject path traversal before building the filesystem path
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")
    final_videos_dir = PODCAST_VIDEOS_DIR / "Final_Videos"
    video_path = final_videos_dir / filename
    if not video_path.exists():
        raise HTTPException(status_code=404, detail="Video not found")
return FileResponse(
path=str(video_path),
media_type="video/mp4",
filename=filename,
)