AI podcast project
This commit is contained in:
@@ -45,7 +45,11 @@ class CacheHeadersMiddleware(BaseHTTPMiddleware):
|
||||
# Immutable files (with hash) - cache for 1 year
|
||||
# These files never change (new hash = new file)
|
||||
response.headers["Cache-Control"] = "public, max-age=31536000, immutable"
|
||||
response.headers["Expires"] = "Thu, 31 Dec 2025 23:59:59 GMT"
|
||||
# Expires header calculated dynamically to match max-age
|
||||
# Modern browsers prefer Cache-Control, but Expires provides compatibility
|
||||
from datetime import datetime, timedelta
|
||||
expires_date = datetime.utcnow() + timedelta(seconds=31536000)
|
||||
response.headers["Expires"] = expires_date.strftime("%a, %d %b %Y %H:%M:%S GMT")
|
||||
else:
|
||||
# Non-hashed files - shorter cache (1 hour)
|
||||
# These might be updated, so cache for shorter time
|
||||
|
||||
28
backend/api/podcast/constants.py
Normal file
28
backend/api/podcast/constants.py
Normal file
@@ -0,0 +1,28 @@
|
||||
"""
|
||||
Podcast API Constants
|
||||
|
||||
Centralized constants and directory configuration for podcast module.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from services.story_writer.audio_generation_service import StoryAudioGenerationService
|
||||
|
||||
# Directory layout, relative to this module (backend/api/podcast/constants.py):
#   parents[0] -> backend/api/podcast/
#   parents[1] -> backend/api/
#   parents[2] -> backend/
BASE_DIR = Path(__file__).resolve().parents[2]  # backend/


def _ensure_dir(name: str) -> Path:
    """Return the resolved ``BASE_DIR / name`` path, creating the directory if needed."""
    directory = (BASE_DIR / name).resolve()
    directory.mkdir(parents=True, exist_ok=True)
    return directory


# Storage roots for generated podcast media; each is created at import time.
PODCAST_AUDIO_DIR = _ensure_dir("podcast_audio")
PODCAST_IMAGES_DIR = _ensure_dir("podcast_images")
PODCAST_VIDEOS_DIR = _ensure_dir("podcast_videos")

# Relative subdirectory name used for AI-generated videos.
AI_VIDEO_SUBDIR = Path("AI_Videos")

# Shared audio generation service, writing its output into the podcast audio directory.
audio_service = StoryAudioGenerationService(output_dir=str(PODCAST_AUDIO_DIR))
|
||||
|
||||
6
backend/api/podcast/handlers/__init__.py
Normal file
6
backend/api/podcast/handlers/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""
|
||||
Podcast API Handlers
|
||||
|
||||
Handler modules for different podcast operations.
|
||||
"""
|
||||
|
||||
96
backend/api/podcast/handlers/analysis.py
Normal file
96
backend/api/podcast/handlers/analysis.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""
|
||||
Podcast Analysis Handlers
|
||||
|
||||
Analysis endpoint for podcast ideas.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from typing import Dict, Any
|
||||
import json
|
||||
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from loguru import logger
|
||||
from ..models import PodcastAnalyzeRequest, PodcastAnalyzeResponse
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/analyze", response_model=PodcastAnalyzeResponse)
async def analyze_podcast_idea(
    request: PodcastAnalyzeRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Analyze a podcast idea and return podcast-oriented outlines, keywords, and titles.

    This uses the shared LLM provider but with a podcast-specific prompt (not story format).

    Args:
        request: Analysis request; ``idea``, ``duration`` and ``speakers`` are
            interpolated into the prompt.
        current_user: Auth payload injected by ``get_current_user``.

    Returns:
        PodcastAnalyzeResponse built from the LLM output, with fallbacks for
        missing or empty fields.

    Raises:
        HTTPException: Re-raised unchanged when the LLM layer raises one
            (e.g. 429 subscription limit); 500 when generation fails or the
            output is not a JSON object.
    """
    user_id = require_authenticated_user(current_user)

    prompt = f"""
You are an expert podcast producer. Given a podcast idea, craft concise podcast-ready assets
that sound like episode plans (not fiction stories).

Podcast Idea: "{request.idea}"
Duration: ~{request.duration} minutes
Speakers: {request.speakers} (host + optional guest)

Return JSON with:
- audience: short target audience description
- content_type: podcast style/format
- top_keywords: 5 podcast-relevant keywords/phrases
- suggested_outlines: 2 items, each with title (<=60 chars) and 4-6 short segments (bullet-friendly, factual)
- title_suggestions: 3 concise episode titles (no cliffhanger storytelling)
- exa_suggested_config: suggested Exa search options to power research (keep conservative defaults to control cost), with:
  - exa_search_type: "auto" | "neural" | "keyword" (prefer "auto" unless clearly news-heavy)
  - exa_category: one of ["research paper","news","company","github","tweet","personal site","pdf","financial report","linkedin profile"]
  - exa_include_domains: up to 3 reputable domains to prioritize (optional)
  - exa_exclude_domains: up to 3 domains to avoid (optional)
  - max_sources: 6-10
  - include_statistics: boolean (true if topic needs fresh stats)
  - date_range: one of ["last_month","last_3_months","last_year","all_time"] (pick recent if time-sensitive)

Requirements:
- Keep language factual, actionable, and suited for spoken audio.
- Avoid narrative fiction tone; focus on insights, hooks, objections, and takeaways.
- Prefer 2024-2025 context when relevant.
"""

    try:
        raw = llm_text_gen(prompt=prompt, user_id=user_id, json_struct=None)
    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429 subscription limit) - preserve error details
        raise
    except Exception as exc:
        logger.error(f"[Podcast Analyze] Analysis failed for user {user_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Analysis failed: {exc}") from exc

    # Normalize response (accept dict or JSON string)
    if isinstance(raw, str):
        text = raw.strip()
        # No JSON schema is enforced (json_struct=None), so the model may wrap
        # its answer in a Markdown code fence; unwrap it before parsing.
        if text.startswith("```"):
            _, _, text = text.partition("\n")
            text = text.rstrip()
            if text.endswith("```"):
                text = text[:-3]
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            raise HTTPException(status_code=500, detail="LLM returned non-JSON output")
    elif isinstance(raw, dict):
        data = raw
    else:
        raise HTTPException(status_code=500, detail="Unexpected LLM response format")

    # Valid JSON is not necessarily an object (could be a list or scalar);
    # the field lookups below need a mapping.
    if not isinstance(data, dict):
        raise HTTPException(status_code=500, detail="Unexpected LLM response format")

    return PodcastAnalyzeResponse(
        audience=data.get("audience") or "Growth-focused professionals",
        content_type=data.get("content_type") or "Interview + insights",
        top_keywords=data.get("top_keywords") or [],
        suggested_outlines=data.get("suggested_outlines") or [],
        title_suggestions=data.get("title_suggestions") or [],
        exa_suggested_config=data.get("exa_suggested_config") or None,
    )
|
||||
|
||||
324
backend/api/podcast/handlers/audio.py
Normal file
324
backend/api/podcast/handlers/audio.py
Normal file
@@ -0,0 +1,324 @@
|
||||
"""
|
||||
Podcast Audio Handlers
|
||||
|
||||
Audio generation, combining, and serving endpoints.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import Dict, Any
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
import tempfile
|
||||
import uuid
|
||||
import shutil
|
||||
|
||||
from services.database import get_db
|
||||
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
from utils.asset_tracker import save_asset_to_library
|
||||
from models.story_models import StoryAudioResult
|
||||
from loguru import logger
|
||||
from ..constants import PODCAST_AUDIO_DIR, audio_service
|
||||
from ..models import (
|
||||
PodcastAudioRequest,
|
||||
PodcastAudioResponse,
|
||||
PodcastCombineAudioRequest,
|
||||
PodcastCombineAudioResponse,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/audio", response_model=PodcastAudioResponse)
async def generate_podcast_audio(
    request: PodcastAudioRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate AI audio for a podcast scene using shared audio service.

    Args:
        request: Scene text plus optional voice settings (voice, speed, volume,
            pitch, emotion, encoding options).
        current_user: Auth payload injected by ``get_current_user``.
        db: Database session used to record the asset in the library.

    Returns:
        PodcastAudioResponse describing the generated audio file.

    Raises:
        HTTPException: 400 when the text is empty; re-raised unchanged when the
            audio service raises one; 500 for any other generation failure.
    """
    user_id = require_authenticated_user(current_user)

    if not request.text or not request.text.strip():
        raise HTTPException(status_code=400, detail="Text is required")

    try:
        result: StoryAudioResult = audio_service.generate_ai_audio(
            scene_number=0,
            scene_title=request.scene_title,
            text=request.text.strip(),
            user_id=user_id,
            voice_id=request.voice_id or "Wise_Woman",
            speed=request.speed or 1.0,  # Normal speed (was 0.9, but too slow - causing duration issues)
            volume=request.volume or 1.0,
            pitch=request.pitch or 0.0,  # Normal pitch (0.0 = neutral)
            emotion=request.emotion or "neutral",
            english_normalization=request.english_normalization or False,
            sample_rate=request.sample_rate,
            bitrate=request.bitrate,
            channel=request.channel,
            format=request.format,
            language_boost=request.language_boost,
            enable_sync_mode=request.enable_sync_mode,
        )

        # Override URL to use podcast endpoint instead of story endpoint
        if result.get("audio_url") and "/api/story/audio/" in result.get("audio_url", ""):
            audio_filename = result.get("audio_filename", "")
            result["audio_url"] = f"/api/podcast/audio/{audio_filename}"
    except HTTPException:
        # Keep the original status/detail (e.g. a subscription-limit response)
        # instead of masking it as a generic 500.
        raise
    except Exception as exc:
        logger.error(f"[Podcast] Audio generation failed for user {user_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Audio generation failed: {exc}") from exc

    # Save to asset library (podcast module); best-effort, never fails the request.
    try:
        if result.get("audio_url"):
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="audio",
                source_module="podcast_maker",
                filename=result.get("audio_filename", ""),
                file_url=result.get("audio_url", ""),
                file_path=result.get("audio_path"),
                file_size=result.get("file_size"),
                mime_type="audio/mpeg",
                title=f"{request.scene_title} - Podcast",
                description="Podcast scene narration",
                tags=["podcast", "audio", request.scene_id],
                provider=result.get("provider"),
                model=result.get("model"),
                cost=result.get("cost"),
                asset_metadata={
                    "scene_id": request.scene_id,
                    "scene_title": request.scene_title,
                    "status": "completed",
                },
            )
    except Exception as e:
        logger.warning(f"[Podcast] Failed to save audio asset: {e}")

    return PodcastAudioResponse(
        scene_id=request.scene_id,
        scene_title=request.scene_title,
        audio_filename=result.get("audio_filename", ""),
        audio_url=result.get("audio_url", ""),
        provider=result.get("provider", "wavespeed"),
        model=result.get("model", "minimax/speech-02-hd"),
        voice_id=result.get("voice_id", request.voice_id or "Wise_Woman"),
        text_length=result.get("text_length", len(request.text)),
        file_size=result.get("file_size", 0),
        cost=result.get("cost", 0.0),
    )
|
||||
|
||||
|
||||
def _resolve_scene_audio_path(audio_url: str):
    """Map a scene audio URL to a local file path.

    Returns a ``Path`` for the audio file, or ``None`` when the URL cannot be
    mapped (unsupported format, empty filename, or path traversal attempt);
    the reason is logged before returning ``None``.

    Raises:
        HTTPException: 400 when ``audio_url`` starts with ``http``.
    """
    if audio_url.startswith("http"):
        # External URL - would need to download
        logger.error(f"[Podcast] External URLs not supported: {audio_url}")
        raise HTTPException(
            status_code=400,
            detail="External URLs not supported. Please use local file paths.",
        )

    if not audio_url.startswith("/api/"):
        # NOTE(review): this fallback lets an authenticated caller name any
        # file on the server; consider confining it to PODCAST_AUDIO_DIR.
        logger.warning(f"[Podcast] Non-API URL format, treating as direct path: {audio_url}")
        return Path(audio_url)

    parsed = urlparse(audio_url)
    path = parsed.path if parsed.scheme else audio_url

    # Handle both /api/podcast/audio/ and /api/story/audio/ URLs (for backward compatibility)
    if "/api/podcast/audio/" in path:
        filename = path.split("/api/podcast/audio/", 1)[1].split("?", 1)[0].strip()
    elif "/api/story/audio/" in path:
        # Convert story audio URLs to podcast audio (they're in the same directory now)
        filename = path.split("/api/story/audio/", 1)[1].split("?", 1)[0].strip()
        logger.info(f"[Podcast] Converting story audio URL to podcast: {audio_url} -> {filename}")
    else:
        logger.error(f"[Podcast] Unsupported audio URL format: {audio_url}. Expected /api/podcast/audio/ or /api/story/audio/ URLs.")
        return None

    if not filename:
        logger.error(f"[Podcast] Could not extract filename from URL: {audio_url}")
        return None

    audio_path = (PODCAST_AUDIO_DIR / filename).resolve()

    # Security check: a string-prefix comparison would accept sibling
    # directories such as "<audio dir>_backup", so compare path components.
    try:
        audio_path.relative_to(PODCAST_AUDIO_DIR)
    except ValueError:
        logger.error(f"[Podcast] Attempted path traversal when resolving audio: {audio_url}")
        return None

    return audio_path


@router.post("/combine-audio", response_model=PodcastCombineAudioResponse)
async def combine_podcast_audio(
    request: PodcastCombineAudioRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Combine multiple scene audio files into a single podcast audio file.

    Scene URLs that cannot be resolved or loaded are logged and skipped; the
    remaining clips are concatenated in request order and written as an MP3
    into ``PODCAST_AUDIO_DIR``.

    Args:
        request: Project id plus parallel lists of scene ids and audio URLs.
        current_user: Auth payload injected by ``get_current_user``.
        db: Database session used to record the asset in the library.

    Returns:
        PodcastCombineAudioResponse with the combined file's URL, name,
        duration, size, and the number of requested scenes.

    Raises:
        HTTPException: 400 for missing/mismatched inputs, external URLs, or
            when no audio could be loaded; 500 when MoviePy is unavailable or
            combination fails.
    """
    user_id = require_authenticated_user(current_user)

    if not request.scene_ids or not request.scene_audio_urls:
        raise HTTPException(status_code=400, detail="Scene IDs and audio URLs are required")

    if len(request.scene_ids) != len(request.scene_audio_urls):
        raise HTTPException(status_code=400, detail="Scene IDs and audio URLs count must match")

    try:
        # Import moviepy for audio concatenation
        try:
            from moviepy import AudioFileClip, concatenate_audioclips
        except ImportError:
            logger.error("[Podcast] MoviePy not available for audio combination")
            raise HTTPException(
                status_code=500,
                detail="Audio combination requires MoviePy. Please install: pip install moviepy"
            )

        audio_clips = []
        combined_audio = None
        total_duration = 0.0

        try:
            # Log incoming request for debugging
            logger.info(f"[Podcast] Combining audio: {len(request.scene_audio_urls)} URLs received")
            for idx, url in enumerate(request.scene_audio_urls):
                logger.info(f"[Podcast] URL {idx+1}: {url}")

            # Load each audio file from the podcast_audio directory
            for idx, audio_url in enumerate(request.scene_audio_urls):
                try:
                    audio_path = _resolve_scene_audio_path(audio_url)
                    if audio_path is None:
                        # Reason already logged by the resolver.
                        continue

                    if not audio_path.exists():
                        logger.error(f"[Podcast] Audio file not found: {audio_path} (from URL: {audio_url})")
                        continue

                    audio_clip = AudioFileClip(str(audio_path))
                    audio_clips.append(audio_clip)
                    total_duration += audio_clip.duration
                    logger.info(f"[Podcast] Loaded audio {idx+1}/{len(request.scene_audio_urls)}: {audio_path.name} ({audio_clip.duration:.2f}s)")
                except HTTPException:
                    raise
                except Exception as e:
                    # loguru attaches tracebacks via opt(exception=True);
                    # it has no stdlib-style exc_info keyword.
                    logger.opt(exception=True).error(f"[Podcast] Failed to load audio {idx+1}: {e}")
                    # Continue with other audio files
                    continue

            if not audio_clips:
                raise HTTPException(status_code=400, detail="No valid audio files found to combine")

            # Concatenate all audio clips
            logger.info(f"[Podcast] Combining {len(audio_clips)} audio clips (total duration: {total_duration:.2f}s)")
            combined_audio = concatenate_audioclips(audio_clips)

            # project_id is client-supplied; keep only filename-safe characters
            # so it cannot redirect the output outside PODCAST_AUDIO_DIR.
            safe_project_id = "".join(
                ch if ch.isalnum() or ch in "-_" else "_"
                for ch in str(request.project_id)
            )
            output_filename = f"podcast_combined_{safe_project_id}_{uuid.uuid4().hex[:8]}.mp3"
            output_path = PODCAST_AUDIO_DIR / output_filename

            # Write combined audio file
            combined_audio.write_audiofile(
                str(output_path),
                codec="mp3",
                bitrate="192k",
                logger=None,  # Suppress moviepy logging
            )
        finally:
            # Release clip resources on success and on failure alike.
            for clip in [*audio_clips, combined_audio]:
                if clip is None:
                    continue
                try:
                    clip.close()
                except Exception as close_err:
                    logger.warning(f"[Podcast] Failed to close audio clip: {close_err}")

        file_size = output_path.stat().st_size
        audio_url = f"/api/podcast/audio/{output_filename}"

        logger.info(f"[Podcast] Combined audio saved: {output_path} ({file_size} bytes)")

        # Save to asset library; best-effort, never fails the request.
        try:
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="audio",
                source_module="podcast_maker",
                filename=output_filename,
                file_url=audio_url,
                file_path=str(output_path),
                file_size=file_size,
                mime_type="audio/mpeg",
                title=f"Combined Podcast - {request.project_id}",
                description=f"Combined podcast audio from {len(request.scene_ids)} scenes",
                tags=["podcast", "audio", "combined", request.project_id],
                asset_metadata={
                    "project_id": request.project_id,
                    "scene_ids": request.scene_ids,
                    "scene_count": len(request.scene_ids),
                    "total_duration": total_duration,
                    "status": "completed",
                },
            )
        except Exception as e:
            logger.warning(f"[Podcast] Failed to save combined audio asset: {e}")

        return PodcastCombineAudioResponse(
            combined_audio_url=audio_url,
            combined_audio_filename=output_filename,
            total_duration=total_duration,
            file_size=file_size,
            scene_count=len(request.scene_ids),
        )

    except HTTPException:
        raise
    except Exception as exc:
        logger.opt(exception=True).error(f"[Podcast] Audio combination failed: {exc}")
        raise HTTPException(status_code=500, detail=f"Audio combination failed: {exc}") from exc
|
||||
|
||||
|
||||
@router.get("/audio/{filename}")
async def serve_podcast_audio(
    filename: str,
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve generated podcast scene audio files.

    Supports authentication via Authorization header or token query parameter.
    Query parameter is useful for HTML elements like <audio> that cannot send custom headers.

    Args:
        filename: Bare file name inside ``PODCAST_AUDIO_DIR``.
        current_user: Auth payload injected by ``get_current_user_with_query_token``.

    Returns:
        FileResponse streaming the file as ``audio/mpeg``.

    Raises:
        HTTPException: 400 for names containing path components, 403 when the
            resolved path escapes the audio directory, 404 when no such file exists.
    """
    require_authenticated_user(current_user)

    # Security check: ensure filename doesn't contain path traversal
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    audio_path = (PODCAST_AUDIO_DIR / filename).resolve()

    # Security check: compare path components rather than string prefixes,
    # which would also accept sibling directories sharing the same prefix.
    try:
        audio_path.relative_to(PODCAST_AUDIO_DIR)
    except ValueError:
        raise HTTPException(status_code=403, detail="Access denied") from None

    # is_file() rather than exists(): a name such as "." resolves to the
    # directory itself, which cannot be served as a file.
    if not audio_path.is_file():
        raise HTTPException(status_code=404, detail="Audio file not found")

    return FileResponse(audio_path, media_type="audio/mpeg")
|
||||
|
||||
381
backend/api/podcast/handlers/avatar.py
Normal file
381
backend/api/podcast/handlers/avatar.py
Normal file
@@ -0,0 +1,381 @@
|
||||
"""
|
||||
Podcast Avatar Handlers
|
||||
|
||||
Avatar upload and presenter generation endpoints.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import Dict, Any, List, Optional
|
||||
from pathlib import Path
|
||||
import uuid
|
||||
import hashlib
|
||||
|
||||
from services.database import get_db
|
||||
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
from services.llm_providers.main_image_generation import generate_image
|
||||
from services.llm_providers.main_image_editing import edit_image
|
||||
from utils.asset_tracker import save_asset_to_library
|
||||
from loguru import logger
|
||||
from ..constants import PODCAST_IMAGES_DIR
|
||||
from ..presenter_personas import choose_persona_id, get_persona
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Avatar subdirectory name; also used as the path segment in the avatar URLs
# built by the handlers below (/api/podcast/images/<AVATAR_SUBDIR>/<filename>).
AVATAR_SUBDIR = "avatars"
# Avatars are stored under the podcast images directory.
PODCAST_AVATARS_DIR = PODCAST_IMAGES_DIR / AVATAR_SUBDIR
# Created at import time so the handlers can write files without checking first.
PODCAST_AVATARS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
@router.post("/avatar/upload")
async def upload_podcast_avatar(
    file: UploadFile = File(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Upload a presenter avatar image for a podcast project.
    Returns the avatar URL for use in scene image generation.

    Args:
        file: Uploaded image; must declare an ``image/*`` content type and be
            at most 5MB.
        project_id: Optional project identifier; when given, the avatar is also
            recorded in the asset library.
        current_user: Auth payload injected by ``get_current_user``.
        db: Database session used to record the asset in the library.

    Returns:
        Dict with ``avatar_url``, ``avatar_filename`` and ``message``.

    Raises:
        HTTPException: 400 for a non-image or oversized upload; 500 when
            saving fails.
    """
    user_id = require_authenticated_user(current_user)

    # Validate file type
    if not file.content_type or not file.content_type.startswith('image/'):
        raise HTTPException(status_code=400, detail="File must be an image")

    # Validate file size (max 5MB)
    file_content = await file.read()
    if len(file_content) > 5 * 1024 * 1024:
        raise HTTPException(status_code=400, detail="Image file size must be less than 5MB")

    try:
        # The client-supplied filename may be missing, and its extension is
        # untrusted: fall back to .png unless it is a plain alphanumeric suffix.
        file_ext = Path(file.filename or "").suffix
        if not (len(file_ext) > 1 and file_ext[1:].isalnum()):
            file_ext = '.png'

        # project_id is client-supplied; keep only filename-safe characters so
        # it cannot redirect the write outside the avatars directory.
        safe_project_id = "".join(
            ch if ch.isalnum() or ch in "-_" else "_"
            for ch in (project_id or 'temp')
        )
        unique_id = str(uuid.uuid4())[:8]
        avatar_filename = f"avatar_{safe_project_id}_{unique_id}{file_ext}"
        avatar_path = PODCAST_AVATARS_DIR / avatar_filename

        # Save file
        with open(avatar_path, "wb") as f:
            f.write(file_content)

        logger.info(f"[Podcast] Avatar uploaded: {avatar_path}")

        # Create avatar URL
        avatar_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{avatar_filename}"

        # Save to asset library if project_id provided; best-effort.
        if project_id:
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
                    source_module="podcast_maker",
                    filename=avatar_filename,
                    file_url=avatar_url,
                    file_path=str(avatar_path),
                    file_size=len(file_content),
                    mime_type=file.content_type,
                    title=f"Podcast Presenter Avatar - {project_id}",
                    description="Podcast presenter avatar image",
                    tags=["podcast", "avatar", project_id],
                    asset_metadata={
                        "project_id": project_id,
                        "type": "presenter_avatar",
                        "status": "completed",
                    },
                )
            except Exception as e:
                logger.warning(f"[Podcast] Failed to save avatar asset: {e}")

        return {
            "avatar_url": avatar_url,
            "avatar_filename": avatar_filename,
            "message": "Avatar uploaded successfully"
        }
    except Exception as exc:
        # loguru attaches tracebacks via opt(exception=True); it has no
        # stdlib-style exc_info keyword.
        logger.opt(exception=True).error(f"[Podcast] Avatar upload failed: {exc}")
        raise HTTPException(status_code=500, detail=f"Avatar upload failed: {str(exc)}") from exc
|
||||
|
||||
|
||||
@router.post("/avatar/make-presentable")
async def make_avatar_presentable(
    avatar_url: str = Form(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Transform an uploaded avatar image into a podcast-appropriate presenter.
    Uses AI image editing to convert the uploaded photo into a professional podcast presenter.

    Args:
        avatar_url: URL of a previously uploaded avatar, loaded through
            ``load_podcast_image_bytes``.
        project_id: Optional project identifier; when given, the result is also
            recorded in the asset library.
        current_user: Auth payload injected by ``get_current_user``.
        db: Database session used to record the asset in the library.

    Returns:
        Dict with ``avatar_url``, ``avatar_filename`` and ``message`` for the
        transformed image.

    Raises:
        HTTPException: Re-raised unchanged when image loading or editing raises
            one; 500 for any other failure.
    """
    user_id = require_authenticated_user(current_user)

    try:
        # Load the uploaded avatar image
        from ..utils import load_podcast_image_bytes
        avatar_bytes = load_podcast_image_bytes(avatar_url)

        logger.info(f"[Podcast] Transforming avatar to podcast presenter for project {project_id}")

        # Create transformation prompt based on WaveSpeed AI recommendations
        # Transform the uploaded image into a professional podcast presenter
        transformation_prompt = """Transform this image into a professional podcast presenter:
- Half-length portrait format, looking at camera
- Professional attire (white shirt and light gray blazer or business casual)
- Confident, friendly, engaging expression
- Soft studio lighting, plain light-gray or neutral background
- Professional podcast host appearance, suitable for video generation
- Clean composition, center-focused for avatar overlay
- Maintain the person's appearance and identity while making it podcast-appropriate
- Ultra realistic, 4k quality, professional photography style"""

        # Transform the image using image editing
        image_options = {
            "provider": None,  # Auto-select provider
            "model": None,  # Use default model
        }

        result = edit_image(
            input_image_bytes=avatar_bytes,
            prompt=transformation_prompt,
            options=image_options,
            user_id=user_id
        )

        # project_id is client-supplied; keep only filename-safe characters so
        # it cannot redirect the write outside the avatars directory.
        safe_project_id = "".join(
            ch if ch.isalnum() or ch in "-_" else "_"
            for ch in (project_id or 'temp')
        )
        unique_id = str(uuid.uuid4())[:8]
        transformed_filename = f"presenter_transformed_{safe_project_id}_{unique_id}.png"
        transformed_path = PODCAST_AVATARS_DIR / transformed_filename

        # Save transformed avatar
        with open(transformed_path, "wb") as f:
            f.write(result.image_bytes)

        transformed_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{transformed_filename}"

        logger.info(f"[Podcast] Transformed avatar saved to: {transformed_path}")

        # Save to asset library; best-effort, never fails the request.
        if project_id:
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
                    source_module="podcast_maker",
                    filename=transformed_filename,
                    file_url=transformed_url,
                    file_path=str(transformed_path),
                    file_size=len(result.image_bytes),
                    mime_type="image/png",
                    title=f"Podcast Presenter (Transformed) - {project_id}",
                    description="AI-transformed podcast presenter avatar from uploaded photo",
                    prompt=transformation_prompt,
                    tags=["podcast", "avatar", "presenter", "transformed", project_id],
                    provider=result.provider,
                    model=result.model,
                    asset_metadata={
                        "project_id": project_id,
                        "type": "transformed_presenter",
                        "original_avatar_url": avatar_url,
                        "status": "completed",
                    },
                )
            except Exception as e:
                logger.warning(f"[Podcast] Failed to save transformed avatar asset: {e}")

        return {
            "avatar_url": transformed_url,
            "avatar_filename": transformed_filename,
            "message": "Avatar transformed into podcast presenter successfully"
        }
    except HTTPException:
        # Keep the original status/detail (e.g. not-found or subscription-limit
        # responses) instead of masking it as a generic 500.
        raise
    except Exception as exc:
        # loguru attaches tracebacks via opt(exception=True); it has no
        # stdlib-style exc_info keyword.
        logger.opt(exception=True).error(f"[Podcast] Avatar transformation failed: {exc}")
        raise HTTPException(status_code=500, detail=f"Avatar transformation failed: {str(exc)}") from exc
|
||||
|
||||
|
||||
def _parse_presenter_keywords(top_keywords):
    """Return the keyword strings carried by the ``top_keywords`` form field.

    The field is expected to hold a JSON array of strings. Anything that cannot
    be decoded, or that does not decode to a list, yields an empty list so the
    request can continue without keyword hints. Non-string items are dropped
    because the prompt builder calls ``.lower()`` on every keyword.
    """
    if not top_keywords:
        return []

    parsed = top_keywords
    if isinstance(top_keywords, str):
        import json

        try:
            parsed = json.loads(top_keywords)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return []

    if not isinstance(parsed, (list, tuple)):
        return []
    return [item for item in parsed if isinstance(item, str)]


def _presenter_filename_component(value):
    """Replace every character that is not alphanumeric, ``-`` or ``_`` with ``_``.

    Used for request-supplied values that end up in a file name, so they cannot
    introduce path separators or ``..`` segments.
    """
    return "".join(ch if ch.isalnum() or ch in ("-", "_") else "_" for ch in str(value))


def _build_presenter_prompt(gender, persona, content_type, audience, keywords_list):
    """Assemble the comma-separated image prompt for one presenter."""
    prompt_parts = [
        f"Half-length portrait of a professional podcast presenter ({gender}, 25-35 years old)",
        "photo-realistic, professional photography",
    ]

    if persona:
        prompt_parts.append(persona.prompt)

    # Use content_type to influence attire/style
    default_attire = "professional attire (white shirt and light gray blazer or business casual)"
    if content_type:
        content_lower = content_type.lower()
        if "business" in content_lower or "corporate" in content_lower:
            prompt_parts.append("business professional attire (white shirt and light gray blazer)")
        elif "casual" in content_lower or "conversational" in content_lower:
            prompt_parts.append("business casual attire (smart casual, approachable)")
        elif "tech" in content_lower or "technology" in content_lower:
            prompt_parts.append("modern professional attire (tech-forward, contemporary style)")
        else:
            prompt_parts.append(default_attire)
    else:
        prompt_parts.append(default_attire)

    # Use audience to influence expression and style
    if audience:
        audience_lower = audience.lower()
        if "young" in audience_lower or "millennial" in audience_lower or "gen z" in audience_lower:
            prompt_parts.append("modern, energetic, approachable expression")
        elif "executive" in audience_lower or "professional" in audience_lower or "business" in audience_lower:
            prompt_parts.append("confident, authoritative, professional expression")
        else:
            prompt_parts.append("confident, friendly, engaging expression")
    else:
        prompt_parts.append("confident, friendly expression")

    # Add keywords context if available (for visual style hints).
    # Only the first three keywords are inspected and at most two are used.
    if keywords_list:
        style_words = ("tech", "business", "creative", "modern", "professional")
        visual_keywords = [
            keyword
            for keyword in keywords_list[:3]
            if any(word in keyword.lower() for word in style_words)
        ]
        if visual_keywords:
            prompt_parts.append(f"context: {', '.join(visual_keywords[:2])}")

    # Technical requirements
    prompt_parts.extend([
        "looking at camera",
        "soft studio lighting, plain light-gray or neutral background",
        "ultra realistic, 4k quality, 85mm lens, f/2.8",
        "professional podcast host appearance, suitable for video generation",
        "clean composition, center-focused for avatar overlay",
    ])

    return ", ".join(prompt_parts)


@router.post("/avatar/generate")
async def generate_podcast_presenters(
    speakers: int = Form(...),
    project_id: Optional[str] = Form(None),
    audience: Optional[str] = Form(None),
    content_type: Optional[str] = Form(None),
    top_keywords: Optional[str] = Form(None),  # JSON string array
    persona_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate presenter avatar images based on number of speakers and AI analysis insights.

    Uses analysis data (audience, content_type, keywords) to create more relevant presenters.
    Based on WaveSpeed AI recommendations for professional podcast presenters.

    Args:
        speakers: Number of presenters to generate; must be 1 or 2.
        project_id: Optional project the avatars belong to. When given, each
            avatar is also saved to the asset library.
        audience: Optional audience description used to pick the expression.
        content_type: Optional content type used to pick the attire.
        top_keywords: Optional JSON array (as a string) of keywords.
        persona_id: Optional explicit persona; otherwise one is chosen from the
            analysis inputs.

    Returns:
        Dict with ``avatars`` (one entry per speaker containing URL, filename,
        speaker number, prompt, persona id and seed), ``message`` and
        ``persona_id``.

    Raises:
        HTTPException: 400 when ``speakers`` is out of range; 500 when
            generation fails. HTTPExceptions raised by called services are
            passed through unchanged.
    """
    user_id = require_authenticated_user(current_user)

    if speakers < 1 or speakers > 2:
        raise HTTPException(status_code=400, detail="Speakers must be between 1 and 2")

    try:
        # Parse keywords if provided (malformed input degrades to "no keywords")
        keywords_list = _parse_presenter_keywords(top_keywords)

        # Choose persona (market-fit + style) using analysis if not explicitly provided.
        # Do not infer sensitive traits (like ethnicity); personas represent market + style only.
        selected_persona_id = persona_id or choose_persona_id(
            audience=audience,
            content_type=content_type,
            top_keywords=keywords_list,
        )
        persona = get_persona(selected_persona_id)

        # project_id comes from the request; only a sanitised form may reach the
        # file system, otherwise "../" segments could escape the avatars directory.
        filename_project = _presenter_filename_component(project_id) if project_id else "temp"

        generated_avatars = []

        for i in range(speakers):
            speaker_number = i + 1
            gender = "female" if i == 0 else "male"  # First speaker female, second male

            # Build context-aware prompt using analysis insights + persona preset
            prompt = _build_presenter_prompt(gender, persona, content_type, audience, keywords_list)

            logger.info(f"[Podcast] Generating presenter {i+1}/{speakers} for project {project_id}")

            # Use a deterministic seed per (project_id, speaker_number, persona_id) to keep presenter identity stable.
            # Note: determinism may vary by provider/model, but seed improves consistency substantially.
            seed_source = f"{project_id or 'temp'}|speaker={speaker_number}|persona={selected_persona_id}"
            seed = int(hashlib.sha256(seed_source.encode("utf-8")).hexdigest()[:8], 16)
            image_options = {
                "provider": None,  # Auto-select provider
                "width": 1024,
                "height": 1024,
                "seed": seed,
            }

            result = generate_image(
                prompt=prompt,
                options=image_options,
                user_id=user_id
            )

            # Save avatar
            unique_id = str(uuid.uuid4())[:8]
            avatar_filename = f"presenter_{filename_project}_{speaker_number}_{unique_id}.png"
            avatar_path = PODCAST_AVATARS_DIR / avatar_filename

            with open(avatar_path, "wb") as f:
                f.write(result.image_bytes)

            avatar_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{avatar_filename}"

            # Save to asset library (best effort: a failure here must not lose the avatar)
            if project_id:
                try:
                    save_asset_to_library(
                        db=db,
                        user_id=user_id,
                        asset_type="image",
                        source_module="podcast_maker",
                        filename=avatar_filename,
                        file_url=avatar_url,
                        file_path=str(avatar_path),
                        file_size=len(result.image_bytes),
                        mime_type="image/png",
                        title=f"Podcast Presenter {i+1} - {project_id}",
                        description=f"Generated podcast presenter avatar for speaker {i+1}",
                        prompt=prompt,
                        tags=["podcast", "avatar", "presenter", project_id],
                        provider=result.provider,
                        model=result.model,
                        asset_metadata={
                            "project_id": project_id,
                            "speaker_number": speaker_number,
                            "type": "generated_presenter",
                            "status": "completed",
                            "persona_id": selected_persona_id,
                            "seed": seed,
                        },
                    )
                except Exception as e:
                    logger.warning(f"[Podcast] Failed to save presenter asset: {e}")

            generated_avatars.append({
                "avatar_url": avatar_url,
                "avatar_filename": avatar_filename,
                "speaker_number": speaker_number,
                "prompt": prompt,  # Include the prompt used for generation
                "persona_id": selected_persona_id,
                "seed": seed,
            })

        return {
            "avatars": generated_avatars,
            "message": f"Generated {speakers} presenter avatar(s)",
            "persona_id": selected_persona_id,
        }
    except HTTPException:
        # Keep the status/detail chosen by the service that raised it instead of
        # flattening it into a generic 500 (same policy as the scene image handler).
        raise
    except Exception as exc:
        logger.error(f"[Podcast] Presenter generation failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Presenter generation failed: {str(exc)}")
|
||||
|
||||
431
backend/api/podcast/handlers/images.py
Normal file
431
backend/api/podcast/handlers/images.py
Normal file
@@ -0,0 +1,431 @@
|
||||
"""
|
||||
Podcast Image Handlers
|
||||
|
||||
Image generation and serving endpoints.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import Dict, Any
|
||||
from pathlib import Path
|
||||
import uuid
|
||||
|
||||
from services.database import get_db
|
||||
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
from services.llm_providers.main_image_generation import generate_image
|
||||
from utils.asset_tracker import save_asset_to_library
|
||||
from loguru import logger
|
||||
from ..constants import PODCAST_IMAGES_DIR
|
||||
from ..models import PodcastImageRequest, PodcastImageResponse
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# Trigger words checked (as substrings) against the scene content, grouped as
# data / technology / business. Each group maps to one visual theme label.
_SCENE_THEME_TRIGGERS = (
    ("data", "statistics", "numbers", "chart", "graph"),
    ("technology", "tech", "digital", "ai", "software"),
    ("business", "growth", "strategy", "market"),
)


def _scene_visual_themes(content_preview, theme_labels):
    """Return the labels whose trigger group has a word occurring in the content.

    ``theme_labels`` holds one label per group in ``_SCENE_THEME_TRIGGERS``,
    in the same order (data, technology, business).
    """
    lowered = content_preview.lower()
    return [
        label
        for triggers, label in zip(_SCENE_THEME_TRIGGERS, theme_labels)
        if any(word in lowered for word in triggers)
    ]


def _scene_filename_part(value, max_length=None):
    """Return ``value`` reduced to characters that are safe inside a file name.

    Everything except alphanumerics, ``-`` and ``_`` becomes ``_``, so values
    taken from the request cannot add path separators or ``..`` segments.
    """
    text = str(value)
    if max_length is not None:
        text = text[:max_length]
    return "".join(c if c.isalnum() or c in ('-', '_') else '_' for c in text)


def _build_character_scene_prompt(request):
    """Build the scene prompt used when a base avatar supplies the character."""
    prompt_parts = []

    # Scene context (primary focus)
    if request.scene_title:
        prompt_parts.append(f"Scene: {request.scene_title}")

    # Scene content insights for visual context
    if request.scene_content:
        content_preview = request.scene_content[:200].replace("\n", " ").strip()
        visual_keywords = _scene_visual_themes(
            content_preview,
            (
                "data visualization background",
                "modern tech studio setting",
                "professional business studio",
            ),
        )
        if visual_keywords:
            prompt_parts.append(", ".join(visual_keywords))

    # Podcast theme context
    if request.idea:
        idea_preview = request.idea[:60].strip()
        prompt_parts.append(f"Topic: {idea_preview}")

    # Studio setting (maintains podcast aesthetic)
    prompt_parts.extend([
        "Professional podcast recording studio",
        "Modern microphone setup",
        "Clean background, professional lighting",
        "16:9 aspect ratio, video-optimized composition"
    ])

    return ", ".join(prompt_parts)


def _build_standard_scene_prompt(request):
    """Build the scene prompt used when the image is generated from scratch."""
    # Core podcast studio elements
    prompt_parts = [
        "Professional podcast recording studio",
        "Modern podcast setup with high-quality microphone",
        "Clean, minimalist background suitable for video",
        "Professional studio lighting with soft, even illumination",
        "Podcast host environment, professional and inviting"
    ]

    # Scene-specific context
    if request.scene_title:
        prompt_parts.append(f"Scene theme: {request.scene_title}")

    # Content context for visual relevance
    if request.scene_content:
        content_preview = request.scene_content[:150].replace("\n", " ").strip()
        visual_keywords = _scene_visual_themes(
            content_preview,
            (
                "data visualization elements",
                "modern technology aesthetic",
                "professional business environment",
            ),
        )
        if visual_keywords:
            prompt_parts.append(", ".join(visual_keywords))

    # Podcast theme context
    if request.idea:
        idea_preview = request.idea[:80].strip()
        prompt_parts.append(f"Podcast topic context: {idea_preview}")

    # Technical requirements for video generation
    prompt_parts.extend([
        "16:9 aspect ratio optimized for video",
        "Center-focused composition for talking avatar overlay",
        "Neutral color palette with professional tones",
        "High resolution, sharp focus, professional photography quality",
        "No text, no logos, no distracting elements",
        "Suitable for InfiniteTalk video generation with animated avatar"
    ])

    # Style constraints
    prompt_parts.extend([
        "Realistic photography style, not illustration or cartoon",
        "Professional broadcast quality",
        "Warm, inviting atmosphere",
        "Clean composition with breathing room for avatar placement"
    ])

    return ", ".join(prompt_parts)


@router.post("/image", response_model=PodcastImageResponse)
async def generate_podcast_scene_image(
    request: PodcastImageRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate an AI image for a podcast scene.

    Creates a professional, podcast-appropriate image based on scene title and content.

    Flow:
        1. Pre-flight subscription validation (raises before any provider call).
        2. If ``request.base_avatar_url`` is set, load the avatar and generate a
           character-consistent image through the WaveSpeed client; a failure on
           this path is reported as an error and never falls back to standard
           generation.
        3. Otherwise build a studio prompt and call ``generate_image``.
        4. Save the PNG under ``PODCAST_IMAGES_DIR``, record usage and store the
           asset (both best effort), and return the response model.

    Raises:
        HTTPException: 400 when the scene title is missing; 500 when the base
            avatar cannot be loaded or an unexpected error occurs; 502/504 when
            character-consistent generation fails; plus whatever the pre-flight
            validator or providers raise.
    """
    user_id = require_authenticated_user(current_user)

    if not request.scene_title:
        raise HTTPException(status_code=400, detail="Scene title is required")

    try:
        # PRE-FLIGHT VALIDATION: Check subscription limits before any API calls
        from services.subscription import PricingService
        from services.subscription.preflight_validator import validate_image_generation_operations

        pricing_service = PricingService(db)
        try:
            # Raises HTTPException immediately if validation fails
            validate_image_generation_operations(
                pricing_service=pricing_service,
                user_id=user_id,
                num_images=1
            )
            logger.info(f"[Podcast] ✅ Pre-flight validation passed for user {user_id}")
        except HTTPException as http_ex:
            logger.error(f"[Podcast] ❌ Pre-flight validation failed for user {user_id}: {http_ex.detail}")
            raise

        # If base avatar is provided, create scene-specific variation
        # Otherwise, generate from scratch
        logger.info(f"[Podcast] Image generation request for scene {request.scene_id}")
        logger.info(f"[Podcast] base_avatar_url={request.base_avatar_url}")
        logger.info(f"[Podcast] custom_prompt={request.custom_prompt}")
        logger.info(f"[Podcast] style={request.style}, rendering_speed={request.rendering_speed}, aspect_ratio={request.aspect_ratio}")

        if request.base_avatar_url:
            # Load base avatar image for reference
            from ..utils import load_podcast_image_bytes
            try:
                logger.info(f"[Podcast] Attempting to load base avatar from: {request.base_avatar_url}")
                base_avatar_bytes = load_podcast_image_bytes(request.base_avatar_url)
                logger.info(f"[Podcast] ✅ Successfully loaded base avatar ({len(base_avatar_bytes)} bytes) for scene {request.scene_id}")
            except Exception as e:
                logger.error(f"[Podcast] ❌ Failed to load base avatar from {request.base_avatar_url}: {e}", exc_info=True)
                # If base avatar fails to load, we cannot maintain character consistency
                # Raise an error instead of falling back to standard generation
                raise HTTPException(
                    status_code=500,
                    detail={
                        "error": "Failed to load base avatar",
                        "message": f"Could not load the base avatar image for character consistency: {str(e)}. Please ensure the avatar image is accessible.",
                    },
                )
        else:
            logger.info(f"[Podcast] No base avatar URL provided, will generate from scratch")
            base_avatar_bytes = None

        # Build optimized prompt for scene image generation
        # When base avatar is provided, use Ideogram Character to maintain consistency
        # Otherwise, generate from scratch with podcast-optimized prompt
        image_prompt = ""  # Initialize prompt variable

        if base_avatar_bytes:
            # Use Ideogram Character API for consistent character generation
            # Use custom prompt if provided, otherwise build scene-specific prompt
            if request.custom_prompt:
                # User provided custom prompt - use it directly
                image_prompt = request.custom_prompt
                logger.info(f"[Podcast] Using custom prompt from user for scene {request.scene_id}")
            else:
                # Build scene-specific prompt that respects the base avatar
                image_prompt = _build_character_scene_prompt(request)

            logger.info(f"[Podcast] Using Ideogram Character for scene {request.scene_id} with base avatar")
            logger.info(f"[Podcast] Scene prompt: {image_prompt[:150]}...")

            # Use Ideogram Character API via WaveSpeed client
            from services.wavespeed.client import WaveSpeedClient
            wavespeed_client = WaveSpeedClient()

            # Use custom settings if provided, otherwise use defaults
            style = request.style or "Realistic"  # Default to Realistic for professional podcast presenters
            rendering_speed = request.rendering_speed or "Quality"  # Default to Quality for podcast videos

            # Calculate aspect ratio from custom setting or dimensions
            if request.aspect_ratio:
                aspect_ratio = request.aspect_ratio
            else:
                aspect_ratio_map = {
                    (1024, 1024): "1:1",
                    (1920, 1080): "16:9",
                    (1080, 1920): "9:16",
                    (1280, 960): "4:3",
                    (960, 1280): "3:4",
                }
                # Unknown dimension pairs fall back to widescreen
                aspect_ratio = aspect_ratio_map.get((request.width, request.height), "16:9")

            logger.info(f"[Podcast] Ideogram Character settings: style={style}, rendering_speed={rendering_speed}, aspect_ratio={aspect_ratio}")

            try:
                image_bytes = wavespeed_client.generate_character_image(
                    prompt=image_prompt,
                    reference_image_bytes=base_avatar_bytes,
                    style=style,
                    aspect_ratio=aspect_ratio,
                    rendering_speed=rendering_speed,
                    timeout=None,  # No timeout - poll until WaveSpeed says it's done or failed
                )

                # Create result object compatible with ImageGenerationResult
                from services.llm_providers.image_generation.base import ImageGenerationResult
                result = ImageGenerationResult(
                    image_bytes=image_bytes,
                    provider="wavespeed",
                    model="ideogram-ai/ideogram-character",
                    width=request.width,
                    height=request.height,
                )

                logger.info(f"[Podcast] ✅ Successfully generated character-consistent scene image")
            except HTTPException as http_err:
                # Re-raise HTTPExceptions from wavespeed client as-is
                logger.error(f"[Podcast] ❌ Ideogram Character HTTPException: {http_err.status_code} - {http_err.detail}")
                raise
            except Exception as char_error:
                error_msg = str(char_error)
                error_type = type(char_error).__name__
                logger.error(f"[Podcast] ❌ Ideogram Character failed: {error_type}: {error_msg}", exc_info=True)

                # If Ideogram Character fails, we should NOT fall back to standard generation
                # because that would lose character consistency. Instead, raise an error.
                # However, if it's a timeout/connection issue, we can provide a helpful message.
                error_msg_lower = error_msg.lower()
                if "timeout" in error_msg_lower or "connection" in error_msg_lower or "504" in error_msg:
                    raise HTTPException(
                        status_code=504,
                        detail={
                            "error": "Image generation service unavailable",
                            "message": "The character-consistent image generation service is currently unavailable. Please try again in a few moments. If the problem persists, the service may be experiencing high load.",
                            "retry_recommended": True,
                        },
                    )
                else:
                    raise HTTPException(
                        status_code=502,
                        detail={
                            "error": "Character-consistent image generation failed",
                            "message": f"Failed to generate image with character consistency: {error_msg}",
                            "retry_recommended": True,
                        },
                    )

        # CRITICAL: If base_avatar_url was provided but we don't have base_avatar_bytes,
        # this means either loading failed (already raised error) or Ideogram Character failed (already raised error)
        # So this path should only be reached if NO base_avatar_url was provided in the first place
        if not base_avatar_bytes:
            logger.info(f"[Podcast] No base avatar provided - generating standard image from scratch")
            # Standard generation from scratch (no base avatar provided)
            image_prompt = _build_standard_scene_prompt(request)

            logger.info(f"[Podcast] Generating image for scene {request.scene_id}: {request.scene_title}")

            # Generate image using main_image_generation service
            image_options = {
                "provider": None,  # Auto-select provider
                "width": request.width,
                "height": request.height,
            }

            result = generate_image(
                prompt=image_prompt,
                options=image_options,
                user_id=user_id
            )

        # Save image to podcast images directory
        PODCAST_IMAGES_DIR.mkdir(parents=True, exist_ok=True)

        # Generate filename. Both scene_id and scene_title come from the request,
        # so both are sanitised before they become part of a path on disk.
        clean_scene_id = _scene_filename_part(request.scene_id)
        clean_title = _scene_filename_part(request.scene_title, max_length=30)
        unique_id = str(uuid.uuid4())[:8]
        image_filename = f"scene_{clean_scene_id}_{clean_title}_{unique_id}.png"
        image_path = PODCAST_IMAGES_DIR / image_filename

        # Save image
        with open(image_path, "wb") as f:
            f.write(result.image_bytes)

        logger.info(f"[Podcast] Saved image to: {image_path}")

        # Create image URL (served via API endpoint)
        image_url = f"/api/podcast/images/{image_filename}"

        # Estimate cost (rough estimate: ~$0.04 per image for most providers, ~$0.08 for Ideogram Character Quality)
        cost = 0.08 if result.provider == "wavespeed" and result.model == "ideogram-ai/ideogram-character" else 0.04

        # TRACK USAGE after successful image generation
        try:
            from sqlalchemy import text as sql_text
            from datetime import datetime

            current_period = pricing_service.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m")

            # Update stability_calls and stability_cost (used for all image generation)
            # Note: stability_calls is used for all image generation providers, not just Stability AI
            update_query = sql_text("""
                UPDATE usage_summaries
                SET stability_calls = COALESCE(stability_calls, 0) + 1,
                    stability_cost = COALESCE(stability_cost, 0) + :cost,
                    total_calls = COALESCE(total_calls, 0) + 1,
                    total_cost = COALESCE(total_cost, 0) + :cost
                WHERE user_id = :user_id AND billing_period = :period
            """)
            update_result = db.execute(update_query, {
                'cost': cost,
                'user_id': user_id,
                'period': current_period
            })
            db.commit()

            if getattr(update_result, "rowcount", None) == 0:
                # The UPDATE matched no summary row, so nothing was recorded.
                logger.warning(f"[Podcast] No usage summary row for user={user_id}, period={current_period}; image usage was not recorded")
            else:
                logger.info(f"[Podcast] ✅ Tracked image generation usage: user={user_id}, cost=${cost:.4f}, provider={result.provider}")
        except Exception as usage_error:
            logger.error(f"[Podcast] Failed to track image generation usage: {usage_error}")
            db.rollback()
            # Don't fail the request if usage tracking fails

        # Save to asset library (best effort)
        try:
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="image",
                source_module="podcast_maker",
                filename=image_filename,
                file_url=image_url,
                file_path=str(image_path),
                file_size=len(result.image_bytes),
                mime_type="image/png",
                title=f"{request.scene_title} - Podcast Scene",
                description=f"Podcast scene image: {request.scene_title}",
                prompt=image_prompt,
                tags=["podcast", "scene", request.scene_id],
                provider=result.provider,
                model=result.model,
                asset_metadata={
                    "scene_id": request.scene_id,
                    "scene_title": request.scene_title,
                    "status": "completed",
                },
            )
        except Exception as e:
            logger.warning(f"[Podcast] Failed to save image asset: {e}")

        return PodcastImageResponse(
            scene_id=request.scene_id,
            scene_title=request.scene_title,
            image_filename=image_filename,
            image_url=image_url,
            width=result.width,
            height=result.height,
            provider=result.provider,
            model=result.model,
            cost=cost,
        )

    except HTTPException:
        # Re-raise HTTPExceptions as-is (they already have proper error details)
        raise
    except Exception as exc:
        # Log the full exception for debugging
        error_msg = str(exc)
        error_type = type(exc).__name__
        logger.error(f"[Podcast] Image generation failed: {error_type}: {error_msg}", exc_info=True)

        # Report the failure as a structured 500
        raise HTTPException(
            status_code=500,
            detail={
                "error": "Image generation failed",
                "message": error_msg,
                "type": error_type,
            }
        )
|
||||
|
||||
|
||||
# Media types served for known image extensions. Anything else is sent as PNG,
# which is what this endpoint returned for every file before.
_SERVED_IMAGE_MEDIA_TYPES = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".webp": "image/webp",
    ".gif": "image/gif",
}


@router.get("/images/{path:path}")
async def serve_podcast_image(
    path: str,  # Path rather than a bare filename so subdirectories are supported
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve generated podcast scene images and avatars.

    Supports authentication via Authorization header or token query parameter.
    Query parameter is useful for HTML elements like <img> that cannot send custom headers.
    Supports subdirectories like avatars/

    Raises:
        HTTPException: 400 for paths containing ``..`` or starting with ``/``;
            403 when the resolved path is outside ``PODCAST_IMAGES_DIR``;
            404 when the path is not an existing regular file.
    """
    require_authenticated_user(current_user)

    # Security check: ensure path doesn't contain path traversal or absolute paths
    if ".." in path or path.startswith("/"):
        raise HTTPException(status_code=400, detail="Invalid path")

    image_path = (PODCAST_IMAGES_DIR / path).resolve()

    # Security check: ensure resolved path is within PODCAST_IMAGES_DIR.
    # Compare path components, not string prefixes: a prefix test would also
    # accept a sibling directory whose name merely starts with the same text.
    try:
        image_path.relative_to(PODCAST_IMAGES_DIR)
    except ValueError:
        raise HTTPException(status_code=403, detail="Access denied") from None

    # is_file() rather than exists(): a directory cannot be served as a file.
    if not image_path.is_file():
        raise HTTPException(status_code=404, detail="Image not found")

    media_type = _SERVED_IMAGE_MEDIA_TYPES.get(image_path.suffix.lower(), "image/png")
    return FileResponse(image_path, media_type=media_type)
|
||||
|
||||
203
backend/api/podcast/handlers/projects.py
Normal file
203
backend/api/podcast/handlers/projects.py
Normal file
@@ -0,0 +1,203 @@
|
||||
"""
|
||||
Podcast Project Handlers
|
||||
|
||||
CRUD operations for podcast projects.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
from services.database import get_db
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from services.podcast_service import PodcastService
|
||||
from ..models import (
|
||||
PodcastProjectResponse,
|
||||
CreateProjectRequest,
|
||||
UpdateProjectRequest,
|
||||
PodcastProjectListResponse,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/projects", response_model=PodcastProjectResponse, status_code=201)
async def create_project(
    request: CreateProjectRequest,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Create a new podcast project for the authenticated user.

    Responds with 401 when no user id is present in ``current_user``, 400 when
    the user already has a project with ``request.project_id``, and 500 for any
    other failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        podcast_service = PodcastService(db)

        # Refuse to create a second project with the same id for this user.
        if podcast_service.get_project(owner_id, request.project_id):
            raise HTTPException(status_code=400, detail="Project ID already exists")

        created = podcast_service.create_project(
            user_id=owner_id,
            project_id=request.project_id,
            idea=request.idea,
            duration=request.duration,
            speakers=request.speakers,
            budget_cap=request.budget_cap,
        )
        return PodcastProjectResponse.model_validate(created)
    except HTTPException:
        # Already carries the intended status code; pass it through untouched.
        raise
    except Exception as err:
        raise HTTPException(status_code=500, detail=f"Error creating project: {err!s}")
|
||||
|
||||
|
||||
@router.get("/projects/{project_id}", response_model=PodcastProjectResponse)
async def get_project(
    project_id: str,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Return one of the authenticated user's podcast projects.

    Responds with 401 when no user id is present in ``current_user``, 404 when
    the service finds no such project for that user, and 500 for any other
    failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        found = PodcastService(db).get_project(owner_id, project_id)
        if not found:
            raise HTTPException(status_code=404, detail="Project not found")

        return PodcastProjectResponse.model_validate(found)
    except HTTPException:
        # Already carries the intended status code; pass it through untouched.
        raise
    except Exception as err:
        raise HTTPException(status_code=500, detail=f"Error fetching project: {err!s}")
|
||||
|
||||
|
||||
@router.put("/projects/{project_id}", response_model=PodcastProjectResponse)
async def update_project(
    project_id: str,
    request: UpdateProjectRequest,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Apply a partial update to one of the authenticated user's projects.

    Responds with 401 when no user id is present in ``current_user``, 404 when
    the service returns no project, and 500 for any other failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        podcast_service = PodcastService(db)

        # Forward only the fields the client actually sent (exclude_unset);
        # a field explicitly set to None is still forwarded.
        changed_fields = request.model_dump(exclude_unset=True)

        updated = podcast_service.update_project(owner_id, project_id, **changed_fields)
        if not updated:
            raise HTTPException(status_code=404, detail="Project not found")

        return PodcastProjectResponse.model_validate(updated)
    except HTTPException:
        # Already carries the intended status code; pass it through untouched.
        raise
    except Exception as err:
        raise HTTPException(status_code=500, detail=f"Error updating project: {err!s}")
|
||||
|
||||
|
||||
@router.get("/projects", response_model=PodcastProjectListResponse)
async def list_projects(
    status: Optional[str] = Query(None, description="Filter by status"),
    favorites_only: bool = Query(False, description="Only favorites"),
    limit: int = Query(50, ge=1, le=200),
    offset: int = Query(0, ge=0),
    order_by: str = Query("updated_at", description="Order by: updated_at or created_at"),
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Return one page of the authenticated user's podcast projects.

    The response echoes ``limit`` and ``offset`` next to the page of projects
    and the ``total`` reported by the service. Responds with 401 when no user id
    is present in ``current_user``, 400 for an unsupported ``order_by``, and 500
    for any other failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        # Only these two sort columns are accepted.
        if order_by not in ("updated_at", "created_at"):
            raise HTTPException(status_code=400, detail="order_by must be 'updated_at' or 'created_at'")

        page, total_count = PodcastService(db).list_projects(
            user_id=owner_id,
            status=status,
            favorites_only=favorites_only,
            limit=limit,
            offset=offset,
            order_by=order_by,
        )

        serialized = [PodcastProjectResponse.model_validate(item) for item in page]
        return PodcastProjectListResponse(
            projects=serialized,
            total=total_count,
            limit=limit,
            offset=offset,
        )
    except HTTPException:
        # Already carries the intended status code; pass it through untouched.
        raise
    except Exception as err:
        raise HTTPException(status_code=500, detail=f"Error listing projects: {err!s}")
|
||||
|
||||
|
||||
@router.delete("/projects/{project_id}", status_code=204)
async def delete_project(
    project_id: str,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Delete one of the authenticated user's podcast projects.

    Returns nothing on success (204). Responds with 401 when no user id is
    present in ``current_user``, 404 when the service reports that nothing was
    deleted, and 500 for any other failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        was_deleted = PodcastService(db).delete_project(owner_id, project_id)
        if not was_deleted:
            raise HTTPException(status_code=404, detail="Project not found")

        return None
    except HTTPException:
        # Already carries the intended status code; pass it through untouched.
        raise
    except Exception as err:
        raise HTTPException(status_code=500, detail=f"Error deleting project: {err!s}")
|
||||
|
||||
|
||||
@router.post("/projects/{project_id}/favorite", response_model=PodcastProjectResponse)
async def toggle_favorite(
    project_id: str,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Toggle the favorite status of one of the authenticated user's projects.

    Returns the project as handed back by the service. Responds with 401 when
    no user id is present in ``current_user``, 404 when the service returns no
    project, and 500 for any other failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        toggled = PodcastService(db).toggle_favorite(owner_id, project_id)
        if not toggled:
            raise HTTPException(status_code=404, detail="Project not found")

        return PodcastProjectResponse.model_validate(toggled)
    except HTTPException:
        # Already carries the intended status code; pass it through untouched.
        raise
    except Exception as err:
        raise HTTPException(status_code=500, detail=f"Error toggling favorite: {err!s}")
|
||||
|
||||
99
backend/api/podcast/handlers/research.py
Normal file
99
backend/api/podcast/handlers/research.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""
|
||||
Podcast Research Handlers
|
||||
|
||||
Research endpoints using Exa provider.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from typing import Dict, Any
|
||||
from types import SimpleNamespace
|
||||
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
from services.blog_writer.research.exa_provider import ExaResearchProvider
|
||||
from loguru import logger
|
||||
from ..models import (
|
||||
PodcastExaResearchRequest,
|
||||
PodcastExaResearchResponse,
|
||||
PodcastExaSource,
|
||||
PodcastExaConfig,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/research/exa", response_model=PodcastExaResearchResponse)
async def podcast_research_exa(
    request: PodcastExaResearchRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Run podcast research directly via Exa (no blog writer pipeline).

    The request must contain at least one non-blank query. Note that the
    search itself is driven by ``request.topic``; the cleaned queries are only
    echoed back as ``search_queries`` when the provider result does not
    supply its own.

    Usage tracking is best-effort: a tracking failure is logged and does not
    fail the request. Individual sources that cannot be converted to
    ``PodcastExaSource`` are skipped with a warning instead of failing the
    whole response.

    Raises:
        HTTPException: 400 when no usable query is supplied; 500 when the
            provider search raises.
    """
    user_id = require_authenticated_user(current_user)

    queries = [q.strip() for q in request.queries if q and q.strip()]
    if not queries:
        raise HTTPException(status_code=400, detail="At least one query is required for research.")

    exa_cfg = request.exa_config or PodcastExaConfig()
    # The provider reads its options as attributes, so hand it a lightweight
    # namespace rather than the pydantic model.
    cfg = SimpleNamespace(
        exa_search_type=exa_cfg.exa_search_type or "auto",
        exa_category=exa_cfg.exa_category,
        exa_include_domains=exa_cfg.exa_include_domains or [],
        exa_exclude_domains=exa_cfg.exa_exclude_domains or [],
        max_sources=exa_cfg.max_sources or 8,
        source_types=[],
    )

    provider = ExaResearchProvider()
    prompt = request.topic

    try:
        result = await provider.search(
            prompt=prompt,
            topic=request.topic,
            industry="",
            target_audience="",
            config=cfg,
            user_id=user_id,
        )
    except Exception as exc:
        logger.error(f"[Podcast Exa Research] Failed for user {user_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Exa research failed: {exc}") from exc

    # Track usage if available (best-effort; never fails the request)
    try:
        cost_total = 0.0
        if isinstance(result, dict):
            cost_total = result.get("cost", {}).get("total", 0.005) if result.get("cost") else 0.005
        provider.track_exa_usage(user_id, cost_total)
    except Exception as track_err:
        logger.warning(f"[Podcast Exa Research] Failed to track usage: {track_err}")

    # Treat a non-dict provider result as "no data" so every lookup below
    # falls back to its default.
    payload = result if isinstance(result, dict) else {}

    sources_payload = []
    for src in payload.get("sources") or []:
        try:
            sources_payload.append(PodcastExaSource(**src))
            continue
        except Exception:
            # Fall through and retry with only the fields the model knows.
            pass

        try:
            sources_payload.append(
                PodcastExaSource(
                    title=src.get("title", ""),
                    url=src.get("url", ""),
                    excerpt=src.get("excerpt", ""),
                    published_at=src.get("published_at"),
                    highlights=src.get("highlights"),
                    summary=src.get("summary"),
                    source_type=src.get("source_type"),
                    index=src.get("index"),
                )
            )
        except Exception as src_err:
            # One malformed source must not discard the rest of the research.
            logger.warning(f"[Podcast Exa Research] Skipping malformed source: {src_err}")

    return PodcastExaResearchResponse(
        sources=sources_payload,
        search_queries=payload.get("search_queries", queries),
        cost=payload.get("cost"),
        search_type=payload.get("search_type"),
        provider=payload.get("provider", "exa"),
        content=payload.get("content"),
    )
|
||||
|
||||
142
backend/api/podcast/handlers/script.py
Normal file
142
backend/api/podcast/handlers/script.py
Normal file
@@ -0,0 +1,142 @@
|
||||
"""
|
||||
Podcast Script Handlers
|
||||
|
||||
Script generation endpoint.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from typing import Dict, Any
|
||||
import json
|
||||
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from loguru import logger
|
||||
from ..models import (
|
||||
PodcastScriptRequest,
|
||||
PodcastScriptResponse,
|
||||
PodcastScene,
|
||||
PodcastSceneLine,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/script", response_model=PodcastScriptResponse)
async def generate_podcast_script(
    request: PodcastScriptRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Generate a podcast script outline (scenes + lines) using podcast-oriented prompting.

    Optional research data on the request is condensed into a short context
    section of the prompt; a failure to parse it is logged and the script is
    generated without it. The LLM output is treated as untrusted: malformed
    scenes and lines are skipped, unknown emotions become "neutral", and
    missing or unparseable durations fall back to an even split of the
    requested length (minimum 30 seconds).

    Raises:
        HTTPException: 500 when the LLM call fails, returns non-JSON text,
            returns something other than a JSON object, or the ``scenes``
            value is not a list.
    """
    user_id = require_authenticated_user(current_user)

    # Build comprehensive research context for higher-quality scripts
    research_context = ""
    if request.research:
        try:
            key_insights = request.research.get("keyword_analysis", {}).get("key_insights") or []
            fact_cards = request.research.get("factCards", []) or []
            mapped_angles = request.research.get("mappedAngles", []) or []
            sources = request.research.get("sources", []) or []

            top_facts = [f.get("quote", "") for f in fact_cards[:5] if f.get("quote")]
            angles_summary = [
                f"{a.get('title', '')}: {a.get('why', '')}" for a in mapped_angles[:3] if a.get("title") or a.get("why")
            ]
            top_sources = [s.get("url") for s in sources[:3] if s.get("url")]

            research_parts = []
            if key_insights:
                research_parts.append(f"Key Insights: {', '.join(key_insights[:5])}")
            if top_facts:
                research_parts.append(f"Key Facts: {', '.join(top_facts)}")
            if angles_summary:
                research_parts.append(f"Research Angles: {' | '.join(angles_summary)}")
            if top_sources:
                research_parts.append(f"Top Sources: {', '.join(top_sources)}")

            research_context = "\n".join(research_parts)
        except Exception as exc:
            logger.warning(f"Failed to parse research context: {exc}")
            research_context = ""

    # Built outside the prompt literal: a backslash inside an f-string
    # replacement field is a SyntaxError on Python < 3.12.
    research_section = f"RESEARCH CONTEXT:\n{research_context}\n" if research_context else ""

    prompt = f"""You are an expert podcast script planner. Create natural, conversational podcast scenes.

Podcast Idea: "{request.idea}"
Duration: ~{request.duration_minutes} minutes
Speakers: {request.speakers} (Host + optional Guest)

{research_section}

Return JSON with:
- scenes: array of scenes. Each scene has:
  - id: string
  - title: short scene title (<= 60 chars)
  - duration: duration in seconds (evenly split across total duration)
  - emotion: string (one of: "neutral", "happy", "excited", "serious", "curious", "confident")
  - lines: array of {{"speaker": "...", "text": "...", "emphasis": boolean}}
    * Write natural, conversational dialogue
    * Each line can be a sentence or a few sentences that flow together
    * Use plain text only - no markdown formatting (no asterisks, underscores, etc.)
    * Mark "emphasis": true for key statistics or important points

Guidelines:
- Write for spoken delivery: conversational, natural, with contractions
- Use research insights naturally - weave statistics into dialogue, don't just list them
- Vary emotion per scene based on content
- Ensure scenes match target duration: aim for ~2.5 words per second of audio
- Keep it engaging and informative, like a real podcast conversation
"""

    try:
        raw = llm_text_gen(prompt=prompt, user_id=user_id, json_struct=None)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Script generation failed: {exc}") from exc

    if isinstance(raw, str):
        try:
            data = json.loads(raw)
        except json.JSONDecodeError as exc:
            raise HTTPException(status_code=500, detail="LLM returned non-JSON output") from exc
    elif isinstance(raw, dict):
        data = raw
    else:
        raise HTTPException(status_code=500, detail="Unexpected LLM response format")

    # json.loads may legally yield a list/number/string; only an object has "scenes".
    if not isinstance(data, dict):
        raise HTTPException(status_code=500, detail="Unexpected LLM response format")

    scenes_data = data.get("scenes") or []
    if not isinstance(scenes_data, list):
        raise HTTPException(status_code=500, detail="LLM response missing scenes array")

    valid_emotions = {"neutral", "happy", "excited", "serious", "curious", "confident"}

    # Even split of the requested length, used when a scene has no usable duration.
    fallback_duration = int(max(30, (request.duration_minutes * 60) // max(1, len(scenes_data))))
    # Guard the modulo below against a zero/None speaker count.
    speaker_count = max(1, request.speakers or 1)

    # Normalize scenes
    scenes: list[PodcastScene] = []
    for idx, scene in enumerate(scenes_data):
        if not isinstance(scene, dict):
            logger.warning(f"Skipping malformed scene at index {idx}")
            continue

        title = scene.get("title") or f"Scene {idx + 1}"

        try:
            duration = int(scene.get("duration") or fallback_duration)
        except (TypeError, ValueError, OverflowError):
            duration = fallback_duration

        emotion = scene.get("emotion") or "neutral"
        if emotion not in valid_emotions:
            emotion = "neutral"

        lines_raw = scene.get("lines") or []
        if not isinstance(lines_raw, list):
            lines_raw = []

        lines: list[PodcastSceneLine] = []
        for line in lines_raw:
            if not isinstance(line, dict):
                continue
            # Missing speaker: the first of every `speaker_count` kept lines is the Host.
            speaker = line.get("speaker") or ("Host" if len(lines) % speaker_count == 0 else "Guest")
            text = line.get("text") or ""
            emphasis = line.get("emphasis", False)
            if text:
                lines.append(PodcastSceneLine(speaker=speaker, text=text, emphasis=emphasis))

        scenes.append(
            PodcastScene(
                id=scene.get("id") or f"scene-{idx + 1}",
                title=title,
                duration=duration,
                lines=lines,
                approved=False,
                emotion=emotion,
            )
        )

    return PodcastScriptResponse(scenes=scenes)
|
||||
|
||||
585
backend/api/podcast/handlers/video.py
Normal file
585
backend/api/podcast/handlers/video.py
Normal file
@@ -0,0 +1,585 @@
|
||||
"""
|
||||
Podcast Video Handlers
|
||||
|
||||
Video generation and serving endpoints.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks, Request
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import Dict, Any, Optional
|
||||
from pathlib import Path
|
||||
from urllib.parse import quote
|
||||
import re
|
||||
import json
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from services.database import get_db
|
||||
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
from services.wavespeed.infinitetalk import animate_scene_with_voiceover
|
||||
from services.podcast.video_combination_service import PodcastVideoCombinationService
|
||||
from services.llm_providers.main_video_generation import track_video_usage
|
||||
from services.subscription import PricingService
|
||||
from services.subscription.preflight_validator import validate_scene_animation_operation
|
||||
from api.story_writer.task_manager import task_manager
|
||||
from loguru import logger
|
||||
from ..constants import AI_VIDEO_SUBDIR, PODCAST_VIDEOS_DIR
|
||||
from ..utils import load_podcast_audio_bytes, load_podcast_image_bytes
|
||||
from services.podcast_service import PodcastService
|
||||
from ..models import (
|
||||
PodcastVideoGenerationRequest,
|
||||
PodcastVideoGenerationResponse,
|
||||
PodcastCombineVideosRequest,
|
||||
PodcastCombineVideosResponse,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Thread pool executor for CPU-intensive video operations
|
||||
# This prevents blocking the FastAPI event loop
|
||||
_video_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="podcast_video")
|
||||
|
||||
|
||||
def _extract_error_message(exc: Exception) -> str:
    """
    Turn an exception into the most readable message available.

    Lookup order:
      1. ``HTTPException`` with a string ``detail``: that string.
      2. ``HTTPException`` with a dict ``detail``: the ``message`` field of
         the JSON held in ``detail["response"]``, else ``detail["error"]``.
      3. Any text mentioning "insufficient credits" (case-insensitive): a
         fixed top-up hint.
      4. The first ``"message": "..."`` pair found in ``str(exc)``.
      5. ``str(exc)`` unchanged.
    """
    if isinstance(exc, HTTPException):
        payload = exc.detail
        if isinstance(payload, str):
            return payload
        if isinstance(payload, dict):
            raw_response = payload.get("response", "")
            if raw_response:
                try:
                    parsed = json.loads(raw_response)
                except (json.JSONDecodeError, TypeError):
                    # Not JSON (or not a string at all): try the other fields.
                    parsed = None
                if isinstance(parsed, dict) and "message" in parsed:
                    return parsed["message"]
            if "error" in payload:
                return payload["error"]

    # Anything not resolved above is handled via its string form, e.g.
    # "502: {'error': '...', 'response': '{"message":"..."}'}".
    text = str(exc)

    if "insufficient credits" in text.lower():
        return "Insufficient WaveSpeed credits. Please top up your account."

    found = re.search(r'"message"\s*:\s*"([^"]+)"', text)
    return found.group(1) if found else text
|
||||
|
||||
|
||||
def _execute_podcast_video_task(
    task_id: str,
    request: PodcastVideoGenerationRequest,
    user_id: str,
    image_bytes: bytes,
    audio_bytes: bytes,
    auth_token: Optional[str] = None,
    mask_image_bytes: Optional[bytes] = None,
):
    """Background task to generate InfiniteTalk video for podcast scene.

    Animates the scene image with the voiceover, saves the resulting video,
    records usage, and publishes the outcome through ``task_manager``. Never
    raises: any failure is logged and the task is marked "failed" with a
    user-friendly message.

    Args:
        task_id: Identifier of the task whose status is updated.
        request: Original generation request (scene id/title, project id,
            resolution, prompt, seed).
        user_id: Owner of the generated video.
        image_bytes: Scene/avatar image to animate.
        audio_bytes: Voiceover audio for the scene.
        auth_token: When given, appended to the returned video URL as a
            ``token`` query parameter. It is never written to the logs.
        mask_image_bytes: Optional mask image passed to the animation call.
    """
    try:
        task_manager.update_task_status(
            task_id, "processing", progress=5.0, message="Submitting to WaveSpeed InfiniteTalk..."
        )

        # Extract scene number from scene_id (first run of digits, 0 if none)
        scene_number_match = re.search(r'\d+', request.scene_id)
        scene_number = int(scene_number_match.group()) if scene_number_match else 0

        # Prepare scene data for animation
        scene_data = {
            "scene_number": scene_number,
            "title": request.scene_title,
            "scene_id": request.scene_id,
        }
        story_context = {
            "project_id": request.project_id,
            "type": "podcast",
        }

        animation_result = animate_scene_with_voiceover(
            image_bytes=image_bytes,
            audio_bytes=audio_bytes,
            scene_data=scene_data,
            story_context=story_context,
            user_id=user_id,
            resolution=request.resolution or "720p",
            prompt_override=request.prompt,
            mask_image_bytes=mask_image_bytes,
            seed=request.seed if request.seed is not None else -1,
            image_mime="image/png",
            audio_mime="audio/mpeg",
        )

        task_manager.update_task_status(
            task_id, "processing", progress=80.0, message="Saving video file..."
        )

        # Use podcast-specific video directory
        ai_video_dir = PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR
        ai_video_dir.mkdir(parents=True, exist_ok=True)
        video_service = PodcastVideoCombinationService(output_dir=str(PODCAST_VIDEOS_DIR / "Final_Videos"))

        save_result = video_service.save_scene_video(
            video_bytes=animation_result["video_bytes"],
            scene_number=scene_number,
            user_id=user_id,
        )
        video_filename = save_result["video_filename"]

        # Keep a token-free URL for logging; the bearer token must not end up in logs.
        base_video_url = f"/api/podcast/videos/{video_filename}"
        video_url = base_video_url
        if auth_token:
            video_url = f"{base_video_url}?token={quote(auth_token)}"

        logger.info(
            f"[Podcast] Video saved: filename={video_filename}, url={base_video_url}, scene={request.scene_id}"
        )

        track_video_usage(
            user_id=user_id,
            provider=animation_result["provider"],
            model_name=animation_result["model_name"],
            prompt=animation_result["prompt"],
            video_bytes=animation_result["video_bytes"],
            cost_override=animation_result["cost"],
        )

        result_data = {
            "video_url": video_url,
            "video_filename": video_filename,
            "cost": animation_result["cost"],
            "duration": animation_result["duration"],
            "provider": animation_result["provider"],
            "model": animation_result["model_name"],
        }

        loggable_result = {**result_data, "video_url": base_video_url}
        logger.info(
            f"[Podcast] Updating task status to completed: task_id={task_id}, result={loggable_result}"
        )

        task_manager.update_task_status(
            task_id,
            "completed",
            progress=100.0,
            message="Video generation complete!",
            result=result_data,
        )

        # Verify the task status was updated correctly. This is diagnostics
        # only, so it must not be able to turn a completed task into a failed one.
        try:
            updated_status = task_manager.get_task_status(task_id) or {}
            stored_result = updated_status.get("result") or {}
            logger.info(
                f"[Podcast] Task status after update: task_id={task_id}, "
                f"status={updated_status.get('status')}, has_result={bool(stored_result)}"
            )
        except Exception as verify_err:
            logger.warning(f"[Podcast] Could not verify task status for {task_id}: {verify_err}")

        logger.info(
            f"[Podcast] Video generation completed for project {request.project_id}, scene {request.scene_id}"
        )

    except Exception as exc:
        # Use logger.exception to avoid KeyError when exception message contains curly braces
        logger.exception(f"[Podcast] Video generation failed for project {request.project_id}, scene {request.scene_id}")

        # Extract user-friendly error message from exception
        error_msg = _extract_error_message(exc)

        task_manager.update_task_status(
            task_id, "failed", error=error_msg, message=f"Video generation failed: {error_msg}"
        )
|
||||
|
||||
|
||||
@router.post("/render/video", response_model=PodcastVideoGenerationResponse)
async def generate_podcast_video(
    request_obj: Request,
    request: PodcastVideoGenerationRequest,
    background_tasks: BackgroundTasks,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Generate video for a podcast scene using WaveSpeed InfiniteTalk (avatar image + audio).
    Returns task_id for polling since InfiniteTalk can take up to 10 minutes.

    Cheap request validation (resolution, presence of a scene image) runs
    before any media is loaded, so an invalid request is rejected without
    touching the filesystem.

    Raises:
        HTTPException: 400 for an unsupported resolution, a missing scene
            image, or a mask image that cannot be loaded. Errors raised by
            the media loaders or the subscription pre-flight check propagate
            unchanged.
    """
    user_id = require_authenticated_user(current_user)

    logger.info(
        f"[Podcast] Starting video generation for project {request.project_id}, scene {request.scene_id}"
    )

    # Validate resolution
    if request.resolution not in {"480p", "720p"}:
        raise HTTPException(status_code=400, detail="Resolution must be '480p' or '720p'.")

    # Scene-specific image should be generated before video generation
    if not request.avatar_image_url:
        raise HTTPException(
            status_code=400,
            detail="Scene image is required for video generation. Please generate images for scenes first.",
        )

    # Load audio and image bytes
    audio_bytes = load_podcast_audio_bytes(request.audio_url)
    image_bytes = load_podcast_image_bytes(request.avatar_image_url)

    mask_image_bytes = None
    if request.mask_image_url:
        try:
            mask_image_bytes = load_podcast_image_bytes(request.mask_image_url)
        except Exception as e:
            logger.error(f"[Podcast] Failed to load mask image: {e}")
            raise HTTPException(
                status_code=400,
                detail="Failed to load mask image for video generation.",
            ) from e

    # Validate subscription limits
    db = next(get_db())
    try:
        pricing_service = PricingService(db)
        validate_scene_animation_operation(pricing_service=pricing_service, user_id=user_id)
    finally:
        db.close()

    # Extract token for authenticated URL building. Only the leading scheme
    # is stripped; the token itself is left untouched.
    auth_token = None
    auth_header = request_obj.headers.get("Authorization")
    if auth_header and auth_header.startswith("Bearer "):
        auth_token = auth_header.removeprefix("Bearer ").strip()

    # Create async task
    task_id = task_manager.create_task("podcast_video_generation")
    background_tasks.add_task(
        _execute_podcast_video_task,
        task_id=task_id,
        request=request,
        user_id=user_id,
        image_bytes=image_bytes,
        audio_bytes=audio_bytes,
        auth_token=auth_token,
        mask_image_bytes=mask_image_bytes,
    )

    return PodcastVideoGenerationResponse(
        task_id=task_id,
        status="pending",
        message="Video generation started. This may take up to 10 minutes.",
    )
|
||||
|
||||
|
||||
@router.get("/videos/{filename}")
async def serve_podcast_video(
    filename: str,
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve generated podcast scene video files.

    Supports authentication via Authorization header or token query parameter.
    Query parameter is useful for HTML elements like <video> that cannot send custom headers.

    The file is looked up first directly under the podcast videos directory
    and then under its AI videos subdirectory; the first regular file found
    is returned as ``video/mp4``.

    Raises:
        HTTPException: 400 when the filename contains path separators or
            ``..``; 404 when no matching file exists.
    """
    require_authenticated_user(current_user)
    # NOTE(review): any authenticated user can fetch any file by name; there
    # is no per-user ownership check here. Confirm that is intended.

    # Security check: ensure filename doesn't contain path traversal
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    # Look for video in podcast_videos directory (including AI_Videos subdirectory)
    candidates = (
        PODCAST_VIDEOS_DIR / filename,
        PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR / filename,
    )

    video_path = None
    for candidate in candidates:
        resolved_path = candidate.resolve()
        # Component-wise containment check: a plain string prefix test would
        # also accept sibling directories such as "podcast_videos_backup".
        if resolved_path.is_relative_to(PODCAST_VIDEOS_DIR) and resolved_path.is_file():
            video_path = resolved_path
            break

    if not video_path:
        raise HTTPException(status_code=404, detail="Video file not found")

    return FileResponse(video_path, media_type="video/mp4")
|
||||
|
||||
|
||||
@router.get("/videos")
async def list_podcast_videos(
    project_id: Optional[str] = None,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    List existing scene video files for the current user.
    Returns videos mapped to scene numbers for easy matching.

    Only the most recently modified file per scene number is returned, sorted
    by scene number. Files are matched by the name pattern
    ``scene_{number}_{user_id}_{suffix}.mp4``.

    ``project_id`` is accepted and logged but does not filter the result: the
    file names carry no project information.

    Authentication errors propagate to the client. Any other failure while
    scanning is logged and yields an empty list.
    """
    # Authenticate outside the try block so a 401 is not swallowed and
    # reported as an empty (200) listing.
    user_id = require_authenticated_user(current_user)

    try:
        logger.info(f"[Podcast] Listing videos for user_id={user_id}, project_id={project_id}")

        # Look in podcast_videos/AI_Videos directory
        ai_video_dir = PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR
        ai_video_dir.mkdir(parents=True, exist_ok=True)

        # Sanitise the user id the same way the file names embed it
        # (presumably mirrors save_scene_video - verify against that service).
        clean_user_id = "".join(c if c.isalnum() or c in ('-', '_') else '_' for c in user_id[:16])

        logger.info(f"[Podcast] Looking for videos with clean_user_id={clean_user_id} in {ai_video_dir}")

        # Map scene_number -> (most recent video info)
        scene_video_map: Dict[int, Dict[str, Any]] = {}

        all_files = list(ai_video_dir.glob("*.mp4"))
        logger.info(f"[Podcast] Found {len(all_files)} MP4 files in directory")

        for video_file in all_files:
            filename = video_file.name
            # Greedy match for user_id (it may contain underscores); the
            # trailing suffix is "anything except underscore" before .mp4.
            match = re.match(r"scene_(\d+)_(.+)_([^_]+)\.mp4", filename)
            if not match:
                continue

            # Only include videos for this user
            if match.group(2) != clean_user_id:
                continue

            scene_number = int(match.group(1))
            file_stat = video_file.stat()

            # Keep the most recent video for each scene
            current = scene_video_map.get(scene_number)
            if current is None or file_stat.st_mtime > current["mtime"]:
                scene_video_map[scene_number] = {
                    "scene_number": scene_number,
                    "filename": filename,
                    "video_url": f"/api/podcast/videos/{filename}",
                    "file_size": file_stat.st_size,
                    "mtime": file_stat.st_mtime,
                }

        # Convert map to list and sort by scene number
        videos = sorted(scene_video_map.values(), key=lambda v: v["scene_number"])

        logger.info(f"[Podcast] Returning {len(videos)} videos for user: {[v['scene_number'] for v in videos]}")

        return {"videos": videos}

    except Exception:
        logger.exception("[Podcast] Error listing videos")
        return {"videos": []}
|
||||
|
||||
|
||||
@router.post("/render/combine-videos", response_model=PodcastCombineVideosResponse)
async def combine_podcast_videos(
    request_obj: Request,
    request: PodcastCombineVideosRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Combine all scene videos into a single final podcast video.
    Returns task_id for polling.

    The combination itself runs on the module's video thread pool; this
    handler only validates the request, creates the task and returns.

    Raises:
        HTTPException: 400 when no scene video URLs are provided.
    """
    user_id = require_authenticated_user(current_user)

    # Validate before logging the count so an empty/missing list is a clean 400.
    if not request.scene_video_urls:
        raise HTTPException(status_code=400, detail="No scene videos provided")

    logger.info(f"[Podcast] Combining {len(request.scene_video_urls)} scene videos for project {request.project_id}")

    # Create async task
    task_id = task_manager.create_task("podcast_combine_videos")

    # Extract token for authenticated URL building. Only the leading scheme
    # is stripped; the token itself is left untouched.
    auth_token = None
    auth_header = request_obj.headers.get("Authorization")
    if auth_header and auth_header.startswith("Bearer "):
        auth_token = auth_header.removeprefix("Bearer ").strip()

    def handle_task_completion(future):
        """Callback to handle task completion and log errors."""
        try:
            future.result()  # This will raise if there was an exception
        except Exception as e:
            # loguru has no `exc_info` keyword: extra kwargs are fed to
            # str.format on the message. Attach the traceback via opt() and
            # pass the error as a format argument so braces in its text are safe.
            logger.opt(exception=e).error("[Podcast] Error in video combination task: {}", e)

    # Submit to executor - returns immediately, task runs in background thread
    future = _video_executor.submit(
        _execute_combine_videos_task,
        task_id,
        request.project_id,
        request.scene_video_urls,
        request.podcast_title,
        user_id,
        auth_token,
    )
    # Add callback to log errors without blocking
    future.add_done_callback(handle_task_completion)

    return PodcastCombineVideosResponse(
        task_id=task_id,
        status="pending",
        message="Video combination started. This may take a few minutes.",
    )
|
||||
|
||||
|
||||
def _execute_combine_videos_task(
    task_id: str,
    project_id: str,
    scene_video_urls: list[str],
    podcast_title: str,
    user_id: str,
    auth_token: Optional[str] = None,
):
    """Background task to combine scene videos into final podcast.

    Maps each scene video URL to a file in the AI videos directory, combines
    the files that exist, publishes the result through ``task_manager`` and
    then tries to store the final video URL on the project. Never raises: any
    failure is logged and the task is marked "failed".

    Args:
        task_id: Identifier of the task whose status is updated.
        project_id: Project that receives the final video URL.
        scene_video_urls: Scene video URLs; only the last path segment
            (without query string) of each is used.
        podcast_title: Title passed to the combination service.
        user_id: Owner of the project.
        auth_token: When given, appended to the final video URL as a
            ``token`` query parameter. It is never written to the logs.
    """
    try:
        task_manager.update_task_status(
            task_id, "processing", progress=10.0, message="Preparing scene videos..."
        )

        # Convert scene video URLs to local file paths
        scene_dir = PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR
        scene_video_paths = []
        for video_url in scene_video_urls:
            # Extract filename from URL (e.g., /api/podcast/videos/scene_1_user_xxx.mp4)
            filename = video_url.split("/")[-1].split("?")[0]  # Remove query params

            # The URL comes from the client: refuse names that would leave the
            # scene directory instead of trusting them.
            if not filename or filename in (".", "..") or "\\" in filename:
                logger.warning(f"[Podcast] Ignoring invalid scene video name: {filename!r}")
                continue

            video_path = scene_dir / filename

            # is_file() rather than exists(): a directory is not a video.
            if not video_path.is_file():
                logger.warning(f"[Podcast] Scene video not found: {video_path}")
                continue

            scene_video_paths.append(str(video_path))

        if not scene_video_paths:
            raise ValueError("No valid scene videos found to combine")

        logger.info(f"[Podcast] Found {len(scene_video_paths)} scene videos to combine")

        task_manager.update_task_status(
            task_id, "processing", progress=30.0, message="Combining videos..."
        )

        # Use dedicated PodcastVideoCombinationService
        final_videos_dir = PODCAST_VIDEOS_DIR / "Final_Videos"
        final_videos_dir.mkdir(parents=True, exist_ok=True)

        video_service = PodcastVideoCombinationService(output_dir=str(final_videos_dir))

        # Progress callback for task updates
        def progress_callback(progress: float, message: str):
            task_manager.update_task_status(
                task_id, "processing", progress=progress, message=message
            )

        task_manager.update_task_status(
            task_id, "processing", progress=50.0, message="Combining videos..."
        )

        # Combine videos using dedicated podcast service
        result = video_service.combine_videos(
            video_paths=scene_video_paths,
            podcast_title=podcast_title,
            fps=30,
            progress_callback=progress_callback,
        )

        video_filename = Path(result["video_path"]).name

        # Keep a token-free URL for logging; the bearer token must not end up in logs.
        base_video_url = f"/api/podcast/final-videos/{video_filename}"
        video_url = base_video_url
        if auth_token:
            video_url = f"{base_video_url}?token={quote(auth_token)}"

        logger.info(f"[Podcast] Final video combined: {video_filename}")

        result_data = {
            "video_url": video_url,
            "video_filename": video_filename,
            "duration": result.get("duration", 0),
            "file_size": result.get("file_size", 0),
        }

        task_manager.update_task_status(
            task_id,
            "completed",
            progress=100.0,
            message="Podcast video ready!",
            result=result_data,
        )

        # Save final video URL to project for persistence across reloads.
        # NOTE(review): the stored URL embeds the caller's bearer token when
        # one was supplied; confirm that persisting it is intended.
        try:
            from services.database import SessionLocal

            db = SessionLocal()
            try:
                service = PodcastService(db)
                service.update_project(user_id, project_id, final_video_url=video_url)
                db.commit()
                logger.info(f"[Podcast] Saved final video URL to project {project_id}: {base_video_url}")
            finally:
                db.close()
        except Exception as e:
            logger.warning(f"[Podcast] Failed to save final video URL to project: {e}")
            # Don't fail the task if project update fails - video is still available via task result

        logger.info(f"[Podcast] Task {task_id} marked as completed successfully")

    except Exception as e:
        logger.exception(f"[Podcast] Failed to combine videos: {e}")
        error_msg = _extract_error_message(e)
        task_manager.update_task_status(
            task_id,
            "failed",
            progress=0.0,
            message=f"Video combination failed: {error_msg}",
            error=str(error_msg),
        )
        logger.error(f"[Podcast] Task {task_id} marked as failed: {error_msg}")
|
||||
|
||||
|
||||
@router.get("/final-videos/{filename}")
async def serve_final_podcast_video(
    filename: str,
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve the final combined podcast video with authentication.

    Args:
        filename: Bare file name of a video inside ``PODCAST_VIDEOS_DIR/Final_Videos``.
        current_user: User payload resolved by ``get_current_user_with_query_token``.

    Returns:
        A ``FileResponse`` streaming the file as ``video/mp4``.

    Raises:
        HTTPException: 400 if ``filename`` contains ``..`` or a path separator,
            404 if no regular file with that name exists.
    """
    # Called for its check only; the returned id is not needed here.
    # NOTE(review): presumably raises for unauthenticated callers - confirm.
    # NOTE(review): no per-user ownership check is performed on the video.
    require_authenticated_user(current_user)

    # Validate the name BEFORE touching the filesystem so a crafted name can
    # never be used to probe for files outside the final-videos directory.
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    final_videos_dir = PODCAST_VIDEOS_DIR / "Final_Videos"
    video_path = final_videos_dir / filename

    # is_file() (rather than exists()) also rejects directories, which
    # FileResponse cannot serve.
    if not video_path.is_file():
        raise HTTPException(status_code=404, detail="Video not found")

    return FileResponse(
        path=str(video_path),
        media_type="video/mp4",
        filename=filename,
    )
|
||||
280
backend/api/podcast/models.py
Normal file
280
backend/api/podcast/models.py
Normal file
@@ -0,0 +1,280 @@
|
||||
"""
|
||||
Podcast API Models
|
||||
|
||||
All Pydantic request/response models for podcast endpoints.
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
from typing import List, Optional, Dict, Any
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class PodcastProjectResponse(BaseModel):
    """Response model for podcast project."""

    # --- Required identity and core settings ---
    id: int
    project_id: str
    user_id: str
    idea: str
    duration: int
    speakers: int
    budget_cap: float

    # --- Optional workflow payloads (absent until the matching step has run) ---
    analysis: Optional[Dict[str, Any]] = None
    queries: Optional[List[Dict[str, Any]]] = None
    selected_queries: Optional[List[str]] = None
    research: Optional[Dict[str, Any]] = None
    raw_research: Optional[Dict[str, Any]] = None
    estimate: Optional[Dict[str, Any]] = None
    script_data: Optional[Dict[str, Any]] = None
    render_jobs: Optional[List[Dict[str, Any]]] = None
    knobs: Optional[Dict[str, Any]] = None
    research_provider: Optional[str] = None

    # --- UI / lifecycle state ---
    show_script_editor: bool = False
    show_render_queue: bool = False
    current_step: Optional[str] = None
    status: str = "draft"
    is_favorite: bool = False
    final_video_url: Optional[str] = None

    # --- Timestamps ---
    created_at: datetime
    updated_at: datetime

    class Config:
        # Allow building the model from objects exposing these names as attributes.
        from_attributes = True
|
||||
|
||||
|
||||
class PodcastAnalyzeRequest(BaseModel):
    """Request model for podcast idea analysis."""

    # Only ``idea`` is required; the other two fall back to their defaults.
    idea: str = Field(
        ...,
        description="Podcast topic or idea",
    )
    duration: int = Field(
        default=10,
        description="Target duration in minutes",
    )
    speakers: int = Field(
        default=1,
        description="Number of speakers",
    )
|
||||
|
||||
|
||||
class PodcastAnalyzeResponse(BaseModel):
    """Response model for podcast idea analysis."""

    # Required analysis results.
    audience: str
    content_type: str
    top_keywords: list[str]
    suggested_outlines: list[Dict[str, Any]]
    title_suggestions: list[str]

    # Optional suggested Exa configuration; omitted when none is produced.
    exa_suggested_config: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class PodcastScriptRequest(BaseModel):
    """Request model for podcast script generation."""

    idea: str = Field(
        ...,
        description="Podcast idea or topic",
    )
    duration_minutes: int = Field(
        default=10,
        description="Target duration in minutes",
    )
    speakers: int = Field(
        default=1,
        description="Number of speakers",
    )
    # Research is optional; the field defaults to None.
    research: Optional[Dict[str, Any]] = Field(
        None,
        description="Optional research payload to ground the script",
    )
|
||||
|
||||
|
||||
class PodcastSceneLine(BaseModel):
    # One spoken line inside a scene: who says it and what is said.
    speaker: str
    text: str

    # Optional emphasis flag; defaults to False.
    emphasis: Optional[bool] = False
|
||||
|
||||
|
||||
class PodcastScene(BaseModel):
    # Required scene description.
    id: str
    title: str
    duration: int
    lines: list[PodcastSceneLine]

    # Review / presentation state.
    approved: bool = False
    emotion: Optional[str] = None

    # Generated image URL for video generation
    imageUrl: Optional[str] = None
|
||||
|
||||
|
||||
class PodcastExaConfig(BaseModel):
    """Exa config for podcast research."""

    exa_search_type: Optional[str] = Field(
        default="auto",
        description="auto | keyword | neural",
    )
    exa_category: Optional[str] = None
    exa_include_domains: List[str] = []
    exa_exclude_domains: List[str] = []
    max_sources: int = 8
    include_statistics: Optional[bool] = False
    date_range: Optional[str] = Field(
        default=None,
        description="last_month | last_3_months | last_year | all_time",
    )

    @model_validator(mode="after")
    def validate_domains(self):
        # Nothing to reconcile unless both lists are populated.
        if not (self.exa_include_domains and self.exa_exclude_domains):
            return self

        # Exa API does not allow both include and exclude domains together
        # with contents, so include_domains wins and exclude_domains is dropped.
        self.exa_exclude_domains = []
        return self
|
||||
|
||||
|
||||
class PodcastExaResearchRequest(BaseModel):
    """Request for podcast research using Exa directly (no blog writer)."""

    # Required: the topic and the search queries to run.
    topic: str
    queries: List[str]

    # Optional Exa settings; defaults to None when omitted.
    exa_config: Optional[PodcastExaConfig] = None
|
||||
|
||||
|
||||
class PodcastExaSource(BaseModel):
    # Text fields default to empty strings rather than None.
    title: str = ""
    url: str = ""
    excerpt: str = ""

    # Optional metadata; each defaults to None.
    published_at: Optional[str] = None
    highlights: Optional[List[str]] = None
    summary: Optional[str] = None
    source_type: Optional[str] = None
    index: Optional[int] = None
|
||||
|
||||
|
||||
class PodcastExaResearchResponse(BaseModel):
    # The only required field: the list of sources found.
    sources: List[PodcastExaSource]

    # Optional details about how the search was run.
    search_queries: List[str] = []
    cost: Optional[Dict[str, Any]] = None
    search_type: Optional[str] = None

    # Provider label defaults to "exa".
    provider: str = "exa"
    content: Optional[str] = None
|
||||
|
||||
|
||||
class PodcastScriptResponse(BaseModel):
    # The script as a list of scenes.
    scenes: list[PodcastScene]
|
||||
|
||||
|
||||
class PodcastAudioRequest(BaseModel):
    """Generate TTS for a podcast scene."""

    # --- Required: which scene and what text to speak ---
    scene_id: str
    scene_title: str
    text: str

    # --- Voice settings, each with a default ---
    voice_id: Optional[str] = "Wise_Woman"
    speed: Optional[float] = 1.0
    volume: Optional[float] = 1.0
    pitch: Optional[float] = 0.0
    emotion: Optional[str] = "neutral"

    # Better number reading for statistics
    english_normalization: Optional[bool] = False

    # --- Optional output/encoding settings; None when not supplied ---
    sample_rate: Optional[int] = None
    bitrate: Optional[int] = None
    channel: Optional[str] = None
    format: Optional[str] = None
    language_boost: Optional[str] = None

    # Sync-mode flag, on by default.
    enable_sync_mode: Optional[bool] = True
|
||||
|
||||
|
||||
class PodcastAudioResponse(BaseModel):
    # Scene the audio belongs to.
    scene_id: str
    scene_title: str

    # Where the generated audio can be found.
    audio_filename: str
    audio_url: str

    # Generation details.
    provider: str
    model: str
    voice_id: str

    # Size and cost figures.
    text_length: int
    file_size: int
    cost: float
|
||||
|
||||
|
||||
class PodcastProjectListResponse(BaseModel):
    """Response model for project list."""

    # The returned projects.
    projects: List[PodcastProjectResponse]

    # Paging information accompanying the list.
    total: int
    limit: int
    offset: int
|
||||
|
||||
|
||||
class CreateProjectRequest(BaseModel):
    """Request model for creating a project."""

    # --- Required ---
    project_id: str = Field(
        ...,
        description="Unique project ID",
    )
    idea: str = Field(
        ...,
        description="Episode idea or URL",
    )
    duration: int = Field(
        ...,
        description="Duration in minutes",
    )

    # --- Optional, with defaults ---
    speakers: int = Field(
        default=1,
        description="Number of speakers",
    )
    budget_cap: float = Field(
        default=50.0,
        description="Budget cap in USD",
    )
    avatar_url: Optional[str] = Field(
        None,
        description="Optional presenter avatar URL",
    )
|
||||
|
||||
|
||||
class UpdateProjectRequest(BaseModel):
    """Request model for updating project state."""

    # Every field is optional and defaults to None.

    # --- Workflow payloads ---
    analysis: Optional[Dict[str, Any]] = None
    queries: Optional[List[Dict[str, Any]]] = None
    selected_queries: Optional[List[str]] = None
    research: Optional[Dict[str, Any]] = None
    raw_research: Optional[Dict[str, Any]] = None
    estimate: Optional[Dict[str, Any]] = None
    script_data: Optional[Dict[str, Any]] = None
    render_jobs: Optional[List[Dict[str, Any]]] = None
    knobs: Optional[Dict[str, Any]] = None
    research_provider: Optional[str] = None

    # --- UI / lifecycle state ---
    show_script_editor: Optional[bool] = None
    show_render_queue: Optional[bool] = None
    current_step: Optional[str] = None
    status: Optional[str] = None
    final_video_url: Optional[str] = None
|
||||
|
||||
|
||||
class PodcastCombineAudioRequest(BaseModel):
    """Request model for combining podcast audio files."""

    project_id: str

    # Both lists are required.
    # NOTE(review): presumably index-aligned with each other - nothing here enforces it.
    scene_ids: List[str] = Field(
        ...,
        description="List of scene IDs to combine",
    )
    scene_audio_urls: List[str] = Field(
        ...,
        description="List of audio URLs for each scene",
    )
|
||||
|
||||
|
||||
class PodcastCombineAudioResponse(BaseModel):
    """Response model for combined podcast audio."""

    # Location of the combined file.
    combined_audio_url: str
    combined_audio_filename: str

    # Summary figures for the combined audio.
    total_duration: float
    file_size: int
    scene_count: int
|
||||
|
||||
|
||||
class PodcastImageRequest(BaseModel):
    """Request for generating an image for a podcast scene."""

    # --- Required scene identity ---
    scene_id: str
    scene_title: str

    # --- Optional context for the prompt ---
    # Optional: scene lines text for context
    scene_content: Optional[str] = None
    # Optional: podcast idea for context
    idea: Optional[str] = None
    # Base avatar image URL for scene variations
    base_avatar_url: Optional[str] = None

    # --- Output size; both default to 1024 ---
    width: int = 1024
    height: int = 1024

    # --- Optional generation controls ---
    # Custom prompt from user (overrides auto-generated prompt)
    custom_prompt: Optional[str] = None
    # "Auto", "Fiction", or "Realistic"
    style: Optional[str] = None
    # "Default", "Turbo", or "Quality"
    rendering_speed: Optional[str] = None
    # "1:1", "16:9", "9:16", "4:3", "3:4"
    aspect_ratio: Optional[str] = None
|
||||
|
||||
|
||||
class PodcastImageResponse(BaseModel):
    """Response for podcast scene image generation."""

    # Scene the image belongs to.
    scene_id: str
    scene_title: str

    # Where the generated image can be found, and its size.
    image_filename: str
    image_url: str
    width: int
    height: int

    # Generation details; the model name may be absent.
    provider: str
    model: Optional[str] = None
    cost: float
|
||||
|
||||
|
||||
class PodcastVideoGenerationRequest(BaseModel):
    """Request model for podcast video generation."""

    # --- Required ---
    project_id: str = Field(
        ...,
        description="Podcast project ID",
    )
    scene_id: str = Field(
        ...,
        description="Scene ID",
    )
    scene_title: str = Field(
        ...,
        description="Scene title",
    )
    audio_url: str = Field(
        ...,
        description="URL to the generated audio file",
    )

    # --- Optional, with defaults ---
    # Typed as optional, though the description calls it required for generation.
    avatar_image_url: Optional[str] = Field(
        None,
        description="URL to scene image (required for video generation)",
    )
    resolution: str = Field(
        "720p",
        description="Video resolution (480p or 720p)",
    )
    prompt: Optional[str] = Field(
        None,
        description="Optional animation prompt override",
    )
    seed: Optional[int] = Field(
        -1,
        description="Random seed; -1 for random",
    )
    mask_image_url: Optional[str] = Field(
        None,
        description="Optional mask image URL to specify animated region",
    )
|
||||
|
||||
|
||||
class PodcastVideoGenerationResponse(BaseModel):
    """Response model for podcast video generation."""

    # Task identifier plus its status and a message; all three are required.
    task_id: str
    status: str
    message: str
|
||||
|
||||
|
||||
class PodcastCombineVideosRequest(BaseModel):
    """Request to combine scene videos into final podcast"""

    project_id: str = Field(
        ...,
        description="Project ID",
    )
    # The description states the URLs are supplied in order.
    scene_video_urls: list[str] = Field(
        ...,
        description="List of scene video URLs in order",
    )
    podcast_title: str = Field(
        default="Podcast",
        description="Title for the final podcast video",
    )
|
||||
|
||||
|
||||
class PodcastCombineVideosResponse(BaseModel):
    """Response from combine videos endpoint"""

    # Task identifier plus its status and a message; all three are required.
    task_id: str
    status: str
    message: str
|
||||
|
||||
143
backend/api/podcast/presenter_personas.py
Normal file
143
backend/api/podcast/presenter_personas.py
Normal file
@@ -0,0 +1,143 @@
|
||||
"""
|
||||
Podcast Presenter Personas
|
||||
|
||||
Lightweight, podcast-specific presenter persona presets used to steer avatar generation.
|
||||
|
||||
Design goals:
|
||||
- Market-fit + style consistency without asking end-users to choose sensitive traits.
|
||||
- Deterministic persona selection using analysis hints (audience/content type/keywords).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, Optional, List
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PresenterPersona:
    # Immutable preset describing one presenter look.

    # Identifier and display label.
    id: str
    label: str

    # Market, e.g. "global", "us_canada", "uk_eu", "india", "latam"
    target_market: str

    # Style, e.g. "corporate", "tech_modern", "creator"
    style: str

    # Prompt fragment to inject
    prompt: str
|
||||
|
||||
|
||||
# NOTE: Avoid encoding/guessing ethnicity. Keep personas about market-fit + style.
|
||||
# Registry of the built-in presets, keyed by each persona's own ``id`` so the
# key and the ``id`` field cannot drift apart. Insertion order is the order below.
PERSONAS: Dict[str, PresenterPersona] = {
    persona.id: persona
    for persona in (
        PresenterPersona(
            id="global_corporate",
            label="Global — Corporate Host",
            target_market="global",
            style="corporate",
            prompt=(
                "professional podcast presenter, business professional attire (white shirt and light gray blazer), "
                "confident, friendly, camera-ready, neutral background, studio lighting"
            ),
        ),
        PresenterPersona(
            id="global_tech_modern",
            label="Global — Tech Modern Host",
            target_market="global",
            style="tech_modern",
            prompt=(
                "modern professional podcast presenter, contemporary tech-forward style, "
                "clean minimal studio background, soft studio lighting, friendly and energetic expression"
            ),
        ),
        PresenterPersona(
            id="global_news_anchor",
            label="Global — News Anchor",
            target_market="global",
            style="news_anchor",
            prompt=(
                "professional news-style presenter, polished on-camera appearance, "
                "formal attire, authoritative yet approachable expression, studio lighting, neutral background"
            ),
        ),
        PresenterPersona(
            id="india_corporate",
            label="India — Corporate Host",
            target_market="india",
            style="corporate",
            prompt=(
                "professional podcast presenter for the Indian market, business professional attire, "
                "polished and confident on-camera presence, clean studio background, soft studio lighting"
            ),
        ),
        PresenterPersona(
            id="us_canada_creator",
            label="US/Canada — Creator Host",
            target_market="us_canada",
            style="creator",
            prompt=(
                "professional podcast creator host, business casual style, approachable and conversational expression, "
                "clean studio background, soft studio lighting"
            ),
        ),
    )
}
|
||||
|
||||
|
||||
def get_persona(persona_id: Optional[str]) -> Optional[PresenterPersona]:
    """Return the persona registered under *persona_id*.

    Returns ``None`` when *persona_id* is falsy (``None`` or empty) or is not
    a key of ``PERSONAS``.
    """
    return PERSONAS.get(persona_id) if persona_id else None
|
||||
|
||||
|
||||
def list_personas() -> List[PresenterPersona]:
    """Return every registered persona as a new list, in registry order."""
    return [*PERSONAS.values()]
|
||||
|
||||
|
||||
def choose_persona_id(
    audience: Optional[str] = None,
    content_type: Optional[str] = None,
    top_keywords: Optional[List[str]] = None,
) -> str:
    """
    Choose a persona id using non-sensitive heuristics from analysis.

    - Uses explicit market hints if present (e.g. "India", "US", "UK", etc.)
    - Uses content_type / keywords to pick a style
    - Falls back to global corporate

    Short codes ("us", "usa", "uk", "eu", "ai") are matched as whole words
    only. Matching them as raw substrings misfires on ordinary words: "us" is
    inside "business", "eu" inside "entrepreneurs", "ai" inside "entertainment".
    Longer hints are still matched as substrings, so "india" also covers
    "indian" and "tech" also covers "technology".

    Args:
        audience: Free-text audience description; case-insensitive.
        content_type: Free-text content type; case-insensitive.
        top_keywords: Keywords compared (case-insensitively, trimmed) against
            fixed style keyword sets.

    Returns:
        One of the persona ids "india_corporate", "us_canada_creator",
        "global_news_anchor", "global_tech_modern" or "global_corporate".
    """
    import re  # function-scope import: the module's import block stays untouched

    def mentions(text: str, phrases: tuple = (), codes: tuple = ()) -> bool:
        """True if any phrase is a substring of text, or any code is a whole word in it."""
        if any(phrase in text for phrase in phrases):
            return True
        return any(re.search(rf"\b{re.escape(code)}\b", text) for code in codes)

    audience_l = (audience or "").lower()
    content_l = (content_type or "").lower()
    # A set gives direct membership tests; blank/None entries are ignored.
    keywords_l = {k.strip().lower() for k in (top_keywords or []) if k}

    # Market hints (explicit only). The first matching market wins.
    # NOTE: a lower-cased pronoun "us" still counts as a US hint.
    if mentions(audience_l, phrases=("india",)):
        market = "india"
    elif mentions(
        audience_l,
        phrases=("united states", "canada", "north america"),
        codes=("us", "usa"),
    ):
        market = "us_canada"
    elif mentions(audience_l, phrases=("united kingdom", "europe"), codes=("uk", "eu")):
        market = "uk_eu"
    elif mentions(audience_l, phrases=("latam", "latin america", "south america")):
        market = "latam"
    else:
        market = "global"

    # Style hints. The checks are independent and run in order, so a later
    # match overrides an earlier one (news < tech < creator).
    style = "corporate"
    if "news" in content_l or "analysis" in content_l:
        style = "news_anchor"
    if mentions(content_l, phrases=("tech", "software"), codes=("ai",)) or (
        keywords_l & {"ai", "technology", "tech", "software"}
    ):
        style = "tech_modern"
    if mentions(content_l, phrases=("casual", "creator", "conversational")) or (
        keywords_l & {"creator", "youtube", "tiktok", "instagram"}
    ):
        style = "creator"

    # Map market+style to a concrete persona id. Only two market-specific
    # combinations are mapped; "uk_eu" and "latam" currently fall through
    # to the global choices below.
    if market == "india" and style == "corporate":
        return "india_corporate"
    if market == "us_canada" and style == "creator":
        return "us_canada_creator"
    if style == "news_anchor":
        return "global_news_anchor"
    if style == "tech_modern":
        return "global_tech_modern"
    return "global_corporate"
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
105
backend/api/podcast/utils.py
Normal file
105
backend/api/podcast/utils.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
Podcast API Utility Functions
|
||||
|
||||
Helper functions for loading media files and other utilities.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
from fastapi import HTTPException
|
||||
from loguru import logger
|
||||
|
||||
from .constants import PODCAST_AUDIO_DIR, PODCAST_IMAGES_DIR
|
||||
|
||||
|
||||
def load_podcast_audio_bytes(audio_url: str) -> bytes:
    """Load podcast audio bytes from URL. Only handles /api/podcast/audio/ URLs.

    Args:
        audio_url: Absolute URL or bare path containing
            ``/api/podcast/audio/<filename>``; a query string is ignored.

    Returns:
        The bytes of ``<filename>`` read from ``PODCAST_AUDIO_DIR``.

    Raises:
        HTTPException: 400 for an empty or unsupported URL, 403 if the resolved
            path is outside ``PODCAST_AUDIO_DIR``, 404 if no such file exists,
            500 for any other failure.
    """
    if not audio_url:
        raise HTTPException(status_code=400, detail="Audio URL is required")

    try:
        parsed = urlparse(audio_url)
        # Without a scheme the raw input is used as the path (query included;
        # it is stripped when the filename is extracted below).
        path = parsed.path if parsed.scheme else audio_url

        # Only handle /api/podcast/audio/ URLs
        prefix = "/api/podcast/audio/"
        if prefix not in path:
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported audio URL format: {audio_url}. Only /api/podcast/audio/ URLs are supported."
            )

        filename = path.split(prefix, 1)[1].split("?", 1)[0].strip()
        if not filename:
            raise HTTPException(status_code=400, detail=f"Could not extract filename from URL: {audio_url}")

        # Podcast audio files are stored in podcast_audio directory
        audio_path = (PODCAST_AUDIO_DIR / filename).resolve()

        # Security check: ensure path is within PODCAST_AUDIO_DIR.
        # relative_to() compares whole path components. A plain string-prefix
        # test would also accept a sibling such as ".../podcast_audio_evil/x".
        try:
            audio_path.relative_to(PODCAST_AUDIO_DIR)
        except ValueError:
            logger.error(f"[Podcast] Attempted path traversal when resolving audio: {audio_url}")
            raise HTTPException(status_code=403, detail="Invalid audio path") from None

        # is_file() also rejects directories, which read_bytes() cannot read.
        if not audio_path.is_file():
            logger.warning(f"[Podcast] Audio file not found: {audio_path}")
            raise HTTPException(status_code=404, detail=f"Audio file not found: {filename}")

        return audio_path.read_bytes()
    except HTTPException:
        # Already a well-formed HTTP error; pass it through unchanged.
        raise
    except Exception as exc:
        logger.error(f"[Podcast] Failed to load audio: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to load audio: {str(exc)}") from exc
|
||||
|
||||
|
||||
def load_podcast_image_bytes(image_url: str) -> bytes:
    """Load podcast image bytes from URL. Only handles /api/podcast/images/ URLs.

    Args:
        image_url: Absolute URL or bare path containing
            ``/api/podcast/images/<filename>``; a query string is ignored.

    Returns:
        The bytes of ``<filename>`` read from ``PODCAST_IMAGES_DIR``.

    Raises:
        HTTPException: 400 for an empty or unsupported URL, 403 if the resolved
            path is outside ``PODCAST_IMAGES_DIR``, 404 if no such file exists,
            500 for any other failure.
    """
    if not image_url:
        raise HTTPException(status_code=400, detail="Image URL is required")

    logger.info(f"[Podcast] Loading image from URL: {image_url}")

    try:
        parsed = urlparse(image_url)
        # Without a scheme the raw input is used as the path (query included;
        # it is stripped when the filename is extracted below).
        path = parsed.path if parsed.scheme else image_url

        # Only handle /api/podcast/images/ URLs
        prefix = "/api/podcast/images/"
        if prefix not in path:
            logger.error(f"[Podcast] Unsupported image URL format: {image_url}")
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported image URL format: {image_url}. Only /api/podcast/images/ URLs are supported."
            )

        filename = path.split(prefix, 1)[1].split("?", 1)[0].strip()
        if not filename:
            logger.error(f"[Podcast] Could not extract filename from URL: {image_url}")
            raise HTTPException(status_code=400, detail=f"Could not extract filename from URL: {image_url}")

        logger.info(f"[Podcast] Extracted filename: {filename}")
        logger.info(f"[Podcast] PODCAST_IMAGES_DIR: {PODCAST_IMAGES_DIR}")

        # Podcast images are stored in podcast_images directory
        image_path = (PODCAST_IMAGES_DIR / filename).resolve()
        logger.info(f"[Podcast] Resolved image path: {image_path}")

        # Security check: ensure path is within PODCAST_IMAGES_DIR.
        # relative_to() compares whole path components. A plain string-prefix
        # test would also accept a sibling such as ".../podcast_images_evil/x".
        try:
            image_path.relative_to(PODCAST_IMAGES_DIR)
        except ValueError:
            logger.error(f"[Podcast] Attempted path traversal when resolving image: {image_url} -> {image_path}")
            raise HTTPException(status_code=403, detail="Invalid image path") from None

        # is_file() also rejects directories, which read_bytes() cannot read.
        if not image_path.is_file():
            logger.error(f"[Podcast] Image file not found: {image_path}")
            raise HTTPException(status_code=404, detail=f"Image file not found: {filename}")

        image_bytes = image_path.read_bytes()
        logger.info(f"[Podcast] ✅ Successfully loaded image: {len(image_bytes)} bytes from {image_path}")
        return image_bytes
    except HTTPException:
        # Already a well-formed HTTP error; pass it through unchanged.
        raise
    except Exception as exc:
        logger.error(f"[Podcast] Failed to load image: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to load image: {str(exc)}") from exc
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import FileResponse
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel
|
||||
|
||||
@@ -53,6 +53,9 @@ class PodcastProject(Base):
|
||||
status = Column(String(50), default="draft", nullable=False, index=True) # draft, in_progress, completed, archived
|
||||
is_favorite = Column(Boolean, default=False, index=True)
|
||||
|
||||
# Final combined video URL (persisted for reloads)
|
||||
final_video_url = Column(String(1000), nullable=True) # URL to final combined podcast video
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False, index=True)
|
||||
|
||||
91
backend/scripts/run_final_video_url_migration.py
Normal file
91
backend/scripts/run_final_video_url_migration.py
Normal file
@@ -0,0 +1,91 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Migration script to add final_video_url column to podcast_projects table.
|
||||
This script should be run once to add the column to existing databases.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
|
||||
# Add the backend directory to the Python path
|
||||
backend_dir = Path(__file__).parent.parent
|
||||
sys.path.insert(0, str(backend_dir))
|
||||
|
||||
def run_migration() -> bool:
    """Run the final_video_url column migration.

    Adds a nullable ``final_video_url VARCHAR(1000)`` column to the
    ``podcast_projects`` table of ``<backend_dir>/alwrity.db`` when missing.

    Returns:
        True if the column exists afterwards or the database file is absent
        (nothing to migrate); False if the migration SQL file is missing or
        any database/other error occurs.
    """
    # Opened only once the preconditions pass; the ``finally`` clause closes
    # it on every exit path, including when an exception is raised mid-way.
    conn = None
    try:
        # Get the database path
        db_path = backend_dir / "alwrity.db"

        logger.info("🔄 Starting final_video_url column migration...")
        logger.info(f"📁 Database path: {db_path}")

        # Check if database exists
        if not db_path.exists():
            logger.warning(f"⚠️ Database file not found at {db_path}")
            logger.info("ℹ️ New databases will have this column created automatically by SQLAlchemy")
            return True

        # Read the migration SQL
        migration_file = backend_dir / "database" / "migrations" / "009_add_final_video_url_to_podcast_projects.sql"

        if not migration_file.exists():
            logger.error(f"❌ Migration file not found: {migration_file}")
            return False

        # NOTE(review): the file content is loaded but never executed - the
        # ALTER statement below is hard-coded. Confirm the two stay in sync.
        with open(migration_file, 'r') as f:
            migration_sql = f.read()

        logger.info("📋 Migration SQL loaded successfully")

        # Connect to database and run migration
        conn = sqlite3.connect(str(db_path))
        cursor = conn.cursor()

        # Check if column already exists (row[1] of table_info is the column name)
        cursor.execute("PRAGMA table_info(podcast_projects)")
        columns = [row[1] for row in cursor.fetchall()]

        if 'final_video_url' in columns:
            logger.info("ℹ️ Column 'final_video_url' already exists, skipping migration")
            return True

        # Execute the migration
        logger.info("🔧 Adding final_video_url column...")
        cursor.execute("ALTER TABLE podcast_projects ADD COLUMN final_video_url VARCHAR(1000) NULL")
        conn.commit()

        # Verify the column was added
        cursor.execute("PRAGMA table_info(podcast_projects)")
        columns_after = [row[1] for row in cursor.fetchall()]

        if 'final_video_url' in columns_after:
            logger.info("✅ Migration completed successfully! Column 'final_video_url' added to podcast_projects table")
            return True

        logger.error("❌ Migration failed: Column was not added")
        return False

    except sqlite3.OperationalError as e:
        # A concurrent/previous run may already have added the column.
        if "duplicate column name" in str(e).lower():
            logger.info("ℹ️ Column 'final_video_url' already exists, skipping migration")
            return True
        logger.error(f"❌ Database error: {e}")
        return False
    except Exception as e:
        logger.error(f"❌ Error running migration: {e}")
        import traceback
        traceback.print_exc()
        return False
    finally:
        if conn is not None:
            conn.close()
|
||||
|
||||
if __name__ == "__main__":
    # Exit status 0 when the migration reports success, 1 otherwise.
    exit_code = 0 if run_migration() else 1
    sys.exit(exit_code)
|
||||
|
||||
@@ -137,6 +137,9 @@ def generate_audio(
|
||||
|
||||
# Generate audio using WaveSpeed
|
||||
try:
|
||||
# Avoid passing duplicate enable_sync_mode; allow override via kwargs
|
||||
enable_sync_mode = kwargs.pop("enable_sync_mode", True)
|
||||
|
||||
client = WaveSpeedClient()
|
||||
audio_bytes = client.generate_speech(
|
||||
text=text,
|
||||
@@ -145,7 +148,7 @@ def generate_audio(
|
||||
volume=volume,
|
||||
pitch=pitch,
|
||||
emotion=emotion,
|
||||
enable_sync_mode=True,
|
||||
enable_sync_mode=enable_sync_mode,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
|
||||
11
backend/services/podcast/__init__.py
Normal file
11
backend/services/podcast/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""
|
||||
Podcast Services Module
|
||||
|
||||
Dedicated services for podcast generation functionality.
|
||||
Separate from story writer services to maintain clear separation of concerns.
|
||||
"""
|
||||
|
||||
from .video_combination_service import PodcastVideoCombinationService
|
||||
|
||||
__all__ = ["PodcastVideoCombinationService"]
|
||||
|
||||
382
backend/services/podcast/video_combination_service.py
Normal file
382
backend/services/podcast/video_combination_service.py
Normal file
@@ -0,0 +1,382 @@
|
||||
"""
|
||||
Podcast Video Combination Service
|
||||
|
||||
Dedicated service for combining podcast scene videos into final episodes.
|
||||
Separate from StoryVideoGenerationService to avoid breaking story writer functionality.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
import warnings
|
||||
import time
|
||||
import threading
|
||||
from typing import List, Dict, Any, Optional
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class PodcastVideoCombinationService:
|
||||
"""Service for combining podcast scene videos into final episodes."""
|
||||
|
||||
def __init__(self, output_dir: Optional[str] = None):
    """
    Set up the service and make sure its output directory exists.

    Parameters:
        output_dir (str, optional): Directory to save combined videos.
            When omitted (or empty) the directory is
            ``podcast_videos/Final_Videos`` three levels above this file.
    """
    if not output_dir:
        # Default to podcast_videos/Final_Videos directory
        root_dir = Path(__file__).parent.parent.parent
        target_dir = root_dir / "podcast_videos" / "Final_Videos"
    else:
        target_dir = Path(output_dir)

    self.output_dir = target_dir
    # Create the directory (and any missing parents); a no-op if it already exists.
    self.output_dir.mkdir(parents=True, exist_ok=True)
    logger.info(f"[PodcastVideoCombination] Initialized with output directory: {self.output_dir}")
|
||||
|
||||
def combine_videos(
    self,
    video_paths: List[str],
    podcast_title: str,
    fps: int = 30,
    progress_callback: Optional[callable] = None,
) -> Dict[str, Any]:
    """
    Combine multiple video files into a single final podcast video.

    This method is specifically designed for podcast videos that already have
    embedded audio. It does not require separate audio files.

    Paths that do not point to an existing file are skipped with a warning,
    and clips that fail to load (or report a non-positive duration) are
    skipped as well; the combination only fails when nothing usable is left.
    Every clip opened here is closed again before the method returns or
    raises, so a failed run does not leave reader handles behind.

    Parameters:
        video_paths (List[str]): List of video file paths to combine.
        podcast_title (str): Title of the podcast episode.
        fps (int): Frames per second for output video (default: 30).
        progress_callback (callable, optional): Callback function for progress updates.
            Signature: callback(progress: float, message: str)
            During encoding it is invoked from a background thread with
            simulated progress values.

    Returns:
        Dict[str, Any]: Video metadata with the keys 'video_path',
            'video_filename', 'video_url', 'duration' (sum of the loaded
            clip durations), 'fps', 'file_size' and 'num_scenes'.

    Raises:
        ValueError: If no valid video files are provided.
        RuntimeError: If video combination fails.
    """
    if not video_paths:
        raise ValueError("No video paths provided")

    # Validate all video files exist
    valid_video_paths = []
    for video_path in video_paths:
        path = Path(video_path)
        if path.exists() and path.is_file():
            valid_video_paths.append(str(path))
        else:
            logger.warning(f"[PodcastVideoCombination] Video not found: {video_path}")

    if not valid_video_paths:
        raise ValueError("No valid video files found to combine")

    logger.info(f"[PodcastVideoCombination] Combining {len(valid_video_paths)} videos")

    # Everything opened below is tracked here so it can be released on both
    # the success path and every failure path.
    video_clips = []
    final_video = None
    clips_released = False

    def release_clips():
        """Close the composite clip and all source clips exactly once."""
        nonlocal clips_released
        if clips_released:
            return
        clips_released = True

        if final_video is not None:
            try:
                final_video.close()
            except Exception as e:
                logger.warning(f"[PodcastVideoCombination] Error closing final video clip: {e}")

        for clip in video_clips:
            try:
                clip.close()
            except Exception as e:
                logger.warning(f"[PodcastVideoCombination] Error closing video clip: {e}")

    try:
        # Import MoviePy lazily so the module can be imported without it
        try:
            from moviepy import VideoFileClip, concatenate_videoclips
        except Exception as e:
            logger.error(f"[PodcastVideoCombination] MoviePy not installed: {e}")
            raise RuntimeError("MoviePy is not installed. Please install it to combine videos.") from e

        # Suppress MoviePy warnings about incomplete frames (common with some video encodings)
        # NOTE: this installs a process-wide warnings filter.
        warnings.filterwarnings("ignore", category=UserWarning, module="moviepy")

        if progress_callback:
            progress_callback(10.0, "Loading video clips...")

        # Load all video clips
        total_duration = 0.0

        for idx, video_path in enumerate(valid_video_paths):
            video_clip = None
            try:
                logger.info(f"[PodcastVideoCombination] Loading video {idx + 1}/{len(valid_video_paths)}: {video_path}")

                # Load video clip with error handling for incomplete files
                # MoviePy will use the last valid frame if frames are missing at the end
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", UserWarning)
                    video_clip = VideoFileClip(str(video_path))

                # Validate clip was loaded successfully
                if video_clip.duration <= 0:
                    logger.warning(f"[PodcastVideoCombination] Video {video_path} has invalid duration, skipping")
                    video_clip.close()
                    continue

                # Videos already have embedded audio, no need to replace
                video_clips.append(video_clip)
                total_duration += video_clip.duration

                if progress_callback:
                    progress = 10.0 + ((idx + 1) / len(valid_video_paths)) * 60.0
                    progress_callback(progress, f"Loaded video {idx + 1}/{len(valid_video_paths)}")

            except Exception as e:
                logger.error(f"[PodcastVideoCombination] Failed to load video {video_path}: {e}")
                # A clip that was opened but never made it into video_clips would
                # otherwise never be closed.
                if video_clip is not None and not any(c is video_clip for c in video_clips):
                    try:
                        video_clip.close()
                    except Exception as close_error:
                        logger.warning(f"[PodcastVideoCombination] Error closing video clip: {close_error}")
                # Continue with other videos instead of failing completely
                continue

        if not video_clips:
            raise RuntimeError("No valid video clips were loaded")

        logger.info(f"[PodcastVideoCombination] Loaded {len(video_clips)} clips, total duration: {total_duration:.2f}s")

        if progress_callback:
            progress_callback(75.0, f"Concatenating {len(video_clips)} videos ({total_duration:.1f}s total)...")

        # Concatenate all video clips
        logger.info(f"[PodcastVideoCombination] Concatenating {len(video_clips)} video clips (total duration: {total_duration:.2f}s)")
        final_video = concatenate_videoclips(video_clips, method="compose")
        logger.info(f"[PodcastVideoCombination] Concatenation complete, final video duration: {final_video.duration:.2f}s")

        # Generate output filename
        video_filename = self._generate_video_filename(podcast_title)
        output_path = self.output_dir / video_filename

        if progress_callback:
            progress_callback(85.0, f"Rendering final video ({total_duration:.1f}s total)...")

        # Write final video file
        logger.info(
            f"[PodcastVideoCombination] Rendering final video to: {output_path} "
            f"(duration: {total_duration:.2f}s, {len(video_clips)} clips)"
        )

        # Use faster preset for quicker encoding (still good quality)
        # 'ultrafast' is fastest but lower quality, 'fast' is good balance
        encoding_preset = 'fast'  # Faster than 'medium' but still good quality

        # Suppress warnings during video writing as well
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", UserWarning)

            # Note: write_videofile is blocking and can take several minutes for longer videos
            # Estimated time: ~1-2 minutes per minute of video content
            estimated_time_minutes = max(1, int(total_duration / 60) * 2)
            logger.info(
                f"[PodcastVideoCombination] Starting video encoding "
                f"(estimated time: ~{estimated_time_minutes} minutes for {total_duration:.1f}s video)..."
            )

            start_time = time.time()

            # Start a thread to update progress periodically during encoding
            # Since write_videofile is blocking, we'll simulate progress
            progress_thread = None
            encoding_done = threading.Event()

            if progress_callback:
                def update_progress_periodically():
                    """Update progress every 5 seconds during encoding"""
                    base_progress = 87.0
                    max_progress = 98.0
                    progress_range = max_progress - base_progress
                    update_interval = 5.0  # Update every 5 seconds
                    elapsed = 0.0

                    try:
                        while not encoding_done.is_set():
                            elapsed += update_interval
                            # Simulate progress: linear ramp from 87% towards 98% over the
                            # estimated encoding time, then hold at 98% until encoding ends
                            progress = base_progress + (progress_range * min(1.0, elapsed / (estimated_time_minutes * 60)))
                            progress = min(max_progress, progress)

                            remaining_minutes = max(0, estimated_time_minutes - int(elapsed / 60))
                            message = f"Encoding video... ({remaining_minutes} min remaining)"
                            if remaining_minutes == 0:
                                message = "Finalizing video..."

                            try:
                                progress_callback(progress, message)
                            except Exception as e:
                                logger.warning(f"[PodcastVideoCombination] Error in progress callback: {e}")
                                break

                            # Use wait with timeout instead of sleep to check event more frequently
                            if encoding_done.wait(timeout=update_interval):
                                break  # Event was set, exit immediately
                    except Exception as e:
                        logger.warning(f"[PodcastVideoCombination] Error in progress thread: {e}")

                progress_thread = threading.Thread(target=update_progress_periodically, daemon=True)
                progress_thread.start()

            # Write video file - this is the blocking operation
            logger.info("[PodcastVideoCombination] Calling write_videofile...")
            try:
                final_video.write_videofile(
                    str(output_path),
                    fps=fps,
                    codec='libx264',
                    audio_codec='aac',
                    preset=encoding_preset,  # Faster encoding
                    threads=4,
                    logger=None,  # Disable MoviePy's default logger
                    bitrate=None,  # Let encoder choose optimal bitrate
                    audio_bitrate='192k',  # Good quality audio
                    temp_audiofile=str(output_path.with_suffix('.m4a')),  # Temporary audio file
                    remove_temp=True,  # Clean up temp files
                    write_logfile=False,  # Don't write log file
                )
                logger.info("[PodcastVideoCombination] write_videofile completed successfully")
            except Exception as write_error:
                logger.error(f"[PodcastVideoCombination] Error in write_videofile: {write_error}")
                # Best effort: accept the output if a non-empty file was produced despite the error
                if output_path.exists() and output_path.stat().st_size > 0:
                    logger.warning("[PodcastVideoCombination] Video file exists despite error, continuing...")
                else:
                    raise
            finally:
                # Always signal that encoding is done - don't wait for progress thread
                if progress_thread:
                    encoding_done.set()
                    # Don't join - let it finish on its own (daemon thread)

        elapsed_time = time.time() - start_time
        logger.info(
            f"[PodcastVideoCombination] Video encoding completed in {elapsed_time:.1f} seconds "
            f"({elapsed_time/60:.1f} minutes)"
        )

        if progress_callback:
            progress_callback(99.0, "Video encoding complete! Finalizing...")

        # Verify file was created and get file size
        # Use retry logic in case file is still being written
        max_retries = 5
        file_size = 0
        for retry in range(max_retries):
            if output_path.exists():
                file_size = output_path.stat().st_size
                if file_size > 0:
                    break
            if retry < max_retries - 1:
                logger.info(f"[PodcastVideoCombination] Waiting for video file to be written (retry {retry + 1}/{max_retries})...")
                time.sleep(1)

        if not output_path.exists():
            raise RuntimeError(f"Video file was not created: {output_path}")

        if file_size == 0:
            raise RuntimeError(f"Video file is empty: {output_path}")

        logger.info(f"[PodcastVideoCombination] Video file verified: {output_path} ({file_size} bytes)")

        # Release the clips before reporting completion (same ordering as before);
        # the finally clause below is then a no-op.
        release_clips()

        if progress_callback:
            progress_callback(100.0, "Video combination complete!")

        logger.info(f"[PodcastVideoCombination] Saved combined video to: {output_path} ({file_size} bytes)")

        return {
            "video_path": str(output_path),
            "video_filename": video_filename,
            "video_url": f"/api/podcast/final-videos/{video_filename}",
            "duration": total_duration,
            "fps": fps,
            "file_size": file_size,
            "num_scenes": len(video_clips),
        }

    except Exception as e:
        logger.exception(f"[PodcastVideoCombination] Error combining videos: {e}")
        raise RuntimeError(f"Failed to combine videos: {str(e)}") from e
    finally:
        # Guarantees clips are closed when loading, concatenation, encoding or
        # verification fails part-way through.
        release_clips()
|
||||
|
||||
def save_scene_video(self, video_bytes: bytes, scene_number: int, user_id: str) -> Dict[str, Any]:
    """
    Save a single scene video to disk.

    This is a utility method for saving individual scene videos before combination.
    Separate from story writer to maintain clear separation of concerns.

    The file is written to the "AI_Videos" directory that sits next to
    ``self.output_dir`` (created on demand) under a name of the form
    ``scene_<scene_number>_<sanitized user id>_<8 hex chars>.mp4``.

    Parameters:
        video_bytes (bytes): Raw video file bytes. Must not be empty.
        scene_number (int): Scene number for filename.
        user_id (str): User ID for filename. Only the first 16 characters are
            used; anything other than letters, digits, '-' and '_' becomes '_'.

    Returns:
        Dict[str, Any]: Dictionary with 'video_filename', 'video_path' and
            'video_url' (strings) and 'file_size' (int, bytes on disk).

    Raises:
        RuntimeError: If the payload is empty or the file cannot be written.
    """
    import uuid

    try:
        # An empty payload would otherwise be stored as a 0-byte .mp4 and
        # reported as a success; fail here through the usual error path.
        if not video_bytes:
            raise ValueError("video_bytes is empty")

        # Generate unique filename matching story writer format
        clean_user_id = "".join(c if c.isalnum() or c in ('-', '_') else '_' for c in user_id[:16])
        unique_suffix = str(uuid.uuid4())[:8]  # random, not time-based
        video_filename = f"scene_{scene_number}_{clean_user_id}_{unique_suffix}.mp4"

        # Save to AI_Videos subdirectory (scene videos before combination)
        # output_dir is Final_Videos, so parent is podcast_videos, then AI_Videos
        scene_videos_dir = self.output_dir.parent / "AI_Videos"
        scene_videos_dir.mkdir(parents=True, exist_ok=True)

        video_path = scene_videos_dir / video_filename

        # Write video bytes to file
        with open(video_path, "wb") as f:
            f.write(video_bytes)

        file_size = video_path.stat().st_size
        logger.info(f"[PodcastVideoCombination] Saved scene {scene_number} video: {video_filename} ({file_size} bytes)")

        # Generate URL path (relative to /api/podcast/videos/)
        video_url = f"/api/podcast/videos/{video_filename}"

        return {
            "video_filename": video_filename,
            "video_url": video_url,
            "video_path": str(video_path),
            "file_size": file_size,
        }

    except Exception as e:
        logger.error(f"[PodcastVideoCombination] Error saving scene video: {e}", exc_info=True)
        raise RuntimeError(f"Failed to save scene video: {str(e)}") from e
|
||||
|
||||
def _generate_video_filename(self, podcast_title: str) -> str:
|
||||
"""
|
||||
Generate a unique filename for the combined video.
|
||||
|
||||
Parameters:
|
||||
podcast_title (str): Title of the podcast episode.
|
||||
|
||||
Returns:
|
||||
str: Generated filename.
|
||||
"""
|
||||
# Sanitize title for filename
|
||||
safe_title = "".join(c for c in podcast_title if c.isalnum() or c in (' ', '-', '_')).strip()
|
||||
safe_title = safe_title.replace(' ', '_')[:50] # Limit length
|
||||
|
||||
# Add unique ID and timestamp
|
||||
unique_id = str(uuid.uuid4())[:8]
|
||||
timestamp = int(Path(__file__).stat().st_mtime) # Use file modification time as simple timestamp
|
||||
|
||||
return f"podcast_{safe_title}_{unique_id}_{timestamp}.mp4"
|
||||
|
||||
@@ -301,6 +301,12 @@ class StoryAudioGenerationService:
|
||||
pitch: float = 0.0,
|
||||
emotion: str = "happy",
|
||||
english_normalization: bool = False,
|
||||
sample_rate: Optional[int] = None,
|
||||
bitrate: Optional[int] = None,
|
||||
channel: Optional[str] = None,
|
||||
format: Optional[str] = None,
|
||||
language_boost: Optional[str] = None,
|
||||
enable_sync_mode: Optional[bool] = True,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Generate AI audio for a single scene using main_audio_generation.
|
||||
@@ -340,6 +346,12 @@ class StoryAudioGenerationService:
|
||||
emotion=emotion,
|
||||
user_id=user_id,
|
||||
english_normalization=english_normalization,
|
||||
sample_rate=sample_rate,
|
||||
bitrate=bitrate,
|
||||
channel=channel,
|
||||
format=format,
|
||||
language_boost=language_boost,
|
||||
enable_sync_mode=enable_sync_mode,
|
||||
)
|
||||
|
||||
# Save audio to file
|
||||
|
||||
@@ -252,10 +252,14 @@ class StoryVideoGenerationService:
|
||||
if len(scenes) != len(audio_paths):
|
||||
raise ValueError("Number of scenes and audio paths must match")
|
||||
|
||||
video_paths = video_paths or [None] * len(scenes)
|
||||
if len(video_paths) != len(scenes):
|
||||
# Ensure video_paths is a list and matches scenes length
|
||||
if video_paths is None:
|
||||
video_paths = [None] * len(scenes)
|
||||
elif len(video_paths) != len(scenes):
|
||||
video_paths = video_paths + [None] * (len(scenes) - len(video_paths))
|
||||
|
||||
logger.debug(f"[StoryVideoGeneration] video_paths length: {len(video_paths)}, scenes length: {len(scenes)}")
|
||||
|
||||
try:
|
||||
logger.info(f"[StoryVideoGeneration] Generating story video for {len(scenes)} scenes")
|
||||
|
||||
@@ -311,49 +315,64 @@ class StoryVideoGenerationService:
|
||||
scene_title = scene.get("title", "Untitled")
|
||||
|
||||
logger.info(f"[StoryVideoGeneration] Processing scene {scene_number}/{len(scenes)}: {scene_title}")
|
||||
|
||||
audio_file = Path(audio_path)
|
||||
if not audio_file.exists():
|
||||
logger.warning(f"[StoryVideoGeneration] Audio not found: {audio_path}, skipping scene {scene_number}")
|
||||
continue
|
||||
|
||||
# Load audio
|
||||
audio_clip = AudioFileClip(str(audio_file))
|
||||
audio_duration = audio_clip.duration
|
||||
logger.debug(f"[StoryVideoGeneration] Scene {scene_number} paths - video: {video_path}, audio: {audio_path}, image: {image_path}")
|
||||
|
||||
# Prefer animated video if available
|
||||
if video_path and Path(video_path).exists():
|
||||
# Check video_path is not None and is a valid string before calling Path()
|
||||
if video_path is not None and isinstance(video_path, (str, Path)) and video_path and Path(video_path).exists():
|
||||
logger.info(f"[StoryVideoGeneration] Using animated video for scene {scene_number}: {video_path}")
|
||||
# Load animated video
|
||||
if VideoFileClip is None:
|
||||
raise RuntimeError("VideoFileClip not available - MoviePy may not be fully installed")
|
||||
video_clip = VideoFileClip(str(video_path))
|
||||
# Replace audio with the preferred audio (AI or free)
|
||||
video_clip = video_clip.with_audio(audio_clip)
|
||||
# Match duration to audio if needed
|
||||
if video_clip.duration > audio_duration:
|
||||
video_clip = video_clip.subclip(0, audio_duration)
|
||||
elif video_clip.duration < audio_duration:
|
||||
# Loop the video if it's shorter than audio
|
||||
loops_needed = int(audio_duration / video_clip.duration) + 1
|
||||
video_clip = concatenate_videoclips([video_clip] * loops_needed).subclip(0, audio_duration)
|
||||
|
||||
# Handle audio: use embedded audio if no separate audio_path provided
|
||||
if audio_path is not None and isinstance(audio_path, (str, Path)) and audio_path and Path(audio_path).exists():
|
||||
# Load separate audio file and replace video's audio
|
||||
logger.info(f"[StoryVideoGeneration] Replacing video audio with separate audio file: {audio_path}")
|
||||
audio_clip = AudioFileClip(str(audio_path))
|
||||
audio_duration = audio_clip.duration
|
||||
video_clip = video_clip.with_audio(audio_clip)
|
||||
elif image_path and Path(image_path).exists():
|
||||
# Fall back to static image
|
||||
logger.info(f"[StoryVideoGeneration] Using static image for scene {scene_number}: {image_path}")
|
||||
image_file = Path(image_path)
|
||||
# Create image clip (MoviePy v2: use with_* API)
|
||||
image_clip = ImageClip(str(image_file)).with_duration(audio_duration)
|
||||
image_clip = image_clip.with_fps(fps)
|
||||
# Set audio to image clip
|
||||
video_clip = image_clip.with_audio(audio_clip)
|
||||
# Match duration to audio if needed
|
||||
if video_clip.duration > audio_duration:
|
||||
video_clip = video_clip.subclip(0, audio_duration)
|
||||
elif video_clip.duration < audio_duration:
|
||||
# Loop the video if it's shorter than audio
|
||||
loops_needed = int(audio_duration / video_clip.duration) + 1
|
||||
video_clip = concatenate_videoclips([video_clip] * loops_needed).subclip(0, audio_duration)
|
||||
video_clip = video_clip.with_audio(audio_clip)
|
||||
else:
|
||||
# Use embedded audio from video
|
||||
logger.info(f"[StoryVideoGeneration] Using embedded audio from video for scene {scene_number}")
|
||||
audio_duration = video_clip.duration
|
||||
# Video already has audio, no need to replace
|
||||
|
||||
scene_clips.append(video_clip)
|
||||
total_duration += audio_duration
|
||||
elif audio_path is not None and isinstance(audio_path, (str, Path)) and audio_path and Path(audio_path).exists():
|
||||
# No video, but we have audio - use with image or create blank
|
||||
audio_file = Path(audio_path)
|
||||
audio_clip = AudioFileClip(str(audio_file))
|
||||
audio_duration = audio_clip.duration
|
||||
|
||||
if image_path is not None and isinstance(image_path, (str, Path)) and image_path and Path(image_path).exists():
|
||||
# Fall back to static image with audio
|
||||
logger.info(f"[StoryVideoGeneration] Using static image for scene {scene_number}: {image_path}")
|
||||
image_file = Path(image_path)
|
||||
# Create image clip (MoviePy v2: use with_* API)
|
||||
image_clip = ImageClip(str(image_file)).with_duration(audio_duration)
|
||||
image_clip = image_clip.with_fps(fps)
|
||||
# Set audio to image clip
|
||||
video_clip = image_clip.with_audio(audio_clip)
|
||||
scene_clips.append(video_clip)
|
||||
total_duration += audio_duration
|
||||
else:
|
||||
logger.warning(f"[StoryVideoGeneration] Audio provided but no video or image for scene {scene_number}, skipping")
|
||||
continue
|
||||
else:
|
||||
logger.warning(f"[StoryVideoGeneration] No video or image found for scene {scene_number}, skipping")
|
||||
logger.warning(f"[StoryVideoGeneration] No video, audio, or image found for scene {scene_number}, skipping")
|
||||
continue
|
||||
|
||||
scene_clips.append(video_clip)
|
||||
total_duration += audio_duration
|
||||
|
||||
# Call progress callback if provided
|
||||
if progress_callback:
|
||||
progress = ((idx + 1) / len(scenes)) * 90 # Reserve 10% for final composition
|
||||
@@ -362,7 +381,12 @@ class StoryVideoGenerationService:
|
||||
logger.info(f"[StoryVideoGeneration] Processed scene {idx + 1}/{len(scenes)}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[StoryVideoGeneration] Failed to process scene {idx + 1}: {e}")
|
||||
logger.error(
|
||||
f"[StoryVideoGeneration] Failed to process scene {idx + 1} ({scene_number}): {e}\n"
|
||||
f" video_path: {video_path} (type: {type(video_path)})\n"
|
||||
f" audio_path: {audio_path} (type: {type(audio_path)})\n"
|
||||
f" image_path: {image_path} (type: {type(image_path)})"
|
||||
)
|
||||
# Continue with next scene instead of failing completely
|
||||
continue
|
||||
|
||||
|
||||
@@ -71,13 +71,16 @@ class WaveSpeedClient:
|
||||
logger.info(f"[WaveSpeed] Submitted request: {prediction_id}")
|
||||
return prediction_id
|
||||
|
||||
def get_prediction_result(self, prediction_id: str, timeout: int = 120) -> Dict[str, Any]:
|
||||
def get_prediction_result(self, prediction_id: str, timeout: int = 30) -> Dict[str, Any]:
|
||||
"""
|
||||
Fetch the current status/result for a prediction.
|
||||
Matches the example pattern: simple GET request, check status_code == 200, return data.
|
||||
"""
|
||||
url = f"{self.BASE_URL}/predictions/{prediction_id}/result"
|
||||
headers = {"Authorization": f"Bearer {self.api_key}"}
|
||||
|
||||
try:
|
||||
response = requests.get(url, headers={"Authorization": f"Bearer {self.api_key}"}, timeout=timeout)
|
||||
response = requests.get(url, headers=headers, timeout=timeout)
|
||||
except requests_exceptions.Timeout as exc:
|
||||
raise HTTPException(
|
||||
status_code=504,
|
||||
@@ -98,7 +101,15 @@ class WaveSpeedClient:
|
||||
"exception": str(exc),
|
||||
},
|
||||
) from exc
|
||||
if response.status_code != 200:
|
||||
|
||||
# Match example pattern: check status_code == 200, then get data
|
||||
if response.status_code == 200:
|
||||
result = response.json().get("data")
|
||||
if not result:
|
||||
raise HTTPException(status_code=502, detail={"error": "WaveSpeed polling response missing data"})
|
||||
return result
|
||||
else:
|
||||
# Non-200 status - log and raise error (matching example's break behavior)
|
||||
logger.error(f"[WaveSpeed] Polling failed: {response.status_code} {response.text}")
|
||||
raise HTTPException(
|
||||
status_code=502,
|
||||
@@ -109,59 +120,116 @@ class WaveSpeedClient:
|
||||
},
|
||||
)
|
||||
|
||||
result = response.json().get("data")
|
||||
if not result:
|
||||
raise HTTPException(status_code=502, detail={"error": "WaveSpeed polling response missing data"})
|
||||
return result
|
||||
|
||||
def poll_until_complete(
|
||||
self,
|
||||
prediction_id: str,
|
||||
timeout_seconds: int = 240,
|
||||
timeout_seconds: Optional[int] = None,
|
||||
interval_seconds: float = 1.0,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Poll WaveSpeed until the job completes, fails, or times out.
|
||||
Poll WaveSpeed until the job completes or fails.
|
||||
Matches the example pattern: simple polling loop until status is "completed" or "failed".
|
||||
|
||||
Args:
|
||||
prediction_id: The prediction ID to poll for
|
||||
timeout_seconds: Optional timeout in seconds. If None, polls indefinitely until completion/failure.
|
||||
interval_seconds: Seconds to wait between polling attempts (default: 1.0, faster than 2.0)
|
||||
|
||||
Returns:
|
||||
Dict containing the completed result
|
||||
|
||||
Raises:
|
||||
HTTPException: If the task fails, polling fails, or times out (if timeout_seconds is set)
|
||||
"""
|
||||
start_time = time.time()
|
||||
consecutive_errors = 0
|
||||
max_consecutive_errors = 6 # safety guard for non-transient errors
|
||||
|
||||
while True:
|
||||
try:
|
||||
result = self.get_prediction_result(prediction_id)
|
||||
consecutive_errors = 0 # Reset error counter on success
|
||||
except HTTPException as exc:
|
||||
detail = exc.detail or {}
|
||||
if isinstance(detail, dict):
|
||||
detail.setdefault("prediction_id", prediction_id)
|
||||
detail.setdefault("resume_available", True)
|
||||
detail.setdefault("error", detail.get("error", "WaveSpeed polling failed"))
|
||||
raise HTTPException(status_code=exc.status_code, detail=detail) from exc
|
||||
|
||||
# Determine underlying status code (WaveSpeed vs proxy)
|
||||
status_code = detail.get("status_code", exc.status_code)
|
||||
|
||||
# Treat 5xx as transient: keep polling indefinitely with backoff
|
||||
if 500 <= int(status_code) < 600:
|
||||
consecutive_errors += 1
|
||||
backoff = min(30.0, interval_seconds * (2 ** (consecutive_errors - 1)))
|
||||
logger.warning(
|
||||
f"[WaveSpeed] Transient polling error {consecutive_errors} for {prediction_id}: "
|
||||
f"{status_code}. Backing off {backoff:.1f}s"
|
||||
)
|
||||
time.sleep(backoff)
|
||||
continue
|
||||
|
||||
# For non-transient (typically 4xx) errors, apply safety cap
|
||||
consecutive_errors += 1
|
||||
if consecutive_errors >= max_consecutive_errors:
|
||||
logger.error(
|
||||
f"[WaveSpeed] Too many polling errors ({consecutive_errors}) for {prediction_id}, "
|
||||
f"status_code={status_code}. Giving up."
|
||||
)
|
||||
raise HTTPException(status_code=exc.status_code, detail=detail) from exc
|
||||
|
||||
backoff = min(30.0, interval_seconds * (2 ** (consecutive_errors - 1)))
|
||||
logger.warning(
|
||||
f"[WaveSpeed] Polling error {consecutive_errors}/{max_consecutive_errors} for {prediction_id}: "
|
||||
f"{status_code}. Backing off {backoff:.1f}s"
|
||||
)
|
||||
time.sleep(backoff)
|
||||
continue
|
||||
|
||||
# Extract status from result (matching example pattern)
|
||||
status = result.get("status")
|
||||
|
||||
if status == "completed":
|
||||
logger.info(f"[WaveSpeed] Prediction {prediction_id} completed.")
|
||||
elapsed = time.time() - start_time
|
||||
logger.info(f"[WaveSpeed] Prediction {prediction_id} completed in {elapsed:.1f}s")
|
||||
return result
|
||||
|
||||
if status == "failed":
|
||||
logger.error(f"[WaveSpeed] Prediction {prediction_id} failed: {result.get('error')}")
|
||||
error_msg = result.get("error", "Unknown error")
|
||||
logger.error(f"[WaveSpeed] Prediction {prediction_id} failed: {error_msg}")
|
||||
raise HTTPException(
|
||||
status_code=502,
|
||||
detail={
|
||||
"error": "WaveSpeed animation failed",
|
||||
"prediction_id": prediction_id,
|
||||
"details": result.get("error"),
|
||||
},
|
||||
)
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
if elapsed > timeout_seconds:
|
||||
logger.error(f"[WaveSpeed] Prediction {prediction_id} timed out after {timeout_seconds}s")
|
||||
raise HTTPException(
|
||||
status_code=504,
|
||||
detail={
|
||||
"error": "WaveSpeed animation timed out",
|
||||
"error": "WaveSpeed task failed",
|
||||
"prediction_id": prediction_id,
|
||||
"message": error_msg,
|
||||
"details": result,
|
||||
},
|
||||
)
|
||||
|
||||
logger.debug(f"[WaveSpeed] Prediction {prediction_id} status={status}. Waiting...")
|
||||
# Check timeout only if specified
|
||||
if timeout_seconds is not None:
|
||||
elapsed = time.time() - start_time
|
||||
if elapsed > timeout_seconds:
|
||||
logger.error(f"[WaveSpeed] Prediction {prediction_id} timed out after {timeout_seconds}s")
|
||||
raise HTTPException(
|
||||
status_code=504,
|
||||
detail={
|
||||
"error": "WaveSpeed task timed out",
|
||||
"prediction_id": prediction_id,
|
||||
"timeout_seconds": timeout_seconds,
|
||||
"current_status": status,
|
||||
"message": f"Task did not complete within {timeout_seconds} seconds. Status: {status}",
|
||||
},
|
||||
)
|
||||
|
||||
# Log progress periodically (every 30 seconds)
|
||||
elapsed = time.time() - start_time
|
||||
if int(elapsed) % 30 == 0 and elapsed > 0:
|
||||
logger.info(f"[WaveSpeed] Polling {prediction_id}: status={status}, elapsed={elapsed:.0f}s")
|
||||
|
||||
# Poll faster (1.0s instead of 2.0s) to match example's responsiveness
|
||||
time.sleep(interval_seconds)
|
||||
|
||||
def optimize_prompt(
|
||||
@@ -469,7 +537,9 @@ class WaveSpeedClient:
|
||||
|
||||
# Fetch image bytes
|
||||
logger.info(f"[WaveSpeed] Fetching image from URL: {image_url}")
|
||||
image_response = requests.get(image_url, timeout=timeout)
|
||||
# Use reasonable timeout for downloading the final image (60s should be enough)
|
||||
# The timeout parameter is for polling, not for downloading
|
||||
image_response = requests.get(image_url, timeout=60)
|
||||
if image_response.status_code == 200:
|
||||
image_bytes = image_response.content
|
||||
logger.info(f"[WaveSpeed] Image generated successfully (size: {len(image_bytes)} bytes)")
|
||||
@@ -481,6 +551,208 @@ class WaveSpeedClient:
|
||||
detail="Failed to fetch generated image from WaveSpeed URL",
|
||||
)
|
||||
|
||||
def generate_character_image(
    self,
    prompt: str,
    reference_image_bytes: bytes,
    style: str = "Auto",
    aspect_ratio: str = "16:9",
    rendering_speed: str = "Default",
    timeout: Optional[int] = None,
) -> bytes:
    """
    Generate an image with the Ideogram Character API, keeping the character consistent.

    The reference image is sent as a base64 data URI together with the prompt.
    The task is submitted with a POST (retried on connection failures), then
    polled via ``poll_until_complete`` unless the submission response already
    reports ``status == "completed"`` with outputs. Finally the first output
    URL is downloaded and its bytes returned.

    Args:
        prompt: Text prompt describing the scene/context for the character.
        reference_image_bytes: Reference image bytes (base avatar).
        style: Character style type ("Auto", "Fiction", or "Realistic").
        aspect_ratio: Aspect ratio ("1:1", "16:9", "9:16", "4:3", "3:4").
        rendering_speed: Rendering speed ("Default", "Turbo", "Quality").
        timeout: Polling timeout in seconds, forwarded as ``timeout_seconds``
            to ``poll_until_complete``. ``None`` (the default) or ``0`` is
            forwarded as ``None``, i.e. no polling deadline is requested.
            The submission POST and the final download use their own fixed
            timeouts and are not affected by this value.

    Returns:
        bytes: Generated image bytes with consistent character.

    Raises:
        HTTPException: 504 on connect/read/download timeouts; 502 on
            connection errors, non-200 responses, malformed responses,
            failed tasks, missing outputs, or a failed image download.
    """
    import base64

    # Encode the reference image as a data URI.
    # NOTE(review): the MIME type is always declared as image/png, whatever
    # the actual format of reference_image_bytes is.
    image_base64 = base64.b64encode(reference_image_bytes).decode('utf-8')
    image_data_uri = f"data:image/png;base64,{image_base64}"

    url = f"{self.BASE_URL}/ideogram-ai/ideogram-character"

    # enable_sync_mode is not a valid parameter for this endpoint: the API is
    # always async, so the result has to be polled for.
    payload = {
        "prompt": prompt,
        "image": image_data_uri,
        "style": style,
        "aspect_ratio": aspect_ratio,
        "rendering_speed": rendering_speed,
    }

    logger.info(f"[WaveSpeed] Generating character image via Ideogram Character (prompt_length={len(prompt)})")

    # The POST only has to return a task ID, so fixed (connect, read) timeouts
    # of 30s each are used. Connection failures are retried with exponential
    # backoff; a read timeout is not retried.
    max_retries = 2
    retry_delay = 2.0  # seconds

    for attempt in range(max_retries + 1):
        try:
            response = requests.post(
                url,
                headers=self._headers(),
                json=payload,
                timeout=(30, 30),  # (connect_timeout, read_timeout)
            )
            break  # Success, exit retry loop
        except (requests_exceptions.ConnectTimeout, requests_exceptions.ConnectionError) as e:
            if attempt < max_retries:
                logger.warning(f"[WaveSpeed] Connection attempt {attempt + 1}/{max_retries + 1} failed, retrying in {retry_delay}s: {e}")
                time.sleep(retry_delay)
                retry_delay *= 2  # Exponential backoff
                continue

            # Final attempt failed
            is_connect_timeout = isinstance(e, requests_exceptions.ConnectTimeout)
            error_type = "Connection timeout" if is_connect_timeout else "Connection error"
            logger.error(f"[WaveSpeed] {error_type} to Ideogram Character API after {max_retries + 1} attempts: {e}")
            raise HTTPException(
                status_code=504 if is_connect_timeout else 502,
                detail={
                    "error": f"{error_type} to WaveSpeed Ideogram Character API",
                    "message": "Unable to establish connection to the image generation service after multiple attempts. Please check your network connection and try again.",
                    "exception": str(e),
                    "retry_recommended": True,
                },
            ) from e
        except requests_exceptions.Timeout as e:
            logger.error(f"[WaveSpeed] Request timeout to Ideogram Character API: {e}")
            raise HTTPException(
                status_code=504,
                detail={
                    "error": "Request timeout to WaveSpeed Ideogram Character API",
                    "message": "The image generation request took too long. Please try again.",
                    "exception": str(e),
                },
            ) from e

    if response.status_code != 200:
        logger.error(f"[WaveSpeed] Character image generation failed: {response.status_code} {response.text}")
        raise HTTPException(
            status_code=502,
            detail={
                "error": "WaveSpeed Ideogram Character generation failed",
                "status_code": response.status_code,
                "response": response.text,
            },
        )

    # A 200 response with a non-JSON or non-object body must surface as a 502,
    # not as an unhandled ValueError/AttributeError.
    try:
        response_json = response.json()
    except ValueError as e:
        logger.error(f"[WaveSpeed] Ideogram Character returned a non-JSON response: {response.text}")
        raise HTTPException(
            status_code=502,
            detail="WaveSpeed Ideogram Character returned unexpected response format",
        ) from e

    data = (response_json.get("data") or response_json) if isinstance(response_json, dict) else None
    if not isinstance(data, dict):
        logger.error(f"[WaveSpeed] Unexpected response structure: {response.text}")
        raise HTTPException(
            status_code=502,
            detail="WaveSpeed Ideogram Character returned unexpected response format",
        )

    # Extract prediction ID
    prediction_id = data.get("id")
    if not prediction_id:
        logger.error(f"[WaveSpeed] No prediction ID in response: {response.text}")
        raise HTTPException(
            status_code=502,
            detail="WaveSpeed Ideogram Character response missing prediction id",
        )

    outputs = data.get("outputs") or []
    status = data.get("status", "unknown")

    logger.info(f"[WaveSpeed] Ideogram Character task created: prediction_id={prediction_id}, status={status}")

    if outputs and status == "completed":
        # Already finished at submission time (unlikely but possible).
        logger.info("[WaveSpeed] Got immediate results from Ideogram Character")
    else:
        logger.info(f"[WaveSpeed] Polling for Ideogram Character result (status: {status}, prediction_id: {prediction_id})")
        # A falsy timeout (None or 0) is forwarded as None: no polling deadline.
        polling_timeout = timeout or None
        result = self.poll_until_complete(
            prediction_id,
            timeout_seconds=polling_timeout,
            interval_seconds=0.5,
        )

        if not isinstance(result, dict):
            logger.error(f"[WaveSpeed] Unexpected result type: {type(result)}, value: {result}")
            raise HTTPException(
                status_code=502,
                detail="WaveSpeed Ideogram Character returned unexpected response format",
            )

        outputs = result.get("outputs") or []
        status = result.get("status", "unknown")

        if status != "completed":
            # result is known to be a dict here; take the first available error text.
            error_msg = result.get("error") or result.get("message") or str(result.get("details", "Unknown error"))
            logger.error(f"[WaveSpeed] Ideogram Character task did not complete: status={status}, error={error_msg}")
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "WaveSpeed Ideogram Character task failed",
                    "status": status,
                    "message": error_msg,
                }
            )

    # Extract image URL from outputs
    if not outputs:
        logger.error(f"[WaveSpeed] No outputs after polling: status={status}")
        raise HTTPException(
            status_code=502,
            detail="WaveSpeed Ideogram Character returned no outputs",
        )

    image_url = None
    if isinstance(outputs, list) and len(outputs) > 0:
        first_output = outputs[0]
        if isinstance(first_output, str):
            image_url = first_output
        elif isinstance(first_output, dict):
            image_url = first_output.get("url") or first_output.get("image_url")

    if not image_url:
        logger.error(f"[WaveSpeed] No image URL in outputs: {outputs}")
        raise HTTPException(
            status_code=502,
            detail="WaveSpeed Ideogram Character response missing image URL",
        )

    # Download image. Network failures are translated like the submission
    # path (504 for timeouts, 502 otherwise) instead of escaping raw.
    logger.info(f"[WaveSpeed] Downloading character image from: {image_url}")
    try:
        image_response = requests.get(image_url, timeout=60)
    except requests_exceptions.RequestException as e:
        logger.error(f"[WaveSpeed] Failed to download image: {e}")
        raise HTTPException(
            status_code=504 if isinstance(e, requests_exceptions.Timeout) else 502,
            detail="Failed to download generated character image",
        ) from e

    if image_response.status_code != 200:
        logger.error(f"[WaveSpeed] Failed to download image: {image_response.status_code}")
        raise HTTPException(
            status_code=502,
            detail="Failed to download generated character image",
        )

    image_bytes = image_response.content
    logger.info(f"[WaveSpeed] ✅ Successfully generated character image: {len(image_bytes)} bytes")
    return image_bytes
|
||||
|
||||
def generate_speech(
|
||||
self,
|
||||
text: str,
|
||||
@@ -490,7 +762,7 @@ class WaveSpeedClient:
|
||||
pitch: float = 0.0,
|
||||
emotion: str = "happy",
|
||||
enable_sync_mode: bool = True,
|
||||
timeout: int = 60,
|
||||
timeout: int = 120,
|
||||
**kwargs
|
||||
) -> bytes:
|
||||
"""
|
||||
@@ -537,7 +809,51 @@ class WaveSpeedClient:
|
||||
payload[param] = kwargs[param]
|
||||
|
||||
logger.info(f"[WaveSpeed] Generating speech via {url} (voice={voice_id}, text_length={len(text)})")
|
||||
response = requests.post(url, headers=self._headers(), json=payload, timeout=timeout)
|
||||
|
||||
# Retry on transient connection issues
|
||||
max_retries = 2
|
||||
retry_delay = 2.0
|
||||
last_error = None
|
||||
for attempt in range(max_retries + 1):
|
||||
try:
|
||||
response = requests.post(
|
||||
url,
|
||||
headers=self._headers(),
|
||||
json=payload,
|
||||
timeout=(30, 60), # connect, read
|
||||
)
|
||||
break
|
||||
except (requests_exceptions.ConnectTimeout, requests_exceptions.ConnectionError) as e:
|
||||
last_error = e
|
||||
if attempt < max_retries:
|
||||
logger.warning(
|
||||
f"[WaveSpeed] Speech connection attempt {attempt + 1}/{max_retries + 1} failed, "
|
||||
f"retrying in {retry_delay}s: {e}"
|
||||
)
|
||||
time.sleep(retry_delay)
|
||||
retry_delay *= 2
|
||||
continue
|
||||
logger.error(f"[WaveSpeed] Speech connection failed after {max_retries + 1} attempts: {e}")
|
||||
raise HTTPException(
|
||||
status_code=504,
|
||||
detail={
|
||||
"error": "Connection to WaveSpeed speech API timed out",
|
||||
"message": "Unable to reach the speech service. Please try again.",
|
||||
"exception": str(e),
|
||||
"retry_recommended": True,
|
||||
},
|
||||
)
|
||||
except requests_exceptions.Timeout as e:
|
||||
last_error = e
|
||||
logger.error(f"[WaveSpeed] Speech request timeout: {e}")
|
||||
raise HTTPException(
|
||||
status_code=504,
|
||||
detail={
|
||||
"error": "WaveSpeed speech request timed out",
|
||||
"message": "The speech generation request took too long. Please try again.",
|
||||
"exception": str(e),
|
||||
},
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
logger.error(f"[WaveSpeed] Speech generation failed: {response.status_code} {response.text}")
|
||||
|
||||
@@ -8,7 +8,6 @@ from fastapi import HTTPException
|
||||
from loguru import logger
|
||||
|
||||
from .client import WaveSpeedClient
|
||||
from .kling_animation import generate_animation_prompt
|
||||
|
||||
INFINITALK_MODEL_PATH = "wavespeed-ai/infinitetalk"
|
||||
INFINITALK_MODEL_NAME = "wavespeed-ai/infinitetalk"
|
||||
@@ -22,6 +21,67 @@ def _as_data_uri(content_bytes: bytes, mime_type: str) -> str:
|
||||
return f"data:{mime_type};base64,{encoded}"
|
||||
|
||||
|
||||
def _generate_simple_infinitetalk_prompt(
|
||||
scene_data: Dict[str, Any],
|
||||
story_context: Dict[str, Any],
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
Generate a balanced, concise prompt for InfiniteTalk.
|
||||
InfiniteTalk is audio-driven, so the prompt should describe the scene and suggest
|
||||
subtle motion, but avoid overly elaborate cinematic descriptions.
|
||||
|
||||
Returns None if no meaningful prompt can be generated.
|
||||
"""
|
||||
title = (scene_data.get("title") or "").strip()
|
||||
description = (scene_data.get("description") or "").strip()
|
||||
image_prompt = (scene_data.get("image_prompt") or "").strip()
|
||||
|
||||
# Build a balanced prompt: scene description + simple motion hint
|
||||
parts = []
|
||||
|
||||
# Start with the main subject/scene
|
||||
if title and len(title) > 5 and title.lower() not in ("scene", "podcast", "episode"):
|
||||
parts.append(title)
|
||||
elif description:
|
||||
# Take first sentence or first 60 chars
|
||||
desc_part = description.split('.')[0][:60].strip()
|
||||
if desc_part:
|
||||
parts.append(desc_part)
|
||||
elif image_prompt:
|
||||
# Take first sentence or first 60 chars
|
||||
img_part = image_prompt.split('.')[0][:60].strip()
|
||||
if img_part:
|
||||
parts.append(img_part)
|
||||
|
||||
if not parts:
|
||||
return None
|
||||
|
||||
# Add a simple, subtle motion suggestion (not elaborate camera movements)
|
||||
# Keep it natural and audio-driven
|
||||
motion_hints = [
|
||||
"with subtle movement",
|
||||
"with gentle motion",
|
||||
"with natural animation",
|
||||
]
|
||||
|
||||
# Combine scene description with subtle motion hint
|
||||
if len(parts[0]) < 80:
|
||||
# Room for a motion hint
|
||||
prompt = f"{parts[0]}, {motion_hints[0]}"
|
||||
else:
|
||||
# Just use the description if it's already long enough
|
||||
prompt = parts[0]
|
||||
|
||||
# Keep it concise - max 120 characters (allows for scene + motion hint)
|
||||
prompt = prompt[:120].strip()
|
||||
|
||||
# Clean up trailing commas or incomplete sentences
|
||||
if prompt.endswith(','):
|
||||
prompt = prompt[:-1].strip()
|
||||
|
||||
return prompt if len(prompt) >= 15 else None
|
||||
|
||||
|
||||
def animate_scene_with_voiceover(
|
||||
*,
|
||||
image_bytes: bytes,
|
||||
@@ -31,6 +91,8 @@ def animate_scene_with_voiceover(
|
||||
user_id: str,
|
||||
resolution: str = "720p",
|
||||
prompt_override: Optional[str] = None,
|
||||
mask_image_bytes: Optional[bytes] = None,
|
||||
seed: Optional[int] = -1,
|
||||
image_mime: str = "image/png",
|
||||
audio_mime: str = "audio/mpeg",
|
||||
client: Optional[WaveSpeedClient] = None,
|
||||
@@ -59,21 +121,28 @@ def animate_scene_with_voiceover(
|
||||
if resolution not in {"480p", "720p"}:
|
||||
raise HTTPException(status_code=400, detail="Resolution must be '480p' or '720p'.")
|
||||
|
||||
animation_prompt = prompt_override or generate_animation_prompt(scene_data, story_context, user_id)
|
||||
# Generate simple, concise prompt for InfiniteTalk (audio-driven, less need for elaborate descriptions)
|
||||
animation_prompt = prompt_override or _generate_simple_infinitetalk_prompt(scene_data, story_context)
|
||||
|
||||
payload = {
|
||||
payload: Dict[str, Any] = {
|
||||
"image": _as_data_uri(image_bytes, image_mime),
|
||||
"audio": _as_data_uri(audio_bytes, audio_mime),
|
||||
"resolution": resolution,
|
||||
}
|
||||
# Only include prompt if we have a meaningful one (InfiniteTalk works fine without it)
|
||||
if animation_prompt:
|
||||
payload["prompt"] = animation_prompt
|
||||
if mask_image_bytes:
|
||||
payload["mask_image"] = _as_data_uri(mask_image_bytes, image_mime)
|
||||
if seed is not None:
|
||||
payload["seed"] = seed
|
||||
|
||||
client = client or WaveSpeedClient()
|
||||
prediction_id = client.submit_image_to_video(INFINITALK_MODEL_PATH, payload, timeout=60)
|
||||
|
||||
try:
|
||||
result = client.poll_until_complete(prediction_id, timeout_seconds=600, interval_seconds=1.0)
|
||||
# Poll faster (0.5s) to mirror reference pattern; allow up to 10 minutes
|
||||
result = client.poll_until_complete(prediction_id, timeout_seconds=600, interval_seconds=0.5)
|
||||
except HTTPException as exc:
|
||||
detail = exc.detail or {}
|
||||
if isinstance(detail, dict):
|
||||
|
||||
Reference in New Issue
Block a user