AI podcast project

This commit is contained in:
ajaysi
2025-12-16 16:25:52 +05:30
parent eba5210577
commit 1d745c9bc8
50 changed files with 7637 additions and 2813 deletions

5
.gitignore vendored
View File

@@ -14,6 +14,11 @@ podcast_audio/*
backend/podcast_audio/
podcast_audio/
podcast_images/
youtube_videos/
backend/podcast_images/
backend/podcast_videos/
.cursorignore
story_videos

View File

@@ -45,7 +45,11 @@ class CacheHeadersMiddleware(BaseHTTPMiddleware):
# Immutable files (with hash) - cache for 1 year
# These files never change (new hash = new file)
response.headers["Cache-Control"] = "public, max-age=31536000, immutable"
response.headers["Expires"] = "Thu, 31 Dec 2025 23:59:59 GMT"
# Expires header calculated dynamically to match max-age
# Modern browsers prefer Cache-Control, but Expires provides compatibility
from datetime import datetime, timedelta
expires_date = datetime.utcnow() + timedelta(seconds=31536000)
response.headers["Expires"] = expires_date.strftime("%a, %d %b %Y %H:%M:%S GMT")
else:
# Non-hashed files - shorter cache (1 hour)
# These might be updated, so cache for shorter time

View File

@@ -0,0 +1,28 @@
"""
Podcast API Constants
Centralized constants and directory configuration for podcast module.
"""
from pathlib import Path
from services.story_writer.audio_generation_service import StoryAudioGenerationService
# Directory paths
# NOTE(review): the parents[] arithmetic below assumes this module lives in
# backend/api/podcast/ (alongside router.py) — confirm if the file is moved.
# parents[0] = backend/api/podcast/
# parents[1] = backend/api/
# parents[2] = backend/
BASE_DIR = Path(__file__).resolve().parents[2]  # backend/

# Each media directory is resolved to an absolute path and created (including
# missing parents) at import time, so importing this module touches the
# filesystem.
PODCAST_AUDIO_DIR = (BASE_DIR / "podcast_audio").resolve()
PODCAST_AUDIO_DIR.mkdir(parents=True, exist_ok=True)
PODCAST_IMAGES_DIR = (BASE_DIR / "podcast_images").resolve()
PODCAST_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
PODCAST_VIDEOS_DIR = (BASE_DIR / "podcast_videos").resolve()
PODCAST_VIDEOS_DIR.mkdir(parents=True, exist_ok=True)

# Video subdirectory (a relative path; it is not joined to any base directory here)
AI_VIDEO_SUBDIR = Path("AI_Videos")

# Initialize audio service
# Module-level instance of the story audio service, constructed with the
# podcast audio directory as its output_dir.
audio_service = StoryAudioGenerationService(output_dir=str(PODCAST_AUDIO_DIR))

View File

@@ -0,0 +1,6 @@
"""
Podcast API Handlers
Handler modules for different podcast operations.
"""

View File

@@ -0,0 +1,96 @@
"""
Podcast Analysis Handlers
Analysis endpoint for podcast ideas.
"""
from fastapi import APIRouter, Depends, HTTPException
from typing import Dict, Any
import json
from middleware.auth_middleware import get_current_user
from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_text_generation import llm_text_gen
from loguru import logger
from ..models import PodcastAnalyzeRequest, PodcastAnalyzeResponse
# Router on which this module's podcast analysis endpoint is registered.
router = APIRouter()
@router.post("/analyze", response_model=PodcastAnalyzeResponse)
async def analyze_podcast_idea(
    request: PodcastAnalyzeRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Analyze a podcast idea and return podcast-oriented outlines, keywords, and titles.

    This uses the shared LLM provider but with a podcast-specific prompt (not story format).

    Args:
        request: Carries the idea text, target duration and speaker count that
            are interpolated into the prompt.
        current_user: Authenticated user payload injected by FastAPI.

    Returns:
        PodcastAnalyzeResponse built from the LLM output; missing or empty
        fields fall back to defaults.

    Raises:
        HTTPException: Re-raised unchanged when the LLM call raises one;
            500 when the LLM call fails for any other reason, when its string
            output is not valid JSON, or when the result is not a JSON object.
    """
    user_id = require_authenticated_user(current_user)

    prompt = f"""
You are an expert podcast producer. Given a podcast idea, craft concise podcast-ready assets
that sound like episode plans (not fiction stories).
Podcast Idea: "{request.idea}"
Duration: ~{request.duration} minutes
Speakers: {request.speakers} (host + optional guest)
Return JSON with:
- audience: short target audience description
- content_type: podcast style/format
- top_keywords: 5 podcast-relevant keywords/phrases
- suggested_outlines: 2 items, each with title (<=60 chars) and 4-6 short segments (bullet-friendly, factual)
- title_suggestions: 3 concise episode titles (no cliffhanger storytelling)
- exa_suggested_config: suggested Exa search options to power research (keep conservative defaults to control cost), with:
- exa_search_type: "auto" | "neural" | "keyword" (prefer "auto" unless clearly news-heavy)
- exa_category: one of ["research paper","news","company","github","tweet","personal site","pdf","financial report","linkedin profile"]
- exa_include_domains: up to 3 reputable domains to prioritize (optional)
- exa_exclude_domains: up to 3 domains to avoid (optional)
- max_sources: 6-10
- include_statistics: boolean (true if topic needs fresh stats)
- date_range: one of ["last_month","last_3_months","last_year","all_time"] (pick recent if time-sensitive)
Requirements:
- Keep language factual, actionable, and suited for spoken audio.
- Avoid narrative fiction tone; focus on insights, hooks, objections, and takeaways.
- Prefer 2024-2025 context when relevant.
"""

    try:
        raw = llm_text_gen(prompt=prompt, user_id=user_id, json_struct=None)
    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429 subscription limit) - preserve error details
        raise
    except Exception as exc:
        logger.error(f"[Podcast Analyze] Analysis failed for user {user_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Analysis failed: {exc}") from exc

    # Normalize response (accept dict or JSON string)
    if isinstance(raw, str):
        try:
            data = json.loads(raw)
        except json.JSONDecodeError as exc:
            raise HTTPException(status_code=500, detail="LLM returned non-JSON output") from exc
    else:
        data = raw

    # Valid JSON is not necessarily an object (it may be a list, number, ...);
    # reject anything that does not support the key lookups below.
    if not isinstance(data, dict):
        raise HTTPException(status_code=500, detail="Unexpected LLM response format")

    return PodcastAnalyzeResponse(
        audience=data.get("audience") or "Growth-focused professionals",
        content_type=data.get("content_type") or "Interview + insights",
        top_keywords=data.get("top_keywords") or [],
        suggested_outlines=data.get("suggested_outlines") or [],
        title_suggestions=data.get("title_suggestions") or [],
        exa_suggested_config=data.get("exa_suggested_config") or None,
    )

View File

@@ -0,0 +1,324 @@
"""
Podcast Audio Handlers
Audio generation, combining, and serving endpoints.
"""
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from typing import Dict, Any
from pathlib import Path
from urllib.parse import urlparse
import tempfile
import uuid
import shutil
from services.database import get_db
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
from api.story_writer.utils.auth import require_authenticated_user
from utils.asset_tracker import save_asset_to_library
from models.story_models import StoryAudioResult
from loguru import logger
from ..constants import PODCAST_AUDIO_DIR, audio_service
from ..models import (
PodcastAudioRequest,
PodcastAudioResponse,
PodcastCombineAudioRequest,
PodcastCombineAudioResponse,
)
# Router on which this module's audio generation, combining and serving
# endpoints are registered.
router = APIRouter()
@router.post("/audio", response_model=PodcastAudioResponse)
async def generate_podcast_audio(
    request: PodcastAudioRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate AI audio for a podcast scene using shared audio service.

    Args:
        request: Scene text plus optional voice/encoding settings.
        current_user: Authenticated user payload injected by FastAPI.
        db: Database session used to record the asset.

    Returns:
        PodcastAudioResponse describing the generated audio file.

    Raises:
        HTTPException: 400 when the text is empty; any HTTPException raised
            by the audio service is passed through unchanged; 500 for any
            other generation failure.
    """
    user_id = require_authenticated_user(current_user)

    if not request.text or not request.text.strip():
        raise HTTPException(status_code=400, detail="Text is required")

    try:
        result: StoryAudioResult = audio_service.generate_ai_audio(
            scene_number=0,
            scene_title=request.scene_title,
            text=request.text.strip(),
            user_id=user_id,
            voice_id=request.voice_id or "Wise_Woman",
            speed=request.speed or 1.0,  # Normal speed (was 0.9, but too slow - causing duration issues)
            volume=request.volume or 1.0,
            pitch=request.pitch or 0.0,  # Normal pitch (0.0 = neutral)
            emotion=request.emotion or "neutral",
            english_normalization=request.english_normalization or False,
            sample_rate=request.sample_rate,
            bitrate=request.bitrate,
            channel=request.channel,
            format=request.format,
            language_boost=request.language_boost,
            enable_sync_mode=request.enable_sync_mode,
        )

        # Override URL to use podcast endpoint instead of story endpoint
        if "/api/story/audio/" in (result.get("audio_url") or ""):
            audio_filename = result.get("audio_filename", "")
            result["audio_url"] = f"/api/podcast/audio/{audio_filename}"
    except HTTPException:
        # Keep status code and detail of service-raised HTTP errors instead of
        # flattening them into a generic 500.
        raise
    except Exception as exc:
        logger.error(f"[Podcast] Audio generation failed for user {user_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Audio generation failed: {exc}") from exc

    # Save to asset library (podcast module)
    # Best effort: a failure here is logged and does not fail the request.
    try:
        if result.get("audio_url"):
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="audio",
                source_module="podcast_maker",
                filename=result.get("audio_filename", ""),
                file_url=result.get("audio_url", ""),
                file_path=result.get("audio_path"),
                file_size=result.get("file_size"),
                mime_type="audio/mpeg",
                title=f"{request.scene_title} - Podcast",
                description="Podcast scene narration",
                tags=["podcast", "audio", request.scene_id],
                provider=result.get("provider"),
                model=result.get("model"),
                cost=result.get("cost"),
                asset_metadata={
                    "scene_id": request.scene_id,
                    "scene_title": request.scene_title,
                    "status": "completed",
                },
            )
    except Exception as e:
        logger.warning(f"[Podcast] Failed to save audio asset: {e}")

    return PodcastAudioResponse(
        scene_id=request.scene_id,
        scene_title=request.scene_title,
        audio_filename=result.get("audio_filename", ""),
        audio_url=result.get("audio_url", ""),
        provider=result.get("provider", "wavespeed"),
        model=result.get("model", "minimax/speech-02-hd"),
        voice_id=result.get("voice_id", request.voice_id or "Wise_Woman"),
        text_length=result.get("text_length", len(request.text)),
        file_size=result.get("file_size", 0),
        cost=result.get("cost", 0.0),
    )
def _resolve_podcast_audio_path(audio_url: str):
    """Map a scene audio URL (or path) to a file location inside PODCAST_AUDIO_DIR.

    Returns the resolved Path, or None (after logging the reason) when the
    URL format is unsupported, no filename can be extracted, or the resolved
    location falls outside PODCAST_AUDIO_DIR.
    """
    if audio_url.startswith("/api/"):
        parsed = urlparse(audio_url)
        path = parsed.path if parsed.scheme else audio_url

        # Handle both /api/podcast/audio/ and /api/story/audio/ URLs (for backward compatibility)
        if "/api/podcast/audio/" in path:
            filename = path.split("/api/podcast/audio/", 1)[1].split("?", 1)[0].strip()
        elif "/api/story/audio/" in path:
            filename = path.split("/api/story/audio/", 1)[1].split("?", 1)[0].strip()
            logger.info(f"[Podcast] Converting story audio URL to podcast: {audio_url} -> {filename}")
        else:
            logger.error(f"[Podcast] Unsupported audio URL format: {audio_url}. Expected /api/podcast/audio/ or /api/story/audio/ URLs.")
            return None

        if not filename:
            logger.error(f"[Podcast] Could not extract filename from URL: {audio_url}")
            return None

        candidate = (PODCAST_AUDIO_DIR / filename).resolve()
    else:
        logger.warning(f"[Podcast] Non-API URL format, treating as direct path: {audio_url}")
        candidate = Path(audio_url).resolve()

    # Security check: compare path components rather than string prefixes, so a
    # sibling such as "<dir>_other" is not mistaken for a child of the audio
    # directory. Applied to direct paths too, because they come from the request.
    if PODCAST_AUDIO_DIR not in candidate.parents:
        logger.error(f"[Podcast] Attempted path traversal when resolving audio: {audio_url}")
        return None
    return candidate


@router.post("/combine-audio", response_model=PodcastCombineAudioResponse)
async def combine_podcast_audio(
    request: PodcastCombineAudioRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Combine multiple scene audio files into a single podcast audio file.

    Inputs that cannot be resolved or loaded are logged and skipped; the
    remaining clips are concatenated in request order and written as an MP3
    into PODCAST_AUDIO_DIR.

    Args:
        request: Project id, scene ids and the matching scene audio URLs.
        current_user: Authenticated user payload injected by FastAPI.
        db: Database session used to record the combined asset.

    Returns:
        PodcastCombineAudioResponse with the combined file's URL, filename,
        total duration, size and the number of requested scenes.

    Raises:
        HTTPException: 400 for missing/mismatched inputs, http(s) URLs, or
            when no input could be loaded; 500 when MoviePy is unavailable or
            combining fails.
    """
    user_id = require_authenticated_user(current_user)

    if not request.scene_ids or not request.scene_audio_urls:
        raise HTTPException(status_code=400, detail="Scene IDs and audio URLs are required")
    if len(request.scene_ids) != len(request.scene_audio_urls):
        raise HTTPException(status_code=400, detail="Scene IDs and audio URLs count must match")

    try:
        # Import moviepy for audio concatenation
        try:
            from moviepy import AudioFileClip, concatenate_audioclips
        except ImportError:
            logger.error("[Podcast] MoviePy not available for audio combination")
            raise HTTPException(
                status_code=500,
                detail="Audio combination requires MoviePy. Please install: pip install moviepy"
            )

        # Log incoming request for debugging
        url_count = len(request.scene_audio_urls)
        logger.info(f"[Podcast] Combining audio: {url_count} URLs received")
        for idx, url in enumerate(request.scene_audio_urls):
            logger.info(f"[Podcast] URL {idx+1}: {url}")

        # project_id comes from the request; keep only filename-safe characters
        # so the output name has no separators and can be served by filename.
        safe_project_id = "".join(
            ch if (ch.isalnum() or ch in "-_") else "_" for ch in str(request.project_id)
        )
        output_filename = f"podcast_combined_{safe_project_id}_{uuid.uuid4().hex[:8]}.mp3"
        output_path = PODCAST_AUDIO_DIR / output_filename

        audio_clips = []
        combined_audio = None
        total_duration = 0.0
        try:
            for idx, audio_url in enumerate(request.scene_audio_urls):
                try:
                    if audio_url.startswith("http"):
                        # External URL - would need to download
                        logger.error(f"[Podcast] External URLs not supported: {audio_url}")
                        raise HTTPException(
                            status_code=400,
                            detail="External URLs not supported. Please use local file paths."
                        )

                    audio_path = _resolve_podcast_audio_path(audio_url)
                    if audio_path is None:
                        continue
                    if not audio_path.is_file():
                        logger.error(f"[Podcast] Audio file not found: {audio_path} (from URL: {audio_url})")
                        continue

                    # Load audio clip
                    audio_clip = AudioFileClip(str(audio_path))
                    audio_clips.append(audio_clip)
                    total_duration += audio_clip.duration
                    logger.info(f"[Podcast] Loaded audio {idx+1}/{url_count}: {audio_path.name} ({audio_clip.duration:.2f}s)")
                except HTTPException:
                    raise
                except Exception as e:
                    # Continue with other audio files
                    logger.exception(f"[Podcast] Failed to load audio {idx+1}: {e}")
                    continue

            if not audio_clips:
                raise HTTPException(status_code=400, detail="No valid audio files found to combine")

            # Concatenate all audio clips
            logger.info(f"[Podcast] Combining {len(audio_clips)} audio clips (total duration: {total_duration:.2f}s)")
            combined_audio = concatenate_audioclips(audio_clips)

            # Write combined audio file
            combined_audio.write_audiofile(
                str(output_path),
                codec="mp3",
                bitrate="192k",
                logger=None,  # Suppress moviepy logging
            )
        finally:
            # Close clips on success and on failure so file handles are
            # released even when concatenation or writing raises.
            to_close = list(audio_clips)
            if combined_audio is not None:
                to_close.append(combined_audio)
            for clip in to_close:
                try:
                    clip.close()
                except Exception as e:
                    logger.warning(f"[Podcast] Failed to close audio clip: {e}")

        file_size = output_path.stat().st_size
        audio_url = f"/api/podcast/audio/{output_filename}"
        logger.info(f"[Podcast] Combined audio saved: {output_path} ({file_size} bytes)")

        # Save to asset library
        # Best effort: a failure here is logged and does not fail the request.
        try:
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="audio",
                source_module="podcast_maker",
                filename=output_filename,
                file_url=audio_url,
                file_path=str(output_path),
                file_size=file_size,
                mime_type="audio/mpeg",
                title=f"Combined Podcast - {request.project_id}",
                description=f"Combined podcast audio from {len(request.scene_ids)} scenes",
                tags=["podcast", "audio", "combined", request.project_id],
                asset_metadata={
                    "project_id": request.project_id,
                    "scene_ids": request.scene_ids,
                    "scene_count": len(request.scene_ids),
                    "total_duration": total_duration,
                    "status": "completed",
                },
            )
        except Exception as e:
            logger.warning(f"[Podcast] Failed to save combined audio asset: {e}")

        return PodcastCombineAudioResponse(
            combined_audio_url=audio_url,
            combined_audio_filename=output_filename,
            total_duration=total_duration,
            file_size=file_size,
            scene_count=len(request.scene_ids),
        )
    except HTTPException:
        raise
    except Exception as exc:
        logger.exception(f"[Podcast] Audio combination failed: {exc}")
        raise HTTPException(status_code=500, detail=f"Audio combination failed: {exc}") from exc
@router.get("/audio/{filename}")
async def serve_podcast_audio(
    filename: str,
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve generated podcast scene audio files.

    Supports authentication via Authorization header or token query parameter.
    Query parameter is useful for HTML elements like <audio> that cannot send custom headers.

    Raises:
        HTTPException: 400 when the filename contains "..", "/" or "\\";
            403 when the resolved path is not located inside
            PODCAST_AUDIO_DIR; 404 when no regular file exists at that path.
    """
    require_authenticated_user(current_user)

    # Security check: ensure filename doesn't contain path traversal
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    audio_path = (PODCAST_AUDIO_DIR / filename).resolve()

    # Security check: ensure path is within PODCAST_AUDIO_DIR. Compare path
    # components (not string prefixes) so a sibling directory whose name merely
    # starts with the audio directory's name is rejected; this also rejects a
    # filename such as "." that resolves to the directory itself.
    if PODCAST_AUDIO_DIR not in audio_path.parents:
        raise HTTPException(status_code=403, detail="Access denied")

    # is_file() rather than exists(): a directory cannot be streamed as a file.
    if not audio_path.is_file():
        raise HTTPException(status_code=404, detail="Audio file not found")

    return FileResponse(audio_path, media_type="audio/mpeg")

View File

@@ -0,0 +1,381 @@
"""
Podcast Avatar Handlers
Avatar upload and presenter generation endpoints.
"""
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from typing import Dict, Any, List, Optional
from pathlib import Path
import uuid
import hashlib
from services.database import get_db
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_image_generation import generate_image
from services.llm_providers.main_image_editing import edit_image
from utils.asset_tracker import save_asset_to_library
from loguru import logger
from ..constants import PODCAST_IMAGES_DIR
from ..presenter_personas import choose_persona_id, get_persona
# Router on which this module's avatar upload/transform/generate endpoints
# are registered.
router = APIRouter()

# Avatar subdirectory
# Name of the folder under PODCAST_IMAGES_DIR that holds avatar images; the
# handlers in this module also embed it in the avatar URLs they return.
AVATAR_SUBDIR = "avatars"
PODCAST_AVATARS_DIR = PODCAST_IMAGES_DIR / AVATAR_SUBDIR
# Created at import time (including missing parents) so handlers can write to it.
PODCAST_AVATARS_DIR.mkdir(parents=True, exist_ok=True)
@router.post("/avatar/upload")
async def upload_podcast_avatar(
    file: UploadFile = File(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Upload a presenter avatar image for a podcast project.
    Returns the avatar URL for use in scene image generation.

    Args:
        file: Uploaded image (content type must start with "image/", max 5MB).
        project_id: Optional project identifier; when given, the upload is
            also recorded in the asset library.
        current_user: Authenticated user payload injected by FastAPI.
        db: Database session used to record the asset.

    Returns:
        Dict with "avatar_url", "avatar_filename" and "message".

    Raises:
        HTTPException: 400 for a non-image content type, an oversized file or
            an unsupported file extension; 500 when saving fails.
    """
    user_id = require_authenticated_user(current_user)

    # Validate file type
    if not file.content_type or not file.content_type.startswith('image/'):
        raise HTTPException(status_code=400, detail="File must be an image")

    # Validate file size (max 5MB)
    file_content = await file.read()
    if len(file_content) > 5 * 1024 * 1024:
        raise HTTPException(status_code=400, detail="Image file size must be less than 5MB")

    # The extension comes from the client-supplied filename (which may be
    # missing). Only common raster image extensions are accepted so that a
    # file with e.g. an .html or .svg suffix is never stored under this
    # directory; a missing extension falls back to .png.
    allowed_extensions = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"}
    file_ext = Path(file.filename or "").suffix.lower() or '.png'
    if file_ext not in allowed_extensions:
        raise HTTPException(status_code=400, detail="Unsupported image file extension")

    # project_id is client-supplied; keep only filename-safe characters so it
    # cannot introduce path separators into the stored filename.
    safe_project = "".join(
        ch if (ch.isalnum() or ch in "-_") else "_" for ch in (project_id or "temp")
    )

    try:
        # Generate filename
        unique_id = str(uuid.uuid4())[:8]
        avatar_filename = f"avatar_{safe_project}_{unique_id}{file_ext}"
        avatar_path = PODCAST_AVATARS_DIR / avatar_filename

        # Save file
        with open(avatar_path, "wb") as f:
            f.write(file_content)
        logger.info(f"[Podcast] Avatar uploaded: {avatar_path}")

        # Create avatar URL
        avatar_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{avatar_filename}"

        # Save to asset library if project_id provided
        # Best effort: a failure here is logged and does not fail the upload.
        if project_id:
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
                    source_module="podcast_maker",
                    filename=avatar_filename,
                    file_url=avatar_url,
                    file_path=str(avatar_path),
                    file_size=len(file_content),
                    mime_type=file.content_type,
                    title=f"Podcast Presenter Avatar - {project_id}",
                    description="Podcast presenter avatar image",
                    tags=["podcast", "avatar", project_id],
                    asset_metadata={
                        "project_id": project_id,
                        "type": "presenter_avatar",
                        "status": "completed",
                    },
                )
            except Exception as e:
                logger.warning(f"[Podcast] Failed to save avatar asset: {e}")

        return {
            "avatar_url": avatar_url,
            "avatar_filename": avatar_filename,
            "message": "Avatar uploaded successfully"
        }
    except Exception as exc:
        logger.exception(f"[Podcast] Avatar upload failed: {exc}")
        raise HTTPException(status_code=500, detail=f"Avatar upload failed: {str(exc)}") from exc
@router.post("/avatar/make-presentable")
async def make_avatar_presentable(
    avatar_url: str = Form(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Transform an uploaded avatar image into a podcast-appropriate presenter.
    Uses AI image editing to convert the uploaded photo into a professional podcast presenter.

    Args:
        avatar_url: URL of the previously uploaded avatar to transform.
        project_id: Optional project identifier; when given, the result is
            also recorded in the asset library.
        current_user: Authenticated user payload injected by FastAPI.
        db: Database session used to record the asset.

    Returns:
        Dict with "avatar_url", "avatar_filename" and "message" for the
        transformed image.

    Raises:
        HTTPException: Any HTTPException raised while loading or editing the
            image is passed through unchanged; other failures become a 500.
    """
    user_id = require_authenticated_user(current_user)

    try:
        # Load the uploaded avatar image
        from ..utils import load_podcast_image_bytes
        avatar_bytes = load_podcast_image_bytes(avatar_url)
        logger.info(f"[Podcast] Transforming avatar to podcast presenter for project {project_id}")

        # Create transformation prompt based on WaveSpeed AI recommendations
        # Transform the uploaded image into a professional podcast presenter
        transformation_prompt = """Transform this image into a professional podcast presenter:
- Half-length portrait format, looking at camera
- Professional attire (white shirt and light gray blazer or business casual)
- Confident, friendly, engaging expression
- Soft studio lighting, plain light-gray or neutral background
- Professional podcast host appearance, suitable for video generation
- Clean composition, center-focused for avatar overlay
- Maintain the person's appearance and identity while making it podcast-appropriate
- Ultra realistic, 4k quality, professional photography style"""

        # Transform the image using image editing
        image_options = {
            "provider": None,  # Auto-select provider
            "model": None,  # Use default model
        }
        result = edit_image(
            input_image_bytes=avatar_bytes,
            prompt=transformation_prompt,
            options=image_options,
            user_id=user_id
        )

        # Save transformed avatar
        # project_id is client-supplied; keep only filename-safe characters so
        # it cannot introduce path separators into the stored filename.
        safe_project = "".join(
            ch if (ch.isalnum() or ch in "-_") else "_" for ch in (project_id or "temp")
        )
        unique_id = str(uuid.uuid4())[:8]
        transformed_filename = f"presenter_transformed_{safe_project}_{unique_id}.png"
        transformed_path = PODCAST_AVATARS_DIR / transformed_filename
        with open(transformed_path, "wb") as f:
            f.write(result.image_bytes)
        transformed_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{transformed_filename}"
        logger.info(f"[Podcast] Transformed avatar saved to: {transformed_path}")

        # Save to asset library
        # Best effort: a failure here is logged and does not fail the request.
        if project_id:
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
                    source_module="podcast_maker",
                    filename=transformed_filename,
                    file_url=transformed_url,
                    file_path=str(transformed_path),
                    file_size=len(result.image_bytes),
                    mime_type="image/png",
                    title=f"Podcast Presenter (Transformed) - {project_id}",
                    description="AI-transformed podcast presenter avatar from uploaded photo",
                    prompt=transformation_prompt,
                    tags=["podcast", "avatar", "presenter", "transformed", project_id],
                    provider=result.provider,
                    model=result.model,
                    asset_metadata={
                        "project_id": project_id,
                        "type": "transformed_presenter",
                        "original_avatar_url": avatar_url,
                        "status": "completed",
                    },
                )
            except Exception as e:
                logger.warning(f"[Podcast] Failed to save transformed avatar asset: {e}")

        return {
            "avatar_url": transformed_url,
            "avatar_filename": transformed_filename,
            "message": "Avatar transformed into podcast presenter successfully"
        }
    except HTTPException:
        # Keep status code and detail of HTTP errors raised by the helpers
        # instead of flattening them into a generic 500.
        raise
    except Exception as exc:
        logger.exception(f"[Podcast] Avatar transformation failed: {exc}")
        raise HTTPException(status_code=500, detail=f"Avatar transformation failed: {str(exc)}") from exc
def _presenter_style_parts(audience, content_type, keywords_list):
    """Return the prompt fragments (attire, expression, optional keyword context)
    derived from the analysis inputs; they are the same for every speaker."""
    parts = []

    # Use content_type to influence attire/style
    attire = "professional attire (white shirt and light gray blazer or business casual)"
    if content_type:
        content_lower = content_type.lower()
        if "business" in content_lower or "corporate" in content_lower:
            attire = "business professional attire (white shirt and light gray blazer)"
        elif "casual" in content_lower or "conversational" in content_lower:
            attire = "business casual attire (smart casual, approachable)"
        elif "tech" in content_lower:  # also matches "technology"
            attire = "modern professional attire (tech-forward, contemporary style)"
    parts.append(attire)

    # Use audience to influence expression and style
    if audience:
        audience_lower = audience.lower()
        if "young" in audience_lower or "millennial" in audience_lower or "gen z" in audience_lower:
            parts.append("modern, energetic, approachable expression")
        elif "executive" in audience_lower or "professional" in audience_lower or "business" in audience_lower:
            parts.append("confident, authoritative, professional expression")
        else:
            parts.append("confident, friendly, engaging expression")
    else:
        parts.append("confident, friendly expression")

    # Add keywords context if available (for visual style hints)
    style_words = ["tech", "business", "creative", "modern", "professional"]
    visual_keywords = [
        k for k in keywords_list[:3]
        if any(word in k.lower() for word in style_words)
    ]
    if visual_keywords:
        parts.append(f"context: {', '.join(visual_keywords[:2])}")

    return parts


@router.post("/avatar/generate")
async def generate_podcast_presenters(
    speakers: int = Form(...),
    project_id: Optional[str] = Form(None),
    audience: Optional[str] = Form(None),
    content_type: Optional[str] = Form(None),
    top_keywords: Optional[str] = Form(None),  # JSON string array
    persona_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate presenter avatar images based on number of speakers and AI analysis insights.
    Uses analysis data (audience, content_type, keywords) to create more relevant presenters.
    Returns list of avatar URLs.
    Based on WaveSpeed AI recommendations for professional podcast presenters.

    Args:
        speakers: Number of presenters to generate (1 or 2).
        project_id: Optional project identifier; when given, each avatar is
            also recorded in the asset library.
        audience: Optional audience description used to pick the expression.
        content_type: Optional content style used to pick the attire.
        top_keywords: Optional JSON array of keyword strings; input that is
            not valid JSON or not a list is treated as no keywords, and
            non-string items are dropped.
        persona_id: Optional persona id; when omitted, one is chosen from the
            analysis inputs.
        current_user: Authenticated user payload injected by FastAPI.
        db: Database session used to record the assets.

    Returns:
        Dict with "avatars" (one entry per speaker), "message" and
        "persona_id".

    Raises:
        HTTPException: 400 when speakers is outside 1..2; any HTTPException
            raised during generation is passed through unchanged; other
            failures become a 500.
    """
    user_id = require_authenticated_user(current_user)

    if speakers < 1 or speakers > 2:
        raise HTTPException(status_code=400, detail="Speakers must be between 1 and 2")

    try:
        # Parse keywords if provided
        keywords_list = []
        if top_keywords:
            import json
            try:
                parsed_keywords = json.loads(top_keywords) if isinstance(top_keywords, str) else top_keywords
            except (TypeError, ValueError):
                parsed_keywords = []
            # The form field is client-supplied: accept only a list and keep
            # only string items, because the keywords are lower-cased later.
            if isinstance(parsed_keywords, list):
                keywords_list = [k for k in parsed_keywords if isinstance(k, str)]

        # Choose persona (market-fit + style) using analysis if not explicitly provided.
        # Do not infer sensitive traits (like ethnicity); personas represent market + style only.
        selected_persona_id = persona_id or choose_persona_id(
            audience=audience,
            content_type=content_type,
            top_keywords=keywords_list,
        )
        persona = get_persona(selected_persona_id)

        # Fragments that do not depend on the speaker index are built once.
        style_parts = _presenter_style_parts(audience, content_type, keywords_list)
        technical_parts = [
            "looking at camera",
            "soft studio lighting, plain light-gray or neutral background",
            "ultra realistic, 4k quality, 85mm lens, f/2.8",
            "professional podcast host appearance, suitable for video generation",
            "clean composition, center-focused for avatar overlay"
        ]

        # project_id is client-supplied; keep only filename-safe characters so
        # it cannot introduce path separators into the stored filenames.
        safe_project = "".join(
            ch if (ch.isalnum() or ch in "-_") else "_" for ch in (project_id or "temp")
        )

        generated_avatars = []
        for i in range(speakers):
            # Generate presenter-specific prompt based on WaveSpeed AI recommendations
            # Enhanced with analysis insights for more relevant presenter appearance
            gender = "female" if i == 0 else "male"  # First speaker female, second male

            # Build context-aware prompt using analysis insights + persona preset
            prompt_parts = [
                f"Half-length portrait of a professional podcast presenter ({gender}, 25-35 years old)",
                "photo-realistic, professional photography",
            ]
            if persona:
                prompt_parts.append(persona.prompt)
            prompt_parts.extend(style_parts)
            prompt_parts.extend(technical_parts)
            prompt = ", ".join(prompt_parts)

            logger.info(f"[Podcast] Generating presenter {i+1}/{speakers} for project {project_id}")

            # Generate image
            # Use a deterministic seed per (project_id, speaker_number, persona_id) to keep presenter identity stable.
            # Note: determinism may vary by provider/model, but seed improves consistency substantially.
            seed_source = f"{project_id or 'temp'}|speaker={i+1}|persona={selected_persona_id}"
            seed = int(hashlib.sha256(seed_source.encode("utf-8")).hexdigest()[:8], 16)
            image_options = {
                "provider": None,  # Auto-select provider
                "width": 1024,
                "height": 1024,
                "seed": seed,
            }
            result = generate_image(
                prompt=prompt,
                options=image_options,
                user_id=user_id
            )

            # Save avatar
            unique_id = str(uuid.uuid4())[:8]
            avatar_filename = f"presenter_{safe_project}_{i+1}_{unique_id}.png"
            avatar_path = PODCAST_AVATARS_DIR / avatar_filename
            with open(avatar_path, "wb") as f:
                f.write(result.image_bytes)
            avatar_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{avatar_filename}"

            # Save to asset library
            # Best effort: a failure here is logged and does not fail the request.
            if project_id:
                try:
                    save_asset_to_library(
                        db=db,
                        user_id=user_id,
                        asset_type="image",
                        source_module="podcast_maker",
                        filename=avatar_filename,
                        file_url=avatar_url,
                        file_path=str(avatar_path),
                        file_size=len(result.image_bytes),
                        mime_type="image/png",
                        title=f"Podcast Presenter {i+1} - {project_id}",
                        description=f"Generated podcast presenter avatar for speaker {i+1}",
                        prompt=prompt,
                        tags=["podcast", "avatar", "presenter", project_id],
                        provider=result.provider,
                        model=result.model,
                        asset_metadata={
                            "project_id": project_id,
                            "speaker_number": i + 1,
                            "type": "generated_presenter",
                            "status": "completed",
                            "persona_id": selected_persona_id,
                            "seed": seed,
                        },
                    )
                except Exception as e:
                    logger.warning(f"[Podcast] Failed to save presenter asset: {e}")

            generated_avatars.append({
                "avatar_url": avatar_url,
                "avatar_filename": avatar_filename,
                "speaker_number": i + 1,
                "prompt": prompt,  # Include the prompt used for generation
                "persona_id": selected_persona_id,
                "seed": seed,
            })

        return {
            "avatars": generated_avatars,
            "message": f"Generated {speakers} presenter avatar(s)",
            "persona_id": selected_persona_id,
        }
    except HTTPException:
        # Keep status code and detail of HTTP errors raised during generation
        # instead of flattening them into a generic 500.
        raise
    except Exception as exc:
        logger.exception(f"[Podcast] Presenter generation failed: {exc}")
        raise HTTPException(status_code=500, detail=f"Presenter generation failed: {str(exc)}") from exc

View File

@@ -0,0 +1,431 @@
"""
Podcast Image Handlers
Image generation and serving endpoints.
"""
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from typing import Dict, Any
from pathlib import Path
import uuid
from services.database import get_db
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_image_generation import generate_image
from utils.asset_tracker import save_asset_to_library
from loguru import logger
from ..constants import PODCAST_IMAGES_DIR
from ..models import PodcastImageRequest, PodcastImageResponse
# Router on which this module's podcast image endpoints are registered.
router = APIRouter()
# Keyword groups used to pick a visual theme from the scene content. Matching is
# a plain substring test against the lower-cased content preview.
_SCENE_KEYWORD_GROUPS = (
    ("data", "statistics", "numbers", "chart", "graph"),
    ("technology", "tech", "digital", "ai", "software"),
    ("business", "growth", "strategy", "market"),
)

# Fallback mapping from requested pixel dimensions to an aspect-ratio label.
_ASPECT_RATIO_BY_SIZE = {
    (1024, 1024): "1:1",
    (1920, 1080): "16:9",
    (1080, 1920): "9:16",
    (1280, 960): "4:3",
    (960, 1280): "3:4",
}


def _filename_safe(value: Any) -> str:
    """Return ``str(value)`` with every character outside [alnum, '-', '_'] replaced by '_'."""
    return "".join(c if c.isalnum() or c in ("-", "_") else "_" for c in str(value))


def _scene_visual_keywords(scene_content: str, preview_chars: int, phrases) -> list:
    """Return the phrases whose keyword group occurs in the first ``preview_chars`` of the content.

    ``phrases`` is aligned position-by-position with ``_SCENE_KEYWORD_GROUPS``.
    """
    content_lower = scene_content[:preview_chars].replace("\n", " ").strip().lower()
    return [
        phrase
        for words, phrase in zip(_SCENE_KEYWORD_GROUPS, phrases)
        if any(word in content_lower for word in words)
    ]


def _build_character_scene_prompt(request: PodcastImageRequest) -> str:
    """Build the scene prompt used together with a base avatar (character-consistent path)."""
    prompt_parts = []
    # Scene context (primary focus)
    if request.scene_title:
        prompt_parts.append(f"Scene: {request.scene_title}")
    # Scene content insights for visual context
    if request.scene_content:
        visual_keywords = _scene_visual_keywords(
            request.scene_content,
            200,
            (
                "data visualization background",
                "modern tech studio setting",
                "professional business studio",
            ),
        )
        if visual_keywords:
            prompt_parts.append(", ".join(visual_keywords))
    # Podcast theme context
    if request.idea:
        idea_preview = request.idea[:60].strip()
        prompt_parts.append(f"Topic: {idea_preview}")
    # Studio setting (maintains podcast aesthetic)
    prompt_parts.extend([
        "Professional podcast recording studio",
        "Modern microphone setup",
        "Clean background, professional lighting",
        "16:9 aspect ratio, video-optimized composition",
    ])
    return ", ".join(prompt_parts)


def _build_standard_scene_prompt(request: PodcastImageRequest) -> str:
    """Build the from-scratch prompt used when no base avatar is available."""
    # Core podcast studio elements
    prompt_parts = [
        "Professional podcast recording studio",
        "Modern podcast setup with high-quality microphone",
        "Clean, minimalist background suitable for video",
        "Professional studio lighting with soft, even illumination",
        "Podcast host environment, professional and inviting",
    ]
    # Scene-specific context
    if request.scene_title:
        prompt_parts.append(f"Scene theme: {request.scene_title}")
    # Content context for visual relevance
    if request.scene_content:
        visual_keywords = _scene_visual_keywords(
            request.scene_content,
            150,
            (
                "data visualization elements",
                "modern technology aesthetic",
                "professional business environment",
            ),
        )
        if visual_keywords:
            prompt_parts.append(", ".join(visual_keywords))
    # Podcast theme context
    if request.idea:
        idea_preview = request.idea[:80].strip()
        prompt_parts.append(f"Podcast topic context: {idea_preview}")
    # Technical requirements for video generation
    prompt_parts.extend([
        "16:9 aspect ratio optimized for video",
        "Center-focused composition for talking avatar overlay",
        "Neutral color palette with professional tones",
        "High resolution, sharp focus, professional photography quality",
        "No text, no logos, no distracting elements",
        "Suitable for InfiniteTalk video generation with animated avatar",
    ])
    # Style constraints
    prompt_parts.extend([
        "Realistic photography style, not illustration or cartoon",
        "Professional broadcast quality",
        "Warm, inviting atmosphere",
        "Clean composition with breathing room for avatar placement",
    ])
    return ", ".join(prompt_parts)


def _resolve_aspect_ratio(request: PodcastImageRequest) -> str:
    """Return the explicit aspect ratio, else one derived from width/height (default "16:9")."""
    if request.aspect_ratio:
        return request.aspect_ratio
    return _ASPECT_RATIO_BY_SIZE.get((request.width, request.height), "16:9")


def _character_generation_error(error_msg: str) -> HTTPException:
    """Map a character-generation failure message to a 504 (transient) or 502 HTTPException."""
    error_msg_lower = error_msg.lower()
    if "timeout" in error_msg_lower or "connection" in error_msg_lower or "504" in error_msg:
        return HTTPException(
            status_code=504,
            detail={
                "error": "Image generation service unavailable",
                "message": "The character-consistent image generation service is currently unavailable. Please try again in a few moments. If the problem persists, the service may be experiencing high load.",
                "retry_recommended": True,
            },
        )
    return HTTPException(
        status_code=502,
        detail={
            "error": "Character-consistent image generation failed",
            "message": f"Failed to generate image with character consistency: {error_msg}",
            "retry_recommended": True,
        },
    )


@router.post("/image", response_model=PodcastImageResponse)
async def generate_podcast_scene_image(
    request: PodcastImageRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate an AI image for a podcast scene.

    Flow:
      1. Run the subscription pre-flight check for one image.
      2. If ``base_avatar_url`` is set, load it and generate a character-consistent
         image via the WaveSpeed client; a failure on this path is an error and
         never falls back to standard generation.
      3. Otherwise build a studio prompt and call ``generate_image``.
      4. Write the PNG to ``PODCAST_IMAGES_DIR``, then record usage and the
         library asset. Both of those are best-effort and never fail the request.

    Raises:
        HTTPException: 400 when the scene title is missing; 500 when the base
            avatar cannot be loaded or an unexpected error occurs; 502/504 when
            character-consistent generation fails; anything raised by the
            pre-flight validator or the WaveSpeed client is re-raised as-is.
    """
    user_id = require_authenticated_user(current_user)
    if not request.scene_title:
        raise HTTPException(status_code=400, detail="Scene title is required")

    try:
        # PRE-FLIGHT VALIDATION: Check subscription limits before any API calls
        from services.subscription import PricingService
        from services.subscription.preflight_validator import validate_image_generation_operations
        from fastapi import HTTPException as FastAPIHTTPException

        pricing_service = PricingService(db)
        try:
            # Raises HTTPException immediately if validation fails
            validate_image_generation_operations(
                pricing_service=pricing_service,
                user_id=user_id,
                num_images=1
            )
            logger.info(f"[Podcast] ✅ Pre-flight validation passed for user {user_id}")
        except FastAPIHTTPException as http_ex:
            logger.error(f"[Podcast] ❌ Pre-flight validation failed for user {user_id}: {http_ex.detail}")
            raise

        logger.info(f"[Podcast] Image generation request for scene {request.scene_id}")
        logger.info(f"[Podcast] base_avatar_url={request.base_avatar_url}")
        logger.info(f"[Podcast] custom_prompt={request.custom_prompt}")
        logger.info(f"[Podcast] style={request.style}, rendering_speed={request.rendering_speed}, aspect_ratio={request.aspect_ratio}")

        # If a base avatar is provided, create a scene-specific variation of it;
        # otherwise generate from scratch.
        if request.base_avatar_url:
            from ..utils import load_podcast_image_bytes
            try:
                logger.info(f"[Podcast] Attempting to load base avatar from: {request.base_avatar_url}")
                base_avatar_bytes = load_podcast_image_bytes(request.base_avatar_url)
                logger.info(f"[Podcast] ✅ Successfully loaded base avatar ({len(base_avatar_bytes)} bytes) for scene {request.scene_id}")
            except Exception as e:
                # Values are passed as format arguments, not pre-interpolated:
                # loguru formats the message whenever args/kwargs are given, so
                # braces inside the error text must not end up in the template.
                # logger.exception also attaches the traceback.
                logger.exception(
                    "[Podcast] ❌ Failed to load base avatar from {}: {}",
                    request.base_avatar_url,
                    e,
                )
                # Without the base avatar character consistency is impossible,
                # so fail instead of falling back to standard generation.
                raise HTTPException(
                    status_code=500,
                    detail={
                        "error": "Failed to load base avatar",
                        "message": f"Could not load the base avatar image for character consistency: {str(e)}. Please ensure the avatar image is accessible.",
                    },
                ) from e
        else:
            logger.info(f"[Podcast] No base avatar URL provided, will generate from scratch")
            base_avatar_bytes = None

        # NOTE(review): the generation calls below look synchronous and run
        # directly inside this async handler; confirm whether they should be
        # moved to a worker thread.
        image_prompt = ""
        if base_avatar_bytes:
            # Character-consistent generation from the base avatar
            if request.custom_prompt:
                # User provided custom prompt - use it directly
                image_prompt = request.custom_prompt
                logger.info(f"[Podcast] Using custom prompt from user for scene {request.scene_id}")
            else:
                image_prompt = _build_character_scene_prompt(request)
            logger.info(f"[Podcast] Using Ideogram Character for scene {request.scene_id} with base avatar")
            logger.info(f"[Podcast] Scene prompt: {image_prompt[:150]}...")

            from services.wavespeed.client import WaveSpeedClient
            wavespeed_client = WaveSpeedClient()

            # Use custom settings if provided, otherwise use defaults
            style = request.style or "Realistic"
            rendering_speed = request.rendering_speed or "Quality"
            aspect_ratio = _resolve_aspect_ratio(request)
            logger.info(f"[Podcast] Ideogram Character settings: style={style}, rendering_speed={rendering_speed}, aspect_ratio={aspect_ratio}")

            try:
                image_bytes = wavespeed_client.generate_character_image(
                    prompt=image_prompt,
                    reference_image_bytes=base_avatar_bytes,
                    style=style,
                    aspect_ratio=aspect_ratio,
                    rendering_speed=rendering_speed,
                    timeout=None,  # No timeout - poll until WaveSpeed says it's done or failed
                )
                # Create result object compatible with ImageGenerationResult
                from services.llm_providers.image_generation.base import ImageGenerationResult
                result = ImageGenerationResult(
                    image_bytes=image_bytes,
                    provider="wavespeed",
                    model="ideogram-ai/ideogram-character",
                    width=request.width,
                    height=request.height,
                )
                logger.info(f"[Podcast] ✅ Successfully generated character-consistent scene image")
            except HTTPException as http_err:
                # Re-raise HTTPExceptions from wavespeed client as-is
                logger.error(f"[Podcast] ❌ Ideogram Character HTTPException: {http_err.status_code} - {http_err.detail}")
                raise
            except Exception as char_error:
                error_msg = str(char_error)
                error_type = type(char_error).__name__
                logger.exception("[Podcast] ❌ Ideogram Character failed: {}: {}", error_type, error_msg)
                # No fallback to standard generation: that would silently lose
                # character consistency.
                raise _character_generation_error(error_msg) from char_error

        # Reached with no avatar bytes only when no usable base avatar was
        # supplied; load and generation failures have already raised above.
        if not base_avatar_bytes:
            logger.info(f"[Podcast] No base avatar provided - generating standard image from scratch")
            image_prompt = _build_standard_scene_prompt(request)
            logger.info(f"[Podcast] Generating image for scene {request.scene_id}: {request.scene_title}")
            # Generate image using main_image_generation service
            image_options = {
                "provider": None,  # Auto-select provider
                "width": request.width,
                "height": request.height,
            }
            result = generate_image(
                prompt=image_prompt,
                options=image_options,
                user_id=user_id
            )

        # Save image to podcast images directory
        PODCAST_IMAGES_DIR.mkdir(parents=True, exist_ok=True)

        # scene_id and title are client-supplied; sanitize both so the filename
        # can never contain path separators or "..".
        clean_scene_id = _filename_safe(request.scene_id)
        clean_title = _filename_safe(request.scene_title[:30])
        unique_id = str(uuid.uuid4())[:8]
        image_filename = f"scene_{clean_scene_id}_{clean_title}_{unique_id}.png"
        image_path = PODCAST_IMAGES_DIR / image_filename

        with open(image_path, "wb") as f:
            f.write(result.image_bytes)
        logger.info(f"[Podcast] Saved image to: {image_path}")

        # Create image URL (served via API endpoint)
        image_url = f"/api/podcast/images/{image_filename}"

        # Estimate cost (rough estimate: ~$0.04 per image for most providers, ~$0.08 for Ideogram Character Quality)
        cost = 0.08 if result.provider == "wavespeed" and result.model == "ideogram-ai/ideogram-character" else 0.04

        # TRACK USAGE after successful image generation (best-effort)
        try:
            from models.subscription_models import UsageSummary, APIProvider
            from sqlalchemy import text as sql_text
            from datetime import datetime

            current_period = pricing_service.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m")
            # Note: stability_calls is used for all image generation providers, not just Stability AI
            update_query = sql_text("""
                UPDATE usage_summaries
                SET stability_calls = COALESCE(stability_calls, 0) + 1,
                    stability_cost = COALESCE(stability_cost, 0) + :cost,
                    total_calls = COALESCE(total_calls, 0) + 1,
                    total_cost = COALESCE(total_cost, 0) + :cost
                WHERE user_id = :user_id AND billing_period = :period
            """)
            db.execute(update_query, {
                'cost': cost,
                'user_id': user_id,
                'period': current_period
            })
            db.commit()
            logger.info(f"[Podcast] ✅ Tracked image generation usage: user={user_id}, cost=${cost:.4f}, provider={result.provider}")
        except Exception as usage_error:
            logger.error(f"[Podcast] Failed to track image generation usage: {usage_error}")
            db.rollback()
            # Don't fail the request if usage tracking fails

        # Save to asset library (best-effort)
        try:
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="image",
                source_module="podcast_maker",
                filename=image_filename,
                file_url=image_url,
                file_path=str(image_path),
                file_size=len(result.image_bytes),
                mime_type="image/png",
                title=f"{request.scene_title} - Podcast Scene",
                description=f"Podcast scene image: {request.scene_title}",
                prompt=image_prompt,
                tags=["podcast", "scene", request.scene_id],
                provider=result.provider,
                model=result.model,
                asset_metadata={
                    "scene_id": request.scene_id,
                    "scene_title": request.scene_title,
                    "status": "completed",
                },
            )
        except Exception as e:
            logger.warning(f"[Podcast] Failed to save image asset: {e}")

        return PodcastImageResponse(
            scene_id=request.scene_id,
            scene_title=request.scene_title,
            image_filename=image_filename,
            image_url=image_url,
            width=result.width,
            height=result.height,
            provider=result.provider,
            model=result.model,
            cost=cost,
        )
    except HTTPException:
        # Re-raise HTTPExceptions as-is (they already have proper error details)
        raise
    except Exception as exc:
        error_msg = str(exc)
        error_type = type(exc).__name__
        logger.exception("[Podcast] Image generation failed: {}: {}", error_type, error_msg)
        raise HTTPException(
            status_code=500,
            detail={
                "error": "Image generation failed",
                "message": error_msg,
                "type": error_type,
            }
        ) from exc
@router.get("/images/{path:path}")
async def serve_podcast_image(
    path: str,  # Changed from filename to path to support subdirectories
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve generated podcast scene images and avatars.

    Supports authentication via Authorization header or token query parameter.
    Query parameter is useful for HTML elements like <img> that cannot send custom headers.
    Supports subdirectories like avatars/

    Raises:
        HTTPException: 400 for a path containing ".." or starting with "/",
            403 when the resolved path is outside ``PODCAST_IMAGES_DIR``,
            404 when the target is not an existing regular file.
    """
    require_authenticated_user(current_user)

    # Security check: reject obvious traversal attempts and absolute paths
    if ".." in path or path.startswith("/"):
        raise HTTPException(status_code=400, detail="Invalid path")

    images_root = PODCAST_IMAGES_DIR.resolve()
    image_path = (images_root / path).resolve()

    # Security check: compare path components, not string prefixes. A string
    # prefix test would also accept a sibling such as "<root>_other/...",
    # reachable for example through a symlink.
    try:
        image_path.relative_to(images_root)
    except ValueError:
        raise HTTPException(status_code=403, detail="Access denied") from None

    # is_file() rather than exists(): a directory cannot be streamed as a file.
    if not image_path.is_file():
        raise HTTPException(status_code=404, detail="Image not found")

    return FileResponse(image_path, media_type="image/png")

View File

@@ -0,0 +1,203 @@
"""
Podcast Project Handlers
CRUD operations for podcast projects.
"""
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.orm import Session
from typing import Optional, Dict, Any
from services.database import get_db
from middleware.auth_middleware import get_current_user
from services.podcast_service import PodcastService
from ..models import (
PodcastProjectResponse,
CreateProjectRequest,
UpdateProjectRequest,
PodcastProjectListResponse,
)
router = APIRouter()
@router.post("/projects", response_model=PodcastProjectResponse, status_code=201)
async def create_project(
    request: CreateProjectRequest,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Create a podcast project owned by the authenticated user.

    Responds 401 when no user id is present, 400 when the user already has a
    project with the requested id, and 500 on any other failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        podcast_service = PodcastService(db)

        # Project ids must be unique per user.
        if podcast_service.get_project(owner_id, request.project_id):
            raise HTTPException(status_code=400, detail="Project ID already exists")

        new_project_fields = {
            "user_id": owner_id,
            "project_id": request.project_id,
            "idea": request.idea,
            "duration": request.duration,
            "speakers": request.speakers,
            "budget_cap": request.budget_cap,
        }
        created = podcast_service.create_project(**new_project_fields)
        return PodcastProjectResponse.model_validate(created)
    except HTTPException:
        # Already carries the intended status code.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error creating project: {str(e)}")
@router.get("/projects/{project_id}", response_model=PodcastProjectResponse)
async def get_project(
    project_id: str,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Return one of the authenticated user's podcast projects.

    Responds 401 when no user id is present, 404 when the project is not found
    for that user, and 500 on any other failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        found = PodcastService(db).get_project(owner_id, project_id)
        if not found:
            raise HTTPException(status_code=404, detail="Project not found")

        return PodcastProjectResponse.model_validate(found)
    except HTTPException:
        # Already carries the intended status code.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error fetching project: {str(e)}")
@router.put("/projects/{project_id}", response_model=PodcastProjectResponse)
async def update_project(
    project_id: str,
    request: UpdateProjectRequest,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Apply a partial update to one of the authenticated user's projects.

    Responds 401 when no user id is present, 404 when the project is not found
    for that user, and 500 on any other failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        # Only fields the client actually sent are forwarded (exclude_unset).
        changed_fields = request.model_dump(exclude_unset=True)

        updated = PodcastService(db).update_project(owner_id, project_id, **changed_fields)
        if not updated:
            raise HTTPException(status_code=404, detail="Project not found")

        return PodcastProjectResponse.model_validate(updated)
    except HTTPException:
        # Already carries the intended status code.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error updating project: {str(e)}")
@router.get("/projects", response_model=PodcastProjectListResponse)
async def list_projects(
    status: Optional[str] = Query(None, description="Filter by status"),
    favorites_only: bool = Query(False, description="Only favorites"),
    limit: int = Query(50, ge=1, le=200),
    offset: int = Query(0, ge=0),
    order_by: str = Query("updated_at", description="Order by: updated_at or created_at"),
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Return a page of the authenticated user's podcast projects.

    Responds 401 when no user id is present, 400 for an unsupported
    ``order_by`` value, and 500 on any other failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        # Only these two sort columns are accepted.
        if order_by != "updated_at" and order_by != "created_at":
            raise HTTPException(status_code=400, detail="order_by must be 'updated_at' or 'created_at'")

        page, total_count = PodcastService(db).list_projects(
            user_id=owner_id,
            status=status,
            favorites_only=favorites_only,
            limit=limit,
            offset=offset,
            order_by=order_by,
        )

        serialized = []
        for item in page:
            serialized.append(PodcastProjectResponse.model_validate(item))

        return PodcastProjectListResponse(
            projects=serialized,
            total=total_count,
            limit=limit,
            offset=offset,
        )
    except HTTPException:
        # Already carries the intended status code.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error listing projects: {str(e)}")
@router.delete("/projects/{project_id}", status_code=204)
async def delete_project(
    project_id: str,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Delete one of the authenticated user's podcast projects.

    Returns no body on success. Responds 401 when no user id is present, 404
    when the service reports nothing was deleted, and 500 on any other failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        was_deleted = PodcastService(db).delete_project(owner_id, project_id)
        if was_deleted:
            return None

        raise HTTPException(status_code=404, detail="Project not found")
    except HTTPException:
        # Already carries the intended status code.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error deleting project: {str(e)}")
@router.post("/projects/{project_id}/favorite", response_model=PodcastProjectResponse)
async def toggle_favorite(
    project_id: str,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Flip the favorite flag on one of the authenticated user's projects.

    Responds 401 when no user id is present, 404 when the project is not found
    for that user, and 500 on any other failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        toggled = PodcastService(db).toggle_favorite(owner_id, project_id)
        if not toggled:
            raise HTTPException(status_code=404, detail="Project not found")

        return PodcastProjectResponse.model_validate(toggled)
    except HTTPException:
        # Already carries the intended status code.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error toggling favorite: {str(e)}")

View File

@@ -0,0 +1,99 @@
"""
Podcast Research Handlers
Research endpoints using Exa provider.
"""
from fastapi import APIRouter, Depends, HTTPException
from typing import Dict, Any
from types import SimpleNamespace
from middleware.auth_middleware import get_current_user
from api.story_writer.utils.auth import require_authenticated_user
from services.blog_writer.research.exa_provider import ExaResearchProvider
from loguru import logger
from ..models import (
PodcastExaResearchRequest,
PodcastExaResearchResponse,
PodcastExaSource,
PodcastExaConfig,
)
router = APIRouter()
@router.post("/research/exa", response_model=PodcastExaResearchResponse)
async def podcast_research_exa(
    request: PodcastExaResearchRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Run podcast research directly via Exa (no blog writer pipeline).

    The search itself is driven by ``request.topic``. The cleaned queries are
    validated (at least one is required) and echoed back as ``search_queries``
    when the provider does not report its own.

    Raises:
        HTTPException: 400 when no non-blank query is supplied, 500 when the
            Exa search fails.
    """
    user_id = require_authenticated_user(current_user)

    queries = [q.strip() for q in (request.queries or []) if q and q.strip()]
    if not queries:
        raise HTTPException(status_code=400, detail="At least one query is required for research.")

    exa_cfg = request.exa_config or PodcastExaConfig()
    cfg = SimpleNamespace(
        exa_search_type=exa_cfg.exa_search_type or "auto",
        exa_category=exa_cfg.exa_category,
        exa_include_domains=exa_cfg.exa_include_domains or [],
        exa_exclude_domains=exa_cfg.exa_exclude_domains or [],
        max_sources=exa_cfg.max_sources or 8,
        source_types=[],
    )

    provider = ExaResearchProvider()
    try:
        result = await provider.search(
            prompt=request.topic,
            topic=request.topic,
            industry="",
            target_audience="",
            config=cfg,
            user_id=user_id,
        )
    except Exception as exc:
        logger.error(f"[Podcast Exa Research] Failed for user {user_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Exa research failed: {exc}") from exc

    # Normalize once: anything other than a dict is treated as "no data".
    result_is_dict = isinstance(result, dict)
    payload = result if result_is_dict else {}

    # Track usage if available (best-effort; never fails the request)
    try:
        cost_total = 0.0
        if result_is_dict:
            # 0.005 is the fallback cost when the provider reports none.
            cost_total = payload.get("cost", {}).get("total", 0.005) if payload.get("cost") else 0.005
        provider.track_exa_usage(user_id, cost_total)
    except Exception as track_err:
        logger.warning(f"[Podcast Exa Research] Failed to track usage: {track_err}")

    sources_payload = []
    for src in payload.get("sources", []) or []:
        try:
            sources_payload.append(PodcastExaSource(**src))
            continue
        except Exception:
            # Fall through and retry with only the known fields.
            pass

        if not isinstance(src, dict):
            logger.warning(f"[Podcast Exa Research] Skipping malformed source of type {type(src).__name__}")
            continue

        try:
            sources_payload.append(PodcastExaSource(
                title=src.get("title", ""),
                url=src.get("url", ""),
                excerpt=src.get("excerpt", ""),
                published_at=src.get("published_at"),
                highlights=src.get("highlights"),
                summary=src.get("summary"),
                source_type=src.get("source_type"),
                index=src.get("index"),
            ))
        except Exception as src_err:
            # One bad source must not discard the whole research result.
            logger.warning(f"[Podcast Exa Research] Skipping invalid source: {src_err}")

    return PodcastExaResearchResponse(
        sources=sources_payload,
        search_queries=payload.get("search_queries", queries),
        cost=payload.get("cost"),
        search_type=payload.get("search_type"),
        provider=payload.get("provider", "exa"),
        content=payload.get("content"),
    )

View File

@@ -0,0 +1,142 @@
"""
Podcast Script Handlers
Script generation endpoint.
"""
from fastapi import APIRouter, Depends, HTTPException
from typing import Dict, Any
import json
from middleware.auth_middleware import get_current_user
from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_text_generation import llm_text_gen
from loguru import logger
from ..models import (
PodcastScriptRequest,
PodcastScriptResponse,
PodcastScene,
PodcastSceneLine,
)
router = APIRouter()
def _build_research_context(research: Dict[str, Any]) -> str:
    """Condense the research payload into a few labelled lines for the prompt."""
    keyword_analysis = research.get("keyword_analysis") or {}
    key_insights = keyword_analysis.get("key_insights") or []
    fact_cards = research.get("factCards") or []
    mapped_angles = research.get("mappedAngles") or []
    sources = research.get("sources") or []

    top_facts = [
        str(card["quote"])
        for card in fact_cards[:5]
        if isinstance(card, dict) and card.get("quote")
    ]
    angles_summary = [
        f"{angle.get('title', '')}: {angle.get('why', '')}"
        for angle in mapped_angles[:3]
        if isinstance(angle, dict) and (angle.get("title") or angle.get("why"))
    ]
    top_sources = [
        str(source["url"])
        for source in sources[:3]
        if isinstance(source, dict) and source.get("url")
    ]

    research_parts = []
    if key_insights:
        research_parts.append(f"Key Insights: {', '.join(str(i) for i in key_insights[:5])}")
    if top_facts:
        research_parts.append(f"Key Facts: {', '.join(top_facts)}")
    if angles_summary:
        research_parts.append(f"Research Angles: {' | '.join(angles_summary)}")
    if top_sources:
        research_parts.append(f"Top Sources: {', '.join(top_sources)}")
    return "\n".join(research_parts)


def _loads_llm_json(raw: str) -> Any:
    """Parse JSON from LLM text, tolerating a surrounding Markdown code fence.

    Raises:
        json.JSONDecodeError: when neither the raw text nor the unfenced text
            is valid JSON.
    """
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        candidate = raw.strip()
        if not candidate.startswith("```"):
            raise
        # Drop the opening fence line (for example "```json") and the closing fence.
        first_newline = candidate.find("\n")
        candidate = candidate[first_newline + 1:] if first_newline != -1 else candidate[3:]
        candidate = candidate.rstrip()
        if candidate.endswith("```"):
            candidate = candidate[:-3]
        return json.loads(candidate.strip())


@router.post("/script", response_model=PodcastScriptResponse)
async def generate_podcast_script(
    request: PodcastScriptRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Generate a podcast script outline (scenes + lines) using podcast-oriented prompting.

    The LLM reply is parsed as JSON and normalized: missing titles, ids,
    durations and speakers get defaults, unknown emotions become "neutral",
    and lines without text are dropped. Scenes or lines that are not JSON
    objects are skipped.

    Raises:
        HTTPException: 500 when generation fails or the reply is not usable JSON.
    """
    user_id = require_authenticated_user(current_user)

    # Build comprehensive research context for higher-quality scripts
    research_context = ""
    if request.research:
        try:
            research_context = _build_research_context(request.research)
        except Exception as exc:
            logger.warning(f"Failed to parse research context: {exc}")
            research_context = ""

    # Built outside the prompt f-string: a backslash inside an f-string
    # expression is a SyntaxError before Python 3.12.
    research_block = f"RESEARCH CONTEXT:\n{research_context}\n" if research_context else ""

    prompt = f"""You are an expert podcast script planner. Create natural, conversational podcast scenes.
Podcast Idea: "{request.idea}"
Duration: ~{request.duration_minutes} minutes
Speakers: {request.speakers} (Host + optional Guest)
{research_block}
Return JSON with:
- scenes: array of scenes. Each scene has:
- id: string
- title: short scene title (<= 60 chars)
- duration: duration in seconds (evenly split across total duration)
- emotion: string (one of: "neutral", "happy", "excited", "serious", "curious", "confident")
- lines: array of {{"speaker": "...", "text": "...", "emphasis": boolean}}
* Write natural, conversational dialogue
* Each line can be a sentence or a few sentences that flow together
* Use plain text only - no markdown formatting (no asterisks, underscores, etc.)
* Mark "emphasis": true for key statistics or important points
Guidelines:
- Write for spoken delivery: conversational, natural, with contractions
- Use research insights naturally - weave statistics into dialogue, don't just list them
- Vary emotion per scene based on content
- Ensure scenes match target duration: aim for ~2.5 words per second of audio
- Keep it engaging and informative, like a real podcast conversation
"""

    try:
        raw = llm_text_gen(prompt=prompt, user_id=user_id, json_struct=None)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Script generation failed: {exc}") from exc

    if isinstance(raw, str):
        try:
            data = _loads_llm_json(raw)
        except json.JSONDecodeError:
            raise HTTPException(status_code=500, detail="LLM returned non-JSON output") from None
    elif isinstance(raw, dict):
        data = raw
    else:
        raise HTTPException(status_code=500, detail="Unexpected LLM response format")

    # A JSON string may decode to a list or scalar; only an object has "scenes".
    if not isinstance(data, dict):
        raise HTTPException(status_code=500, detail="Unexpected LLM response format")

    scenes_data = data.get("scenes") or []
    if not isinstance(scenes_data, list):
        raise HTTPException(status_code=500, detail="LLM response missing scenes array")

    valid_emotions = {"neutral", "happy", "excited", "serious", "curious", "confident"}
    # Even split of the total duration, with a 30-second floor per scene.
    default_duration = max(30, (request.duration_minutes * 60) // max(1, len(scenes_data)))
    # Guard the modulo below against a zero or missing speaker count.
    speaker_count = max(1, int(request.speakers or 1))

    # Normalize scenes
    scenes: list[PodcastScene] = []
    for idx, scene in enumerate(scenes_data):
        if not isinstance(scene, dict):
            logger.warning(f"Skipping malformed scene at index {idx}")
            continue

        title = scene.get("title") or f"Scene {idx + 1}"
        try:
            duration = int(scene.get("duration") or default_duration)
        except (TypeError, ValueError):
            # For example "60 seconds": fall back to the even split.
            duration = int(default_duration)

        emotion = scene.get("emotion") or "neutral"
        if emotion not in valid_emotions:
            emotion = "neutral"

        lines_raw = scene.get("lines") or []
        if not isinstance(lines_raw, list):
            lines_raw = []

        lines: list[PodcastSceneLine] = []
        for line in lines_raw:
            if not isinstance(line, dict):
                continue
            # When the speaker is missing, alternate by position among kept lines.
            speaker = line.get("speaker") or ("Host" if len(lines) % speaker_count == 0 else "Guest")
            text = line.get("text") or ""
            emphasis = line.get("emphasis", False)
            if text:
                lines.append(PodcastSceneLine(speaker=speaker, text=text, emphasis=emphasis))

        scenes.append(
            PodcastScene(
                id=scene.get("id") or f"scene-{idx + 1}",
                title=title,
                duration=duration,
                lines=lines,
                approved=False,
                emotion=emotion,
            )
        )

    return PodcastScriptResponse(scenes=scenes)

View File

@@ -0,0 +1,585 @@
"""
Podcast Video Handlers
Video generation and serving endpoints.
"""
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks, Request
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from typing import Dict, Any, Optional
from pathlib import Path
from urllib.parse import quote
import re
import json
from concurrent.futures import ThreadPoolExecutor
from services.database import get_db
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
from api.story_writer.utils.auth import require_authenticated_user
from services.wavespeed.infinitetalk import animate_scene_with_voiceover
from services.podcast.video_combination_service import PodcastVideoCombinationService
from services.llm_providers.main_video_generation import track_video_usage
from services.subscription import PricingService
from services.subscription.preflight_validator import validate_scene_animation_operation
from api.story_writer.task_manager import task_manager
from loguru import logger
from ..constants import AI_VIDEO_SUBDIR, PODCAST_VIDEOS_DIR
from ..utils import load_podcast_audio_bytes, load_podcast_image_bytes
from services.podcast_service import PodcastService
from ..models import (
PodcastVideoGenerationRequest,
PodcastVideoGenerationResponse,
PodcastCombineVideosRequest,
PodcastCombineVideosResponse,
)
router = APIRouter()
# Thread pool executor for CPU-intensive video operations
# This prevents blocking the FastAPI event loop
_video_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="podcast_video")
def _extract_error_message(exc: Exception) -> str:
    """
    Turn an exception into a message suitable for showing to the user.

    For an HTTPException the ``detail`` is inspected first: a dict detail may
    carry a JSON ``response`` string with a ``message`` field, or an ``error``
    field; a string detail is returned unchanged. Everything else falls back
    to pattern matching on ``str(exc)``.
    """
    if isinstance(exc, HTTPException):
        detail = exc.detail
        if isinstance(detail, str):
            return detail
        if isinstance(detail, dict):
            # Prefer the message nested inside the upstream response body.
            upstream_body = detail.get("response", "")
            if upstream_body:
                try:
                    parsed = json.loads(upstream_body)
                except (json.JSONDecodeError, TypeError):
                    parsed = None
                if isinstance(parsed, dict) and "message" in parsed:
                    return parsed["message"]
            # Otherwise use the error field when there is one.
            if "error" in detail:
                return detail["error"]

    # Fallback: work from the string form, e.g.
    # "502: {'error': '...', 'response': '{"message":"..."}'}"
    text = str(exc)

    if "insufficient credits" in text.lower():
        return "Insufficient WaveSpeed credits. Please top up your account."

    # Pull a JSON-style "message" value out of the text if one is embedded.
    embedded = re.search(r'"message"\s*:\s*"([^"]+)"', text)
    if embedded:
        return embedded.group(1)

    return text
def _execute_podcast_video_task(
    task_id: str,
    request: PodcastVideoGenerationRequest,
    user_id: str,
    image_bytes: bytes,
    audio_bytes: bytes,
    auth_token: Optional[str] = None,
    mask_image_bytes: Optional[bytes] = None,
):
    """Background task to generate InfiniteTalk video for podcast scene.

    Animates the scene image with the audio, saves the resulting video, tracks
    usage and records the outcome on the task via ``task_manager``. Any
    exception is caught and turned into a "failed" task status; nothing is
    raised to the caller.

    Args:
        task_id: Task whose status is updated as the work progresses.
        request: Scene, project and rendering options.
        user_id: User the generation and usage are attributed to.
        image_bytes: Scene image passed to the animation call.
        audio_bytes: Scene audio passed to the animation call.
        auth_token: When set, appended to the returned video URL as ``?token=``.
            It is deliberately kept out of every log line.
        mask_image_bytes: Optional mask image forwarded to the animation call.
    """
    try:
        task_manager.update_task_status(
            task_id, "processing", progress=5.0, message="Submitting to WaveSpeed InfiniteTalk..."
        )

        # Extract scene number from scene_id (first run of digits, 0 if none)
        scene_number_match = re.search(r'\d+', request.scene_id)
        scene_number = int(scene_number_match.group()) if scene_number_match else 0

        # Prepare scene data for animation
        scene_data = {
            "scene_number": scene_number,
            "title": request.scene_title,
            "scene_id": request.scene_id,
        }
        story_context = {
            "project_id": request.project_id,
            "type": "podcast",
        }

        animation_result = animate_scene_with_voiceover(
            image_bytes=image_bytes,
            audio_bytes=audio_bytes,
            scene_data=scene_data,
            story_context=story_context,
            user_id=user_id,
            resolution=request.resolution or "720p",
            prompt_override=request.prompt,
            mask_image_bytes=mask_image_bytes,
            seed=request.seed if request.seed is not None else -1,
            image_mime="image/png",
            audio_mime="audio/mpeg",
        )

        task_manager.update_task_status(
            task_id, "processing", progress=80.0, message="Saving video file..."
        )

        # Use podcast-specific video directory
        ai_video_dir = PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR
        ai_video_dir.mkdir(parents=True, exist_ok=True)

        video_service = PodcastVideoCombinationService(output_dir=str(PODCAST_VIDEOS_DIR / "Final_Videos"))
        save_result = video_service.save_scene_video(
            video_bytes=animation_result["video_bytes"],
            scene_number=scene_number,
            user_id=user_id,
        )
        video_filename = save_result["video_filename"]

        # Keep a token-free URL for logging; the bearer token is a credential
        # and must never be written to logs.
        loggable_video_url = f"/api/podcast/videos/{video_filename}"
        video_url = loggable_video_url
        if auth_token:
            video_url = f"{loggable_video_url}?token={quote(auth_token)}"

        logger.info(
            f"[Podcast] Video saved: filename={video_filename}, url={loggable_video_url}, scene={request.scene_id}"
        )

        track_video_usage(
            user_id=user_id,
            provider=animation_result["provider"],
            model_name=animation_result["model_name"],
            prompt=animation_result["prompt"],
            video_bytes=animation_result["video_bytes"],
            cost_override=animation_result["cost"],
        )

        result_data = {
            "video_url": video_url,
            "video_filename": video_filename,
            "cost": animation_result["cost"],
            "duration": animation_result["duration"],
            "provider": animation_result["provider"],
            "model": animation_result["model_name"],
        }
        # Log a copy with the token-free URL instead of the real result.
        loggable_result = {**result_data, "video_url": loggable_video_url}
        logger.info(
            f"[Podcast] Updating task status to completed: task_id={task_id}, result={loggable_result}"
        )

        task_manager.update_task_status(
            task_id,
            "completed",
            progress=100.0,
            message="Video generation complete!",
            result=result_data,
        )

        # Verify the task status was updated correctly
        updated_status = task_manager.get_task_status(task_id)
        stored_result = (updated_status.get("result") if updated_status else None) or {}
        logger.info(
            f"[Podcast] Task status after update: task_id={task_id}, "
            f"status={updated_status.get('status') if updated_status else 'None'}, "
            f"has_result={bool(stored_result)}, "
            f"has_video_url={bool(stored_result.get('video_url'))}"
        )
        logger.info(
            f"[Podcast] Video generation completed for project {request.project_id}, scene {request.scene_id}"
        )
    except Exception as exc:
        # Use logger.exception to avoid KeyError when exception message contains curly braces
        logger.exception(f"[Podcast] Video generation failed for project {request.project_id}, scene {request.scene_id}")
        # Extract user-friendly error message from exception
        error_msg = _extract_error_message(exc)
        task_manager.update_task_status(
            task_id, "failed", error=error_msg, message=f"Video generation failed: {error_msg}"
        )
@router.post("/render/video", response_model=PodcastVideoGenerationResponse)
async def generate_podcast_video(
    request_obj: Request,
    request: PodcastVideoGenerationRequest,
    background_tasks: BackgroundTasks,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Generate video for a podcast scene using WaveSpeed InfiniteTalk (avatar image + audio).

    Validates the request, loads the scene audio/image (and optional mask) from
    local storage, checks subscription limits, then schedules
    `_execute_podcast_video_task` as a background task.

    Returns task_id for polling since InfiniteTalk can take up to 10 minutes.

    Raises:
        HTTPException: 400 for an unsupported resolution, a missing scene image,
            or an unloadable mask image; whatever the media loaders and the
            subscription validation raise is propagated unchanged.
    """
    user_id = require_authenticated_user(current_user)

    logger.info(
        f"[Podcast] Starting video generation for project {request.project_id}, scene {request.scene_id}"
    )

    # Cheap request validation comes first so an invalid request is rejected
    # before any media file is read from disk.
    if request.resolution not in {"480p", "720p"}:
        raise HTTPException(status_code=400, detail="Resolution must be '480p' or '720p'.")

    # Scene-specific image should be generated before video generation
    if not request.avatar_image_url:
        raise HTTPException(
            status_code=400,
            detail="Scene image is required for video generation. Please generate images for scenes first.",
        )

    # Load media bytes
    audio_bytes = load_podcast_audio_bytes(request.audio_url)
    image_bytes = load_podcast_image_bytes(request.avatar_image_url)

    mask_image_bytes = None
    if request.mask_image_url:
        try:
            mask_image_bytes = load_podcast_image_bytes(request.mask_image_url)
        except Exception as e:
            logger.error(f"[Podcast] Failed to load mask image: {e}")
            # Any mask failure is reported as a client error; keep the cause chained.
            raise HTTPException(
                status_code=400,
                detail="Failed to load mask image for video generation.",
            ) from e

    # Validate subscription limits
    db = next(get_db())
    try:
        pricing_service = PricingService(db)
        validate_scene_animation_operation(pricing_service=pricing_service, user_id=user_id)
    finally:
        db.close()

    # Extract token for authenticated URL building.
    # Slice off the scheme prefix only; str.replace would also strip any later
    # occurrence of "Bearer " inside the credential itself.
    auth_token = None
    bearer_prefix = "Bearer "
    auth_header = request_obj.headers.get("Authorization")
    if auth_header and auth_header.startswith(bearer_prefix):
        auth_token = auth_header[len(bearer_prefix):].strip()

    # Create async task
    task_id = task_manager.create_task("podcast_video_generation")
    background_tasks.add_task(
        _execute_podcast_video_task,
        task_id=task_id,
        request=request,
        user_id=user_id,
        image_bytes=image_bytes,
        audio_bytes=audio_bytes,
        auth_token=auth_token,
        mask_image_bytes=mask_image_bytes,
    )

    return PodcastVideoGenerationResponse(
        task_id=task_id,
        status="pending",
        message="Video generation started. This may take up to 10 minutes.",
    )
@router.get("/videos/{filename}")
async def serve_podcast_video(
    filename: str,
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve generated podcast scene video files.

    Supports authentication via Authorization header or token query parameter.
    Query parameter is useful for HTML elements like <video> that cannot send custom headers.

    The file is looked up directly in PODCAST_VIDEOS_DIR first, then in its
    AI_VIDEO_SUBDIR subdirectory.

    Raises:
        HTTPException: 400 if the filename contains path-traversal characters,
            404 if no matching regular file exists inside PODCAST_VIDEOS_DIR.
    """
    require_authenticated_user(current_user)

    # Security check: ensure filename doesn't contain path traversal
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    # Look for video in podcast_videos directory (including AI_Videos subdirectory)
    candidate_paths = (
        PODCAST_VIDEOS_DIR / filename,
        PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR / filename,
    )
    for candidate in candidate_paths:
        resolved_path = candidate.resolve()
        # Security check: compare path components, not string prefixes. A plain
        # startswith() would also accept sibling directories whose name merely
        # begins with the videos directory name.
        if PODCAST_VIDEOS_DIR in resolved_path.parents and resolved_path.is_file():
            return FileResponse(resolved_path, media_type="video/mp4")

    raise HTTPException(status_code=404, detail="Video file not found")
@router.get("/videos")
async def list_podcast_videos(
    project_id: Optional[str] = None,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    List existing scene video files for the current user.

    Scans the AI videos directory for files named
    ``scene_{scene_number}_{user_id}_{suffix}.mp4`` and keeps, per scene number,
    the most recently modified file belonging to the caller.

    Note: ``project_id`` is accepted and logged but is not currently used to
    filter the results (the filename pattern carries no project information).

    Returns:
        ``{"videos": [...]}`` sorted by scene number; each entry has
        ``scene_number``, ``filename``, ``video_url``, ``file_size`` and ``mtime``.
        Filesystem errors are logged and yield an empty list.

    Raises:
        HTTPException: propagated from authentication.
    """
    # Authenticate outside the catch-all below: an authentication failure must
    # reach the client as an HTTP error, not be masked as an empty listing.
    user_id = require_authenticated_user(current_user)

    try:
        logger.info(f"[Podcast] Listing videos for user_id={user_id}, project_id={project_id}")

        # Look in podcast_videos/AI_Videos directory
        ai_video_dir = PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR
        ai_video_dir.mkdir(parents=True, exist_ok=True)

        # Pattern: scene_{scene_number}_{user_id}_{timestamp}.mp4
        # Extract user_id from current user (same logic as save_scene_video)
        clean_user_id = "".join(c if c.isalnum() or c in ('-', '_') else '_' for c in user_id[:16])
        logger.info(f"[Podcast] Looking for videos with clean_user_id={clean_user_id} in {ai_video_dir}")

        # Map scene_number -> (most recent video info)
        scene_video_map: Dict[int, Dict[str, Any]] = {}

        all_files = list(ai_video_dir.glob("*.mp4"))
        logger.info(f"[Podcast] Found {len(all_files)} MP4 files in directory")

        # Greedy match for user_id (it may contain underscores); the trailing
        # suffix is "anything except underscore before .mp4".
        name_pattern = re.compile(r"scene_(\d+)_(.+)_([^_]+)\.mp4")

        for video_file in all_files:
            filename = video_file.name
            match = name_pattern.match(filename)
            # Only include videos for this user
            if not match or match.group(2) != clean_user_id:
                continue

            scene_number = int(match.group(1))
            file_stat = video_file.stat()  # single stat call for both mtime and size

            # Keep the most recent video for each scene
            current = scene_video_map.get(scene_number)
            if current is None or file_stat.st_mtime > current["mtime"]:
                scene_video_map[scene_number] = {
                    "scene_number": scene_number,
                    "filename": filename,
                    "video_url": f"/api/podcast/videos/{filename}",
                    "file_size": file_stat.st_size,
                    "mtime": file_stat.st_mtime,
                }

        # Convert map to list and sort by scene number
        videos = sorted(scene_video_map.values(), key=lambda v: v["scene_number"])
        logger.info(f"[Podcast] Returning {len(videos)} videos for user: {[v['scene_number'] for v in videos]}")
        return {"videos": videos}
    except Exception:
        # Best-effort listing: log with traceback and fall back to an empty result.
        logger.exception("[Podcast] Error listing videos")
        return {"videos": []}
@router.post("/render/combine-videos", response_model=PodcastCombineVideosResponse)
async def combine_podcast_videos(
    request_obj: Request,
    request: PodcastCombineVideosRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Combine all scene videos into a single final podcast video.

    Creates a task, submits `_execute_combine_videos_task` to the video thread
    pool executor and returns immediately.

    Returns task_id for polling.

    Raises:
        HTTPException: 400 if no scene video URLs are provided.
    """
    user_id = require_authenticated_user(current_user)

    logger.info(f"[Podcast] Combining {len(request.scene_video_urls)} scene videos for project {request.project_id}")

    if not request.scene_video_urls:
        raise HTTPException(status_code=400, detail="No scene videos provided")

    # Create async task
    task_id = task_manager.create_task("podcast_combine_videos")

    # Extract token for authenticated URL building (strip only the scheme prefix)
    auth_token = None
    bearer_prefix = "Bearer "
    auth_header = request_obj.headers.get("Authorization")
    if auth_header and auth_header.startswith(bearer_prefix):
        auth_token = auth_header[len(bearer_prefix):].strip()

    def handle_task_completion(future):
        """Callback to handle task completion and log errors."""
        try:
            future.result()  # This will raise if there was an exception
        except Exception:
            # logger.exception records the traceback and, unlike passing the
            # formatted error plus extra keyword arguments, cannot fail when the
            # exception text itself contains curly braces.
            logger.exception("[Podcast] Error in video combination task")

    # Run video combination in the thread pool executor so the event loop is not
    # blocked; submit() returns immediately and the task runs in a background thread.
    future = _video_executor.submit(
        _execute_combine_videos_task,
        task_id,
        request.project_id,
        request.scene_video_urls,
        request.podcast_title,
        user_id,
        auth_token,
    )
    # Add callback to log errors without blocking
    future.add_done_callback(handle_task_completion)

    return PodcastCombineVideosResponse(
        task_id=task_id,
        status="pending",
        message="Video combination started. This may take a few minutes.",
    )
def _execute_combine_videos_task(
    task_id: str,
    project_id: str,
    scene_video_urls: list[str],
    podcast_title: str,
    user_id: str,
    auth_token: Optional[str] = None,
):
    """Background task to combine scene videos into final podcast.

    Maps each scene video URL to a file in the AI videos directory, combines the
    files with PodcastVideoCombinationService, reports progress through
    ``task_manager`` and finally stores the resulting URL on the project.

    Args:
        task_id: Task whose status is updated as the work progresses.
        project_id: Project that receives the final video URL.
        scene_video_urls: Scene video URLs in playback order. URLs whose file
            cannot be found (or whose filename is unsafe) are skipped with a warning.
        podcast_title: Title passed to the combination service.
        user_id: Owner of the project being updated.
        auth_token: Optional bearer token appended to the URL returned in the
            task result so the client can stream the file immediately.

    All failures are caught and recorded on the task; nothing is raised.
    """
    try:
        task_manager.update_task_status(
            task_id, "processing", progress=10.0, message="Preparing scene videos..."
        )

        # Convert scene video URLs to local file paths
        scene_videos_dir = (PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR).resolve()
        scene_video_paths = []
        for video_url in scene_video_urls:
            # Extract filename from URL (e.g., /api/podcast/videos/scene_1_user_xxx.mp4)
            filename = video_url.split("/")[-1].split("?")[0]  # Remove query params

            # The URL comes from the client: never let it address anything other
            # than a plain file directly inside the scene videos directory.
            if not filename or ".." in filename or "\\" in filename:
                logger.warning(f"[Podcast] Skipping scene video with invalid filename: {filename!r}")
                continue

            video_path = (scene_videos_dir / filename).resolve()
            if scene_videos_dir not in video_path.parents or not video_path.is_file():
                logger.warning(f"[Podcast] Scene video not found: {video_path}")
                continue

            scene_video_paths.append(str(video_path))

        if not scene_video_paths:
            raise ValueError("No valid scene videos found to combine")

        logger.info(f"[Podcast] Found {len(scene_video_paths)} scene videos to combine")

        task_manager.update_task_status(
            task_id, "processing", progress=30.0, message="Combining videos..."
        )

        # Use dedicated PodcastVideoCombinationService
        final_videos_dir = PODCAST_VIDEOS_DIR / "Final_Videos"
        final_videos_dir.mkdir(parents=True, exist_ok=True)

        video_service = PodcastVideoCombinationService(output_dir=str(final_videos_dir))

        # Progress callback for task updates
        def progress_callback(progress: float, message: str):
            task_manager.update_task_status(
                task_id, "processing", progress=progress, message=message
            )

        task_manager.update_task_status(
            task_id, "processing", progress=50.0, message="Combining videos..."
        )

        # Combine videos using dedicated podcast service
        result = video_service.combine_videos(
            video_paths=scene_video_paths,
            podcast_title=podcast_title,
            fps=30,
            progress_callback=progress_callback,
        )

        video_filename = Path(result["video_path"]).name
        base_video_url = f"/api/podcast/final-videos/{video_filename}"
        # Tokenised URL is only handed back in the task result for immediate playback.
        video_url = base_video_url
        if auth_token:
            video_url = f"{base_video_url}?token={quote(auth_token)}"

        logger.info(f"[Podcast] Final video combined: {video_filename}")

        result_data = {
            "video_url": video_url,
            "video_filename": video_filename,
            "duration": result.get("duration", 0),
            "file_size": result.get("file_size", 0),
        }

        task_manager.update_task_status(
            task_id,
            "completed",
            progress=100.0,
            message="Podcast video ready!",
            result=result_data,
        )

        # Save final video URL to project for persistence across reloads.
        # Persist (and log) the token-free URL: a bearer token is a credential
        # and must not be written to the database or to log files.
        try:
            from services.database import SessionLocal

            db = SessionLocal()
            try:
                service = PodcastService(db)
                service.update_project(user_id, project_id, final_video_url=base_video_url)
                db.commit()
                logger.info(f"[Podcast] Saved final video URL to project {project_id}: {base_video_url}")
            finally:
                db.close()
        except Exception as e:
            logger.warning(f"[Podcast] Failed to save final video URL to project: {e}")
            # Don't fail the task if project update fails - video is still available via task result

        logger.info(f"[Podcast] Task {task_id} marked as completed successfully")

    except Exception as e:
        logger.exception(f"[Podcast] Failed to combine videos: {e}")
        error_msg = _extract_error_message(e)
        task_manager.update_task_status(
            task_id,
            "failed",
            progress=0.0,
            message=f"Video combination failed: {error_msg}",
            error=str(error_msg),
        )
        logger.error(f"[Podcast] Task {task_id} marked as failed: {error_msg}")
@router.get("/final-videos/{filename}")
async def serve_final_podcast_video(
    filename: str,
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve the final combined podcast video with authentication.

    Raises:
        HTTPException: 400 if the filename contains path-traversal characters,
            404 if no such regular file exists in the Final_Videos directory.
    """
    require_authenticated_user(current_user)

    # Validate the filename BEFORE touching the filesystem with it.
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    final_videos_dir = (PODCAST_VIDEOS_DIR / "Final_Videos").resolve()
    video_path = (final_videos_dir / filename).resolve()

    # Defence in depth: the resolved path must still live inside Final_Videos
    # (e.g. a symlink must not lead elsewhere) and must be a regular file.
    if final_videos_dir not in video_path.parents or not video_path.is_file():
        raise HTTPException(status_code=404, detail="Video not found")

    return FileResponse(
        path=str(video_path),
        media_type="video/mp4",
        filename=filename,
    )

View File

@@ -0,0 +1,280 @@
"""
Podcast API Models
All Pydantic request/response models for podcast endpoints.
"""
from pydantic import BaseModel, Field, model_validator
from typing import List, Optional, Dict, Any
from datetime import datetime
class PodcastProjectResponse(BaseModel):
    """Response model for podcast project.

    Only the identifiers, creation inputs and timestamps are required; every
    workflow payload is optional and defaults to ``None``.
    """
    # Identifiers
    id: int
    project_id: str
    user_id: str
    # Creation inputs
    idea: str
    duration: int
    speakers: int
    budget_cap: float
    # Optional workflow payloads (free-form JSON-like structures)
    analysis: Optional[Dict[str, Any]] = None
    queries: Optional[List[Dict[str, Any]]] = None
    selected_queries: Optional[List[str]] = None
    research: Optional[Dict[str, Any]] = None
    raw_research: Optional[Dict[str, Any]] = None
    estimate: Optional[Dict[str, Any]] = None
    script_data: Optional[Dict[str, Any]] = None
    render_jobs: Optional[List[Dict[str, Any]]] = None
    knobs: Optional[Dict[str, Any]] = None
    research_provider: Optional[str] = None
    # UI / workflow state
    show_script_editor: bool = False
    show_render_queue: bool = False
    current_step: Optional[str] = None
    status: str = "draft"
    is_favorite: bool = False
    final_video_url: Optional[str] = None
    # Timestamps
    created_at: datetime
    updated_at: datetime

    class Config:
        # Allow the model to be populated from objects that expose these fields
        # as attributes rather than from dicts only.
        from_attributes = True
class PodcastAnalyzeRequest(BaseModel):
    """Request model for podcast idea analysis.

    ``idea`` is required; ``duration`` defaults to 10 (minutes) and
    ``speakers`` to 1.
    """
    idea: str = Field(..., description="Podcast topic or idea")
    duration: int = Field(default=10, description="Target duration in minutes")
    speakers: int = Field(default=1, description="Number of speakers")
class PodcastAnalyzeResponse(BaseModel):
    """Response model for podcast idea analysis.

    All fields are required except ``exa_suggested_config``, which defaults to
    ``None``.
    """
    audience: str
    content_type: str
    top_keywords: list[str]
    suggested_outlines: list[Dict[str, Any]]
    title_suggestions: list[str]
    exa_suggested_config: Optional[Dict[str, Any]] = None
class PodcastScriptRequest(BaseModel):
    """Request model for podcast script generation.

    Only ``idea`` is required; ``research`` may be omitted, in which case it is
    ``None``.
    """
    idea: str = Field(..., description="Podcast idea or topic")
    duration_minutes: int = Field(default=10, description="Target duration in minutes")
    speakers: int = Field(default=1, description="Number of speakers")
    research: Optional[Dict[str, Any]] = Field(None, description="Optional research payload to ground the script")
class PodcastSceneLine(BaseModel):
    """A single line within a podcast scene: who speaks and what is said."""
    speaker: str
    text: str
    emphasis: Optional[bool] = False  # Off unless explicitly set
class PodcastScene(BaseModel):
    """A podcast scene: identifier, title, duration and its ordered lines.

    ``imageUrl`` keeps its camelCase spelling; it is part of the serialized
    field name and must not be renamed.
    """
    id: str
    title: str
    duration: int  # NOTE(review): unit is not stated here - confirm (seconds vs. minutes)
    lines: list[PodcastSceneLine]
    approved: bool = False
    emotion: Optional[str] = None
    imageUrl: Optional[str] = None  # Generated image URL for video generation
class PodcastExaConfig(BaseModel):
    """Exa config for podcast research.

    After validation, ``exa_exclude_domains`` is emptied whenever
    ``exa_include_domains`` is non-empty, so the two are never both set.
    """
    exa_search_type: Optional[str] = Field(default="auto", description="auto | keyword | neural")
    exa_category: Optional[str] = None
    exa_include_domains: List[str] = []
    exa_exclude_domains: List[str] = []
    max_sources: int = 8
    include_statistics: Optional[bool] = False
    date_range: Optional[str] = Field(default=None, description="last_month | last_3_months | last_year | all_time")

    @model_validator(mode="after")
    def validate_domains(self):
        """Resolve conflicting domain filters in favour of the include list."""
        if self.exa_include_domains and self.exa_exclude_domains:
            # Exa API does not allow both include and exclude domains together with contents
            # Prefer include_domains and drop exclude_domains
            self.exa_exclude_domains = []
        return self
class PodcastExaResearchRequest(BaseModel):
    """Request for podcast research using Exa directly (no blog writer).

    ``exa_config`` is optional and defaults to ``None``.
    """
    topic: str
    queries: List[str]
    exa_config: Optional[PodcastExaConfig] = None
class PodcastExaSource(BaseModel):
    """One research source in an Exa research response.

    Every field has a default, so a source can be built from partial data:
    the text fields default to empty strings and the rest to ``None``.
    """
    title: str = ""
    url: str = ""
    excerpt: str = ""
    published_at: Optional[str] = None
    highlights: Optional[List[str]] = None
    summary: Optional[str] = None
    source_type: Optional[str] = None
    index: Optional[int] = None
class PodcastExaResearchResponse(BaseModel):
    """Response for podcast research: the sources found plus search metadata.

    Only ``sources`` is required; ``provider`` defaults to ``"exa"``.
    """
    sources: List[PodcastExaSource]
    search_queries: List[str] = []
    cost: Optional[Dict[str, Any]] = None
    search_type: Optional[str] = None
    provider: str = "exa"
    content: Optional[str] = None
class PodcastScriptResponse(BaseModel):
    """Response for podcast script generation: the list of generated scenes."""
    scenes: list[PodcastScene]
class PodcastAudioRequest(BaseModel):
    """Generate TTS for a podcast scene.

    ``scene_id``, ``scene_title`` and ``text`` are required. The voice settings
    have explicit defaults; the output-format options default to ``None``.
    """
    scene_id: str
    scene_title: str
    text: str
    # Voice settings
    voice_id: Optional[str] = "Wise_Woman"
    speed: Optional[float] = 1.0
    volume: Optional[float] = 1.0
    pitch: Optional[float] = 0.0
    emotion: Optional[str] = "neutral"
    english_normalization: Optional[bool] = False  # Better number reading for statistics
    # Output-format options (unset unless provided)
    sample_rate: Optional[int] = None
    bitrate: Optional[int] = None
    channel: Optional[str] = None
    format: Optional[str] = None
    language_boost: Optional[str] = None
    enable_sync_mode: Optional[bool] = True
class PodcastAudioResponse(BaseModel):
    """Response for podcast scene TTS generation.

    Echoes the scene identifiers and describes the generated audio file
    (filename, URL, size) together with provider, model, voice and cost.
    All fields are required.
    """
    scene_id: str
    scene_title: str
    audio_filename: str
    audio_url: str
    provider: str
    model: str
    voice_id: str
    text_length: int
    file_size: int
    cost: float
class PodcastProjectListResponse(BaseModel):
    """Response model for project list.

    Holds one page of projects together with the ``total`` count and the
    ``limit``/``offset`` values describing the page.
    """
    projects: List[PodcastProjectResponse]
    total: int
    limit: int
    offset: int
class CreateProjectRequest(BaseModel):
    """Request model for creating a project.

    ``project_id``, ``idea`` and ``duration`` are required; ``speakers``
    defaults to 1, ``budget_cap`` to 50.0 and ``avatar_url`` to ``None``.
    """
    project_id: str = Field(..., description="Unique project ID")
    idea: str = Field(..., description="Episode idea or URL")
    duration: int = Field(..., description="Duration in minutes")
    speakers: int = Field(default=1, description="Number of speakers")
    budget_cap: float = Field(default=50.0, description="Budget cap in USD")
    avatar_url: Optional[str] = Field(None, description="Optional presenter avatar URL")
class UpdateProjectRequest(BaseModel):
    """Request model for updating project state.

    Every field is optional and defaults to ``None``, so a request may carry
    any subset of them.
    """
    analysis: Optional[Dict[str, Any]] = None
    queries: Optional[List[Dict[str, Any]]] = None
    selected_queries: Optional[List[str]] = None
    research: Optional[Dict[str, Any]] = None
    raw_research: Optional[Dict[str, Any]] = None
    estimate: Optional[Dict[str, Any]] = None
    script_data: Optional[Dict[str, Any]] = None
    render_jobs: Optional[List[Dict[str, Any]]] = None
    knobs: Optional[Dict[str, Any]] = None
    research_provider: Optional[str] = None
    show_script_editor: Optional[bool] = None
    show_render_queue: Optional[bool] = None
    current_step: Optional[str] = None
    status: Optional[str] = None
    final_video_url: Optional[str] = None
class PodcastCombineAudioRequest(BaseModel):
    """Request model for combining podcast audio files.

    All three fields are required.
    """
    project_id: str
    # NOTE(review): presumably index-aligned with scene_audio_urls - confirm against the endpoint.
    scene_ids: List[str] = Field(..., description="List of scene IDs to combine")
    scene_audio_urls: List[str] = Field(..., description="List of audio URLs for each scene")
class PodcastCombineAudioResponse(BaseModel):
    """Response model for combined podcast audio.

    Describes the combined file (URL, filename, size, total duration) and how
    many scenes went into it. All fields are required.
    """
    combined_audio_url: str
    combined_audio_filename: str
    total_duration: float
    file_size: int
    scene_count: int
class PodcastImageRequest(BaseModel):
    """Request for generating an image for a podcast scene.

    Only ``scene_id`` and ``scene_title`` are required; the image size defaults
    to 1024x1024 and all remaining options default to ``None``.
    """
    scene_id: str
    scene_title: str
    scene_content: Optional[str] = None  # Optional: scene lines text for context
    idea: Optional[str] = None  # Optional: podcast idea for context
    base_avatar_url: Optional[str] = None  # Base avatar image URL for scene variations
    width: int = 1024
    height: int = 1024
    custom_prompt: Optional[str] = None  # Custom prompt from user (overrides auto-generated prompt)
    style: Optional[str] = None  # "Auto", "Fiction", or "Realistic"
    rendering_speed: Optional[str] = None  # "Default", "Turbo", or "Quality"
    aspect_ratio: Optional[str] = None  # "1:1", "16:9", "9:16", "4:3", "3:4"
class PodcastImageResponse(BaseModel):
    """Response for podcast scene image generation.

    Echoes the scene identifiers and describes the generated image (filename,
    URL, dimensions) together with provider, optional model name and cost.
    """
    scene_id: str
    scene_title: str
    image_filename: str
    image_url: str
    width: int
    height: int
    provider: str
    model: Optional[str] = None
    cost: float
class PodcastVideoGenerationRequest(BaseModel):
    """Request model for podcast video generation.

    ``avatar_image_url`` is typed as optional, but its own description marks it
    as required for video generation. ``resolution`` is a plain string here;
    the model itself does not restrict it to the values its description lists.
    """
    project_id: str = Field(..., description="Podcast project ID")
    scene_id: str = Field(..., description="Scene ID")
    scene_title: str = Field(..., description="Scene title")
    audio_url: str = Field(..., description="URL to the generated audio file")
    avatar_image_url: Optional[str] = Field(None, description="URL to scene image (required for video generation)")
    resolution: str = Field("720p", description="Video resolution (480p or 720p)")
    prompt: Optional[str] = Field(None, description="Optional animation prompt override")
    seed: Optional[int] = Field(-1, description="Random seed; -1 for random")
    mask_image_url: Optional[str] = Field(None, description="Optional mask image URL to specify animated region")
class PodcastVideoGenerationResponse(BaseModel):
    """Response model for podcast video generation.

    Carries the ``task_id`` of the started task plus its ``status`` and a
    human-readable ``message``.
    """
    task_id: str
    status: str
    message: str
class PodcastCombineVideosRequest(BaseModel):
    """Request to combine scene videos into final podcast.

    ``scene_video_urls`` is expected in playback order; ``podcast_title``
    defaults to ``"Podcast"``.
    """
    project_id: str = Field(..., description="Project ID")
    scene_video_urls: list[str] = Field(..., description="List of scene video URLs in order")
    podcast_title: str = Field(default="Podcast", description="Title for the final podcast video")
class PodcastCombineVideosResponse(BaseModel):
    """Response from combine videos endpoint.

    Carries the ``task_id`` of the started task plus its ``status`` and a
    human-readable ``message``.
    """
    task_id: str
    status: str
    message: str

View File

@@ -0,0 +1,143 @@
"""
Podcast Presenter Personas
Lightweight, podcast-specific presenter persona presets used to steer avatar generation.
Design goals:
- Market-fit + style consistency without asking end-users to choose sensitive traits.
- Deterministic persona selection using analysis hints (audience/content type/keywords).
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, Optional, List
@dataclass(frozen=True)
class PresenterPersona:
    """Immutable presenter persona preset.

    Instances are frozen, so a preset cannot be modified after creation.
    """
    id: str
    label: str
    target_market: str  # e.g. "global", "us_canada", "uk_eu", "india", "latam"
    style: str  # e.g. "corporate", "tech_modern", "creator"
    prompt: str  # prompt fragment to inject
# NOTE: Avoid encoding/guessing ethnicity. Keep personas about market-fit + style.
# Registry of the built-in presets, keyed by persona id (each key equals the
# `id` of the persona it maps to). Only the "global", "india" and "us_canada"
# markets have entries here.
PERSONAS: Dict[str, PresenterPersona] = {
    "global_corporate": PresenterPersona(
        id="global_corporate",
        label="Global — Corporate Host",
        target_market="global",
        style="corporate",
        prompt=(
            "professional podcast presenter, business professional attire (white shirt and light gray blazer), "
            "confident, friendly, camera-ready, neutral background, studio lighting"
        ),
    ),
    "global_tech_modern": PresenterPersona(
        id="global_tech_modern",
        label="Global — Tech Modern Host",
        target_market="global",
        style="tech_modern",
        prompt=(
            "modern professional podcast presenter, contemporary tech-forward style, "
            "clean minimal studio background, soft studio lighting, friendly and energetic expression"
        ),
    ),
    "global_news_anchor": PresenterPersona(
        id="global_news_anchor",
        label="Global — News Anchor",
        target_market="global",
        style="news_anchor",
        prompt=(
            "professional news-style presenter, polished on-camera appearance, "
            "formal attire, authoritative yet approachable expression, studio lighting, neutral background"
        ),
    ),
    "india_corporate": PresenterPersona(
        id="india_corporate",
        label="India — Corporate Host",
        target_market="india",
        style="corporate",
        prompt=(
            "professional podcast presenter for the Indian market, business professional attire, "
            "polished and confident on-camera presence, clean studio background, soft studio lighting"
        ),
    ),
    "us_canada_creator": PresenterPersona(
        id="us_canada_creator",
        label="US/Canada — Creator Host",
        target_market="us_canada",
        style="creator",
        prompt=(
            "professional podcast creator host, business casual style, approachable and conversational expression, "
            "clean studio background, soft studio lighting"
        ),
    ),
}
def get_persona(persona_id: Optional[str]) -> Optional[PresenterPersona]:
    """Look up a persona preset by id.

    Returns ``None`` when *persona_id* is empty/``None`` or is not a key of
    ``PERSONAS``.
    """
    return PERSONAS.get(persona_id) if persona_id else None
def list_personas() -> List[PresenterPersona]:
    """Return all persona presets as a new list, in ``PERSONAS`` order."""
    return [*PERSONAS.values()]
def choose_persona_id(
    audience: Optional[str] = None,
    content_type: Optional[str] = None,
    top_keywords: Optional[List[str]] = None,
) -> str:
    """
    Choose a persona id using non-sensitive heuristics from analysis.

    - Uses explicit market hints if present (e.g. "India", "US", "UK", etc.)
    - Uses content_type / keywords to pick a style
    - Falls back to global corporate

    Short hints such as "us", "uk", "eu" and "ai" only count when they appear
    as whole words, so "business" is not read as "US" and "entertainment" is
    not read as "AI". Longer hints keep substring semantics ("technical"
    still counts as "tech").

    Args:
        audience: Free-text audience description; scanned for market hints.
        content_type: Free-text content type; scanned for style hints.
        top_keywords: Keywords compared (case-insensitively, whole keyword)
            against the style keyword lists.

    Returns:
        One of the persona ids defined in this module.
    """
    audience_l = (audience or "").lower()
    content_l = (content_type or "").lower()
    keywords_l = {k.strip().lower() for k in (top_keywords or [])}

    # Market hints (explicit only)
    if _mentions_any(audience_l, ["india", "indian"]):
        market = "india"
    elif _mentions_any(audience_l, ["us", "usa", "united states", "canada", "north america"]):
        market = "us_canada"
    elif _mentions_any(audience_l, ["uk", "united kingdom", "europe", "eu", "european"]):
        market = "uk_eu"
    elif _mentions_any(audience_l, ["latam", "latin america", "south america"]):
        market = "latam"
    else:
        market = "global"

    # Style hints: later checks deliberately override earlier ones
    # (creator > tech_modern > news_anchor > corporate).
    style = "corporate"
    if "news" in content_l or "analysis" in content_l:
        style = "news_anchor"
    if _mentions_any(content_l, ["tech", "technology", "ai", "software"]) or (
        keywords_l & {"ai", "technology", "tech", "software"}
    ):
        style = "tech_modern"
    if _mentions_any(content_l, ["casual", "creator", "conversational"]) or (
        keywords_l & {"creator", "youtube", "tiktok", "instagram"}
    ):
        style = "creator"

    # Map market+style to a concrete persona id; combinations without a
    # dedicated preset fall back to the global persona for the style.
    if market == "india" and style == "corporate":
        return "india_corporate"
    if market == "us_canada" and style == "creator":
        return "us_canada_creator"
    if style == "news_anchor":
        return "global_news_anchor"
    if style == "tech_modern":
        return "global_tech_modern"
    return "global_corporate"


def _mentions_any(text: str, hints: List[str]) -> bool:
    """Return True if lower-cased *text* mentions at least one of *hints*.

    Hints of three characters or fewer must match a whole word; longer hints
    (including multi-word phrases) match as substrings.
    """
    words = set("".join(ch if ch.isalnum() else " " for ch in text).split())
    return any((hint in words) if len(hint) <= 3 else (hint in text) for hint in hints)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,105 @@
"""
Podcast API Utility Functions
Helper functions for loading media files and other utilities.
"""
from pathlib import Path
from urllib.parse import urlparse
from fastapi import HTTPException
from loguru import logger
from .constants import PODCAST_AUDIO_DIR, PODCAST_IMAGES_DIR
def load_podcast_audio_bytes(audio_url: str) -> bytes:
    """Load podcast audio bytes from URL. Only handles /api/podcast/audio/ URLs.

    Args:
        audio_url: Absolute or relative URL containing ``/api/podcast/audio/``
            followed by the stored filename; a query string is ignored.

    Returns:
        The raw bytes of the audio file stored under PODCAST_AUDIO_DIR.

    Raises:
        HTTPException: 400 for a missing/unsupported URL or empty filename,
            403 if the resolved path escapes PODCAST_AUDIO_DIR, 404 if the file
            does not exist, 500 for any other failure while loading.
    """
    if not audio_url:
        raise HTTPException(status_code=400, detail="Audio URL is required")

    try:
        parsed = urlparse(audio_url)
        path = parsed.path if parsed.scheme else audio_url

        # Only handle /api/podcast/audio/ URLs
        prefix = "/api/podcast/audio/"
        if prefix not in path:
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported audio URL format: {audio_url}. Only /api/podcast/audio/ URLs are supported."
            )

        filename = path.split(prefix, 1)[1].split("?", 1)[0].strip()
        if not filename:
            raise HTTPException(status_code=400, detail=f"Could not extract filename from URL: {audio_url}")

        # Podcast audio files are stored in podcast_audio directory
        audio_path = (PODCAST_AUDIO_DIR / filename).resolve()

        # Security check: ensure path is within PODCAST_AUDIO_DIR.
        # Compare path components rather than string prefixes: a startswith()
        # test would accept "../podcast_audio_backup/x", whose resolved path
        # merely begins with the same characters.
        if PODCAST_AUDIO_DIR not in audio_path.parents:
            logger.error(f"[Podcast] Attempted path traversal when resolving audio: {audio_url}")
            raise HTTPException(status_code=403, detail="Invalid audio path")

        # is_file() (not exists()) so a directory is reported as "not found"
        # instead of failing later in read_bytes().
        if not audio_path.is_file():
            logger.warning(f"[Podcast] Audio file not found: {audio_path}")
            raise HTTPException(status_code=404, detail=f"Audio file not found: {filename}")

        return audio_path.read_bytes()
    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"[Podcast] Failed to load audio: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to load audio: {str(exc)}") from exc
def load_podcast_image_bytes(image_url: str) -> bytes:
    """Load podcast image bytes from URL. Only handles /api/podcast/images/ URLs.

    Args:
        image_url: Absolute or relative URL containing ``/api/podcast/images/``
            followed by the stored filename; a query string is ignored.

    Returns:
        The raw bytes of the image file stored under PODCAST_IMAGES_DIR.

    Raises:
        HTTPException: 400 for a missing/unsupported URL or empty filename,
            403 if the resolved path escapes PODCAST_IMAGES_DIR, 404 if the file
            does not exist, 500 for any other failure while loading.
    """
    if not image_url:
        raise HTTPException(status_code=400, detail="Image URL is required")

    logger.info(f"[Podcast] Loading image from URL: {image_url}")

    try:
        parsed = urlparse(image_url)
        path = parsed.path if parsed.scheme else image_url

        # Only handle /api/podcast/images/ URLs
        prefix = "/api/podcast/images/"
        if prefix not in path:
            logger.error(f"[Podcast] Unsupported image URL format: {image_url}")
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported image URL format: {image_url}. Only /api/podcast/images/ URLs are supported."
            )

        filename = path.split(prefix, 1)[1].split("?", 1)[0].strip()
        if not filename:
            logger.error(f"[Podcast] Could not extract filename from URL: {image_url}")
            raise HTTPException(status_code=400, detail=f"Could not extract filename from URL: {image_url}")

        logger.info(f"[Podcast] Extracted filename: {filename}")
        logger.info(f"[Podcast] PODCAST_IMAGES_DIR: {PODCAST_IMAGES_DIR}")

        # Podcast images are stored in podcast_images directory
        image_path = (PODCAST_IMAGES_DIR / filename).resolve()
        logger.info(f"[Podcast] Resolved image path: {image_path}")

        # Security check: ensure path is within PODCAST_IMAGES_DIR.
        # Compare path components rather than string prefixes: a startswith()
        # test would accept "../podcast_images_old/x", whose resolved path
        # merely begins with the same characters.
        if PODCAST_IMAGES_DIR not in image_path.parents:
            logger.error(f"[Podcast] Attempted path traversal when resolving image: {image_url} -> {image_path}")
            raise HTTPException(status_code=403, detail="Invalid image path")

        # is_file() (not exists()) so a directory is reported as "not found"
        # instead of failing later in read_bytes().
        if not image_path.is_file():
            logger.error(f"[Podcast] Image file not found: {image_path}")
            raise HTTPException(status_code=404, detail=f"Image file not found: {filename}")

        image_bytes = image_path.read_bytes()
        logger.info(f"[Podcast] ✅ Successfully loaded image: {len(image_bytes)} bytes from {image_path}")
        return image_bytes
    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"[Podcast] Failed to load image: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to load image: {str(exc)}") from exc

View File

@@ -1,7 +1,8 @@
from pathlib import Path
from typing import Any, Dict, List, Optional
from concurrent.futures import ThreadPoolExecutor
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import FileResponse
from loguru import logger
from pydantic import BaseModel

View File

@@ -53,6 +53,9 @@ class PodcastProject(Base):
status = Column(String(50), default="draft", nullable=False, index=True) # draft, in_progress, completed, archived
is_favorite = Column(Boolean, default=False, index=True)
# Final combined video URL (persisted for reloads)
final_video_url = Column(String(1000), nullable=True) # URL to final combined podcast video
# Timestamps
created_at = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False, index=True)

View File

@@ -0,0 +1,91 @@
#!/usr/bin/env python3
"""
Migration script to add final_video_url column to podcast_projects table.
This script should be run once to add the column to existing databases.
"""
import os
import sys
import sqlite3
from pathlib import Path
from loguru import logger
# Add the backend directory to the Python path
backend_dir = Path(__file__).parent.parent
sys.path.insert(0, str(backend_dir))
def run_migration():
    """Run the final_video_url column migration.

    Adds a nullable ``final_video_url VARCHAR(1000)`` column to the
    ``podcast_projects`` table of ``alwrity.db`` in the backend directory,
    unless the column is already present.

    Returns:
        True if the column exists afterwards (added now or already there) or
        if the database file does not exist yet; False if the migration SQL
        file is missing or the migration fails.
    """
    try:
        # Get the database path
        db_path = backend_dir / "alwrity.db"

        logger.info("🔄 Starting final_video_url column migration...")
        logger.info(f"📁 Database path: {db_path}")

        # Check if database exists
        if not db_path.exists():
            logger.warning(f"⚠️ Database file not found at {db_path}")
            logger.info(" New databases will have this column created automatically by SQLAlchemy")
            return True

        # The migration SQL file must be present as part of the migration set,
        # but the statement executed below is defined inline, so the file's
        # contents are not loaded.
        migration_file = backend_dir / "database" / "migrations" / "009_add_final_video_url_to_podcast_projects.sql"

        if not migration_file.exists():
            logger.error(f"❌ Migration file not found: {migration_file}")
            return False

        # Connect to database and run migration; the connection is always
        # closed, including when a statement raises.
        conn = sqlite3.connect(str(db_path))
        try:
            cursor = conn.cursor()

            # Check if column already exists
            cursor.execute("PRAGMA table_info(podcast_projects)")
            columns = [row[1] for row in cursor.fetchall()]

            if 'final_video_url' in columns:
                logger.info(" Column 'final_video_url' already exists, skipping migration")
                return True

            # Execute the migration
            logger.info("🔧 Adding final_video_url column...")
            cursor.execute("ALTER TABLE podcast_projects ADD COLUMN final_video_url VARCHAR(1000) NULL")
            conn.commit()

            # Verify the column was added
            cursor.execute("PRAGMA table_info(podcast_projects)")
            columns_after = [row[1] for row in cursor.fetchall()]

            if 'final_video_url' in columns_after:
                logger.info("✅ Migration completed successfully! Column 'final_video_url' added to podcast_projects table")
                return True

            logger.error("❌ Migration failed: Column was not added")
            return False
        finally:
            conn.close()

    except sqlite3.OperationalError as e:
        # A concurrent run may have added the column between the check and the ALTER.
        if "duplicate column name" in str(e).lower():
            logger.info(" Column 'final_video_url' already exists, skipping migration")
            return True
        else:
            logger.error(f"❌ Database error: {e}")
            return False
    except Exception as e:
        logger.error(f"❌ Error running migration: {e}")
        import traceback
        traceback.print_exc()
        return False
if __name__ == "__main__":
    # Script entry point: exit status 0 when the migration reports success
    # (truthy return), 1 otherwise, so callers and CI can detect failure.
    sys.exit(0 if run_migration() else 1)

View File

@@ -137,6 +137,9 @@ def generate_audio(
# Generate audio using WaveSpeed
try:
# Avoid passing duplicate enable_sync_mode; allow override via kwargs
enable_sync_mode = kwargs.pop("enable_sync_mode", True)
client = WaveSpeedClient()
audio_bytes = client.generate_speech(
text=text,
@@ -145,7 +148,7 @@ def generate_audio(
volume=volume,
pitch=pitch,
emotion=emotion,
enable_sync_mode=True,
enable_sync_mode=enable_sync_mode,
**kwargs
)

View File

@@ -0,0 +1,11 @@
"""
Podcast Services Module
Dedicated services for podcast generation functionality.
Separate from story writer services to maintain clear separation of concerns.
"""
from .video_combination_service import PodcastVideoCombinationService
__all__ = ["PodcastVideoCombinationService"]

View File

@@ -0,0 +1,382 @@
"""
Podcast Video Combination Service
Dedicated service for combining podcast scene videos into final episodes.
Separate from StoryVideoGenerationService to avoid breaking story writer functionality.
"""
import uuid
import warnings
import time
import threading
from typing import List, Dict, Any, Optional
from pathlib import Path
from loguru import logger
class PodcastVideoCombinationService:
    """Service for combining podcast scene videos into final episodes.

    Scene clips are concatenated as-is (their embedded audio is kept) and
    rendered to a single MP4 inside ``output_dir``.
    """

    def __init__(self, output_dir: Optional[str] = None):
        """
        Initialize the podcast video combination service.

        Parameters:
            output_dir (str, optional): Directory to save combined videos.
                When omitted, ``podcast_videos/Final_Videos`` under the
                directory three levels above this file is used.
        """
        if output_dir:
            self.output_dir = Path(output_dir)
        else:
            # Default to podcast_videos/Final_Videos directory
            base_dir = Path(__file__).parent.parent.parent
            self.output_dir = base_dir / "podcast_videos" / "Final_Videos"

        self.output_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"[PodcastVideoCombination] Initialized with output directory: {self.output_dir}")

    def combine_videos(
        self,
        video_paths: List[str],
        podcast_title: str,
        fps: int = 30,
        progress_callback: Optional[callable] = None,
    ) -> Dict[str, Any]:
        """
        Combine multiple video files into a single final podcast video.

        This method is designed for podcast videos that already have embedded
        audio. It does not require separate audio files.

        Parameters:
            video_paths (List[str]): List of video file paths to combine.
                Paths that do not exist are logged and skipped.
            podcast_title (str): Title of the podcast episode (used for the filename).
            fps (int): Frames per second for output video (default: 30).
            progress_callback (callable, optional): Callback for progress updates.
                Signature: callback(progress: float, message: str)

        Returns:
            Dict[str, Any]: Video metadata with keys ``video_path``, ``video_filename``,
            ``video_url``, ``duration``, ``fps``, ``file_size`` and ``num_scenes``.

        Raises:
            ValueError: If no paths are given or none of them point to an existing file.
            RuntimeError: If MoviePy is unavailable, no clip could be loaded, or
                rendering/verification fails.
        """
        if not video_paths:
            raise ValueError("No video paths provided")

        # Validate all video files exist
        valid_video_paths = []
        for video_path in video_paths:
            path = Path(video_path)
            if path.exists() and path.is_file():
                valid_video_paths.append(str(path))
            else:
                logger.warning(f"[PodcastVideoCombination] Video not found: {video_path}")

        if not valid_video_paths:
            raise ValueError("No valid video files found to combine")

        logger.info(f"[PodcastVideoCombination] Combining {len(valid_video_paths)} videos")

        try:
            # Import MoviePy lazily so the module can be imported without it
            try:
                from moviepy import VideoFileClip, concatenate_videoclips
            except Exception as e:
                logger.error(f"[PodcastVideoCombination] MoviePy not installed: {e}")
                raise RuntimeError("MoviePy is not installed. Please install it to combine videos.") from e

            # Suppress MoviePy warnings about incomplete frames (common with some video encodings)
            warnings.filterwarnings("ignore", category=UserWarning, module="moviepy")

            video_clips = []
            final_video = None
            try:
                if progress_callback:
                    progress_callback(10.0, "Loading video clips...")

                # Load all video clips
                total_duration = 0.0
                for idx, video_path in enumerate(valid_video_paths):
                    video_clip = None
                    kept = False  # True once the clip is owned by video_clips
                    try:
                        logger.info(f"[PodcastVideoCombination] Loading video {idx + 1}/{len(valid_video_paths)}: {video_path}")

                        # MoviePy will use the last valid frame if frames are missing at the end
                        with warnings.catch_warnings():
                            warnings.simplefilter("ignore", UserWarning)
                            video_clip = VideoFileClip(str(video_path))

                        # Validate clip was loaded successfully (duration may be None or <= 0)
                        clip_duration = video_clip.duration
                        if not clip_duration or clip_duration <= 0:
                            logger.warning(f"[PodcastVideoCombination] Video {video_path} has invalid duration, skipping")
                            video_clip.close()
                            video_clip = None
                            continue

                        # Videos already have embedded audio, no need to replace
                        video_clips.append(video_clip)
                        kept = True
                        total_duration += clip_duration

                        if progress_callback:
                            progress = 10.0 + ((idx + 1) / len(valid_video_paths)) * 60.0
                            progress_callback(progress, f"Loaded video {idx + 1}/{len(valid_video_paths)}")
                    except Exception as e:
                        logger.error(f"[PodcastVideoCombination] Failed to load video {video_path}: {e}")
                        # Release a reader that was opened but never handed to video_clips
                        if video_clip is not None and not kept:
                            try:
                                video_clip.close()
                            except Exception as close_error:
                                logger.warning(f"[PodcastVideoCombination] Error closing video clip: {close_error}")
                        # Continue with other videos instead of failing completely
                        continue

                if not video_clips:
                    raise RuntimeError("No valid video clips were loaded")

                logger.info(f"[PodcastVideoCombination] Loaded {len(video_clips)} clips, total duration: {total_duration:.2f}s")

                if progress_callback:
                    progress_callback(75.0, f"Concatenating {len(video_clips)} videos ({total_duration:.1f}s total)...")

                # Concatenate all video clips
                logger.info(f"[PodcastVideoCombination] Concatenating {len(video_clips)} video clips (total duration: {total_duration:.2f}s)")
                final_video = concatenate_videoclips(video_clips, method="compose")
                logger.info(f"[PodcastVideoCombination] Concatenation complete, final video duration: {final_video.duration:.2f}s")

                # Generate output filename
                video_filename = self._generate_video_filename(podcast_title)
                output_path = self.output_dir / video_filename

                if progress_callback:
                    progress_callback(85.0, f"Rendering final video ({total_duration:.1f}s total)...")

                logger.info(
                    f"[PodcastVideoCombination] Rendering final video to: {output_path} "
                    f"(duration: {total_duration:.2f}s, {len(video_clips)} clips)"
                )

                # 'ultrafast' is fastest but lower quality; 'fast' is a good balance
                encoding_preset = 'fast'

                # Suppress warnings during video writing as well
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", UserWarning)

                    # write_videofile is blocking and can take several minutes for longer videos.
                    # Rough estimate used only for progress messages: ~2 minutes per minute of video.
                    estimated_time_minutes = max(1, int(total_duration / 60) * 2)
                    logger.info(
                        f"[PodcastVideoCombination] Starting video encoding "
                        f"(estimated time: ~{estimated_time_minutes} minutes for {total_duration:.1f}s video)..."
                    )

                    start_time = time.time()

                    # write_videofile gives no progress hook here, so a daemon thread
                    # emits simulated progress until encoding_done is set.
                    progress_thread = None
                    encoding_done = threading.Event()
                    if progress_callback:
                        progress_thread = threading.Thread(
                            target=self._report_encoding_progress,
                            args=(progress_callback, estimated_time_minutes, encoding_done),
                            daemon=True,
                        )
                        progress_thread.start()

                    # Write video file - this is the blocking operation
                    logger.info(f"[PodcastVideoCombination] Calling write_videofile...")
                    try:
                        final_video.write_videofile(
                            str(output_path),
                            fps=fps,
                            codec='libx264',
                            audio_codec='aac',
                            preset=encoding_preset,  # Faster encoding
                            threads=4,
                            logger=None,  # Disable MoviePy's default logger
                            bitrate=None,  # Let encoder choose optimal bitrate
                            audio_bitrate='192k',  # Good quality audio
                            temp_audiofile=str(output_path.with_suffix('.m4a')),  # Temporary audio file
                            remove_temp=True,  # Clean up temp files
                            write_logfile=False,  # Don't write log file
                        )
                        logger.info(f"[PodcastVideoCombination] write_videofile completed successfully")
                    except Exception as write_error:
                        logger.error(f"[PodcastVideoCombination] Error in write_videofile: {write_error}")
                        # Best effort: accept a non-empty output file even if the writer raised
                        if output_path.exists() and output_path.stat().st_size > 0:
                            logger.warning(f"[PodcastVideoCombination] Video file exists despite error, continuing...")
                        else:
                            raise
                    finally:
                        # Always signal that encoding is done; the daemon thread is not joined
                        if progress_thread:
                            encoding_done.set()

                    elapsed_time = time.time() - start_time
                    logger.info(
                        f"[PodcastVideoCombination] Video encoding completed in {elapsed_time:.1f} seconds "
                        f"({elapsed_time/60:.1f} minutes)"
                    )

                if progress_callback:
                    progress_callback(99.0, "Video encoding complete! Finalizing...")

                file_size = self._wait_for_output_file(output_path)
                logger.info(f"[PodcastVideoCombination] Video file verified: {output_path} ({file_size} bytes)")
            finally:
                # Release MoviePy readers on success AND on failure (previously leaked on errors)
                self._close_clips(final_video, video_clips)

            if progress_callback:
                progress_callback(100.0, "Video combination complete!")

            logger.info(f"[PodcastVideoCombination] Saved combined video to: {output_path} ({file_size} bytes)")

            return {
                "video_path": str(output_path),
                "video_filename": video_filename,
                "video_url": f"/api/podcast/final-videos/{video_filename}",
                "duration": total_duration,
                "fps": fps,
                "file_size": file_size,
                "num_scenes": len(video_clips),
            }

        except Exception as e:
            logger.exception(f"[PodcastVideoCombination] Error combining videos: {e}")
            raise RuntimeError(f"Failed to combine videos: {str(e)}") from e

    @staticmethod
    def _report_encoding_progress(progress_callback, estimated_time_minutes, encoding_done):
        """Emit simulated encoding progress (87% -> 98%) every 5 seconds until encoding_done is set."""
        base_progress = 87.0
        max_progress = 98.0
        progress_range = max_progress - base_progress
        update_interval = 5.0  # Update every 5 seconds
        elapsed = 0.0

        try:
            while not encoding_done.is_set():
                elapsed += update_interval
                # Linear interpolation over the estimated encoding time, capped at max_progress
                progress = base_progress + (progress_range * min(1.0, elapsed / (estimated_time_minutes * 60)))
                progress = min(max_progress, progress)

                remaining_minutes = max(0, estimated_time_minutes - int(elapsed / 60))
                message = f"Encoding video... ({remaining_minutes} min remaining)"
                if remaining_minutes == 0:
                    message = "Finalizing video..."

                try:
                    progress_callback(progress, message)
                except Exception as e:
                    logger.warning(f"[PodcastVideoCombination] Error in progress callback: {e}")
                    break

                # wait() instead of sleep() so the thread exits as soon as encoding finishes
                if encoding_done.wait(timeout=update_interval):
                    break
        except Exception as e:
            logger.warning(f"[PodcastVideoCombination] Error in progress thread: {e}")

    @staticmethod
    def _wait_for_output_file(output_path, max_retries=5):
        """Return the size of output_path, retrying once per second while it is missing or empty."""
        file_size = 0
        for retry in range(max_retries):
            if output_path.exists():
                file_size = output_path.stat().st_size
                if file_size > 0:
                    break
            if retry < max_retries - 1:
                logger.info(f"[PodcastVideoCombination] Waiting for video file to be written (retry {retry + 1}/{max_retries})...")
                time.sleep(1)

        if not output_path.exists():
            raise RuntimeError(f"Video file was not created: {output_path}")
        if file_size == 0:
            raise RuntimeError(f"Video file is empty: {output_path}")
        return file_size

    @staticmethod
    def _close_clips(final_video, video_clips):
        """Close the concatenated clip (if any) and every source clip, logging close errors."""
        if final_video is not None:
            try:
                final_video.close()
            except Exception as e:
                logger.warning(f"[PodcastVideoCombination] Error closing final video clip: {e}")

        for clip in video_clips:
            try:
                clip.close()
            except Exception as e:
                logger.warning(f"[PodcastVideoCombination] Error closing video clip: {e}")

    def save_scene_video(self, video_bytes: bytes, scene_number: int, user_id: str) -> Dict[str, Any]:
        """
        Save a single scene video to disk.

        The file is written to the ``AI_Videos`` directory next to ``output_dir``.

        Parameters:
            video_bytes (bytes): Raw video file bytes.
            scene_number (int): Scene number for filename.
            user_id (str): User ID for filename (first 16 characters, sanitized).

        Returns:
            Dict[str, Any]: Dictionary with 'video_filename', 'video_path',
            'video_url' (str) and 'file_size' (int, bytes).

        Raises:
            RuntimeError: If the video could not be saved.
        """
        try:
            # Keep only filename-safe characters from the (truncated) user id
            clean_user_id = "".join(c if c.isalnum() or c in ('-', '_') else '_' for c in user_id[:16])
            unique_suffix = str(uuid.uuid4())[:8]
            video_filename = f"scene_{scene_number}_{clean_user_id}_{unique_suffix}.mp4"

            # Scene videos live in AI_Videos, a sibling of output_dir
            scene_videos_dir = self.output_dir.parent / "AI_Videos"
            scene_videos_dir.mkdir(parents=True, exist_ok=True)
            video_path = scene_videos_dir / video_filename

            # Write video bytes to file
            with open(video_path, "wb") as f:
                f.write(video_bytes)

            file_size = video_path.stat().st_size
            logger.info(f"[PodcastVideoCombination] Saved scene {scene_number} video: {video_filename} ({file_size} bytes)")

            # Generate URL path (relative to /api/podcast/videos/)
            video_url = f"/api/podcast/videos/{video_filename}"

            return {
                "video_filename": video_filename,
                "video_url": video_url,
                "video_path": str(video_path),
                "file_size": file_size,
            }

        except Exception as e:
            # logger.exception attaches the traceback; loguru ignores stdlib-style exc_info=True
            logger.exception(f"[PodcastVideoCombination] Error saving scene video: {e}")
            raise RuntimeError(f"Failed to save scene video: {str(e)}") from e

    def _generate_video_filename(self, podcast_title: str) -> str:
        """
        Generate a unique filename for the combined video.

        Parameters:
            podcast_title (str): Title of the podcast episode.

        Returns:
            str: ``podcast_<sanitized title>_<8-char uuid prefix>_<unix timestamp>.mp4``.
        """
        # Sanitize title for filename: keep alphanumerics, spaces, '-' and '_'
        safe_title = "".join(c for c in podcast_title if c.isalnum() or c in (' ', '-', '_')).strip()
        safe_title = safe_title.replace(' ', '_')[:50]  # Limit length

        unique_id = str(uuid.uuid4())[:8]
        # Wall-clock creation time (the module file's mtime would be identical on every call)
        timestamp = int(time.time())

        return f"podcast_{safe_title}_{unique_id}_{timestamp}.mp4"

View File

@@ -301,6 +301,12 @@ class StoryAudioGenerationService:
pitch: float = 0.0,
emotion: str = "happy",
english_normalization: bool = False,
sample_rate: Optional[int] = None,
bitrate: Optional[int] = None,
channel: Optional[str] = None,
format: Optional[str] = None,
language_boost: Optional[str] = None,
enable_sync_mode: Optional[bool] = True,
) -> Dict[str, Any]:
"""
Generate AI audio for a single scene using main_audio_generation.
@@ -340,6 +346,12 @@ class StoryAudioGenerationService:
emotion=emotion,
user_id=user_id,
english_normalization=english_normalization,
sample_rate=sample_rate,
bitrate=bitrate,
channel=channel,
format=format,
language_boost=language_boost,
enable_sync_mode=enable_sync_mode,
)
# Save audio to file

View File

@@ -252,10 +252,14 @@ class StoryVideoGenerationService:
if len(scenes) != len(audio_paths):
raise ValueError("Number of scenes and audio paths must match")
video_paths = video_paths or [None] * len(scenes)
if len(video_paths) != len(scenes):
# Ensure video_paths is a list and matches scenes length
if video_paths is None:
video_paths = [None] * len(scenes)
elif len(video_paths) != len(scenes):
video_paths = video_paths + [None] * (len(scenes) - len(video_paths))
logger.debug(f"[StoryVideoGeneration] video_paths length: {len(video_paths)}, scenes length: {len(scenes)}")
try:
logger.info(f"[StoryVideoGeneration] Generating story video for {len(scenes)} scenes")
@@ -311,49 +315,64 @@ class StoryVideoGenerationService:
scene_title = scene.get("title", "Untitled")
logger.info(f"[StoryVideoGeneration] Processing scene {scene_number}/{len(scenes)}: {scene_title}")
audio_file = Path(audio_path)
if not audio_file.exists():
logger.warning(f"[StoryVideoGeneration] Audio not found: {audio_path}, skipping scene {scene_number}")
continue
# Load audio
audio_clip = AudioFileClip(str(audio_file))
audio_duration = audio_clip.duration
logger.debug(f"[StoryVideoGeneration] Scene {scene_number} paths - video: {video_path}, audio: {audio_path}, image: {image_path}")
# Prefer animated video if available
if video_path and Path(video_path).exists():
# Check video_path is not None and is a valid string before calling Path()
if video_path is not None and isinstance(video_path, (str, Path)) and video_path and Path(video_path).exists():
logger.info(f"[StoryVideoGeneration] Using animated video for scene {scene_number}: {video_path}")
# Load animated video
if VideoFileClip is None:
raise RuntimeError("VideoFileClip not available - MoviePy may not be fully installed")
video_clip = VideoFileClip(str(video_path))
# Replace audio with the preferred audio (AI or free)
video_clip = video_clip.with_audio(audio_clip)
# Match duration to audio if needed
if video_clip.duration > audio_duration:
video_clip = video_clip.subclip(0, audio_duration)
elif video_clip.duration < audio_duration:
# Loop the video if it's shorter than audio
loops_needed = int(audio_duration / video_clip.duration) + 1
video_clip = concatenate_videoclips([video_clip] * loops_needed).subclip(0, audio_duration)
# Handle audio: use embedded audio if no separate audio_path provided
if audio_path is not None and isinstance(audio_path, (str, Path)) and audio_path and Path(audio_path).exists():
# Load separate audio file and replace video's audio
logger.info(f"[StoryVideoGeneration] Replacing video audio with separate audio file: {audio_path}")
audio_clip = AudioFileClip(str(audio_path))
audio_duration = audio_clip.duration
video_clip = video_clip.with_audio(audio_clip)
elif image_path and Path(image_path).exists():
# Fall back to static image
logger.info(f"[StoryVideoGeneration] Using static image for scene {scene_number}: {image_path}")
image_file = Path(image_path)
# Create image clip (MoviePy v2: use with_* API)
image_clip = ImageClip(str(image_file)).with_duration(audio_duration)
image_clip = image_clip.with_fps(fps)
# Set audio to image clip
video_clip = image_clip.with_audio(audio_clip)
# Match duration to audio if needed
if video_clip.duration > audio_duration:
video_clip = video_clip.subclip(0, audio_duration)
elif video_clip.duration < audio_duration:
# Loop the video if it's shorter than audio
loops_needed = int(audio_duration / video_clip.duration) + 1
video_clip = concatenate_videoclips([video_clip] * loops_needed).subclip(0, audio_duration)
video_clip = video_clip.with_audio(audio_clip)
else:
# Use embedded audio from video
logger.info(f"[StoryVideoGeneration] Using embedded audio from video for scene {scene_number}")
audio_duration = video_clip.duration
# Video already has audio, no need to replace
scene_clips.append(video_clip)
total_duration += audio_duration
elif audio_path is not None and isinstance(audio_path, (str, Path)) and audio_path and Path(audio_path).exists():
# No video, but we have audio - use with image or create blank
audio_file = Path(audio_path)
audio_clip = AudioFileClip(str(audio_file))
audio_duration = audio_clip.duration
if image_path is not None and isinstance(image_path, (str, Path)) and image_path and Path(image_path).exists():
# Fall back to static image with audio
logger.info(f"[StoryVideoGeneration] Using static image for scene {scene_number}: {image_path}")
image_file = Path(image_path)
# Create image clip (MoviePy v2: use with_* API)
image_clip = ImageClip(str(image_file)).with_duration(audio_duration)
image_clip = image_clip.with_fps(fps)
# Set audio to image clip
video_clip = image_clip.with_audio(audio_clip)
scene_clips.append(video_clip)
total_duration += audio_duration
else:
logger.warning(f"[StoryVideoGeneration] Audio provided but no video or image for scene {scene_number}, skipping")
continue
else:
logger.warning(f"[StoryVideoGeneration] No video or image found for scene {scene_number}, skipping")
logger.warning(f"[StoryVideoGeneration] No video, audio, or image found for scene {scene_number}, skipping")
continue
scene_clips.append(video_clip)
total_duration += audio_duration
# Call progress callback if provided
if progress_callback:
progress = ((idx + 1) / len(scenes)) * 90 # Reserve 10% for final composition
@@ -362,7 +381,12 @@ class StoryVideoGenerationService:
logger.info(f"[StoryVideoGeneration] Processed scene {idx + 1}/{len(scenes)}")
except Exception as e:
logger.error(f"[StoryVideoGeneration] Failed to process scene {idx + 1}: {e}")
logger.error(
f"[StoryVideoGeneration] Failed to process scene {idx + 1} ({scene_number}): {e}\n"
f" video_path: {video_path} (type: {type(video_path)})\n"
f" audio_path: {audio_path} (type: {type(audio_path)})\n"
f" image_path: {image_path} (type: {type(image_path)})"
)
# Continue with next scene instead of failing completely
continue

View File

@@ -71,13 +71,16 @@ class WaveSpeedClient:
logger.info(f"[WaveSpeed] Submitted request: {prediction_id}")
return prediction_id
def get_prediction_result(self, prediction_id: str, timeout: int = 120) -> Dict[str, Any]:
def get_prediction_result(self, prediction_id: str, timeout: int = 30) -> Dict[str, Any]:
"""
Fetch the current status/result for a prediction.
Matches the example pattern: simple GET request, check status_code == 200, return data.
"""
url = f"{self.BASE_URL}/predictions/{prediction_id}/result"
headers = {"Authorization": f"Bearer {self.api_key}"}
try:
response = requests.get(url, headers={"Authorization": f"Bearer {self.api_key}"}, timeout=timeout)
response = requests.get(url, headers=headers, timeout=timeout)
except requests_exceptions.Timeout as exc:
raise HTTPException(
status_code=504,
@@ -98,7 +101,15 @@ class WaveSpeedClient:
"exception": str(exc),
},
) from exc
if response.status_code != 200:
# Match example pattern: check status_code == 200, then get data
if response.status_code == 200:
result = response.json().get("data")
if not result:
raise HTTPException(status_code=502, detail={"error": "WaveSpeed polling response missing data"})
return result
else:
# Non-200 status - log and raise error (matching example's break behavior)
logger.error(f"[WaveSpeed] Polling failed: {response.status_code} {response.text}")
raise HTTPException(
status_code=502,
@@ -109,59 +120,116 @@ class WaveSpeedClient:
},
)
result = response.json().get("data")
if not result:
raise HTTPException(status_code=502, detail={"error": "WaveSpeed polling response missing data"})
return result
def poll_until_complete(
self,
prediction_id: str,
timeout_seconds: int = 240,
timeout_seconds: Optional[int] = None,
interval_seconds: float = 1.0,
) -> Dict[str, Any]:
"""
Poll WaveSpeed until the job completes, fails, or times out.
Poll WaveSpeed until the job completes or fails.
Matches the example pattern: simple polling loop until status is "completed" or "failed".
Args:
prediction_id: The prediction ID to poll for
timeout_seconds: Optional timeout in seconds. If None, polls indefinitely until completion/failure.
interval_seconds: Seconds to wait between polling attempts (default: 1.0, faster than 2.0)
Returns:
Dict containing the completed result
Raises:
HTTPException: If the task fails, polling fails, or times out (if timeout_seconds is set)
"""
start_time = time.time()
consecutive_errors = 0
max_consecutive_errors = 6 # safety guard for non-transient errors
while True:
try:
result = self.get_prediction_result(prediction_id)
consecutive_errors = 0 # Reset error counter on success
except HTTPException as exc:
detail = exc.detail or {}
if isinstance(detail, dict):
detail.setdefault("prediction_id", prediction_id)
detail.setdefault("resume_available", True)
detail.setdefault("error", detail.get("error", "WaveSpeed polling failed"))
raise HTTPException(status_code=exc.status_code, detail=detail) from exc
# Determine underlying status code (WaveSpeed vs proxy)
status_code = detail.get("status_code", exc.status_code)
# Treat 5xx as transient: keep polling indefinitely with backoff
if 500 <= int(status_code) < 600:
consecutive_errors += 1
backoff = min(30.0, interval_seconds * (2 ** (consecutive_errors - 1)))
logger.warning(
f"[WaveSpeed] Transient polling error {consecutive_errors} for {prediction_id}: "
f"{status_code}. Backing off {backoff:.1f}s"
)
time.sleep(backoff)
continue
# For non-transient (typically 4xx) errors, apply safety cap
consecutive_errors += 1
if consecutive_errors >= max_consecutive_errors:
logger.error(
f"[WaveSpeed] Too many polling errors ({consecutive_errors}) for {prediction_id}, "
f"status_code={status_code}. Giving up."
)
raise HTTPException(status_code=exc.status_code, detail=detail) from exc
backoff = min(30.0, interval_seconds * (2 ** (consecutive_errors - 1)))
logger.warning(
f"[WaveSpeed] Polling error {consecutive_errors}/{max_consecutive_errors} for {prediction_id}: "
f"{status_code}. Backing off {backoff:.1f}s"
)
time.sleep(backoff)
continue
# Extract status from result (matching example pattern)
status = result.get("status")
if status == "completed":
logger.info(f"[WaveSpeed] Prediction {prediction_id} completed.")
elapsed = time.time() - start_time
logger.info(f"[WaveSpeed] Prediction {prediction_id} completed in {elapsed:.1f}s")
return result
if status == "failed":
logger.error(f"[WaveSpeed] Prediction {prediction_id} failed: {result.get('error')}")
error_msg = result.get("error", "Unknown error")
logger.error(f"[WaveSpeed] Prediction {prediction_id} failed: {error_msg}")
raise HTTPException(
status_code=502,
detail={
"error": "WaveSpeed animation failed",
"prediction_id": prediction_id,
"details": result.get("error"),
},
)
elapsed = time.time() - start_time
if elapsed > timeout_seconds:
logger.error(f"[WaveSpeed] Prediction {prediction_id} timed out after {timeout_seconds}s")
raise HTTPException(
status_code=504,
detail={
"error": "WaveSpeed animation timed out",
"error": "WaveSpeed task failed",
"prediction_id": prediction_id,
"message": error_msg,
"details": result,
},
)
logger.debug(f"[WaveSpeed] Prediction {prediction_id} status={status}. Waiting...")
# Check timeout only if specified
if timeout_seconds is not None:
elapsed = time.time() - start_time
if elapsed > timeout_seconds:
logger.error(f"[WaveSpeed] Prediction {prediction_id} timed out after {timeout_seconds}s")
raise HTTPException(
status_code=504,
detail={
"error": "WaveSpeed task timed out",
"prediction_id": prediction_id,
"timeout_seconds": timeout_seconds,
"current_status": status,
"message": f"Task did not complete within {timeout_seconds} seconds. Status: {status}",
},
)
# Log progress periodically (every 30 seconds)
elapsed = time.time() - start_time
if int(elapsed) % 30 == 0 and elapsed > 0:
logger.info(f"[WaveSpeed] Polling {prediction_id}: status={status}, elapsed={elapsed:.0f}s")
# Poll faster (1.0s instead of 2.0s) to match example's responsiveness
time.sleep(interval_seconds)
def optimize_prompt(
@@ -469,7 +537,9 @@ class WaveSpeedClient:
# Fetch image bytes
logger.info(f"[WaveSpeed] Fetching image from URL: {image_url}")
image_response = requests.get(image_url, timeout=timeout)
# Use reasonable timeout for downloading the final image (60s should be enough)
# The timeout parameter is for polling, not for downloading
image_response = requests.get(image_url, timeout=60)
if image_response.status_code == 200:
image_bytes = image_response.content
logger.info(f"[WaveSpeed] Image generated successfully (size: {len(image_bytes)} bytes)")
@@ -481,6 +551,208 @@ class WaveSpeedClient:
detail="Failed to fetch generated image from WaveSpeed URL",
)
def generate_character_image(
    self,
    prompt: str,
    reference_image_bytes: bytes,
    style: str = "Auto",
    aspect_ratio: str = "16:9",
    rendering_speed: str = "Default",
    timeout: Optional[int] = None,
) -> bytes:
    """
    Generate image using Ideogram Character API to maintain character consistency.

    Submits the prompt plus a base64-encoded reference image, polls the
    resulting prediction until it completes, then downloads the first output.

    Args:
        prompt: Text prompt describing the scene/context for the character
        reference_image_bytes: Reference image bytes (base avatar)
        style: Character style type ("Auto", "Fiction", or "Realistic")
        aspect_ratio: Aspect ratio ("1:1", "16:9", "9:16", "4:3", "3:4")
        rendering_speed: Rendering speed ("Default", "Turbo", "Quality")
        timeout: Polling timeout in seconds. ``None`` (the default) or 0
            polls without a time limit. Submission and download use their
            own fixed timeouts.

    Returns:
        bytes: Generated image bytes with consistent character

    Raises:
        HTTPException: 504 on connection/read timeouts, 502 when submission,
            polling, or the image download fails or returns an unexpected payload.
    """
    import base64

    # Encode reference image as a data URI.
    # NOTE(review): the MIME type is always image/png regardless of the actual
    # bytes — confirm the API tolerates JPEG/WebP input labelled this way.
    image_base64 = base64.b64encode(reference_image_bytes).decode('utf-8')
    image_data_uri = f"data:image/png;base64,{image_base64}"

    url = f"{self.BASE_URL}/ideogram-ai/ideogram-character"

    # enable_sync_mode is deliberately not sent; results are obtained by polling below
    payload = {
        "prompt": prompt,
        "image": image_data_uri,
        "style": style,
        "aspect_ratio": aspect_ratio,
        "rendering_speed": rendering_speed,
    }

    logger.info(f"[WaveSpeed] Generating character image via Ideogram Character (prompt_length={len(prompt)})")

    # The POST only has to return a task ID, so short timeouts are enough.
    # Connection failures are retried with exponential backoff.
    max_retries = 2
    retry_delay = 2.0  # seconds

    for attempt in range(max_retries + 1):
        try:
            response = requests.post(
                url,
                headers=self._headers(),
                json=payload,
                timeout=(30, 30),  # (connect_timeout, read_timeout)
            )
            break  # Success, exit retry loop
        except (requests_exceptions.ConnectTimeout, requests_exceptions.ConnectionError) as e:
            if attempt < max_retries:
                logger.warning(f"[WaveSpeed] Connection attempt {attempt + 1}/{max_retries + 1} failed, retrying in {retry_delay}s: {e}")
                time.sleep(retry_delay)
                retry_delay *= 2  # Exponential backoff
                continue

            # Final attempt failed
            is_timeout = isinstance(e, requests_exceptions.ConnectTimeout)
            error_type = "Connection timeout" if is_timeout else "Connection error"
            logger.error(f"[WaveSpeed] {error_type} to Ideogram Character API after {max_retries + 1} attempts: {e}")
            raise HTTPException(
                status_code=504 if is_timeout else 502,
                detail={
                    "error": f"{error_type} to WaveSpeed Ideogram Character API",
                    "message": "Unable to establish connection to the image generation service after multiple attempts. Please check your network connection and try again.",
                    "exception": str(e),
                    "retry_recommended": True,
                },
            ) from e
        except requests_exceptions.Timeout as e:
            # Read timeouts are not retried: the request may already have been accepted
            logger.error(f"[WaveSpeed] Request timeout to Ideogram Character API: {e}")
            raise HTTPException(
                status_code=504,
                detail={
                    "error": "Request timeout to WaveSpeed Ideogram Character API",
                    "message": "The image generation request took too long. Please try again.",
                    "exception": str(e),
                },
            ) from e

    if response.status_code != 200:
        logger.error(f"[WaveSpeed] Character image generation failed: {response.status_code} {response.text}")
        raise HTTPException(
            status_code=502,
            detail={
                "error": "WaveSpeed Ideogram Character generation failed",
                "status_code": response.status_code,
                "response": response.text,
            },
        )

    try:
        response_json = response.json()
    except ValueError as e:
        # A 200 with a non-JSON body would otherwise surface as an unrelated decode error
        logger.error(f"[WaveSpeed] Non-JSON response from Ideogram Character API: {response.text}")
        raise HTTPException(
            status_code=502,
            detail="WaveSpeed Ideogram Character returned unexpected response format",
        ) from e

    # Payload may be wrapped in a "data" envelope or returned bare
    data = response_json.get("data") or response_json

    # Extract prediction ID
    prediction_id = data.get("id")
    if not prediction_id:
        logger.error(f"[WaveSpeed] No prediction ID in response: {response.text}")
        raise HTTPException(
            status_code=502,
            detail="WaveSpeed Ideogram Character response missing prediction id",
        )

    outputs = data.get("outputs") or []
    status = data.get("status", "unknown")

    logger.info(f"[WaveSpeed] Ideogram Character task created: prediction_id={prediction_id}, status={status}")

    if outputs and status == "completed":
        # Submission response already carries the result (unlikely but possible)
        logger.info(f"[WaveSpeed] Got immediate results from Ideogram Character")
    else:
        logger.info(f"[WaveSpeed] Polling for Ideogram Character result (status: {status}, prediction_id: {prediction_id})")

        # A falsy timeout (None or 0) means poll without a time limit
        polling_timeout = timeout if timeout else None

        result = self.poll_until_complete(
            prediction_id,
            timeout_seconds=polling_timeout,
            interval_seconds=0.5,
        )

        if not isinstance(result, dict):
            logger.error(f"[WaveSpeed] Unexpected result type: {type(result)}, value: {result}")
            raise HTTPException(
                status_code=502,
                detail="WaveSpeed Ideogram Character returned unexpected response format",
            )

        outputs = result.get("outputs") or []
        status = result.get("status", "unknown")

        if status != "completed":
            error_msg = result.get("error") or result.get("message") or str(result.get("details", "Unknown error"))
            logger.error(f"[WaveSpeed] Ideogram Character task did not complete: status={status}, error={error_msg}")
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "WaveSpeed Ideogram Character task failed",
                    "status": status,
                    "message": error_msg,
                }
            )

    # Extract image URL from outputs
    if not outputs:
        logger.error(f"[WaveSpeed] No outputs after polling: status={status}")
        raise HTTPException(
            status_code=502,
            detail="WaveSpeed Ideogram Character returned no outputs",
        )

    image_url = None
    if isinstance(outputs, list) and len(outputs) > 0:
        first_output = outputs[0]
        # An output entry is either the URL itself or a dict holding it
        if isinstance(first_output, str):
            image_url = first_output
        elif isinstance(first_output, dict):
            image_url = first_output.get("url") or first_output.get("image_url")

    if not image_url:
        logger.error(f"[WaveSpeed] No image URL in outputs: {outputs}")
        raise HTTPException(
            status_code=502,
            detail="WaveSpeed Ideogram Character response missing image URL",
        )

    # Download image
    logger.info(f"[WaveSpeed] Downloading character image from: {image_url}")
    try:
        image_response = requests.get(image_url, timeout=60)
    except requests_exceptions.Timeout as e:
        logger.error(f"[WaveSpeed] Failed to download image: {e}")
        raise HTTPException(
            status_code=504,
            detail="Failed to download generated character image",
        ) from e
    except requests_exceptions.RequestException as e:
        logger.error(f"[WaveSpeed] Failed to download image: {e}")
        raise HTTPException(
            status_code=502,
            detail="Failed to download generated character image",
        ) from e

    if image_response.status_code != 200:
        logger.error(f"[WaveSpeed] Failed to download image: {image_response.status_code}")
        raise HTTPException(
            status_code=502,
            detail="Failed to download generated character image",
        )

    image_bytes = image_response.content
    logger.info(f"[WaveSpeed] ✅ Successfully generated character image: {len(image_bytes)} bytes")

    return image_bytes
def generate_speech(
self,
text: str,
@@ -490,7 +762,7 @@ class WaveSpeedClient:
pitch: float = 0.0,
emotion: str = "happy",
enable_sync_mode: bool = True,
timeout: int = 60,
timeout: int = 120,
**kwargs
) -> bytes:
"""
@@ -537,7 +809,51 @@ class WaveSpeedClient:
payload[param] = kwargs[param]
logger.info(f"[WaveSpeed] Generating speech via {url} (voice={voice_id}, text_length={len(text)})")
response = requests.post(url, headers=self._headers(), json=payload, timeout=timeout)
# Retry on transient connection issues
max_retries = 2
retry_delay = 2.0
last_error = None
for attempt in range(max_retries + 1):
try:
response = requests.post(
url,
headers=self._headers(),
json=payload,
timeout=(30, 60), # connect, read
)
break
except (requests_exceptions.ConnectTimeout, requests_exceptions.ConnectionError) as e:
last_error = e
if attempt < max_retries:
logger.warning(
f"[WaveSpeed] Speech connection attempt {attempt + 1}/{max_retries + 1} failed, "
f"retrying in {retry_delay}s: {e}"
)
time.sleep(retry_delay)
retry_delay *= 2
continue
logger.error(f"[WaveSpeed] Speech connection failed after {max_retries + 1} attempts: {e}")
raise HTTPException(
status_code=504,
detail={
"error": "Connection to WaveSpeed speech API timed out",
"message": "Unable to reach the speech service. Please try again.",
"exception": str(e),
"retry_recommended": True,
},
)
except requests_exceptions.Timeout as e:
last_error = e
logger.error(f"[WaveSpeed] Speech request timeout: {e}")
raise HTTPException(
status_code=504,
detail={
"error": "WaveSpeed speech request timed out",
"message": "The speech generation request took too long. Please try again.",
"exception": str(e),
},
)
if response.status_code != 200:
logger.error(f"[WaveSpeed] Speech generation failed: {response.status_code} {response.text}")

View File

@@ -8,7 +8,6 @@ from fastapi import HTTPException
from loguru import logger
from .client import WaveSpeedClient
from .kling_animation import generate_animation_prompt
INFINITALK_MODEL_PATH = "wavespeed-ai/infinitetalk"
INFINITALK_MODEL_NAME = "wavespeed-ai/infinitetalk"
@@ -22,6 +21,67 @@ def _as_data_uri(content_bytes: bytes, mime_type: str) -> str:
return f"data:{mime_type};base64,{encoded}"
def _generate_simple_infinitetalk_prompt(
scene_data: Dict[str, Any],
story_context: Dict[str, Any],
) -> Optional[str]:
"""
Generate a balanced, concise prompt for InfiniteTalk.
InfiniteTalk is audio-driven, so the prompt should describe the scene and suggest
subtle motion, but avoid overly elaborate cinematic descriptions.
Returns None if no meaningful prompt can be generated.
"""
title = (scene_data.get("title") or "").strip()
description = (scene_data.get("description") or "").strip()
image_prompt = (scene_data.get("image_prompt") or "").strip()
# Build a balanced prompt: scene description + simple motion hint
parts = []
# Start with the main subject/scene
if title and len(title) > 5 and title.lower() not in ("scene", "podcast", "episode"):
parts.append(title)
elif description:
# Take first sentence or first 60 chars
desc_part = description.split('.')[0][:60].strip()
if desc_part:
parts.append(desc_part)
elif image_prompt:
# Take first sentence or first 60 chars
img_part = image_prompt.split('.')[0][:60].strip()
if img_part:
parts.append(img_part)
if not parts:
return None
# Add a simple, subtle motion suggestion (not elaborate camera movements)
# Keep it natural and audio-driven
motion_hints = [
"with subtle movement",
"with gentle motion",
"with natural animation",
]
# Combine scene description with subtle motion hint
if len(parts[0]) < 80:
# Room for a motion hint
prompt = f"{parts[0]}, {motion_hints[0]}"
else:
# Just use the description if it's already long enough
prompt = parts[0]
# Keep it concise - max 120 characters (allows for scene + motion hint)
prompt = prompt[:120].strip()
# Clean up trailing commas or incomplete sentences
if prompt.endswith(','):
prompt = prompt[:-1].strip()
return prompt if len(prompt) >= 15 else None
def animate_scene_with_voiceover(
*,
image_bytes: bytes,
@@ -31,6 +91,8 @@ def animate_scene_with_voiceover(
user_id: str,
resolution: str = "720p",
prompt_override: Optional[str] = None,
mask_image_bytes: Optional[bytes] = None,
seed: Optional[int] = -1,
image_mime: str = "image/png",
audio_mime: str = "audio/mpeg",
client: Optional[WaveSpeedClient] = None,
@@ -59,21 +121,28 @@ def animate_scene_with_voiceover(
if resolution not in {"480p", "720p"}:
raise HTTPException(status_code=400, detail="Resolution must be '480p' or '720p'.")
animation_prompt = prompt_override or generate_animation_prompt(scene_data, story_context, user_id)
# Generate simple, concise prompt for InfiniteTalk (audio-driven, less need for elaborate descriptions)
animation_prompt = prompt_override or _generate_simple_infinitetalk_prompt(scene_data, story_context)
payload = {
payload: Dict[str, Any] = {
"image": _as_data_uri(image_bytes, image_mime),
"audio": _as_data_uri(audio_bytes, audio_mime),
"resolution": resolution,
}
# Only include prompt if we have a meaningful one (InfiniteTalk works fine without it)
if animation_prompt:
payload["prompt"] = animation_prompt
if mask_image_bytes:
payload["mask_image"] = _as_data_uri(mask_image_bytes, image_mime)
if seed is not None:
payload["seed"] = seed
client = client or WaveSpeedClient()
prediction_id = client.submit_image_to_video(INFINITALK_MODEL_PATH, payload, timeout=60)
try:
result = client.poll_until_complete(prediction_id, timeout_seconds=600, interval_seconds=1.0)
# Poll faster (0.5s) to mirror reference pattern; allow up to 10 minutes
result = client.poll_until_complete(prediction_id, timeout_seconds=600, interval_seconds=0.5)
except HTTPException as exc:
detail = exc.detail or {}
if isinstance(detail, dict):

View File

@@ -1,197 +0,0 @@
# Build Optimization Guide
This guide explains how to optimize the production build for better performance.
## Current Issues
1. **Minify JavaScript**: 504 KiB savings possible
2. **Reduce unused JavaScript**: 980 KiB savings possible
3. **Minify CSS**: 24 KiB savings possible
4. **Reduce unused CSS**: 25 KiB savings possible
5. **Cache Headers**: 1,702 KiB not cached (requires server configuration)
## React Scripts Build Configuration
React Scripts already minifies JavaScript and CSS in production builds. However, you can optimize further:
### 1. Environment Variables
Make sure `.env.production` (already created in this project) contains:
```env
GENERATE_SOURCEMAP=false
INLINE_RUNTIME_CHUNK=false
```
### 2. Build Command
Run production build:
```bash
npm run build
```
This will:
- Minify JavaScript (already enabled)
- Minify CSS (already enabled)
- Tree-shake unused code (already enabled)
- Skip source map generation (disabled via `GENERATE_SOURCEMAP=false`)
## Reducing Unused JavaScript
### Analyze Bundle Size
Install webpack-bundle-analyzer:
```bash
npm install --save-dev webpack-bundle-analyzer
```
Add to `package.json` scripts:
```json
"analyze": "npm run build && npx webpack-bundle-analyzer build/static/js/*.js"
```
Run:
```bash
npm run analyze
```
### Common Issues and Solutions
1. **Large Dependencies**:
- `framer-motion`: 246 KiB - Consider lazy loading animations
- `@mui/material`: Multiple chunks - Already code-split
- `recharts`: Only load when needed
2. **Unused Imports**:
- Use ESLint rule: `"no-unused-vars": "error"`
- Run: `npx eslint --ext .ts,.tsx src/ --fix`
3. **Dynamic Imports**:
- Already implemented for routes
- Consider lazy loading heavy components like charts
## Server-Side Cache Headers
### For Express.js (if using)
```javascript
// Add to your Express server
app.use(express.static('build', {
maxAge: '1y',
immutable: true,
etag: true,
lastModified: true
}));
```
### For Nginx
```nginx
location /static {
alias /path/to/build/static;
expires 1y;
add_header Cache-Control "public, immutable";
access_log off;
}
```
### For Apache
```apache
<IfModule mod_expires.c>
ExpiresActive On
ExpiresByType application/javascript "access plus 1 year"
ExpiresByType text/css "access plus 1 year"
ExpiresByType image/png "access plus 1 year"
ExpiresByType image/jpeg "access plus 1 year"
ExpiresByType image/webp "access plus 1 year"
</IfModule>
```
## Image Optimization
### Convert AskAlwrity-min.ico to WebP
1. Install sharp or use online tool:
```bash
npm install --save-dev sharp
```
2. Create script `scripts/optimize-images.js`:
```javascript
const sharp = require('sharp');
const path = require('path');
sharp('public/AskAlwrity-min.ico')
.resize(60, 60)
.webp({ quality: 80 })
.toFile('public/AskAlwrity-min.webp')
.then(() => console.log('Image optimized!'));
```
3. Update `index.html`:
```html
<link rel="icon" href="%PUBLIC_URL%/AskAlwrity-min.webp" />
```
## Performance Budget
Set performance budgets in `package.json`:
```json
{
"performance": {
"budgets": [
{
"type": "initial",
"maximumWarning": "500kb",
"maximumError": "1mb"
},
{
"type": "anyComponentStyle",
"maximumWarning": "50kb",
"maximumError": "100kb"
}
]
}
}
```
## Monitoring
### Lighthouse CI
Add to CI/CD pipeline:
```bash
npm install -g @lhci/cli
lhci autorun
```
### Web Vitals
Monitor in production:
```javascript
import { getCLS, getFID, getFCP, getLCP, getTTFB } from 'web-vitals';
function sendToAnalytics(metric) {
// Send to your analytics service
console.log(metric);
}
getCLS(sendToAnalytics);
getFID(sendToAnalytics);
getFCP(sendToAnalytics);
getLCP(sendToAnalytics);
getTTFB(sendToAnalytics);
```
## Expected Improvements
After implementing all optimizations:
- **Performance Score**: 28 → 70-80+
- **Bundle Size**: Reduced by ~1.5MB (unused code + minification)
- **Cache Hit Rate**: 0% → 90%+ (with proper headers)
- **CLS**: 0.167 → <0.1 (with layout fixes)
- **LCP**: Improved by additional 200-300ms

View File

@@ -1,114 +0,0 @@
# Unused JavaScript Optimization - Progress Tracker
## ✅ Completed
1. **Bundle Analysis Setup**
- Added `source-map-explorer` to devDependencies
- Added `npm run analyze` script
- Created analysis guide
2. **Lazy Loading Infrastructure**
- ✅ Created `frontend/src/utils/lazyRecharts.tsx` - Lazy load recharts
- ✅ Created `frontend/src/utils/lazyWix.ts` - Lazy load Wix SDK
- ✅ Updated `frontend/src/components/billing/UsageTrends.tsx`:
- Replaced direct recharts imports with lazy versions
- Replaced lucide-react icons with MUI icons
- Added Suspense boundaries
## 📋 Remaining Tasks
### High Priority (Large Impact)
1. **Update Other Chart Components**
- [ ] `frontend/src/components/SchedulerDashboard/SchedulerCharts.tsx`
- [ ] `frontend/src/components/ContentPlanningDashboard/components/MonitoringCharts.tsx`
- [ ] `frontend/src/components/shared/charts/AdvancedChartComponents.tsx`
2. **Lazy Load Wix SDK**
- [ ] `frontend/src/components/WixTestPage/WixTestPage.tsx`
- [ ] `frontend/src/components/WixCallbackPage/WixCallbackPage.tsx`
- [ ] `frontend/src/components/OnboardingWizard/common/usePlatformConnections.ts`
### Medium Priority
3. **Replace Lucide Icons with MUI Icons**
- [ ] `frontend/src/components/billing/EnhancedBillingDashboard.tsx`
- [ ] `frontend/src/components/billing/CompactBillingDashboard.tsx`
- [ ] `frontend/src/components/billing/BillingOverview.tsx`
- [ ] Other billing components using lucide-react
4. **Optimize Framer Motion**
- Review usage and replace simple animations with CSS
- Lazy load for non-critical animations
### Low Priority
5. **Further Code Splitting**
- Lazy load heavy components within routes
- Split large components into smaller chunks
## 🎯 How to Continue
### Step 1: Run Bundle Analysis
```bash
cd frontend
npm install # Install source-map-explorer
npm run analyze
# Open bundle-report.html to see current state
```
### Step 2: Update Chart Components
Follow the pattern in `UsageTrends.tsx`:
```typescript
// Before
import { LineChart, Line } from 'recharts';
// After
import { LazyLineChart, Line, ChartLoadingFallback } from '../../utils/lazyRecharts';
import { Suspense } from 'react';
<Suspense fallback={<ChartLoadingFallback />}>
<LazyLineChart data={data}>
<Line />
</LazyLineChart>
</Suspense>
```
### Step 3: Replace Icons
```typescript
// Before
import { TrendingUp } from 'lucide-react';
<TrendingUp size={20} />
// After
import { TrendingUp as TrendingUpIcon } from '@mui/icons-material';
<TrendingUpIcon fontSize="small" />
```
### Step 4: Test
```bash
npm run build
npm run analyze # Check if bundle size decreased
```
## 📊 Expected Results
### Current
- Unused JavaScript: 980 KiB
- Recharts: ~200 KiB (loaded on every page)
- Wix SDK: ~100 KiB (loaded on every page)
### After All Optimizations
- Unused JavaScript: < 200 KiB (estimated)
- Recharts: Only loaded when charts are viewed
- Wix SDK: Only loaded on Wix-related pages
- Performance: 33 → 50-60+ (estimated)
## 📝 Notes
- Lazy loading adds a small delay when components first load
- Use Suspense boundaries with loading states
- Test all functionality after changes
- Monitor bundle size after each change

View File

@@ -1,162 +0,0 @@
# Performance Optimizations Applied
This document outlines all the performance optimizations implemented to improve Lighthouse scores and overall app performance.
## 1. Font Loading Optimization
### Changes Made:
- Added `preconnect` hints for Google Fonts in `index.html`
- Added `dns-prefetch` for faster DNS resolution
- Font loading already uses `font-display: swap` in `global.css`
### Impact:
- Reduces font loading time by ~330ms (LCP improvement)
- Prevents render-blocking font requests
## 2. Code Splitting
### Changes Made:
- Implemented `React.lazy()` for all route components in `App.tsx`
- Added `Suspense` boundaries with loading fallbacks
- Route-level code splitting reduces initial bundle size
### Impact:
- Reduces initial JavaScript bundle from ~3.4MB to smaller chunks
- Each route loads only when needed
- Estimated savings: ~2,474 KiB of unused JavaScript
## 3. Layout Shift (CLS) Fixes
### Changes Made:
- Changed `::after` and `::before` pseudo-elements from `absolute` to `fixed` positioning
- Added `will-change: transform` for animation optimization
- Added `overflow: hidden` to prevent layout shifts
- Added `minHeight` to WorkflowHeroSection and parent containers to reserve space
- Added `pointerEvents: 'none'` to pseudo-elements to prevent layout impact
- Fixed line-height and width constraints on typography elements
### Impact:
- Reduced CLS score from 0.634 to 0.167 (73% improvement)
- Further improvements expected with reserved space for hero section
- Prevents visual instability during page load
## 4. Component Memoization
### Changes Made:
- Added `useMemo` for expensive search computations in `MainDashboard`
- Added `useCallback` for event handlers to prevent unnecessary re-renders
- Optimized search debouncing logic
### Impact:
- Reduces unnecessary re-renders
- Improves main thread performance
- Reduces JavaScript execution time
## 5. Build Optimizations
### Changes Made:
- Created `.env.production` with optimization flags
- `GENERATE_SOURCEMAP=false` for smaller production builds
- `INLINE_RUNTIME_CHUNK=false` for better caching
### Impact:
- Smaller production bundle size
- Better browser caching
- Faster subsequent page loads
## 6. Resource Hints
### Changes Made:
- Added `preconnect` for Google Fonts
- Added `dns-prefetch` for external domains
- Added meta tags for better browser optimization
### Impact:
- Faster connection establishment
- Reduced latency for external resources
## Performance Progress
### Before Optimizations:
- **Performance Score**: 12
- **CLS**: 0.634
- **Bundle Size**: 3,435 KiB (single bundle)
- **Cache**: 0% (3,514 KiB not cached)
### After Initial Optimizations:
- **Performance Score**: 28 (133% improvement)
- **CLS**: 0.167 (73% improvement)
- **Bundle Size**: Code-split into multiple chunks
- **Cache**: Still needs server configuration
## Remaining Optimizations Needed
### 1. Image Optimization
- **Issue**: `AskAlwrity-min.ico` is 78.6 KiB but displayed at 60x60
- **Solution**:
- Convert to WebP format (saves ~68 KiB)
- Resize to actual display size (saves ~74 KiB)
- Use responsive images with `srcset`
### 2. Cache Headers
- **Issue**: No cache headers for static assets (3,514 KiB not cached)
- **Solution**: Configure server to add cache headers:
```
Cache-Control: public, max-age=31536000, immutable
```
For `bundle.js` and other static assets
### 3. Bundle Analysis
- **Issue**: Large bundle size (3,435 KiB for bundle.js)
- **Solution**:
- Analyze bundle with `webpack-bundle-analyzer`
- Remove unused dependencies
- Consider dynamic imports for heavy libraries
### 4. Third-Party Scripts
- **Issue**: Clerk and CopilotKit scripts add to main thread work
- **Solution**:
- Load third-party scripts asynchronously
- Defer non-critical scripts
- Consider loading Clerk after initial render
### 5. Long Tasks
- **Issue**: 20 long tasks found, longest 6,208ms
- **Solution**:
- Break up large computations
- Use `requestIdleCallback` for non-critical work
- Implement virtual scrolling for long lists
## Performance Monitoring
### Recommended Tools:
1. **Lighthouse CI**: Automate performance testing
2. **Web Vitals**: Monitor Core Web Vitals in production
3. **Bundle Analyzer**: Track bundle size over time
4. **React DevTools Profiler**: Identify slow components
### Target Metrics:
- **Performance Score**: 90+ (currently 28, up from 12)
- **FCP**: < 1.8s
- **LCP**: < 2.5s
- **CLS**: < 0.1
- **TBT**: < 200ms
- **Bundle Size**: < 500 KiB initial load
## Next Steps
1. **Immediate**:
- Optimize images (WebP conversion)
- Configure server cache headers
- Run bundle analysis
2. **Short-term**:
- Implement virtual scrolling
- Optimize third-party script loading
- Add service worker for caching
3. **Long-term**:
- Consider migrating to Vite for faster builds
- Implement progressive web app features
- Add performance budgets to CI/CD

View File

@@ -1,231 +0,0 @@
# Unused JavaScript Optimization Guide
## Current Issue
Lighthouse reports **980 KiB of unused JavaScript**. This guide helps identify and fix it.
## Strategy
### 1. Bundle Analysis
First, analyze what's taking up space:
```bash
cd frontend
npm install # Install source-map-explorer if needed
npm run analyze
```
This creates `bundle-report.html` - open it in a browser to see:
- Which packages are largest
- Which files import them
- Unused code within packages
### 2. Lazy Load Heavy Dependencies
#### A. Recharts (Charts Library)
**Size**: ~200+ KiB
**Usage**: Only in billing, analytics, and scheduler dashboards
**Before**:
```typescript
import { LineChart, Line } from 'recharts';
```
**After**:
```typescript
import { LazyLineChart, Line } from '../../utils/lazyRecharts';
import { Suspense } from 'react';
<Suspense fallback={<ChartSkeleton />}>
<LazyLineChart>
<Line />
</LazyLineChart>
</Suspense>
```
**Files to update**:
- `frontend/src/components/billing/UsageTrends.tsx`
- `frontend/src/components/SchedulerDashboard/SchedulerCharts.tsx`
- `frontend/src/components/ContentPlanningDashboard/components/MonitoringCharts.tsx`
- `frontend/src/components/shared/charts/AdvancedChartComponents.tsx`
#### B. Wix SDK
**Size**: ~100+ KiB
**Usage**: Only in WixTestPage and WixCallbackPage
**Before**:
```typescript
import { createClient } from '@wix/sdk';
```
**After**:
```typescript
const { createClient } = await import('@wix/sdk');
// Or use lazy loading in component
```
**Files to update**:
- `frontend/src/components/WixTestPage/WixTestPage.tsx`
- `frontend/src/components/WixCallbackPage/WixCallbackPage.tsx`
- `frontend/src/components/OnboardingWizard/common/usePlatformConnections.ts`
#### C. Framer Motion (Animations)
**Size**: ~246 KiB
**Usage**: Used extensively but can be optimized
**Strategy**:
1. Use CSS animations for simple transitions
2. Lazy load framer-motion for non-critical animations
3. Use `will-change` CSS property instead of complex animations
**Example**:
```typescript
// Instead of complex framer-motion for simple fade
// Use CSS:
const fadeIn = {
animation: 'fadeIn 0.3s ease-in'
};
```
### 3. Tree Shaking Optimization
#### A. Material-UI Icons
**Issue**: Importing entire icon set
**Before**:
```typescript
import { TrendingUp, TrendingDown } from '@mui/icons-material';
```
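**After** (React Scripts should tree-shake named imports automatically in production builds; verify with the bundle report, and switch to path imports if the whole icon set still shows up):
```typescript
import TrendingUp from '@mui/icons-material/TrendingUp';
import TrendingDown from '@mui/icons-material/TrendingDown';
```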
#### B. Lucide React Icons
**Issue**: Large icon library, some can be replaced with MUI icons
**Strategy**: Replace lucide-react icons with MUI icons where possible
**Before**:
```typescript
import { TrendingUp } from 'lucide-react';
```
**After**:
```typescript
import { TrendingUp } from '@mui/icons-material';
```
### 4. Remove Unused Dependencies
Check if these are actually used:
- `@wix/blog` - Only in WixTestPage
- `lucide-react` - Can be replaced with MUI icons in many places
- `zod` - Verify if all schemas are used
### 5. Code Splitting Improvements
#### A. Route-Level Splitting (Already Done ✅)
Routes are already lazy-loaded.
#### B. Component-Level Splitting
Lazy load heavy components within routes:
```typescript
// In MainDashboard.tsx
const EnhancedBillingDashboard = lazy(() =>
import('../billing/EnhancedBillingDashboard')
);
```
### 6. Dynamic Imports for Heavy Features
#### A. Charts
Only load charts when dashboard is viewed:
```typescript
const loadCharts = () => import('recharts');
```
#### B. Analytics
Only load analytics when analytics tab is opened:
```typescript
const loadAnalytics = () => import('./components/AnalyticsInsights');
```
## Implementation Steps
### Step 1: Analyze Bundle
```bash
npm run analyze
# Open bundle-report.html
```
### Step 2: Identify Large Dependencies
Look for:
- Packages > 50 KiB
- Packages used in < 3 places
- Packages that can be lazy-loaded
### Step 3: Lazy Load Heavy Dependencies
1. Create lazy wrappers (see `lazyRecharts.tsx`)
2. Update imports to use lazy versions
3. Add Suspense boundaries
### Step 4: Replace Icons
1. Find lucide-react imports
2. Replace with MUI icons where possible
3. Remove lucide-react if not needed
### Step 5: Test
```bash
npm run build
npm run analyze # Check if bundle size decreased
```
## Expected Results
### Before
- Unused JavaScript: 980 KiB
- Bundle size: Large initial load
### After
- Unused JavaScript: < 200 KiB (estimated)
- Bundle size: Reduced by ~500-700 KiB
- Performance: Improved initial load time
## Files to Update
### High Priority (Large Impact)
1. ✅ `frontend/src/utils/lazyRecharts.tsx` - Created
2. ✅ `frontend/src/utils/lazyWix.ts` - Created
3. `frontend/src/components/billing/UsageTrends.tsx` - Use lazy recharts
4. `frontend/src/components/SchedulerDashboard/SchedulerCharts.tsx` - Use lazy recharts
5. `frontend/src/components/WixTestPage/WixTestPage.tsx` - Use lazy Wix SDK
### Medium Priority
6. `frontend/src/components/ContentPlanningDashboard/components/MonitoringCharts.tsx`
7. `frontend/src/components/shared/charts/AdvancedChartComponents.tsx`
8. Replace lucide-react with MUI icons in billing components
### Low Priority (Optimization)
9. Optimize framer-motion usage
10. Further code splitting within components
## Monitoring
After changes, verify:
1. Bundle size decreased
2. Lighthouse "Reduce unused JavaScript" improved
3. No broken functionality
4. Charts still work (with loading states)
## Next Steps
1. Run `npm run analyze` to see current bundle
2. Update components to use lazy-loaded dependencies
3. Test functionality
4. Re-run Lighthouse audit

View File

@@ -1,16 +1,97 @@
import React from "react";
import { Stack, Box, Typography, Divider, Chip, Paper, alpha } from "@mui/material";
import { Psychology as PsychologyIcon, Insights as InsightsIcon, Search as SearchIcon } from "@mui/icons-material";
import React, { useState, useEffect } from "react";
import { Stack, Box, Typography, Divider, Chip, Paper, alpha, CircularProgress } from "@mui/material";
import { Psychology as PsychologyIcon, Insights as InsightsIcon, Search as SearchIcon, Person as PersonIcon, AutoAwesome as AutoAwesomeIcon } from "@mui/icons-material";
import { PodcastAnalysis } from "./types";
import { GlassyCard, glassyCardSx, SecondaryButton } from "./ui";
import { Refresh as RefreshIcon } from "@mui/icons-material";
import { aiApiClient } from "../../api/client";
interface AnalysisPanelProps {
analysis: PodcastAnalysis | null;
idea?: string;
duration?: number;
speakers?: number;
avatarUrl?: string | null;
avatarPrompt?: string | null;
onRegenerate?: () => void;
}
export const AnalysisPanel: React.FC<AnalysisPanelProps> = ({ analysis, onRegenerate }) => {
export const AnalysisPanel: React.FC<AnalysisPanelProps> = ({ analysis, idea, duration, speakers, avatarUrl, avatarPrompt, onRegenerate }) => {
const [avatarBlobUrl, setAvatarBlobUrl] = useState<string | null>(null);
const [avatarLoading, setAvatarLoading] = useState(false);
const [avatarError, setAvatarError] = useState(false);
// Load avatar image as blob for authenticated URLs
// Resolve `avatarUrl` into something an <img> can display.
// - Empty URL: nothing to show.
// - `blob:` URLs and plain (non-API) URLs: used as-is.
// - Authenticated podcast endpoints: fetched through `aiApiClient` as a blob and
//   exposed via an object URL; if that fails, fall back to the raw URL with the
//   stored token appended as a query parameter.
useEffect(() => {
  // Start every URL change from a clean slate so an error or spinner left over
  // from the previous avatar cannot stick to the new one.
  setAvatarError(false);
  setAvatarLoading(false);

  if (!avatarUrl) {
    setAvatarBlobUrl(null);
    return;
  }

  // Check if it's an authenticated endpoint
  const isAuthenticatedEndpoint =
    avatarUrl.includes('/api/podcast/images/') || avatarUrl.includes('/api/podcast/avatar/');

  // Existing blob URLs and direct URLs can be handed to <img> unchanged
  if (avatarUrl.startsWith('blob:') || !isAuthenticatedEndpoint) {
    setAvatarBlobUrl(avatarUrl);
    return;
  }

  // Pin the URL for the async work below; `cancelled` lets a superseded request
  // detect that its result is no longer wanted.
  const sourceUrl: string = avatarUrl;
  let cancelled = false;
  let createdBlobUrl: string | null = null;

  setAvatarLoading(true);

  const loadAvatarBlob = async () => {
    try {
      const response = await aiApiClient.get(sourceUrl, { responseType: 'blob' });
      // The URL changed (or the panel unmounted) while we were waiting:
      // drop the result instead of overwriting newer state.
      if (cancelled) return;
      createdBlobUrl = URL.createObjectURL(response.data);
      setAvatarBlobUrl(createdBlobUrl);
    } catch (error) {
      if (cancelled) return;
      console.error('[AnalysisPanel] Failed to load avatar as blob:', error);
      // Fallback: try with query token
      try {
        const token = localStorage.getItem('clerk_dashboard_token') || '';
        if (token) {
          // Respect an existing query string instead of always appending "?"
          const separator = sourceUrl.includes('?') ? '&' : '?';
          setAvatarBlobUrl(`${sourceUrl}${separator}token=${encodeURIComponent(token)}`);
        } else {
          setAvatarError(true);
        }
      } catch (fallbackError) {
        console.error('[AnalysisPanel] Fallback avatar loading failed:', fallbackError);
        setAvatarError(true);
      }
    } finally {
      // A superseded request must not clear the spinner of the request that replaced it
      if (!cancelled) {
        setAvatarLoading(false);
      }
    }
  };

  loadAvatarBlob();

  // Cleanup on unmount or when avatarUrl changes: ignore any late response and
  // release only the object URL this effect run created.
  return () => {
    cancelled = true;
    if (createdBlobUrl) {
      URL.revokeObjectURL(createdBlobUrl);
    }
    setAvatarBlobUrl(null);
  };
}, [avatarUrl]);
if (!analysis) return null;
return (
<GlassyCard
@@ -54,6 +135,229 @@ export const AnalysisPanel: React.FC<AnalysisPanelProps> = ({ analysis, onRegene
<Divider sx={{ borderColor: "rgba(0,0,0,0.06)" }} />
{/* Inputs Section */}
{(idea || duration || speakers || avatarUrl || avatarPrompt) && (
<>
<Box>
<Typography
variant="subtitle1"
sx={{
color: "#0f172a",
fontWeight: 700,
mb: 1.5,
display: "flex",
alignItems: "center",
gap: 0.5,
}}
>
Your Inputs
</Typography>
<Box
sx={{
display: "grid",
gridTemplateColumns: { xs: "1fr", md: avatarUrl ? "1fr 1fr" : "1fr" },
gap: 3,
alignItems: "flex-start",
}}
>
{/* Left Column: Text Inputs */}
<Stack spacing={1.5}>
{idea && (
<Box>
<Typography variant="caption" sx={{ color: "#64748b", fontWeight: 600, display: "block", mb: 0.5 }}>
Podcast Idea
</Typography>
<Typography variant="body2" sx={{ color: "#0f172a", wordBreak: "break-word" }}>
{idea}
</Typography>
</Box>
)}
<Stack direction="row" spacing={2} flexWrap="wrap">
{duration !== undefined && (
<Box>
<Typography variant="caption" sx={{ color: "#64748b", fontWeight: 600, display: "block", mb: 0.5 }}>
Duration
</Typography>
<Chip
label={`${duration} minutes`}
size="small"
sx={{ background: "#f1f5f9", color: "#0f172a", border: "1px solid rgba(0,0,0,0.08)" }}
/>
</Box>
)}
{speakers !== undefined && (
<Box>
<Typography variant="caption" sx={{ color: "#64748b", fontWeight: 600, display: "block", mb: 0.5 }}>
Speakers
</Typography>
<Chip
label={`${speakers} ${speakers === 1 ? "speaker" : "speakers"}`}
size="small"
sx={{ background: "#f1f5f9", color: "#0f172a", border: "1px solid rgba(0,0,0,0.08)" }}
/>
</Box>
)}
</Stack>
{/* AI Prompt Used for Avatar Generation */}
{avatarUrl && (
<Box>
<Typography
variant="caption"
sx={{
color: "#64748b",
fontWeight: 600,
display: "flex",
alignItems: "center",
gap: 0.5,
mb: 0.75,
}}
>
<AutoAwesomeIcon sx={{ fontSize: 14 }} />
AI Generation Prompt
</Typography>
{avatarPrompt ? (
<Paper
sx={{
p: 1.5,
background: "#f8fafc",
border: "1px solid rgba(0,0,0,0.08)",
borderRadius: 1.5,
maxHeight: 200,
overflow: "auto",
}}
>
<Typography
variant="caption"
sx={{
color: "#475569",
fontFamily: "monospace",
fontSize: "0.75rem",
lineHeight: 1.6,
whiteSpace: "pre-wrap",
wordBreak: "break-word",
display: "block",
}}
>
{avatarPrompt}
</Typography>
</Paper>
) : (
<Paper
sx={{
p: 1.5,
background: "#f1f5f9",
border: "1px solid rgba(0,0,0,0.08)",
borderRadius: 1.5,
}}
>
<Typography
variant="caption"
sx={{
color: "#64748b",
fontStyle: "italic",
fontSize: "0.75rem",
}}
>
Prompt not available (avatar was uploaded or generated before this feature was added)
</Typography>
</Paper>
)}
</Box>
)}
</Stack>
{/* Right Column: Presenter Avatar */}
{avatarUrl && (
<Box>
<Typography
variant="caption"
sx={{
color: "#64748b",
fontWeight: 600,
display: "flex",
alignItems: "center",
gap: 0.5,
mb: 1,
}}
>
<PersonIcon sx={{ fontSize: 16 }} />
Presenter Avatar
</Typography>
<Box
sx={{
width: "100%",
maxWidth: { xs: "100%", md: 300 },
borderRadius: 2,
overflow: "hidden",
border: "1px solid rgba(102,126,234,0.2)",
background: alpha("#667eea", 0.05),
position: "relative",
aspectRatio: "1",
boxShadow: "0 4px 12px rgba(0,0,0,0.08)",
}}
>
{avatarLoading ? (
<Box
sx={{
display: "flex",
alignItems: "center",
justifyContent: "center",
height: "100%",
background: "#f8fafc",
}}
>
<CircularProgress size={40} />
</Box>
) : avatarError ? (
<Box
sx={{
display: "flex",
alignItems: "center",
justifyContent: "center",
height: "100%",
background: "#fef2f2",
color: "#dc2626",
p: 2,
}}
>
<Typography variant="caption" sx={{ textAlign: "center" }}>
Failed to load avatar
</Typography>
</Box>
) : avatarBlobUrl ? (
<Box
component="img"
src={avatarBlobUrl}
alt="Podcast Presenter"
sx={{
width: "100%",
height: "100%",
objectFit: "cover",
display: "block",
}}
onError={(e) => {
console.error('[AnalysisPanel] Avatar image failed to load:', {
src: e.currentTarget.src,
avatarUrl,
avatarBlobUrl,
});
setAvatarError(true);
}}
onLoad={() => {
console.log('[AnalysisPanel] Avatar image loaded successfully');
}}
/>
) : null}
</Box>
</Box>
)}
</Box>
</Box>
<Divider sx={{ borderColor: "rgba(0,0,0,0.06)" }} />
</>
)}
<Box sx={{ display: "grid", gridTemplateColumns: { xs: "1fr", md: "1fr 1fr" }, gap: 3 }}>
<Stack spacing={2}>
<Box>

View File

@@ -6,6 +6,9 @@ import {
Info as InfoIcon,
HelpOutline as HelpOutlineIcon,
AttachMoney as AttachMoneyIcon,
CloudUpload as CloudUploadIcon,
Person as PersonIcon,
Delete as DeleteIcon,
} from "@mui/icons-material";
import { CreateProjectPayload, Knobs } from "./types";
import { PrimaryButton, SecondaryButton } from "./ui";
@@ -35,6 +38,9 @@ export const CreateModal: React.FC<CreateModalProps> = ({ onCreate, open, defaul
const [budgetCap, setBudgetCap] = useState<number>(50);
const [voiceFile, setVoiceFile] = useState<File | null>(null);
const [avatarFile, setAvatarFile] = useState<File | null>(null);
const [avatarPreview, setAvatarPreview] = useState<string | null>(null);
const [avatarUrl, setAvatarUrl] = useState<string | null>(null); // Store uploaded avatar URL
const [makingPresentable, setMakingPresentable] = useState(false);
const [knobs, setKnobs] = useState<Knobs>({ ...defaultKnobs });
const [placeholderIndex, setPlaceholderIndex] = useState(0);
@@ -107,8 +113,22 @@ export const CreateModal: React.FC<CreateModalProps> = ({ onCreate, open, defaul
const canSubmit = Boolean(idea || url);
const submit = () => {
const submit = async () => {
if (!canSubmit || isSubmitting) return;
// If avatar was uploaded but not yet uploaded to server, upload it now
let finalAvatarUrl: string | null = avatarUrl;
if (avatarFile && !avatarUrl) {
try {
const { podcastApi } = await import("../../services/podcastApi");
const uploadResult = await podcastApi.uploadAvatar(avatarFile);
finalAvatarUrl = uploadResult.avatar_url;
} catch (error) {
console.error('Avatar upload failed:', error);
// Continue without avatar
}
}
onCreate({
ideaOrUrl: idea || url,
speakers,
@@ -116,6 +136,7 @@ export const CreateModal: React.FC<CreateModalProps> = ({ onCreate, open, defaul
knobs,
budgetCap,
files: { voiceFile, avatarFile },
avatarUrl: finalAvatarUrl,
});
};
@@ -127,6 +148,9 @@ export const CreateModal: React.FC<CreateModalProps> = ({ onCreate, open, defaul
setBudgetCap(50);
setVoiceFile(null);
setAvatarFile(null);
setAvatarPreview(null);
setAvatarUrl(null);
setMakingPresentable(false);
setKnobs({ ...defaultKnobs });
setPlaceholderIndex(0);
};
@@ -141,6 +165,68 @@ export const CreateModal: React.FC<CreateModalProps> = ({ onCreate, open, defaul
setSpeakers(clamped);
};
/**
 * Handle selection of an avatar image from the hidden file input.
 *
 * Validates the file (must be an image, at most 5MB), stores it in state,
 * builds a local data-URL preview, and uploads it immediately so that
 * "Make Presentable" has a server URL to work with. If the eager upload
 * fails, the file stays in state and `submit` retries the upload.
 */
const handleAvatarChange = async (e: React.ChangeEvent<HTMLInputElement>) => {
  const input = e.target;
  const file = input.files?.[0];
  // Reset the input so that picking the same file again (for example after a
  // rejected selection) still fires a change event. The File object we already
  // hold stays valid after the reset.
  input.value = "";
  if (!file) return;

  // Validate file type
  if (!file.type.startsWith('image/')) {
    console.error('Please select an image file');
    return;
  }
  // Validate file size (max 5MB)
  if (file.size > 5 * 1024 * 1024) {
    console.error('Image file size must be less than 5MB');
    return;
  }

  setAvatarFile(file);
  // Drop any URL left over from a previously selected file. Otherwise, if the
  // upload below fails, `submit` would see a non-null avatarUrl and send the
  // old image's URL instead of uploading this file.
  setAvatarUrl(null);

  // Create a local preview
  const reader = new FileReader();
  reader.onloadend = () => {
    // `result` is null when the read failed; only a string is a usable preview.
    if (typeof reader.result === "string") {
      setAvatarPreview(reader.result);
    }
  };
  reader.readAsDataURL(file);

  // Upload image immediately to get URL (for "Make Presentable" feature)
  try {
    const { podcastApi } = await import("../../services/podcastApi");
    const uploadResult = await podcastApi.uploadAvatar(file);
    setAvatarUrl(uploadResult.avatar_url);
  } catch (error) {
    console.error('Avatar upload failed:', error);
    // Continue with local preview - upload will happen on submit
  }
};
/**
 * Clear the selected avatar and every piece of state derived from it
 * (file, preview, uploaded URL, and the "Make Presentable" busy flag).
 */
const handleRemoveAvatar = () => {
  // The preview can be an object URL (created after "Make Presentable").
  // Object URLs keep their Blob alive until revoked, so release it here.
  // Data-URL previews from FileReader need no cleanup.
  if (avatarPreview?.startsWith("blob:")) {
    URL.revokeObjectURL(avatarPreview);
  }
  setAvatarFile(null);
  setAvatarPreview(null);
  setAvatarUrl(null);
  setMakingPresentable(false);
};
/**
 * Ask the backend to transform the uploaded avatar into a presenter image,
 * then swap the preview and stored URL to the transformed version.
 *
 * Does nothing when no avatar has been uploaded yet or a transformation is
 * already running. Failures are logged and leave the current avatar untouched.
 */
const handleMakePresentable = async () => {
  if (!avatarUrl || makingPresentable) return;

  try {
    setMakingPresentable(true);
    const { podcastApi } = await import("../../services/podcastApi");
    const result = await podcastApi.makeAvatarPresentable(avatarUrl);

    // Fetch the transformed image as a blob so it can be displayed via an object URL
    const { aiApiClient } = await import("../../api/client");
    const response = await aiApiClient.get(result.avatar_url, { responseType: 'blob' });
    const blobUrl = URL.createObjectURL(response.data);

    // A previous run of this handler may have left an object URL as the
    // preview. Revoke it before replacing it, otherwise every repeated
    // transformation leaks one Blob for the lifetime of the page.
    if (avatarPreview?.startsWith("blob:")) {
      URL.revokeObjectURL(avatarPreview);
    }
    setAvatarPreview(blobUrl);
    setAvatarUrl(result.avatar_url);
  } catch (error) {
    console.error('Failed to make avatar presentable:', error);
    // Could show error message to user
  } finally {
    setMakingPresentable(false);
  }
};
return (
<Paper
elevation={0}
@@ -601,181 +687,372 @@ export const CreateModal: React.FC<CreateModalProps> = ({ onCreate, open, defaul
{/* Settings Section */}
<Box
sx={{
p: 3,
borderRadius: 2,
background: alpha("#f8fafc", 0.5),
border: "1px solid rgba(15, 23, 42, 0.06)",
p: 3.5,
borderRadius: 2.5,
background: "linear-gradient(135deg, rgba(248, 250, 252, 0.8) 0%, rgba(241, 245, 249, 0.8) 100%)",
border: "1.5px solid rgba(15, 23, 42, 0.08)",
boxShadow: "0 1px 3px rgba(15, 23, 42, 0.04), 0 4px 12px rgba(15, 23, 42, 0.06)",
}}
>
<Typography variant="subtitle2" sx={{ mb: 2, color: "#0f172a", fontWeight: 600, fontSize: "0.9375rem" }}>
Podcast Settings
</Typography>
<Stack direction={{ xs: "column", sm: "row" }} spacing={2} alignItems="flex-start">
<Stack direction={{ xs: "column", sm: "row" }} spacing={2} sx={{ flex: 1 }}>
<TextField
label="Duration (minutes)"
type="number"
value={duration}
onChange={(e) => handleDurationChange(Number(e.target.value) || 1)}
InputProps={{ inputProps: { min: 1, max: 10 } }}
size="small"
helperText={duration > 10 ? "Maximum duration is 10 minutes" : `Recommended: 1-3 minutes for quick tests (currently: ${duration} min)`}
error={duration > 10}
<Stack direction="row" spacing={1.5} alignItems="center" sx={{ mb: 3 }}>
<Box
sx={{
maxWidth: 220,
"& .MuiOutlinedInput-root": {
backgroundColor: "#ffffff",
border: "1.5px solid rgba(15, 23, 42, 0.12)",
borderRadius: 2,
"&:hover": {
backgroundColor: "#ffffff",
borderColor: "rgba(102, 126, 234, 0.4)",
},
"&.Mui-focused": {
borderColor: "#667eea",
borderWidth: 2,
},
},
"& .MuiInputLabel-root": {
color: "#64748b",
"&.Mui-focused": {
color: "#667eea",
},
},
"& .MuiFormHelperText-root": {
color: "#64748b",
fontSize: "0.8125rem",
},
}}
/>
<TextField
label="Number of speakers"
type="number"
value={speakers}
onChange={(e) => handleSpeakersChange(Number(e.target.value) || 1)}
InputProps={{ inputProps: { min: 1, max: 2 } }}
size="small"
helperText={speakers > 2 ? "Maximum 2 speakers supported" : `Supports 1-2 speakers (currently: ${speakers})`}
error={speakers > 2}
sx={{
maxWidth: 220,
"& .MuiOutlinedInput-root": {
backgroundColor: "#ffffff",
border: "1.5px solid rgba(15, 23, 42, 0.12)",
borderRadius: 2,
"&:hover": {
backgroundColor: "#ffffff",
borderColor: "rgba(102, 126, 234, 0.4)",
},
"&.Mui-focused": {
borderColor: "#667eea",
borderWidth: 2,
},
},
"& .MuiInputLabel-root": {
color: "#64748b",
"&.Mui-focused": {
color: "#667eea",
},
},
"& .MuiFormHelperText-root": {
color: "#64748b",
fontSize: "0.8125rem",
},
}}
/>
</Stack>
{/* Cost Breakdown Panel - positioned in empty space */}
<Paper
elevation={0}
sx={{
p: 2.5,
background: "linear-gradient(135deg, rgba(16, 185, 129, 0.08) 0%, rgba(5, 150, 105, 0.08) 100%)",
border: "1.5px solid rgba(16, 185, 129, 0.2)",
width: 40,
height: 40,
borderRadius: 2,
minWidth: { xs: "100%", sm: 300 },
flex: { xs: "none", sm: "0 0 auto" },
boxShadow: "0 2px 8px rgba(16, 185, 129, 0.08)",
background: "linear-gradient(135deg, rgba(102, 126, 234, 0.12) 0%, rgba(118, 75, 162, 0.12) 100%)",
display: "flex",
alignItems: "center",
justifyContent: "center",
}}
>
<Stack spacing={1.5}>
<Stack direction="row" spacing={1} alignItems="center">
<Box
sx={{
width: 32,
height: 32,
borderRadius: 1.5,
background: "linear-gradient(135deg, rgba(16, 185, 129, 0.15) 0%, rgba(5, 150, 105, 0.15) 100%)",
display: "flex",
alignItems: "center",
justifyContent: "center",
}}
>
<AttachMoneyIcon sx={{ fontSize: "1.125rem", color: "#059669" }} />
</Box>
<Typography variant="subtitle2" sx={{ color: "#0f172a", fontWeight: 600, fontSize: "0.875rem" }}>
Estimated Cost
</Typography>
</Stack>
<Typography
variant="h5"
sx={{
color: "#059669",
fontWeight: 700,
fontSize: "1.75rem",
lineHeight: 1.2,
<AutoAwesomeIcon sx={{ color: "#667eea", fontSize: "1.25rem" }} />
</Box>
<Typography variant="h6" sx={{ color: "#0f172a", fontWeight: 700, fontSize: "1.125rem", letterSpacing: "-0.01em" }}>
Podcast Settings
</Typography>
</Stack>
<Stack direction={{ xs: "column", lg: "row" }} spacing={3} alignItems="flex-start">
{/* Duration and Speakers in vertical column */}
<Box
sx={{
flex: { xs: "1 1 auto", lg: "0 0 280px" },
width: { xs: "100%", lg: "280px" },
p: 2.5,
borderRadius: 2,
background: "#ffffff",
border: "1px solid rgba(15, 23, 42, 0.08)",
boxShadow: "0 1px 2px rgba(15, 23, 42, 0.04)",
}}
>
<Typography variant="subtitle2" sx={{ mb: 2, color: "#0f172a", fontWeight: 600, fontSize: "0.875rem" }}>
Basic Configuration
</Typography>
<Stack spacing={2.5}>
<TextField
label="Duration (minutes)"
type="number"
value={duration}
onChange={(e) => handleDurationChange(Number(e.target.value) || 1)}
InputProps={{ inputProps: { min: 1, max: 10 } }}
size="small"
helperText={duration > 10 ? "Maximum duration is 10 minutes" : `Recommended: 1-3 minutes for quick tests`}
error={duration > 10}
fullWidth
sx={{
"& .MuiOutlinedInput-root": {
backgroundColor: "#f8fafc",
border: "1.5px solid rgba(15, 23, 42, 0.12)",
borderRadius: 2,
transition: "all 0.2s",
"&:hover": {
backgroundColor: "#ffffff",
borderColor: "rgba(102, 126, 234, 0.4)",
boxShadow: "0 0 0 3px rgba(102, 126, 234, 0.08)",
},
"&.Mui-focused": {
backgroundColor: "#ffffff",
borderColor: "#667eea",
borderWidth: 2,
boxShadow: "0 0 0 3px rgba(102, 126, 234, 0.12)",
},
},
"& .MuiInputLabel-root": {
color: "#64748b",
fontWeight: 500,
"&.Mui-focused": {
color: "#667eea",
fontWeight: 600,
},
},
"& .MuiFormHelperText-root": {
color: duration > 10 ? "#dc2626" : "#64748b",
fontSize: "0.8125rem",
mt: 0.75,
},
}}
>
${estimatedCost.total}
</Typography>
<Stack spacing={0.75} sx={{ mt: 0.5 }}>
<Box sx={{ display: "flex", justifyContent: "space-between", alignItems: "center" }}>
<Typography variant="caption" sx={{ color: "#64748b", fontSize: "0.8125rem", fontWeight: 400 }}>
Audio Generation
</Typography>
<Typography variant="caption" sx={{ color: "#0f172a", fontSize: "0.8125rem", fontWeight: 600 }}>
${estimatedCost.ttsCost}
</Typography>
</Box>
<Box sx={{ display: "flex", justifyContent: "space-between", alignItems: "center" }}>
<Typography variant="caption" sx={{ color: "#64748b", fontSize: "0.8125rem", fontWeight: 400 }}>
Avatar Creation
</Typography>
<Typography variant="caption" sx={{ color: "#0f172a", fontSize: "0.8125rem", fontWeight: 600 }}>
${estimatedCost.avatarCost}
</Typography>
</Box>
<Box sx={{ display: "flex", justifyContent: "space-between", alignItems: "center" }}>
<Typography variant="caption" sx={{ color: "#64748b", fontSize: "0.8125rem", fontWeight: 400 }}>
Video Rendering
</Typography>
<Typography variant="caption" sx={{ color: "#0f172a", fontSize: "0.8125rem", fontWeight: 600 }}>
${estimatedCost.videoCost}
</Typography>
</Box>
<Box sx={{ display: "flex", justifyContent: "space-between", alignItems: "center" }}>
<Typography variant="caption" sx={{ color: "#64748b", fontSize: "0.8125rem", fontWeight: 400 }}>
Research
</Typography>
<Typography variant="caption" sx={{ color: "#0f172a", fontSize: "0.8125rem", fontWeight: 600 }}>
${estimatedCost.researchCost}
</Typography>
</Box>
</Stack>
/>
<TextField
label="Number of speakers"
type="number"
value={speakers}
onChange={(e) => handleSpeakersChange(Number(e.target.value) || 1)}
InputProps={{ inputProps: { min: 1, max: 2 } }}
size="small"
helperText={speakers > 2 ? "Maximum 2 speakers supported" : `Supports 1-2 speakers`}
error={speakers > 2}
fullWidth
sx={{
"& .MuiOutlinedInput-root": {
backgroundColor: "#f8fafc",
border: "1.5px solid rgba(15, 23, 42, 0.12)",
borderRadius: 2,
transition: "all 0.2s",
"&:hover": {
backgroundColor: "#ffffff",
borderColor: "rgba(102, 126, 234, 0.4)",
boxShadow: "0 0 0 3px rgba(102, 126, 234, 0.08)",
},
"&.Mui-focused": {
backgroundColor: "#ffffff",
borderColor: "#667eea",
borderWidth: 2,
boxShadow: "0 0 0 3px rgba(102, 126, 234, 0.12)",
},
},
"& .MuiInputLabel-root": {
color: "#64748b",
fontWeight: 500,
"&.Mui-focused": {
color: "#667eea",
fontWeight: 600,
},
},
"& .MuiFormHelperText-root": {
color: speakers > 2 ? "#dc2626" : "#64748b",
fontSize: "0.8125rem",
mt: 0.75,
},
}}
/>
</Stack>
</Box>
{/* Avatar Upload Section - replacing Estimated Cost */}
<Box
sx={{
flex: 1,
minWidth: 0,
p: 2.5,
borderRadius: 2,
background: "#ffffff",
border: "1px solid rgba(15, 23, 42, 0.08)",
boxShadow: "0 1px 2px rgba(15, 23, 42, 0.04)",
}}
>
<Stack direction="row" spacing={1.5} alignItems="center" sx={{ mb: 2.5 }}>
<Box
sx={{
mt: 1,
pt: 1.5,
borderTop: "1.5px solid rgba(16, 185, 129, 0.15)",
width: 36,
height: 36,
borderRadius: 1.5,
background: "linear-gradient(135deg, rgba(102, 126, 234, 0.12) 0%, rgba(118, 75, 162, 0.12) 100%)",
display: "flex",
alignItems: "center",
justifyContent: "center",
}}
>
<Typography variant="caption" sx={{ color: "#64748b", fontSize: "0.75rem", fontWeight: 500 }}>
{duration} min {speakers} speaker{speakers > 1 ? "s" : ""} {knobs.bitrate === "hd" ? "HD" : "Standard"} quality
</Typography>
<PersonIcon fontSize="small" sx={{ color: "#667eea" }} />
</Box>
<Typography variant="subtitle2" sx={{ color: "#0f172a", fontWeight: 600, fontSize: "0.9375rem" }}>
Podcast Presenter Avatar
</Typography>
<Tooltip
title={
<Box>
<Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5 }}>
Avatar Options:
</Typography>
<Typography variant="body2" component="div" sx={{ fontSize: "0.875rem", lineHeight: 1.6 }}>
<strong>Upload your photo:</strong> We'll enhance it into a professional podcast presenter using AI. Click "Make Presentable" after upload.<br/><br/>
<strong>Skip upload:</strong> After analysis completes, we'll generate professional presenter images based on your podcast topic, audience, and speaker count.
</Typography>
</Box>
}
arrow
placement="top"
componentsProps={{
tooltip: {
sx: {
bgcolor: "#0f172a",
color: "#ffffff",
maxWidth: 320,
fontSize: "0.875rem",
p: 1.5,
boxShadow: "0 4px 12px rgba(0,0,0,0.15)",
},
},
arrow: {
sx: {
color: "#0f172a",
},
},
}}
>
<InfoIcon fontSize="small" sx={{ color: "#94a3b8", cursor: "help" }} />
</Tooltip>
</Stack>
<Stack direction={{ xs: "column", sm: "row" }} spacing={2.5} alignItems="flex-start">
{avatarPreview ? (
<Stack spacing={1.5} sx={{ flexShrink: 0 }}>
<Box sx={{ position: "relative", display: "inline-block" }}>
<Box
component="img"
src={avatarPreview}
alt="Avatar preview"
sx={{
width: 140,
height: 140,
objectFit: "cover",
borderRadius: 2.5,
border: "2px solid #e2e8f0",
boxShadow: "0 2px 8px rgba(15, 23, 42, 0.08)",
}}
/>
<IconButton
size="small"
onClick={handleRemoveAvatar}
sx={{
position: "absolute",
top: -8,
right: -8,
bgcolor: "white",
border: "1.5px solid #e2e8f0",
boxShadow: "0 2px 4px rgba(15, 23, 42, 0.1)",
"&:hover": {
bgcolor: "#f8fafc",
borderColor: "#dc2626",
color: "#dc2626",
},
}}
>
<DeleteIcon fontSize="small" />
</IconButton>
</Box>
{avatarUrl && (
<Tooltip
title="Transform your uploaded photo into a professional podcast presenter. This AI enhancement optimizes your photo for video generation while maintaining your appearance and identity."
arrow
placement="top"
>
<Box>
<SecondaryButton
onClick={handleMakePresentable}
disabled={makingPresentable}
loading={makingPresentable}
startIcon={!makingPresentable ? <AutoAwesomeIcon fontSize="small" /> : undefined}
sx={{
fontSize: "0.8125rem",
py: 0.75,
width: "100%",
background: makingPresentable ? undefined : "linear-gradient(135deg, rgba(102, 126, 234, 0.08) 0%, rgba(118, 75, 162, 0.08) 100%)",
border: makingPresentable ? undefined : "1px solid rgba(102, 126, 234, 0.2)",
color: makingPresentable ? undefined : "#667eea",
fontWeight: 600,
"&:hover": {
background: makingPresentable ? undefined : "linear-gradient(135deg, rgba(102, 126, 234, 0.12) 0%, rgba(118, 75, 162, 0.12) 100%)",
},
}}
>
{makingPresentable ? "Transforming..." : "Make Presentable"}
</SecondaryButton>
</Box>
</Tooltip>
)}
</Stack>
) : (
<Box
component="label"
sx={{
display: "flex",
flexDirection: "column",
alignItems: "center",
justifyContent: "center",
width: { xs: "100%", sm: 200 },
minHeight: 140,
border: "2px dashed #cbd5e1",
borderRadius: 2.5,
bgcolor: "#f8fafc",
cursor: "pointer",
transition: "all 0.2s",
flexShrink: 0,
"&:hover": {
borderColor: "#667eea",
bgcolor: "#f1f5f9",
borderWidth: "2.5px",
boxShadow: "0 0 0 3px rgba(102, 126, 234, 0.08)",
},
}}
>
<input
type="file"
accept="image/*"
onChange={handleAvatarChange}
style={{ display: "none" }}
/>
<CloudUploadIcon sx={{ color: "#94a3b8", fontSize: 36, mb: 1.5 }} />
<Typography variant="body2" sx={{ color: "#64748b", fontWeight: 600, mb: 0.5 }}>
Upload Your Photo
</Typography>
<Typography variant="caption" sx={{ color: "#94a3b8", textAlign: "center", px: 2, lineHeight: 1.5 }}>
Optional - We'll enhance it with AI or generate one after analysis
</Typography>
</Box>
)}
<Box sx={{ flex: 1, minWidth: 0 }}>
<Stack spacing={1.5}>
<Box>
<Typography variant="body2" sx={{ color: "#0f172a", fontSize: "0.9375rem", lineHeight: 1.7, fontWeight: 500, mb: 1 }}>
Choose Your Avatar Option:
</Typography>
<Stack spacing={1.5}>
<Box
sx={{
p: 1.5,
borderRadius: 1.5,
background: alpha("#f0f4ff", 0.6),
border: "1px solid rgba(99, 102, 241, 0.2)",
}}
>
<Typography variant="body2" sx={{ color: "#0f172a", fontSize: "0.875rem", fontWeight: 600, mb: 0.5, display: "flex", alignItems: "center", gap: 0.5 }}>
<AutoAwesomeIcon fontSize="small" sx={{ color: "#667eea" }} />
Upload Your Photo (Recommended)
</Typography>
<Typography variant="body2" sx={{ color: "#475569", fontSize: "0.8125rem", lineHeight: 1.6 }}>
Upload your photo and we'll enhance it into a professional podcast presenter using AI. After upload, click <strong>"Make Presentable"</strong> to transform your photo into a podcast-ready avatar that maintains your appearance while optimizing it for video generation.
</Typography>
</Box>
<Box
sx={{
p: 1.5,
borderRadius: 1.5,
background: alpha("#f8fafc", 0.8),
border: "1px solid rgba(15, 23, 42, 0.1)",
}}
>
<Typography variant="body2" sx={{ color: "#0f172a", fontSize: "0.875rem", fontWeight: 600, mb: 0.5, display: "flex", alignItems: "center", gap: 0.5 }}>
<PersonIcon fontSize="small" sx={{ color: "#64748b" }} />
Let ALwrity Generate (Alternative)
</Typography>
<Typography variant="body2" sx={{ color: "#475569", fontSize: "0.8125rem", lineHeight: 1.6 }}>
If you skip upload, we'll automatically generate professional presenter images <strong>after the AI analysis completes</strong>. The generated presenters will be tailored to your podcast topic, target audience, content type, and speaker count for the best fit.
</Typography>
</Box>
</Stack>
</Box>
<Box
sx={{
p: 1.5,
borderRadius: 1.5,
background: alpha("#f0f4ff", 0.5),
border: "1px solid rgba(99, 102, 241, 0.15)",
}}
>
<Typography variant="caption" sx={{ color: "#6366f1", fontSize: "0.8125rem", fontWeight: 500, display: "flex", alignItems: "center", gap: 0.5 }}>
<InfoIcon fontSize="inherit" />
Supported formats: JPG, PNG, WebP (max 5MB)
</Typography>
</Box>
</Stack>
</Box>
</Stack>
</Paper>
</Box>
</Stack>
</Box>

View File

@@ -1,6 +1,6 @@
import React, { useMemo } from "react";
import { Stack, Typography, Divider, Chip, Tooltip, IconButton, alpha } from "@mui/material";
import { OpenInNew as OpenInNewIcon, ContentCopy as ContentCopyIcon } from "@mui/icons-material";
import React, { useMemo, useState } from "react";
import { Stack, Typography, Divider, Chip, Tooltip, IconButton, alpha, Box } from "@mui/material";
import { OpenInNew as OpenInNewIcon, ContentCopy as ContentCopyIcon, ExpandMore as ExpandMoreIcon, ExpandLess as ExpandLessIcon } from "@mui/icons-material";
import { Fact } from "./types";
import { GlassyCard, glassyCardSx } from "./ui";
@@ -8,7 +8,10 @@ interface FactCardProps {
fact: Fact;
}
const MAX_PREVIEW_LENGTH = 200; // Characters to show before truncation
export const FactCard: React.FC<FactCardProps> = ({ fact }) => {
const [expanded, setExpanded] = useState(false);
const hostname = useMemo(() => {
try {
return new URL(fact.url).hostname;
@@ -21,30 +24,77 @@ export const FactCard: React.FC<FactCardProps> = ({ fact }) => {
navigator.clipboard.writeText(fact.quote);
};
const shouldTruncate = fact.quote.length > MAX_PREVIEW_LENGTH;
const previewText = shouldTruncate ? fact.quote.slice(0, MAX_PREVIEW_LENGTH).trim() + "..." : fact.quote;
const fullText = fact.quote;
return (
<GlassyCard
whileHover={{ y: -4 }}
whileHover={{ y: -2 }}
sx={{
...glassyCardSx,
p: 2,
p: 1.5,
cursor: "pointer",
transition: "all 0.2s",
height: "100%",
display: "flex",
flexDirection: "column",
"&:hover": {
borderColor: "rgba(102,126,234,0.25)",
boxShadow: "0 12px 28px rgba(15,23,42,0.08)",
boxShadow: "0 8px 20px rgba(15,23,42,0.08)",
},
background: "#ffffff",
border: "1px solid rgba(0,0,0,0.06)",
}}
>
<Stack spacing={1.5}>
<Typography variant="body2" sx={{ lineHeight: 1.6, color: "#0f172a" }}>
{fact.quote}
</Typography>
<Divider sx={{ borderColor: "rgba(0,0,0,0.06)" }} />
<Stack direction="row" spacing={1} alignItems="center" justifyContent="space-between">
<Stack direction="row" spacing={1} alignItems="center" flex={1}>
<OpenInNewIcon fontSize="small" sx={{ color: "rgba(15,23,42,0.6)" }} />
<Stack spacing={1} sx={{ flex: 1, minHeight: 0 }}>
{/* Quote Text - Truncated with expand option */}
<Box sx={{ flex: 1, minHeight: 0 }}>
<Typography
variant="body2"
sx={{
lineHeight: 1.5,
color: "#0f172a",
fontSize: "0.8125rem",
display: "-webkit-box",
WebkitLineClamp: expanded ? "none" : 4,
WebkitBoxOrient: "vertical",
overflow: "hidden",
textOverflow: "ellipsis",
mb: shouldTruncate ? 0.5 : 0,
}}
>
{expanded ? fullText : previewText}
</Typography>
{shouldTruncate && (
<IconButton
size="small"
onClick={(e) => {
e.stopPropagation();
setExpanded(!expanded);
}}
sx={{
p: 0.25,
mt: 0.25,
color: "#4f46e5",
"&:hover": { background: alpha("#4f46e5", 0.1) },
}}
>
{expanded ? (
<ExpandLessIcon fontSize="small" />
) : (
<ExpandMoreIcon fontSize="small" />
)}
</IconButton>
)}
</Box>
<Divider sx={{ borderColor: "rgba(0,0,0,0.06)", my: 0.5 }} />
{/* Source and Actions */}
<Stack direction="row" spacing={0.75} alignItems="center" justifyContent="space-between">
<Stack direction="row" spacing={0.5} alignItems="center" flex={1} minWidth={0}>
<OpenInNewIcon fontSize="small" sx={{ color: "rgba(15,23,42,0.5)", flexShrink: 0 }} />
<Typography
variant="caption"
component="a"
@@ -55,34 +105,49 @@ export const FactCard: React.FC<FactCardProps> = ({ fact }) => {
color: "#4f46e5",
textDecoration: "none",
"&:hover": { textDecoration: "underline" },
flex: 1,
overflow: "hidden",
textOverflow: "ellipsis",
whiteSpace: "nowrap",
fontSize: "0.7rem",
}}
>
{hostname || "source"}
</Typography>
</Stack>
<Tooltip title="Copy citation">
<IconButton size="small" onClick={handleCopy} sx={{ color: "rgba(15,23,42,0.65)" }}>
<IconButton
size="small"
onClick={(e) => {
e.stopPropagation();
handleCopy();
}}
sx={{
color: "rgba(15,23,42,0.6)",
p: 0.5,
"&:hover": { background: alpha("#4f46e5", 0.1) },
}}
>
<ContentCopyIcon fontSize="small" />
</IconButton>
</Tooltip>
</Stack>
<Stack direction="row" spacing={2}>
{/* Confidence and Date */}
<Stack direction="row" spacing={1} alignItems="center" justifyContent="space-between">
<Chip
label={`${(fact.confidence * 100).toFixed(0)}% confidence`}
label={`${(fact.confidence * 100).toFixed(0)}%`}
size="small"
sx={{
height: 20,
height: 18,
fontSize: "0.65rem",
background: alpha("#22c55e", 0.15),
color: "#15803d",
border: "1px solid rgba(34,197,94,0.35)",
fontWeight: 600,
}}
/>
<Typography variant="caption" sx={{ color: "#475569" }}>
{fact.date}
<Typography variant="caption" sx={{ color: "#64748b", fontSize: "0.7rem" }}>
{fact.date !== "Unknown" ? new Date(fact.date).toLocaleDateString("en-US", { month: "short", year: "numeric" }) : fact.date}
</Typography>
</Stack>
</Stack>

View File

@@ -217,6 +217,11 @@ const PodcastDashboard: React.FC = () => {
{analysis && !showScriptEditor && !showRenderQueue && (
<AnalysisPanel
analysis={analysis}
idea={project?.idea}
duration={project?.duration}
speakers={project?.speakers}
avatarUrl={project?.avatarUrl}
avatarPrompt={project?.avatarPrompt}
onRegenerate={() => {}}
/>
)}
@@ -259,6 +264,7 @@ const PodcastDashboard: React.FC = () => {
onBackToResearch={() => setShowScriptEditor(false)}
onProceedToRendering={(s) => workflow.handleProceedToRendering(s)}
onError={(msg) => workflow.setAnnouncement(msg)}
avatarUrl={project?.avatarUrl}
/>
)}

View File

@@ -19,8 +19,15 @@ export const Header: React.FC<HeaderProps> = ({ onShowProjects, onNewEpisode })
const navigate = useNavigate();
return (
<Stack direction="row" justifyContent="space-between" alignItems="flex-start" flexWrap="wrap" gap={2}>
<Box>
<Stack
direction="row"
justifyContent="space-between"
alignItems="flex-start"
flexWrap="wrap"
gap={2}
sx={{ width: "100%", minWidth: 0 }} // Ensure full width and allow wrapping
>
<Box sx={{ minWidth: 0, flex: { xs: "1 1 100%", md: "0 1 auto" } }}>
<Typography
variant="h3"
sx={{
@@ -30,24 +37,61 @@ export const Header: React.FC<HeaderProps> = ({ onShowProjects, onNewEpisode })
display: "flex",
alignItems: "center",
gap: 1.5,
fontSize: { xs: "1.5rem", md: "2rem" },
}}
>
<MicIcon fontSize="large" sx={{ color: "#667eea" }} />
AI Podcast Maker
</Typography>
<Typography variant="body2" color="text.secondary">
<Typography variant="body2" color="text.secondary" sx={{ display: { xs: "none", sm: "block" } }}>
Create professional podcast episodes with AI-powered research, smart scriptwriting, and natural voice narration
</Typography>
</Box>
<Stack direction="row" spacing={1} alignItems="center">
<Stack
direction="row"
spacing={1}
alignItems="center"
flexWrap="wrap"
useFlexGap
sx={{
justifyContent: { xs: "flex-start", md: "flex-end" },
gap: { xs: 0.5, md: 1 },
minWidth: 0,
width: { xs: "100%", md: "auto" }, // Full width on mobile to allow wrapping
flex: { xs: "1 1 100%", md: "0 1 auto" }, // Take full width on mobile
}}
>
<HeaderControls colorMode="light" showAlerts={true} showUser={true} />
<SecondaryButton onClick={() => window.open("/docs", "_blank")} startIcon={<InfoIcon />}>
<SecondaryButton
onClick={() => window.open("/docs", "_blank")}
startIcon={<InfoIcon />}
sx={{
display: { xs: "none", lg: "flex" },
// Override for light theme
borderColor: "rgba(102, 126, 234, 0.3) !important",
color: "#667eea !important",
"&:hover": {
borderColor: "rgba(102, 126, 234, 0.5) !important",
background: "rgba(102, 126, 234, 0.1) !important",
},
}}
>
Help
</SecondaryButton>
<SecondaryButton
onClick={() => navigate("/asset-library?source_module=podcast_maker&asset_type=audio")}
startIcon={<LibraryMusicIcon />}
tooltip="View all podcast episodes in Asset Library"
sx={{
display: { xs: "none", xl: "flex" },
// Override for light theme
borderColor: "rgba(102, 126, 234, 0.3) !important",
color: "#667eea !important",
"&:hover": {
borderColor: "rgba(102, 126, 234, 0.5) !important",
background: "rgba(102, 126, 234, 0.1) !important",
},
}}
>
My Episodes
</SecondaryButton>
@@ -55,10 +99,30 @@ export const Header: React.FC<HeaderProps> = ({ onShowProjects, onNewEpisode })
onClick={onShowProjects}
startIcon={<MicIcon />}
tooltip="View and resume saved projects"
sx={{
flexShrink: 0,
display: "flex !important", // Always show "My Projects" - force display
order: { xs: 1, md: 0 }, // Show first on mobile
// Override button colors for light theme
borderColor: "rgba(102, 126, 234, 0.3) !important",
color: "#667eea !important",
"&:hover": {
borderColor: "rgba(102, 126, 234, 0.5) !important",
background: "rgba(102, 126, 234, 0.1) !important",
},
}}
>
My Projects
</SecondaryButton>
<PrimaryButton onClick={onNewEpisode} startIcon={<AutoAwesomeIcon />}>
<PrimaryButton
onClick={onNewEpisode}
startIcon={<AutoAwesomeIcon />}
sx={{
flexShrink: 0,
display: "flex", // Always show "New Episode"
order: { xs: 0, md: 1 }, // Show first on mobile
}}
>
New Episode
</PrimaryButton>
</Stack>

View File

@@ -1,10 +1,11 @@
import React from "react";
import { Stack, Typography, Chip, Divider, Box, alpha } from "@mui/material";
import React, { useMemo } from "react";
import { Stack, Typography, Chip, Divider, Box, alpha, Paper } from "@mui/material";
import {
Insights as InsightsIcon,
Search as SearchIcon,
AttachMoney as AttachMoneyIcon,
EditNote as EditNoteIcon,
Article as ArticleIcon,
} from "@mui/icons-material";
import { Research } from "../types";
import { GlassyCard, glassyCardSx, PrimaryButton } from "../ui";
@@ -21,17 +22,68 @@ export const ResearchSummary: React.FC<ResearchSummaryProps> = ({
canGenerateScript,
onGenerateScript,
}) => {
// Split a long summary (over 500 characters) into up to three "key insight"
// sentences plus the remaining text. Shorter summaries are returned whole
// with no key insights.
const summaryParts = useMemo(() => {
  const fullSummary = research.summary || "";
  if (fullSummary.length <= 500) {
    return { keyPoints: [], remainingText: fullSummary };
  }

  // Match each sentence together with its terminal punctuation. A sentence
  // ends at ., ! or ? followed by whitespace or end of text, so decimals such
  // as "3.5" are not split. Keeping the punctuation avoids showing key points
  // without a full stop, turning "?"/"!" into "." on re-join, and doubling the
  // final period.
  const matches: string[] = fullSummary.match(/[\s\S]+?(?:[.!?]+(?=\s|$)|$)/g) ?? [];
  // Ignore very short fragments (20 characters or fewer).
  const sentences = matches.map((s) => s.trim()).filter((s) => s.length > 20);

  const keyPoints = sentences.slice(0, 3);
  const remainingText = sentences.slice(3).join(" ");
  return { keyPoints, remainingText };
}, [research.summary]);
return (
<GlassyCard sx={glassyCardSx}>
<Stack spacing={3}>
<Stack direction="row" justifyContent="space-between" alignItems="flex-start" flexWrap="wrap" gap={2}>
<Box sx={{ flex: 1 }}>
<Typography variant="h6" sx={{ display: "flex", alignItems: "center", gap: 1, mb: 1 }}>
<Box sx={{ flex: 1, minWidth: { xs: "100%", md: "60%" } }}>
<Typography variant="h6" sx={{ display: "flex", alignItems: "center", gap: 1, mb: 1.5 }}>
<InsightsIcon />
Research Summary
</Typography>
<Typography variant="body2" color="text.secondary" sx={{ mb: 2, lineHeight: 1.7 }}>
{research.summary}
{/* Key Insights */}
{summaryParts.keyPoints.length > 0 && (
<Box sx={{ mb: 2 }}>
<Typography variant="subtitle2" sx={{ mb: 1, color: "#0f172a", fontWeight: 600, display: "flex", alignItems: "center", gap: 0.5 }}>
<ArticleIcon fontSize="small" />
Key Insights
</Typography>
<Stack spacing={1}>
{summaryParts.keyPoints.map((point, idx) => (
<Paper
key={idx}
sx={{
p: 1.25,
background: alpha("#667eea", 0.05),
border: "1px solid rgba(102, 126, 234, 0.15)",
borderRadius: 1.5,
}}
>
<Typography variant="body2" sx={{ color: "#0f172a", lineHeight: 1.6, fontSize: "0.875rem" }}>
{point}
</Typography>
</Paper>
))}
</Stack>
</Box>
)}
{/* Full Summary Text */}
<Typography
variant="body2"
color="text.secondary"
sx={{
mb: 2,
lineHeight: 1.7,
fontSize: "0.875rem",
color: "#475569",
}}
>
{summaryParts.remainingText || research.summary}
</Typography>
{/* Research Metadata */}
@@ -126,15 +178,23 @@ export const ResearchSummary: React.FC<ResearchSummaryProps> = ({
{research.factCards.length > 0 && (
<>
<Divider sx={{ borderColor: "rgba(0,0,0,0.08)" }} />
<Stack direction="row" justifyContent="space-between" alignItems="center" sx={{ mb: 1.5 }}>
<Stack direction="row" justifyContent="space-between" alignItems="center" sx={{ mb: 1.5, flexWrap: "wrap", gap: 1 }}>
<Typography variant="subtitle2" sx={{ color: "#0f172a", fontWeight: 600 }}>
Research Sources & Facts ({research.factCards.length})
</Typography>
<Typography variant="caption" sx={{ color: "#64748b" }}>
Click any card to view source details
<Typography variant="caption" sx={{ color: "#64748b", fontSize: "0.75rem" }}>
Click to expand Hover to see source
</Typography>
</Stack>
<Box sx={{ display: "grid", gridTemplateColumns: { xs: "1fr", sm: "1fr 1fr", lg: "1fr 1fr 1fr" }, gap: 2 }}>
<Box
sx={{
display: "grid",
gridTemplateColumns: { xs: "1fr", sm: "repeat(2, 1fr)", md: "repeat(3, 1fr)", lg: "repeat(4, 1fr)" },
gap: 1.5,
width: "100%",
overflow: "hidden",
}}
>
{research.factCards.map((fact) => (
<FactCard key={fact.id} fact={fact} />
))}

View File

@@ -94,17 +94,66 @@ export const usePodcastWorkflow = ({ projectState, onError }: UsePodcastWorkflow
setShowRenderQueue(false);
try {
setIsAnalyzing(true);
// Upload avatar if provided, or generate presenters
let avatarUrl: string | null = null;
if (payload.files.avatarFile) {
try {
setAnnouncement("Uploading presenter avatar...");
const uploadResponse = await podcastApi.uploadAvatar(payload.files.avatarFile);
avatarUrl = uploadResponse.avatar_url;
} catch (error) {
console.error('Avatar upload failed:', error);
// Continue without avatar - will generate one later
}
}
setAnnouncement("Analyzing your idea — AI suggestions incoming");
const result = await podcastApi.createProject(payload);
const result = await podcastApi.createProject({ ...payload, avatarUrl });
await initializeProject(payload, result.projectId);
setProject({ id: result.projectId, idea: payload.ideaOrUrl, duration: payload.duration, speakers: payload.speakers });
setProject({ id: result.projectId, idea: payload.ideaOrUrl, duration: payload.duration, speakers: payload.speakers, avatarUrl });
setAnalysis(result.analysis);
setEstimate(result.estimate);
setQueries(result.queries);
setSelectedQueries(new Set(result.queries.map((q) => q.id)));
setKnobs(payload.knobs);
setBudgetCap(payload.budgetCap);
setAnnouncement("Analysis complete");
// Generate presenters AFTER analysis completes (to use analysis insights)
// This happens only if no avatar was uploaded
if (!avatarUrl && payload.speakers > 0 && result.analysis) {
try {
setAnnouncement("Generating presenter avatars using AI insights...");
const presentersResponse = await podcastApi.generatePresenters(
payload.speakers,
result.projectId,
result.analysis.audience,
result.analysis.contentType,
result.analysis.topKeywords
);
if (presentersResponse.avatars && presentersResponse.avatars.length > 0) {
// Store the first presenter avatar URL and prompt
const firstAvatar = presentersResponse.avatars[0];
const prompt = firstAvatar.prompt || null;
setProject({
id: result.projectId,
idea: payload.ideaOrUrl,
duration: payload.duration,
speakers: payload.speakers,
avatarUrl: firstAvatar.avatar_url,
avatarPrompt: prompt,
avatarPersonaId: firstAvatar.persona_id || presentersResponse.persona_id || null,
});
setAnnouncement("Analysis complete - Presenter avatars generated");
}
} catch (error) {
console.error('Presenter generation failed:', error);
setAnnouncement("Analysis complete - Avatar generation will happen later");
// Continue without presenters - can generate later
}
} else {
setAnnouncement("Analysis complete");
}
} catch (error: any) {
if (error?.response?.status === 429 || error?.response?.data?.detail) {
const errorDetail = error.response.data.detail;

View File

@@ -1,8 +1,11 @@
import React, { useCallback } from "react";
import { Box, Stack, Typography, Alert, Paper, alpha } from "@mui/material";
import React, { useCallback, useState, useEffect } from "react";
import { Box, Stack, Typography, Alert, Paper, alpha, Button, CircularProgress, LinearProgress } from "@mui/material";
import {
PlayArrow as PlayArrowIcon,
ArrowBack as ArrowBackIcon,
VideoLibrary as VideoLibraryIcon,
Download as DownloadIcon,
CheckCircle as CheckCircleIcon,
} from "@mui/icons-material";
import { Script, Knobs, Job } from "./types";
import { SecondaryButton } from "./ui";
@@ -10,6 +13,7 @@ import { SceneCard } from "./RenderQueue/SceneCard";
import { SummaryStats } from "./RenderQueue/SummaryStats";
import { GuidancePanel } from "./RenderQueue/GuidancePanel";
import { useRenderQueue } from "./RenderQueue/useRenderQueue";
import { fetchMediaBlobUrl } from "../../utils/fetchMediaBlobUrl";
interface RenderQueueProps {
projectId: string;
@@ -36,6 +40,7 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
onBack,
onError,
}) => {
const [localError, setLocalError] = useState<string>("");
const {
rendering,
generatingImage,
@@ -43,6 +48,10 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
runRender,
runImageGeneration,
runVideoRender,
combiningVideos,
combiningProgress,
finalVideoUrl,
combineFinalVideo,
} = useRenderQueue({
script,
jobs,
@@ -52,7 +61,10 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
avatarImageUrl,
onUpdateJob,
onUpdateScript,
onError,
onError: (msg) => {
setLocalError(msg);
onError(msg);
},
});
const handleDownloadAudio = useCallback((audioUrl: string, title: string) => {
@@ -76,11 +88,11 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
title,
text: `Check out this podcast episode: ${title}`,
url: audioUrl,
});
});
} catch (err) {
// User cancelled or error
}
} else {
} else {
// Fallback: copy to clipboard
await navigator.clipboard.writeText(audioUrl);
alert("Audio URL copied to clipboard!");
@@ -91,6 +103,28 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
(jobs.length > 0 && jobs.every((j) => j.status === "completed" && j.imageUrl)) ||
(script.scenes.length > 0 && script.scenes.every((s) => s.audioUrl && s.imageUrl));
const allVideosReady = jobs.length > 0 && jobs.every((j) => j.videoUrl);
// Blob URL for the combined final video (used for preview and download).
const [finalVideoBlobUrl, setFinalVideoBlobUrl] = useState<string | null>(null);

// Load the final video as a blob whenever its source URL changes.
// The effect owns the object URL it receives: it is revoked on cleanup so
// repeated exports do not leak blobs, and late responses for an outdated
// URL are discarded instead of overwriting the current preview.
useEffect(() => {
  // Drop any previous blob URL first; the <video> falls back to finalVideoUrl
  // until the new blob is ready.
  setFinalVideoBlobUrl(null);

  if (!finalVideoUrl) {
    return;
  }

  let cancelled = false;
  let ownedBlobUrl: string | null = null;

  fetchMediaBlobUrl(finalVideoUrl)
    .then((blobUrl) => {
      if (!blobUrl) {
        return;
      }
      if (cancelled) {
        // The effect was cleaned up while the request was in flight.
        if (blobUrl.startsWith("blob:")) {
          URL.revokeObjectURL(blobUrl);
        }
        return;
      }
      ownedBlobUrl = blobUrl;
      setFinalVideoBlobUrl(blobUrl);
    })
    .catch((err) => {
      console.error("Failed to load final video blob:", err);
    });

  return () => {
    cancelled = true;
    if (ownedBlobUrl && ownedBlobUrl.startsWith("blob:")) {
      URL.revokeObjectURL(ownedBlobUrl);
    }
  };
}, [finalVideoUrl]);
return (
<Box sx={{ mt: 3 }}>
{/* Header */}
@@ -115,6 +149,24 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
</Typography>
</Stack>
{/* Error Display */}
{localError && (
<Alert
severity="error"
onClose={() => setLocalError("")}
sx={{
mb: 3,
background: alpha("#ef4444", 0.1),
border: "1px solid",
borderColor: alpha("#ef4444", 0.3),
}}
>
<Typography variant="body2" sx={{ fontWeight: 600 }}>
{localError}
</Typography>
</Alert>
)}
{/* Info Alert */}
<Alert severity="info" sx={{ mb: 3, background: alpha("#3b82f6", 0.1), border: "1px solid rgba(59,130,246,0.3)" }}>
<Typography variant="body2">
@@ -127,21 +179,21 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
{/* Empty State */}
{jobs.length === 0 && script.scenes.length === 0 && (
<Paper
sx={{
<Paper
sx={{
p: 4,
textAlign: "center",
background: "linear-gradient(135deg, rgba(102, 126, 234, 0.05) 0%, rgba(118, 75, 162, 0.05) 100%)",
border: "2px dashed rgba(102, 126, 234, 0.3)",
borderRadius: 2,
}}
>
}}
>
<Typography variant="h6" sx={{ color: "#0f172a", fontWeight: 600, mb: 1 }}>
No scenes to render
</Typography>
</Typography>
<Typography variant="body2" sx={{ color: "#64748b", mb: 3 }}>
Go back to the script editor to generate and approve scenes first.
</Typography>
</Typography>
<SecondaryButton onClick={onBack} startIcon={<ArrowBackIcon />}>
Back to Script Editor
</SecondaryButton>
@@ -166,7 +218,7 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
avatarImageUrl={avatarImageUrl}
onRender={runRender}
onImageGenerate={runImageGeneration}
onVideoRender={runVideoRender}
onVideoGenerate={(sceneId, settings) => runVideoRender(sceneId, settings)}
onDownloadAudio={handleDownloadAudio}
onDownloadVideo={handleDownloadVideo}
onShare={handleShare}
@@ -176,6 +228,224 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
})}
</Stack>
{/* Final Export Section - Show when all scene videos are ready */}
{allVideosReady && (
<Paper
elevation={3}
sx={{
mt: 4,
p: 4,
background: "linear-gradient(135deg, rgba(16, 185, 129, 0.05) 0%, rgba(6, 182, 212, 0.05) 100%)",
border: "2px solid",
borderColor: finalVideoUrl ? "success.main" : "info.light",
borderRadius: 3,
position: "relative",
overflow: "hidden",
"&::before": {
content: '""',
position: "absolute",
top: 0,
left: 0,
right: 0,
height: "4px",
background: finalVideoUrl
? "linear-gradient(90deg, #10b981 0%, #06b6d4 100%)"
: "linear-gradient(90deg, #667eea 0%, #764ba2 100%)",
},
}}
>
<Stack spacing={3}>
{/* Header */}
<Stack direction="row" alignItems="center" spacing={2}>
<Box
sx={{
p: 1.5,
borderRadius: 2,
background: finalVideoUrl
? "linear-gradient(135deg, #10b981 0%, #059669 100%)"
: "linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
display: "flex",
alignItems: "center",
justifyContent: "center",
boxShadow: "0 4px 12px rgba(102, 126, 234, 0.3)",
}}
>
{finalVideoUrl ? (
<CheckCircleIcon sx={{ color: "white", fontSize: 32 }} />
) : (
<VideoLibraryIcon sx={{ color: "white", fontSize: 32 }} />
)}
</Box>
<Box>
<Typography
variant="h5"
sx={{
fontWeight: 700,
color: "#0f172a",
mb: 0.5,
}}
>
{finalVideoUrl ? "🎉 Final Podcast Ready!" : "🎬 Final Podcast Export"}
</Typography>
<Typography variant="body2" sx={{ color: "#64748b" }}>
{finalVideoUrl
? "Your complete podcast video is ready to download"
: `Combine ${script.scenes.length} scene videos into one final podcast`}
</Typography>
</Box>
</Stack>
{finalVideoUrl ? (
<Stack spacing={3}>
<Alert
severity="success"
icon={<CheckCircleIcon />}
sx={{
background: alpha("#10b981", 0.1),
border: "1px solid",
borderColor: alpha("#10b981", 0.3),
}}
>
<Typography variant="body2" sx={{ fontWeight: 600 }}>
Your final podcast video has been created successfully!
</Typography>
</Alert>
{/* Video Preview */}
<Box
sx={{
width: "100%",
maxWidth: 900,
mx: "auto",
borderRadius: 2,
overflow: "hidden",
boxShadow: "0 8px 24px rgba(0, 0, 0, 0.12)",
border: "1px solid",
borderColor: alpha("#10b981", 0.2),
}}
>
<video
controls
src={finalVideoBlobUrl || finalVideoUrl}
style={{
width: "100%",
display: "block",
backgroundColor: "#000",
}}
>
Your browser does not support video playback.
</video>
</Box>
{/* Download Button */}
<Stack direction="row" spacing={2} justifyContent="center" sx={{ pt: 2 }}>
<Button
variant="contained"
size="large"
startIcon={<DownloadIcon />}
onClick={async () => {
if (finalVideoBlobUrl) {
const link = document.createElement("a");
link.href = finalVideoBlobUrl;
link.download = `podcast-final-${Date.now()}.mp4`;
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
}
}}
sx={{
px: 4,
py: 1.5,
background: "linear-gradient(135deg, #10b981 0%, #059669 100%)",
boxShadow: "0 4px 12px rgba(16, 185, 129, 0.4)",
"&:hover": {
background: "linear-gradient(135deg, #059669 0%, #047857 100%)",
boxShadow: "0 6px 16px rgba(16, 185, 129, 0.5)",
},
}}
>
Download Final Podcast
</Button>
</Stack>
</Stack>
) : (
<Stack spacing={3}>
<Alert
severity="info"
sx={{
background: alpha("#3b82f6", 0.08),
border: "1px solid",
borderColor: alpha("#3b82f6", 0.2),
}}
>
<Typography variant="body2">
<strong>Ready to export!</strong> Click below to combine all {script.scenes.length} scene videos into your final podcast video.
</Typography>
</Alert>
{combiningVideos && (
<Box sx={{ width: "100%" }}>
<Stack direction="row" justifyContent="space-between" sx={{ mb: 1 }}>
<Typography variant="body2" sx={{ fontWeight: 600, color: "#0f172a" }}>
{combiningProgress?.message || "Combining videos..."}
</Typography>
{combiningProgress && (
<Typography variant="body2" sx={{ color: "#64748b", fontWeight: 600 }}>
{combiningProgress.progress.toFixed(0)}%
</Typography>
)}
</Stack>
<LinearProgress
variant={combiningProgress ? "determinate" : "indeterminate"}
value={combiningProgress?.progress || 0}
sx={{
height: 8,
borderRadius: 4,
background: alpha("#667eea", 0.1),
"& .MuiLinearProgress-bar": {
background: "linear-gradient(90deg, #667eea 0%, #764ba2 100%)",
borderRadius: 4,
},
}}
/>
{combiningProgress && combiningProgress.progress < 100 && (
<Typography variant="caption" sx={{ color: "#64748b", mt: 0.5, display: "block" }}>
Video encoding in progress. This may take a few minutes...
</Typography>
)}
</Box>
)}
<Button
variant="contained"
size="large"
fullWidth
startIcon={combiningVideos ? <CircularProgress size={20} sx={{ color: "white" }} /> : <VideoLibraryIcon />}
onClick={combineFinalVideo}
disabled={combiningVideos}
sx={{
py: 2,
fontSize: "1.1rem",
fontWeight: 700,
background: "linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
boxShadow: "0 4px 12px rgba(102, 126, 234, 0.4)",
"&:hover": {
background: "linear-gradient(135deg, #5568d3 0%, #6a3f8f 100%)",
boxShadow: "0 6px 16px rgba(102, 126, 234, 0.5)",
},
"&:disabled": {
background: alpha("#667eea", 0.5),
},
}}
>
{combiningVideos ? "Combining Videos..." : "Combine Scenes into Final Video"}
</Button>
</Stack>
)}
</Stack>
</Paper>
)}
{/* Footer - Video Generation Focus */}
<Paper
sx={{
@@ -191,13 +461,22 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
<SecondaryButton onClick={onBack} startIcon={<ArrowBackIcon />}>
Back to Script
</SecondaryButton>
{allScenesCompleted ? (
{allVideosReady ? (
<Stack spacing={1} alignItems="flex-end">
<Typography variant="body1" sx={{ color: "#10b981", fontWeight: 700, fontSize: "1rem" }}>
🎉 All scene videos ready!
</Typography>
<Typography variant="body2" sx={{ color: "#64748b" }}>
Scroll up to combine them into your final podcast video.
</Typography>
</Stack>
) : allScenesCompleted ? (
<Stack spacing={1} alignItems="flex-end">
<Typography variant="body1" sx={{ color: "#10b981", fontWeight: 700, fontSize: "1rem" }}>
🎉 All scenes ready for video generation!
</Typography>
<Typography variant="body2" sx={{ color: "#64748b" }}>
Generate videos for individual scenes or download them.
Generate videos for individual scenes above.
</Typography>
</Stack>
) : (

View File

@@ -92,6 +92,9 @@ export const SceneActionButtons: React.FC<SceneActionButtonsProps> = ({
}
// Has audio - show all action buttons
const videoInProgress = rendering !== null;
const isCurrentVideo = rendering === scene.id;
return (
<Stack direction="row" spacing={1.5} justifyContent="flex-end" flexWrap="wrap" useFlexGap>
{/* Generate Image */}
@@ -114,21 +117,29 @@ export const SceneActionButtons: React.FC<SceneActionButtonsProps> = ({
{/* Generate Video */}
<PrimaryButton
onClick={() => onVideoRender(scene.id)}
disabled={isBusy || !hasImage || hasVideo}
onClick={() => {
onVideoRender(scene.id);
}}
disabled={isBusy || videoInProgress || !hasImage || hasVideo}
startIcon={<VideocamIcon />}
tooltip={
hasVideo
? "Video already generated"
: !hasImage
? "Generate an image first to create video"
: videoInProgress
? "A video generation is already running. Please wait..."
: isBusy
? "Another operation in progress"
: "Generate video for this scene"
}
sx={{ minWidth: 160 }}
sx={{ minWidth: 180 }}
>
{hasVideo ? "Video Ready" : "Generate Video"}
{videoInProgress && isCurrentVideo
? "Generating Video..."
: hasVideo
? "Video Ready"
: "Generate Video"}
</PrimaryButton>
{/* Download Video */}

View File

@@ -7,11 +7,13 @@ import {
OpenInNew as OpenInNewIcon,
Videocam as VideocamIcon,
} from "@mui/icons-material";
import { Scene, Job } from "../types";
import { Scene, Job, VideoGenerationSettings } from "../types";
import { GlassyCard, glassyCardSx } from "../ui";
import { InlineAudioPlayer } from "../InlineAudioPlayer";
import { SceneActionButtons } from "./SceneActionButtons";
import { aiApiClient } from "../../../api/client";
import { fetchMediaBlobUrl } from "../../../utils/fetchMediaBlobUrl";
import { VideoRegenerateModal } from "./VideoRegenerateModal";
interface SceneCardProps {
scene: Scene;
@@ -22,7 +24,7 @@ interface SceneCardProps {
avatarImageUrl?: string | null;
onRender: (sceneId: string, mode: "preview" | "full") => void;
onImageGenerate: (sceneId: string) => void;
onVideoRender: (sceneId: string) => void;
onVideoGenerate: (sceneId: string, settings: VideoGenerationSettings) => void;
onDownloadAudio: (audioUrl: string, title: string) => void;
onDownloadVideo: (videoUrl: string, title: string) => void;
onShare: (audioUrl: string, title: string) => void;
@@ -75,7 +77,7 @@ export const SceneCard: React.FC<SceneCardProps> = ({
avatarImageUrl,
onRender,
onImageGenerate,
onVideoRender,
onVideoGenerate,
onDownloadAudio,
onDownloadVideo,
onShare,
@@ -89,8 +91,27 @@ export const SceneCard: React.FC<SceneCardProps> = ({
const status = job?.status || (hasAudio ? "completed" : "idle");
const initials = getInitials(scene.title);
// Load image as blob if it's an authenticated endpoint
const [imageBlobUrl, setImageBlobUrl] = useState<string | null>(null);
const [videoBlobUrl, setVideoBlobUrl] = useState<string | null>(null);
const [showVideoModal, setShowVideoModal] = useState(false);
const [initialVideoPrompt, setInitialVideoPrompt] = useState<string>("");
// Derive the default video prompt for this scene.
// Preference order: trimmed scene title, then the first sentence of the
// scene description, then a generic fallback.
useEffect(() => {
  const sceneDescription = (scene as any).description as string | undefined;
  const titleText = (scene.title || "").trim();
  const firstSentence = (sceneDescription || "").split(".")[0]?.trim();

  setInitialVideoPrompt(
    titleText || firstSentence || "Professional podcast scene with subtle movement"
  );
}, [scene]);
useEffect(() => {
if (!imageUrl) {
@@ -98,14 +119,11 @@ export const SceneCard: React.FC<SceneCardProps> = ({
return;
}
console.log('[SceneCard] Loading image:', { imageUrl, hasImage, sceneId: scene.id });
// Check if this is a podcast image endpoint that requires authentication
const isPodcastImage = imageUrl.includes('/api/podcast/images/') || imageUrl.includes('/api/story/images/');
if (!isPodcastImage) {
// Regular URL (external), use directly
console.log('[SceneCard] Using external image URL directly');
setImageBlobUrl(imageUrl);
return;
}
@@ -134,22 +152,17 @@ export const SceneCard: React.FC<SceneCardProps> = ({
// Remove query parameters if present
imagePath = imagePath.split('?')[0];
console.log('[SceneCard] Fetching image blob from:', imagePath);
const response = await aiApiClient.get(imagePath, {
responseType: 'blob',
});
if (!isMounted || imageUrl !== currentImageUrl) {
console.log('[SceneCard] Component unmounted or URL changed, skipping blob URL set');
return;
}
const blob = response.data;
const newBlobUrl = URL.createObjectURL(blob);
console.log('[SceneCard] Image blob loaded successfully, created blob URL');
setImageBlobUrl((prevBlobUrl) => {
// Clean up previous blob URL if exists
if (prevBlobUrl && prevBlobUrl !== newBlobUrl && prevBlobUrl.startsWith('blob:')) {
@@ -184,11 +197,9 @@ export const SceneCard: React.FC<SceneCardProps> = ({
const token = localStorage.getItem('clerk_dashboard_token') || '';
if (token) {
const urlWithToken = `${fallbackPath}?token=${encodeURIComponent(token)}`;
console.log('[SceneCard] Trying URL with query token');
setImageBlobUrl(urlWithToken);
} else {
// Fallback to original URL
console.log('[SceneCard] No token available, using original URL');
setImageBlobUrl(imageUrl);
}
} catch (fallbackErr) {
@@ -213,6 +224,39 @@ export const SceneCard: React.FC<SceneCardProps> = ({
};
}, [imageUrl, hasImage, scene.id]);
// Load video as blob when videoUrl changes (using Story Writer's utility).
// A `cancelled` flag guards against the request resolving after the effect
// has been cleaned up (unmount or URL change): in that case the blob URL is
// revoked immediately instead of being stored, so it neither leaks nor
// overwrites the state belonging to a newer video URL.
useEffect(() => {
  const sourceUrl = job?.videoUrl;
  if (!sourceUrl) {
    setVideoBlobUrl(null);
    return;
  }

  let cancelled = false;
  let currentBlobUrl: string | null = null;

  fetchMediaBlobUrl(sourceUrl)
    .then((blobUrl) => {
      if (cancelled) {
        if (blobUrl) {
          URL.revokeObjectURL(blobUrl);
        }
        return;
      }
      if (blobUrl) {
        currentBlobUrl = blobUrl;
        setVideoBlobUrl(blobUrl);
      } else {
        // File not found (404) - clear the blob URL
        console.warn('[SceneCard] Video file not found (404):', sourceUrl);
        setVideoBlobUrl(null);
      }
    })
    .catch((err) => {
      console.error('[SceneCard] Failed to load video blob:', err);
      if (!cancelled) {
        setVideoBlobUrl(null);
      }
    });

  return () => {
    // Cleanup blob URL when component unmounts or URL changes
    cancelled = true;
    if (currentBlobUrl) {
      URL.revokeObjectURL(currentBlobUrl);
    }
  };
}, [job?.videoUrl]);
return (
<GlassyCard sx={glassyCardSx}>
<Stack spacing={2}>
@@ -279,13 +323,12 @@ export const SceneCard: React.FC<SceneCardProps> = ({
</Box>
</Box>
)}
{hasVideo && job?.videoUrl && (
{hasVideo && videoBlobUrl && (
<Box sx={{ mt: 1 }}>
<Box
component="a"
href={job.videoUrl}
target="_blank"
rel="noopener noreferrer"
href={videoBlobUrl}
download={`${scene.title.replace(/[^a-z0-9]/gi, '_')}_video.mp4`}
sx={{ color: "#a78bfa", textDecoration: "none", display: "inline-flex", alignItems: "center", gap: 0.5 }}
>
<VideocamIcon sx={{ fontSize: 16 }} />
@@ -350,8 +393,57 @@ export const SceneCard: React.FC<SceneCardProps> = ({
<InlineAudioPlayer audioUrl={audioUrl} title={scene.title} />
)}
{/* Image Preview */}
{hasImage && (imageBlobUrl || imageUrl) && (
{/* Video Preview - Show video if available, otherwise show image */}
{hasVideo && videoBlobUrl ? (
<Box
sx={{
width: "100%",
borderRadius: 2,
overflow: "hidden",
border: "2px solid rgba(56,189,248,0.5)",
background: alpha("#0f172a", 0.85),
position: "relative",
}}
>
<Box
component="video"
src={videoBlobUrl}
controls
preload="metadata"
sx={{
width: "100%",
height: "auto",
display: "block",
maxHeight: 420,
objectFit: "cover",
backgroundColor: "black",
}}
onError={(e) => {
const videoElement = e.currentTarget as HTMLVideoElement;
console.error("[SceneCard] Video failed to load:", {
originalUrl: job?.videoUrl,
networkState: videoElement.networkState,
});
}}
/>
<Box
sx={{
position: "absolute",
top: 8,
right: 8,
bgcolor: "rgba(56,189,248,0.9)",
color: "white",
px: 1,
py: 0.5,
borderRadius: 1,
fontSize: "0.75rem",
fontWeight: 600,
}}
>
VIDEO
</Box>
</Box>
) : hasImage && (imageBlobUrl || imageUrl) ? (
<Box
sx={{
width: "100%",
@@ -373,21 +465,14 @@ export const SceneCard: React.FC<SceneCardProps> = ({
objectFit: "cover",
}}
onError={(e) => {
console.error('[SceneCard] Image failed to load:', {
console.error("[SceneCard] Image failed to load:", {
src: e.currentTarget.src,
imageUrl,
imageBlobUrl,
hasImage,
});
}}
onLoad={() => {
console.log('[SceneCard] Image loaded successfully:', {
src: imageBlobUrl || imageUrl,
});
}}
/>
</Box>
)}
) : null}
{/* Action Buttons */}
<SceneActionButtons
@@ -402,12 +487,25 @@ export const SceneCard: React.FC<SceneCardProps> = ({
isBusy={isBusy}
onRender={onRender}
onImageGenerate={onImageGenerate}
onVideoRender={onVideoRender}
onVideoRender={() => setShowVideoModal(true)}
onDownloadAudio={onDownloadAudio}
onDownloadVideo={onDownloadVideo}
onShare={onShare}
onError={onError}
/>
{/* Video Generation Settings Modal */}
<VideoRegenerateModal
open={showVideoModal}
onClose={() => setShowVideoModal(false)}
onGenerate={(settings: VideoGenerationSettings) => {
setShowVideoModal(false);
onVideoGenerate(scene.id, settings);
}}
initialPrompt={initialVideoPrompt}
initialResolution="480p"
initialSeed={-1}
/>
</Stack>
</GlassyCard>
);

View File

@@ -0,0 +1,228 @@
import React, { useEffect, useState } from "react";
import {
Dialog,
DialogTitle,
DialogContent,
DialogActions,
Stack,
Box,
Typography,
TextField,
FormControl,
FormLabel,
RadioGroup,
FormControlLabel,
Radio,
Tooltip,
} from "@mui/material";
import { Info as InfoIcon } from "@mui/icons-material";
import { PrimaryButton, SecondaryButton } from "../ui";
import type { VideoGenerationSettings } from "../types";
/** Props accepted by the VideoRegenerateModal dialog. */
interface VideoRegenerateModalProps {
// Whether the dialog is currently shown.
open: boolean;
// Called when the dialog is dismissed (Cancel button or Dialog onClose).
onClose: () => void;
// Called with the collected settings when the user clicks "Generate Video".
onGenerate: (settings: VideoGenerationSettings) => void;
// Prompt text used to pre-fill the prompt field.
initialPrompt: string;
// Pre-selected resolution; the component defaults this to "480p".
initialResolution?: "480p" | "720p";
// Pre-filled seed; null or -1 leaves the seed field empty.
initialSeed?: number | null;
}
/**
 * Dialog that collects per-scene video generation settings (visual prompt,
 * resolution, optional seed and optional mask image URL) and hands them to
 * `onGenerate` as a `VideoGenerationSettings` object.
 *
 * The prompt and resolution fields are re-synchronised whenever the
 * corresponding `initial*` props change.
 */
export const VideoRegenerateModal: React.FC<VideoRegenerateModalProps> = ({
  open,
  onClose,
  onGenerate,
  initialPrompt,
  initialResolution = "480p",
  initialSeed = -1,
}) => {
  const [prompt, setPrompt] = useState(initialPrompt);
  const [resolution, setResolution] = useState<"480p" | "720p">(initialResolution);
  // The seed is kept as a string so the field can be empty ("random each time").
  const [seed, setSeed] = useState<string>(
    initialSeed != null && initialSeed !== -1 ? String(initialSeed) : ""
  );
  const [maskImageUrl, setMaskImageUrl] = useState<string>("");

  // Keep the editable fields in sync with the values supplied by the parent.
  useEffect(() => {
    setPrompt(initialPrompt);
    setResolution(initialResolution);
  }, [initialResolution, initialPrompt]);

  // Build the settings object and pass it to the caller.
  const handleGenerate = () => {
    // Empty or non-numeric seed input means "no seed" (undefined).
    const seedText = seed.trim();
    const numericSeed = Number(seed);
    const parsedSeed = seedText === "" || Number.isNaN(numericSeed) ? undefined : numericSeed;

    const settings: VideoGenerationSettings = {
      prompt: prompt.trim(),
      resolution,
      seed: parsedSeed,
      maskImageUrl: maskImageUrl.trim() || undefined,
    };
    onGenerate(settings);
  };

  // Shared styling for the dark outlined text inputs.
  const darkInputSx = {
    bgcolor: "rgba(15,23,42,0.9)",
    color: "white",
    "& .MuiOutlinedInput-notchedOutline": {
      borderColor: "rgba(148,163,184,0.4)",
    },
    "&:hover .MuiOutlinedInput-notchedOutline": {
      borderColor: "rgba(125,211,252,0.8)",
    },
  };

  return (
    <Dialog
      open={open}
      onClose={onClose}
      maxWidth="md"
      fullWidth
      PaperProps={{
        sx: {
          background: "rgba(15, 23, 42, 0.96)",
          backdropFilter: "blur(18px)",
          borderRadius: 4,
          border: "1px solid rgba(148, 163, 184, 0.4)",
        },
      }}
    >
      <DialogTitle>
        <Stack direction="row" justifyContent="space-between" alignItems="center">
          <Typography variant="h6" sx={{ color: "white", fontWeight: 600 }}>
            Configure Video Generation
          </Typography>
          <Tooltip title="Adjust how your talking-head video is rendered. These settings control resolution, prompt, and animation seed.">
            <InfoIcon sx={{ color: "rgba(148,163,184,0.9)" }} />
          </Tooltip>
        </Stack>
        <Typography variant="body2" sx={{ color: "rgba(148,163,184,0.9)", mt: 1 }}>
          Fine-tune how this scene is animated. InfiniteTalk is audio-driven, so use the prompt to describe the visual
          look and feel you want while keeping it concise.
        </Typography>
      </DialogTitle>
      <DialogContent>
        <Stack spacing={3} sx={{ mt: 1 }}>
          {/* Prompt */}
          <Box>
            <FormLabel sx={{ color: "rgba(248,250,252,0.9)", mb: 0.5 }}>Visual prompt</FormLabel>
            <TextField
              multiline
              minRows={3}
              maxRows={6}
              fullWidth
              value={prompt}
              onChange={(e) => setPrompt(e.target.value)}
              placeholder="Short description of how the scene should look (lighting, mood, camera feel, etc.)"
              variant="outlined"
              InputProps={{
                sx: darkInputSx,
              }}
              InputLabelProps={{
                sx: { color: "rgba(148,163,184,0.9)" },
              }}
            />
            <Typography variant="caption" sx={{ color: "rgba(148,163,184,0.9)", mt: 0.5, display: "block" }}>
              Example: &quot;Modern podcast studio with soft lighting, the host framed center, gentle camera movement.&quot;
            </Typography>
          </Box>
          {/* Resolution */}
          <Box>
            <FormLabel sx={{ color: "rgba(248,250,252,0.9)", mb: 1 }}>Resolution &amp; quality</FormLabel>
            <RadioGroup
              row
              value={resolution}
              onChange={(e) => setResolution(e.target.value as "480p" | "720p")}
            >
              <FormControlLabel
                value="480p"
                control={<Radio color="primary" />}
                label={
                  <Box>
                    <Typography variant="body2">480p (Recommended)</Typography>
                    <Typography variant="caption" color="text.secondary">
                      Faster render, lower cost, great for previews &amp; social
                    </Typography>
                  </Box>
                }
              />
              <FormControlLabel
                value="720p"
                control={<Radio color="primary" />}
                label={
                  <Box>
                    <Typography variant="body2">720p (Higher quality)</Typography>
                    <Typography variant="caption" color="text.secondary">
                      Sharper video, slightly higher cost and render time
                    </Typography>
                  </Box>
                }
              />
            </RadioGroup>
          </Box>
          {/* Seed & advanced options */}
          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <FormControl fullWidth>
              <FormLabel sx={{ color: "rgba(248,250,252,0.9)", mb: 0.5 }}>Seed (optional)</FormLabel>
              <TextField
                type="number"
                value={seed}
                onChange={(e) => setSeed(e.target.value)}
                placeholder="Random each time if left empty"
                InputProps={{
                  sx: darkInputSx,
                }}
              />
              <Typography variant="caption" sx={{ color: "rgba(148,163,184,0.9)", mt: 0.5 }}>
                Use the same seed to get a similar animation style across multiple scenes.
              </Typography>
            </FormControl>
            {/* `fullWidth` (not a `full-width` attribute) so this column matches the seed column */}
            <FormControl fullWidth>
              <FormLabel sx={{ color: "rgba(248,250,252,0.9)", mb: 0.5 }}>Mask image URL (optional)</FormLabel>
              <TextField
                value={maskImageUrl}
                onChange={(e) => setMaskImageUrl(e.target.value)}
                placeholder="e.g. /api/podcast/images/your_avatar_mask.png"
                InputProps={{
                  sx: darkInputSx,
                }}
              />
              <Typography variant="caption" sx={{ color: "rgba(148,163,184,0.9)", mt: 0.5 }}>
                Optional: limit animation to a specific region (e.g. face) by providing a mask image URL. Leave empty to
                animate the whole frame.
              </Typography>
            </FormControl>
          </Stack>
        </Stack>
      </DialogContent>
      <DialogActions sx={{ p: 2.5, pt: 0, justifyContent: "space-between" }}>
        <Typography variant="caption" sx={{ color: "rgba(148,163,184,0.9)" }}>
          Estimated cost at 480p is lower than 720p. You&apos;ll only be billed for successful renders.
        </Typography>
        <Stack direction="row" spacing={1}>
          <SecondaryButton onClick={onClose}>Cancel</SecondaryButton>
          <PrimaryButton onClick={handleGenerate}>Generate Video</PrimaryButton>
        </Stack>
      </DialogActions>
    </Dialog>
  );
};

View File

@@ -1,5 +1,5 @@
import { useState, useEffect, useRef, useCallback } from "react";
import { Script, Knobs, Job, RenderJobResult, TaskStatus } from "../types";
import { Script, Knobs, Job, RenderJobResult, TaskStatus, VideoGenerationSettings } from "../types";
import { podcastApi } from "../../../services/podcastApi";
interface UseRenderQueueProps {
@@ -36,7 +36,11 @@ export const useRenderQueue = ({
duration: number;
sceneCount: number;
} | null>(null);
const [combiningVideos, setCombiningVideos] = useState(false);
const [finalVideoUrl, setFinalVideoUrl] = useState<string | null>(null);
const [combiningProgress, setCombiningProgress] = useState<{ progress: number; message: string } | null>(null);
const pollingIntervals = useRef<Map<string, NodeJS.Timeout>>(new Map());
const pollingErrorCounts = useRef<Map<string, number>>(new Map());
// Cleanup polling intervals on unmount
useEffect(() => {
@@ -44,10 +48,11 @@ export const useRenderQueue = ({
return () => {
intervals.forEach((interval) => clearInterval(interval));
intervals.clear();
pollingErrorCounts.current.clear();
};
}, []);
// Initialize jobs if empty
// Initialize jobs if empty (audio/image only)
useEffect(() => {
if (jobs.length === 0 && script.scenes.length > 0) {
const initialJobs: Job[] = script.scenes.map((s) => {
@@ -59,7 +64,7 @@ export const useRenderQueue = ({
progress: hasExistingAudio ? 100 : 0,
previewUrl: null,
finalUrl: hasExistingAudio ? s.audioUrl || null : null,
imageUrl: s.imageUrl || null, // Include existing imageUrl from scene
imageUrl: s.imageUrl || null,
jobId: null,
};
});
@@ -67,25 +72,201 @@ export const useRenderQueue = ({
onUpdateJob(job.sceneId, job);
});
}
}, [script.scenes.length, jobs.length, onUpdateJob, script.scenes]);
}, [jobs.length, script.scenes.length, onUpdateJob, script.scenes]);
// Load final video URL from project on mount (for persistence across reloads).
// Responses that arrive after the effect has been cleaned up (unmount or a
// change of projectId) are ignored so an older project's URL cannot overwrite
// the current state.
useEffect(() => {
  if (!projectId) return;

  let cancelled = false;

  podcastApi
    .loadProject(projectId)
    .then((project) => {
      if (cancelled) return;
      if (project.final_video_url) {
        console.log("[useRenderQueue] Loaded final video URL from project:", project.final_video_url);
        setFinalVideoUrl(project.final_video_url);
      }
    })
    .catch((error) => {
      console.error("[useRenderQueue] Failed to load project for final video URL:", error);
      // Don't show error to user - this is just for restoring state
    });

  return () => {
    cancelled = true;
  };
}, [projectId]);
// Always try to attach existing videos to scenes (even after reloads)
// Fetches the project's video list and, for every scene whose numeric id has a
// matching video, pushes that video URL onto the scene's job via onUpdateJob.
// NOTE(review): `jobs` is a dependency and onUpdateJob presumably changes `jobs`,
// so this effect likely re-runs (and re-requests the list) after each job update
// until every job already carries its videoUrl - confirm this is acceptable.
useEffect(() => {
if (script.scenes.length === 0) return;
podcastApi
.listVideos(projectId)
.then((result) => {
// scene_number -> video_url, keeping the first entry seen per scene number
const videoMap = new Map<number, string>();
result.videos.forEach((video) => {
// Use the most recent video for each scene number
// NOTE(review): "most recent" assumes the API returns newest first - confirm
if (!videoMap.has(video.scene_number)) {
// Store the raw video URL - SceneCard will handle authentication via blob loading
videoMap.set(video.scene_number, video.video_url);
}
});
script.scenes.forEach((scene) => {
// The scene number is taken from the first run of digits in the scene id
const sceneNumberMatch = scene.id.match(/\d+/);
const sceneNumber = sceneNumberMatch ? parseInt(sceneNumberMatch[0], 10) : null;
if (sceneNumber === null) return;
const videoUrl = videoMap.get(sceneNumber);
if (!videoUrl) return;
const job = jobs.find((j) => j.sceneId === scene.id);
// Avoid redundant updates
if (job?.videoUrl === videoUrl) return;
onUpdateJob(scene.id, {
sceneId: scene.id,
title: scene.title,
videoUrl,
status: "completed" as const,
progress: 100,
});
});
})
.catch((error) => {
// Best effort only: failures are logged and not reported through onError
console.error("[useRenderQueue] Failed to list existing videos:", error);
});
}, [projectId, script.scenes, jobs, onUpdateJob]);
// Periodic check to rescue videos that were generated but not detected by polling.
//
// The latest jobs/scenes/onUpdateJob are read through a ref rather than listed
// as effect dependencies. Otherwise every job update (e.g. progress changes
// while polling) would clear and restart the 2-minute timer, and the rescue
// check might never actually fire while a render is in progress.
const rescueSnapshotRef = useRef({ jobs, scenes: script.scenes, onUpdateJob });
useEffect(() => {
  rescueSnapshotRef.current = { jobs, scenes: script.scenes, onUpdateJob };
});

const hasScenesToRescue = script.scenes.length > 0;

useEffect(() => {
  if (!rendering || !hasScenesToRescue) return;

  let cancelled = false;

  const rescueInterval = setInterval(async () => {
    // Check for videos every 2 minutes while rendering is active
    try {
      const videoList = await podcastApi.listVideos(projectId);
      if (cancelled) return;

      const { jobs: latestJobs, scenes, onUpdateJob: updateJob } = rescueSnapshotRef.current;

      // scene_number -> video_url, keeping the first entry seen per scene number
      const videoMap = new Map<number, string>();
      videoList.videos.forEach((video) => {
        if (!videoMap.has(video.scene_number)) {
          // Store the raw video URL - SceneCard will handle authentication via blob loading
          videoMap.set(video.scene_number, video.video_url);
        }
      });

      // Update jobs for scenes that have videos but no videoUrl set
      scenes.forEach((scene) => {
        const sceneNumberMatch = scene.id.match(/\d+/);
        if (!sceneNumberMatch) return;

        const videoUrl = videoMap.get(parseInt(sceneNumberMatch[0], 10));
        if (!videoUrl) return;

        const job = latestJobs.find((j) => j.sceneId === scene.id);
        if (!job) {
          updateJob(scene.id, {
            sceneId: scene.id,
            title: scene.title,
            status: "completed" as const,
            progress: 100,
            videoUrl,
          });
        } else if (!job.videoUrl) {
          updateJob(scene.id, { videoUrl, status: "completed" as const, progress: 100 });
          // If this was the rendering scene, stop rendering
          if (rendering === scene.id) {
            setRendering(null);
          }
        }
      });
    } catch (error) {
      console.error("[useRenderQueue] Failed to rescue videos:", error);
    }
  }, 120000); // Check every 2 minutes

  return () => {
    cancelled = true;
    clearInterval(rescueInterval);
  };
}, [rendering, hasScenesToRescue, projectId]);
const getScene = useCallback((sceneId: string) => script.scenes.find((s) => s.id === sceneId), [script.scenes]);
const pollTaskStatus = useCallback(async (taskId: string, sceneId: string) => {
try {
const status: TaskStatus = await podcastApi.pollTaskStatus(taskId);
const status: TaskStatus | null = await podcastApi.pollTaskStatus(taskId);
// Handle null response (task not found)
if (!status) {
const errorCount = (pollingErrorCounts.current.get(sceneId) || 0) + 1;
pollingErrorCounts.current.set(sceneId, errorCount);
// Stop polling after 3 consecutive "task not found" errors
if (errorCount >= 3) {
onUpdateJob(sceneId, { status: "failed", progress: 0 });
const interval = pollingIntervals.current.get(sceneId);
if (interval) {
clearInterval(interval);
pollingIntervals.current.delete(sceneId);
}
pollingErrorCounts.current.delete(sceneId);
setRendering(null);
onError("Video generation task not found. The task may have expired or been cancelled.");
return true; // Stop polling
}
return false; // Continue polling (might be transient)
}
// Reset error count on successful poll
pollingErrorCounts.current.delete(sceneId);
onUpdateJob(sceneId, {
progress: status.progress ?? 0,
status: status.status === "completed" ? "completed" : status.status === "failed" ? "failed" : "running",
});
if (status.status === "completed" && status.result) {
// Check for completion - handle both "completed" and "processing" with 100% progress
const isCompleted = status.status === "completed" || (status.status === "processing" && status.progress === 100);
if (isCompleted && status.result) {
const result = status.result;
console.log("[useRenderQueue] Task completed, extracting video URL", {
result,
video_url: result.video_url,
status: status.status,
progress: status.progress,
});
let videoUrl = result.video_url;
if (!videoUrl) {
console.error("[useRenderQueue] No video_url in result! Attempting to rescue from file system...", { result });
// Try to rescue: check if video exists for this scene
const sceneNumberMatch = getScene(sceneId)?.id.match(/\d+/);
const sceneNumber = sceneNumberMatch ? parseInt(sceneNumberMatch[0], 10) : null;
if (sceneNumber !== null) {
podcastApi
.listVideos(projectId)
.then((videoList) => {
const sceneVideo = videoList.videos.find((v) => v.scene_number === sceneNumber);
if (sceneVideo) {
// Store the raw video URL - SceneCard will handle authentication via blob loading
onUpdateJob(sceneId, {
status: "completed",
progress: 100,
videoUrl: sceneVideo.video_url,
cost: result.cost || 0,
});
}
})
.catch((err) => console.error("[useRenderQueue] Failed to rescue video:", err));
}
return true; // Stop polling
}
// Store the raw video URL - SceneCard will handle authentication via blob loading
onUpdateJob(sceneId, {
status: "completed",
progress: 100,
videoUrl: result.video_url,
videoUrl,
cost: result.cost,
});
@@ -94,20 +275,62 @@ export const useRenderQueue = ({
clearInterval(interval);
pollingIntervals.current.delete(sceneId);
}
setRendering(null);
return true; // Stop polling
} else if (status.status === "failed") {
// Extract user-friendly error message
let errorMessage = "Video generation failed";
if (status.error) {
// Try to extract meaningful error from various formats
const errorStr = status.error;
if (errorStr.includes("Insufficient credits")) {
errorMessage = "Video generation failed: Insufficient WaveSpeed credits. Please top up your account.";
} else if (errorStr.includes("HTTPException") || errorStr.includes("502")) {
// Extract the actual error message from HTTPException details
const match = errorStr.match(/message[":\s]+"([^"]+)"/i) || errorStr.match(/detail[":\s]+"([^"]+)"/i);
if (match && match[1]) {
errorMessage = `Video generation failed: ${match[1]}`;
} else {
errorMessage = `Video generation failed: ${errorStr}`;
}
} else {
errorMessage = `Video generation failed: ${errorStr}`;
}
}
onUpdateJob(sceneId, { status: "failed", progress: 0 });
const interval = pollingIntervals.current.get(sceneId);
if (interval) {
clearInterval(interval);
pollingIntervals.current.delete(sceneId);
}
onError(status.error || "Video generation failed");
pollingErrorCounts.current.delete(sceneId);
setRendering(null);
onError(errorMessage);
return true; // Stop polling
}
return status.status === "completed" || status.status === "failed";
return false; // Continue polling
} catch (error) {
console.error("Error polling task status:", error);
return false;
const errorCount = (pollingErrorCounts.current.get(sceneId) || 0) + 1;
pollingErrorCounts.current.set(sceneId, errorCount);
// Stop polling after 5 consecutive network errors
if (errorCount >= 5) {
onUpdateJob(sceneId, { status: "failed", progress: 0 });
const interval = pollingIntervals.current.get(sceneId);
if (interval) {
clearInterval(interval);
pollingIntervals.current.delete(sceneId);
}
pollingErrorCounts.current.delete(sceneId);
setRendering(null);
const errorMsg = error instanceof Error ? error.message : String(error);
onError(`Video generation failed: Unable to check status. ${errorMsg}`);
return true; // Stop polling
}
return false; // Continue polling (might be transient network error)
}
}, [onUpdateJob, onError]);
@@ -217,6 +440,7 @@ export const useRenderQueue = ({
sceneId: scene.id,
sceneTitle: scene.title,
sceneContent: sceneContent,
baseAvatarUrl: avatarImageUrl || undefined, // Use base avatar if available
width: 1024,
height: 1024,
});
@@ -239,68 +463,112 @@ export const useRenderQueue = ({
} finally {
setGeneratingImage(null);
}
}, [generatingImage, getScene, onUpdateJob, onError]);
}, [generatingImage, getScene, avatarImageUrl, onUpdateJob, onError, script]);
const runVideoRender = useCallback(async (sceneId: string) => {
if (rendering && rendering !== sceneId) return;
const scene = getScene(sceneId);
if (!scene) return;
const sceneImageUrl = scene.imageUrl || avatarImageUrl;
if (!sceneImageUrl) {
onError("Scene image is required for video generation. Please generate images for scenes first.");
return;
}
const job = jobs.find((j) => j.sceneId === sceneId);
if (!job?.finalUrl) {
onError("Please generate audio first before creating video.");
return;
}
const estimatedCost = 0.30;
if (budgetCap && budgetCap > 0) {
const currentSpent = jobs
.filter((j) => j.status === "completed" && j.cost)
.reduce((sum, j) => sum + (j.cost || 0), 0);
if (currentSpent + estimatedCost > budgetCap) {
onError(`Budget cap exceeded. Estimated cost: $${estimatedCost.toFixed(2)}, Budget remaining: $${(budgetCap - currentSpent).toFixed(2)}`);
const runVideoRender = useCallback(
async (sceneId: string, settings?: VideoGenerationSettings) => {
if (rendering && rendering !== sceneId) {
return;
}
}
setRendering(sceneId);
onUpdateJob(sceneId, {
status: "running",
progress: 5,
});
const scene = getScene(sceneId);
if (!scene) {
return;
}
try {
const result = await podcastApi.generateVideo({
projectId,
sceneId,
sceneTitle: scene.title,
audioUrl: job.finalUrl,
avatarImageUrl: sceneImageUrl,
resolution: knobs.resolution || "720p",
});
// Guard: require image and audio before calling expensive video gen
const sceneImageUrl = scene.imageUrl || avatarImageUrl;
if (!sceneImageUrl) {
onError("Scene image is required for video generation. Please generate images for scenes first.");
return;
}
const job = jobs.find((j) => j.sceneId === sceneId);
// Use job.finalUrl if available, otherwise fall back to scene.audioUrl (from Script Editor)
const audioUrl = job?.finalUrl || scene.audioUrl;
if (!audioUrl || audioUrl.startsWith("blob:")) {
onError("Please generate audio first before creating video.");
return;
}
// Guard: ensure every scene has audio and image before enabling render queue video
const allScenesHaveAudio = script.scenes.every((s) => s.audioUrl && !s.audioUrl.startsWith("blob:"));
const allScenesHaveImage = script.scenes.every((s) => s.imageUrl);
if (!allScenesHaveAudio || !allScenesHaveImage) {
onError("Please ensure all scenes have both audio and image before generating video.");
return;
}
// Resolution & simple cost heuristic (default 480p for lower cost)
const targetResolution: "480p" | "720p" =
settings?.resolution || (knobs.resolution as "480p" | "720p") || "480p";
const baseCost = 0.3; // 5s at 720p
const estimatedCost = targetResolution === "480p" ? baseCost / 2 : baseCost;
if (budgetCap && budgetCap > 0) {
const currentSpent = jobs
.filter((j) => j.status === "completed" && j.cost)
.reduce((sum, j) => sum + (j.cost || 0), 0);
if (currentSpent + estimatedCost > budgetCap) {
onError(
`Budget cap exceeded. Estimated cost: $${estimatedCost.toFixed(
2
)}, Budget remaining: $${(budgetCap - currentSpent).toFixed(2)}`
);
return;
}
}
setRendering(sceneId);
onUpdateJob(sceneId, {
taskId: result.taskId,
status: "running",
progress: 5,
});
startPolling(result.taskId, sceneId);
} catch (error) {
onUpdateJob(sceneId, { status: "failed", progress: 0 });
const message = error instanceof Error ? error.message : "Video generation failed";
onError(message);
} finally {
setRendering(null);
}
}, [rendering, getScene, avatarImageUrl, jobs, budgetCap, projectId, knobs, onUpdateJob, onError, startPolling]);
try {
console.log("[useRenderQueue] Starting video generation", {
sceneId,
sceneTitle: scene.title,
audioUrl,
avatarImageUrl: sceneImageUrl,
resolution: targetResolution,
prompt: settings?.prompt,
seed: settings?.seed,
maskImageUrl: settings?.maskImageUrl,
});
const result = await podcastApi.generateVideo({
projectId,
sceneId,
sceneTitle: scene.title,
audioUrl,
avatarImageUrl: sceneImageUrl,
resolution: targetResolution,
prompt: settings?.prompt || undefined,
seed: settings?.seed ?? -1,
maskImageUrl: settings?.maskImageUrl || undefined,
});
if (!result.taskId) {
throw new Error("Backend did not return a task ID. Response: " + JSON.stringify(result));
}
onUpdateJob(sceneId, {
taskId: result.taskId,
status: "running",
progress: 5,
});
startPolling(result.taskId, sceneId);
} catch (error) {
onUpdateJob(sceneId, { status: "failed", progress: 0 });
const message = error instanceof Error ? error.message : "Video generation failed";
onError(message);
}
},
[rendering, getScene, avatarImageUrl, jobs, budgetCap, projectId, knobs, onUpdateJob, onError, script.scenes, startPolling]
);
const combineAudio = useCallback(async () => {
try {
@@ -361,16 +629,151 @@ export const useRenderQueue = ({
}
}, [script.scenes, jobs, projectId, onError]);
// Combine every scene's rendered video into one final video.
//
// Steps: verify each scene has a non-blob video URL, start the backend combination
// task, poll its status every 2 seconds (at most 300 polls), publish progress through
// setCombiningProgress, then store the final URL and try to persist it on the project.
// Every failure is reported through onError; the combining flags are always reset.
const combineFinalVideo = useCallback(async () => {
  try {
    setCombiningVideos(true);
    onError("");

    // Collect all scene video URLs
    const sceneVideoUrls: string[] = [];
    for (const scene of script.scenes) {
      const job = jobs.find((j) => j.sceneId === scene.id);
      if (!job?.videoUrl) {
        throw new Error(`Scene "${scene.title}" is missing a video. Please generate videos for all scenes first.`);
      }
      // Remove blob URLs and query params - use the API path only
      let videoUrl = job.videoUrl;
      if (videoUrl.startsWith("blob:")) {
        throw new Error(`Scene "${scene.title}" has a blob URL. Cannot combine blob URLs. Please use API URLs.`);
      }
      videoUrl = videoUrl.split("?")[0]; // Remove query params
      sceneVideoUrls.push(videoUrl);
    }

    // A script without scenes has nothing to combine; fail before creating a backend task
    if (sceneVideoUrls.length === 0) {
      throw new Error("No scene videos to combine. Please generate videos for all scenes first.");
    }

    console.log("[combineFinalVideo] Starting combination with", sceneVideoUrls.length, "videos");

    // Start combination task
    const result = await podcastApi.combineVideos({
      projectId,
      sceneVideoUrls,
      podcastTitle: script.scenes[0]?.title || "Podcast",
    });
    console.log("[combineFinalVideo] Task created:", result.taskId);

    // Poll for completion
    const taskId = result.taskId;
    let done = false;
    let pollCount = 0;
    const maxPolls = 300; // 10 minutes max (300 * 2 seconds) - encoding can take time
    let lastProgress = 0;
    let lastMessage = "Starting video combination...";

    while (!done && pollCount < maxPolls) {
      await new Promise((r) => setTimeout(r, 2000)); // Poll every 2 seconds
      pollCount++;
      const status = await podcastApi.pollTaskStatus(taskId);

      if (!status) {
        console.log(`[combineFinalVideo] Poll ${pollCount}: No status yet...`);
        // Don't fail immediately - task might still be initializing
        if (pollCount < 10) {
          continue; // Wait up to 20 seconds for task to appear
        }
        console.error("[combineFinalVideo] Task not found after 10 polls");
        throw new Error("Task not found. Video combination may have failed on the server. Please try again.");
      }

      // Update progress and message for user feedback
      const currentProgress = status.progress ?? 0;
      const currentMessage = status.message || "Processing...";
      // Update UI with progress
      setCombiningProgress({
        progress: currentProgress,
        message: currentMessage,
      });
      // Only log if progress or message changed to reduce noise
      if (currentProgress !== lastProgress || currentMessage !== lastMessage) {
        console.log(
          `[combineFinalVideo] Poll ${pollCount}: ${status.status} | ` +
            `Progress: ${currentProgress.toFixed(1)}% | Message: ${currentMessage}`
        );
        lastProgress = currentProgress;
        lastMessage = currentMessage;
      }

      if (status.status === "completed") {
        done = true;
        const videoUrl = status.result?.video_url;
        if (!videoUrl) {
          console.error("[combineFinalVideo] No video URL in result:", status.result);
          throw new Error("Final video URL not found in result. Please contact support.");
        }
        console.log("[combineFinalVideo] Success! Video URL:", videoUrl);
        setFinalVideoUrl(videoUrl);
        // Save final video URL to project for persistence across reloads
        try {
          await podcastApi.saveProject(projectId, { final_video_url: videoUrl });
          console.log("[combineFinalVideo] Saved final video URL to project");
        } catch (error) {
          console.warn("[combineFinalVideo] Failed to save final video URL to project:", error);
          // Don't fail the operation if project save fails - video is still available
        }
      } else if (status.status === "failed") {
        const errorMsg = status.error || status.message || "Video combination failed";
        console.error("[combineFinalVideo] Task failed:", errorMsg);
        throw new Error(`Video combination failed: ${errorMsg}`);
      }
    }

    // Report a timeout only when the task never finished. Testing the poll counter alone
    // would also flag a task that completed on the very last allowed poll as timed out.
    if (!done) {
      throw new Error("Video combination timed out after 10 minutes. The video may still be processing. Please check back in a few minutes or try again.");
    }
  } catch (error: any) {
    console.error("[combineFinalVideo] Error:", error);
    // Extract detailed error message
    let message = "Failed to combine videos";
    if (error?.response?.data?.detail) {
      // Backend error with detail
      message = error.response.data.detail;
    } else if (error?.message) {
      // Standard error message
      message = error.message;
    } else if (typeof error === "string") {
      message = error;
    }
    console.error("[combineFinalVideo] Displaying error to user:", message);
    onError(message);
  } finally {
    setCombiningVideos(false);
    setCombiningProgress(null);
  }
}, [script.scenes, jobs, projectId, onError]);
// Values exposed by the hook: state first, then the action callbacks.
return {
// Id of the scene whose video render is in progress, or null when none is
rendering,
generatingImage,
combiningAudio,
combinedAudioResult,
// True while combineFinalVideo is running (reset in its finally block)
combiningVideos,
// { progress, message } of the running video combination; null once it ends
combiningProgress,
// URL of the combined video, set when the combination task completes
finalVideoUrl,
// Convenience flag: true whenever `rendering` holds a scene id
isBusy: Boolean(rendering),
runRender,
runImageGeneration,
runVideoRender,
combineAudio,
combineFinalVideo,
};
};

View File

@@ -0,0 +1,464 @@
import React, { useEffect, useState } from "react";
import {
Dialog,
DialogTitle,
DialogContent,
DialogActions,
Stack,
Box,
Typography,
Slider,
Select,
MenuItem,
FormControl,
InputLabel,
FormControlLabel,
Checkbox,
Tooltip,
IconButton,
alpha,
TextField,
} from "@mui/material";
import { HelpOutline as HelpOutlineIcon, Close as CloseIcon } from "@mui/icons-material";
import { PrimaryButton, SecondaryButton } from "../ui";
/**
 * Audio generation options edited in AudioRegenerateModal and handed to its
 * `onRegenerate` callback. The optional fields may be left unset; the modal then
 * displays the fallback noted on each field.
 */
export type AudioGenerationSettings = {
// Voice identifier; the modal lists the entries of VOICE_OPTIONS
voiceId: string;
// Speaking speed; the modal slider spans 0.5 to 2.0 in steps of 0.05
speed: number;
// Loudness; the modal slider spans 0.1 to 10 in steps of 0.1
volume: number;
// Pitch shift; the modal slider spans -12 to 12 in steps of 0.5
pitch: number;
// Vocal mood; the modal lists the entries of EMOTION_OPTIONS
emotion: string;
// Checkbox in the modal, labelled "English normalization (better numbers/dates)"
englishNormalization: boolean;
// Sample rate in Hz; the modal shows 24000 when unset
sampleRate?: number;
// Bitrate in bits per second (rendered as value / 1000 kbps); the modal shows 64000 when unset
bitrate?: number;
// "1" is offered as Mono and "2" as Stereo; the modal shows "1" when unset
channel?: "1" | "2";
// Output audio format; the modal shows "mp3" when unset
format?: "mp3" | "wav" | "pcm" | "flac";
// One of LANGUAGE_BOOST_OPTIONS in the modal; shown as "auto" when unset
languageBoost?: string;
};
/** Props accepted by AudioRegenerateModal. */
interface AudioRegenerateModalProps {
// Whether the dialog is shown
open: boolean;
// Invoked by the dialog's own close handling, the close icon and the Cancel button
onClose: () => void;
// Invoked with the settings currently held in the modal when the primary button is clicked
onRegenerate: (settings: AudioGenerationSettings) => void;
// Seeds the modal's local state; the state is reset whenever this value changes
initialSettings: AudioGenerationSettings;
// While true both action buttons are disabled and the primary button reads "Generating..."; defaults to false
isGenerating?: boolean;
}
// Voice ids offered in the voice dropdown, in display order.
const VOICE_OPTIONS = [
  "Wise_Woman", "Friendly_Person", "Inspirational_girl", "Deep_Voice_Man",
  "Calm_Woman", "Casual_Guy", "Lively_Girl", "Patient_Man",
  "Young_Knight", "Determined_Man", "Lovely_Girl", "Decent_Boy",
  "Imposing_Manner", "Elegant_Man", "Abbess", "Sweet_Girl_2",
  "Exuberant_Girl",
];

// Vocal moods offered in the emotion dropdown.
const EMOTION_OPTIONS = [
  "happy",
  "sad",
  "angry",
  "fearful",
  "disgusted",
  "surprised",
  "neutral",
];

// Selectable sample rates (rendered with an "Hz" suffix).
const SAMPLE_RATE_OPTIONS = [
  8000,
  16000,
  22050,
  24000,
  32000,
  44100,
];

// Selectable bitrates in bits per second (rendered as value / 1000 "kbps").
const BITRATE_OPTIONS = [
  32000,
  64000,
  128000,
  256000,
];

// Values offered in the "Language boost" dropdown; "auto" is what the modal shows when unset.
const LANGUAGE_BOOST_OPTIONS = [
  "auto",
  "English", "Chinese", "Chinese,Yue", "Arabic",
  "Russian", "Spanish", "French", "Portuguese",
  "German", "Turkish", "Dutch", "Ukrainian",
  "Vietnamese", "Indonesian", "Japanese", "Italian",
  "Korean", "Thai", "Polish", "Romanian",
  "Greek", "Czech", "Finnish", "Hindi",
];
// Shared dark-theme styles for the modal's inputs. Each object was previously repeated
// inline on every input; keeping one copy prevents the inputs from drifting apart.
const AUDIO_MODAL_SELECT_SX = {
  backgroundColor: alpha("#ffffff", 0.05),
  color: "white",
  "& .MuiOutlinedInput-notchedOutline": { borderColor: "rgba(255,255,255,0.2)" },
  "&:hover .MuiOutlinedInput-notchedOutline": { borderColor: "rgba(255,255,255,0.3)" },
  "&.Mui-focused .MuiOutlinedInput-notchedOutline": { borderColor: "#667eea" },
  "& .MuiSvgIcon-root": { color: "rgba(255,255,255,0.7)" },
};
const AUDIO_MODAL_TEXT_FIELD_SX = {
  "& .MuiOutlinedInput-root": {
    backgroundColor: alpha("#ffffff", 0.05),
    color: "white",
    "& fieldset": { borderColor: "rgba(255,255,255,0.2)" },
    "&:hover fieldset": { borderColor: "rgba(255,255,255,0.3)" },
    "&.Mui-focused fieldset": { borderColor: "#667eea" },
    "& .MuiSvgIcon-root": { color: "rgba(255,255,255,0.7)" },
  },
};
const AUDIO_MODAL_LABEL_PROPS = { sx: { color: "rgba(255,255,255,0.7)" } };
const AUDIO_MODAL_HEADING_SX = { color: "white", fontWeight: 600 };
const AUDIO_MODAL_HELP_SX = { color: "rgba(255,255,255,0.5)" };
const AUDIO_MODAL_CAPTION_SX = { color: "rgba(255,255,255,0.6)" };
const AUDIO_MODAL_HINT_SX = { color: "rgba(255,255,255,0.6)", mt: 0.5, display: "block" };

/**
 * Dialog for regenerating one scene's audio with custom settings.
 *
 * Holds a local copy of `initialSettings` (reset whenever that prop changes), lets the
 * user edit voice, speed, volume, pitch, emotion, normalization, language boost and
 * output quality, and passes the edited copy to `onRegenerate` when the primary button
 * is clicked. While `isGenerating` is true both action buttons are disabled.
 */
export const AudioRegenerateModal: React.FC<AudioRegenerateModalProps> = ({
  open,
  onClose,
  onRegenerate,
  initialSettings,
  isGenerating = false,
}) => {
  const [settings, setSettings] = useState<AudioGenerationSettings>(initialSettings);

  useEffect(() => {
    setSettings(initialSettings);
  }, [initialSettings]);

  // Merge a partial change into the latest state. The functional updater avoids
  // spreading a stale `settings` value captured by an older render.
  const updateSettings = (patch: Partial<AudioGenerationSettings>) => {
    setSettings((previous) => ({ ...previous, ...patch }));
  };

  const handleRegenerate = () => {
    onRegenerate(settings);
  };

  // The tooltip allows a custom trained voice id. Such an id is not part of
  // VOICE_OPTIONS, and a Select whose value matches no option renders blank, so any
  // unknown id currently in play is listed ahead of the system voices.
  const customVoiceIds = [initialSettings.voiceId, settings.voiceId].filter(
    (id, index, ids) => Boolean(id) && !VOICE_OPTIONS.includes(id) && ids.indexOf(id) === index
  );
  const voiceOptions = [...customVoiceIds, ...VOICE_OPTIONS];

  return (
    <Dialog
      open={open}
      onClose={onClose}
      maxWidth="md"
      fullWidth
      PaperProps={{
        sx: {
          background: alpha("#0f172a", 0.95),
          backdropFilter: "blur(20px)",
          border: "1px solid rgba(255,255,255,0.1)",
          borderRadius: 4,
        },
      }}
    >
      <DialogTitle>
        <Stack direction="row" justifyContent="space-between" alignItems="center">
          <Typography variant="h6" sx={AUDIO_MODAL_HEADING_SX}>
            Regenerate Audio with Custom Settings
          </Typography>
          <IconButton onClick={onClose} size="small" sx={{ color: "rgba(255,255,255,0.7)" }}>
            <CloseIcon />
          </IconButton>
        </Stack>
        <Typography variant="body2" sx={{ color: "rgba(255,255,255,0.6)", mt: 1 }}>
          Adjust voice, speed, tone, and quality. Changes apply only to this scene.
        </Typography>
      </DialogTitle>
      <DialogContent>
        <Stack spacing={3} sx={{ mt: 1 }}>
          {/* Voice */}
          <Box>
            <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1 }}>
              <Typography variant="subtitle1" sx={AUDIO_MODAL_HEADING_SX}>
                Voice
              </Typography>
              <Tooltip title="Choose a system voice or your custom trained voice ID." arrow>
                <IconButton size="small" sx={AUDIO_MODAL_HELP_SX}>
                  <HelpOutlineIcon fontSize="small" />
                </IconButton>
              </Tooltip>
            </Stack>
            <FormControl fullWidth>
              <Select
                value={settings.voiceId}
                onChange={(e) => updateSettings({ voiceId: e.target.value })}
                sx={AUDIO_MODAL_SELECT_SX}
              >
                {voiceOptions.map((v) => (
                  <MenuItem key={v} value={v}>
                    {v}
                  </MenuItem>
                ))}
              </Select>
            </FormControl>
          </Box>

          {/* Speed / Volume / Pitch */}
          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <Box sx={{ flex: 1 }}>
              <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 0.5 }}>
                <Typography variant="subtitle2" sx={AUDIO_MODAL_HEADING_SX}>
                  Speed (0.5-2.0)
                </Typography>
                <Tooltip title="Control how fast the voice speaks. 1.0 is normal." arrow>
                  <HelpOutlineIcon fontSize="small" sx={AUDIO_MODAL_HELP_SX} />
                </Tooltip>
              </Stack>
              <Slider
                value={settings.speed}
                min={0.5}
                max={2.0}
                step={0.05}
                onChange={(_, v) => updateSettings({ speed: v as number })}
                sx={{ color: "#6366f1" }}
              />
              <Typography variant="caption" sx={AUDIO_MODAL_CAPTION_SX}>
                Slower (narrative) ↔ Faster (conversational). Impacts duration.
              </Typography>
            </Box>
            <Box sx={{ flex: 1 }}>
              <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 0.5 }}>
                <Typography variant="subtitle2" sx={AUDIO_MODAL_HEADING_SX}>
                  Volume (0.1-10)
                </Typography>
                <Tooltip title="Loudness of the voice. 1.0 is normal loudness." arrow>
                  <HelpOutlineIcon fontSize="small" sx={AUDIO_MODAL_HELP_SX} />
                </Tooltip>
              </Stack>
              <Slider
                value={settings.volume}
                min={0.1}
                max={10}
                step={0.1}
                onChange={(_, v) => updateSettings({ volume: v as number })}
                sx={{ color: "#10b981" }}
              />
              <Typography variant="caption" sx={AUDIO_MODAL_CAPTION_SX}>
                Lower for soft tone; higher for punchier delivery.
              </Typography>
            </Box>
            <Box sx={{ flex: 1 }}>
              <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 0.5 }}>
                <Typography variant="subtitle2" sx={AUDIO_MODAL_HEADING_SX}>
                  Pitch (-12 to 12)
                </Typography>
                <Tooltip title="Tone of the voice. 0 is neutral. Negative is deeper; positive is brighter." arrow>
                  <HelpOutlineIcon fontSize="small" sx={AUDIO_MODAL_HELP_SX} />
                </Tooltip>
              </Stack>
              <Slider
                value={settings.pitch}
                min={-12}
                max={12}
                step={0.5}
                onChange={(_, v) => updateSettings({ pitch: v as number })}
                sx={{ color: "#f97316" }}
              />
              <Typography variant="caption" sx={AUDIO_MODAL_CAPTION_SX}>
                Use small adjustments (±2) for natural results.
              </Typography>
            </Box>
          </Stack>

          {/* Emotion */}
          <Box>
            <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1 }}>
              <Typography variant="subtitle1" sx={AUDIO_MODAL_HEADING_SX}>
                Emotion
              </Typography>
              <Tooltip title="Sets the vocal mood: happy, neutral, sad, etc." arrow>
                <IconButton size="small" sx={AUDIO_MODAL_HELP_SX}>
                  <HelpOutlineIcon fontSize="small" />
                </IconButton>
              </Tooltip>
            </Stack>
            <FormControl fullWidth>
              <Select
                value={settings.emotion}
                onChange={(e) => updateSettings({ emotion: e.target.value })}
                sx={AUDIO_MODAL_SELECT_SX}
              >
                {EMOTION_OPTIONS.map((e) => (
                  <MenuItem key={e} value={e}>
                    {e}
                  </MenuItem>
                ))}
              </Select>
            </FormControl>
            <Typography variant="caption" sx={AUDIO_MODAL_HINT_SX}>
              Tip: happy/neutral for most podcasts; sad/angry for dramatic or critical segments.
            </Typography>
          </Box>

          {/* Normalization & Language */}
          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <Box sx={{ flex: 1 }}>
              <FormControlLabel
                control={
                  <Checkbox
                    checked={settings.englishNormalization}
                    onChange={(e) => updateSettings({ englishNormalization: e.target.checked })}
                    sx={{ color: "rgba(255,255,255,0.7)" }}
                  />
                }
                label={
                  <Typography variant="body2" sx={{ color: "white" }}>
                    English normalization (better numbers/dates)
                  </Typography>
                }
              />
              <Typography variant="caption" sx={AUDIO_MODAL_CAPTION_SX}>
                Improves pronunciation of numbers/dates (recommended for stats-heavy scenes).
              </Typography>
            </Box>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Language boost"
                value={settings.languageBoost || "auto"}
                onChange={(e) => updateSettings({ languageBoost: e.target.value })}
                SelectProps={{ native: false }}
                InputLabelProps={AUDIO_MODAL_LABEL_PROPS}
                sx={AUDIO_MODAL_TEXT_FIELD_SX}
              >
                {LANGUAGE_BOOST_OPTIONS.map((opt) => (
                  <MenuItem key={opt} value={opt}>
                    {opt}
                  </MenuItem>
                ))}
              </TextField>
              <Typography variant="caption" sx={AUDIO_MODAL_HINT_SX}>
                Helps with language-specific pronunciation and accent.
              </Typography>
            </Box>
          </Stack>

          {/* Quality & Format */}
          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Sample rate"
                value={settings.sampleRate || 24000}
                onChange={(e) => updateSettings({ sampleRate: Number(e.target.value) })}
                InputLabelProps={AUDIO_MODAL_LABEL_PROPS}
                sx={AUDIO_MODAL_TEXT_FIELD_SX}
              >
                {SAMPLE_RATE_OPTIONS.map((opt) => (
                  <MenuItem key={opt} value={opt}>
                    {opt} Hz
                  </MenuItem>
                ))}
              </TextField>
              <Typography variant="caption" sx={AUDIO_MODAL_HINT_SX}>
                Higher sample rate = higher fidelity (24k+ recommended for podcast voice).
              </Typography>
            </Box>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Bitrate"
                value={settings.bitrate || 64000}
                onChange={(e) => updateSettings({ bitrate: Number(e.target.value) })}
                InputLabelProps={AUDIO_MODAL_LABEL_PROPS}
                sx={AUDIO_MODAL_TEXT_FIELD_SX}
              >
                {BITRATE_OPTIONS.map((opt) => (
                  <MenuItem key={opt} value={opt}>
                    {opt / 1000} kbps
                  </MenuItem>
                ))}
              </TextField>
              <Typography variant="caption" sx={AUDIO_MODAL_HINT_SX}>
                Higher bitrate = larger file but clearer audio. 64-128 kbps is great for voice.
              </Typography>
            </Box>
          </Stack>
          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Channel"
                value={settings.channel || "1"}
                onChange={(e) => updateSettings({ channel: e.target.value as "1" | "2" })}
                InputLabelProps={AUDIO_MODAL_LABEL_PROPS}
                sx={AUDIO_MODAL_TEXT_FIELD_SX}
              >
                <MenuItem value="1">Mono (smaller, voice-focused)</MenuItem>
                <MenuItem value="2">Stereo (wider, more presence)</MenuItem>
              </TextField>
            </Box>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Format"
                value={settings.format || "mp3"}
                onChange={(e) => updateSettings({ format: e.target.value as "mp3" | "wav" | "pcm" | "flac" })}
                InputLabelProps={AUDIO_MODAL_LABEL_PROPS}
                sx={AUDIO_MODAL_TEXT_FIELD_SX}
              >
                <MenuItem value="mp3">mp3 (small, universal)</MenuItem>
                <MenuItem value="wav">wav (uncompressed)</MenuItem>
                <MenuItem value="pcm">pcm (raw)</MenuItem>
                <MenuItem value="flac">flac (lossless)</MenuItem>
              </TextField>
            </Box>
          </Stack>
        </Stack>
      </DialogContent>
      <DialogActions sx={{ p: 3, pt: 2 }}>
        <SecondaryButton onClick={onClose} disabled={isGenerating}>
          Cancel
        </SecondaryButton>
        <PrimaryButton onClick={handleRegenerate} loading={isGenerating} disabled={isGenerating}>
          {isGenerating ? "Generating..." : "Apply & Regenerate"}
        </PrimaryButton>
      </DialogActions>
    </Dialog>
  );
};

View File

@@ -0,0 +1,563 @@
import React, { useState, useEffect } from "react";
import {
Dialog,
DialogTitle,
DialogContent,
DialogActions,
Stack,
Box,
Typography,
TextField,
Select,
MenuItem,
FormControl,
InputLabel,
Divider,
alpha,
Tooltip,
IconButton,
Paper,
} from "@mui/material";
import {
Info as InfoIcon,
HelpOutline as HelpOutlineIcon,
Close as CloseIcon,
} from "@mui/icons-material";
import { PrimaryButton, SecondaryButton } from "../ui";
// Keys of the built-in looks selectable in the image regenerate modal.
type PresetKey = "studioNeutral" | "warmBroadcast" | "techModern";

// One built-in look: card texts plus the generation values that applyPreset copies into state.
type PresetDefinition = {
  title: string;
  subtitle: string;
  prompt: string;
  style: "Auto" | "Fiction" | "Realistic";
  renderingSpeed: "Default" | "Turbo" | "Quality";
  aspectRatio: "1:1" | "16:9" | "9:16" | "4:3" | "3:4";
};

// Every preset renders at top quality in widescreen; only text, prompt and style differ.
const PRESET_RENDER_DEFAULTS = {
  renderingSpeed: "Quality",
  aspectRatio: "16:9",
} as const;

const PRESETS: Record<PresetKey, PresetDefinition> = {
  studioNeutral: {
    title: "Studio Neutral",
    subtitle: "Clean, well-lit studio, neutral background",
    prompt:
      "Professional podcast studio, neutral light grey backdrop, soft key + fill lighting, subtle depth of field, clear microphone framing",
    style: "Realistic",
    ...PRESET_RENDER_DEFAULTS,
  },
  warmBroadcast: {
    title: "Warm Broadcast",
    subtitle: "Warm tones, friendly and inviting broadcast desk",
    prompt:
      "Warm broadcast desk, soft amber lighting, cozy ambience, gentle vignette, inviting expression, polished but approachable look",
    style: "Realistic",
    ...PRESET_RENDER_DEFAULTS,
  },
  techModern: {
    title: "Tech Modern",
    subtitle: "Crisp, modern look with cool accent lighting",
    prompt:
      "Modern tech podcast set, cool accent lights (teal/purple), minimal backdrop, crisp highlights, premium camera look, subtle bokeh",
    style: "Auto",
    ...PRESET_RENDER_DEFAULTS,
  },
};
/**
 * Image generation options collected by ImageRegenerateModal and passed to its
 * `onRegenerate` callback.
 */
export interface ImageGenerationSettings {
// Text prompt for the image; presets append their own prompt text to it
prompt: string;
// Visual style option
style: "Auto" | "Fiction" | "Realistic";
// Rendering speed option
renderingSpeed: "Default" | "Turbo" | "Quality";
// Width:height ratio of the generated image
aspectRatio: "1:1" | "16:9" | "9:16" | "4:3" | "3:4";
}
/**
 * Props accepted by ImageRegenerateModal. The `initial*` values seed the modal's
 * local state, which is reset whenever any of them changes.
 */
interface ImageRegenerateModalProps {
// Whether the dialog is shown
open: boolean;
// Invoked by the dialog's own close handling and by the close icon
onClose: () => void;
// Invoked with the prompt, style, rendering speed and aspect ratio currently held in the modal
onRegenerate: (settings: ImageGenerationSettings) => void;
// Starting prompt; presets are combined with it
initialPrompt: string;
// Defaults to "Realistic"
initialStyle?: "Auto" | "Fiction" | "Realistic";
// Defaults to "Quality"
initialRenderingSpeed?: "Default" | "Turbo" | "Quality";
// Defaults to "16:9"
initialAspectRatio?: "1:1" | "16:9" | "9:16" | "4:3" | "3:4";
// Defaults to false
isGenerating?: boolean;
}
export const ImageRegenerateModal: React.FC<ImageRegenerateModalProps> = ({
open,
onClose,
onRegenerate,
initialPrompt,
initialStyle = "Realistic",
initialRenderingSpeed = "Quality",
initialAspectRatio = "16:9",
isGenerating = false,
}) => {
const [prompt, setPrompt] = useState(initialPrompt);
const [style, setStyle] = useState<"Auto" | "Fiction" | "Realistic">(initialStyle);
const [renderingSpeed, setRenderingSpeed] = useState<"Default" | "Turbo" | "Quality">(initialRenderingSpeed);
const [aspectRatio, setAspectRatio] = useState<"1:1" | "16:9" | "9:16" | "4:3" | "3:4">(initialAspectRatio);
// Update state when initial values change
useEffect(() => {
setPrompt(initialPrompt);
setStyle(initialStyle);
setRenderingSpeed(initialRenderingSpeed);
setAspectRatio(initialAspectRatio);
}, [initialPrompt, initialStyle, initialRenderingSpeed, initialAspectRatio]);
const handleRegenerate = () => {
onRegenerate({
prompt,
style,
renderingSpeed,
aspectRatio,
});
};
// Apply a preset: append its prompt fragment to the user's prompt and copy its
// style, rendering speed and aspect ratio into the form.
//
// Lines that exactly match any preset's prompt are removed first, so choosing
// a second preset (or the same one again) replaces the previous preset text
// instead of stacking conflicting or duplicate fragments. A preset line the
// user has edited no longer matches and is kept as user text.
// NOTE(review): the match is per line, so this assumes preset prompts contain
// no newline characters - confirm against the full PRESETS table.
const applyPreset = (presetKey: PresetKey) => {
  const p = PRESETS[presetKey];
  const presetPrompts = Object.values(PRESETS).map((preset) => preset.prompt.trim());

  setPrompt((current) => {
    const userText = (current ?? "")
      .split("\n")
      .filter((line) => !presetPrompts.includes(line.trim()))
      .join("\n")
      .trim();
    // With nothing of the user's own left, start again from the scene prompt.
    const base = userText === "" ? initialPrompt : userText;
    return `${base}\n${p.prompt}`.trim();
  });

  setStyle(p.style);
  setRenderingSpeed(p.renderingSpeed);
  setAspectRatio(p.aspectRatio);
};
return (
<Dialog
open={open}
onClose={onClose}
maxWidth="md"
fullWidth
PaperProps={{
sx: {
background: alpha("#0f172a", 0.95),
backdropFilter: "blur(20px)",
border: "1px solid rgba(255,255,255,0.1)",
borderRadius: 4,
},
}}
>
<DialogTitle>
<Stack direction="row" justifyContent="space-between" alignItems="center">
<Typography variant="h6" sx={{ color: "white", fontWeight: 600 }}>
Regenerate Image with Custom Settings
</Typography>
<IconButton
onClick={onClose}
size="small"
sx={{ color: "rgba(255,255,255,0.7)" }}
>
<CloseIcon />
</IconButton>
</Stack>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.6)", mt: 1 }}>
Customize the image generation parameters to get the perfect result for your scene
</Typography>
</DialogTitle>
<DialogContent>
<Stack spacing={3} sx={{ mt: 1 }}>
{/* Presets */}
<Box>
<Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1 }}>
<Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
Podcast-ready presets
</Typography>
<Tooltip
title="Quickly apply a podcast-friendly look. Each preset adjusts lighting, background, and ratio while keeping your base avatar consistent."
arrow
>
<IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
<HelpOutlineIcon fontSize="small" />
</IconButton>
</Tooltip>
</Stack>
<Stack direction={{ xs: "column", sm: "row" }} spacing={1.5}>
{(
Object.entries(PRESETS) as Array<[PresetKey, (typeof PRESETS)[PresetKey]]>
).map(([key, p]) => (
<Paper
key={key}
onClick={() => applyPreset(key)}
sx={{
p: 1.5,
flex: 1,
cursor: "pointer",
backgroundColor: alpha("#ffffff", 0.04),
border: "1px solid rgba(255,255,255,0.1)",
borderRadius: 2,
transition: "all 0.2s ease",
"&:hover": {
borderColor: "rgba(102,126,234,0.7)",
boxShadow: "0 8px 24px rgba(0,0,0,0.25)",
backgroundColor: alpha("#667eea", 0.08),
},
}}
>
<Typography variant="subtitle2" sx={{ color: "white", fontWeight: 700 }}>
{p.title}
</Typography>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.7)", lineHeight: 1.5, mb: 0.75 }}>
{p.subtitle}
</Typography>
<Stack direction="row" spacing={1} sx={{ color: "rgba(255,255,255,0.6)", fontSize: "0.8rem" }}>
<Typography variant="caption">Style: {p.style}</Typography>
<Typography variant="caption">Speed: {p.renderingSpeed}</Typography>
<Typography variant="caption">AR: {p.aspectRatio}</Typography>
</Stack>
</Paper>
))}
</Stack>
</Box>
{/* Prompt Section */}
<Box>
<Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1 }}>
<Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
Generation Prompt
</Typography>
<Tooltip
title="The prompt describes what you want to see in the generated image. It should include scene context, visual elements, and style preferences. The AI will use this along with your base avatar to create a consistent character in the scene."
arrow
>
<IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
<HelpOutlineIcon fontSize="small" />
</IconButton>
</Tooltip>
</Stack>
<TextField
fullWidth
multiline
rows={4}
value={prompt}
onChange={(e) => setPrompt(e.target.value)}
placeholder="Describe the scene, visual elements, and style..."
sx={{
"& .MuiOutlinedInput-root": {
backgroundColor: alpha("#ffffff", 0.05),
color: "white",
"& fieldset": {
borderColor: "rgba(255,255,255,0.2)",
},
"&:hover fieldset": {
borderColor: "rgba(255,255,255,0.3)",
},
"&.Mui-focused fieldset": {
borderColor: "#667eea",
},
},
"& .MuiInputBase-input": {
color: "white",
},
}}
/>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.5)", mt: 0.5, display: "block" }}>
This prompt will be combined with scene context to generate your image. Be specific about visual elements, mood, and composition.
</Typography>
</Box>
<Divider sx={{ borderColor: "rgba(255,255,255,0.1)" }} />
{/* Style Selection */}
<Box>
<Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1.5 }}>
<Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
Character Style
</Typography>
<Tooltip
title="Determines the artistic style of the character generation. Auto lets the AI choose, Fiction creates more stylized/artistic characters, and Realistic produces photorealistic results."
arrow
>
<IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
<HelpOutlineIcon fontSize="small" />
</IconButton>
</Tooltip>
</Stack>
<FormControl fullWidth>
<Select
value={style}
onChange={(e) => setStyle(e.target.value as "Auto" | "Fiction" | "Realistic")}
sx={{
backgroundColor: alpha("#ffffff", 0.05),
color: "white",
"& .MuiOutlinedInput-notchedOutline": {
borderColor: "rgba(255,255,255,0.2)",
},
"&:hover .MuiOutlinedInput-notchedOutline": {
borderColor: "rgba(255,255,255,0.3)",
},
"&.Mui-focused .MuiOutlinedInput-notchedOutline": {
borderColor: "#667eea",
},
"& .MuiSvgIcon-root": {
color: "rgba(255,255,255,0.7)",
},
}}
>
<MenuItem value="Auto">
<Stack>
<Typography sx={{ color: "white" }}>Auto</Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
AI automatically selects the best style
</Typography>
</Stack>
</MenuItem>
<MenuItem value="Fiction">
<Stack>
<Typography sx={{ color: "white" }}>Fiction</Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Stylized, artistic character appearance
</Typography>
</Stack>
</MenuItem>
<MenuItem value="Realistic">
<Stack>
<Typography sx={{ color: "white" }}>Realistic</Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Photorealistic, professional appearance
</Typography>
</Stack>
</MenuItem>
</Select>
</FormControl>
<Paper
sx={{
mt: 1.5,
p: 1.5,
backgroundColor: alpha("#667eea", 0.1),
border: "1px solid rgba(102,126,234,0.3)",
borderRadius: 2,
}}
>
<Stack direction="row" spacing={1}>
<InfoIcon sx={{ color: "#667eea", fontSize: "1.2rem", mt: 0.1 }} />
<Box>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.9)", fontWeight: 500, mb: 0.5 }}>
Style Impact:
</Typography>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.7)", lineHeight: 1.6 }}>
<strong>Auto:</strong> Best for most cases, balances realism and style<br />
<strong>Fiction:</strong> Great for creative, artistic podcasts with stylized visuals<br />
<strong>Realistic:</strong> Ideal for professional, corporate, or news-style podcasts
</Typography>
</Box>
</Stack>
</Paper>
</Box>
{/* Rendering Speed */}
<Box>
<Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1.5 }}>
<Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
Rendering Speed
</Typography>
<Tooltip
title="Controls the balance between generation speed, cost, and quality. Turbo is fastest and cheapest but lower quality. Quality is slowest and most expensive but produces the best results. Default provides a balanced approach."
arrow
>
<IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
<HelpOutlineIcon fontSize="small" />
</IconButton>
</Tooltip>
</Stack>
<FormControl fullWidth>
<Select
value={renderingSpeed}
onChange={(e) => setRenderingSpeed(e.target.value as "Default" | "Turbo" | "Quality")}
sx={{
backgroundColor: alpha("#ffffff", 0.05),
color: "white",
"& .MuiOutlinedInput-notchedOutline": {
borderColor: "rgba(255,255,255,0.2)",
},
"&:hover .MuiOutlinedInput-notchedOutline": {
borderColor: "rgba(255,255,255,0.3)",
},
"&.Mui-focused .MuiOutlinedInput-notchedOutline": {
borderColor: "#667eea",
},
"& .MuiSvgIcon-root": {
color: "rgba(255,255,255,0.7)",
},
}}
>
<MenuItem value="Turbo">
<Stack>
<Typography sx={{ color: "white" }}>Turbo </Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Fastest (~10-20s) Cheapest Lower quality
</Typography>
</Stack>
</MenuItem>
<MenuItem value="Default">
<Stack>
<Typography sx={{ color: "white" }}>Default </Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Balanced (~30-60s) Moderate cost Good quality
</Typography>
</Stack>
</MenuItem>
<MenuItem value="Quality">
<Stack>
<Typography sx={{ color: "white" }}>Quality </Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Slowest (~60-120s) Most expensive Highest quality
</Typography>
</Stack>
</MenuItem>
</Select>
</FormControl>
<Paper
sx={{
mt: 1.5,
p: 1.5,
backgroundColor: alpha("#10b981", 0.1),
border: "1px solid rgba(16,185,129,0.3)",
borderRadius: 2,
}}
>
<Stack direction="row" spacing={1}>
<InfoIcon sx={{ color: "#10b981", fontSize: "1.2rem", mt: 0.1 }} />
<Box>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.9)", fontWeight: 500, mb: 0.5 }}>
Speed vs Quality Trade-off:
</Typography>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.7)", lineHeight: 1.6 }}>
<strong>Turbo:</strong> Use for quick iterations and testing (~$0.02/image)<br />
<strong>Default:</strong> Best balance for most production use (~$0.04/image)<br />
<strong>Quality:</strong> Use for final, high-quality outputs (~$0.08/image)
</Typography>
</Box>
</Stack>
</Paper>
</Box>
{/* Aspect Ratio */}
<Box>
<Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1.5 }}>
<Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
Aspect Ratio
</Typography>
<Tooltip
title="The width-to-height ratio of the generated image. Choose based on your video format: 16:9 for standard widescreen, 9:16 for vertical/social media, 1:1 for square formats, or 4:3 for traditional formats."
arrow
>
<IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
<HelpOutlineIcon fontSize="small" />
</IconButton>
</Tooltip>
</Stack>
<FormControl fullWidth>
<Select
value={aspectRatio}
onChange={(e) => setAspectRatio(e.target.value as "1:1" | "16:9" | "9:16" | "4:3" | "3:4")}
sx={{
backgroundColor: alpha("#ffffff", 0.05),
color: "white",
"& .MuiOutlinedInput-notchedOutline": {
borderColor: "rgba(255,255,255,0.2)",
},
"&:hover .MuiOutlinedInput-notchedOutline": {
borderColor: "rgba(255,255,255,0.3)",
},
"&.Mui-focused .MuiOutlinedInput-notchedOutline": {
borderColor: "#667eea",
},
"& .MuiSvgIcon-root": {
color: "rgba(255,255,255,0.7)",
},
}}
>
<MenuItem value="16:9">
<Stack>
<Typography sx={{ color: "white" }}>16:9 (Widescreen)</Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Standard video format, best for YouTube, web
</Typography>
</Stack>
</MenuItem>
<MenuItem value="9:16">
<Stack>
<Typography sx={{ color: "white" }}>9:16 (Vertical)</Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Mobile/social media format (TikTok, Instagram Stories)
</Typography>
</Stack>
</MenuItem>
<MenuItem value="1:1">
<Stack>
<Typography sx={{ color: "white" }}>1:1 (Square)</Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Instagram posts, profile images
</Typography>
</Stack>
</MenuItem>
<MenuItem value="4:3">
<Stack>
<Typography sx={{ color: "white" }}>4:3 (Traditional)</Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Classic TV format, presentations
</Typography>
</Stack>
</MenuItem>
<MenuItem value="3:4">
<Stack>
<Typography sx={{ color: "white" }}>3:4 (Portrait)</Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Portrait orientation, mobile apps
</Typography>
</Stack>
</MenuItem>
</Select>
</FormControl>
<Paper
sx={{
mt: 1.5,
p: 1.5,
backgroundColor: alpha("#f59e0b", 0.1),
border: "1px solid rgba(245,158,11,0.3)",
borderRadius: 2,
}}
>
<Stack direction="row" spacing={1}>
<InfoIcon sx={{ color: "#f59e0b", fontSize: "1.2rem", mt: 0.1 }} />
<Box>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.9)", fontWeight: 500, mb: 0.5 }}>
Format Recommendation:
</Typography>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.7)", lineHeight: 1.6 }}>
<strong>16:9</strong> is recommended for most podcast videos as it matches standard video player dimensions and provides optimal viewing experience.
</Typography>
</Box>
</Stack>
</Paper>
</Box>
</Stack>
</DialogContent>
<DialogActions sx={{ p: 3, pt: 2 }}>
<SecondaryButton onClick={onClose} disabled={isGenerating}>
Cancel
</SecondaryButton>
<PrimaryButton
onClick={handleRegenerate}
loading={isGenerating}
disabled={!prompt.trim() || isGenerating}
>
{isGenerating ? "Generating..." : "Regenerate Image"}
</PrimaryButton>
</DialogActions>
</Dialog>
);
};

View File

@@ -1,5 +1,5 @@
import React, { useState, useEffect } from "react";
import { Stack, Box, Typography, Divider, Chip, alpha, CircularProgress } from "@mui/material";
import { Stack, Box, Typography, Divider, Chip, alpha, CircularProgress, LinearProgress } from "@mui/material";
import {
EditNote as EditNoteIcon,
CheckCircle as CheckCircleIcon,
@@ -11,6 +11,8 @@ import {
import { Scene, Line, Knobs } from "../types";
import { GlassyCard, glassyCardSx, PrimaryButton } from "../ui";
import { LineEditor } from "./LineEditor";
import { ImageRegenerateModal, ImageGenerationSettings } from "./ImageRegenerateModal";
import { AudioRegenerateModal, AudioGenerationSettings } from "./AudioRegenerateModal";
import { podcastApi } from "../../../services/podcastApi";
import { aiApiClient } from "../../../api/client";
@@ -24,6 +26,7 @@ interface SceneEditorProps {
onAudioGenerationStart?: (sceneId: string) => void;
onAudioGenerated?: (sceneId: string, audioUrl: string) => void;
idea?: string; // Podcast idea for image generation context
avatarUrl?: string | null; // Base avatar URL for consistent scene image generation
}
export const SceneEditor: React.FC<SceneEditorProps> = ({
@@ -36,10 +39,30 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
onAudioGenerationStart,
onAudioGenerated,
idea,
avatarUrl,
}) => {
const [localGenerating, setLocalGenerating] = useState(false);
const [generatingImage, setGeneratingImage] = useState(false);
const [imageGenerationStatus, setImageGenerationStatus] = useState<string>("");
const [imageGenerationProgress, setImageGenerationProgress] = useState<number>(0);
const [audioBlobUrl, setAudioBlobUrl] = useState<string | null>(null);
const [imageBlobUrl, setImageBlobUrl] = useState<string | null>(null);
const [imageLoading, setImageLoading] = useState(false);
const [showRegenerateModal, setShowRegenerateModal] = useState(false);
const [showAudioModal, setShowAudioModal] = useState(false);
const [audioSettings, setAudioSettings] = useState<AudioGenerationSettings>({
voiceId: "Wise_Woman",
speed: 1.0,
volume: 1.0,
pitch: 0.0,
emotion: scene.emotion || "neutral",
englishNormalization: true,
sampleRate: 24000,
bitrate: 64000,
channel: "1",
format: "mp3",
languageBoost: "auto",
});
// Load audio as blob when audioUrl is available
useEffect(() => {
@@ -116,6 +139,99 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
};
}, [scene.audioUrl, scene.id]);
// Load the scene image as a blob through the authenticated API client whenever
// scene.imageUrl changes, and expose it to the UI as an object URL.
// If the blob request fails, fall back to the original URL with the stored
// token appended as a query parameter.
useEffect(() => {
  // Only object URLs created here need revoking; the token fallback is a plain URL.
  const releaseBlobUrl = (url: string | null) => {
    if (url && url.startsWith('blob:')) {
      URL.revokeObjectURL(url);
    }
  };

  if (!scene.imageUrl) {
    // Image removed: drop whatever is currently displayed.
    setImageBlobUrl((currentBlobUrl) => {
      releaseBlobUrl(currentBlobUrl);
      return null;
    });
    // A request cancelled by the previous cleanup never resets this flag itself.
    setImageLoading(false);
    return;
  }

  // Set to false by the cleanup below, which runs on unmount AND whenever
  // scene.imageUrl changes; it is the only guard against stale responses.
  let isMounted = true;
  const currentImageUrl = scene.imageUrl; // Capture current value

  const loadImageBlob = async () => {
    try {
      setImageLoading(true);

      // Strip the query string first so it cannot leak into the extracted filename.
      let imagePath = currentImageUrl.split('?')[0];
      imagePath = imagePath.startsWith('/') ? imagePath : `/${imagePath}`;

      // Convert /api/story/images/ to /api/podcast/images/ if needed
      if (imagePath.includes('/api/story/images/')) {
        const filename = imagePath.split('/api/story/images/').pop() || '';
        imagePath = `/api/podcast/images/${filename}`;
      }

      // Anything else is treated as a bare filename under the podcast image endpoint
      if (!imagePath.includes('/api/podcast/images/')) {
        const filename = imagePath.split('/').pop() || currentImageUrl;
        imagePath = `/api/podcast/images/${filename}`;
      }

      const response = await aiApiClient.get(imagePath, {
        responseType: 'blob',
      });

      if (!isMounted) {
        return;
      }

      const blobUrl = URL.createObjectURL(response.data);
      setImageBlobUrl((prevBlobUrl) => {
        // Clean up previous blob URL if exists
        if (prevBlobUrl !== blobUrl) {
          releaseBlobUrl(prevBlobUrl);
        }
        return blobUrl;
      });
    } catch (error) {
      console.error('[SceneEditor] Failed to load image blob:', error);

      // A failure from a superseded request must not overwrite the newer image.
      if (!isMounted) {
        return;
      }

      // Fallback: try with query token
      try {
        const token = localStorage.getItem('clerk_dashboard_token') || '';
        if (token) {
          // Use '&' when the URL already has a query string.
          const separator = currentImageUrl.includes('?') ? '&' : '?';
          setImageBlobUrl(`${currentImageUrl}${separator}token=${encodeURIComponent(token)}`);
        }
      } catch (fallbackError) {
        console.error('[SceneEditor] Fallback image loading failed:', fallbackError);
      }
    } finally {
      if (isMounted) {
        setImageLoading(false);
      }
    }
  };

  loadImageBlob();

  return () => {
    isMounted = false;
    // Cleanup blob URL on unmount or when imageUrl changes
    setImageBlobUrl((prevBlobUrl) => {
      releaseBlobUrl(prevBlobUrl);
      return null;
    });
  };
}, [scene.imageUrl]);
const updateLine = (updatedLine: Line) => {
const updated = { ...scene, lines: scene.lines.map((l) => (l.id === updatedLine.id ? updatedLine : l)) };
onUpdateScene(updated);
@@ -126,7 +242,7 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
const hasAudio = Boolean(scene.audioUrl && audioBlobUrl);
const hasImage = Boolean(scene.imageUrl);
const handleApproveAndGenerate = async () => {
const handleApproveAndGenerate = async (settings?: AudioGenerationSettings) => {
const wasAlreadyApproved = scene.approved;
const sceneId = scene.id;
@@ -152,11 +268,20 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
const currentScene = { ...scene, approved: true };
// Generate audio
const effectiveSettings = settings || audioSettings;
const result = await podcastApi.renderSceneAudio({
scene: currentScene,
voiceId: "Wise_Woman",
emotion: scene.emotion || knobs.voice_emotion || "neutral",
speed: knobs.voice_speed || 1.0,
voiceId: effectiveSettings.voiceId || "Wise_Woman",
emotion: effectiveSettings.emotion || scene.emotion || knobs.voice_emotion || "neutral",
speed: effectiveSettings.speed ?? knobs.voice_speed ?? 1.0,
volume: effectiveSettings.volume ?? 1.0,
pitch: effectiveSettings.pitch ?? 0.0,
englishNormalization: effectiveSettings.englishNormalization ?? true,
sampleRate: effectiveSettings.sampleRate,
bitrate: effectiveSettings.bitrate,
channel: effectiveSettings.channel,
format: effectiveSettings.format,
languageBoost: effectiveSettings.languageBoost,
});
// Update scene with audio URL and ensure approved state
@@ -179,35 +304,138 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
}
};
// Map elapsed seconds to the simulated status text and progress percentage
// shown while the (non-streaming) image request is pending. Progress is always
// an integer, never decreases as time passes, and is capped at 90 until the
// request actually finishes.
const estimateImageProgress = (seconds: number): { status: string; progress: number } => {
  if (seconds < 5) {
    return { status: "Submitting request to AI service...", progress: 15 };
  }
  if (seconds < 15) {
    return { status: "AI is generating your image...", progress: 30 };
  }
  if (seconds < 30) {
    return { status: "Creating character-consistent scene image...", progress: 50 };
  }
  if (seconds < 60) {
    return { status: "Rendering image details...", progress: 70 };
  }
  return {
    status: `Processing... (${seconds}s elapsed)`,
    // Continue from the previous stage's 70% instead of dropping below it.
    progress: Math.min(90, 70 + Math.floor((seconds - 60) / 3)),
  };
};

// Pick the first non-empty string among the places an API error may carry its
// message; a structured `detail` object is skipped rather than rendered as
// "[object Object]".
const describeImageError = (error: any): string => {
  const detail = error?.response?.data?.detail;
  const candidates = [detail?.message, detail?.error, detail, error?.message];
  const message = candidates.find(
    (candidate) => typeof candidate === "string" && candidate.trim() !== ""
  );
  return message || "Failed to generate image. Please try again.";
};

// Generate (or regenerate) the image for this scene.
//
// `settings` carries the optional custom prompt, style, rendering speed and
// aspect ratio from the regenerate modal; when omitted those fields are sent
// as undefined. While the request is pending a timer drives simulated progress
// text. On success the scene is updated with the returned image URL. On failure
// the error is logged, shown to the user via alert() and re-thrown.
const handleGenerateImage = async (settings?: ImageGenerationSettings) => {
  const sceneId = scene.id;
  const startTime = Date.now();
  // ReturnType<typeof setInterval> is correct for both browser and Node typings.
  let progressInterval: ReturnType<typeof setInterval> | null = null;

  const stopProgressUpdates = () => {
    if (progressInterval !== null) {
      clearInterval(progressInterval);
      progressInterval = null;
    }
  };

  try {
    setGeneratingImage(true);
    setShowRegenerateModal(false);
    setImageGenerationStatus("Submitting image generation request...");
    setImageGenerationProgress(10);

    // Build scene content from lines for context
    const sceneContent = scene.lines.map((line) => line.text).join(" ");

    // Log avatar URL for debugging
    console.log("[SceneEditor] Generating image with avatarUrl:", avatarUrl);
    console.log("[SceneEditor] Custom settings:", settings);

    // Simulate progress updates during API call
    progressInterval = setInterval(() => {
      const seconds = Math.floor((Date.now() - startTime) / 1000);
      const { status, progress } = estimateImageProgress(seconds);
      setImageGenerationStatus(status);
      setImageGenerationProgress(progress);
    }, 1000);

    const result = await podcastApi.generateSceneImage({
      sceneId,
      sceneTitle: scene.title,
      sceneContent: sceneContent,
      baseAvatarUrl: avatarUrl || undefined, // Pass base avatar URL for character consistency
      idea: idea,
      width: 1024,
      height: 1024,
      // Pass custom settings if provided
      customPrompt: settings?.prompt,
      style: settings?.style,
      renderingSpeed: settings?.renderingSpeed,
      aspectRatio: settings?.aspectRatio,
    });

    stopProgressUpdates();
    setImageGenerationStatus("Finalizing image...");
    setImageGenerationProgress(95);

    // Update scene with image URL
    const updatedScene = { ...scene, imageUrl: result.image_url };
    onUpdateScene(updatedScene);

    const elapsed = Math.floor((Date.now() - startTime) / 1000);
    setImageGenerationStatus(`Image generated successfully in ${elapsed}s`);
    setImageGenerationProgress(100);

    // Clear status after a moment
    setTimeout(() => {
      setImageGenerationStatus("");
      setImageGenerationProgress(0);
    }, 2000);
  } catch (error: any) {
    stopProgressUpdates();
    console.error("Failed to generate image:", error);

    const errorMessage = describeImageError(error);
    console.error("Error details:", {
      status: error?.response?.status,
      statusText: error?.response?.statusText,
      data: error?.response?.data,
      message: errorMessage,
    });

    setImageGenerationStatus(`Error: ${errorMessage}`);
    setImageGenerationProgress(0);

    // Show user-friendly error message
    alert(`Image generation failed: ${errorMessage}`);
    throw error;
  } finally {
    // Covers any exit path that did not already stop the timer.
    stopProgressUpdates();
    setGeneratingImage(false);
  }
};
// Open the image regeneration modal.
const handleRegenerateClick = (): void => {
  const shouldShow = true;
  setShowRegenerateModal(shouldShow);
};
// Primary audio button: with no audio yet, generate immediately using the
// stored settings; otherwise open the audio settings modal.
const handleAudioRegenerateClick = () => {
  if (!hasAudio) {
    handleApproveAndGenerate(audioSettings);
    return;
  }
  setShowAudioModal(true);
};
// Confirm handler for the audio modal: store the chosen settings, close the
// modal, then start generation with those same settings.
const handleAudioRegenerate = (settings: AudioGenerationSettings) => {
  const chosen = settings;
  setAudioSettings(chosen);
  setShowAudioModal(false);
  handleApproveAndGenerate(chosen);
};
return (
<GlassyCard sx={glassyCardSx}>
<Stack spacing={2.5}>
@@ -256,7 +484,7 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
</Box>
<Stack direction="row" spacing={1.5} flexWrap="wrap" useFlexGap>
<PrimaryButton
onClick={handleApproveAndGenerate}
onClick={handleAudioRegenerateClick}
disabled={approving || generating}
loading={approving || generating}
startIcon={
@@ -270,7 +498,7 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
}
tooltip={
hasAudio && !generating
? "Regenerate audio for this scene"
? "Regenerate audio for this scene with custom settings"
: generating
? "Generating audio..."
: scene.approved
@@ -290,7 +518,7 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
: "Approve & Generate Audio"}
</PrimaryButton>
<PrimaryButton
onClick={handleGenerateImage}
onClick={hasImage ? handleRegenerateClick : () => handleGenerateImage()}
disabled={generatingImage}
loading={generatingImage}
startIcon={
@@ -372,7 +600,157 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
</Box>
</>
)}
{/* Image Generation Progress - Show when generating */}
{generatingImage && (
<>
<Divider sx={{ borderColor: "rgba(15, 23, 42, 0.08)", borderWidth: 1, mt: 1 }} />
<Box
sx={{
p: 2,
background: "linear-gradient(135deg, rgba(102, 126, 234, 0.08) 0%, rgba(118, 75, 162, 0.08) 100%)",
borderRadius: 2,
border: "1px solid rgba(102, 126, 234, 0.2)",
}}
>
<Stack direction="row" alignItems="center" spacing={1.5} sx={{ mb: 1.5 }}>
<ImageIcon sx={{ color: "#667eea", fontSize: "1.25rem" }} />
<Typography variant="subtitle2" sx={{ color: "#667eea", fontWeight: 600 }}>
Generating Image...
</Typography>
</Stack>
{/* Progress Bar */}
<Box sx={{ mb: 1.5 }}>
<LinearProgress
variant="determinate"
value={imageGenerationProgress}
sx={{
height: 8,
borderRadius: 4,
backgroundColor: alpha("#667eea", 0.1),
"& .MuiLinearProgress-bar": {
backgroundColor: "#667eea",
borderRadius: 4,
}
}}
/>
<Typography variant="caption" sx={{ color: "#667eea", mt: 0.5, display: "block", textAlign: "right" }}>
{imageGenerationProgress}%
</Typography>
</Box>
{/* Status Message */}
{imageGenerationStatus && (
<Typography variant="body2" sx={{ color: "#667eea", fontSize: "0.875rem", lineHeight: 1.6, mb: 1 }}>
{imageGenerationStatus}
</Typography>
)}
{/* Spinner */}
<Box sx={{ display: "flex", alignItems: "center", justifyContent: "center", mt: 1 }}>
<CircularProgress size={32} sx={{ color: "#667eea" }} />
</Box>
</Box>
</>
)}
{/* Generated Image Display - Show when image exists and not generating */}
{scene.imageUrl && !generatingImage && (
<>
<Divider sx={{ borderColor: "rgba(15, 23, 42, 0.08)", borderWidth: 1, mt: 1 }} />
<Box
sx={{
p: 2,
background: imageBlobUrl && !imageLoading
? "linear-gradient(135deg, rgba(102, 126, 234, 0.08) 0%, rgba(118, 75, 162, 0.08) 100%)"
: "linear-gradient(135deg, rgba(245, 158, 11, 0.08) 0%, rgba(217, 119, 6, 0.08) 100%)",
borderRadius: 2,
border: imageBlobUrl && !imageLoading
? "1px solid rgba(102, 126, 234, 0.2)"
: "1px solid rgba(245, 158, 11, 0.2)",
}}
>
<Stack direction="row" alignItems="center" spacing={1.5} sx={{ mb: 1.5 }}>
<ImageIcon sx={{ color: imageBlobUrl && !imageLoading ? "#667eea" : "#d97706", fontSize: "1.25rem" }} />
<Typography variant="subtitle2" sx={{ color: imageBlobUrl && !imageLoading ? "#667eea" : "#d97706", fontWeight: 600 }}>
{imageBlobUrl && !imageLoading ? "Image Generated" : "Loading Image..."}
</Typography>
</Stack>
{imageBlobUrl && !imageLoading ? (
<Box
sx={{
width: "100%",
borderRadius: 2,
overflow: "hidden",
border: "1px solid rgba(102,126,234,0.2)",
background: alpha("#667eea", 0.05),
}}
>
<Box
component="img"
src={imageBlobUrl}
alt={scene.title}
sx={{
width: "100%",
height: "auto",
display: "block",
maxHeight: 400,
objectFit: "cover",
}}
onError={(e) => {
console.error('[SceneEditor] Image failed to load:', {
src: e.currentTarget.src,
imageUrl: scene.imageUrl,
imageBlobUrl,
});
}}
onLoad={() => {
console.log('[SceneEditor] Image loaded successfully');
}}
/>
</Box>
) : (
<Box sx={{ display: "flex", alignItems: "center", justifyContent: "center", py: 2 }}>
<CircularProgress size={24} sx={{ color: "#d97706" }} />
</Box>
)}
</Box>
</>
)}
</Stack>
{/* Image Regeneration Modal */}
<ImageRegenerateModal
open={showRegenerateModal}
onClose={() => setShowRegenerateModal(false)}
onRegenerate={handleGenerateImage}
initialPrompt={(() => {
const promptParts = [
`Scene: ${scene.title}`,
"Professional podcast recording studio",
"Modern microphone setup",
"Clean background, professional lighting",
"16:9 aspect ratio, video-optimized composition"
];
if (idea) {
promptParts.push(`Topic: ${idea.substring(0, 60)}`);
}
return promptParts.join(", ");
})()}
initialStyle="Realistic"
initialRenderingSpeed="Quality"
initialAspectRatio="16:9"
isGenerating={generatingImage}
/>
<AudioRegenerateModal
open={showAudioModal}
onClose={() => setShowAudioModal(false)}
onRegenerate={handleAudioRegenerate}
initialSettings={audioSettings}
isGenerating={generating}
/>
</GlassyCard>
);
};

View File

@@ -22,6 +22,7 @@ interface ScriptEditorProps {
onBackToResearch: () => void;
onProceedToRendering: (script: Script) => void;
onError: (message: string) => void;
avatarUrl?: string | null; // Base avatar URL for consistent scene image generation
}
export const ScriptEditor: React.FC<ScriptEditorProps> = ({
@@ -37,6 +38,7 @@ export const ScriptEditor: React.FC<ScriptEditorProps> = ({
onBackToResearch,
onProceedToRendering,
onError,
avatarUrl,
}) => {
const [script, setScript] = useState<Script | null>(initialScript);
const [loading, setLoading] = useState(false);
@@ -52,6 +54,12 @@ export const ScriptEditor: React.FC<ScriptEditorProps> = ({
sceneCount: number;
} | null>(null);
// Notify the parent of a script change on a microtask rather than
// synchronously, so the parent's state update does not run in the middle of
// this component's own render/update (avoids React's setState-during-render warning).
const emitScriptChange = useCallback(
  (next: Script) => {
    const deferred = Promise.resolve();
    return deferred.then(() => onScriptChange(next));
  },
  [onScriptChange]
);
// Sync with parent state
useEffect(() => {
if (initialScript) {
@@ -85,7 +93,7 @@ export const ScriptEditor: React.FC<ScriptEditorProps> = ({
.then((res) => {
if (mounted) {
setScript(res);
onScriptChange(res);
emitScriptChange(res);
setError(null);
}
})
@@ -108,7 +116,7 @@ export const ScriptEditor: React.FC<ScriptEditorProps> = ({
...currentScript,
scenes: currentScript.scenes.map((s) => (s.id === updated.id ? { ...s, ...updated } : s))
};
onScriptChange(updatedScript);
emitScriptChange(updatedScript);
return updatedScript;
});
};
@@ -124,7 +132,7 @@ export const ScriptEditor: React.FC<ScriptEditorProps> = ({
...currentScript,
scenes: currentScript.scenes.map((s) => (s.id === sceneId ? { ...s, approved: true } : s)),
};
onScriptChange(updatedScript);
emitScriptChange(updatedScript);
return updatedScript;
});
} catch (err) {
@@ -570,11 +578,12 @@ export const ScriptEditor: React.FC<ScriptEditorProps> = ({
s.id === sceneId ? { ...s, audioUrl, approved: true } : s
);
const updatedScript = { ...currentScript, scenes: updatedScenes };
onScriptChange(updatedScript);
emitScriptChange(updatedScript);
return updatedScript;
});
}}
idea={idea}
avatarUrl={avatarUrl}
/>
</GlassyCard>
))}

View File

@@ -120,6 +120,7 @@ export type CreateProjectPayload = {
knobs: Knobs;
budgetCap: number;
files: { voiceFile?: File | null; avatarFile?: File | null };
avatarUrl?: string | null;
};
export type CreateProjectResult = {
@@ -141,6 +142,13 @@ export type RenderJobResult = {
videoFilename?: string;
};
// Settings for a video generation request.
export interface VideoGenerationSettings {
// Text prompt for the video.
prompt: string;
// Output resolution; only these two values are accepted.
resolution: "480p" | "720p";
// Optional; may be omitted or null.
// NOTE(review): presumably a random seed for reproducible output - confirm against the backend.
seed?: number | null;
// Optional; may be omitted or null.
// NOTE(review): presumably the URL of a mask image - confirm how the backend uses it.
maskImageUrl?: string | null;
}
export type TaskStatus = {
task_id: string;
status: "pending" | "processing" | "completed" | "failed";

View File

@@ -1,5 +1,5 @@
import React from "react";
import { Button, Tooltip, CircularProgress, alpha } from "@mui/material";
import { Button, Tooltip, CircularProgress, alpha, SxProps, Theme } from "@mui/material";
interface SecondaryButtonProps {
children: React.ReactNode;
@@ -9,6 +9,7 @@ interface SecondaryButtonProps {
startIcon?: React.ReactNode;
tooltip?: string;
ariaLabel?: string;
sx?: SxProps<Theme>;
}
export const SecondaryButton: React.FC<SecondaryButtonProps> = ({
@@ -19,6 +20,7 @@ export const SecondaryButton: React.FC<SecondaryButtonProps> = ({
startIcon,
tooltip,
ariaLabel,
sx,
}) => {
const button = (
<Button
@@ -27,17 +29,20 @@ export const SecondaryButton: React.FC<SecondaryButtonProps> = ({
disabled={disabled || loading}
startIcon={loading ? <CircularProgress size={16} /> : startIcon}
aria-label={ariaLabel}
sx={{
borderColor: "rgba(255,255,255,0.2)",
color: "rgba(255,255,255,0.9)",
textTransform: "none",
px: 2.5,
py: 0.75,
"&:hover": {
borderColor: "rgba(255,255,255,0.4)",
background: alpha("#fff", 0.05),
sx={[
{
borderColor: "rgba(255,255,255,0.2)",
color: "rgba(255,255,255,0.9)",
textTransform: "none",
px: 2.5,
py: 0.75,
"&:hover": {
borderColor: "rgba(255,255,255,0.4)",
background: alpha("#fff", 0.05),
},
},
}}
...(Array.isArray(sx) ? sx : sx ? [sx] : []),
]}
>
{children}
</Button>

View File

@@ -14,7 +14,15 @@ import { podcastApi } from '../services/podcastApi';
export interface PodcastProjectState {
// Project metadata
project: { id: string; idea: string; duration: number; speakers: number } | null;
project: {
id: string;
idea: string;
duration: number;
speakers: number;
avatarUrl?: string | null;
avatarPrompt?: string | null;
avatarPersonaId?: string | null;
} | null;
// Step results
analysis: PodcastAnalysis | null;
@@ -40,6 +48,9 @@ export interface PodcastProjectState {
// Current step tracking
currentStep: 'create' | 'analysis' | 'research' | 'script' | 'render' | null;
// Final combined video
finalVideoUrl?: string | null;
// Timestamps
createdAt?: string;
updatedAt?: string;
@@ -233,13 +244,44 @@ export const usePodcastProjectState = () => {
}, []);
/**
 * Merge `updates` into the render job identified by `sceneId`, creating the
 * job when no entry with that scene id exists yet (upsert).
 *
 * The whole operation runs in a single functional `setState` pass, so the
 * existence check and the write both see the same `prev` snapshot.
 * `updatedAt` is refreshed on every call.
 *
 * @param sceneId Scene whose job should be updated or created.
 * @param updates Partial job fields to apply. When creating, missing fields
 *                fall back to neutral defaults (`"idle"`, `0`, `null`).
 */
const updateRenderJob = useCallback((sceneId: string, updates: Partial<Job>) => {
  setState((prev) => {
    const updatedAt = new Date().toISOString();
    const jobExists = prev.renderJobs.some((job) => job.sceneId === sceneId);

    if (jobExists) {
      // Update the existing job in place, leaving the others untouched.
      return {
        ...prev,
        renderJobs: prev.renderJobs.map((job) =>
          job.sceneId === sceneId ? { ...job, ...updates } : job
        ),
        updatedAt,
      };
    }

    // No job for this scene yet: build one from the supplied fields.
    // `??` is used for progress/cost/fileSize (presumably numeric — confirm
    // against the Job type) so that a legitimate 0 is kept rather than being
    // replaced by the default; the remaining fields keep `||` so empty
    // strings still fall back.
    const newJob: Job = {
      sceneId,
      title: updates.title || sceneId,
      status: updates.status || "idle",
      progress: updates.progress ?? 0,
      previewUrl: updates.previewUrl || null,
      finalUrl: updates.finalUrl || null,
      videoUrl: updates.videoUrl || null,
      imageUrl: updates.imageUrl || null,
      jobId: updates.jobId || null,
      taskId: updates.taskId || null,
      cost: updates.cost ?? null,
      provider: updates.provider || null,
      voiceId: updates.voiceId || null,
      fileSize: updates.fileSize ?? null,
      avatarImageUrl: updates.avatarImageUrl || null,
    };

    return {
      ...prev,
      renderJobs: [...prev.renderJobs, newJob],
      updatedAt,
    };
  });
}, []);
const setKnobs = useCallback((knobs: Knobs) => {
@@ -295,6 +337,9 @@ export const usePodcastProjectState = () => {
idea: payload.ideaOrUrl,
duration: payload.duration,
speakers: payload.speakers,
avatarUrl: payload.avatarUrl || null,
avatarPrompt: null, // Will be set when avatar is generated
avatarPersonaId: null,
},
knobs: payload.knobs,
budgetCap: payload.budgetCap,
@@ -317,6 +362,9 @@ export const usePodcastProjectState = () => {
idea: dbProject.idea,
duration: dbProject.duration,
speakers: dbProject.speakers,
avatarUrl: dbProject.avatar_url || null,
avatarPrompt: dbProject.avatar_prompt || null,
avatarPersonaId: dbProject.avatar_persona_id || null,
},
analysis: dbProject.analysis,
queries: dbProject.queries || [],
@@ -332,6 +380,7 @@ export const usePodcastProjectState = () => {
showScriptEditor: dbProject.show_script_editor || false,
showRenderQueue: dbProject.show_render_queue || false,
currentStep: dbProject.current_step || null,
finalVideoUrl: dbProject.final_video_url || null,
createdAt: dbProject.created_at,
updatedAt: dbProject.updated_at,
}));

View File

@@ -321,9 +321,9 @@ export const podcastApi = {
});
const exaResult = response.data as ExaResearchResult;
if (params.onProgress) {
if (params.onProgress) {
params.onProgress("Deep research completed with Exa.");
}
}
const mapped = mapExaResearchResponse(exaResult);
return { research: mapped, raw: exaResult };
},
@@ -411,6 +411,14 @@ export const podcastApi = {
voiceId?: string;
emotion?: string; // Fallback if scene doesn't have emotion
speed?: number;
volume?: number;
pitch?: number;
englishNormalization?: boolean;
sampleRate?: number;
bitrate?: number;
channel?: "1" | "2";
format?: "mp3" | "wav" | "pcm" | "flac";
languageBoost?: string;
}): Promise<RenderJobResult> {
// Use scene-specific emotion if available, otherwise fallback to provided/default
const sceneEmotion = params.scene.emotion || params.emotion || "neutral";
@@ -493,9 +501,16 @@ export const podcastApi = {
scene_title: params.scene.title,
text: textToUse,
voice_id: params.voiceId || "Wise_Woman",
speed: params.speed || 1.0, // Normal speed (was 0.9, but too slow - causing duration issues)
speed: params.speed ?? 1.0, // Normal speed (was 0.9, but too slow - causing duration issues)
volume: params.volume ?? 1.0,
pitch: params.pitch ?? 0.0,
emotion: sceneEmotion,
english_normalization: true, // Better number reading for statistics
english_normalization: params.englishNormalization ?? true, // Better number reading for statistics
sample_rate: params.sampleRate || null,
bitrate: params.bitrate || null,
channel: params.channel || null,
format: params.format || null,
language_boost: params.languageBoost || null,
});
return {
@@ -607,6 +622,8 @@ export const podcastApi = {
avatarImageUrl?: string;
resolution?: string;
prompt?: string;
seed?: number;
maskImageUrl?: string;
}): Promise<{ taskId: string; status: string; message: string }> {
const response = await aiApiClient.post("/api/podcast/render/video", {
project_id: params.projectId,
@@ -616,22 +633,73 @@ export const podcastApi = {
avatar_image_url: params.avatarImageUrl,
resolution: params.resolution || "720p",
prompt: params.prompt,
seed: params.seed ?? -1,
mask_image_url: params.maskImageUrl,
});
// Backend returns snake_case (task_id); normalize to camelCase for callers
const { task_id, status, message } = response.data || {};
return {
taskId: task_id,
status,
message,
};
},
/**
 * Fetch the status record for a background task.
 *
 * @param taskId Identifier interpolated into the status endpoint path.
 * @returns The response body, or `null` when the body is falsy (per the
 *          original note, the backend returns null for an unknown task).
 */
async pollTaskStatus(taskId: string): Promise<TaskStatus | null> {
  const { data } = await aiApiClient.get(`/api/podcast/task/${taskId}/status`);
  // Normalise any falsy payload to an explicit null for callers.
  return data ? data : null;
},
/**
 * List videos from the podcast videos endpoint.
 *
 * @param projectId Optional; when truthy it is sent as the `project_id`
 *                  query parameter, otherwise no query parameters are sent.
 * @returns The response body as returned by the endpoint.
 */
async listVideos(projectId?: string): Promise<{
  videos: Array<{
    scene_number: number;
    filename: string;
    video_url: string;
    file_size: number;
  }>;
}> {
  const query = projectId ? { project_id: projectId } : {};
  const { data } = await aiApiClient.get("/api/podcast/videos", { params: query });
  return data;
},
async pollTaskStatus(taskId: string): Promise<TaskStatus> {
const response = await aiApiClient.get(`/api/podcast/task/${taskId}/status`);
return response.data;
/**
 * Submit a request to combine scene videos into one.
 *
 * The request body uses snake_case keys; the snake_case `task_id` in the
 * response is mapped to camelCase `taskId` for callers.
 *
 * @param params.projectId      Sent as `project_id`.
 * @param params.sceneVideoUrls Sent as `scene_video_urls`.
 * @param params.podcastTitle   Sent as `podcast_title`; falls back to
 *                              "Podcast" when missing or empty.
 * @returns `{ taskId, status, message }`; each is `undefined` when the
 *          response body is empty.
 */
async combineVideos(params: {
  projectId: string;
  sceneVideoUrls: string[];
  podcastTitle?: string;
}): Promise<{
  taskId: string;
  status: string;
  message: string;
}> {
  const { projectId, sceneVideoUrls, podcastTitle } = params;
  const requestBody = {
    project_id: projectId,
    scene_video_urls: sceneVideoUrls,
    podcast_title: podcastTitle || "Podcast",
  };
  const response = await aiApiClient.post("/api/podcast/render/combine-videos", requestBody);

  // Tolerate an empty body so the property reads below never throw.
  const payload = response.data || {};
  return {
    taskId: payload.task_id,
    status: payload.status,
    message: payload.message,
  };
},
async generateSceneImage(params: {
sceneId: string;
sceneTitle: string;
sceneContent?: string;
baseAvatarUrl?: string;
idea?: string;
width?: number;
height?: number;
customPrompt?: string;
style?: "Auto" | "Fiction" | "Realistic";
renderingSpeed?: "Default" | "Turbo" | "Quality";
aspectRatio?: "1:1" | "16:9" | "9:16" | "4:3" | "3:4";
}): Promise<{
scene_id: string;
scene_title: string;
@@ -647,9 +715,14 @@ export const podcastApi = {
scene_id: params.sceneId,
scene_title: params.sceneTitle,
scene_content: params.sceneContent,
idea: params.idea,
base_avatar_url: params.baseAvatarUrl || null,
idea: params.idea || null,
width: params.width || 1024,
height: params.height || 1024,
custom_prompt: params.customPrompt || null,
style: params.style || null,
rendering_speed: params.renderingSpeed || null,
aspect_ratio: params.aspectRatio || null,
});
return response.data;
},
@@ -679,6 +752,60 @@ export const podcastApi = {
project_id: params.projectId,
scene_ids: params.sceneIds,
scene_audio_urls: params.sceneAudioUrls,
});
return response.data;
},
/**
 * Upload an avatar file as multipart form data.
 *
 * @param file      File sent under the `file` form field.
 * @param projectId Optional; appended as `project_id` only when truthy.
 * @returns The response body as returned by the upload endpoint.
 */
async uploadAvatar(file: File, projectId?: string): Promise<{ avatar_url: string; avatar_filename: string }> {
  const body = new FormData();
  body.append('file', file);
  if (projectId) {
    body.append('project_id', projectId);
  }

  const requestConfig = { headers: { 'Content-Type': 'multipart/form-data' } };
  const { data } = await aiApiClient.post('/api/podcast/avatar/upload', body, requestConfig);
  return data;
},
/**
 * Request generated presenter avatars via a multipart form post.
 *
 * Form fields are appended in a fixed order: `speakers`, then each optional
 * text field that is truthy, then `top_keywords` as a JSON string.
 *
 * @param speakers    Speaker count, sent as its decimal string.
 * @param projectId   Optional; sent as `project_id` when truthy.
 * @param audience    Optional; sent as `audience` when truthy.
 * @param contentType Optional; sent as `content_type` when truthy.
 * @param topKeywords Optional; sent as JSON under `top_keywords` only when it
 *                    is a non-empty array.
 * @returns The response body as returned by the generate endpoint.
 */
async generatePresenters(
  speakers: number,
  projectId?: string,
  audience?: string,
  contentType?: string,
  topKeywords?: string[]
): Promise<{
  avatars: Array<{ avatar_url: string; speaker_number: number; prompt?: string; persona_id?: string; seed?: number }>;
  persona_id?: string;
}> {
  const body = new FormData();
  body.append('speakers', speakers.toString());

  // Optional text fields, listed in the order they must be appended.
  const optionalFields: Array<[string, string | undefined]> = [
    ['project_id', projectId],
    ['audience', audience],
    ['content_type', contentType],
  ];
  optionalFields.forEach(([field, value]) => {
    if (value) {
      body.append(field, value);
    }
  });

  if (Array.isArray(topKeywords) && topKeywords.length > 0) {
    body.append('top_keywords', JSON.stringify(topKeywords));
  }

  const { data } = await aiApiClient.post('/api/podcast/avatar/generate', body, {
    headers: { 'Content-Type': 'multipart/form-data' },
  });
  return data;
},
/**
 * Post an avatar URL to the make-presentable endpoint as multipart form data.
 *
 * @param avatarUrl Sent under the `avatar_url` form field.
 * @param projectId Optional; appended as `project_id` only when truthy.
 * @returns The response body as returned by the endpoint.
 */
async makeAvatarPresentable(avatarUrl: string, projectId?: string): Promise<{ avatar_url: string; avatar_filename: string }> {
  const body = new FormData();
  body.append('avatar_url', avatarUrl);
  if (projectId) {
    body.append('project_id', projectId);
  }

  const requestConfig = { headers: { 'Content-Type': 'multipart/form-data' } };
  const { data } = await aiApiClient.post('/api/podcast/avatar/make-presentable', body, requestConfig);
  return data;
},