AI podcast project
This commit is contained in:
5
.gitignore
vendored
5
.gitignore
vendored
@@ -14,6 +14,11 @@ podcast_audio/*
|
||||
backend/podcast_audio/
|
||||
|
||||
|
||||
podcast_audio/
|
||||
podcast_images/
|
||||
youtube_videos/
|
||||
backend/podcast_images/
|
||||
backend/podcast_videos/
|
||||
|
||||
.cursorignore
|
||||
story_videos
|
||||
|
||||
@@ -45,7 +45,11 @@ class CacheHeadersMiddleware(BaseHTTPMiddleware):
|
||||
# Immutable files (with hash) - cache for 1 year
|
||||
# These files never change (new hash = new file)
|
||||
response.headers["Cache-Control"] = "public, max-age=31536000, immutable"
|
||||
response.headers["Expires"] = "Thu, 31 Dec 2025 23:59:59 GMT"
|
||||
# Expires header calculated dynamically to match max-age
|
||||
# Modern browsers prefer Cache-Control, but Expires provides compatibility
|
||||
from datetime import datetime, timedelta
|
||||
expires_date = datetime.utcnow() + timedelta(seconds=31536000)
|
||||
response.headers["Expires"] = expires_date.strftime("%a, %d %b %Y %H:%M:%S GMT")
|
||||
else:
|
||||
# Non-hashed files - shorter cache (1 hour)
|
||||
# These might be updated, so cache for shorter time
|
||||
|
||||
28
backend/api/podcast/constants.py
Normal file
28
backend/api/podcast/constants.py
Normal file
@@ -0,0 +1,28 @@
|
||||
"""
|
||||
Podcast API Constants
|
||||
|
||||
Centralized constants and directory configuration for podcast module.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from services.story_writer.audio_generation_service import StoryAudioGenerationService
|
||||
|
||||
# Directory layout
# This module lives at backend/api/podcast/constants.py, so walking up from it:
#   parents[0] -> backend/api/podcast/
#   parents[1] -> backend/api/
#   parents[2] -> backend/
BASE_DIR = Path(__file__).resolve().parents[2]  # backend/

# Media output folders sit directly under BASE_DIR and are created at import time.
PODCAST_AUDIO_DIR = BASE_DIR.joinpath("podcast_audio").resolve()
PODCAST_AUDIO_DIR.mkdir(parents=True, exist_ok=True)

PODCAST_IMAGES_DIR = BASE_DIR.joinpath("podcast_images").resolve()
PODCAST_IMAGES_DIR.mkdir(parents=True, exist_ok=True)

PODCAST_VIDEOS_DIR = BASE_DIR.joinpath("podcast_videos").resolve()
PODCAST_VIDEOS_DIR.mkdir(parents=True, exist_ok=True)

# Relative subdirectory name used for AI-generated videos.
AI_VIDEO_SUBDIR = Path("AI_Videos")

# Shared audio generation service, configured to write into the podcast audio folder.
audio_service = StoryAudioGenerationService(output_dir=str(PODCAST_AUDIO_DIR))
|
||||
|
||||
6
backend/api/podcast/handlers/__init__.py
Normal file
6
backend/api/podcast/handlers/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""
|
||||
Podcast API Handlers
|
||||
|
||||
Handler modules for different podcast operations.
|
||||
"""
|
||||
|
||||
96
backend/api/podcast/handlers/analysis.py
Normal file
96
backend/api/podcast/handlers/analysis.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""
|
||||
Podcast Analysis Handlers
|
||||
|
||||
Analysis endpoint for podcast ideas.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from typing import Dict, Any
|
||||
import json
|
||||
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from loguru import logger
|
||||
from ..models import PodcastAnalyzeRequest, PodcastAnalyzeResponse
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if one is present."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    # Drop the opening fence line, which may carry a language tag such as ```json.
    _, _, remainder = stripped.partition("\n")
    remainder = remainder.rstrip()
    if remainder.endswith("```"):
        remainder = remainder[:-3]
    return remainder.strip()


def _coerce_llm_payload(raw: Any) -> Dict[str, Any]:
    """Normalize the LLM result (dict or JSON string) into a dict.

    Raises:
        HTTPException: 500 when the value is not a dict, is not valid JSON,
            or decodes to something other than a JSON object.
    """
    if isinstance(raw, dict):
        return raw
    if not isinstance(raw, str):
        raise HTTPException(status_code=500, detail="Unexpected LLM response format")

    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        # Retry once with a Markdown fence removed; anything else is still rejected.
        try:
            data = json.loads(_strip_code_fence(raw))
        except json.JSONDecodeError:
            raise HTTPException(status_code=500, detail="LLM returned non-JSON output") from None

    # A JSON array or scalar has no .get(); reject it explicitly instead of crashing later.
    if not isinstance(data, dict):
        raise HTTPException(status_code=500, detail="Unexpected LLM response format")
    return data


@router.post("/analyze", response_model=PodcastAnalyzeResponse)
async def analyze_podcast_idea(
    request: PodcastAnalyzeRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Analyze a podcast idea and return podcast-oriented outlines, keywords, and titles.

    Builds a podcast-specific prompt from the request's idea, duration and speakers,
    sends it through the shared LLM provider, and maps the JSON reply onto
    PodcastAnalyzeResponse, substituting defaults for missing or empty fields.

    Raises:
        HTTPException: re-raised unchanged when the LLM layer raises one;
            500 when generation fails for another reason or the reply is not a JSON object.
    """
    user_id = require_authenticated_user(current_user)

    prompt = f"""
You are an expert podcast producer. Given a podcast idea, craft concise podcast-ready assets
that sound like episode plans (not fiction stories).

Podcast Idea: "{request.idea}"
Duration: ~{request.duration} minutes
Speakers: {request.speakers} (host + optional guest)

Return JSON with:
- audience: short target audience description
- content_type: podcast style/format
- top_keywords: 5 podcast-relevant keywords/phrases
- suggested_outlines: 2 items, each with title (<=60 chars) and 4-6 short segments (bullet-friendly, factual)
- title_suggestions: 3 concise episode titles (no cliffhanger storytelling)
- exa_suggested_config: suggested Exa search options to power research (keep conservative defaults to control cost), with:
  - exa_search_type: "auto" | "neural" | "keyword" (prefer "auto" unless clearly news-heavy)
  - exa_category: one of ["research paper","news","company","github","tweet","personal site","pdf","financial report","linkedin profile"]
  - exa_include_domains: up to 3 reputable domains to prioritize (optional)
  - exa_exclude_domains: up to 3 domains to avoid (optional)
  - max_sources: 6-10
  - include_statistics: boolean (true if topic needs fresh stats)
  - date_range: one of ["last_month","last_3_months","last_year","all_time"] (pick recent if time-sensitive)

Requirements:
- Keep language factual, actionable, and suited for spoken audio.
- Avoid narrative fiction tone; focus on insights, hooks, objections, and takeaways.
- Prefer 2024-2025 context when relevant.
"""

    try:
        raw = llm_text_gen(prompt=prompt, user_id=user_id, json_struct=None)
    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429 subscription limit) - preserve error details
        raise
    except Exception as exc:
        logger.error(f"[Podcast Analyze] Analysis failed for user {user_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Analysis failed: {exc}") from exc

    data = _coerce_llm_payload(raw)

    # Missing or empty fields fall back to defaults so the response model always validates.
    return PodcastAnalyzeResponse(
        audience=data.get("audience") or "Growth-focused professionals",
        content_type=data.get("content_type") or "Interview + insights",
        top_keywords=data.get("top_keywords") or [],
        suggested_outlines=data.get("suggested_outlines") or [],
        title_suggestions=data.get("title_suggestions") or [],
        exa_suggested_config=data.get("exa_suggested_config") or None,
    )
|
||||
|
||||
324
backend/api/podcast/handlers/audio.py
Normal file
324
backend/api/podcast/handlers/audio.py
Normal file
@@ -0,0 +1,324 @@
|
||||
"""
|
||||
Podcast Audio Handlers
|
||||
|
||||
Audio generation, combining, and serving endpoints.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import Dict, Any
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
import tempfile
|
||||
import uuid
|
||||
import shutil
|
||||
|
||||
from services.database import get_db
|
||||
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
from utils.asset_tracker import save_asset_to_library
|
||||
from models.story_models import StoryAudioResult
|
||||
from loguru import logger
|
||||
from ..constants import PODCAST_AUDIO_DIR, audio_service
|
||||
from ..models import (
|
||||
PodcastAudioRequest,
|
||||
PodcastAudioResponse,
|
||||
PodcastCombineAudioRequest,
|
||||
PodcastCombineAudioResponse,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/audio", response_model=PodcastAudioResponse)
async def generate_podcast_audio(
    request: PodcastAudioRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate AI audio for a podcast scene using shared audio service.

    The generated file's URL is rewritten from the story endpoint to the podcast
    endpoint, the asset is recorded in the asset library on a best-effort basis
    (a failure there is only logged), and a PodcastAudioResponse is returned.

    Raises:
        HTTPException: 400 when the text is empty or whitespace; re-raised unchanged
            when the audio service raises one; 500 for any other generation failure.
    """
    user_id = require_authenticated_user(current_user)

    if not request.text or not request.text.strip():
        raise HTTPException(status_code=400, detail="Text is required")

    try:
        result: StoryAudioResult = audio_service.generate_ai_audio(
            scene_number=0,
            scene_title=request.scene_title,
            text=request.text.strip(),
            user_id=user_id,
            voice_id=request.voice_id or "Wise_Woman",
            speed=request.speed or 1.0,  # Normal speed (was 0.9, but too slow - causing duration issues)
            volume=request.volume or 1.0,
            pitch=request.pitch or 0.0,  # Normal pitch (0.0 = neutral)
            emotion=request.emotion or "neutral",
            english_normalization=request.english_normalization or False,
            sample_rate=request.sample_rate,
            bitrate=request.bitrate,
            channel=request.channel,
            format=request.format,
            language_boost=request.language_boost,
            enable_sync_mode=request.enable_sync_mode,
        )

        # Override URL to use podcast endpoint instead of story endpoint
        if result.get("audio_url") and "/api/story/audio/" in result.get("audio_url", ""):
            audio_filename = result.get("audio_filename", "")
            result["audio_url"] = f"/api/podcast/audio/{audio_filename}"
    except HTTPException:
        # Keep the original status code and detail instead of masking them as a 500.
        raise
    except Exception as exc:
        logger.error(f"[Podcast] Audio generation failed for user {user_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Audio generation failed: {exc}") from exc

    # Save to asset library (podcast module); failure here must not fail the request.
    try:
        if result.get("audio_url"):
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="audio",
                source_module="podcast_maker",
                filename=result.get("audio_filename", ""),
                file_url=result.get("audio_url", ""),
                file_path=result.get("audio_path"),
                file_size=result.get("file_size"),
                mime_type="audio/mpeg",
                title=f"{request.scene_title} - Podcast",
                description="Podcast scene narration",
                tags=["podcast", "audio", request.scene_id],
                provider=result.get("provider"),
                model=result.get("model"),
                cost=result.get("cost"),
                asset_metadata={
                    "scene_id": request.scene_id,
                    "scene_title": request.scene_title,
                    "status": "completed",
                },
            )
    except Exception as e:
        logger.warning(f"[Podcast] Failed to save audio asset: {e}")

    return PodcastAudioResponse(
        scene_id=request.scene_id,
        scene_title=request.scene_title,
        audio_filename=result.get("audio_filename", ""),
        audio_url=result.get("audio_url", ""),
        provider=result.get("provider", "wavespeed"),
        model=result.get("model", "minimax/speech-02-hd"),
        voice_id=result.get("voice_id", request.voice_id or "Wise_Woman"),
        text_length=result.get("text_length", len(request.text)),
        file_size=result.get("file_size", 0),
        cost=result.get("cost", 0.0),
    )
|
||||
|
||||
|
||||
def _resolve_scene_audio_path(audio_url: str):
    """Map a scene audio URL from the request to a file inside PODCAST_AUDIO_DIR.

    Accepts /api/podcast/audio/... and /api/story/audio/... URLs (the latter for
    backward compatibility) as well as direct paths, but every candidate must
    resolve to a location inside PODCAST_AUDIO_DIR.

    Returns:
        The resolved Path, or None when the entry should be skipped (unsupported
        URL format, empty filename, path outside the audio directory, missing file).

    Raises:
        HTTPException: 400 for external http(s) URLs.
    """
    if audio_url.startswith("http"):
        logger.error(f"[Podcast] External URLs not supported: {audio_url}")
        raise HTTPException(
            status_code=400,
            detail="External URLs not supported. Please use local file paths.",
        )

    if audio_url.startswith("/api/"):
        podcast_marker = "/api/podcast/audio/"
        story_marker = "/api/story/audio/"
        if podcast_marker in audio_url:
            marker = podcast_marker
        elif story_marker in audio_url:
            marker = story_marker
        else:
            logger.error(f"[Podcast] Unsupported audio URL format: {audio_url}. Expected /api/podcast/audio/ or /api/story/audio/ URLs.")
            return None

        # Everything after the marker, minus any query string, is the filename.
        filename = audio_url.split(marker, 1)[1].split("?", 1)[0].strip()
        if marker == story_marker:
            logger.info(f"[Podcast] Converting story audio URL to podcast: {audio_url} -> {filename}")
        if not filename:
            logger.error(f"[Podcast] Could not extract filename from URL: {audio_url}")
            return None
        candidate = (PODCAST_AUDIO_DIR / filename).resolve()
    else:
        logger.warning(f"[Podcast] Non-API URL format, treating as direct path: {audio_url}")
        candidate = Path(audio_url).resolve()

    # Component-wise containment check; a plain string-prefix test would also accept
    # sibling directories whose name merely starts with the audio directory's name.
    try:
        candidate.relative_to(PODCAST_AUDIO_DIR)
    except ValueError:
        logger.error(f"[Podcast] Attempted path traversal when resolving audio: {audio_url}")
        return None

    if not candidate.is_file():
        logger.error(f"[Podcast] Audio file not found: {candidate} (from URL: {audio_url})")
        return None
    return candidate


@router.post("/combine-audio", response_model=PodcastCombineAudioResponse)
async def combine_podcast_audio(
    request: PodcastCombineAudioRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Combine multiple scene audio files into a single podcast audio file.

    Each URL in the request is resolved to a file inside PODCAST_AUDIO_DIR; entries
    that cannot be resolved or loaded are logged and skipped. The remaining clips are
    concatenated with MoviePy, written as a 192k MP3 into PODCAST_AUDIO_DIR, recorded
    in the asset library on a best-effort basis, and described in the response.

    Raises:
        HTTPException: 400 when scene IDs/URLs are missing or their counts differ,
            when an external URL is supplied, or when no clip could be loaded;
            500 when MoviePy is unavailable or combining fails.
    """
    user_id = require_authenticated_user(current_user)

    if not request.scene_ids or not request.scene_audio_urls:
        raise HTTPException(status_code=400, detail="Scene IDs and audio URLs are required")

    if len(request.scene_ids) != len(request.scene_audio_urls):
        raise HTTPException(status_code=400, detail="Scene IDs and audio URLs count must match")

    # Import moviepy lazily so the module still loads when it is not installed.
    try:
        from moviepy import AudioFileClip, concatenate_audioclips
    except ImportError:
        logger.error("[Podcast] MoviePy not available for audio combination")
        raise HTTPException(
            status_code=500,
            detail="Audio combination requires MoviePy. Please install: pip install moviepy"
        )

    url_count = len(request.scene_audio_urls)
    audio_clips = []
    combined_audio = None
    total_duration = 0.0

    try:
        try:
            # Log incoming request for debugging
            logger.info(f"[Podcast] Combining audio: {url_count} URLs received")
            for idx, url in enumerate(request.scene_audio_urls, start=1):
                logger.info(f"[Podcast] URL {idx}: {url}")

            for idx, audio_url in enumerate(request.scene_audio_urls, start=1):
                try:
                    audio_path = _resolve_scene_audio_path(audio_url)
                    if audio_path is None:
                        continue

                    audio_clip = AudioFileClip(str(audio_path))
                    audio_clips.append(audio_clip)
                    total_duration += audio_clip.duration
                    logger.info(f"[Podcast] Loaded audio {idx}/{url_count}: {audio_path.name} ({audio_clip.duration:.2f}s)")
                except HTTPException:
                    raise
                except Exception as e:
                    # One bad file should not abort the whole combination.
                    logger.exception(f"[Podcast] Failed to load audio {idx}: {e}")
                    continue

            if not audio_clips:
                raise HTTPException(status_code=400, detail="No valid audio files found to combine")

            logger.info(f"[Podcast] Combining {len(audio_clips)} audio clips (total duration: {total_duration:.2f}s)")
            combined_audio = concatenate_audioclips(audio_clips)

            # project_id comes from the request; keep only filename-safe characters.
            safe_project_id = "".join(
                ch for ch in str(request.project_id) if ch.isalnum() or ch in "-_"
            ) or "project"
            output_filename = f"podcast_combined_{safe_project_id}_{uuid.uuid4().hex[:8]}.mp3"
            output_path = PODCAST_AUDIO_DIR / output_filename

            combined_audio.write_audiofile(
                str(output_path),
                codec="mp3",
                bitrate="192k",
                logger=None,  # Suppress moviepy logging
            )
        finally:
            # Release clip resources whether or not combining succeeded.
            closables = list(audio_clips)
            if combined_audio is not None:
                closables.append(combined_audio)
            for clip in closables:
                try:
                    clip.close()
                except Exception as e:
                    logger.warning(f"[Podcast] Failed to close audio clip: {e}")

        file_size = output_path.stat().st_size
        audio_url = f"/api/podcast/audio/{output_filename}"

        logger.info(f"[Podcast] Combined audio saved: {output_path} ({file_size} bytes)")

        # Save to asset library; failure here must not fail the request.
        try:
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="audio",
                source_module="podcast_maker",
                filename=output_filename,
                file_url=audio_url,
                file_path=str(output_path),
                file_size=file_size,
                mime_type="audio/mpeg",
                title=f"Combined Podcast - {request.project_id}",
                description=f"Combined podcast audio from {len(request.scene_ids)} scenes",
                tags=["podcast", "audio", "combined", request.project_id],
                asset_metadata={
                    "project_id": request.project_id,
                    "scene_ids": request.scene_ids,
                    "scene_count": len(request.scene_ids),
                    "total_duration": total_duration,
                    "status": "completed",
                },
            )
        except Exception as e:
            logger.warning(f"[Podcast] Failed to save combined audio asset: {e}")

        return PodcastCombineAudioResponse(
            combined_audio_url=audio_url,
            combined_audio_filename=output_filename,
            total_duration=total_duration,
            file_size=file_size,
            scene_count=len(request.scene_ids),
        )

    except HTTPException:
        raise
    except Exception as exc:
        logger.exception(f"[Podcast] Audio combination failed: {exc}")
        raise HTTPException(status_code=500, detail=f"Audio combination failed: {exc}") from exc
|
||||
|
||||
|
||||
@router.get("/audio/{filename}")
async def serve_podcast_audio(
    filename: str,
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve generated podcast scene audio files.

    Supports authentication via Authorization header or token query parameter.
    Query parameter is useful for HTML elements like <audio> that cannot send custom headers.

    Raises:
        HTTPException: 400 when the filename contains "..", "/" or a backslash;
            403 when the resolved path is outside PODCAST_AUDIO_DIR;
            404 when no regular file exists at that path.
    """
    require_authenticated_user(current_user)

    # Security check: ensure filename doesn't contain path traversal
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    audio_path = (PODCAST_AUDIO_DIR / filename).resolve()

    # Security check: compare path components rather than string prefixes, which
    # would also accept sibling directories sharing the audio directory's name prefix.
    try:
        audio_path.relative_to(PODCAST_AUDIO_DIR)
    except ValueError:
        raise HTTPException(status_code=403, detail="Access denied") from None

    # is_file() also rejects directories, which cannot be served as audio.
    if not audio_path.is_file():
        raise HTTPException(status_code=404, detail="Audio file not found")

    return FileResponse(audio_path, media_type="audio/mpeg")
|
||||
|
||||
381
backend/api/podcast/handlers/avatar.py
Normal file
381
backend/api/podcast/handlers/avatar.py
Normal file
@@ -0,0 +1,381 @@
|
||||
"""
|
||||
Podcast Avatar Handlers
|
||||
|
||||
Avatar upload and presenter generation endpoints.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import Dict, Any, List, Optional
|
||||
from pathlib import Path
|
||||
import uuid
|
||||
import hashlib
|
||||
|
||||
from services.database import get_db
|
||||
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
from services.llm_providers.main_image_generation import generate_image
|
||||
from services.llm_providers.main_image_editing import edit_image
|
||||
from utils.asset_tracker import save_asset_to_library
|
||||
from loguru import logger
|
||||
from ..constants import PODCAST_IMAGES_DIR
|
||||
from ..presenter_personas import choose_persona_id, get_persona
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Presenter avatars are stored in their own folder under the podcast images
# directory; the folder is created at import time if it does not exist yet.
AVATAR_SUBDIR = "avatars"
PODCAST_AVATARS_DIR = PODCAST_IMAGES_DIR.joinpath(AVATAR_SUBDIR)
PODCAST_AVATARS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
@router.post("/avatar/upload")
async def upload_podcast_avatar(
    file: UploadFile = File(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Upload a presenter avatar image for a podcast project.
    Returns the avatar URL for use in scene image generation.

    The image is written into PODCAST_AVATARS_DIR under a generated name. When a
    project_id is supplied the file is also recorded in the asset library on a
    best-effort basis (a failure there is only logged).

    Raises:
        HTTPException: 400 when the upload is not declared as an image or exceeds
            5MB; 500 when saving the file fails.
    """
    user_id = require_authenticated_user(current_user)

    # Validate file type
    if not file.content_type or not file.content_type.startswith('image/'):
        raise HTTPException(status_code=400, detail="File must be an image")

    # Validate file size (max 5MB). Read at most one byte past the limit so an
    # oversized upload is detected without buffering all of it in memory.
    max_bytes = 5 * 1024 * 1024
    file_content = await file.read(max_bytes + 1)
    if len(file_content) > max_bytes:
        raise HTTPException(status_code=400, detail="Image file size must be less than 5MB")

    try:
        # Both the original filename and project_id are client-supplied; keep only
        # filename-safe characters so they cannot alter the destination path.
        raw_suffix = Path(file.filename or "").suffix
        suffix_chars = "".join(ch for ch in raw_suffix if ch.isalnum())
        file_ext = f".{suffix_chars}" if suffix_chars else '.png'
        project_token = "".join(
            ch for ch in (project_id or "") if ch.isalnum() or ch in "-_"
        ) or 'temp'

        unique_id = str(uuid.uuid4())[:8]
        avatar_filename = f"avatar_{project_token}_{unique_id}{file_ext}"
        avatar_path = PODCAST_AVATARS_DIR / avatar_filename

        # Save file
        avatar_path.write_bytes(file_content)

        logger.info(f"[Podcast] Avatar uploaded: {avatar_path}")

        # Create avatar URL
        avatar_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{avatar_filename}"

        # Save to asset library if project_id provided
        if project_id:
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
                    source_module="podcast_maker",
                    filename=avatar_filename,
                    file_url=avatar_url,
                    file_path=str(avatar_path),
                    file_size=len(file_content),
                    mime_type=file.content_type,
                    title=f"Podcast Presenter Avatar - {project_id}",
                    description="Podcast presenter avatar image",
                    tags=["podcast", "avatar", project_id],
                    asset_metadata={
                        "project_id": project_id,
                        "type": "presenter_avatar",
                        "status": "completed",
                    },
                )
            except Exception as e:
                logger.warning(f"[Podcast] Failed to save avatar asset: {e}")

        return {
            "avatar_url": avatar_url,
            "avatar_filename": avatar_filename,
            "message": "Avatar uploaded successfully"
        }
    except Exception as exc:
        # logger.exception attaches the traceback of the active exception.
        logger.exception(f"[Podcast] Avatar upload failed: {exc}")
        raise HTTPException(status_code=500, detail=f"Avatar upload failed: {str(exc)}") from exc
|
||||
|
||||
|
||||
@router.post("/avatar/make-presentable")
async def make_avatar_presentable(
    avatar_url: str = Form(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Transform an uploaded avatar image into a podcast-appropriate presenter.
    Uses AI image editing to convert the uploaded photo into a professional podcast presenter.

    The edited image is written into PODCAST_AVATARS_DIR as a PNG. When a project_id
    is supplied it is also recorded in the asset library on a best-effort basis
    (a failure there is only logged).

    Raises:
        HTTPException: re-raised unchanged when loading or editing the image raises
            one; 500 for any other failure.
    """
    user_id = require_authenticated_user(current_user)

    try:
        # Load the uploaded avatar image
        from ..utils import load_podcast_image_bytes
        avatar_bytes = load_podcast_image_bytes(avatar_url)

        logger.info(f"[Podcast] Transforming avatar to podcast presenter for project {project_id}")

        # Create transformation prompt based on WaveSpeed AI recommendations
        # Transform the uploaded image into a professional podcast presenter
        transformation_prompt = """Transform this image into a professional podcast presenter:
- Half-length portrait format, looking at camera
- Professional attire (white shirt and light gray blazer or business casual)
- Confident, friendly, engaging expression
- Soft studio lighting, plain light-gray or neutral background
- Professional podcast host appearance, suitable for video generation
- Clean composition, center-focused for avatar overlay
- Maintain the person's appearance and identity while making it podcast-appropriate
- Ultra realistic, 4k quality, professional photography style"""

        # Transform the image using image editing
        image_options = {
            "provider": None,  # Auto-select provider
            "model": None,  # Use default model
        }

        result = edit_image(
            input_image_bytes=avatar_bytes,
            prompt=transformation_prompt,
            options=image_options,
            user_id=user_id
        )

        # project_id is client-supplied; keep only filename-safe characters so it
        # cannot alter the destination path.
        project_token = "".join(
            ch for ch in (project_id or "") if ch.isalnum() or ch in "-_"
        ) or 'temp'

        # Save transformed avatar
        unique_id = str(uuid.uuid4())[:8]
        transformed_filename = f"presenter_transformed_{project_token}_{unique_id}.png"
        transformed_path = PODCAST_AVATARS_DIR / transformed_filename
        transformed_path.write_bytes(result.image_bytes)

        transformed_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{transformed_filename}"

        logger.info(f"[Podcast] Transformed avatar saved to: {transformed_path}")

        # Save to asset library
        if project_id:
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
                    source_module="podcast_maker",
                    filename=transformed_filename,
                    file_url=transformed_url,
                    file_path=str(transformed_path),
                    file_size=len(result.image_bytes),
                    mime_type="image/png",
                    title=f"Podcast Presenter (Transformed) - {project_id}",
                    description="AI-transformed podcast presenter avatar from uploaded photo",
                    prompt=transformation_prompt,
                    tags=["podcast", "avatar", "presenter", "transformed", project_id],
                    provider=result.provider,
                    model=result.model,
                    asset_metadata={
                        "project_id": project_id,
                        "type": "transformed_presenter",
                        "original_avatar_url": avatar_url,
                        "status": "completed",
                    },
                )
            except Exception as e:
                logger.warning(f"[Podcast] Failed to save transformed avatar asset: {e}")

        return {
            "avatar_url": transformed_url,
            "avatar_filename": transformed_filename,
            "message": "Avatar transformed into podcast presenter successfully"
        }
    except HTTPException:
        # Keep the original status code and detail instead of masking them as a 500.
        raise
    except Exception as exc:
        # logger.exception attaches the traceback of the active exception.
        logger.exception(f"[Podcast] Avatar transformation failed: {exc}")
        raise HTTPException(status_code=500, detail=f"Avatar transformation failed: {str(exc)}") from exc
|
||||
|
||||
|
||||
def _parse_presenter_keywords(top_keywords) -> list:
    """Decode the ``top_keywords`` form field into a list of keyword strings.

    The field is expected to carry a JSON array. Input that cannot be decoded,
    is not a list, or contains non-string items is filtered out, so callers can
    safely slice the result and call ``str`` methods on every item.
    """
    if not top_keywords:
        return []
    if isinstance(top_keywords, str):
        import json  # local import keeps this block self-contained

        try:
            decoded = json.loads(top_keywords)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return []
    else:
        # Already-decoded input (e.g. a direct Python call) is passed through.
        decoded = top_keywords
    if not isinstance(decoded, (list, tuple)):
        return []
    return [kw for kw in decoded if isinstance(kw, str)]


def _build_presenter_prompt(gender, persona, content_type, audience, keywords_list) -> str:
    """Assemble the comma-separated image prompt for one presenter portrait.

    Args:
        gender: Word inserted into the portrait description.
        persona: Persona preset or ``None``; its ``prompt`` attribute is added when set.
        content_type: Optional analysis hint that selects the attire phrase.
        audience: Optional analysis hint that selects the expression phrase.
        keywords_list: Keyword strings; up to two "visual" ones become context hints.
    """
    parts = [
        f"Half-length portrait of a professional podcast presenter ({gender}, 25-35 years old)",
        "photo-realistic, professional photography",
    ]

    if persona:
        parts.append(persona.prompt)

    # Attire follows the content type; unknown or missing types get the default.
    attire = "professional attire (white shirt and light gray blazer or business casual)"
    if content_type:
        content_lower = content_type.lower()
        if "business" in content_lower or "corporate" in content_lower:
            attire = "business professional attire (white shirt and light gray blazer)"
        elif "casual" in content_lower or "conversational" in content_lower:
            attire = "business casual attire (smart casual, approachable)"
        elif "tech" in content_lower:  # also matches "technology"
            attire = "modern professional attire (tech-forward, contemporary style)"
    parts.append(attire)

    # Expression follows the audience.
    if audience:
        audience_lower = audience.lower()
        if any(word in audience_lower for word in ("young", "millennial", "gen z")):
            parts.append("modern, energetic, approachable expression")
        elif any(word in audience_lower for word in ("executive", "professional", "business")):
            parts.append("confident, authoritative, professional expression")
        else:
            parts.append("confident, friendly, engaging expression")
    else:
        parts.append("confident, friendly expression")

    # Only the first three keywords are considered, and at most two are used.
    visual_words = ("tech", "business", "creative", "modern", "professional")
    visual_keywords = [
        kw for kw in keywords_list[:3]
        if any(word in kw.lower() for word in visual_words)
    ]
    if visual_keywords:
        parts.append(f"context: {', '.join(visual_keywords[:2])}")

    # Technical requirements
    parts.extend([
        "looking at camera",
        "soft studio lighting, plain light-gray or neutral background",
        "ultra realistic, 4k quality, 85mm lens, f/2.8",
        "professional podcast host appearance, suitable for video generation",
        "clean composition, center-focused for avatar overlay",
    ])
    return ", ".join(parts)


@router.post("/avatar/generate")
async def generate_podcast_presenters(
    speakers: int = Form(...),
    project_id: Optional[str] = Form(None),
    audience: Optional[str] = Form(None),
    content_type: Optional[str] = Form(None),
    top_keywords: Optional[str] = Form(None),  # JSON string array
    persona_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate presenter avatar images based on number of speakers and AI analysis insights.

    Uses analysis data (audience, content_type, keywords) to create more relevant
    presenters, following WaveSpeed AI recommendations for professional podcast
    presenters.

    Args:
        speakers: Number of presenters to generate (1 or 2).
        project_id: Optional project the avatars belong to. When given, each avatar
            is also saved to the asset library.
        audience: Optional audience description from the analysis step.
        content_type: Optional content type from the analysis step.
        top_keywords: Optional JSON array of keywords, passed as a string.
        persona_id: Optional explicit persona; otherwise one is chosen from the analysis.

    Returns:
        Dict with ``avatars`` (one entry per speaker holding URL, filename, speaker
        number, prompt, persona id and seed), ``message`` and ``persona_id``.

    Raises:
        HTTPException: 400 for an invalid speaker count; any HTTPException raised
            while generating is passed through unchanged; 500 for other failures.
    """
    user_id = require_authenticated_user(current_user)

    if speakers < 1 or speakers > 2:
        raise HTTPException(status_code=400, detail="Speakers must be between 1 and 2")

    try:
        keywords_list = _parse_presenter_keywords(top_keywords)

        # Choose persona (market-fit + style) using analysis if not explicitly provided.
        # Do not infer sensitive traits (like ethnicity); personas represent market + style only.
        selected_persona_id = persona_id or choose_persona_id(
            audience=audience,
            content_type=content_type,
            top_keywords=keywords_list,
        )
        persona = get_persona(selected_persona_id)

        # project_id is client-supplied, so only filename-safe characters may reach
        # the on-disk name. Typical ids (letters, digits, '-', '_') are unchanged.
        project_label = project_id or "temp"
        safe_project_label = "".join(
            ch if ch.isalnum() or ch in "-_" else "_" for ch in project_label
        )

        generated_avatars = []

        for i in range(speakers):
            speaker_number = i + 1
            gender = "female" if i == 0 else "male"  # First speaker female, second male

            prompt = _build_presenter_prompt(
                gender, persona, content_type, audience, keywords_list
            )

            logger.info(f"[Podcast] Generating presenter {speaker_number}/{speakers} for project {project_id}")

            # Use a deterministic seed per (project_id, speaker_number, persona_id) to keep
            # presenter identity stable. Determinism may vary by provider/model, but a
            # fixed seed improves consistency substantially.
            seed_source = f"{project_label}|speaker={speaker_number}|persona={selected_persona_id}"
            seed = int(hashlib.sha256(seed_source.encode("utf-8")).hexdigest()[:8], 16)
            image_options = {
                "provider": None,  # Auto-select provider
                "width": 1024,
                "height": 1024,
                "seed": seed,
            }

            result = generate_image(
                prompt=prompt,
                options=image_options,
                user_id=user_id,
            )

            # Save avatar; the short random suffix keeps regenerated files distinct.
            unique_id = str(uuid.uuid4())[:8]
            avatar_filename = f"presenter_{safe_project_label}_{speaker_number}_{unique_id}.png"
            avatar_path = PODCAST_AVATARS_DIR / avatar_filename

            with open(avatar_path, "wb") as f:
                f.write(result.image_bytes)

            avatar_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{avatar_filename}"

            # Save to asset library (best effort: a failure here must not lose the avatar)
            if project_id:
                try:
                    save_asset_to_library(
                        db=db,
                        user_id=user_id,
                        asset_type="image",
                        source_module="podcast_maker",
                        filename=avatar_filename,
                        file_url=avatar_url,
                        file_path=str(avatar_path),
                        file_size=len(result.image_bytes),
                        mime_type="image/png",
                        title=f"Podcast Presenter {speaker_number} - {project_id}",
                        description=f"Generated podcast presenter avatar for speaker {speaker_number}",
                        prompt=prompt,
                        tags=["podcast", "avatar", "presenter", project_id],
                        provider=result.provider,
                        model=result.model,
                        asset_metadata={
                            "project_id": project_id,
                            "speaker_number": speaker_number,
                            "type": "generated_presenter",
                            "status": "completed",
                            "persona_id": selected_persona_id,
                            "seed": seed,
                        },
                    )
                except Exception as e:
                    logger.warning(f"[Podcast] Failed to save presenter asset: {e}")

            generated_avatars.append({
                "avatar_url": avatar_url,
                "avatar_filename": avatar_filename,
                "speaker_number": speaker_number,
                "prompt": prompt,  # Include the prompt used for generation
                "persona_id": selected_persona_id,
                "seed": seed,
            })

        return {
            "avatars": generated_avatars,
            "message": f"Generated {speakers} presenter avatar(s)",
            "persona_id": selected_persona_id,
        }
    except HTTPException:
        # Keep status codes raised further down instead of masking them as a generic 500.
        raise
    except Exception as exc:
        # logger.exception attaches the traceback with both stdlib logging and loguru.
        logger.exception(f"[Podcast] Presenter generation failed: {exc}")
        raise HTTPException(status_code=500, detail=f"Presenter generation failed: {str(exc)}") from exc
|
||||
|
||||
431
backend/api/podcast/handlers/images.py
Normal file
431
backend/api/podcast/handlers/images.py
Normal file
@@ -0,0 +1,431 @@
|
||||
"""
|
||||
Podcast Image Handlers
|
||||
|
||||
Image generation and serving endpoints.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import Dict, Any
|
||||
from pathlib import Path
|
||||
import uuid
|
||||
|
||||
from services.database import get_db
|
||||
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
from services.llm_providers.main_image_generation import generate_image
|
||||
from utils.asset_tracker import save_asset_to_library
|
||||
from loguru import logger
|
||||
from ..constants import PODCAST_IMAGES_DIR
|
||||
from ..models import PodcastImageRequest, PodcastImageResponse
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# (trigger words, label used with a base avatar, label used for from-scratch generation)
_SCENE_THEMES = (
    (
        ("data", "statistics", "numbers", "chart", "graph"),
        "data visualization background",
        "data visualization elements",
    ),
    (
        ("technology", "tech", "digital", "ai", "software"),
        "modern tech studio setting",
        "modern technology aesthetic",
    ),
    (
        ("business", "growth", "strategy", "market"),
        "professional business studio",
        "professional business environment",
    ),
)


def _safe_filename_part(value, max_len=None) -> str:
    """Return ``value`` with every character other than alphanumerics, '-' and '_' replaced by '_'."""
    text = str(value)
    if max_len is not None:
        text = text[:max_len]
    return "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in text)


def _scene_theme_labels(scene_content, preview_len, with_avatar) -> list:
    """Pick visual theme labels based on words found at the start of the scene content."""
    import re  # local import keeps this block self-contained

    preview = scene_content[:preview_len].replace("\n", " ").strip().lower()
    tokens = set(re.findall(r"[a-z0-9]+", preview))

    def mentioned(word):
        # Very short triggers such as "ai" must match as whole words; as substrings
        # they also hit ordinary words like "said" or "main".
        return word in tokens if len(word) <= 2 else word in preview

    return [
        avatar_label if with_avatar else standalone_label
        for triggers, avatar_label, standalone_label in _SCENE_THEMES
        if any(mentioned(word) for word in triggers)
    ]


def _build_character_scene_prompt(request) -> str:
    """Build the scene prompt used together with a base avatar (character-consistent path)."""
    parts = []

    # Scene context (primary focus)
    if request.scene_title:
        parts.append(f"Scene: {request.scene_title}")

    # Scene content insights for visual context
    if request.scene_content:
        labels = _scene_theme_labels(request.scene_content, 200, with_avatar=True)
        if labels:
            parts.append(", ".join(labels))

    # Podcast theme context
    if request.idea:
        parts.append(f"Topic: {request.idea[:60].strip()}")

    # Studio setting (maintains podcast aesthetic)
    parts.extend([
        "Professional podcast recording studio",
        "Modern microphone setup",
        "Clean background, professional lighting",
        "16:9 aspect ratio, video-optimized composition",
    ])
    return ", ".join(parts)


def _build_standard_scene_prompt(request) -> str:
    """Build the scene prompt for from-scratch generation (no base avatar)."""
    # Core podcast studio elements
    parts = [
        "Professional podcast recording studio",
        "Modern podcast setup with high-quality microphone",
        "Clean, minimalist background suitable for video",
        "Professional studio lighting with soft, even illumination",
        "Podcast host environment, professional and inviting",
    ]

    # Scene-specific context
    if request.scene_title:
        parts.append(f"Scene theme: {request.scene_title}")

    # Content context for visual relevance
    if request.scene_content:
        labels = _scene_theme_labels(request.scene_content, 150, with_avatar=False)
        if labels:
            parts.append(", ".join(labels))

    # Podcast theme context
    if request.idea:
        parts.append(f"Podcast topic context: {request.idea[:80].strip()}")

    # Technical requirements for video generation
    parts.extend([
        "16:9 aspect ratio optimized for video",
        "Center-focused composition for talking avatar overlay",
        "Neutral color palette with professional tones",
        "High resolution, sharp focus, professional photography quality",
        "No text, no logos, no distracting elements",
        "Suitable for InfiniteTalk video generation with animated avatar",
    ])

    # Style constraints
    parts.extend([
        "Realistic photography style, not illustration or cartoon",
        "Professional broadcast quality",
        "Warm, inviting atmosphere",
        "Clean composition with breathing room for avatar placement",
    ])
    return ", ".join(parts)


def _resolve_aspect_ratio(request) -> str:
    """Return the requested aspect ratio, or derive one from width/height (default "16:9")."""
    if request.aspect_ratio:
        return request.aspect_ratio
    aspect_ratio_map = {
        (1024, 1024): "1:1",
        (1920, 1080): "16:9",
        (1080, 1920): "9:16",
        (1280, 960): "4:3",
        (960, 1280): "3:4",
    }
    return aspect_ratio_map.get((request.width, request.height), "16:9")


def _track_image_usage(db, pricing_service, user_id, cost, provider) -> None:
    """Record one image-generation call in ``usage_summaries`` (best effort).

    The UPDATE only touches a row that already exists for the user's current billing
    period. Any failure is logged and rolled back without failing the request.
    """
    try:
        from sqlalchemy import text as sql_text
        from datetime import datetime

        current_period = pricing_service.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m")

        # stability_calls / stability_cost are used for all image generation providers,
        # not just Stability AI.
        update_query = sql_text("""
            UPDATE usage_summaries
            SET stability_calls = COALESCE(stability_calls, 0) + 1,
                stability_cost = COALESCE(stability_cost, 0) + :cost,
                total_calls = COALESCE(total_calls, 0) + 1,
                total_cost = COALESCE(total_cost, 0) + :cost
            WHERE user_id = :user_id AND billing_period = :period
        """)
        db.execute(update_query, {
            'cost': cost,
            'user_id': user_id,
            'period': current_period,
        })
        db.commit()

        logger.info(f"[Podcast] ✅ Tracked image generation usage: user={user_id}, cost=${cost:.4f}, provider={provider}")
    except Exception as usage_error:
        logger.error(f"[Podcast] Failed to track image generation usage: {usage_error}")
        db.rollback()
        # Don't fail the request if usage tracking fails


@router.post("/image", response_model=PodcastImageResponse)
async def generate_podcast_scene_image(
    request: PodcastImageRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate an AI image for a podcast scene.

    Creates a professional, podcast-appropriate image based on scene title and content.
    When ``request.base_avatar_url`` is set, the avatar is loaded and the image is
    generated with the Ideogram Character model via WaveSpeed so the presenter stays
    consistent; failures on that path are reported as errors rather than falling back.
    Without a base avatar the image is generated from scratch with ``generate_image``.
    The result is written to ``PODCAST_IMAGES_DIR``, usage is tracked, and the file is
    registered in the asset library (both best effort).

    NOTE(review): ``request.custom_prompt`` is only honoured on the base-avatar path;
    confirm whether from-scratch generation should use it too.

    Raises:
        HTTPException: 400 when the scene title is missing; whatever the pre-flight
            subscription check raises; 500 when the base avatar cannot be loaded;
            504/502 when character generation fails; 500 for any other failure.
    """
    user_id = require_authenticated_user(current_user)

    if not request.scene_title:
        raise HTTPException(status_code=400, detail="Scene title is required")

    try:
        # PRE-FLIGHT VALIDATION: Check subscription limits before any API calls
        from services.subscription import PricingService
        from services.subscription.preflight_validator import validate_image_generation_operations

        pricing_service = PricingService(db)
        try:
            # Raises HTTPException immediately if validation fails
            validate_image_generation_operations(
                pricing_service=pricing_service,
                user_id=user_id,
                num_images=1,
            )
            logger.info(f"[Podcast] ✅ Pre-flight validation passed for user {user_id}")
        except HTTPException as http_ex:
            logger.error(f"[Podcast] ❌ Pre-flight validation failed for user {user_id}: {http_ex.detail}")
            raise

        # If base avatar is provided, create scene-specific variation
        # Otherwise, generate from scratch
        logger.info(f"[Podcast] Image generation request for scene {request.scene_id}")
        logger.info(f"[Podcast] base_avatar_url={request.base_avatar_url}")
        logger.info(f"[Podcast] custom_prompt={request.custom_prompt}")
        logger.info(f"[Podcast] style={request.style}, rendering_speed={request.rendering_speed}, aspect_ratio={request.aspect_ratio}")

        if request.base_avatar_url:
            # Load base avatar image for reference
            from ..utils import load_podcast_image_bytes

            try:
                logger.info(f"[Podcast] Attempting to load base avatar from: {request.base_avatar_url}")
                base_avatar_bytes = load_podcast_image_bytes(request.base_avatar_url)
                logger.info(f"[Podcast] ✅ Successfully loaded base avatar ({len(base_avatar_bytes)} bytes) for scene {request.scene_id}")
            except Exception as e:
                # loguru: logger.exception attaches the traceback. Passing exc_info=True
                # would instead be treated as a str.format() argument for the message.
                logger.exception(f"[Podcast] ❌ Failed to load base avatar from {request.base_avatar_url}: {e}")
                # Without the base avatar we cannot maintain character consistency, so
                # raise an error instead of falling back to standard generation.
                raise HTTPException(
                    status_code=500,
                    detail={
                        "error": "Failed to load base avatar",
                        "message": f"Could not load the base avatar image for character consistency: {str(e)}. Please ensure the avatar image is accessible.",
                    },
                ) from e
        else:
            logger.info("[Podcast] No base avatar URL provided, will generate from scratch")
            base_avatar_bytes = None

        if base_avatar_bytes:
            # Character-consistent generation: a user-supplied prompt wins, otherwise
            # build a scene-specific prompt that respects the base avatar.
            if request.custom_prompt:
                image_prompt = request.custom_prompt
                logger.info(f"[Podcast] Using custom prompt from user for scene {request.scene_id}")
            else:
                image_prompt = _build_character_scene_prompt(request)

            logger.info(f"[Podcast] Using Ideogram Character for scene {request.scene_id} with base avatar")
            logger.info(f"[Podcast] Scene prompt: {image_prompt[:150]}...")

            # Use Ideogram Character API via WaveSpeed client
            from services.wavespeed.client import WaveSpeedClient

            wavespeed_client = WaveSpeedClient()

            # Use custom settings if provided, otherwise use defaults
            style = request.style or "Realistic"  # Default to Realistic for professional podcast presenters
            rendering_speed = request.rendering_speed or "Quality"  # Default to Quality for podcast videos
            aspect_ratio = _resolve_aspect_ratio(request)

            logger.info(f"[Podcast] Ideogram Character settings: style={style}, rendering_speed={rendering_speed}, aspect_ratio={aspect_ratio}")

            try:
                image_bytes = wavespeed_client.generate_character_image(
                    prompt=image_prompt,
                    reference_image_bytes=base_avatar_bytes,
                    style=style,
                    aspect_ratio=aspect_ratio,
                    rendering_speed=rendering_speed,
                    timeout=None,  # No timeout - poll until WaveSpeed says it's done or failed
                )

                # Create result object compatible with ImageGenerationResult
                from services.llm_providers.image_generation.base import ImageGenerationResult

                result = ImageGenerationResult(
                    image_bytes=image_bytes,
                    provider="wavespeed",
                    model="ideogram-ai/ideogram-character",
                    width=request.width,
                    height=request.height,
                )

                logger.info("[Podcast] ✅ Successfully generated character-consistent scene image")
            except HTTPException as http_err:
                # Re-raise HTTPExceptions from wavespeed client as-is
                logger.error(f"[Podcast] ❌ Ideogram Character HTTPException: {http_err.status_code} - {http_err.detail}")
                raise
            except Exception as char_error:
                error_msg = str(char_error)
                error_type = type(char_error).__name__
                logger.exception(f"[Podcast] ❌ Ideogram Character failed: {error_type}: {error_msg}")

                # Do NOT fall back to standard generation here: that would lose character
                # consistency. Timeouts/connection problems get a retry-friendly 504.
                error_msg_lower = error_msg.lower()
                if "timeout" in error_msg_lower or "connection" in error_msg_lower or "504" in error_msg:
                    raise HTTPException(
                        status_code=504,
                        detail={
                            "error": "Image generation service unavailable",
                            "message": "The character-consistent image generation service is currently unavailable. Please try again in a few moments. If the problem persists, the service may be experiencing high load.",
                            "retry_recommended": True,
                        },
                    ) from char_error
                raise HTTPException(
                    status_code=502,
                    detail={
                        "error": "Character-consistent image generation failed",
                        "message": f"Failed to generate image with character consistency: {error_msg}",
                        "retry_recommended": True,
                    },
                ) from char_error
        else:
            # Reached only when no usable base avatar bytes exist: load failures and
            # character-generation failures have already raised above.
            logger.info("[Podcast] No base avatar provided - generating standard image from scratch")
            image_prompt = _build_standard_scene_prompt(request)

            logger.info(f"[Podcast] Generating image for scene {request.scene_id}: {request.scene_title}")

            # Generate image using main_image_generation service
            image_options = {
                "provider": None,  # Auto-select provider
                "width": request.width,
                "height": request.height,
            }
            result = generate_image(
                prompt=image_prompt,
                options=image_options,
                user_id=user_id,
            )

        # Save image to podcast images directory
        PODCAST_IMAGES_DIR.mkdir(parents=True, exist_ok=True)

        # Both scene_id and scene_title are client-supplied, so sanitise them before
        # they become part of a path and a URL.
        clean_scene_id = _safe_filename_part(request.scene_id)
        clean_title = _safe_filename_part(request.scene_title, max_len=30)
        unique_id = str(uuid.uuid4())[:8]
        image_filename = f"scene_{clean_scene_id}_{clean_title}_{unique_id}.png"
        image_path = PODCAST_IMAGES_DIR / image_filename

        with open(image_path, "wb") as f:
            f.write(result.image_bytes)

        logger.info(f"[Podcast] Saved image to: {image_path}")

        # Create image URL (served via API endpoint)
        image_url = f"/api/podcast/images/{image_filename}"

        # Estimate cost (rough estimate: ~$0.04 per image for most providers, ~$0.08 for Ideogram Character Quality)
        cost = 0.08 if result.provider == "wavespeed" and result.model == "ideogram-ai/ideogram-character" else 0.04

        # TRACK USAGE after successful image generation
        _track_image_usage(db, pricing_service, user_id, cost, result.provider)

        # Save to asset library (best effort)
        try:
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="image",
                source_module="podcast_maker",
                filename=image_filename,
                file_url=image_url,
                file_path=str(image_path),
                file_size=len(result.image_bytes),
                mime_type="image/png",
                title=f"{request.scene_title} - Podcast Scene",
                description=f"Podcast scene image: {request.scene_title}",
                prompt=image_prompt,
                tags=["podcast", "scene", request.scene_id],
                provider=result.provider,
                model=result.model,
                asset_metadata={
                    "scene_id": request.scene_id,
                    "scene_title": request.scene_title,
                    "status": "completed",
                },
            )
        except Exception as e:
            logger.warning(f"[Podcast] Failed to save image asset: {e}")

        return PodcastImageResponse(
            scene_id=request.scene_id,
            scene_title=request.scene_title,
            image_filename=image_filename,
            image_url=image_url,
            width=result.width,
            height=result.height,
            provider=result.provider,
            model=result.model,
            cost=cost,
        )

    except HTTPException:
        # Re-raise HTTPExceptions as-is (they already have proper error details)
        raise
    except Exception as exc:
        # Log the full exception for debugging
        error_msg = str(exc)
        error_type = type(exc).__name__
        logger.exception(f"[Podcast] Image generation failed: {error_type}: {error_msg}")

        raise HTTPException(
            status_code=500,
            detail={
                "error": "Image generation failed",
                "message": error_msg,
                "type": error_type,
            },
        ) from exc
|
||||
|
||||
|
||||
@router.get("/images/{path:path}")
async def serve_podcast_image(
    path: str,  # Changed from filename to path to support subdirectories
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve generated podcast scene images and avatars.

    Supports authentication via Authorization header or token query parameter.
    Query parameter is useful for HTML elements like <img> that cannot send custom headers.
    Supports subdirectories like avatars/

    Args:
        path: Path of the image relative to ``PODCAST_IMAGES_DIR``.

    Raises:
        HTTPException: 400 for paths containing ``..`` or starting with ``/``;
            403 when the resolved path lies outside the images directory;
            404 when the target is missing or is not a regular file.
    """
    require_authenticated_user(current_user)

    # Security check: ensure path doesn't contain path traversal or absolute paths
    if ".." in path or path.startswith("/"):
        raise HTTPException(status_code=400, detail="Invalid path")

    images_root = PODCAST_IMAGES_DIR.resolve()
    image_path = (images_root / path).resolve()

    # Security check: the resolved path must sit inside the images directory.
    # relative_to() compares path components; a plain string-prefix test would also
    # accept sibling directories whose name merely starts with the same text.
    try:
        image_path.relative_to(images_root)
    except ValueError:
        raise HTTPException(status_code=403, detail="Access denied") from None

    # is_file() (not exists()) so that directories give a 404 instead of failing
    # inside FileResponse.
    if not image_path.is_file():
        raise HTTPException(status_code=404, detail="Image not found")

    # Report the real image type when the extension is recognised. Anything that is
    # not an image type keeps the previous "image/png" so this route never serves
    # another content type.
    import mimetypes  # local import keeps this block self-contained

    guessed_type, _ = mimetypes.guess_type(image_path.name)
    if guessed_type and guessed_type.startswith("image/"):
        media_type = guessed_type
    else:
        media_type = "image/png"

    return FileResponse(image_path, media_type=media_type)
|
||||
|
||||
203
backend/api/podcast/handlers/projects.py
Normal file
203
backend/api/podcast/handlers/projects.py
Normal file
@@ -0,0 +1,203 @@
|
||||
"""
|
||||
Podcast Project Handlers
|
||||
|
||||
CRUD operations for podcast projects.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
from services.database import get_db
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from services.podcast_service import PodcastService
|
||||
from ..models import (
|
||||
PodcastProjectResponse,
|
||||
CreateProjectRequest,
|
||||
UpdateProjectRequest,
|
||||
PodcastProjectListResponse,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/projects", response_model=PodcastProjectResponse, status_code=201)
async def create_project(
    request: CreateProjectRequest,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Create a podcast project owned by the authenticated user.

    Raises:
        HTTPException: 401 when no user id is present, 400 when the user already
            has a project with the requested ``project_id``, 500 for any other failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        podcast_service = PodcastService(db)

        # Project ids come from the client, so a duplicate for this user is rejected.
        if podcast_service.get_project(owner_id, request.project_id):
            raise HTTPException(status_code=400, detail="Project ID already exists")

        created = podcast_service.create_project(
            user_id=owner_id,
            project_id=request.project_id,
            idea=request.idea,
            duration=request.duration,
            speakers=request.speakers,
            budget_cap=request.budget_cap,
        )
        return PodcastProjectResponse.model_validate(created)
    except HTTPException:
        # Already carries the intended status code.
        raise
    except Exception as err:
        raise HTTPException(status_code=500, detail=f"Error creating project: {err}")
|
||||
|
||||
|
||||
@router.get("/projects/{project_id}", response_model=PodcastProjectResponse)
async def get_project(
    project_id: str,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Return one of the authenticated user's podcast projects.

    Raises:
        HTTPException: 401 when no user id is present, 404 when the project is
            not found for this user, 500 for any other failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        found = PodcastService(db).get_project(owner_id, project_id)
        if not found:
            raise HTTPException(status_code=404, detail="Project not found")

        return PodcastProjectResponse.model_validate(found)
    except HTTPException:
        # Already carries the intended status code.
        raise
    except Exception as err:
        raise HTTPException(status_code=500, detail=f"Error fetching project: {err}")
|
||||
|
||||
|
||||
@router.put("/projects/{project_id}", response_model=PodcastProjectResponse)
async def update_project(
    project_id: str,
    request: UpdateProjectRequest,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Apply a partial update to one of the authenticated user's podcast projects.

    Raises:
        HTTPException: 401 when no user id is present, 404 when the project is
            not found for this user, 500 for any other failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        # Only fields the client actually sent are forwarded (exclude_unset), so a
        # field sent explicitly as null is still passed on as None.
        changed_fields = request.model_dump(exclude_unset=True)

        updated = PodcastService(db).update_project(owner_id, project_id, **changed_fields)
        if not updated:
            raise HTTPException(status_code=404, detail="Project not found")

        return PodcastProjectResponse.model_validate(updated)
    except HTTPException:
        # Already carries the intended status code.
        raise
    except Exception as err:
        raise HTTPException(status_code=500, detail=f"Error updating project: {err}")
|
||||
|
||||
|
||||
@router.get("/projects", response_model=PodcastProjectListResponse)
async def list_projects(
    status: Optional[str] = Query(None, description="Filter by status"),
    favorites_only: bool = Query(False, description="Only favorites"),
    limit: int = Query(50, ge=1, le=200),
    offset: int = Query(0, ge=0),
    order_by: str = Query("updated_at", description="Order by: updated_at or created_at"),
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Return one page of the authenticated user's podcast projects.

    The response echoes ``limit`` and ``offset`` and carries the ``total`` reported
    by the service alongside the page of projects.

    Raises:
        HTTPException: 401 when no user id is present, 400 for an unsupported
            ``order_by`` value, 500 for any other failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        # Only these two sort columns are accepted.
        if order_by not in ("updated_at", "created_at"):
            raise HTTPException(status_code=400, detail="order_by must be 'updated_at' or 'created_at'")

        page, total_count = PodcastService(db).list_projects(
            user_id=owner_id,
            status=status,
            favorites_only=favorites_only,
            limit=limit,
            offset=offset,
            order_by=order_by,
        )

        serialized = [PodcastProjectResponse.model_validate(item) for item in page]
        return PodcastProjectListResponse(
            projects=serialized,
            total=total_count,
            limit=limit,
            offset=offset,
        )
    except HTTPException:
        # Already carries the intended status code.
        raise
    except Exception as err:
        raise HTTPException(status_code=500, detail=f"Error listing projects: {err}")
|
||||
|
||||
|
||||
@router.delete("/projects/{project_id}", status_code=204)
async def delete_project(
    project_id: str,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Delete one of the authenticated user's podcast projects.

    Returns nothing on success (HTTP 204).

    Raises:
        HTTPException: 401 when no user id is present, 404 when the service reports
            that nothing was deleted, 500 for any other failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        was_deleted = PodcastService(db).delete_project(owner_id, project_id)
        if not was_deleted:
            raise HTTPException(status_code=404, detail="Project not found")

        return None
    except HTTPException:
        # Already carries the intended status code.
        raise
    except Exception as err:
        raise HTTPException(status_code=500, detail=f"Error deleting project: {err}")
|
||||
|
||||
|
||||
@router.post("/projects/{project_id}/favorite", response_model=PodcastProjectResponse)
async def toggle_favorite(
    project_id: str,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Toggle the favorite flag of one of the authenticated user's projects.

    Returns the project as handed back by the service after toggling.

    Raises:
        HTTPException: 401 when no user id is present, 404 when the project is
            not found for this user, 500 for any other failure.
    """
    try:
        owner_id = current_user.get("user_id") or current_user.get("id")
        if not owner_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        toggled = PodcastService(db).toggle_favorite(owner_id, project_id)
        if not toggled:
            raise HTTPException(status_code=404, detail="Project not found")

        return PodcastProjectResponse.model_validate(toggled)
    except HTTPException:
        # Already carries the intended status code.
        raise
    except Exception as err:
        raise HTTPException(status_code=500, detail=f"Error toggling favorite: {err}")
|
||||
|
||||
99
backend/api/podcast/handlers/research.py
Normal file
99
backend/api/podcast/handlers/research.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""
|
||||
Podcast Research Handlers
|
||||
|
||||
Research endpoints using Exa provider.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from typing import Dict, Any
|
||||
from types import SimpleNamespace
|
||||
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
from services.blog_writer.research.exa_provider import ExaResearchProvider
|
||||
from loguru import logger
|
||||
from ..models import (
|
||||
PodcastExaResearchRequest,
|
||||
PodcastExaResearchResponse,
|
||||
PodcastExaSource,
|
||||
PodcastExaConfig,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/research/exa", response_model=PodcastExaResearchResponse)
async def podcast_research_exa(
    request: PodcastExaResearchRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Run podcast research directly via Exa (no blog writer pipeline).

    The request must contain at least one non-blank query. The cleaned
    queries are only used as the ``search_queries`` fallback in the response;
    the provider itself is called with ``request.topic``.

    Raises:
        HTTPException: 400 when every query is blank; 500 when the provider
            call fails. An ``HTTPException`` raised by the provider is passed
            through with its original status code.
    """
    user_id = require_authenticated_user(current_user)

    queries = [q.strip() for q in request.queries if q and q.strip()]
    if not queries:
        raise HTTPException(status_code=400, detail="At least one query is required for research.")

    exa_cfg = request.exa_config or PodcastExaConfig()
    # The provider reads its options as attributes of a config object.
    cfg = SimpleNamespace(
        exa_search_type=exa_cfg.exa_search_type or "auto",
        exa_category=exa_cfg.exa_category,
        exa_include_domains=exa_cfg.exa_include_domains or [],
        exa_exclude_domains=exa_cfg.exa_exclude_domains or [],
        max_sources=exa_cfg.max_sources or 8,
        source_types=[],
    )

    provider = ExaResearchProvider()

    try:
        result = await provider.search(
            prompt=request.topic,
            topic=request.topic,
            industry="",
            target_audience="",
            config=cfg,
            user_id=user_id,
        )
    except HTTPException:
        # Keep the provider's own status code instead of flattening it to 500.
        raise
    except Exception as exc:
        logger.error(f"[Podcast Exa Research] Failed for user {user_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Exa research failed: {exc}") from exc

    # Anything other than a dict is treated as an empty result.
    payload = result if isinstance(result, dict) else {}

    # Track usage if available (best-effort: never fails the request).
    try:
        cost_total = 0.0
        if isinstance(result, dict):
            cost = result.get("cost")
            cost_total = cost.get("total", 0.005) if cost else 0.005
        provider.track_exa_usage(user_id, cost_total)
    except Exception as track_err:
        logger.warning(f"[Podcast Exa Research] Failed to track usage: {track_err}")

    sources_payload = []
    for src in payload.get("sources") or []:
        if not isinstance(src, dict):
            logger.warning("[Podcast Exa Research] Skipping malformed source entry")
            continue
        try:
            sources_payload.append(PodcastExaSource(**src))
            continue
        except Exception:
            # Fall through and retry with only the fields picked out below.
            pass
        try:
            sources_payload.append(PodcastExaSource(**{
                "title": src.get("title", ""),
                "url": src.get("url", ""),
                "excerpt": src.get("excerpt", ""),
                "published_at": src.get("published_at"),
                "highlights": src.get("highlights"),
                "summary": src.get("summary"),
                "source_type": src.get("source_type"),
                "index": src.get("index"),
            }))
        except Exception as src_err:
            # One bad source must not discard the rest of the research.
            logger.warning(f"[Podcast Exa Research] Skipping invalid source: {src_err}")

    return PodcastExaResearchResponse(
        sources=sources_payload,
        search_queries=payload.get("search_queries", queries),
        cost=payload.get("cost"),
        search_type=payload.get("search_type"),
        provider=payload.get("provider", "exa"),
        content=payload.get("content"),
    )
|
||||
|
||||
142
backend/api/podcast/handlers/script.py
Normal file
142
backend/api/podcast/handlers/script.py
Normal file
@@ -0,0 +1,142 @@
|
||||
"""
|
||||
Podcast Script Handlers
|
||||
|
||||
Script generation endpoint.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from typing import Dict, Any
|
||||
import json
|
||||
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from loguru import logger
|
||||
from ..models import (
|
||||
PodcastScriptRequest,
|
||||
PodcastScriptResponse,
|
||||
PodcastScene,
|
||||
PodcastSceneLine,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
_VALID_SCENE_EMOTIONS = frozenset(
    {"neutral", "happy", "excited", "serious", "curious", "confident"}
)


def _build_research_context(research: Dict[str, Any]) -> str:
    """Condense the research payload into a few labelled lines for the prompt."""
    key_insights = research.get("keyword_analysis", {}).get("key_insights") or []
    fact_cards = research.get("factCards", []) or []
    mapped_angles = research.get("mappedAngles", []) or []
    sources = research.get("sources", []) or []

    top_facts = [f.get("quote", "") for f in fact_cards[:5] if f.get("quote")]
    angles_summary = [
        f"{a.get('title', '')}: {a.get('why', '')}"
        for a in mapped_angles[:3]
        if a.get("title") or a.get("why")
    ]
    top_sources = [s.get("url") for s in sources[:3] if s.get("url")]

    research_parts = []
    if key_insights:
        research_parts.append(f"Key Insights: {', '.join(key_insights[:5])}")
    if top_facts:
        research_parts.append(f"Key Facts: {', '.join(top_facts)}")
    if angles_summary:
        research_parts.append(f"Research Angles: {' | '.join(angles_summary)}")
    if top_sources:
        research_parts.append(f"Top Sources: {', '.join(top_sources)}")

    return "\n".join(research_parts)


def _coerce_scene_duration(value: Any, fallback: int) -> int:
    """Return *value* as a positive whole number of seconds, else *fallback*."""
    if not value:
        return fallback
    try:
        seconds = int(float(value))
    except (TypeError, ValueError):
        return fallback
    return seconds if seconds > 0 else fallback


def _normalize_scene_lines(lines_raw: Any, speaker_count: int) -> "list[PodcastSceneLine]":
    """Convert raw LLM line dicts to ``PodcastSceneLine``, dropping empty or malformed ones."""
    lines = []
    if not isinstance(lines_raw, list):
        return lines
    for line in lines_raw:
        if not isinstance(line, dict):
            continue
        text = line.get("text") or ""
        if not isinstance(text, str):
            text = str(text)
        if not text:
            continue
        # Missing speaker: "Host" for every speaker_count-th kept line, "Guest" otherwise.
        speaker = line.get("speaker") or ("Host" if len(lines) % speaker_count == 0 else "Guest")
        emphasis = line.get("emphasis")
        if emphasis is None:
            emphasis = False
        lines.append(PodcastSceneLine(speaker=str(speaker), text=text, emphasis=emphasis))
    return lines


@router.post("/script", response_model=PodcastScriptResponse)
async def generate_podcast_script(
    request: PodcastScriptRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Generate a podcast script outline (scenes + lines) using podcast-oriented prompting.

    The LLM reply is parsed as JSON and normalized: missing titles, ids,
    durations and speakers get defaults, unknown emotions fall back to
    "neutral", and malformed scenes or lines are skipped.

    Raises:
        HTTPException: 500 when the LLM call fails or its output is not a
            JSON object with a ``scenes`` list. An ``HTTPException`` raised
            by the LLM layer is passed through with its original status code.
    """
    user_id = require_authenticated_user(current_user)

    # Build comprehensive research context for higher-quality scripts.
    # Best-effort: a malformed research payload only costs the extra context.
    research_context = ""
    if request.research:
        try:
            research_context = _build_research_context(request.research)
        except Exception as exc:
            logger.warning(f"Failed to parse research context: {exc}")
            research_context = ""

    # Built outside the prompt f-string: a backslash inside an f-string
    # replacement field is a SyntaxError before Python 3.12.
    research_section = f"RESEARCH CONTEXT:\n{research_context}\n" if research_context else ""

    prompt = f"""You are an expert podcast script planner. Create natural, conversational podcast scenes.

Podcast Idea: "{request.idea}"
Duration: ~{request.duration_minutes} minutes
Speakers: {request.speakers} (Host + optional Guest)

{research_section}

Return JSON with:
- scenes: array of scenes. Each scene has:
  - id: string
  - title: short scene title (<= 60 chars)
  - duration: duration in seconds (evenly split across total duration)
  - emotion: string (one of: "neutral", "happy", "excited", "serious", "curious", "confident")
  - lines: array of {{"speaker": "...", "text": "...", "emphasis": boolean}}
    * Write natural, conversational dialogue
    * Each line can be a sentence or a few sentences that flow together
    * Use plain text only - no markdown formatting (no asterisks, underscores, etc.)
    * Mark "emphasis": true for key statistics or important points

Guidelines:
- Write for spoken delivery: conversational, natural, with contractions
- Use research insights naturally - weave statistics into dialogue, don't just list them
- Vary emotion per scene based on content
- Ensure scenes match target duration: aim for ~2.5 words per second of audio
- Keep it engaging and informative, like a real podcast conversation
"""

    try:
        raw = llm_text_gen(prompt=prompt, user_id=user_id, json_struct=None)
    except HTTPException:
        # Keep the LLM layer's own status code instead of flattening it to 500.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Script generation failed: {exc}") from exc

    if isinstance(raw, str):
        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            raise HTTPException(status_code=500, detail="LLM returned non-JSON output")
    else:
        data = raw

    # Valid JSON can still be a list or scalar; only an object is usable here.
    if not isinstance(data, dict):
        raise HTTPException(status_code=500, detail="Unexpected LLM response format")

    scenes_data = data.get("scenes") or []
    if not isinstance(scenes_data, list):
        raise HTTPException(status_code=500, detail="LLM response missing scenes array")

    # Even split of the requested runtime, with a 30-second floor per scene.
    default_duration = int(max(30, (request.duration_minutes * 60) // max(1, len(scenes_data))))
    # Guard the modulo used for speaker alternation against 0 / None.
    speaker_count = max(1, request.speakers or 1)

    # Normalize scenes
    scenes: list = []
    for idx, scene in enumerate(scenes_data):
        if not isinstance(scene, dict):
            logger.warning(f"Skipping malformed scene at index {idx}")
            continue

        emotion = scene.get("emotion") or "neutral"
        if not isinstance(emotion, str) or emotion not in _VALID_SCENE_EMOTIONS:
            emotion = "neutral"

        scenes.append(
            PodcastScene(
                id=str(scene.get("id") or f"scene-{idx + 1}"),
                title=str(scene.get("title") or f"Scene {idx + 1}"),
                duration=_coerce_scene_duration(scene.get("duration"), default_duration),
                lines=_normalize_scene_lines(scene.get("lines"), speaker_count),
                approved=False,
                emotion=emotion,
            )
        )

    return PodcastScriptResponse(scenes=scenes)
|
||||
|
||||
585
backend/api/podcast/handlers/video.py
Normal file
585
backend/api/podcast/handlers/video.py
Normal file
@@ -0,0 +1,585 @@
|
||||
"""
|
||||
Podcast Video Handlers
|
||||
|
||||
Video generation and serving endpoints.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks, Request
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import Dict, Any, Optional
|
||||
from pathlib import Path
|
||||
from urllib.parse import quote
|
||||
import re
|
||||
import json
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from services.database import get_db
|
||||
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
from services.wavespeed.infinitetalk import animate_scene_with_voiceover
|
||||
from services.podcast.video_combination_service import PodcastVideoCombinationService
|
||||
from services.llm_providers.main_video_generation import track_video_usage
|
||||
from services.subscription import PricingService
|
||||
from services.subscription.preflight_validator import validate_scene_animation_operation
|
||||
from api.story_writer.task_manager import task_manager
|
||||
from loguru import logger
|
||||
from ..constants import AI_VIDEO_SUBDIR, PODCAST_VIDEOS_DIR
|
||||
from ..utils import load_podcast_audio_bytes, load_podcast_image_bytes
|
||||
from services.podcast_service import PodcastService
|
||||
from ..models import (
|
||||
PodcastVideoGenerationRequest,
|
||||
PodcastVideoGenerationResponse,
|
||||
PodcastCombineVideosRequest,
|
||||
PodcastCombineVideosResponse,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Thread pool executor for CPU-intensive video operations
|
||||
# This prevents blocking the FastAPI event loop
|
||||
_video_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="podcast_video")
|
||||
|
||||
|
||||
def _extract_error_message(exc: Exception) -> str:
    """
    Extract user-friendly error message from exception.

    Handles HTTPException with nested error details from WaveSpeed API.

    Lookup order:
      1. ``HTTPException`` with a dict detail: the ``message`` field of the
         JSON held in ``detail["response"]``, then ``detail["error"]``.
      2. ``HTTPException`` with a string detail: that string.
      3. Anything else, or when the above found nothing: a fixed message for
         insufficient-credit errors, then the first ``"message": "..."`` pair
         in ``str(exc)``, then ``str(exc)`` itself.

    The result is always a ``str``, so callers can store it without casting.
    """
    if isinstance(exc, HTTPException):
        detail = exc.detail
        # If detail is a dict (from WaveSpeed client)
        if isinstance(detail, dict):
            # Try to extract message from nested response JSON
            response_str = detail.get("response", "")
            if response_str:
                try:
                    response_json = json.loads(response_str)
                except (json.JSONDecodeError, TypeError):
                    response_json = None
                if isinstance(response_json, dict) and "message" in response_json:
                    return str(response_json["message"])
            # Fall back to error field
            if "error" in detail:
                return str(detail["error"])
        # If detail is a string
        elif isinstance(detail, str):
            return detail

    # For other exceptions, use string representation
    error_str = str(exc)

    # Format seen in practice: "502: {'error': '...', 'response': '{"message":"..."}'}"
    if "insufficient credits" in error_str.lower():
        return "Insufficient WaveSpeed credits. Please top up your account."

    # Look for a JSON-like "message" pair embedded in the error string
    json_match = re.search(r'"message"\s*:\s*"([^"]+)"', error_str)
    if json_match:
        return json_match.group(1)

    return error_str
|
||||
|
||||
|
||||
def _execute_podcast_video_task(
    task_id: str,
    request: PodcastVideoGenerationRequest,
    user_id: str,
    image_bytes: bytes,
    audio_bytes: bytes,
    auth_token: Optional[str] = None,
    mask_image_bytes: Optional[bytes] = None,
):
    """Background task to generate InfiniteTalk video for podcast scene.

    Animates the scene image with its audio, saves the returned video bytes,
    records usage, and publishes progress and the final result through
    ``task_manager``. Any failure is caught and recorded as a "failed" task
    status; nothing is raised to the caller.

    When ``auth_token`` is given it is appended to the returned ``video_url``
    as a ``token`` query parameter. That URL is therefore never logged.
    """
    try:
        task_manager.update_task_status(
            task_id, "processing", progress=5.0, message="Submitting to WaveSpeed InfiniteTalk..."
        )

        # Extract scene number from scene_id (first run of digits, 0 if none)
        scene_number_match = re.search(r'\d+', request.scene_id)
        scene_number = int(scene_number_match.group()) if scene_number_match else 0

        # Prepare scene data for animation
        scene_data = {
            "scene_number": scene_number,
            "title": request.scene_title,
            "scene_id": request.scene_id,
        }
        story_context = {
            "project_id": request.project_id,
            "type": "podcast",
        }

        animation_result = animate_scene_with_voiceover(
            image_bytes=image_bytes,
            audio_bytes=audio_bytes,
            scene_data=scene_data,
            story_context=story_context,
            user_id=user_id,
            resolution=request.resolution or "720p",
            prompt_override=request.prompt,
            mask_image_bytes=mask_image_bytes,
            seed=request.seed if request.seed is not None else -1,
            image_mime="image/png",
            audio_mime="audio/mpeg",
        )

        task_manager.update_task_status(
            task_id, "processing", progress=80.0, message="Saving video file..."
        )

        # Use podcast-specific video directory
        ai_video_dir = PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR
        ai_video_dir.mkdir(parents=True, exist_ok=True)
        video_service = PodcastVideoCombinationService(output_dir=str(PODCAST_VIDEOS_DIR / "Final_Videos"))

        save_result = video_service.save_scene_video(
            video_bytes=animation_result["video_bytes"],
            scene_number=scene_number,
            user_id=user_id,
        )
        video_filename = save_result["video_filename"]
        video_url = f"/api/podcast/videos/{video_filename}"
        if auth_token:
            video_url = f"{video_url}?token={quote(auth_token)}"

        # Log the filename only: video_url may embed the caller's bearer token.
        logger.info(
            f"[Podcast] Video saved: filename={video_filename}, scene={request.scene_id}"
        )

        track_video_usage(
            user_id=user_id,
            provider=animation_result["provider"],
            model_name=animation_result["model_name"],
            prompt=animation_result["prompt"],
            video_bytes=animation_result["video_bytes"],
            cost_override=animation_result["cost"],
        )

        result_data = {
            "video_url": video_url,
            "video_filename": video_filename,
            "cost": animation_result["cost"],
            "duration": animation_result["duration"],
            "provider": animation_result["provider"],
            "model": animation_result["model_name"],
        }

        logger.info(
            f"[Podcast] Updating task status to completed: task_id={task_id}, filename={video_filename}"
        )

        task_manager.update_task_status(
            task_id,
            "completed",
            progress=100.0,
            message="Video generation complete!",
            result=result_data,
        )

        # Verify the task status was updated correctly. Written defensively:
        # an exception here would reach the handler below and overwrite the
        # "completed" status with "failed".
        updated_status = task_manager.get_task_status(task_id) or {}
        logger.info(
            f"[Podcast] Task status after update: task_id={task_id}, "
            f"status={updated_status.get('status')}, has_result={bool(updated_status.get('result'))}"
        )

        logger.info(
            f"[Podcast] Video generation completed for project {request.project_id}, scene {request.scene_id}"
        )

    except Exception as exc:
        # Use logger.exception to avoid KeyError when exception message contains curly braces
        logger.exception(f"[Podcast] Video generation failed for project {request.project_id}, scene {request.scene_id}")

        # Extract user-friendly error message from exception
        error_msg = _extract_error_message(exc)

        task_manager.update_task_status(
            task_id, "failed", error=error_msg, message=f"Video generation failed: {error_msg}"
        )
|
||||
|
||||
|
||||
@router.post("/render/video", response_model=PodcastVideoGenerationResponse)
async def generate_podcast_video(
    request_obj: Request,
    request: PodcastVideoGenerationRequest,
    background_tasks: BackgroundTasks,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Generate video for a podcast scene using WaveSpeed InfiniteTalk (avatar image + audio).
    Returns task_id for polling since InfiniteTalk can take up to 10 minutes.

    Request fields are validated before any media is loaded, so an invalid
    request is rejected without touching storage.

    Raises:
        HTTPException: 400 for an unsupported resolution, a missing scene
            image, or a mask image that fails to load. Errors from the
            audio/image loaders and the subscription check propagate as raised.
    """
    user_id = require_authenticated_user(current_user)

    logger.info(
        f"[Podcast] Starting video generation for project {request.project_id}, scene {request.scene_id}"
    )

    # Validate resolution
    if request.resolution not in {"480p", "720p"}:
        raise HTTPException(status_code=400, detail="Resolution must be '480p' or '720p'.")

    # Scene-specific image should be generated before video generation
    if not request.avatar_image_url:
        raise HTTPException(
            status_code=400,
            detail="Scene image is required for video generation. Please generate images for scenes first.",
        )

    # Load audio and image bytes
    audio_bytes = load_podcast_audio_bytes(request.audio_url)
    image_bytes = load_podcast_image_bytes(request.avatar_image_url)

    mask_image_bytes = None
    if request.mask_image_url:
        try:
            mask_image_bytes = load_podcast_image_bytes(request.mask_image_url)
        except Exception as e:
            logger.error(f"[Podcast] Failed to load mask image: {e}")
            raise HTTPException(
                status_code=400,
                detail="Failed to load mask image for video generation.",
            )

    # Validate subscription limits
    db = next(get_db())
    try:
        pricing_service = PricingService(db)
        validate_scene_animation_operation(pricing_service=pricing_service, user_id=user_id)
    finally:
        db.close()

    # Extract token for authenticated URL building.
    # Slice off the scheme prefix only; str.replace would also strip any
    # later "Bearer " occurrence inside the header value.
    auth_token = None
    auth_header = request_obj.headers.get("Authorization")
    if auth_header and auth_header.startswith("Bearer "):
        auth_token = auth_header[len("Bearer "):].strip()

    # Create async task
    task_id = task_manager.create_task("podcast_video_generation")
    background_tasks.add_task(
        _execute_podcast_video_task,
        task_id=task_id,
        request=request,
        user_id=user_id,
        image_bytes=image_bytes,
        audio_bytes=audio_bytes,
        auth_token=auth_token,
        mask_image_bytes=mask_image_bytes,
    )

    return PodcastVideoGenerationResponse(
        task_id=task_id,
        status="pending",
        message="Video generation started. This may take up to 10 minutes.",
    )
|
||||
|
||||
|
||||
@router.get("/videos/{filename}")
async def serve_podcast_video(
    filename: str,
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve generated podcast scene video files.

    Supports authentication via Authorization header or token query parameter.
    Query parameter is useful for HTML elements like <video> that cannot send custom headers.

    The file is looked up directly under the podcast videos directory and in
    its AI-video subdirectory; the first regular file found is returned.

    Raises:
        HTTPException: 400 when the filename contains path-traversal
            characters; 404 when no matching file exists.
    """
    require_authenticated_user(current_user)
    # NOTE(review): any authenticated user can fetch any file by name; there is
    # no per-user ownership check here - confirm that is intended.

    # Security check: ensure filename doesn't contain path traversal
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    base_dir = PODCAST_VIDEOS_DIR.resolve()
    candidates = (
        PODCAST_VIDEOS_DIR / filename,
        PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR / filename,
    )

    video_path = None
    for candidate in candidates:
        resolved_path = candidate.resolve()
        # Compare path components, not string prefixes: a prefix test would
        # also accept a sibling directory such as "<base>_other".
        if base_dir in resolved_path.parents and resolved_path.is_file():
            video_path = resolved_path
            break

    if not video_path:
        raise HTTPException(status_code=404, detail="Video file not found")

    return FileResponse(video_path, media_type="video/mp4")
|
||||
|
||||
|
||||
@router.get("/videos")
async def list_podcast_videos(
    project_id: Optional[str] = None,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    List existing scene video files for the current user.
    Returns videos mapped to scene numbers for easy matching.

    Only the most recently modified video per scene number is returned,
    sorted by scene number. ``project_id`` is accepted and logged but does not
    filter the result: the filename pattern carries no project identifier.

    Listing is best-effort: filesystem errors are logged and yield an empty
    list. Authentication errors are NOT swallowed and propagate as raised by
    ``require_authenticated_user``.
    """
    # Authenticate outside the best-effort block so an auth failure is not
    # converted into a 200 response with an empty list.
    user_id = require_authenticated_user(current_user)

    try:
        logger.info(f"[Podcast] Listing videos for user_id={user_id}, project_id={project_id}")

        # Look in podcast_videos/AI_Videos directory
        ai_video_dir = PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR
        ai_video_dir.mkdir(parents=True, exist_ok=True)

        # Filename pattern: scene_{scene_number}_{user_id}_{hash}.mp4
        # presumably mirrors the sanitizing done by save_scene_video - verify
        clean_user_id = "".join(c if c.isalnum() or c in ('-', '_') else '_' for c in user_id[:16])

        logger.info(f"[Podcast] Looking for videos with clean_user_id={clean_user_id} in {ai_video_dir}")

        all_files = list(ai_video_dir.glob("*.mp4"))
        logger.info(f"[Podcast] Found {len(all_files)} MP4 files in directory")

        # Map scene_number -> (most recent video info)
        scene_video_map: Dict[int, Dict[str, Any]] = {}

        for video_file in all_files:
            filename = video_file.name
            # Greedy match for user_id (it may contain underscores); the
            # trailing part is everything after the last underscore.
            match = re.match(r"scene_(\d+)_(.+)_([^_]+)\.mp4", filename)
            if not match:
                continue
            # Only include videos for this user
            if match.group(2) != clean_user_id:
                continue

            scene_number = int(match.group(1))
            file_stat = video_file.stat()

            # Keep the most recent video for each scene
            current = scene_video_map.get(scene_number)
            if current is None or file_stat.st_mtime > current["mtime"]:
                scene_video_map[scene_number] = {
                    "scene_number": scene_number,
                    "filename": filename,
                    "video_url": f"/api/podcast/videos/{filename}",
                    "file_size": file_stat.st_size,
                    "mtime": file_stat.st_mtime,
                }

        # Convert map to list and sort by scene number
        videos = sorted(scene_video_map.values(), key=lambda v: v["scene_number"])

        logger.info(f"[Podcast] Returning {len(videos)} videos for user: {[v['scene_number'] for v in videos]}")

        return {"videos": videos}

    except Exception:
        logger.exception("[Podcast] Error listing videos")
        return {"videos": []}
|
||||
|
||||
|
||||
@router.post("/render/combine-videos", response_model=PodcastCombineVideosResponse)
async def combine_podcast_videos(
    request_obj: Request,
    request: PodcastCombineVideosRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Combine all scene videos into a single final podcast video.
    Returns task_id for polling.

    The combination itself is submitted to the module's thread pool so the
    request returns immediately.

    Raises:
        HTTPException: 400 when the request contains no scene video URLs.
    """
    user_id = require_authenticated_user(current_user)

    logger.info(f"[Podcast] Combining {len(request.scene_video_urls)} scene videos for project {request.project_id}")

    if not request.scene_video_urls:
        raise HTTPException(status_code=400, detail="No scene videos provided")

    # Create async task
    task_id = task_manager.create_task("podcast_combine_videos")

    # Extract token for authenticated URL building.
    # Slice off the scheme prefix only; str.replace would also strip any
    # later "Bearer " occurrence inside the header value.
    auth_token = None
    auth_header = request_obj.headers.get("Authorization")
    if auth_header and auth_header.startswith("Bearer "):
        auth_token = auth_header[len("Bearer "):].strip()

    def handle_task_completion(future):
        """Callback to handle task completion and log errors."""
        try:
            future.result()  # This will raise if there was an exception
        except Exception as e:
            # loguru has no exc_info flag: extra kwargs trigger str.format on
            # the message, which breaks when the error text contains braces.
            # Attach the traceback via opt() and pass the error as an argument.
            logger.opt(exception=e).error("[Podcast] Error in video combination task: {}", e)

    # Submit to executor - returns immediately, task runs in background thread
    future = _video_executor.submit(
        _execute_combine_videos_task,
        task_id,
        request.project_id,
        request.scene_video_urls,
        request.podcast_title,
        user_id,
        auth_token,
    )
    # Add callback to log errors without blocking
    future.add_done_callback(handle_task_completion)

    return PodcastCombineVideosResponse(
        task_id=task_id,
        status="pending",
        message="Video combination started. This may take a few minutes.",
    )
|
||||
|
||||
|
||||
def _execute_combine_videos_task(
    task_id: str,
    project_id: str,
    scene_video_urls: list[str],
    podcast_title: str,
    user_id: str,
    auth_token: Optional[str] = None,
):
    """Background task to combine scene videos into final podcast.

    Maps each scene URL to a file in the AI-video directory, combines the
    files that exist, publishes progress and the result through
    ``task_manager``, and then stores the final URL on the project. Any
    failure is caught and recorded as a "failed" task status; nothing is
    raised to the caller.

    When ``auth_token`` is given it is appended to the final ``video_url`` as
    a ``token`` query parameter. That URL is therefore never logged.
    """
    try:
        task_manager.update_task_status(
            task_id, "processing", progress=10.0, message="Preparing scene videos..."
        )

        # Convert scene video URLs to local file paths
        scene_dir = PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR
        scene_video_paths = []
        for video_url in scene_video_urls:
            # Drop the query string BEFORE taking the last path segment: the
            # token in "?token=..." may itself contain "/".
            # e.g. /api/podcast/videos/scene_1_user_xxx.mp4?token=...
            filename = video_url.split("?", 1)[0].rsplit("/", 1)[-1]

            # Refuse anything that is not a plain file name.
            if not filename or filename in (".", "..") or "\\" in filename:
                logger.warning(f"[Podcast] Ignoring invalid scene video name: {filename!r}")
                continue

            video_path = scene_dir / filename
            if not video_path.is_file():
                logger.warning(f"[Podcast] Scene video not found: {video_path}")
                continue

            scene_video_paths.append(str(video_path))

        if not scene_video_paths:
            raise ValueError("No valid scene videos found to combine")

        logger.info(f"[Podcast] Found {len(scene_video_paths)} scene videos to combine")

        task_manager.update_task_status(
            task_id, "processing", progress=30.0, message="Combining videos..."
        )

        # Use dedicated PodcastVideoCombinationService
        final_videos_dir = PODCAST_VIDEOS_DIR / "Final_Videos"
        final_videos_dir.mkdir(parents=True, exist_ok=True)

        video_service = PodcastVideoCombinationService(output_dir=str(final_videos_dir))

        # Progress callback for task updates
        def progress_callback(progress: float, message: str):
            task_manager.update_task_status(
                task_id, "processing", progress=progress, message=message
            )

        task_manager.update_task_status(
            task_id, "processing", progress=50.0, message="Combining videos..."
        )

        # Combine videos using dedicated podcast service
        result = video_service.combine_videos(
            video_paths=scene_video_paths,
            podcast_title=podcast_title,
            fps=30,
            progress_callback=progress_callback,
        )

        video_filename = Path(result["video_path"]).name
        video_url = f"/api/podcast/final-videos/{video_filename}"
        if auth_token:
            video_url = f"{video_url}?token={quote(auth_token)}"

        logger.info(f"[Podcast] Final video combined: {video_filename}")

        result_data = {
            "video_url": video_url,
            "video_filename": video_filename,
            "duration": result.get("duration", 0),
            "file_size": result.get("file_size", 0),
        }

        task_manager.update_task_status(
            task_id,
            "completed",
            progress=100.0,
            message="Podcast video ready!",
            result=result_data,
        )

        # Save final video URL to project for persistence across reloads.
        # NOTE(review): the stored URL includes the caller's token when one
        # was supplied - confirm that persisting it is intended.
        try:
            from services.database import SessionLocal
            db = SessionLocal()
            try:
                service = PodcastService(db)
                service.update_project(user_id, project_id, final_video_url=video_url)
                db.commit()
                # Log the filename only: video_url may embed the bearer token.
                logger.info(f"[Podcast] Saved final video URL to project {project_id}: {video_filename}")
            finally:
                db.close()
        except Exception as e:
            logger.warning(f"[Podcast] Failed to save final video URL to project: {e}")
            # Don't fail the task if project update fails - video is still available via task result

        logger.info(f"[Podcast] Task {task_id} marked as completed successfully")

    except Exception as e:
        logger.exception(f"[Podcast] Failed to combine videos: {e}")
        error_msg = _extract_error_message(e)
        task_manager.update_task_status(
            task_id,
            "failed",
            progress=0.0,
            message=f"Video combination failed: {error_msg}",
            error=str(error_msg),
        )
        logger.error(f"[Podcast] Task {task_id} marked as failed: {error_msg}")
|
||||
|
||||
|
||||
@router.get("/final-videos/{filename}")
async def serve_final_podcast_video(
    filename: str,
    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
    """Serve the final combined podcast video with authentication.

    Parameters:
        filename (str): Bare file name of a video inside the Final_Videos directory.
        current_user (Dict[str, Any]): Injected by ``get_current_user_with_query_token``.

    Returns:
        FileResponse: The video, served as ``video/mp4``.

    Raises:
        HTTPException: 400 when the filename contains path components,
            404 when no such regular file exists.
    """
    # Called for its authentication check only; the returned user id is not used here.
    require_authenticated_user(current_user)

    # Reject traversal attempts BEFORE touching the filesystem, so a crafted
    # name can never be used to probe paths outside the videos directory.
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    final_videos_dir = PODCAST_VIDEOS_DIR / "Final_Videos"
    video_path = final_videos_dir / filename

    # is_file() (not exists()) so a directory is reported as 404 instead of
    # failing later inside FileResponse.
    if not video_path.is_file():
        raise HTTPException(status_code=404, detail="Video not found")

    return FileResponse(
        path=str(video_path),
        media_type="video/mp4",
        filename=filename,
    )
|
||||
280
backend/api/podcast/models.py
Normal file
280
backend/api/podcast/models.py
Normal file
@@ -0,0 +1,280 @@
|
||||
"""
|
||||
Podcast API Models
|
||||
|
||||
All Pydantic request/response models for podcast endpoints.
|
||||
"""
|
||||
|
||||
from datetime import datetime
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, ConfigDict, Field, model_validator
|
||||
|
||||
|
||||
class PodcastProjectResponse(BaseModel):
    """Response model for podcast project.

    ``from_attributes`` is enabled so instances can be validated from objects
    exposing these fields as attributes (presumably the ORM project row;
    confirm against callers).
    """

    # Pydantic v2 configuration; replaces the deprecated class-based ``Config``.
    model_config = ConfigDict(from_attributes=True)

    # Identity and ownership
    id: int
    project_id: str
    user_id: str

    # Episode parameters supplied at creation time
    idea: str
    duration: int
    speakers: int
    budget_cap: float

    # Workflow state; each is None until it has been stored on the project
    analysis: Optional[Dict[str, Any]] = None
    queries: Optional[List[Dict[str, Any]]] = None
    selected_queries: Optional[List[str]] = None
    research: Optional[Dict[str, Any]] = None
    raw_research: Optional[Dict[str, Any]] = None
    estimate: Optional[Dict[str, Any]] = None
    script_data: Optional[Dict[str, Any]] = None
    render_jobs: Optional[List[Dict[str, Any]]] = None
    knobs: Optional[Dict[str, Any]] = None
    research_provider: Optional[str] = None

    # UI state flags
    show_script_editor: bool = False
    show_render_queue: bool = False
    current_step: Optional[str] = None

    status: str = "draft"
    is_favorite: bool = False
    final_video_url: Optional[str] = None
    created_at: datetime
    updated_at: datetime
|
||||
|
||||
|
||||
class PodcastAnalyzeRequest(BaseModel):
    """Payload for requesting an analysis of a podcast idea."""

    # Required: the topic or idea to analyze
    idea: str = Field(..., description="Podcast topic or idea")
    # Optional knobs with defaults
    duration: int = Field(default=10, description="Target duration in minutes")
    speakers: int = Field(default=1, description="Number of speakers")
|
||||
|
||||
|
||||
class PodcastAnalyzeResponse(BaseModel):
    """Result of analyzing a podcast idea."""

    audience: str
    content_type: str
    top_keywords: List[str]
    suggested_outlines: List[Dict[str, Any]]
    title_suggestions: List[str]
    # Optional Exa configuration suggested alongside the analysis
    exa_suggested_config: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class PodcastScriptRequest(BaseModel):
    """Payload for generating a podcast script."""

    idea: str = Field(..., description="Podcast idea or topic")
    duration_minutes: int = Field(default=10, description="Target duration in minutes")
    speakers: int = Field(default=1, description="Number of speakers")
    research: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Optional research payload to ground the script",
    )
|
||||
|
||||
|
||||
class PodcastSceneLine(BaseModel):
    """A single spoken line within a podcast scene."""

    speaker: str
    text: str
    # Emphasis flag for this line; off by default
    emphasis: Optional[bool] = False
|
||||
|
||||
|
||||
class PodcastScene(BaseModel):
    """One scene of a podcast script: metadata plus its ordered lines."""

    id: str
    title: str
    duration: int
    lines: List[PodcastSceneLine]
    approved: bool = False
    emotion: Optional[str] = None
    # Generated image URL for video generation
    imageUrl: Optional[str] = None
|
||||
|
||||
|
||||
class PodcastExaConfig(BaseModel):
    """Exa search settings used for podcast research."""

    exa_search_type: Optional[str] = Field(default="auto", description="auto | keyword | neural")
    exa_category: Optional[str] = None
    exa_include_domains: List[str] = []
    exa_exclude_domains: List[str] = []
    max_sources: int = 8
    include_statistics: Optional[bool] = False
    date_range: Optional[str] = Field(
        default=None,
        description="last_month | last_3_months | last_year | all_time",
    )

    @model_validator(mode="after")
    def validate_domains(self):
        """Drop the exclude list whenever an include list is also supplied."""
        both_lists_set = bool(self.exa_include_domains) and bool(self.exa_exclude_domains)
        if both_lists_set:
            # Exa API does not allow both include and exclude domains together
            # with contents, so include_domains wins and exclude_domains is cleared.
            self.exa_exclude_domains = []
        return self
|
||||
|
||||
|
||||
class PodcastExaResearchRequest(BaseModel):
    """Research request that goes to Exa directly (no blog writer involved)."""

    topic: str
    queries: List[str]
    # Optional search settings
    exa_config: Optional[PodcastExaConfig] = None
|
||||
|
||||
|
||||
class PodcastExaSource(BaseModel):
    """A single research source in an Exa research response."""

    # Text fields default to empty strings rather than None
    title: str = ""
    url: str = ""
    excerpt: str = ""

    # Optional enrichment
    published_at: Optional[str] = None
    highlights: Optional[List[str]] = None
    summary: Optional[str] = None
    source_type: Optional[str] = None
    index: Optional[int] = None
|
||||
|
||||
|
||||
class PodcastExaResearchResponse(BaseModel):
    """Research results: sources plus metadata about the search."""

    sources: List[PodcastExaSource]
    search_queries: List[str] = []
    cost: Optional[Dict[str, Any]] = None
    search_type: Optional[str] = None
    # Provider label; defaults to "exa"
    provider: str = "exa"
    content: Optional[str] = None
|
||||
|
||||
|
||||
class PodcastScriptResponse(BaseModel):
    """Generated podcast script, expressed as an ordered list of scenes."""

    scenes: List[PodcastScene]
|
||||
|
||||
|
||||
class PodcastAudioRequest(BaseModel):
    """Request to synthesize speech (TTS) for one podcast scene."""

    # What to speak
    scene_id: str
    scene_title: str
    text: str

    # Voice shaping
    voice_id: Optional[str] = "Wise_Woman"
    speed: Optional[float] = 1.0
    volume: Optional[float] = 1.0
    pitch: Optional[float] = 0.0
    emotion: Optional[str] = "neutral"
    # Better number reading for statistics
    english_normalization: Optional[bool] = False

    # Output encoding; None leaves the value unspecified
    sample_rate: Optional[int] = None
    bitrate: Optional[int] = None
    channel: Optional[str] = None
    format: Optional[str] = None
    language_boost: Optional[str] = None
    enable_sync_mode: Optional[bool] = True
|
||||
|
||||
|
||||
class PodcastAudioResponse(BaseModel):
    """Metadata describing the audio generated for a scene."""

    scene_id: str
    scene_title: str

    # Where the audio can be found
    audio_filename: str
    audio_url: str

    # How it was produced
    provider: str
    model: str
    voice_id: str

    # Size and cost figures
    text_length: int
    file_size: int
    cost: float
|
||||
|
||||
|
||||
class PodcastProjectListResponse(BaseModel):
    """A page of podcast projects together with paging information."""

    projects: List[PodcastProjectResponse]
    # Paging fields
    total: int
    limit: int
    offset: int
|
||||
|
||||
|
||||
class CreateProjectRequest(BaseModel):
    """Payload for creating a new podcast project."""

    # Required
    project_id: str = Field(..., description="Unique project ID")
    idea: str = Field(..., description="Episode idea or URL")
    duration: int = Field(..., description="Duration in minutes")

    # Optional, with defaults
    speakers: int = Field(default=1, description="Number of speakers")
    budget_cap: float = Field(default=50.0, description="Budget cap in USD")
    avatar_url: Optional[str] = Field(default=None, description="Optional presenter avatar URL")
|
||||
|
||||
|
||||
class UpdateProjectRequest(BaseModel):
    """Partial update of a project's state; every field is optional."""

    # Research and planning data
    analysis: Optional[Dict[str, Any]] = None
    queries: Optional[List[Dict[str, Any]]] = None
    selected_queries: Optional[List[str]] = None
    research: Optional[Dict[str, Any]] = None
    raw_research: Optional[Dict[str, Any]] = None
    estimate: Optional[Dict[str, Any]] = None

    # Script and rendering data
    script_data: Optional[Dict[str, Any]] = None
    render_jobs: Optional[List[Dict[str, Any]]] = None
    knobs: Optional[Dict[str, Any]] = None
    research_provider: Optional[str] = None

    # UI and workflow state
    show_script_editor: Optional[bool] = None
    show_render_queue: Optional[bool] = None
    current_step: Optional[str] = None
    status: Optional[str] = None
    final_video_url: Optional[str] = None
|
||||
|
||||
|
||||
class PodcastCombineAudioRequest(BaseModel):
    """Payload for merging per-scene audio files into one track."""

    project_id: str
    scene_ids: List[str] = Field(..., description="List of scene IDs to combine")
    scene_audio_urls: List[str] = Field(..., description="List of audio URLs for each scene")
|
||||
|
||||
|
||||
class PodcastCombineAudioResponse(BaseModel):
    """Metadata for a combined podcast audio file."""

    combined_audio_url: str
    combined_audio_filename: str
    # Summary figures for the combined file
    total_duration: float
    file_size: int
    scene_count: int
|
||||
|
||||
|
||||
class PodcastImageRequest(BaseModel):
    """Request to generate an image for a single podcast scene."""

    scene_id: str
    scene_title: str

    # Optional context
    scene_content: Optional[str] = None  # Optional: scene lines text for context
    idea: Optional[str] = None  # Optional: podcast idea for context
    base_avatar_url: Optional[str] = None  # Base avatar image URL for scene variations

    # Output dimensions
    width: int = 1024
    height: int = 1024

    # Custom prompt from user (overrides auto-generated prompt)
    custom_prompt: Optional[str] = None

    # Rendering options
    style: Optional[str] = None  # "Auto", "Fiction", or "Realistic"
    rendering_speed: Optional[str] = None  # "Default", "Turbo", or "Quality"
    aspect_ratio: Optional[str] = None  # "1:1", "16:9", "9:16", "4:3", "3:4"
|
||||
|
||||
|
||||
class PodcastImageResponse(BaseModel):
    """Metadata describing the image generated for a scene."""

    scene_id: str
    scene_title: str

    # Where the image can be found
    image_filename: str
    image_url: str

    # Dimensions
    width: int
    height: int

    # Provenance and cost
    provider: str
    model: Optional[str] = None
    cost: float
|
||||
|
||||
|
||||
class PodcastVideoGenerationRequest(BaseModel):
    """Payload for generating the video of a single podcast scene."""

    # Required identifiers and inputs
    project_id: str = Field(..., description="Podcast project ID")
    scene_id: str = Field(..., description="Scene ID")
    scene_title: str = Field(..., description="Scene title")
    audio_url: str = Field(..., description="URL to the generated audio file")

    # Optional inputs and tuning
    avatar_image_url: Optional[str] = Field(
        default=None,
        description="URL to scene image (required for video generation)",
    )
    resolution: str = Field(default="720p", description="Video resolution (480p or 720p)")
    prompt: Optional[str] = Field(default=None, description="Optional animation prompt override")
    seed: Optional[int] = Field(default=-1, description="Random seed; -1 for random")
    mask_image_url: Optional[str] = Field(
        default=None,
        description="Optional mask image URL to specify animated region",
    )
|
||||
|
||||
|
||||
class PodcastVideoGenerationResponse(BaseModel):
    """Response returned by the podcast video generation endpoint."""

    # Identifier of the task, plus its status and a message
    task_id: str
    status: str
    message: str
|
||||
|
||||
|
||||
class PodcastCombineVideosRequest(BaseModel):
    """Payload for stitching scene videos into the final podcast video."""

    project_id: str = Field(..., description="Project ID")
    scene_video_urls: List[str] = Field(..., description="List of scene video URLs in order")
    podcast_title: str = Field(default="Podcast", description="Title for the final podcast video")
|
||||
|
||||
|
||||
class PodcastCombineVideosResponse(BaseModel):
    """Response returned by the combine-videos endpoint."""

    # Identifier of the task, plus its status and a message
    task_id: str
    status: str
    message: str
|
||||
|
||||
143
backend/api/podcast/presenter_personas.py
Normal file
143
backend/api/podcast/presenter_personas.py
Normal file
@@ -0,0 +1,143 @@
|
||||
"""
|
||||
Podcast Presenter Personas
|
||||
|
||||
Lightweight, podcast-specific presenter persona presets used to steer avatar generation.
|
||||
|
||||
Design goals:
|
||||
- Market-fit + style consistency without asking end-users to choose sensitive traits.
|
||||
- Deterministic persona selection using analysis hints (audience/content type/keywords).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, Optional, List
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PresenterPersona:
    """Immutable preset describing a podcast presenter's market and look."""

    # Identifier and display label
    id: str
    label: str
    # Market bucket, e.g. "global", "us_canada", "uk_eu", "india", "latam"
    target_market: str
    # Visual style, e.g. "corporate", "tech_modern", "creator"
    style: str
    # Prompt fragment to inject
    prompt: str
|
||||
|
||||
|
||||
# NOTE: Avoid encoding/guessing ethnicity. Keep personas about market-fit + style.
|
||||
# Registry of presenter presets, keyed by persona id.
# The presets are declared once and indexed by their own ``id`` so that the
# dictionary key and the persona's ``id`` field cannot drift apart.
PERSONAS: Dict[str, PresenterPersona] = {
    persona.id: persona
    for persona in (
        PresenterPersona(
            id="global_corporate",
            label="Global — Corporate Host",
            target_market="global",
            style="corporate",
            prompt=(
                "professional podcast presenter, business professional attire (white shirt and light gray blazer), "
                "confident, friendly, camera-ready, neutral background, studio lighting"
            ),
        ),
        PresenterPersona(
            id="global_tech_modern",
            label="Global — Tech Modern Host",
            target_market="global",
            style="tech_modern",
            prompt=(
                "modern professional podcast presenter, contemporary tech-forward style, "
                "clean minimal studio background, soft studio lighting, friendly and energetic expression"
            ),
        ),
        PresenterPersona(
            id="global_news_anchor",
            label="Global — News Anchor",
            target_market="global",
            style="news_anchor",
            prompt=(
                "professional news-style presenter, polished on-camera appearance, "
                "formal attire, authoritative yet approachable expression, studio lighting, neutral background"
            ),
        ),
        PresenterPersona(
            id="india_corporate",
            label="India — Corporate Host",
            target_market="india",
            style="corporate",
            prompt=(
                "professional podcast presenter for the Indian market, business professional attire, "
                "polished and confident on-camera presence, clean studio background, soft studio lighting"
            ),
        ),
        PresenterPersona(
            id="us_canada_creator",
            label="US/Canada — Creator Host",
            target_market="us_canada",
            style="creator",
            prompt=(
                "professional podcast creator host, business casual style, approachable and conversational expression, "
                "clean studio background, soft studio lighting"
            ),
        ),
    )
}
|
||||
|
||||
|
||||
def get_persona(persona_id: Optional[str]) -> Optional[PresenterPersona]:
    """Look up a persona by id.

    Returns None when ``persona_id`` is empty/None or is not a key of PERSONAS.
    """
    return PERSONAS.get(persona_id) if persona_id else None
|
||||
|
||||
|
||||
def list_personas() -> List[PresenterPersona]:
    """Return all registered personas as a new list, in registry order."""
    return [*PERSONAS.values()]
|
||||
|
||||
|
||||
def _mentions_any(text: str, terms: List[str]) -> bool:
    """Return True when lower-cased *text* mentions any of *terms*.

    Terms of three characters or fewer ("us", "uk", "eu", "ai", "usa") must
    appear as whole words; otherwise "us" would match "business" and "ai"
    would match "training". Longer terms keep plain substring matching.
    """
    import re

    for term in terms:
        if len(term) <= 3:
            if re.search(rf"\b{re.escape(term)}\b", text):
                return True
        elif term in text:
            return True
    return False


def choose_persona_id(
    audience: Optional[str] = None,
    content_type: Optional[str] = None,
    top_keywords: Optional[List[str]] = None,
) -> str:
    """
    Choose a persona id using non-sensitive heuristics from analysis.

    - Uses explicit market hints if present (e.g. "India", "US", "UK", etc.)
    - Uses content_type / keywords to pick a style
    - Falls back to global corporate

    Parameters:
        audience: Free-text audience description; scanned for market hints.
        content_type: Free-text content type; scanned for style hints.
        top_keywords: Keywords compared (exact, case-insensitive) against
            small style vocabularies.

    Returns:
        One of the persona ids "india_corporate", "us_canada_creator",
        "global_news_anchor", "global_tech_modern" or "global_corporate".
    """
    audience_l = (audience or "").lower()
    content_l = (content_type or "").lower()
    keywords_l = [k.lower() for k in (top_keywords or [])]

    # Market hints (explicit only); the first matching market wins.
    if _mentions_any(audience_l, ["india", "indian"]):
        market = "india"
    elif _mentions_any(audience_l, ["us", "usa", "united states", "canada", "north america"]):
        market = "us_canada"
    elif _mentions_any(audience_l, ["uk", "united kingdom", "europe", "eu", "european"]):
        market = "uk_eu"
    elif _mentions_any(audience_l, ["latam", "latin america", "south america"]):
        market = "latam"
    else:
        market = "global"

    # Style hints; later checks deliberately override earlier ones
    # (creator > tech_modern > news_anchor > corporate).
    style = "corporate"
    if "news" in content_l or "analysis" in content_l:
        style = "news_anchor"
    if _mentions_any(content_l, ["tech", "technology", "ai", "software"]) or any(
        kw in ("ai", "technology", "tech", "software") for kw in keywords_l
    ):
        style = "tech_modern"
    if _mentions_any(content_l, ["casual", "creator", "conversational"]) or any(
        kw in ("creator", "youtube", "tiktok", "instagram") for kw in keywords_l
    ):
        style = "creator"

    # Map market+style to a concrete persona id. Only two market-specific
    # presets are mapped here; every other combination falls back to a
    # global persona chosen by style.
    if market == "india" and style == "corporate":
        return "india_corporate"
    if market == "us_canada" and style == "creator":
        return "us_canada_creator"
    if style == "news_anchor":
        return "global_news_anchor"
    if style == "tech_modern":
        return "global_tech_modern"
    return "global_corporate"
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
105
backend/api/podcast/utils.py
Normal file
105
backend/api/podcast/utils.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
Podcast API Utility Functions
|
||||
|
||||
Helper functions for loading media files and other utilities.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
from fastapi import HTTPException
|
||||
from loguru import logger
|
||||
|
||||
from .constants import PODCAST_AUDIO_DIR, PODCAST_IMAGES_DIR
|
||||
|
||||
|
||||
def load_podcast_audio_bytes(audio_url: str) -> bytes:
    """Load podcast audio bytes from URL. Only handles /api/podcast/audio/ URLs.

    Parameters:
        audio_url: Absolute URL or bare path containing ``/api/podcast/audio/<filename>``.

    Returns:
        The raw bytes of the audio file stored under PODCAST_AUDIO_DIR.

    Raises:
        HTTPException: 400 for a missing/unsupported URL or empty filename,
            403 when the resolved path escapes PODCAST_AUDIO_DIR,
            404 when the file does not exist, 500 on any other failure.
    """
    if not audio_url:
        raise HTTPException(status_code=400, detail="Audio URL is required")

    try:
        parsed = urlparse(audio_url)
        # Bare paths (no scheme) are used as-is; the query string is stripped below.
        path = parsed.path if parsed.scheme else audio_url

        # Only handle /api/podcast/audio/ URLs
        prefix = "/api/podcast/audio/"
        if prefix not in path:
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported audio URL format: {audio_url}. Only /api/podcast/audio/ URLs are supported."
            )

        filename = path.split(prefix, 1)[1].split("?", 1)[0].strip()
        if not filename:
            raise HTTPException(status_code=400, detail=f"Could not extract filename from URL: {audio_url}")

        # Podcast audio files are stored in podcast_audio directory
        audio_path = (PODCAST_AUDIO_DIR / filename).resolve()

        # Security check: ensure path is within PODCAST_AUDIO_DIR.
        # relative_to() compares whole path components; a string-prefix test
        # would wrongly accept sibling directories such as "podcast_audio_backup".
        try:
            audio_path.relative_to(PODCAST_AUDIO_DIR)
        except ValueError:
            logger.error(f"[Podcast] Attempted path traversal when resolving audio: {audio_url}")
            raise HTTPException(status_code=403, detail="Invalid audio path") from None

        # is_file() so that a directory is reported as 404 rather than a 500 from read_bytes().
        if not audio_path.is_file():
            logger.warning(f"[Podcast] Audio file not found: {audio_path}")
            raise HTTPException(status_code=404, detail=f"Audio file not found: {filename}")

        return audio_path.read_bytes()
    except HTTPException:
        # Already a well-formed API error; pass it through untouched.
        raise
    except Exception as exc:
        logger.error(f"[Podcast] Failed to load audio: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to load audio: {str(exc)}")
|
||||
|
||||
|
||||
def load_podcast_image_bytes(image_url: str) -> bytes:
    """Load podcast image bytes from URL. Only handles /api/podcast/images/ URLs.

    Parameters:
        image_url: Absolute URL or bare path containing ``/api/podcast/images/<filename>``.

    Returns:
        The raw bytes of the image file stored under PODCAST_IMAGES_DIR.

    Raises:
        HTTPException: 400 for a missing/unsupported URL or empty filename,
            403 when the resolved path escapes PODCAST_IMAGES_DIR,
            404 when the file does not exist, 500 on any other failure.
    """
    if not image_url:
        raise HTTPException(status_code=400, detail="Image URL is required")

    logger.info(f"[Podcast] Loading image from URL: {image_url}")

    try:
        parsed = urlparse(image_url)
        # Bare paths (no scheme) are used as-is; the query string is stripped below.
        path = parsed.path if parsed.scheme else image_url

        # Only handle /api/podcast/images/ URLs
        prefix = "/api/podcast/images/"
        if prefix not in path:
            logger.error(f"[Podcast] Unsupported image URL format: {image_url}")
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported image URL format: {image_url}. Only /api/podcast/images/ URLs are supported."
            )

        filename = path.split(prefix, 1)[1].split("?", 1)[0].strip()
        if not filename:
            logger.error(f"[Podcast] Could not extract filename from URL: {image_url}")
            raise HTTPException(status_code=400, detail=f"Could not extract filename from URL: {image_url}")

        logger.info(f"[Podcast] Extracted filename: {filename}")
        logger.info(f"[Podcast] PODCAST_IMAGES_DIR: {PODCAST_IMAGES_DIR}")

        # Podcast images are stored in podcast_images directory
        image_path = (PODCAST_IMAGES_DIR / filename).resolve()
        logger.info(f"[Podcast] Resolved image path: {image_path}")

        # Security check: ensure path is within PODCAST_IMAGES_DIR.
        # relative_to() compares whole path components; a string-prefix test
        # would wrongly accept sibling directories such as "podcast_images_old".
        try:
            image_path.relative_to(PODCAST_IMAGES_DIR)
        except ValueError:
            logger.error(f"[Podcast] Attempted path traversal when resolving image: {image_url} -> {image_path}")
            raise HTTPException(status_code=403, detail="Invalid image path") from None

        # is_file() so that a directory is reported as 404 rather than a 500 from read_bytes().
        if not image_path.is_file():
            logger.error(f"[Podcast] Image file not found: {image_path}")
            raise HTTPException(status_code=404, detail=f"Image file not found: {filename}")

        image_bytes = image_path.read_bytes()
        logger.info(f"[Podcast] ✅ Successfully loaded image: {len(image_bytes)} bytes from {image_path}")
        return image_bytes
    except HTTPException:
        # Already a well-formed API error; pass it through untouched.
        raise
    except Exception as exc:
        logger.error(f"[Podcast] Failed to load image: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to load image: {str(exc)}")
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import FileResponse
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel
|
||||
|
||||
@@ -53,6 +53,9 @@ class PodcastProject(Base):
|
||||
status = Column(String(50), default="draft", nullable=False, index=True) # draft, in_progress, completed, archived
|
||||
is_favorite = Column(Boolean, default=False, index=True)
|
||||
|
||||
# Final combined video URL (persisted for reloads)
|
||||
final_video_url = Column(String(1000), nullable=True) # URL to final combined podcast video
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False, index=True)
|
||||
|
||||
91
backend/scripts/run_final_video_url_migration.py
Normal file
91
backend/scripts/run_final_video_url_migration.py
Normal file
@@ -0,0 +1,91 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Migration script to add final_video_url column to podcast_projects table.
|
||||
This script should be run once to add the column to existing databases.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
|
||||
# Make the backend directory (the parent of this script's directory)
# importable by putting it at the front of the Python path.
_script_dir = Path(__file__).parent
backend_dir = _script_dir.parent
sys.path.insert(0, str(backend_dir))
|
||||
|
||||
def run_migration():
    """Run the final_video_url column migration.

    Adds a nullable ``final_video_url VARCHAR(1000)`` column to the
    ``podcast_projects`` table of ``alwrity.db`` in the backend directory.

    Returns:
        bool: True when the column was added, already existed, or the
        database file does not exist yet; False on any failure.
    """
    conn = None
    try:
        # Get the database path
        db_path = backend_dir / "alwrity.db"

        logger.info("🔄 Starting final_video_url column migration...")
        logger.info(f"📁 Database path: {db_path}")

        # Check if database exists
        if not db_path.exists():
            logger.warning(f"⚠️ Database file not found at {db_path}")
            logger.info("ℹ️ New databases will have this column created automatically by SQLAlchemy")
            return True

        # Read the migration SQL
        migration_file = backend_dir / "database" / "migrations" / "009_add_final_video_url_to_podcast_projects.sql"

        if not migration_file.exists():
            logger.error(f"❌ Migration file not found: {migration_file}")
            return False

        # NOTE(review): the file is read only to confirm it is readable; its
        # contents are not executed. The ALTER statement below is hard-coded.
        # Confirm whether the SQL file should be the source of truth.
        with open(migration_file, 'r') as f:
            f.read()

        logger.info("📋 Migration SQL loaded successfully")

        # Connect to database and run migration
        conn = sqlite3.connect(str(db_path))
        cursor = conn.cursor()

        # Check if column already exists (row[1] of table_info is the column name)
        cursor.execute("PRAGMA table_info(podcast_projects)")
        columns = [row[1] for row in cursor.fetchall()]

        if 'final_video_url' in columns:
            logger.info("ℹ️ Column 'final_video_url' already exists, skipping migration")
            return True

        # Execute the migration
        logger.info("🔧 Adding final_video_url column...")
        cursor.execute("ALTER TABLE podcast_projects ADD COLUMN final_video_url VARCHAR(1000) NULL")
        conn.commit()

        # Verify the column was added
        cursor.execute("PRAGMA table_info(podcast_projects)")
        columns_after = [row[1] for row in cursor.fetchall()]

        if 'final_video_url' in columns_after:
            logger.info("✅ Migration completed successfully! Column 'final_video_url' added to podcast_projects table")
            return True

        logger.error("❌ Migration failed: Column was not added")
        return False

    except sqlite3.OperationalError as e:
        # Treat a duplicate-column error as success so the script stays idempotent.
        if "duplicate column name" in str(e).lower():
            logger.info("ℹ️ Column 'final_video_url' already exists, skipping migration")
            return True
        logger.error(f"❌ Database error: {e}")
        return False
    except Exception as e:
        logger.error(f"❌ Error running migration: {e}")
        import traceback
        traceback.print_exc()
        return False
    finally:
        # Always release the connection, including on the error paths above.
        if conn is not None:
            conn.close()
|
||||
|
||||
if __name__ == "__main__":
    # Exit status mirrors the migration outcome: 0 on success, 1 on failure.
    sys.exit(0 if run_migration() else 1)
|
||||
|
||||
@@ -137,6 +137,9 @@ def generate_audio(
|
||||
|
||||
# Generate audio using WaveSpeed
|
||||
try:
|
||||
# Avoid passing duplicate enable_sync_mode; allow override via kwargs
|
||||
enable_sync_mode = kwargs.pop("enable_sync_mode", True)
|
||||
|
||||
client = WaveSpeedClient()
|
||||
audio_bytes = client.generate_speech(
|
||||
text=text,
|
||||
@@ -145,7 +148,7 @@ def generate_audio(
|
||||
volume=volume,
|
||||
pitch=pitch,
|
||||
emotion=emotion,
|
||||
enable_sync_mode=True,
|
||||
enable_sync_mode=enable_sync_mode,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
|
||||
11
backend/services/podcast/__init__.py
Normal file
11
backend/services/podcast/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""
|
||||
Podcast Services Module
|
||||
|
||||
Dedicated services for podcast generation functionality.
|
||||
Separate from story writer services to maintain clear separation of concerns.
|
||||
"""
|
||||
|
||||
from .video_combination_service import PodcastVideoCombinationService
|
||||
|
||||
__all__ = ["PodcastVideoCombinationService"]
|
||||
|
||||
382
backend/services/podcast/video_combination_service.py
Normal file
382
backend/services/podcast/video_combination_service.py
Normal file
@@ -0,0 +1,382 @@
|
||||
"""
|
||||
Podcast Video Combination Service
|
||||
|
||||
Dedicated service for combining podcast scene videos into final episodes.
|
||||
Separate from StoryVideoGenerationService to avoid breaking story writer functionality.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
import warnings
|
||||
import time
|
||||
import threading
|
||||
from typing import List, Dict, Any, Optional
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class PodcastVideoCombinationService:
|
||||
"""Service for combining podcast scene videos into final episodes."""
|
||||
|
||||
def __init__(self, output_dir: Optional[str] = None):
    """
    Set up the service and make sure its output directory exists.

    Parameters:
        output_dir (str, optional): Directory to save combined videos.
            When omitted or empty, 'podcast_videos/Final_Videos' under the
            directory three levels above this file is used.
    """
    if not output_dir:
        # Default location: <three levels up>/podcast_videos/Final_Videos
        project_root = Path(__file__).parent.parent.parent
        target_dir = project_root / "podcast_videos" / "Final_Videos"
    else:
        target_dir = Path(output_dir)

    self.output_dir = target_dir
    # Create the directory (and any missing parents) up front.
    self.output_dir.mkdir(parents=True, exist_ok=True)
    logger.info(f"[PodcastVideoCombination] Initialized with output directory: {self.output_dir}")
|
||||
|
||||
def combine_videos(
    self,
    video_paths: List[str],
    podcast_title: str,
    fps: int = 30,
    progress_callback: Optional[callable] = None,
) -> Dict[str, Any]:
    """
    Combine multiple video files into a single final podcast video.

    This method is specifically designed for podcast videos that already have
    embedded audio. It does not require separate audio files.

    Paths that do not point to an existing file are logged and skipped, and so
    are clips that fail to load or report a non-positive duration. Every clip
    opened here is closed before the method returns or raises.

    Parameters:
        video_paths (List[str]): List of video file paths to combine.
        podcast_title (str): Title of the podcast episode (used for the output filename).
        fps (int): Frames per second for output video (default: 30).
        progress_callback (callable, optional): Callback function for progress updates.
            Signature: callback(progress: float, message: str)
            During encoding it is also invoked from a background thread with
            simulated (time-based) progress values.

    Returns:
        Dict[str, Any]: Video metadata with keys 'video_path', 'video_filename',
            'video_url', 'duration' (sum of the loaded clips' durations), 'fps',
            'file_size' (bytes) and 'num_scenes' (number of clips combined).

    Raises:
        ValueError: If no valid video files are provided.
        RuntimeError: If video combination fails (the original error is chained).
    """
    if not video_paths:
        raise ValueError("No video paths provided")

    # Validate all video files exist
    valid_video_paths = []
    for video_path in video_paths:
        path = Path(video_path)
        if path.exists() and path.is_file():
            valid_video_paths.append(str(path))
        else:
            logger.warning(f"[PodcastVideoCombination] Video not found: {video_path}")

    if not valid_video_paths:
        raise ValueError("No valid video files found to combine")

    logger.info(f"[PodcastVideoCombination] Combining {len(valid_video_paths)} videos")

    # Tracked outside the try block so they can be released on every exit path.
    video_clips = []
    final_video = None

    def _release_clips():
        """Close the concatenated clip and all source clips; safe to call more than once."""
        nonlocal final_video
        if final_video is not None:
            try:
                final_video.close()
            except Exception as e:
                logger.warning(f"[PodcastVideoCombination] Error closing final video clip: {e}")
            final_video = None

        for clip in video_clips:
            try:
                clip.close()
            except Exception as e:
                logger.warning(f"[PodcastVideoCombination] Error closing video clip: {e}")
        video_clips.clear()

    try:
        # Import MoviePy lazily so the module can be imported without it
        try:
            from moviepy import VideoFileClip, concatenate_videoclips
        except Exception as e:
            logger.error(f"[PodcastVideoCombination] MoviePy not installed: {e}")
            raise RuntimeError("MoviePy is not installed. Please install it to combine videos.") from e

        # Suppress MoviePy warnings about incomplete frames (common with some video encodings)
        warnings.filterwarnings("ignore", category=UserWarning, module="moviepy")

        if progress_callback:
            progress_callback(10.0, "Loading video clips...")

        # Load all video clips
        total_duration = 0.0

        for idx, video_path in enumerate(valid_video_paths):
            video_clip = None
            clip_kept = False
            try:
                logger.info(f"[PodcastVideoCombination] Loading video {idx + 1}/{len(valid_video_paths)}: {video_path}")

                # Load video clip with error handling for incomplete files
                # MoviePy will use the last valid frame if frames are missing at the end
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", UserWarning)
                    video_clip = VideoFileClip(str(video_path))

                # Validate clip was loaded successfully
                if video_clip.duration <= 0:
                    logger.warning(f"[PodcastVideoCombination] Video {video_path} has invalid duration, skipping")
                    video_clip.close()
                    continue

                # Videos already have embedded audio, no need to replace
                video_clips.append(video_clip)
                clip_kept = True
                total_duration += video_clip.duration

                if progress_callback:
                    progress = 10.0 + ((idx + 1) / len(valid_video_paths)) * 60.0
                    progress_callback(progress, f"Loaded video {idx + 1}/{len(valid_video_paths)}")

            except Exception as e:
                logger.error(f"[PodcastVideoCombination] Failed to load video {video_path}: {e}")
                # Release a clip that was opened but never added to the list,
                # otherwise its reader would stay open.
                if video_clip is not None and not clip_kept:
                    try:
                        video_clip.close()
                    except Exception as close_error:
                        logger.warning(f"[PodcastVideoCombination] Error closing video clip: {close_error}")
                # Continue with other videos instead of failing completely
                continue

        if not video_clips:
            raise RuntimeError("No valid video clips were loaded")

        num_clips = len(video_clips)
        logger.info(f"[PodcastVideoCombination] Loaded {num_clips} clips, total duration: {total_duration:.2f}s")

        if progress_callback:
            progress_callback(75.0, f"Concatenating {num_clips} videos ({total_duration:.1f}s total)...")

        # Concatenate all video clips
        logger.info(f"[PodcastVideoCombination] Concatenating {num_clips} video clips (total duration: {total_duration:.2f}s)")
        final_video = concatenate_videoclips(video_clips, method="compose")
        logger.info(f"[PodcastVideoCombination] Concatenation complete, final video duration: {final_video.duration:.2f}s")

        # Generate output filename
        video_filename = self._generate_video_filename(podcast_title)
        output_path = self.output_dir / video_filename

        if progress_callback:
            progress_callback(85.0, f"Rendering final video ({total_duration:.1f}s total)...")

        # Write final video file
        logger.info(
            f"[PodcastVideoCombination] Rendering final video to: {output_path} "
            f"(duration: {total_duration:.2f}s, {num_clips} clips)"
        )

        # Use faster preset for quicker encoding (still good quality)
        # 'ultrafast' is fastest but lower quality, 'fast' is good balance
        encoding_preset = 'fast'  # Faster than 'medium' but still good quality

        # Suppress warnings during video writing as well
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", UserWarning)

            # Write video with optimized settings
            # Note: write_videofile is blocking and can take several minutes for longer videos
            # Estimated time: ~1-2 minutes per minute of video content
            estimated_time_minutes = max(1, int(total_duration / 60) * 2)
            logger.info(
                f"[PodcastVideoCombination] Starting video encoding "
                f"(estimated time: ~{estimated_time_minutes} minutes for {total_duration:.1f}s video)..."
            )

            start_time = time.time()

            # Start a thread to update progress periodically during encoding
            # Since write_videofile is blocking, we'll simulate progress
            progress_thread = None
            encoding_done = threading.Event()

            if progress_callback:
                def update_progress_periodically():
                    """Update progress every 5 seconds during encoding"""
                    base_progress = 87.0
                    max_progress = 98.0
                    progress_range = max_progress - base_progress
                    update_interval = 5.0  # Update every 5 seconds
                    elapsed = 0.0

                    try:
                        while not encoding_done.is_set():
                            elapsed += update_interval
                            # Simulate progress: start at 87% and ramp linearly towards 98%
                            # over the estimated encoding time, then hold at 98%
                            progress = base_progress + (progress_range * min(1.0, elapsed / (estimated_time_minutes * 60)))
                            progress = min(max_progress, progress)

                            remaining_minutes = max(0, estimated_time_minutes - int(elapsed / 60))
                            message = f"Encoding video... ({remaining_minutes} min remaining)"
                            if remaining_minutes == 0:
                                message = "Finalizing video..."

                            try:
                                progress_callback(progress, message)
                            except Exception as e:
                                logger.warning(f"[PodcastVideoCombination] Error in progress callback: {e}")
                                break

                            # Use wait with timeout instead of sleep to check event more frequently
                            if encoding_done.wait(timeout=update_interval):
                                break  # Event was set, exit immediately
                    except Exception as e:
                        logger.warning(f"[PodcastVideoCombination] Error in progress thread: {e}")

                progress_thread = threading.Thread(target=update_progress_periodically, daemon=True)
                progress_thread.start()

            # Write video file - this is the blocking operation
            logger.info(f"[PodcastVideoCombination] Calling write_videofile...")
            try:
                final_video.write_videofile(
                    str(output_path),
                    fps=fps,
                    codec='libx264',
                    audio_codec='aac',
                    preset=encoding_preset,  # Faster encoding
                    threads=4,
                    logger=None,  # Disable MoviePy's default logger
                    bitrate=None,  # Let encoder choose optimal bitrate
                    audio_bitrate='192k',  # Good quality audio
                    temp_audiofile=str(output_path.with_suffix('.m4a')),  # Temporary audio file
                    remove_temp=True,  # Clean up temp files
                    write_logfile=False,  # Don't write log file
                )
                logger.info(f"[PodcastVideoCombination] write_videofile completed successfully")
            except Exception as write_error:
                logger.error(f"[PodcastVideoCombination] Error in write_videofile: {write_error}")
                # Best effort: a non-empty output file is accepted even though the
                # writer reported an error; otherwise the error propagates.
                if output_path.exists() and output_path.stat().st_size > 0:
                    logger.warning(f"[PodcastVideoCombination] Video file exists despite error, continuing...")
                else:
                    raise
            finally:
                # Always signal that encoding is done - don't wait for progress thread
                if progress_thread:
                    encoding_done.set()
                    # Don't join - let it finish on its own (daemon thread)

            elapsed_time = time.time() - start_time
            logger.info(
                f"[PodcastVideoCombination] Video encoding completed in {elapsed_time:.1f} seconds "
                f"({elapsed_time/60:.1f} minutes)"
            )

        if progress_callback:
            progress_callback(99.0, "Video encoding complete! Finalizing...")

        # Verify file was created and get file size
        # Use retry logic in case file is still being written
        max_retries = 5
        file_size = 0
        for retry in range(max_retries):
            if output_path.exists():
                file_size = output_path.stat().st_size
                if file_size > 0:
                    break
            if retry < max_retries - 1:
                logger.info(f"[PodcastVideoCombination] Waiting for video file to be written (retry {retry + 1}/{max_retries})...")
                time.sleep(1)

        if not output_path.exists():
            raise RuntimeError(f"Video file was not created: {output_path}")

        if file_size == 0:
            raise RuntimeError(f"Video file is empty: {output_path}")

        logger.info(f"[PodcastVideoCombination] Video file verified: {output_path} ({file_size} bytes)")

        # Release clips before reporting completion (same order as before);
        # the finally block below then has nothing left to close.
        _release_clips()

        if progress_callback:
            progress_callback(100.0, "Video combination complete!")

        logger.info(f"[PodcastVideoCombination] Saved combined video to: {output_path} ({file_size} bytes)")

        return {
            "video_path": str(output_path),
            "video_filename": video_filename,
            "video_url": f"/api/podcast/final-videos/{video_filename}",
            "duration": total_duration,
            "fps": fps,
            "file_size": file_size,
            "num_scenes": num_clips,
        }

    except Exception as e:
        logger.exception(f"[PodcastVideoCombination] Error combining videos: {e}")
        raise RuntimeError(f"Failed to combine videos: {str(e)}") from e
    finally:
        # Guarantees clips are closed when concatenation, encoding or
        # verification fails; no-op after a successful run.
        _release_clips()
|
||||
|
||||
def save_scene_video(self, video_bytes: bytes, scene_number: int, user_id: str) -> Dict[str, Any]:
    """
    Save a single scene video to disk.

    This is a utility method for saving individual scene videos before combination.
    Separate from story writer to maintain clear separation of concerns.

    The file is written to the "AI_Videos" directory that sits next to
    self.output_dir (created if missing).

    Parameters:
        video_bytes (bytes): Raw video file bytes.
        scene_number (int): Scene number for filename.
        user_id (str): User ID for filename; only the first 16 characters are
            used and anything other than letters, digits, '-' or '_' becomes '_'.

    Returns:
        Dict[str, Any]: Dictionary with 'video_filename', 'video_url' and
            'video_path' (strings) and 'file_size' (int, bytes on disk).

    Raises:
        RuntimeError: If the video could not be saved (the original error is chained).
    """
    import uuid

    try:
        # Generate unique filename matching story writer format
        clean_user_id = "".join(c if c.isalnum() or c in ('-', '_') else '_' for c in user_id[:16])
        # Random suffix (not a time value) that keeps repeated saves from colliding
        unique_suffix = str(uuid.uuid4())[:8]
        video_filename = f"scene_{scene_number}_{clean_user_id}_{unique_suffix}.mp4"

        # Save to AI_Videos subdirectory (scene videos before combination)
        # output_dir is Final_Videos, so parent is podcast_videos, then AI_Videos
        scene_videos_dir = self.output_dir.parent / "AI_Videos"
        scene_videos_dir.mkdir(parents=True, exist_ok=True)

        video_path = scene_videos_dir / video_filename

        # Write video bytes to file
        with open(video_path, "wb") as f:
            f.write(video_bytes)

        file_size = video_path.stat().st_size
        logger.info(f"[PodcastVideoCombination] Saved scene {scene_number} video: {video_filename} ({file_size} bytes)")

        # Generate URL path (relative to /api/podcast/videos/)
        video_url = f"/api/podcast/videos/{video_filename}"

        return {
            "video_filename": video_filename,
            "video_url": video_url,
            "video_path": str(video_path),
            "file_size": file_size,
        }

    except Exception as e:
        logger.error(f"[PodcastVideoCombination] Error saving scene video: {e}", exc_info=True)
        raise RuntimeError(f"Failed to save scene video: {str(e)}") from e
|
||||
|
||||
def _generate_video_filename(self, podcast_title: str) -> str:
    """
    Generate a unique filename for the combined video.

    The title is reduced to letters, digits, spaces, '-' and '_', spaces are
    turned into underscores and the result is truncated to 50 characters.
    An 8-character random ID and the current Unix time (seconds) are appended.

    Parameters:
        podcast_title (str): Title of the podcast episode.

    Returns:
        str: Generated filename of the form
            "podcast_<safe_title>_<unique_id>_<timestamp>.mp4".
    """
    # Sanitize title for filename
    safe_title = "".join(c for c in podcast_title if c.isalnum() or c in (' ', '-', '_')).strip()
    safe_title = safe_title.replace(' ', '_')[:50]  # Limit length

    # Add unique ID and timestamp
    unique_id = str(uuid.uuid4())[:8]
    # Use the wall-clock time of generation; the source file's mtime is the
    # same for every call and says nothing about when the video was made.
    timestamp = int(time.time())

    return f"podcast_{safe_title}_{unique_id}_{timestamp}.mp4"
|
||||
|
||||
@@ -301,6 +301,12 @@ class StoryAudioGenerationService:
|
||||
pitch: float = 0.0,
|
||||
emotion: str = "happy",
|
||||
english_normalization: bool = False,
|
||||
sample_rate: Optional[int] = None,
|
||||
bitrate: Optional[int] = None,
|
||||
channel: Optional[str] = None,
|
||||
format: Optional[str] = None,
|
||||
language_boost: Optional[str] = None,
|
||||
enable_sync_mode: Optional[bool] = True,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Generate AI audio for a single scene using main_audio_generation.
|
||||
@@ -340,6 +346,12 @@ class StoryAudioGenerationService:
|
||||
emotion=emotion,
|
||||
user_id=user_id,
|
||||
english_normalization=english_normalization,
|
||||
sample_rate=sample_rate,
|
||||
bitrate=bitrate,
|
||||
channel=channel,
|
||||
format=format,
|
||||
language_boost=language_boost,
|
||||
enable_sync_mode=enable_sync_mode,
|
||||
)
|
||||
|
||||
# Save audio to file
|
||||
|
||||
@@ -252,10 +252,14 @@ class StoryVideoGenerationService:
|
||||
if len(scenes) != len(audio_paths):
|
||||
raise ValueError("Number of scenes and audio paths must match")
|
||||
|
||||
video_paths = video_paths or [None] * len(scenes)
|
||||
if len(video_paths) != len(scenes):
|
||||
# Ensure video_paths is a list and matches scenes length
|
||||
if video_paths is None:
|
||||
video_paths = [None] * len(scenes)
|
||||
elif len(video_paths) != len(scenes):
|
||||
video_paths = video_paths + [None] * (len(scenes) - len(video_paths))
|
||||
|
||||
logger.debug(f"[StoryVideoGeneration] video_paths length: {len(video_paths)}, scenes length: {len(scenes)}")
|
||||
|
||||
try:
|
||||
logger.info(f"[StoryVideoGeneration] Generating story video for {len(scenes)} scenes")
|
||||
|
||||
@@ -311,49 +315,64 @@ class StoryVideoGenerationService:
|
||||
scene_title = scene.get("title", "Untitled")
|
||||
|
||||
logger.info(f"[StoryVideoGeneration] Processing scene {scene_number}/{len(scenes)}: {scene_title}")
|
||||
|
||||
audio_file = Path(audio_path)
|
||||
if not audio_file.exists():
|
||||
logger.warning(f"[StoryVideoGeneration] Audio not found: {audio_path}, skipping scene {scene_number}")
|
||||
continue
|
||||
|
||||
# Load audio
|
||||
audio_clip = AudioFileClip(str(audio_file))
|
||||
audio_duration = audio_clip.duration
|
||||
logger.debug(f"[StoryVideoGeneration] Scene {scene_number} paths - video: {video_path}, audio: {audio_path}, image: {image_path}")
|
||||
|
||||
# Prefer animated video if available
|
||||
if video_path and Path(video_path).exists():
|
||||
# Check video_path is not None and is a valid string before calling Path()
|
||||
if video_path is not None and isinstance(video_path, (str, Path)) and video_path and Path(video_path).exists():
|
||||
logger.info(f"[StoryVideoGeneration] Using animated video for scene {scene_number}: {video_path}")
|
||||
# Load animated video
|
||||
if VideoFileClip is None:
|
||||
raise RuntimeError("VideoFileClip not available - MoviePy may not be fully installed")
|
||||
video_clip = VideoFileClip(str(video_path))
|
||||
# Replace audio with the preferred audio (AI or free)
|
||||
video_clip = video_clip.with_audio(audio_clip)
|
||||
# Match duration to audio if needed
|
||||
if video_clip.duration > audio_duration:
|
||||
video_clip = video_clip.subclip(0, audio_duration)
|
||||
elif video_clip.duration < audio_duration:
|
||||
# Loop the video if it's shorter than audio
|
||||
loops_needed = int(audio_duration / video_clip.duration) + 1
|
||||
video_clip = concatenate_videoclips([video_clip] * loops_needed).subclip(0, audio_duration)
|
||||
|
||||
# Handle audio: use embedded audio if no separate audio_path provided
|
||||
if audio_path is not None and isinstance(audio_path, (str, Path)) and audio_path and Path(audio_path).exists():
|
||||
# Load separate audio file and replace video's audio
|
||||
logger.info(f"[StoryVideoGeneration] Replacing video audio with separate audio file: {audio_path}")
|
||||
audio_clip = AudioFileClip(str(audio_path))
|
||||
audio_duration = audio_clip.duration
|
||||
video_clip = video_clip.with_audio(audio_clip)
|
||||
elif image_path and Path(image_path).exists():
|
||||
# Fall back to static image
|
||||
logger.info(f"[StoryVideoGeneration] Using static image for scene {scene_number}: {image_path}")
|
||||
image_file = Path(image_path)
|
||||
# Create image clip (MoviePy v2: use with_* API)
|
||||
image_clip = ImageClip(str(image_file)).with_duration(audio_duration)
|
||||
image_clip = image_clip.with_fps(fps)
|
||||
# Set audio to image clip
|
||||
video_clip = image_clip.with_audio(audio_clip)
|
||||
# Match duration to audio if needed
|
||||
if video_clip.duration > audio_duration:
|
||||
video_clip = video_clip.subclip(0, audio_duration)
|
||||
elif video_clip.duration < audio_duration:
|
||||
# Loop the video if it's shorter than audio
|
||||
loops_needed = int(audio_duration / video_clip.duration) + 1
|
||||
video_clip = concatenate_videoclips([video_clip] * loops_needed).subclip(0, audio_duration)
|
||||
video_clip = video_clip.with_audio(audio_clip)
|
||||
else:
|
||||
# Use embedded audio from video
|
||||
logger.info(f"[StoryVideoGeneration] Using embedded audio from video for scene {scene_number}")
|
||||
audio_duration = video_clip.duration
|
||||
# Video already has audio, no need to replace
|
||||
|
||||
scene_clips.append(video_clip)
|
||||
total_duration += audio_duration
|
||||
elif audio_path is not None and isinstance(audio_path, (str, Path)) and audio_path and Path(audio_path).exists():
|
||||
# No video, but we have audio - use with image or create blank
|
||||
audio_file = Path(audio_path)
|
||||
audio_clip = AudioFileClip(str(audio_file))
|
||||
audio_duration = audio_clip.duration
|
||||
|
||||
if image_path is not None and isinstance(image_path, (str, Path)) and image_path and Path(image_path).exists():
|
||||
# Fall back to static image with audio
|
||||
logger.info(f"[StoryVideoGeneration] Using static image for scene {scene_number}: {image_path}")
|
||||
image_file = Path(image_path)
|
||||
# Create image clip (MoviePy v2: use with_* API)
|
||||
image_clip = ImageClip(str(image_file)).with_duration(audio_duration)
|
||||
image_clip = image_clip.with_fps(fps)
|
||||
# Set audio to image clip
|
||||
video_clip = image_clip.with_audio(audio_clip)
|
||||
scene_clips.append(video_clip)
|
||||
total_duration += audio_duration
|
||||
else:
|
||||
logger.warning(f"[StoryVideoGeneration] Audio provided but no video or image for scene {scene_number}, skipping")
|
||||
continue
|
||||
else:
|
||||
logger.warning(f"[StoryVideoGeneration] No video or image found for scene {scene_number}, skipping")
|
||||
logger.warning(f"[StoryVideoGeneration] No video, audio, or image found for scene {scene_number}, skipping")
|
||||
continue
|
||||
|
||||
scene_clips.append(video_clip)
|
||||
total_duration += audio_duration
|
||||
|
||||
# Call progress callback if provided
|
||||
if progress_callback:
|
||||
progress = ((idx + 1) / len(scenes)) * 90 # Reserve 10% for final composition
|
||||
@@ -362,7 +381,12 @@ class StoryVideoGenerationService:
|
||||
logger.info(f"[StoryVideoGeneration] Processed scene {idx + 1}/{len(scenes)}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[StoryVideoGeneration] Failed to process scene {idx + 1}: {e}")
|
||||
logger.error(
|
||||
f"[StoryVideoGeneration] Failed to process scene {idx + 1} ({scene_number}): {e}\n"
|
||||
f" video_path: {video_path} (type: {type(video_path)})\n"
|
||||
f" audio_path: {audio_path} (type: {type(audio_path)})\n"
|
||||
f" image_path: {image_path} (type: {type(image_path)})"
|
||||
)
|
||||
# Continue with next scene instead of failing completely
|
||||
continue
|
||||
|
||||
|
||||
@@ -71,13 +71,16 @@ class WaveSpeedClient:
|
||||
logger.info(f"[WaveSpeed] Submitted request: {prediction_id}")
|
||||
return prediction_id
|
||||
|
||||
def get_prediction_result(self, prediction_id: str, timeout: int = 120) -> Dict[str, Any]:
|
||||
def get_prediction_result(self, prediction_id: str, timeout: int = 30) -> Dict[str, Any]:
|
||||
"""
|
||||
Fetch the current status/result for a prediction.
|
||||
Matches the example pattern: simple GET request, check status_code == 200, return data.
|
||||
"""
|
||||
url = f"{self.BASE_URL}/predictions/{prediction_id}/result"
|
||||
headers = {"Authorization": f"Bearer {self.api_key}"}
|
||||
|
||||
try:
|
||||
response = requests.get(url, headers={"Authorization": f"Bearer {self.api_key}"}, timeout=timeout)
|
||||
response = requests.get(url, headers=headers, timeout=timeout)
|
||||
except requests_exceptions.Timeout as exc:
|
||||
raise HTTPException(
|
||||
status_code=504,
|
||||
@@ -98,7 +101,15 @@ class WaveSpeedClient:
|
||||
"exception": str(exc),
|
||||
},
|
||||
) from exc
|
||||
if response.status_code != 200:
|
||||
|
||||
# Match example pattern: check status_code == 200, then get data
|
||||
if response.status_code == 200:
|
||||
result = response.json().get("data")
|
||||
if not result:
|
||||
raise HTTPException(status_code=502, detail={"error": "WaveSpeed polling response missing data"})
|
||||
return result
|
||||
else:
|
||||
# Non-200 status - log and raise error (matching example's break behavior)
|
||||
logger.error(f"[WaveSpeed] Polling failed: {response.status_code} {response.text}")
|
||||
raise HTTPException(
|
||||
status_code=502,
|
||||
@@ -109,59 +120,116 @@ class WaveSpeedClient:
|
||||
},
|
||||
)
|
||||
|
||||
result = response.json().get("data")
|
||||
if not result:
|
||||
raise HTTPException(status_code=502, detail={"error": "WaveSpeed polling response missing data"})
|
||||
return result
|
||||
|
||||
def poll_until_complete(
|
||||
self,
|
||||
prediction_id: str,
|
||||
timeout_seconds: int = 240,
|
||||
timeout_seconds: Optional[int] = None,
|
||||
interval_seconds: float = 1.0,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Poll WaveSpeed until the job completes, fails, or times out.
|
||||
Poll WaveSpeed until the job completes or fails.
|
||||
Matches the example pattern: simple polling loop until status is "completed" or "failed".
|
||||
|
||||
Args:
|
||||
prediction_id: The prediction ID to poll for
|
||||
timeout_seconds: Optional timeout in seconds. If None, polls indefinitely until completion/failure.
|
||||
interval_seconds: Seconds to wait between polling attempts (default: 1.0, faster than 2.0)
|
||||
|
||||
Returns:
|
||||
Dict containing the completed result
|
||||
|
||||
Raises:
|
||||
HTTPException: If the task fails, polling fails, or times out (if timeout_seconds is set)
|
||||
"""
|
||||
start_time = time.time()
|
||||
consecutive_errors = 0
|
||||
max_consecutive_errors = 6 # safety guard for non-transient errors
|
||||
|
||||
while True:
|
||||
try:
|
||||
result = self.get_prediction_result(prediction_id)
|
||||
consecutive_errors = 0 # Reset error counter on success
|
||||
except HTTPException as exc:
|
||||
detail = exc.detail or {}
|
||||
if isinstance(detail, dict):
|
||||
detail.setdefault("prediction_id", prediction_id)
|
||||
detail.setdefault("resume_available", True)
|
||||
detail.setdefault("error", detail.get("error", "WaveSpeed polling failed"))
|
||||
raise HTTPException(status_code=exc.status_code, detail=detail) from exc
|
||||
|
||||
# Determine underlying status code (WaveSpeed vs proxy)
|
||||
status_code = detail.get("status_code", exc.status_code)
|
||||
|
||||
# Treat 5xx as transient: keep polling indefinitely with backoff
|
||||
if 500 <= int(status_code) < 600:
|
||||
consecutive_errors += 1
|
||||
backoff = min(30.0, interval_seconds * (2 ** (consecutive_errors - 1)))
|
||||
logger.warning(
|
||||
f"[WaveSpeed] Transient polling error {consecutive_errors} for {prediction_id}: "
|
||||
f"{status_code}. Backing off {backoff:.1f}s"
|
||||
)
|
||||
time.sleep(backoff)
|
||||
continue
|
||||
|
||||
# For non-transient (typically 4xx) errors, apply safety cap
|
||||
consecutive_errors += 1
|
||||
if consecutive_errors >= max_consecutive_errors:
|
||||
logger.error(
|
||||
f"[WaveSpeed] Too many polling errors ({consecutive_errors}) for {prediction_id}, "
|
||||
f"status_code={status_code}. Giving up."
|
||||
)
|
||||
raise HTTPException(status_code=exc.status_code, detail=detail) from exc
|
||||
|
||||
backoff = min(30.0, interval_seconds * (2 ** (consecutive_errors - 1)))
|
||||
logger.warning(
|
||||
f"[WaveSpeed] Polling error {consecutive_errors}/{max_consecutive_errors} for {prediction_id}: "
|
||||
f"{status_code}. Backing off {backoff:.1f}s"
|
||||
)
|
||||
time.sleep(backoff)
|
||||
continue
|
||||
|
||||
# Extract status from result (matching example pattern)
|
||||
status = result.get("status")
|
||||
|
||||
if status == "completed":
|
||||
logger.info(f"[WaveSpeed] Prediction {prediction_id} completed.")
|
||||
elapsed = time.time() - start_time
|
||||
logger.info(f"[WaveSpeed] Prediction {prediction_id} completed in {elapsed:.1f}s")
|
||||
return result
|
||||
|
||||
if status == "failed":
|
||||
logger.error(f"[WaveSpeed] Prediction {prediction_id} failed: {result.get('error')}")
|
||||
error_msg = result.get("error", "Unknown error")
|
||||
logger.error(f"[WaveSpeed] Prediction {prediction_id} failed: {error_msg}")
|
||||
raise HTTPException(
|
||||
status_code=502,
|
||||
detail={
|
||||
"error": "WaveSpeed animation failed",
|
||||
"prediction_id": prediction_id,
|
||||
"details": result.get("error"),
|
||||
},
|
||||
)
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
if elapsed > timeout_seconds:
|
||||
logger.error(f"[WaveSpeed] Prediction {prediction_id} timed out after {timeout_seconds}s")
|
||||
raise HTTPException(
|
||||
status_code=504,
|
||||
detail={
|
||||
"error": "WaveSpeed animation timed out",
|
||||
"error": "WaveSpeed task failed",
|
||||
"prediction_id": prediction_id,
|
||||
"message": error_msg,
|
||||
"details": result,
|
||||
},
|
||||
)
|
||||
|
||||
logger.debug(f"[WaveSpeed] Prediction {prediction_id} status={status}. Waiting...")
|
||||
# Check timeout only if specified
|
||||
if timeout_seconds is not None:
|
||||
elapsed = time.time() - start_time
|
||||
if elapsed > timeout_seconds:
|
||||
logger.error(f"[WaveSpeed] Prediction {prediction_id} timed out after {timeout_seconds}s")
|
||||
raise HTTPException(
|
||||
status_code=504,
|
||||
detail={
|
||||
"error": "WaveSpeed task timed out",
|
||||
"prediction_id": prediction_id,
|
||||
"timeout_seconds": timeout_seconds,
|
||||
"current_status": status,
|
||||
"message": f"Task did not complete within {timeout_seconds} seconds. Status: {status}",
|
||||
},
|
||||
)
|
||||
|
||||
# Log progress periodically (every 30 seconds)
|
||||
elapsed = time.time() - start_time
|
||||
if int(elapsed) % 30 == 0 and elapsed > 0:
|
||||
logger.info(f"[WaveSpeed] Polling {prediction_id}: status={status}, elapsed={elapsed:.0f}s")
|
||||
|
||||
# Poll faster (1.0s instead of 2.0s) to match example's responsiveness
|
||||
time.sleep(interval_seconds)
|
||||
|
||||
def optimize_prompt(
|
||||
@@ -469,7 +537,9 @@ class WaveSpeedClient:
|
||||
|
||||
# Fetch image bytes
|
||||
logger.info(f"[WaveSpeed] Fetching image from URL: {image_url}")
|
||||
image_response = requests.get(image_url, timeout=timeout)
|
||||
# Use reasonable timeout for downloading the final image (60s should be enough)
|
||||
# The timeout parameter is for polling, not for downloading
|
||||
image_response = requests.get(image_url, timeout=60)
|
||||
if image_response.status_code == 200:
|
||||
image_bytes = image_response.content
|
||||
logger.info(f"[WaveSpeed] Image generated successfully (size: {len(image_bytes)} bytes)")
|
||||
@@ -481,6 +551,208 @@ class WaveSpeedClient:
|
||||
detail="Failed to fetch generated image from WaveSpeed URL",
|
||||
)
|
||||
|
||||
def generate_character_image(
    self,
    prompt: str,
    reference_image_bytes: bytes,
    style: str = "Auto",
    aspect_ratio: str = "16:9",
    rendering_speed: str = "Default",
    timeout: Optional[int] = None,
) -> bytes:
    """
    Generate an image using the Ideogram Character API to maintain character consistency.

    Creates variations of a reference character image while respecting the base appearance.
    The task is submitted, polled until it finishes, and the first output is downloaded.

    Note: This API is always async and requires polling for results.

    Args:
        prompt: Text prompt describing the scene/context for the character
        reference_image_bytes: Reference image bytes (base avatar)
        style: Character style type ("Auto", "Fiction", or "Realistic")
        aspect_ratio: Aspect ratio ("1:1", "16:9", "9:16", "4:3", "3:4")
        rendering_speed: Rendering speed ("Default", "Turbo", "Quality")
        timeout: Polling timeout in seconds. ``None`` (the default) or ``0`` is
            forwarded to ``poll_until_complete`` as ``timeout_seconds=None``,
            which is intended to mean "no polling deadline" (confirm against
            ``poll_until_complete``). Submission and download use their own
            fixed timeouts and are not governed by this value.

    Returns:
        bytes: Generated image bytes with consistent character

    Raises:
        HTTPException: 504 when the submission times out; 502 when the service
            cannot be reached, rejects the request, returns a malformed
            response, the task does not complete, or the image download fails.
    """
    import base64

    # Label the data URI with the reference image's real format when it can be
    # recognised from its magic bytes; PNG stays the default, as before.
    if reference_image_bytes.startswith(b"\xff\xd8\xff"):
        mime_type = "image/jpeg"
    elif reference_image_bytes[:4] == b"RIFF" and reference_image_bytes[8:12] == b"WEBP":
        mime_type = "image/webp"
    else:
        mime_type = "image/png"

    image_base64 = base64.b64encode(reference_image_bytes).decode("utf-8")
    image_data_uri = f"data:{mime_type};base64,{image_base64}"

    url = f"{self.BASE_URL}/ideogram-ai/ideogram-character"

    # Note: enable_sync_mode is not a valid parameter for Ideogram Character API
    # The API is always async and requires polling
    payload = {
        "prompt": prompt,
        "image": image_data_uri,
        "style": style,
        "aspect_ratio": aspect_ratio,
        "rendering_speed": rendering_speed,
    }

    logger.info(f"[WaveSpeed] Generating character image via Ideogram Character (prompt_length={len(prompt)})")

    # The POST only has to return a task ID, so short fixed timeouts are enough.
    # Connection-level failures are retried with exponential backoff; a read
    # timeout is not retried because the task may already have been accepted.
    max_retries = 2
    retry_delay = 2.0  # seconds

    for attempt in range(max_retries + 1):
        try:
            response = requests.post(
                url,
                headers=self._headers(),
                json=payload,
                timeout=(30, 30),  # (connect_timeout, read_timeout)
            )
            break  # Success, exit retry loop
        except (requests_exceptions.ConnectTimeout, requests_exceptions.ConnectionError) as e:
            if attempt < max_retries:
                logger.warning(f"[WaveSpeed] Connection attempt {attempt + 1}/{max_retries + 1} failed, retrying in {retry_delay}s: {e}")
                time.sleep(retry_delay)
                retry_delay *= 2  # Exponential backoff
                continue

            # Final attempt failed
            is_connect_timeout = isinstance(e, requests_exceptions.ConnectTimeout)
            error_type = "Connection timeout" if is_connect_timeout else "Connection error"
            logger.error(f"[WaveSpeed] {error_type} to Ideogram Character API after {max_retries + 1} attempts: {e}")
            raise HTTPException(
                status_code=504 if is_connect_timeout else 502,
                detail={
                    "error": f"{error_type} to WaveSpeed Ideogram Character API",
                    "message": "Unable to establish connection to the image generation service after multiple attempts. Please check your network connection and try again.",
                    "exception": str(e),
                    "retry_recommended": True,
                },
            ) from e
        except requests_exceptions.Timeout as e:
            logger.error(f"[WaveSpeed] Request timeout to Ideogram Character API: {e}")
            raise HTTPException(
                status_code=504,
                detail={
                    "error": "Request timeout to WaveSpeed Ideogram Character API",
                    "message": "The image generation request took too long. Please try again.",
                    "exception": str(e),
                },
            ) from e

    if response.status_code != 200:
        logger.error(f"[WaveSpeed] Character image generation failed: {response.status_code} {response.text}")
        raise HTTPException(
            status_code=502,
            detail={
                "error": "WaveSpeed Ideogram Character generation failed",
                "status_code": response.status_code,
                "response": response.text,
            },
        )

    # A 200 with a non-JSON or non-object body must surface as a gateway error,
    # not as an unhandled ValueError/AttributeError.
    try:
        response_json = response.json()
    except ValueError as e:
        logger.error(f"[WaveSpeed] Non-JSON response from Ideogram Character: {response.text}")
        raise HTTPException(
            status_code=502,
            detail="WaveSpeed Ideogram Character returned unexpected response format",
        ) from e

    data = (response_json.get("data") or response_json) if isinstance(response_json, dict) else None
    if not isinstance(data, dict):
        logger.error(f"[WaveSpeed] Unexpected response structure: {response.text}")
        raise HTTPException(
            status_code=502,
            detail="WaveSpeed Ideogram Character returned unexpected response format",
        )

    # Extract prediction ID
    prediction_id = data.get("id")
    if not prediction_id:
        logger.error(f"[WaveSpeed] No prediction ID in response: {response.text}")
        raise HTTPException(
            status_code=502,
            detail="WaveSpeed Ideogram Character response missing prediction id",
        )

    outputs = data.get("outputs") or []
    status = data.get("status", "unknown")

    logger.info(f"[WaveSpeed] Ideogram Character task created: prediction_id={prediction_id}, status={status}")

    if outputs and status == "completed":
        # Already finished at submission time (unlikely but possible)
        logger.info("[WaveSpeed] Got immediate results from Ideogram Character")
    else:
        logger.info(f"[WaveSpeed] Polling for Ideogram Character result (status: {status}, prediction_id: {prediction_id})")
        # A falsy timeout (None or 0) is forwarded as None, i.e. no deadline.
        polling_timeout = timeout if timeout else None
        result = self.poll_until_complete(
            prediction_id,
            timeout_seconds=polling_timeout,
            interval_seconds=0.5,
        )

        if not isinstance(result, dict):
            logger.error(f"[WaveSpeed] Unexpected result type: {type(result)}, value: {result}")
            raise HTTPException(
                status_code=502,
                detail="WaveSpeed Ideogram Character returned unexpected response format",
            )

        outputs = result.get("outputs") or []
        status = result.get("status", "unknown")

        if status != "completed":
            error_msg = result.get("error") or result.get("message") or str(result.get("details", "Unknown error"))
            logger.error(f"[WaveSpeed] Ideogram Character task did not complete: status={status}, error={error_msg}")
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "WaveSpeed Ideogram Character task failed",
                    "status": status,
                    "message": error_msg,
                },
            )

    if not outputs:
        logger.error(f"[WaveSpeed] No outputs after polling: status={status}")
        raise HTTPException(
            status_code=502,
            detail="WaveSpeed Ideogram Character returned no outputs",
        )

    # The first output is either a bare URL string or a dict carrying the URL.
    image_url = None
    if isinstance(outputs, list):
        first_output = outputs[0]
        if isinstance(first_output, str):
            image_url = first_output
        elif isinstance(first_output, dict):
            image_url = first_output.get("url") or first_output.get("image_url")

    if not image_url:
        logger.error(f"[WaveSpeed] No image URL in outputs: {outputs}")
        raise HTTPException(
            status_code=502,
            detail="WaveSpeed Ideogram Character response missing image URL",
        )

    # Download image (fixed 60s timeout; independent of the polling timeout)
    logger.info(f"[WaveSpeed] Downloading character image from: {image_url}")
    try:
        image_response = requests.get(image_url, timeout=60)
    except requests_exceptions.RequestException as e:
        logger.error(f"[WaveSpeed] Failed to download image: {e}")
        raise HTTPException(
            status_code=502,
            detail="Failed to download generated character image",
        ) from e

    if image_response.status_code != 200:
        logger.error(f"[WaveSpeed] Failed to download image: {image_response.status_code}")
        raise HTTPException(
            status_code=502,
            detail="Failed to download generated character image",
        )

    image_bytes = image_response.content
    logger.info(f"[WaveSpeed] ✅ Successfully generated character image: {len(image_bytes)} bytes")
    return image_bytes
|
||||
|
||||
def generate_speech(
|
||||
self,
|
||||
text: str,
|
||||
@@ -490,7 +762,7 @@ class WaveSpeedClient:
|
||||
pitch: float = 0.0,
|
||||
emotion: str = "happy",
|
||||
enable_sync_mode: bool = True,
|
||||
timeout: int = 60,
|
||||
timeout: int = 120,
|
||||
**kwargs
|
||||
) -> bytes:
|
||||
"""
|
||||
@@ -537,7 +809,51 @@ class WaveSpeedClient:
|
||||
payload[param] = kwargs[param]
|
||||
|
||||
logger.info(f"[WaveSpeed] Generating speech via {url} (voice={voice_id}, text_length={len(text)})")
|
||||
response = requests.post(url, headers=self._headers(), json=payload, timeout=timeout)
|
||||
|
||||
# Retry on transient connection issues
|
||||
max_retries = 2
|
||||
retry_delay = 2.0
|
||||
last_error = None
|
||||
for attempt in range(max_retries + 1):
|
||||
try:
|
||||
response = requests.post(
|
||||
url,
|
||||
headers=self._headers(),
|
||||
json=payload,
|
||||
timeout=(30, 60), # connect, read
|
||||
)
|
||||
break
|
||||
except (requests_exceptions.ConnectTimeout, requests_exceptions.ConnectionError) as e:
|
||||
last_error = e
|
||||
if attempt < max_retries:
|
||||
logger.warning(
|
||||
f"[WaveSpeed] Speech connection attempt {attempt + 1}/{max_retries + 1} failed, "
|
||||
f"retrying in {retry_delay}s: {e}"
|
||||
)
|
||||
time.sleep(retry_delay)
|
||||
retry_delay *= 2
|
||||
continue
|
||||
logger.error(f"[WaveSpeed] Speech connection failed after {max_retries + 1} attempts: {e}")
|
||||
raise HTTPException(
|
||||
status_code=504,
|
||||
detail={
|
||||
"error": "Connection to WaveSpeed speech API timed out",
|
||||
"message": "Unable to reach the speech service. Please try again.",
|
||||
"exception": str(e),
|
||||
"retry_recommended": True,
|
||||
},
|
||||
)
|
||||
except requests_exceptions.Timeout as e:
|
||||
last_error = e
|
||||
logger.error(f"[WaveSpeed] Speech request timeout: {e}")
|
||||
raise HTTPException(
|
||||
status_code=504,
|
||||
detail={
|
||||
"error": "WaveSpeed speech request timed out",
|
||||
"message": "The speech generation request took too long. Please try again.",
|
||||
"exception": str(e),
|
||||
},
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
logger.error(f"[WaveSpeed] Speech generation failed: {response.status_code} {response.text}")
|
||||
|
||||
@@ -8,7 +8,6 @@ from fastapi import HTTPException
|
||||
from loguru import logger
|
||||
|
||||
from .client import WaveSpeedClient
|
||||
from .kling_animation import generate_animation_prompt
|
||||
|
||||
INFINITALK_MODEL_PATH = "wavespeed-ai/infinitetalk"
|
||||
INFINITALK_MODEL_NAME = "wavespeed-ai/infinitetalk"
|
||||
@@ -22,6 +21,67 @@ def _as_data_uri(content_bytes: bytes, mime_type: str) -> str:
|
||||
return f"data:{mime_type};base64,{encoded}"
|
||||
|
||||
|
||||
def _generate_simple_infinitetalk_prompt(
|
||||
scene_data: Dict[str, Any],
|
||||
story_context: Dict[str, Any],
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
Generate a balanced, concise prompt for InfiniteTalk.
|
||||
InfiniteTalk is audio-driven, so the prompt should describe the scene and suggest
|
||||
subtle motion, but avoid overly elaborate cinematic descriptions.
|
||||
|
||||
Returns None if no meaningful prompt can be generated.
|
||||
"""
|
||||
title = (scene_data.get("title") or "").strip()
|
||||
description = (scene_data.get("description") or "").strip()
|
||||
image_prompt = (scene_data.get("image_prompt") or "").strip()
|
||||
|
||||
# Build a balanced prompt: scene description + simple motion hint
|
||||
parts = []
|
||||
|
||||
# Start with the main subject/scene
|
||||
if title and len(title) > 5 and title.lower() not in ("scene", "podcast", "episode"):
|
||||
parts.append(title)
|
||||
elif description:
|
||||
# Take first sentence or first 60 chars
|
||||
desc_part = description.split('.')[0][:60].strip()
|
||||
if desc_part:
|
||||
parts.append(desc_part)
|
||||
elif image_prompt:
|
||||
# Take first sentence or first 60 chars
|
||||
img_part = image_prompt.split('.')[0][:60].strip()
|
||||
if img_part:
|
||||
parts.append(img_part)
|
||||
|
||||
if not parts:
|
||||
return None
|
||||
|
||||
# Add a simple, subtle motion suggestion (not elaborate camera movements)
|
||||
# Keep it natural and audio-driven
|
||||
motion_hints = [
|
||||
"with subtle movement",
|
||||
"with gentle motion",
|
||||
"with natural animation",
|
||||
]
|
||||
|
||||
# Combine scene description with subtle motion hint
|
||||
if len(parts[0]) < 80:
|
||||
# Room for a motion hint
|
||||
prompt = f"{parts[0]}, {motion_hints[0]}"
|
||||
else:
|
||||
# Just use the description if it's already long enough
|
||||
prompt = parts[0]
|
||||
|
||||
# Keep it concise - max 120 characters (allows for scene + motion hint)
|
||||
prompt = prompt[:120].strip()
|
||||
|
||||
# Clean up trailing commas or incomplete sentences
|
||||
if prompt.endswith(','):
|
||||
prompt = prompt[:-1].strip()
|
||||
|
||||
return prompt if len(prompt) >= 15 else None
|
||||
|
||||
|
||||
def animate_scene_with_voiceover(
|
||||
*,
|
||||
image_bytes: bytes,
|
||||
@@ -31,6 +91,8 @@ def animate_scene_with_voiceover(
|
||||
user_id: str,
|
||||
resolution: str = "720p",
|
||||
prompt_override: Optional[str] = None,
|
||||
mask_image_bytes: Optional[bytes] = None,
|
||||
seed: Optional[int] = -1,
|
||||
image_mime: str = "image/png",
|
||||
audio_mime: str = "audio/mpeg",
|
||||
client: Optional[WaveSpeedClient] = None,
|
||||
@@ -59,21 +121,28 @@ def animate_scene_with_voiceover(
|
||||
if resolution not in {"480p", "720p"}:
|
||||
raise HTTPException(status_code=400, detail="Resolution must be '480p' or '720p'.")
|
||||
|
||||
animation_prompt = prompt_override or generate_animation_prompt(scene_data, story_context, user_id)
|
||||
# Generate simple, concise prompt for InfiniteTalk (audio-driven, less need for elaborate descriptions)
|
||||
animation_prompt = prompt_override or _generate_simple_infinitetalk_prompt(scene_data, story_context)
|
||||
|
||||
payload = {
|
||||
payload: Dict[str, Any] = {
|
||||
"image": _as_data_uri(image_bytes, image_mime),
|
||||
"audio": _as_data_uri(audio_bytes, audio_mime),
|
||||
"resolution": resolution,
|
||||
}
|
||||
# Only include prompt if we have a meaningful one (InfiniteTalk works fine without it)
|
||||
if animation_prompt:
|
||||
payload["prompt"] = animation_prompt
|
||||
if mask_image_bytes:
|
||||
payload["mask_image"] = _as_data_uri(mask_image_bytes, image_mime)
|
||||
if seed is not None:
|
||||
payload["seed"] = seed
|
||||
|
||||
client = client or WaveSpeedClient()
|
||||
prediction_id = client.submit_image_to_video(INFINITALK_MODEL_PATH, payload, timeout=60)
|
||||
|
||||
try:
|
||||
result = client.poll_until_complete(prediction_id, timeout_seconds=600, interval_seconds=1.0)
|
||||
# Poll faster (0.5s) to mirror reference pattern; allow up to 10 minutes
|
||||
result = client.poll_until_complete(prediction_id, timeout_seconds=600, interval_seconds=0.5)
|
||||
except HTTPException as exc:
|
||||
detail = exc.detail or {}
|
||||
if isinstance(detail, dict):
|
||||
|
||||
@@ -1,197 +0,0 @@
|
||||
# Build Optimization Guide
|
||||
|
||||
This guide explains how to optimize the production build for better performance.
|
||||
|
||||
## Current Issues
|
||||
|
||||
1. **Minify JavaScript**: 504 KiB savings possible
|
||||
2. **Reduce unused JavaScript**: 980 KiB savings possible
|
||||
3. **Minify CSS**: 24 KiB savings possible
|
||||
4. **Reduce unused CSS**: 25 KiB savings possible
|
||||
5. **Cache Headers**: 1,702 KiB not cached (requires server configuration)
|
||||
|
||||
## React Scripts Build Configuration
|
||||
|
||||
React Scripts already minifies JavaScript and CSS in production builds. However, you can optimize further:
|
||||
|
||||
### 1. Environment Variables
|
||||
|
||||
Make sure `.env.production` (already created) contains:
|
||||
|
||||
```env
|
||||
GENERATE_SOURCEMAP=false
|
||||
INLINE_RUNTIME_CHUNK=false
|
||||
```
|
||||
|
||||
### 2. Build Command
|
||||
|
||||
Run production build:
|
||||
```bash
|
||||
npm run build
|
||||
```
|
||||
|
||||
This will:
|
||||
- Minify JavaScript (already enabled)
|
||||
- Minify CSS (already enabled)
|
||||
- Tree-shake unused code (already enabled)
|
||||
- Skip source map generation (disabled via `GENERATE_SOURCEMAP=false`)
|
||||
|
||||
## Reducing Unused JavaScript
|
||||
|
||||
### Analyze Bundle Size
|
||||
|
||||
Install webpack-bundle-analyzer:
|
||||
```bash
|
||||
npm install --save-dev webpack-bundle-analyzer
|
||||
```
|
||||
|
||||
Add to `package.json` scripts:
|
||||
```json
|
||||
"analyze": "npm run build && npx webpack-bundle-analyzer build/static/js/*.js"
|
||||
```
|
||||
|
||||
Run:
|
||||
```bash
|
||||
npm run analyze
|
||||
```
|
||||
|
||||
### Common Issues and Solutions
|
||||
|
||||
1. **Large Dependencies**:
|
||||
- `framer-motion`: 246 KiB - Consider lazy loading animations
|
||||
- `@mui/material`: Multiple chunks - Already code-split
|
||||
- `recharts`: Only load when needed
|
||||
|
||||
2. **Unused Imports**:
|
||||
- Use ESLint rule: `"no-unused-vars": "error"`
|
||||
- Run: `npx eslint --ext .ts,.tsx src/ --fix`
|
||||
|
||||
3. **Dynamic Imports**:
|
||||
- Already implemented for routes
|
||||
- Consider lazy loading heavy components like charts
|
||||
|
||||
## Server-Side Cache Headers
|
||||
|
||||
### For Express.js (if using)
|
||||
|
||||
```javascript
|
||||
// Add to your Express server
|
||||
app.use(express.static('build', {
|
||||
maxAge: '1y',
|
||||
immutable: true,
|
||||
etag: true,
|
||||
lastModified: true
|
||||
}));
|
||||
```
|
||||
|
||||
### For Nginx
|
||||
|
||||
```nginx
|
||||
location /static {
|
||||
alias /path/to/build/static;
|
||||
expires 1y;
|
||||
add_header Cache-Control "public, immutable";
|
||||
access_log off;
|
||||
}
|
||||
```
|
||||
|
||||
### For Apache
|
||||
|
||||
```apache
|
||||
<IfModule mod_expires.c>
|
||||
ExpiresActive On
|
||||
ExpiresByType application/javascript "access plus 1 year"
|
||||
ExpiresByType text/css "access plus 1 year"
|
||||
ExpiresByType image/png "access plus 1 year"
|
||||
ExpiresByType image/jpeg "access plus 1 year"
|
||||
ExpiresByType image/webp "access plus 1 year"
|
||||
</IfModule>
|
||||
```
|
||||
|
||||
## Image Optimization
|
||||
|
||||
### Convert AskAlwrity-min.ico to WebP
|
||||
|
||||
1. Install sharp or use online tool:
|
||||
```bash
|
||||
npm install --save-dev sharp
|
||||
```
|
||||
|
||||
2. Create script `scripts/optimize-images.js`:
|
||||
```javascript
|
||||
const sharp = require('sharp');
|
||||
const path = require('path');
|
||||
|
||||
sharp('public/AskAlwrity-min.ico')
|
||||
.resize(60, 60)
|
||||
.webp({ quality: 80 })
|
||||
.toFile('public/AskAlwrity-min.webp')
|
||||
.then(() => console.log('Image optimized!'));
|
||||
```
|
||||
|
||||
3. Update `index.html`:
|
||||
```html
|
||||
<link rel="icon" href="%PUBLIC_URL%/AskAlwrity-min.webp" />
|
||||
```
|
||||
|
||||
## Performance Budget
|
||||
|
||||
Set performance budgets in `package.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"performance": {
|
||||
"budgets": [
|
||||
{
|
||||
"type": "initial",
|
||||
"maximumWarning": "500kb",
|
||||
"maximumError": "1mb"
|
||||
},
|
||||
{
|
||||
"type": "anyComponentStyle",
|
||||
"maximumWarning": "50kb",
|
||||
"maximumError": "100kb"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Monitoring
|
||||
|
||||
### Lighthouse CI
|
||||
|
||||
Add to CI/CD pipeline:
|
||||
```bash
|
||||
npm install -g @lhci/cli
|
||||
lhci autorun
|
||||
```
|
||||
|
||||
### Web Vitals
|
||||
|
||||
Monitor in production:
|
||||
```javascript
|
||||
import { getCLS, getFID, getFCP, getLCP, getTTFB } from 'web-vitals';
|
||||
|
||||
function sendToAnalytics(metric) {
|
||||
// Send to your analytics service
|
||||
console.log(metric);
|
||||
}
|
||||
|
||||
getCLS(sendToAnalytics);
|
||||
getFID(sendToAnalytics);
|
||||
getFCP(sendToAnalytics);
|
||||
getLCP(sendToAnalytics);
|
||||
getTTFB(sendToAnalytics);
|
||||
```
|
||||
|
||||
## Expected Improvements
|
||||
|
||||
After implementing all optimizations:
|
||||
|
||||
- **Performance Score**: 28 → 70-80+
|
||||
- **Bundle Size**: Reduced by ~1.5MB (unused code + minification)
|
||||
- **Cache Hit Rate**: 0% → 90%+ (with proper headers)
|
||||
- **CLS**: 0.167 → <0.1 (with layout fixes)
|
||||
- **LCP**: Improved by additional 200-300ms
|
||||
|
||||
@@ -1,114 +0,0 @@
|
||||
# Unused JavaScript Optimization - Progress Tracker
|
||||
|
||||
## ✅ Completed
|
||||
|
||||
1. **Bundle Analysis Setup**
|
||||
- Added `source-map-explorer` to devDependencies
|
||||
- Added `npm run analyze` script
|
||||
- Created analysis guide
|
||||
|
||||
2. **Lazy Loading Infrastructure**
|
||||
- ✅ Created `frontend/src/utils/lazyRecharts.tsx` - Lazy load recharts
|
||||
- ✅ Created `frontend/src/utils/lazyWix.ts` - Lazy load Wix SDK
|
||||
- ✅ Updated `frontend/src/components/billing/UsageTrends.tsx`:
|
||||
- Replaced direct recharts imports with lazy versions
|
||||
- Replaced lucide-react icons with MUI icons
|
||||
- Added Suspense boundaries
|
||||
|
||||
## 📋 Remaining Tasks
|
||||
|
||||
### High Priority (Large Impact)
|
||||
|
||||
1. **Update Other Chart Components**
|
||||
- [ ] `frontend/src/components/SchedulerDashboard/SchedulerCharts.tsx`
|
||||
- [ ] `frontend/src/components/ContentPlanningDashboard/components/MonitoringCharts.tsx`
|
||||
- [ ] `frontend/src/components/shared/charts/AdvancedChartComponents.tsx`
|
||||
|
||||
2. **Lazy Load Wix SDK**
|
||||
- [ ] `frontend/src/components/WixTestPage/WixTestPage.tsx`
|
||||
- [ ] `frontend/src/components/WixCallbackPage/WixCallbackPage.tsx`
|
||||
- [ ] `frontend/src/components/OnboardingWizard/common/usePlatformConnections.ts`
|
||||
|
||||
### Medium Priority
|
||||
|
||||
3. **Replace Lucide Icons with MUI Icons**
|
||||
- [ ] `frontend/src/components/billing/EnhancedBillingDashboard.tsx`
|
||||
- [ ] `frontend/src/components/billing/CompactBillingDashboard.tsx`
|
||||
- [ ] `frontend/src/components/billing/BillingOverview.tsx`
|
||||
- [ ] Other billing components using lucide-react
|
||||
|
||||
4. **Optimize Framer Motion**
|
||||
- Review usage and replace simple animations with CSS
|
||||
- Lazy load for non-critical animations
|
||||
|
||||
### Low Priority
|
||||
|
||||
5. **Further Code Splitting**
|
||||
- Lazy load heavy components within routes
|
||||
- Split large components into smaller chunks
|
||||
|
||||
## 🎯 How to Continue
|
||||
|
||||
### Step 1: Run Bundle Analysis
|
||||
```bash
|
||||
cd frontend
|
||||
npm install # Install source-map-explorer
|
||||
npm run analyze
|
||||
# Open bundle-report.html to see current state
|
||||
```
|
||||
|
||||
### Step 2: Update Chart Components
|
||||
Follow the pattern in `UsageTrends.tsx`:
|
||||
|
||||
```typescript
|
||||
// Before
|
||||
import { LineChart, Line } from 'recharts';
|
||||
|
||||
// After
|
||||
import { LazyLineChart, Line, ChartLoadingFallback } from '../../utils/lazyRecharts';
|
||||
import { Suspense } from 'react';
|
||||
|
||||
<Suspense fallback={<ChartLoadingFallback />}>
|
||||
<LazyLineChart data={data}>
|
||||
<Line />
|
||||
</LazyLineChart>
|
||||
</Suspense>
|
||||
```
|
||||
|
||||
### Step 3: Replace Icons
|
||||
```typescript
|
||||
// Before
|
||||
import { TrendingUp } from 'lucide-react';
|
||||
<TrendingUp size={20} />
|
||||
|
||||
// After
|
||||
import { TrendingUp as TrendingUpIcon } from '@mui/icons-material';
|
||||
<TrendingUpIcon fontSize="small" />
|
||||
```
|
||||
|
||||
### Step 4: Test
|
||||
```bash
|
||||
npm run build
|
||||
npm run analyze # Check if bundle size decreased
|
||||
```
|
||||
|
||||
## 📊 Expected Results
|
||||
|
||||
### Current
|
||||
- Unused JavaScript: 980 KiB
|
||||
- Recharts: ~200 KiB (loaded on every page)
|
||||
- Wix SDK: ~100 KiB (loaded on every page)
|
||||
|
||||
### After All Optimizations
|
||||
- Unused JavaScript: < 200 KiB (estimated)
|
||||
- Recharts: Only loaded when charts are viewed
|
||||
- Wix SDK: Only loaded on Wix-related pages
|
||||
- Performance: 33 → 50-60+ (estimated)
|
||||
|
||||
## 📝 Notes
|
||||
|
||||
- Lazy loading adds a small delay when components first load
|
||||
- Use Suspense boundaries with loading states
|
||||
- Test all functionality after changes
|
||||
- Monitor bundle size after each change
|
||||
|
||||
@@ -1,162 +0,0 @@
|
||||
# Performance Optimizations Applied
|
||||
|
||||
This document outlines all the performance optimizations implemented to improve Lighthouse scores and overall app performance.
|
||||
|
||||
## 1. Font Loading Optimization
|
||||
|
||||
### Changes Made:
|
||||
- Added `preconnect` hints for Google Fonts in `index.html`
|
||||
- Added `dns-prefetch` for faster DNS resolution
|
||||
- Font loading already uses `font-display: swap` in `global.css`
|
||||
|
||||
### Impact:
|
||||
- Reduces font loading time by ~330ms (LCP improvement)
|
||||
- Prevents render-blocking font requests
|
||||
|
||||
## 2. Code Splitting
|
||||
|
||||
### Changes Made:
|
||||
- Implemented `React.lazy()` for all route components in `App.tsx`
|
||||
- Added `Suspense` boundaries with loading fallbacks
|
||||
- Route-level code splitting reduces initial bundle size
|
||||
|
||||
### Impact:
|
||||
- Reduces initial JavaScript bundle from ~3.4MB to smaller chunks
|
||||
- Each route loads only when needed
|
||||
- Estimated savings: ~2,474 KiB of unused JavaScript
|
||||
|
||||
## 3. Layout Shift (CLS) Fixes
|
||||
|
||||
### Changes Made:
|
||||
- Changed `::after` and `::before` pseudo-elements from `absolute` to `fixed` positioning
|
||||
- Added `will-change: transform` for animation optimization
|
||||
- Added `overflow: hidden` to prevent layout shifts
|
||||
- Added `minHeight` to WorkflowHeroSection and parent containers to reserve space
|
||||
- Added `pointerEvents: 'none'` to pseudo-elements to prevent layout impact
|
||||
- Fixed line-height and width constraints on typography elements
|
||||
|
||||
### Impact:
|
||||
- Reduced CLS score from 0.634 to 0.167 (73% improvement)
|
||||
- Further improvements expected with reserved space for hero section
|
||||
- Prevents visual instability during page load
|
||||
|
||||
## 4. Component Memoization
|
||||
|
||||
### Changes Made:
|
||||
- Added `useMemo` for expensive search computations in `MainDashboard`
|
||||
- Added `useCallback` for event handlers to prevent unnecessary re-renders
|
||||
- Optimized search debouncing logic
|
||||
|
||||
### Impact:
|
||||
- Reduces unnecessary re-renders
|
||||
- Improves main thread performance
|
||||
- Reduces JavaScript execution time
|
||||
|
||||
## 5. Build Optimizations
|
||||
|
||||
### Changes Made:
|
||||
- Created `.env.production` with optimization flags
|
||||
- `GENERATE_SOURCEMAP=false` for smaller production builds
|
||||
- `INLINE_RUNTIME_CHUNK=false` for better caching
|
||||
|
||||
### Impact:
|
||||
- Smaller production bundle size
|
||||
- Better browser caching
|
||||
- Faster subsequent page loads
|
||||
|
||||
## 6. Resource Hints
|
||||
|
||||
### Changes Made:
|
||||
- Added `preconnect` for Google Fonts
|
||||
- Added `dns-prefetch` for external domains
|
||||
- Added meta tags for better browser optimization
|
||||
|
||||
### Impact:
|
||||
- Faster connection establishment
|
||||
- Reduced latency for external resources
|
||||
|
||||
## Performance Progress
|
||||
|
||||
### Before Optimizations:
|
||||
- **Performance Score**: 12
|
||||
- **CLS**: 0.634
|
||||
- **Bundle Size**: 3,435 KiB (single bundle)
|
||||
- **Cache**: 0% (3,514 KiB not cached)
|
||||
|
||||
### After Initial Optimizations:
|
||||
- **Performance Score**: 28 (133% improvement)
|
||||
- **CLS**: 0.167 (73% improvement)
|
||||
- **Bundle Size**: Code-split into multiple chunks
|
||||
- **Cache**: Still needs server configuration
|
||||
|
||||
## Remaining Optimizations Needed
|
||||
|
||||
### 1. Image Optimization
|
||||
- **Issue**: `AskAlwrity-min.ico` is 78.6 KiB but displayed at 60x60
|
||||
- **Solution**:
|
||||
- Convert to WebP format (saves ~68 KiB)
|
||||
- Resize to actual display size (saves ~74 KiB)
|
||||
- Use responsive images with `srcset`
|
||||
|
||||
### 2. Cache Headers
|
||||
- **Issue**: No cache headers for static assets (3,514 KiB not cached)
|
||||
- **Solution**: Configure server to add cache headers:
|
||||
```
|
||||
Cache-Control: public, max-age=31536000, immutable
|
||||
```
|
||||
For `bundle.js` and other static assets
|
||||
|
||||
### 3. Bundle Analysis
|
||||
- **Issue**: Large bundle size (3,435 KiB for bundle.js)
|
||||
- **Solution**:
|
||||
- Analyze bundle with `webpack-bundle-analyzer`
|
||||
- Remove unused dependencies
|
||||
- Consider dynamic imports for heavy libraries
|
||||
|
||||
### 4. Third-Party Scripts
|
||||
- **Issue**: Clerk and CopilotKit scripts add to main thread work
|
||||
- **Solution**:
|
||||
- Load third-party scripts asynchronously
|
||||
- Defer non-critical scripts
|
||||
- Consider loading Clerk after initial render
|
||||
|
||||
### 5. Long Tasks
|
||||
- **Issue**: 20 long tasks found, longest 6,208ms
|
||||
- **Solution**:
|
||||
- Break up large computations
|
||||
- Use `requestIdleCallback` for non-critical work
|
||||
- Implement virtual scrolling for long lists
|
||||
|
||||
## Performance Monitoring
|
||||
|
||||
### Recommended Tools:
|
||||
1. **Lighthouse CI**: Automate performance testing
|
||||
2. **Web Vitals**: Monitor Core Web Vitals in production
|
||||
3. **Bundle Analyzer**: Track bundle size over time
|
||||
4. **React DevTools Profiler**: Identify slow components
|
||||
|
||||
### Target Metrics:
|
||||
- **Performance Score**: 90+ (currently 28, up from 12 before optimizations)
|
||||
- **FCP**: < 1.8s
|
||||
- **LCP**: < 2.5s
|
||||
- **CLS**: < 0.1
|
||||
- **TBT**: < 200ms
|
||||
- **Bundle Size**: < 500 KiB initial load
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. **Immediate**:
|
||||
- Optimize images (WebP conversion)
|
||||
- Configure server cache headers
|
||||
- Run bundle analysis
|
||||
|
||||
2. **Short-term**:
|
||||
- Implement virtual scrolling
|
||||
- Optimize third-party script loading
|
||||
- Add service worker for caching
|
||||
|
||||
3. **Long-term**:
|
||||
- Consider migrating to Vite for faster builds
|
||||
- Implement progressive web app features
|
||||
- Add performance budgets to CI/CD
|
||||
|
||||
@@ -1,231 +0,0 @@
|
||||
# Unused JavaScript Optimization Guide
|
||||
|
||||
## Current Issue
|
||||
Lighthouse reports **980 KiB of unused JavaScript**. This guide helps identify and fix it.
|
||||
|
||||
## Strategy
|
||||
|
||||
### 1. Bundle Analysis
|
||||
First, analyze what's taking up space:
|
||||
|
||||
```bash
|
||||
cd frontend
|
||||
npm install # Install source-map-explorer if needed
|
||||
npm run analyze
|
||||
```
|
||||
|
||||
This creates `bundle-report.html` - open it in a browser to see:
|
||||
- Which packages are largest
|
||||
- Which files import them
|
||||
- Unused code within packages
|
||||
|
||||
### 2. Lazy Load Heavy Dependencies
|
||||
|
||||
#### A. Recharts (Charts Library)
|
||||
**Size**: ~200+ KiB
|
||||
**Usage**: Only in billing, analytics, and scheduler dashboards
|
||||
|
||||
**Before**:
|
||||
```typescript
|
||||
import { LineChart, Line } from 'recharts';
|
||||
```
|
||||
|
||||
**After**:
|
||||
```typescript
|
||||
import { LazyLineChart, Line } from '../../utils/lazyRecharts';
|
||||
import { Suspense } from 'react';
|
||||
|
||||
<Suspense fallback={<ChartSkeleton />}>
|
||||
<LazyLineChart>
|
||||
<Line />
|
||||
</LazyLineChart>
|
||||
</Suspense>
|
||||
```
|
||||
|
||||
**Files to update**:
|
||||
- `frontend/src/components/billing/UsageTrends.tsx`
|
||||
- `frontend/src/components/SchedulerDashboard/SchedulerCharts.tsx`
|
||||
- `frontend/src/components/ContentPlanningDashboard/components/MonitoringCharts.tsx`
|
||||
- `frontend/src/components/shared/charts/AdvancedChartComponents.tsx`
|
||||
|
||||
#### B. Wix SDK
|
||||
**Size**: ~100+ KiB
|
||||
**Usage**: Only in WixTestPage and WixCallbackPage
|
||||
|
||||
**Before**:
|
||||
```typescript
|
||||
import { createClient } from '@wix/sdk';
|
||||
```
|
||||
|
||||
**After**:
|
||||
```typescript
|
||||
const { createClient } = await import('@wix/sdk');
|
||||
// Or use lazy loading in component
|
||||
```
|
||||
|
||||
**Files to update**:
|
||||
- `frontend/src/components/WixTestPage/WixTestPage.tsx`
|
||||
- `frontend/src/components/WixCallbackPage/WixCallbackPage.tsx`
|
||||
- `frontend/src/components/OnboardingWizard/common/usePlatformConnections.ts`
|
||||
|
||||
#### C. Framer Motion (Animations)
|
||||
**Size**: ~246 KiB
|
||||
**Usage**: Used extensively but can be optimized
|
||||
|
||||
**Strategy**:
|
||||
1. Use CSS animations for simple transitions
|
||||
2. Lazy load framer-motion for non-critical animations
|
||||
3. Use `will-change` CSS property instead of complex animations
|
||||
|
||||
**Example**:
|
||||
```typescript
|
||||
// Instead of complex framer-motion for simple fade
|
||||
// Use CSS:
|
||||
const fadeIn = {
|
||||
animation: 'fadeIn 0.3s ease-in'
|
||||
};
|
||||
```
|
||||
|
||||
### 3. Tree Shaking Optimization
|
||||
|
||||
#### A. Material-UI Icons
|
||||
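**Issue**: Named imports from the `@mui/icons-material` barrel pull in the entire icon set whenever tree shaking does not apply (e.g. development builds)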
|
||||
|
||||
**Before**:
|
||||
```typescript
|
||||
import { TrendingUp, TrendingDown } from '@mui/icons-material';
|
||||
```
|
||||
|
||||
**After** (already optimized, but verify):
|
||||
```typescript
|
||||
// React Scripts should tree-shake automatically
|
||||
// But verify imports are specific, e.g. '@mui/icons-material/TrendingUp'
|
||||
```
|
||||
|
||||
#### B. Lucide React Icons
|
||||
**Issue**: Large icon library, some can be replaced with MUI icons
|
||||
|
||||
**Strategy**: Replace lucide-react icons with MUI icons where possible
|
||||
|
||||
**Before**:
|
||||
```typescript
|
||||
import { TrendingUp } from 'lucide-react';
|
||||
```
|
||||
|
||||
**After**:
|
||||
```typescript
|
||||
import { TrendingUp } from '@mui/icons-material';
|
||||
```
|
||||
|
||||
### 4. Remove Unused Dependencies
|
||||
|
||||
Check if these are actually used:
|
||||
- `@wix/blog` - Only in WixTestPage
|
||||
- `lucide-react` - Can be replaced with MUI icons in many places
|
||||
- `zod` - Verify if all schemas are used
|
||||
|
||||
### 5. Code Splitting Improvements
|
||||
|
||||
#### A. Route-Level Splitting (Already Done ✅)
|
||||
Routes are already lazy-loaded.
|
||||
|
||||
#### B. Component-Level Splitting
|
||||
Lazy load heavy components within routes:
|
||||
|
||||
```typescript
|
||||
// In MainDashboard.tsx
|
||||
const EnhancedBillingDashboard = lazy(() =>
|
||||
import('../billing/EnhancedBillingDashboard')
|
||||
);
|
||||
```
|
||||
|
||||
### 6. Dynamic Imports for Heavy Features
|
||||
|
||||
#### A. Charts
|
||||
Only load charts when dashboard is viewed:
|
||||
|
||||
```typescript
|
||||
const loadCharts = () => import('recharts');
|
||||
```
|
||||
|
||||
#### B. Analytics
|
||||
Only load analytics when analytics tab is opened:
|
||||
|
||||
```typescript
|
||||
const loadAnalytics = () => import('./components/AnalyticsInsights');
|
||||
```
|
||||
|
||||
## Implementation Steps
|
||||
|
||||
### Step 1: Analyze Bundle
|
||||
```bash
|
||||
npm run analyze
|
||||
# Open bundle-report.html
|
||||
```
|
||||
|
||||
### Step 2: Identify Large Dependencies
|
||||
Look for:
|
||||
- Packages > 50 KiB
|
||||
- Packages used in < 3 places
|
||||
- Packages that can be lazy-loaded
|
||||
|
||||
### Step 3: Lazy Load Heavy Dependencies
|
||||
1. Create lazy wrappers (see `lazyRecharts.tsx`)
|
||||
2. Update imports to use lazy versions
|
||||
3. Add Suspense boundaries
|
||||
|
||||
### Step 4: Replace Icons
|
||||
1. Find lucide-react imports
|
||||
2. Replace with MUI icons where possible
|
||||
3. Remove lucide-react if not needed
|
||||
|
||||
### Step 5: Test
|
||||
```bash
|
||||
npm run build
|
||||
npm run analyze # Check if bundle size decreased
|
||||
```
|
||||
|
||||
## Expected Results
|
||||
|
||||
### Before
|
||||
- Unused JavaScript: 980 KiB
|
||||
- Bundle size: Large initial load
|
||||
|
||||
### After
|
||||
- Unused JavaScript: < 200 KiB (estimated)
|
||||
- Bundle size: Reduced by ~500-700 KiB
|
||||
- Performance: Improved initial load time
|
||||
|
||||
## Files to Update
|
||||
|
||||
### High Priority (Large Impact)
|
||||
1. ✅ `frontend/src/utils/lazyRecharts.tsx` - Created
|
||||
2. ✅ `frontend/src/utils/lazyWix.ts` - Created
|
||||
3. `frontend/src/components/billing/UsageTrends.tsx` - Use lazy recharts
|
||||
4. `frontend/src/components/SchedulerDashboard/SchedulerCharts.tsx` - Use lazy recharts
|
||||
5. `frontend/src/components/WixTestPage/WixTestPage.tsx` - Use lazy Wix SDK
|
||||
|
||||
### Medium Priority
|
||||
6. `frontend/src/components/ContentPlanningDashboard/components/MonitoringCharts.tsx`
|
||||
7. `frontend/src/components/shared/charts/AdvancedChartComponents.tsx`
|
||||
8. Replace lucide-react with MUI icons in billing components
|
||||
|
||||
### Low Priority (Optimization)
|
||||
9. Optimize framer-motion usage
|
||||
10. Further code splitting within components
|
||||
|
||||
## Monitoring
|
||||
|
||||
After changes, verify:
|
||||
1. Bundle size decreased
|
||||
2. Lighthouse "Reduce unused JavaScript" improved
|
||||
3. No broken functionality
|
||||
4. Charts still work (with loading states)
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. Run `npm run analyze` to see current bundle
|
||||
2. Update components to use lazy-loaded dependencies
|
||||
3. Test functionality
|
||||
4. Re-run Lighthouse audit
|
||||
|
||||
@@ -1,16 +1,97 @@
|
||||
import React from "react";
|
||||
import { Stack, Box, Typography, Divider, Chip, Paper, alpha } from "@mui/material";
|
||||
import { Psychology as PsychologyIcon, Insights as InsightsIcon, Search as SearchIcon } from "@mui/icons-material";
|
||||
import React, { useState, useEffect } from "react";
|
||||
import { Stack, Box, Typography, Divider, Chip, Paper, alpha, CircularProgress } from "@mui/material";
|
||||
import { Psychology as PsychologyIcon, Insights as InsightsIcon, Search as SearchIcon, Person as PersonIcon, AutoAwesome as AutoAwesomeIcon } from "@mui/icons-material";
|
||||
import { PodcastAnalysis } from "./types";
|
||||
import { GlassyCard, glassyCardSx, SecondaryButton } from "./ui";
|
||||
import { Refresh as RefreshIcon } from "@mui/icons-material";
|
||||
import { aiApiClient } from "../../api/client";
|
||||
|
||||
/** Props accepted by the AnalysisPanel component. */
interface AnalysisPanelProps {
|
||||
/** Analysis to display; the panel renders nothing when this is null. */
analysis: PodcastAnalysis | null;
|
||||
/** Podcast idea text, shown under "Podcast Idea" in the "Your Inputs" section when provided. */
idea?: string;
|
||||
/** Duration in minutes, rendered as a "<n> minutes" chip when defined. */
duration?: number;
|
||||
/** Number of speakers, rendered as a "<n> speaker(s)" chip when defined. */
speakers?: number;
|
||||
/** Presenter avatar location; URLs under /api/podcast/images/ or /api/podcast/avatar/ are fetched as a blob via aiApiClient, any other URL is used as-is. */
avatarUrl?: string | null;
|
||||
/** Prompt shown under "AI Generation Prompt"; a "Prompt not available" note is shown when an avatar exists without it. */
avatarPrompt?: string | null;
|
||||
/** Optional callback; presumably wired to a regenerate action (its usage is not visible in this hunk — confirm). */
onRegenerate?: () => void;
|
||||
}
|
||||
|
||||
export const AnalysisPanel: React.FC<AnalysisPanelProps> = ({ analysis, onRegenerate }) => {
|
||||
export const AnalysisPanel: React.FC<AnalysisPanelProps> = ({ analysis, idea, duration, speakers, avatarUrl, avatarPrompt, onRegenerate }) => {
|
||||
const [avatarBlobUrl, setAvatarBlobUrl] = useState<string | null>(null);
|
||||
const [avatarLoading, setAvatarLoading] = useState(false);
|
||||
const [avatarError, setAvatarError] = useState(false);
|
||||
|
||||
// Load avatar image as blob for authenticated URLs
|
||||
useEffect(() => {
  // Start every run from a clean slate so an error recorded for a previous
  // avatarUrl cannot keep the "Failed to load avatar" state on screen.
  setAvatarError(false);

  if (!avatarUrl) {
    setAvatarLoading(false);
    setAvatarBlobUrl(null);
    return;
  }

  // Endpoints that cannot be loaded by a plain <img src>; they are fetched
  // through aiApiClient instead.
  const isAuthenticatedEndpoint =
    avatarUrl.includes('/api/podcast/images/') || avatarUrl.includes('/api/podcast/avatar/');

  // Existing blob URLs and direct URLs are handed to the <img> unchanged.
  if (avatarUrl.startsWith('blob:') || !isAuthenticatedEndpoint) {
    setAvatarLoading(false);
    setAvatarBlobUrl(avatarUrl);
    return;
  }

  // Set by the cleanup below; once true, results of this run are discarded so a
  // slow response can never overwrite state that belongs to a newer avatarUrl.
  let cancelled = false;
  // Blob URL created by this run. Only URLs created here are revoked; blob URLs
  // supplied by the parent through avatarUrl are never touched.
  let ownedBlobUrl: string | null = null;

  setAvatarLoading(true);

  const loadAvatarBlob = async () => {
    try {
      const response = await aiApiClient.get(avatarUrl, { responseType: 'blob' });
      if (cancelled) return;
      ownedBlobUrl = URL.createObjectURL(response.data);
      setAvatarBlobUrl(ownedBlobUrl);
    } catch (error) {
      if (cancelled) return;
      console.error('[AnalysisPanel] Failed to load avatar as blob:', error);
      // Fallback: let the browser load the image directly with a query token.
      try {
        const token = localStorage.getItem('clerk_dashboard_token') || '';
        if (token) {
          // Respect a query string that may already be present on the URL.
          const separator = avatarUrl.includes('?') ? '&' : '?';
          setAvatarBlobUrl(`${avatarUrl}${separator}token=${encodeURIComponent(token)}`);
        } else {
          setAvatarError(true);
        }
      } catch (fallbackError) {
        console.error('[AnalysisPanel] Fallback avatar loading failed:', fallbackError);
        setAvatarError(true);
      }
    } finally {
      // A cancelled run leaves the loading flag to the run that replaced it.
      if (!cancelled) {
        setAvatarLoading(false);
      }
    }
  };

  loadAvatarBlob();

  // Cleanup on unmount or when avatarUrl changes: stop pending updates and
  // release the blob URL this run created.
  return () => {
    cancelled = true;
    if (ownedBlobUrl) {
      URL.revokeObjectURL(ownedBlobUrl);
    }
  };
}, [avatarUrl]);
|
||||
|
||||
if (!analysis) return null;
|
||||
return (
|
||||
<GlassyCard
|
||||
@@ -54,6 +135,229 @@ export const AnalysisPanel: React.FC<AnalysisPanelProps> = ({ analysis, onRegene
|
||||
|
||||
<Divider sx={{ borderColor: "rgba(0,0,0,0.06)" }} />
|
||||
|
||||
{/* Inputs Section */}
|
||||
{(idea || duration || speakers || avatarUrl || avatarPrompt) && (
|
||||
<>
|
||||
<Box>
|
||||
<Typography
|
||||
variant="subtitle1"
|
||||
sx={{
|
||||
color: "#0f172a",
|
||||
fontWeight: 700,
|
||||
mb: 1.5,
|
||||
display: "flex",
|
||||
alignItems: "center",
|
||||
gap: 0.5,
|
||||
}}
|
||||
>
|
||||
Your Inputs
|
||||
</Typography>
|
||||
<Box
|
||||
sx={{
|
||||
display: "grid",
|
||||
gridTemplateColumns: { xs: "1fr", md: avatarUrl ? "1fr 1fr" : "1fr" },
|
||||
gap: 3,
|
||||
alignItems: "flex-start",
|
||||
}}
|
||||
>
|
||||
{/* Left Column: Text Inputs */}
|
||||
<Stack spacing={1.5}>
|
||||
{idea && (
|
||||
<Box>
|
||||
<Typography variant="caption" sx={{ color: "#64748b", fontWeight: 600, display: "block", mb: 0.5 }}>
|
||||
Podcast Idea
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ color: "#0f172a", wordBreak: "break-word" }}>
|
||||
{idea}
|
||||
</Typography>
|
||||
</Box>
|
||||
)}
|
||||
<Stack direction="row" spacing={2} flexWrap="wrap">
|
||||
{duration !== undefined && (
|
||||
<Box>
|
||||
<Typography variant="caption" sx={{ color: "#64748b", fontWeight: 600, display: "block", mb: 0.5 }}>
|
||||
Duration
|
||||
</Typography>
|
||||
<Chip
|
||||
label={`${duration} minutes`}
|
||||
size="small"
|
||||
sx={{ background: "#f1f5f9", color: "#0f172a", border: "1px solid rgba(0,0,0,0.08)" }}
|
||||
/>
|
||||
</Box>
|
||||
)}
|
||||
{speakers !== undefined && (
|
||||
<Box>
|
||||
<Typography variant="caption" sx={{ color: "#64748b", fontWeight: 600, display: "block", mb: 0.5 }}>
|
||||
Speakers
|
||||
</Typography>
|
||||
<Chip
|
||||
label={`${speakers} ${speakers === 1 ? "speaker" : "speakers"}`}
|
||||
size="small"
|
||||
sx={{ background: "#f1f5f9", color: "#0f172a", border: "1px solid rgba(0,0,0,0.08)" }}
|
||||
/>
|
||||
</Box>
|
||||
)}
|
||||
</Stack>
|
||||
|
||||
{/* AI Prompt Used for Avatar Generation */}
|
||||
{avatarUrl && (
|
||||
<Box>
|
||||
<Typography
|
||||
variant="caption"
|
||||
sx={{
|
||||
color: "#64748b",
|
||||
fontWeight: 600,
|
||||
display: "flex",
|
||||
alignItems: "center",
|
||||
gap: 0.5,
|
||||
mb: 0.75,
|
||||
}}
|
||||
>
|
||||
<AutoAwesomeIcon sx={{ fontSize: 14 }} />
|
||||
AI Generation Prompt
|
||||
</Typography>
|
||||
{avatarPrompt ? (
|
||||
<Paper
|
||||
sx={{
|
||||
p: 1.5,
|
||||
background: "#f8fafc",
|
||||
border: "1px solid rgba(0,0,0,0.08)",
|
||||
borderRadius: 1.5,
|
||||
maxHeight: 200,
|
||||
overflow: "auto",
|
||||
}}
|
||||
>
|
||||
<Typography
|
||||
variant="caption"
|
||||
sx={{
|
||||
color: "#475569",
|
||||
fontFamily: "monospace",
|
||||
fontSize: "0.75rem",
|
||||
lineHeight: 1.6,
|
||||
whiteSpace: "pre-wrap",
|
||||
wordBreak: "break-word",
|
||||
display: "block",
|
||||
}}
|
||||
>
|
||||
{avatarPrompt}
|
||||
</Typography>
|
||||
</Paper>
|
||||
) : (
|
||||
<Paper
|
||||
sx={{
|
||||
p: 1.5,
|
||||
background: "#f1f5f9",
|
||||
border: "1px solid rgba(0,0,0,0.08)",
|
||||
borderRadius: 1.5,
|
||||
}}
|
||||
>
|
||||
<Typography
|
||||
variant="caption"
|
||||
sx={{
|
||||
color: "#64748b",
|
||||
fontStyle: "italic",
|
||||
fontSize: "0.75rem",
|
||||
}}
|
||||
>
|
||||
Prompt not available (avatar was uploaded or generated before this feature was added)
|
||||
</Typography>
|
||||
</Paper>
|
||||
)}
|
||||
</Box>
|
||||
)}
|
||||
</Stack>
|
||||
|
||||
{/* Right Column: Presenter Avatar */}
|
||||
{avatarUrl && (
|
||||
<Box>
|
||||
<Typography
|
||||
variant="caption"
|
||||
sx={{
|
||||
color: "#64748b",
|
||||
fontWeight: 600,
|
||||
display: "flex",
|
||||
alignItems: "center",
|
||||
gap: 0.5,
|
||||
mb: 1,
|
||||
}}
|
||||
>
|
||||
<PersonIcon sx={{ fontSize: 16 }} />
|
||||
Presenter Avatar
|
||||
</Typography>
|
||||
<Box
|
||||
sx={{
|
||||
width: "100%",
|
||||
maxWidth: { xs: "100%", md: 300 },
|
||||
borderRadius: 2,
|
||||
overflow: "hidden",
|
||||
border: "1px solid rgba(102,126,234,0.2)",
|
||||
background: alpha("#667eea", 0.05),
|
||||
position: "relative",
|
||||
aspectRatio: "1",
|
||||
boxShadow: "0 4px 12px rgba(0,0,0,0.08)",
|
||||
}}
|
||||
>
|
||||
{avatarLoading ? (
|
||||
<Box
|
||||
sx={{
|
||||
display: "flex",
|
||||
alignItems: "center",
|
||||
justifyContent: "center",
|
||||
height: "100%",
|
||||
background: "#f8fafc",
|
||||
}}
|
||||
>
|
||||
<CircularProgress size={40} />
|
||||
</Box>
|
||||
) : avatarError ? (
|
||||
<Box
|
||||
sx={{
|
||||
display: "flex",
|
||||
alignItems: "center",
|
||||
justifyContent: "center",
|
||||
height: "100%",
|
||||
background: "#fef2f2",
|
||||
color: "#dc2626",
|
||||
p: 2,
|
||||
}}
|
||||
>
|
||||
<Typography variant="caption" sx={{ textAlign: "center" }}>
|
||||
Failed to load avatar
|
||||
</Typography>
|
||||
</Box>
|
||||
) : avatarBlobUrl ? (
|
||||
<Box
|
||||
component="img"
|
||||
src={avatarBlobUrl}
|
||||
alt="Podcast Presenter"
|
||||
sx={{
|
||||
width: "100%",
|
||||
height: "100%",
|
||||
objectFit: "cover",
|
||||
display: "block",
|
||||
}}
|
||||
onError={(e) => {
|
||||
console.error('[AnalysisPanel] Avatar image failed to load:', {
|
||||
src: e.currentTarget.src,
|
||||
avatarUrl,
|
||||
avatarBlobUrl,
|
||||
});
|
||||
setAvatarError(true);
|
||||
}}
|
||||
onLoad={() => {
|
||||
console.log('[AnalysisPanel] Avatar image loaded successfully');
|
||||
}}
|
||||
/>
|
||||
) : null}
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
</Box>
|
||||
</Box>
|
||||
<Divider sx={{ borderColor: "rgba(0,0,0,0.06)" }} />
|
||||
</>
|
||||
)}
|
||||
|
||||
<Box sx={{ display: "grid", gridTemplateColumns: { xs: "1fr", md: "1fr 1fr" }, gap: 3 }}>
|
||||
<Stack spacing={2}>
|
||||
<Box>
|
||||
|
||||
@@ -6,6 +6,9 @@ import {
|
||||
Info as InfoIcon,
|
||||
HelpOutline as HelpOutlineIcon,
|
||||
AttachMoney as AttachMoneyIcon,
|
||||
CloudUpload as CloudUploadIcon,
|
||||
Person as PersonIcon,
|
||||
Delete as DeleteIcon,
|
||||
} from "@mui/icons-material";
|
||||
import { CreateProjectPayload, Knobs } from "./types";
|
||||
import { PrimaryButton, SecondaryButton } from "./ui";
|
||||
@@ -35,6 +38,9 @@ export const CreateModal: React.FC<CreateModalProps> = ({ onCreate, open, defaul
|
||||
const [budgetCap, setBudgetCap] = useState<number>(50);
|
||||
const [voiceFile, setVoiceFile] = useState<File | null>(null);
|
||||
const [avatarFile, setAvatarFile] = useState<File | null>(null);
|
||||
const [avatarPreview, setAvatarPreview] = useState<string | null>(null);
|
||||
const [avatarUrl, setAvatarUrl] = useState<string | null>(null); // Store uploaded avatar URL
|
||||
const [makingPresentable, setMakingPresentable] = useState(false);
|
||||
const [knobs, setKnobs] = useState<Knobs>({ ...defaultKnobs });
|
||||
const [placeholderIndex, setPlaceholderIndex] = useState(0);
|
||||
|
||||
@@ -107,8 +113,22 @@ export const CreateModal: React.FC<CreateModalProps> = ({ onCreate, open, defaul
|
||||
|
||||
const canSubmit = Boolean(idea || url);
|
||||
|
||||
const submit = () => {
|
||||
const submit = async () => {
|
||||
if (!canSubmit || isSubmitting) return;
|
||||
|
||||
// If avatar was uploaded but not yet uploaded to server, upload it now
|
||||
let finalAvatarUrl: string | null = avatarUrl;
|
||||
if (avatarFile && !avatarUrl) {
|
||||
try {
|
||||
const { podcastApi } = await import("../../services/podcastApi");
|
||||
const uploadResult = await podcastApi.uploadAvatar(avatarFile);
|
||||
finalAvatarUrl = uploadResult.avatar_url;
|
||||
} catch (error) {
|
||||
console.error('Avatar upload failed:', error);
|
||||
// Continue without avatar
|
||||
}
|
||||
}
|
||||
|
||||
onCreate({
|
||||
ideaOrUrl: idea || url,
|
||||
speakers,
|
||||
@@ -116,6 +136,7 @@ export const CreateModal: React.FC<CreateModalProps> = ({ onCreate, open, defaul
|
||||
knobs,
|
||||
budgetCap,
|
||||
files: { voiceFile, avatarFile },
|
||||
avatarUrl: finalAvatarUrl,
|
||||
});
|
||||
};
|
||||
|
||||
@@ -127,6 +148,9 @@ export const CreateModal: React.FC<CreateModalProps> = ({ onCreate, open, defaul
|
||||
setBudgetCap(50);
|
||||
setVoiceFile(null);
|
||||
setAvatarFile(null);
|
||||
setAvatarPreview(null);
|
||||
setAvatarUrl(null);
|
||||
setMakingPresentable(false);
|
||||
setKnobs({ ...defaultKnobs });
|
||||
setPlaceholderIndex(0);
|
||||
};
|
||||
@@ -141,6 +165,68 @@ export const CreateModal: React.FC<CreateModalProps> = ({ onCreate, open, defaul
|
||||
setSpeakers(clamped);
|
||||
};
|
||||
|
||||
/**
 * Handles selection of an avatar image from the file input.
 *
 * Rejects non-image files and files larger than 5MB (logged to the console
 * only), stores the accepted file, generates a data-URL preview, and uploads
 * the file right away so the server URL is available for "Make Presentable".
 */
const handleAvatarChange = async (e: React.ChangeEvent<HTMLInputElement>) => {
  const selected = e.target.files?.[0];
  if (!selected) return;

  // Only image MIME types are accepted.
  if (!selected.type.startsWith('image/')) {
    console.error('Please select an image file');
    return;
  }

  // Upper bound on the accepted file size: 5MB.
  const maxAvatarBytes = 5 * 1024 * 1024;
  if (selected.size > maxAvatarBytes) {
    console.error('Image file size must be less than 5MB');
    return;
  }

  setAvatarFile(selected);

  // Local preview, available before (and independent of) the upload.
  const previewReader = new FileReader();
  previewReader.onloadend = () => {
    setAvatarPreview(previewReader.result as string);
  };
  previewReader.readAsDataURL(selected);

  // Immediate upload; on failure the local preview stays and the upload is
  // retried on submit.
  try {
    const { podcastApi } = await import("../../services/podcastApi");
    const { avatar_url: uploadedUrl } = await podcastApi.uploadAvatar(selected);
    setAvatarUrl(uploadedUrl);
  } catch (error) {
    console.error('Avatar upload failed:', error);
  }
};
|
||||
|
||||
/**
 * Clears the selected avatar: file, preview, uploaded URL and the
 * "making presentable" flag.
 */
const handleRemoveAvatar = () => {
  // The preview is a blob URL after "Make Presentable" (created with
  // URL.createObjectURL); release it so the image data is not kept alive.
  // Data-URL previews from FileReader need no cleanup.
  if (avatarPreview && avatarPreview.startsWith('blob:')) {
    URL.revokeObjectURL(avatarPreview);
  }
  setAvatarFile(null);
  setAvatarPreview(null);
  setAvatarUrl(null);
  setMakingPresentable(false);
};
|
||||
|
||||
/**
 * Asks the backend to transform the uploaded avatar, then shows the result.
 *
 * Does nothing when no avatar has been uploaded yet or a transformation is
 * already running. On success the preview switches to a blob URL of the
 * transformed image and avatarUrl is updated to the URL returned by the API.
 * Failures are logged and leave the current preview and URL untouched.
 */
const handleMakePresentable = async () => {
  if (!avatarUrl || makingPresentable) return;

  // Remember the preview being replaced so a blob URL from an earlier
  // transformation can be released once the new one is in place.
  const previousPreview = avatarPreview;

  try {
    setMakingPresentable(true);
    const { podcastApi } = await import("../../services/podcastApi");
    const result = await podcastApi.makeAvatarPresentable(avatarUrl);

    // Fetch the transformed image as blob to display
    const { aiApiClient } = await import("../../api/client");
    const response = await aiApiClient.get(result.avatar_url, { responseType: 'blob' });
    const blobUrl = URL.createObjectURL(response.data);
    setAvatarPreview(blobUrl);
    setAvatarUrl(result.avatar_url);

    // Release the previous blob preview; data-URL previews need no cleanup.
    if (previousPreview && previousPreview.startsWith('blob:')) {
      URL.revokeObjectURL(previousPreview);
    }
  } catch (error) {
    console.error('Failed to make avatar presentable:', error);
    // Could show error message to user
  } finally {
    setMakingPresentable(false);
  }
};
|
||||
|
||||
return (
|
||||
<Paper
|
||||
elevation={0}
|
||||
@@ -601,181 +687,372 @@ export const CreateModal: React.FC<CreateModalProps> = ({ onCreate, open, defaul
|
||||
{/* Settings Section */}
|
||||
<Box
|
||||
sx={{
|
||||
p: 3,
|
||||
borderRadius: 2,
|
||||
background: alpha("#f8fafc", 0.5),
|
||||
border: "1px solid rgba(15, 23, 42, 0.06)",
|
||||
p: 3.5,
|
||||
borderRadius: 2.5,
|
||||
background: "linear-gradient(135deg, rgba(248, 250, 252, 0.8) 0%, rgba(241, 245, 249, 0.8) 100%)",
|
||||
border: "1.5px solid rgba(15, 23, 42, 0.08)",
|
||||
boxShadow: "0 1px 3px rgba(15, 23, 42, 0.04), 0 4px 12px rgba(15, 23, 42, 0.06)",
|
||||
}}
|
||||
>
|
||||
<Typography variant="subtitle2" sx={{ mb: 2, color: "#0f172a", fontWeight: 600, fontSize: "0.9375rem" }}>
|
||||
Podcast Settings
|
||||
</Typography>
|
||||
<Stack direction={{ xs: "column", sm: "row" }} spacing={2} alignItems="flex-start">
|
||||
<Stack direction={{ xs: "column", sm: "row" }} spacing={2} sx={{ flex: 1 }}>
|
||||
<TextField
|
||||
label="Duration (minutes)"
|
||||
type="number"
|
||||
value={duration}
|
||||
onChange={(e) => handleDurationChange(Number(e.target.value) || 1)}
|
||||
InputProps={{ inputProps: { min: 1, max: 10 } }}
|
||||
size="small"
|
||||
helperText={duration > 10 ? "Maximum duration is 10 minutes" : `Recommended: 1-3 minutes for quick tests (currently: ${duration} min)`}
|
||||
error={duration > 10}
|
||||
<Stack direction="row" spacing={1.5} alignItems="center" sx={{ mb: 3 }}>
|
||||
<Box
|
||||
sx={{
|
||||
maxWidth: 220,
|
||||
"& .MuiOutlinedInput-root": {
|
||||
backgroundColor: "#ffffff",
|
||||
border: "1.5px solid rgba(15, 23, 42, 0.12)",
|
||||
borderRadius: 2,
|
||||
"&:hover": {
|
||||
backgroundColor: "#ffffff",
|
||||
borderColor: "rgba(102, 126, 234, 0.4)",
|
||||
},
|
||||
"&.Mui-focused": {
|
||||
borderColor: "#667eea",
|
||||
borderWidth: 2,
|
||||
},
|
||||
},
|
||||
"& .MuiInputLabel-root": {
|
||||
color: "#64748b",
|
||||
"&.Mui-focused": {
|
||||
color: "#667eea",
|
||||
},
|
||||
},
|
||||
"& .MuiFormHelperText-root": {
|
||||
color: "#64748b",
|
||||
fontSize: "0.8125rem",
|
||||
},
|
||||
}}
|
||||
/>
|
||||
<TextField
|
||||
label="Number of speakers"
|
||||
type="number"
|
||||
value={speakers}
|
||||
onChange={(e) => handleSpeakersChange(Number(e.target.value) || 1)}
|
||||
InputProps={{ inputProps: { min: 1, max: 2 } }}
|
||||
size="small"
|
||||
helperText={speakers > 2 ? "Maximum 2 speakers supported" : `Supports 1-2 speakers (currently: ${speakers})`}
|
||||
error={speakers > 2}
|
||||
sx={{
|
||||
maxWidth: 220,
|
||||
"& .MuiOutlinedInput-root": {
|
||||
backgroundColor: "#ffffff",
|
||||
border: "1.5px solid rgba(15, 23, 42, 0.12)",
|
||||
borderRadius: 2,
|
||||
"&:hover": {
|
||||
backgroundColor: "#ffffff",
|
||||
borderColor: "rgba(102, 126, 234, 0.4)",
|
||||
},
|
||||
"&.Mui-focused": {
|
||||
borderColor: "#667eea",
|
||||
borderWidth: 2,
|
||||
},
|
||||
},
|
||||
"& .MuiInputLabel-root": {
|
||||
color: "#64748b",
|
||||
"&.Mui-focused": {
|
||||
color: "#667eea",
|
||||
},
|
||||
},
|
||||
"& .MuiFormHelperText-root": {
|
||||
color: "#64748b",
|
||||
fontSize: "0.8125rem",
|
||||
},
|
||||
}}
|
||||
/>
|
||||
</Stack>
|
||||
|
||||
{/* Cost Breakdown Panel - positioned in empty space */}
|
||||
<Paper
|
||||
elevation={0}
|
||||
sx={{
|
||||
p: 2.5,
|
||||
background: "linear-gradient(135deg, rgba(16, 185, 129, 0.08) 0%, rgba(5, 150, 105, 0.08) 100%)",
|
||||
border: "1.5px solid rgba(16, 185, 129, 0.2)",
|
||||
width: 40,
|
||||
height: 40,
|
||||
borderRadius: 2,
|
||||
minWidth: { xs: "100%", sm: 300 },
|
||||
flex: { xs: "none", sm: "0 0 auto" },
|
||||
boxShadow: "0 2px 8px rgba(16, 185, 129, 0.08)",
|
||||
background: "linear-gradient(135deg, rgba(102, 126, 234, 0.12) 0%, rgba(118, 75, 162, 0.12) 100%)",
|
||||
display: "flex",
|
||||
alignItems: "center",
|
||||
justifyContent: "center",
|
||||
}}
|
||||
>
|
||||
<Stack spacing={1.5}>
|
||||
<Stack direction="row" spacing={1} alignItems="center">
|
||||
<Box
|
||||
sx={{
|
||||
width: 32,
|
||||
height: 32,
|
||||
borderRadius: 1.5,
|
||||
background: "linear-gradient(135deg, rgba(16, 185, 129, 0.15) 0%, rgba(5, 150, 105, 0.15) 100%)",
|
||||
display: "flex",
|
||||
alignItems: "center",
|
||||
justifyContent: "center",
|
||||
}}
|
||||
>
|
||||
<AttachMoneyIcon sx={{ fontSize: "1.125rem", color: "#059669" }} />
|
||||
</Box>
|
||||
<Typography variant="subtitle2" sx={{ color: "#0f172a", fontWeight: 600, fontSize: "0.875rem" }}>
|
||||
Estimated Cost
|
||||
</Typography>
|
||||
</Stack>
|
||||
<Typography
|
||||
variant="h5"
|
||||
sx={{
|
||||
color: "#059669",
|
||||
fontWeight: 700,
|
||||
fontSize: "1.75rem",
|
||||
lineHeight: 1.2,
|
||||
<AutoAwesomeIcon sx={{ color: "#667eea", fontSize: "1.25rem" }} />
|
||||
</Box>
|
||||
<Typography variant="h6" sx={{ color: "#0f172a", fontWeight: 700, fontSize: "1.125rem", letterSpacing: "-0.01em" }}>
|
||||
Podcast Settings
|
||||
</Typography>
|
||||
</Stack>
|
||||
|
||||
<Stack direction={{ xs: "column", lg: "row" }} spacing={3} alignItems="flex-start">
|
||||
{/* Duration and Speakers in vertical column */}
|
||||
<Box
|
||||
sx={{
|
||||
flex: { xs: "1 1 auto", lg: "0 0 280px" },
|
||||
width: { xs: "100%", lg: "280px" },
|
||||
p: 2.5,
|
||||
borderRadius: 2,
|
||||
background: "#ffffff",
|
||||
border: "1px solid rgba(15, 23, 42, 0.08)",
|
||||
boxShadow: "0 1px 2px rgba(15, 23, 42, 0.04)",
|
||||
}}
|
||||
>
|
||||
<Typography variant="subtitle2" sx={{ mb: 2, color: "#0f172a", fontWeight: 600, fontSize: "0.875rem" }}>
|
||||
Basic Configuration
|
||||
</Typography>
|
||||
<Stack spacing={2.5}>
|
||||
<TextField
|
||||
label="Duration (minutes)"
|
||||
type="number"
|
||||
value={duration}
|
||||
onChange={(e) => handleDurationChange(Number(e.target.value) || 1)}
|
||||
InputProps={{ inputProps: { min: 1, max: 10 } }}
|
||||
size="small"
|
||||
helperText={duration > 10 ? "Maximum duration is 10 minutes" : `Recommended: 1-3 minutes for quick tests`}
|
||||
error={duration > 10}
|
||||
fullWidth
|
||||
sx={{
|
||||
"& .MuiOutlinedInput-root": {
|
||||
backgroundColor: "#f8fafc",
|
||||
border: "1.5px solid rgba(15, 23, 42, 0.12)",
|
||||
borderRadius: 2,
|
||||
transition: "all 0.2s",
|
||||
"&:hover": {
|
||||
backgroundColor: "#ffffff",
|
||||
borderColor: "rgba(102, 126, 234, 0.4)",
|
||||
boxShadow: "0 0 0 3px rgba(102, 126, 234, 0.08)",
|
||||
},
|
||||
"&.Mui-focused": {
|
||||
backgroundColor: "#ffffff",
|
||||
borderColor: "#667eea",
|
||||
borderWidth: 2,
|
||||
boxShadow: "0 0 0 3px rgba(102, 126, 234, 0.12)",
|
||||
},
|
||||
},
|
||||
"& .MuiInputLabel-root": {
|
||||
color: "#64748b",
|
||||
fontWeight: 500,
|
||||
"&.Mui-focused": {
|
||||
color: "#667eea",
|
||||
fontWeight: 600,
|
||||
},
|
||||
},
|
||||
"& .MuiFormHelperText-root": {
|
||||
color: duration > 10 ? "#dc2626" : "#64748b",
|
||||
fontSize: "0.8125rem",
|
||||
mt: 0.75,
|
||||
},
|
||||
}}
|
||||
>
|
||||
${estimatedCost.total}
|
||||
</Typography>
|
||||
<Stack spacing={0.75} sx={{ mt: 0.5 }}>
|
||||
<Box sx={{ display: "flex", justifyContent: "space-between", alignItems: "center" }}>
|
||||
<Typography variant="caption" sx={{ color: "#64748b", fontSize: "0.8125rem", fontWeight: 400 }}>
|
||||
Audio Generation
|
||||
</Typography>
|
||||
<Typography variant="caption" sx={{ color: "#0f172a", fontSize: "0.8125rem", fontWeight: 600 }}>
|
||||
${estimatedCost.ttsCost}
|
||||
</Typography>
|
||||
</Box>
|
||||
<Box sx={{ display: "flex", justifyContent: "space-between", alignItems: "center" }}>
|
||||
<Typography variant="caption" sx={{ color: "#64748b", fontSize: "0.8125rem", fontWeight: 400 }}>
|
||||
Avatar Creation
|
||||
</Typography>
|
||||
<Typography variant="caption" sx={{ color: "#0f172a", fontSize: "0.8125rem", fontWeight: 600 }}>
|
||||
${estimatedCost.avatarCost}
|
||||
</Typography>
|
||||
</Box>
|
||||
<Box sx={{ display: "flex", justifyContent: "space-between", alignItems: "center" }}>
|
||||
<Typography variant="caption" sx={{ color: "#64748b", fontSize: "0.8125rem", fontWeight: 400 }}>
|
||||
Video Rendering
|
||||
</Typography>
|
||||
<Typography variant="caption" sx={{ color: "#0f172a", fontSize: "0.8125rem", fontWeight: 600 }}>
|
||||
${estimatedCost.videoCost}
|
||||
</Typography>
|
||||
</Box>
|
||||
<Box sx={{ display: "flex", justifyContent: "space-between", alignItems: "center" }}>
|
||||
<Typography variant="caption" sx={{ color: "#64748b", fontSize: "0.8125rem", fontWeight: 400 }}>
|
||||
Research
|
||||
</Typography>
|
||||
<Typography variant="caption" sx={{ color: "#0f172a", fontSize: "0.8125rem", fontWeight: 600 }}>
|
||||
${estimatedCost.researchCost}
|
||||
</Typography>
|
||||
</Box>
|
||||
</Stack>
|
||||
/>
|
||||
<TextField
|
||||
label="Number of speakers"
|
||||
type="number"
|
||||
value={speakers}
|
||||
onChange={(e) => handleSpeakersChange(Number(e.target.value) || 1)}
|
||||
InputProps={{ inputProps: { min: 1, max: 2 } }}
|
||||
size="small"
|
||||
helperText={speakers > 2 ? "Maximum 2 speakers supported" : `Supports 1-2 speakers`}
|
||||
error={speakers > 2}
|
||||
fullWidth
|
||||
sx={{
|
||||
"& .MuiOutlinedInput-root": {
|
||||
backgroundColor: "#f8fafc",
|
||||
border: "1.5px solid rgba(15, 23, 42, 0.12)",
|
||||
borderRadius: 2,
|
||||
transition: "all 0.2s",
|
||||
"&:hover": {
|
||||
backgroundColor: "#ffffff",
|
||||
borderColor: "rgba(102, 126, 234, 0.4)",
|
||||
boxShadow: "0 0 0 3px rgba(102, 126, 234, 0.08)",
|
||||
},
|
||||
"&.Mui-focused": {
|
||||
backgroundColor: "#ffffff",
|
||||
borderColor: "#667eea",
|
||||
borderWidth: 2,
|
||||
boxShadow: "0 0 0 3px rgba(102, 126, 234, 0.12)",
|
||||
},
|
||||
},
|
||||
"& .MuiInputLabel-root": {
|
||||
color: "#64748b",
|
||||
fontWeight: 500,
|
||||
"&.Mui-focused": {
|
||||
color: "#667eea",
|
||||
fontWeight: 600,
|
||||
},
|
||||
},
|
||||
"& .MuiFormHelperText-root": {
|
||||
color: speakers > 2 ? "#dc2626" : "#64748b",
|
||||
fontSize: "0.8125rem",
|
||||
mt: 0.75,
|
||||
},
|
||||
}}
|
||||
/>
|
||||
</Stack>
|
||||
</Box>
|
||||
|
||||
{/* Avatar Upload Section - replacing Estimated Cost */}
|
||||
<Box
|
||||
sx={{
|
||||
flex: 1,
|
||||
minWidth: 0,
|
||||
p: 2.5,
|
||||
borderRadius: 2,
|
||||
background: "#ffffff",
|
||||
border: "1px solid rgba(15, 23, 42, 0.08)",
|
||||
boxShadow: "0 1px 2px rgba(15, 23, 42, 0.04)",
|
||||
}}
|
||||
>
|
||||
<Stack direction="row" spacing={1.5} alignItems="center" sx={{ mb: 2.5 }}>
|
||||
<Box
|
||||
sx={{
|
||||
mt: 1,
|
||||
pt: 1.5,
|
||||
borderTop: "1.5px solid rgba(16, 185, 129, 0.15)",
|
||||
width: 36,
|
||||
height: 36,
|
||||
borderRadius: 1.5,
|
||||
background: "linear-gradient(135deg, rgba(102, 126, 234, 0.12) 0%, rgba(118, 75, 162, 0.12) 100%)",
|
||||
display: "flex",
|
||||
alignItems: "center",
|
||||
justifyContent: "center",
|
||||
}}
|
||||
>
|
||||
<Typography variant="caption" sx={{ color: "#64748b", fontSize: "0.75rem", fontWeight: 500 }}>
|
||||
{duration} min • {speakers} speaker{speakers > 1 ? "s" : ""} • {knobs.bitrate === "hd" ? "HD" : "Standard"} quality
|
||||
</Typography>
|
||||
<PersonIcon fontSize="small" sx={{ color: "#667eea" }} />
|
||||
</Box>
|
||||
<Typography variant="subtitle2" sx={{ color: "#0f172a", fontWeight: 600, fontSize: "0.9375rem" }}>
|
||||
Podcast Presenter Avatar
|
||||
</Typography>
|
||||
<Tooltip
|
||||
title={
|
||||
<Box>
|
||||
<Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5 }}>
|
||||
Avatar Options:
|
||||
</Typography>
|
||||
<Typography variant="body2" component="div" sx={{ fontSize: "0.875rem", lineHeight: 1.6 }}>
|
||||
<strong>Upload your photo:</strong> We'll enhance it into a professional podcast presenter using AI. Click "Make Presentable" after upload.<br/><br/>
|
||||
<strong>Skip upload:</strong> After analysis completes, we'll generate professional presenter images based on your podcast topic, audience, and speaker count.
|
||||
</Typography>
|
||||
</Box>
|
||||
}
|
||||
arrow
|
||||
placement="top"
|
||||
componentsProps={{
|
||||
tooltip: {
|
||||
sx: {
|
||||
bgcolor: "#0f172a",
|
||||
color: "#ffffff",
|
||||
maxWidth: 320,
|
||||
fontSize: "0.875rem",
|
||||
p: 1.5,
|
||||
boxShadow: "0 4px 12px rgba(0,0,0,0.15)",
|
||||
},
|
||||
},
|
||||
arrow: {
|
||||
sx: {
|
||||
color: "#0f172a",
|
||||
},
|
||||
},
|
||||
}}
|
||||
>
|
||||
<InfoIcon fontSize="small" sx={{ color: "#94a3b8", cursor: "help" }} />
|
||||
</Tooltip>
|
||||
</Stack>
|
||||
|
||||
<Stack direction={{ xs: "column", sm: "row" }} spacing={2.5} alignItems="flex-start">
|
||||
{avatarPreview ? (
|
||||
<Stack spacing={1.5} sx={{ flexShrink: 0 }}>
|
||||
<Box sx={{ position: "relative", display: "inline-block" }}>
|
||||
<Box
|
||||
component="img"
|
||||
src={avatarPreview}
|
||||
alt="Avatar preview"
|
||||
sx={{
|
||||
width: 140,
|
||||
height: 140,
|
||||
objectFit: "cover",
|
||||
borderRadius: 2.5,
|
||||
border: "2px solid #e2e8f0",
|
||||
boxShadow: "0 2px 8px rgba(15, 23, 42, 0.08)",
|
||||
}}
|
||||
/>
|
||||
<IconButton
|
||||
size="small"
|
||||
onClick={handleRemoveAvatar}
|
||||
sx={{
|
||||
position: "absolute",
|
||||
top: -8,
|
||||
right: -8,
|
||||
bgcolor: "white",
|
||||
border: "1.5px solid #e2e8f0",
|
||||
boxShadow: "0 2px 4px rgba(15, 23, 42, 0.1)",
|
||||
"&:hover": {
|
||||
bgcolor: "#f8fafc",
|
||||
borderColor: "#dc2626",
|
||||
color: "#dc2626",
|
||||
},
|
||||
}}
|
||||
>
|
||||
<DeleteIcon fontSize="small" />
|
||||
</IconButton>
|
||||
</Box>
|
||||
{avatarUrl && (
|
||||
<Tooltip
|
||||
title="Transform your uploaded photo into a professional podcast presenter. This AI enhancement optimizes your photo for video generation while maintaining your appearance and identity."
|
||||
arrow
|
||||
placement="top"
|
||||
>
|
||||
<Box>
|
||||
<SecondaryButton
|
||||
onClick={handleMakePresentable}
|
||||
disabled={makingPresentable}
|
||||
loading={makingPresentable}
|
||||
startIcon={!makingPresentable ? <AutoAwesomeIcon fontSize="small" /> : undefined}
|
||||
sx={{
|
||||
fontSize: "0.8125rem",
|
||||
py: 0.75,
|
||||
width: "100%",
|
||||
background: makingPresentable ? undefined : "linear-gradient(135deg, rgba(102, 126, 234, 0.08) 0%, rgba(118, 75, 162, 0.08) 100%)",
|
||||
border: makingPresentable ? undefined : "1px solid rgba(102, 126, 234, 0.2)",
|
||||
color: makingPresentable ? undefined : "#667eea",
|
||||
fontWeight: 600,
|
||||
"&:hover": {
|
||||
background: makingPresentable ? undefined : "linear-gradient(135deg, rgba(102, 126, 234, 0.12) 0%, rgba(118, 75, 162, 0.12) 100%)",
|
||||
},
|
||||
}}
|
||||
>
|
||||
{makingPresentable ? "Transforming..." : "Make Presentable"}
|
||||
</SecondaryButton>
|
||||
</Box>
|
||||
</Tooltip>
|
||||
)}
|
||||
</Stack>
|
||||
) : (
|
||||
<Box
|
||||
component="label"
|
||||
sx={{
|
||||
display: "flex",
|
||||
flexDirection: "column",
|
||||
alignItems: "center",
|
||||
justifyContent: "center",
|
||||
width: { xs: "100%", sm: 200 },
|
||||
minHeight: 140,
|
||||
border: "2px dashed #cbd5e1",
|
||||
borderRadius: 2.5,
|
||||
bgcolor: "#f8fafc",
|
||||
cursor: "pointer",
|
||||
transition: "all 0.2s",
|
||||
flexShrink: 0,
|
||||
"&:hover": {
|
||||
borderColor: "#667eea",
|
||||
bgcolor: "#f1f5f9",
|
||||
borderWidth: "2.5px",
|
||||
boxShadow: "0 0 0 3px rgba(102, 126, 234, 0.08)",
|
||||
},
|
||||
}}
|
||||
>
|
||||
<input
|
||||
type="file"
|
||||
accept="image/*"
|
||||
onChange={handleAvatarChange}
|
||||
style={{ display: "none" }}
|
||||
/>
|
||||
<CloudUploadIcon sx={{ color: "#94a3b8", fontSize: 36, mb: 1.5 }} />
|
||||
<Typography variant="body2" sx={{ color: "#64748b", fontWeight: 600, mb: 0.5 }}>
|
||||
Upload Your Photo
|
||||
</Typography>
|
||||
<Typography variant="caption" sx={{ color: "#94a3b8", textAlign: "center", px: 2, lineHeight: 1.5 }}>
|
||||
Optional - We'll enhance it with AI or generate one after analysis
|
||||
</Typography>
|
||||
</Box>
|
||||
)}
|
||||
|
||||
<Box sx={{ flex: 1, minWidth: 0 }}>
|
||||
<Stack spacing={1.5}>
|
||||
<Box>
|
||||
<Typography variant="body2" sx={{ color: "#0f172a", fontSize: "0.9375rem", lineHeight: 1.7, fontWeight: 500, mb: 1 }}>
|
||||
Choose Your Avatar Option:
|
||||
</Typography>
|
||||
|
||||
<Stack spacing={1.5}>
|
||||
<Box
|
||||
sx={{
|
||||
p: 1.5,
|
||||
borderRadius: 1.5,
|
||||
background: alpha("#f0f4ff", 0.6),
|
||||
border: "1px solid rgba(99, 102, 241, 0.2)",
|
||||
}}
|
||||
>
|
||||
<Typography variant="body2" sx={{ color: "#0f172a", fontSize: "0.875rem", fontWeight: 600, mb: 0.5, display: "flex", alignItems: "center", gap: 0.5 }}>
|
||||
<AutoAwesomeIcon fontSize="small" sx={{ color: "#667eea" }} />
|
||||
Upload Your Photo (Recommended)
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ color: "#475569", fontSize: "0.8125rem", lineHeight: 1.6 }}>
|
||||
Upload your photo and we'll enhance it into a professional podcast presenter using AI. After upload, click <strong>"Make Presentable"</strong> to transform your photo into a podcast-ready avatar that maintains your appearance while optimizing it for video generation.
|
||||
</Typography>
|
||||
</Box>
|
||||
|
||||
<Box
|
||||
sx={{
|
||||
p: 1.5,
|
||||
borderRadius: 1.5,
|
||||
background: alpha("#f8fafc", 0.8),
|
||||
border: "1px solid rgba(15, 23, 42, 0.1)",
|
||||
}}
|
||||
>
|
||||
<Typography variant="body2" sx={{ color: "#0f172a", fontSize: "0.875rem", fontWeight: 600, mb: 0.5, display: "flex", alignItems: "center", gap: 0.5 }}>
|
||||
<PersonIcon fontSize="small" sx={{ color: "#64748b" }} />
|
||||
Let ALwrity Generate (Alternative)
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ color: "#475569", fontSize: "0.8125rem", lineHeight: 1.6 }}>
|
||||
If you skip upload, we'll automatically generate professional presenter images <strong>after the AI analysis completes</strong>. The generated presenters will be tailored to your podcast topic, target audience, content type, and speaker count for the best fit.
|
||||
</Typography>
|
||||
</Box>
|
||||
</Stack>
|
||||
</Box>
|
||||
|
||||
<Box
|
||||
sx={{
|
||||
p: 1.5,
|
||||
borderRadius: 1.5,
|
||||
background: alpha("#f0f4ff", 0.5),
|
||||
border: "1px solid rgba(99, 102, 241, 0.15)",
|
||||
}}
|
||||
>
|
||||
<Typography variant="caption" sx={{ color: "#6366f1", fontSize: "0.8125rem", fontWeight: 500, display: "flex", alignItems: "center", gap: 0.5 }}>
|
||||
<InfoIcon fontSize="inherit" />
|
||||
Supported formats: JPG, PNG, WebP (max 5MB)
|
||||
</Typography>
|
||||
</Box>
|
||||
</Stack>
|
||||
</Box>
|
||||
</Stack>
|
||||
</Paper>
|
||||
</Box>
|
||||
</Stack>
|
||||
</Box>
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import React, { useMemo } from "react";
|
||||
import { Stack, Typography, Divider, Chip, Tooltip, IconButton, alpha } from "@mui/material";
|
||||
import { OpenInNew as OpenInNewIcon, ContentCopy as ContentCopyIcon } from "@mui/icons-material";
|
||||
import React, { useMemo, useState } from "react";
|
||||
import { Stack, Typography, Divider, Chip, Tooltip, IconButton, alpha, Box } from "@mui/material";
|
||||
import { OpenInNew as OpenInNewIcon, ContentCopy as ContentCopyIcon, ExpandMore as ExpandMoreIcon, ExpandLess as ExpandLessIcon } from "@mui/icons-material";
|
||||
import { Fact } from "./types";
|
||||
import { GlassyCard, glassyCardSx } from "./ui";
|
||||
|
||||
@@ -8,7 +8,10 @@ interface FactCardProps {
|
||||
fact: Fact;
|
||||
}
|
||||
|
||||
const MAX_PREVIEW_LENGTH = 200; // Characters to show before truncation
|
||||
|
||||
export const FactCard: React.FC<FactCardProps> = ({ fact }) => {
|
||||
const [expanded, setExpanded] = useState(false);
|
||||
const hostname = useMemo(() => {
|
||||
try {
|
||||
return new URL(fact.url).hostname;
|
||||
@@ -21,30 +24,77 @@ export const FactCard: React.FC<FactCardProps> = ({ fact }) => {
|
||||
navigator.clipboard.writeText(fact.quote);
|
||||
};
|
||||
|
||||
const shouldTruncate = fact.quote.length > MAX_PREVIEW_LENGTH;
|
||||
const previewText = shouldTruncate ? fact.quote.slice(0, MAX_PREVIEW_LENGTH).trim() + "..." : fact.quote;
|
||||
const fullText = fact.quote;
|
||||
|
||||
return (
|
||||
<GlassyCard
|
||||
whileHover={{ y: -4 }}
|
||||
whileHover={{ y: -2 }}
|
||||
sx={{
|
||||
...glassyCardSx,
|
||||
p: 2,
|
||||
p: 1.5,
|
||||
cursor: "pointer",
|
||||
transition: "all 0.2s",
|
||||
height: "100%",
|
||||
display: "flex",
|
||||
flexDirection: "column",
|
||||
"&:hover": {
|
||||
borderColor: "rgba(102,126,234,0.25)",
|
||||
boxShadow: "0 12px 28px rgba(15,23,42,0.08)",
|
||||
boxShadow: "0 8px 20px rgba(15,23,42,0.08)",
|
||||
},
|
||||
background: "#ffffff",
|
||||
border: "1px solid rgba(0,0,0,0.06)",
|
||||
}}
|
||||
>
|
||||
<Stack spacing={1.5}>
|
||||
<Typography variant="body2" sx={{ lineHeight: 1.6, color: "#0f172a" }}>
|
||||
{fact.quote}
|
||||
</Typography>
|
||||
<Divider sx={{ borderColor: "rgba(0,0,0,0.06)" }} />
|
||||
<Stack direction="row" spacing={1} alignItems="center" justifyContent="space-between">
|
||||
<Stack direction="row" spacing={1} alignItems="center" flex={1}>
|
||||
<OpenInNewIcon fontSize="small" sx={{ color: "rgba(15,23,42,0.6)" }} />
|
||||
<Stack spacing={1} sx={{ flex: 1, minHeight: 0 }}>
|
||||
{/* Quote Text - Truncated with expand option */}
|
||||
<Box sx={{ flex: 1, minHeight: 0 }}>
|
||||
<Typography
|
||||
variant="body2"
|
||||
sx={{
|
||||
lineHeight: 1.5,
|
||||
color: "#0f172a",
|
||||
fontSize: "0.8125rem",
|
||||
display: "-webkit-box",
|
||||
WebkitLineClamp: expanded ? "none" : 4,
|
||||
WebkitBoxOrient: "vertical",
|
||||
overflow: "hidden",
|
||||
textOverflow: "ellipsis",
|
||||
mb: shouldTruncate ? 0.5 : 0,
|
||||
}}
|
||||
>
|
||||
{expanded ? fullText : previewText}
|
||||
</Typography>
|
||||
{shouldTruncate && (
|
||||
<IconButton
|
||||
size="small"
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
setExpanded(!expanded);
|
||||
}}
|
||||
sx={{
|
||||
p: 0.25,
|
||||
mt: 0.25,
|
||||
color: "#4f46e5",
|
||||
"&:hover": { background: alpha("#4f46e5", 0.1) },
|
||||
}}
|
||||
>
|
||||
{expanded ? (
|
||||
<ExpandLessIcon fontSize="small" />
|
||||
) : (
|
||||
<ExpandMoreIcon fontSize="small" />
|
||||
)}
|
||||
</IconButton>
|
||||
)}
|
||||
</Box>
|
||||
|
||||
<Divider sx={{ borderColor: "rgba(0,0,0,0.06)", my: 0.5 }} />
|
||||
|
||||
{/* Source and Actions */}
|
||||
<Stack direction="row" spacing={0.75} alignItems="center" justifyContent="space-between">
|
||||
<Stack direction="row" spacing={0.5} alignItems="center" flex={1} minWidth={0}>
|
||||
<OpenInNewIcon fontSize="small" sx={{ color: "rgba(15,23,42,0.5)", flexShrink: 0 }} />
|
||||
<Typography
|
||||
variant="caption"
|
||||
component="a"
|
||||
@@ -55,34 +105,49 @@ export const FactCard: React.FC<FactCardProps> = ({ fact }) => {
|
||||
color: "#4f46e5",
|
||||
textDecoration: "none",
|
||||
"&:hover": { textDecoration: "underline" },
|
||||
flex: 1,
|
||||
overflow: "hidden",
|
||||
textOverflow: "ellipsis",
|
||||
whiteSpace: "nowrap",
|
||||
fontSize: "0.7rem",
|
||||
}}
|
||||
>
|
||||
{hostname || "source"}
|
||||
</Typography>
|
||||
</Stack>
|
||||
<Tooltip title="Copy citation">
|
||||
<IconButton size="small" onClick={handleCopy} sx={{ color: "rgba(15,23,42,0.65)" }}>
|
||||
<IconButton
|
||||
size="small"
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
handleCopy();
|
||||
}}
|
||||
sx={{
|
||||
color: "rgba(15,23,42,0.6)",
|
||||
p: 0.5,
|
||||
"&:hover": { background: alpha("#4f46e5", 0.1) },
|
||||
}}
|
||||
>
|
||||
<ContentCopyIcon fontSize="small" />
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
</Stack>
|
||||
<Stack direction="row" spacing={2}>
|
||||
|
||||
{/* Confidence and Date */}
|
||||
<Stack direction="row" spacing={1} alignItems="center" justifyContent="space-between">
|
||||
<Chip
|
||||
label={`${(fact.confidence * 100).toFixed(0)}% confidence`}
|
||||
label={`${(fact.confidence * 100).toFixed(0)}%`}
|
||||
size="small"
|
||||
sx={{
|
||||
height: 20,
|
||||
height: 18,
|
||||
fontSize: "0.65rem",
|
||||
background: alpha("#22c55e", 0.15),
|
||||
color: "#15803d",
|
||||
border: "1px solid rgba(34,197,94,0.35)",
|
||||
fontWeight: 600,
|
||||
}}
|
||||
/>
|
||||
<Typography variant="caption" sx={{ color: "#475569" }}>
|
||||
{fact.date}
|
||||
<Typography variant="caption" sx={{ color: "#64748b", fontSize: "0.7rem" }}>
|
||||
{(() => {
  // Show a short "Mon YYYY" label for parseable dates. Anything that cannot
  // be parsed (including the "Unknown" placeholder or a missing value) is
  // rendered as-is instead of the literal text "Invalid Date".
  const parsed = fact.date ? new Date(fact.date) : null;
  return parsed && !Number.isNaN(parsed.getTime())
    ? parsed.toLocaleDateString("en-US", { month: "short", year: "numeric" })
    : fact.date;
})()}
|
||||
</Typography>
|
||||
</Stack>
|
||||
</Stack>
|
||||
|
||||
@@ -217,6 +217,11 @@ const PodcastDashboard: React.FC = () => {
|
||||
{analysis && !showScriptEditor && !showRenderQueue && (
|
||||
<AnalysisPanel
|
||||
analysis={analysis}
|
||||
idea={project?.idea}
|
||||
duration={project?.duration}
|
||||
speakers={project?.speakers}
|
||||
avatarUrl={project?.avatarUrl}
|
||||
avatarPrompt={project?.avatarPrompt}
|
||||
onRegenerate={() => {}}
|
||||
/>
|
||||
)}
|
||||
@@ -259,6 +264,7 @@ const PodcastDashboard: React.FC = () => {
|
||||
onBackToResearch={() => setShowScriptEditor(false)}
|
||||
onProceedToRendering={(s) => workflow.handleProceedToRendering(s)}
|
||||
onError={(msg) => workflow.setAnnouncement(msg)}
|
||||
avatarUrl={project?.avatarUrl}
|
||||
/>
|
||||
)}
|
||||
|
||||
|
||||
@@ -19,8 +19,15 @@ export const Header: React.FC<HeaderProps> = ({ onShowProjects, onNewEpisode })
|
||||
const navigate = useNavigate();
|
||||
|
||||
return (
|
||||
<Stack direction="row" justifyContent="space-between" alignItems="flex-start" flexWrap="wrap" gap={2}>
|
||||
<Box>
|
||||
<Stack
|
||||
direction="row"
|
||||
justifyContent="space-between"
|
||||
alignItems="flex-start"
|
||||
flexWrap="wrap"
|
||||
gap={2}
|
||||
sx={{ width: "100%", minWidth: 0 }} // Ensure full width and allow wrapping
|
||||
>
|
||||
<Box sx={{ minWidth: 0, flex: { xs: "1 1 100%", md: "0 1 auto" } }}>
|
||||
<Typography
|
||||
variant="h3"
|
||||
sx={{
|
||||
@@ -30,24 +37,61 @@ export const Header: React.FC<HeaderProps> = ({ onShowProjects, onNewEpisode })
|
||||
display: "flex",
|
||||
alignItems: "center",
|
||||
gap: 1.5,
|
||||
fontSize: { xs: "1.5rem", md: "2rem" },
|
||||
}}
|
||||
>
|
||||
<MicIcon fontSize="large" sx={{ color: "#667eea" }} />
|
||||
AI Podcast Maker
|
||||
</Typography>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
<Typography variant="body2" color="text.secondary" sx={{ display: { xs: "none", sm: "block" } }}>
|
||||
Create professional podcast episodes with AI-powered research, smart scriptwriting, and natural voice narration
|
||||
</Typography>
|
||||
</Box>
|
||||
<Stack direction="row" spacing={1} alignItems="center">
|
||||
<Stack
|
||||
direction="row"
|
||||
spacing={1}
|
||||
alignItems="center"
|
||||
flexWrap="wrap"
|
||||
useFlexGap
|
||||
sx={{
|
||||
justifyContent: { xs: "flex-start", md: "flex-end" },
|
||||
gap: { xs: 0.5, md: 1 },
|
||||
minWidth: 0,
|
||||
width: { xs: "100%", md: "auto" }, // Full width on mobile to allow wrapping
|
||||
flex: { xs: "1 1 100%", md: "0 1 auto" }, // Take full width on mobile
|
||||
}}
|
||||
>
|
||||
<HeaderControls colorMode="light" showAlerts={true} showUser={true} />
|
||||
<SecondaryButton onClick={() => window.open("/docs", "_blank")} startIcon={<InfoIcon />}>
|
||||
<SecondaryButton
|
||||
onClick={() => window.open("/docs", "_blank")}
|
||||
startIcon={<InfoIcon />}
|
||||
sx={{
|
||||
display: { xs: "none", lg: "flex" },
|
||||
// Override for light theme
|
||||
borderColor: "rgba(102, 126, 234, 0.3) !important",
|
||||
color: "#667eea !important",
|
||||
"&:hover": {
|
||||
borderColor: "rgba(102, 126, 234, 0.5) !important",
|
||||
background: "rgba(102, 126, 234, 0.1) !important",
|
||||
},
|
||||
}}
|
||||
>
|
||||
Help
|
||||
</SecondaryButton>
|
||||
<SecondaryButton
|
||||
onClick={() => navigate("/asset-library?source_module=podcast_maker&asset_type=audio")}
|
||||
startIcon={<LibraryMusicIcon />}
|
||||
tooltip="View all podcast episodes in Asset Library"
|
||||
sx={{
|
||||
display: { xs: "none", xl: "flex" },
|
||||
// Override for light theme
|
||||
borderColor: "rgba(102, 126, 234, 0.3) !important",
|
||||
color: "#667eea !important",
|
||||
"&:hover": {
|
||||
borderColor: "rgba(102, 126, 234, 0.5) !important",
|
||||
background: "rgba(102, 126, 234, 0.1) !important",
|
||||
},
|
||||
}}
|
||||
>
|
||||
My Episodes
|
||||
</SecondaryButton>
|
||||
@@ -55,10 +99,30 @@ export const Header: React.FC<HeaderProps> = ({ onShowProjects, onNewEpisode })
|
||||
onClick={onShowProjects}
|
||||
startIcon={<MicIcon />}
|
||||
tooltip="View and resume saved projects"
|
||||
sx={{
|
||||
flexShrink: 0,
|
||||
display: "flex !important", // Always show "My Projects" - force display
|
||||
order: { xs: 1, md: 0 }, // Show first on mobile
|
||||
// Override button colors for light theme
|
||||
borderColor: "rgba(102, 126, 234, 0.3) !important",
|
||||
color: "#667eea !important",
|
||||
"&:hover": {
|
||||
borderColor: "rgba(102, 126, 234, 0.5) !important",
|
||||
background: "rgba(102, 126, 234, 0.1) !important",
|
||||
},
|
||||
}}
|
||||
>
|
||||
My Projects
|
||||
</SecondaryButton>
|
||||
<PrimaryButton onClick={onNewEpisode} startIcon={<AutoAwesomeIcon />}>
|
||||
<PrimaryButton
|
||||
onClick={onNewEpisode}
|
||||
startIcon={<AutoAwesomeIcon />}
|
||||
sx={{
|
||||
flexShrink: 0,
|
||||
display: "flex", // Always show "New Episode"
|
||||
order: { xs: 0, md: 1 }, // Show first on mobile
|
||||
}}
|
||||
>
|
||||
New Episode
|
||||
</PrimaryButton>
|
||||
</Stack>
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
import React from "react";
|
||||
import { Stack, Typography, Chip, Divider, Box, alpha } from "@mui/material";
|
||||
import React, { useMemo } from "react";
|
||||
import { Stack, Typography, Chip, Divider, Box, alpha, Paper } from "@mui/material";
|
||||
import {
|
||||
Insights as InsightsIcon,
|
||||
Search as SearchIcon,
|
||||
AttachMoney as AttachMoneyIcon,
|
||||
EditNote as EditNoteIcon,
|
||||
Article as ArticleIcon,
|
||||
} from "@mui/icons-material";
|
||||
import { Research } from "../types";
|
||||
import { GlassyCard, glassyCardSx, PrimaryButton } from "../ui";
|
||||
@@ -21,17 +22,68 @@ export const ResearchSummary: React.FC<ResearchSummaryProps> = ({
|
||||
canGenerateScript,
|
||||
onGenerateScript,
|
||||
}) => {
|
||||
// Split a long summary into up to three highlighted "key insight" sentences
// plus the remaining body text. Summaries of 500 characters or fewer, and
// summaries with nothing left over after the highlights, are returned unsplit
// as { keyPoints: [], remainingText: <full summary> }.
const summaryParts = useMemo(() => {
  const fullSummary = research.summary || "";
  const unsplit = { keyPoints: [] as string[], remainingText: fullSummary };
  if (fullSummary.length <= 500) {
    return unsplit;
  }

  // Split after sentence-ending punctuation that is followed by whitespace.
  // The capture group keeps the punctuation mark in the result, so pieces
  // alternate [text, mark, text, mark, ..., text] and every sentence can be
  // rebuilt with its own terminator instead of a blanket ".".
  const pieces = fullSummary.split(/([.!?])\s+/);
  const sentences: string[] = [];
  for (let i = 0; i < pieces.length; i += 2) {
    const sentence = `${pieces[i] ?? ""}${pieces[i + 1] ?? ""}`.trim();
    if (sentence) {
      sentences.push(sentence);
    }
  }

  // The first three substantive sentences become key points; everything else
  // (including short fragments) stays in the body text so no content is lost.
  const keyPoints: string[] = [];
  const rest: string[] = [];
  for (const sentence of sentences) {
    if (keyPoints.length < 3 && sentence.length > 20) {
      keyPoints.push(sentence);
    } else {
      rest.push(sentence);
    }
  }

  // With nothing left for the body, an empty remainingText would trigger the
  // render's fallback to the full summary and repeat the key points, so the
  // summary is shown unsplit instead.
  if (rest.length === 0) {
    return unsplit;
  }
  return { keyPoints, remainingText: rest.join(" ") };
}, [research.summary]);
|
||||
|
||||
return (
|
||||
<GlassyCard sx={glassyCardSx}>
|
||||
<Stack spacing={3}>
|
||||
<Stack direction="row" justifyContent="space-between" alignItems="flex-start" flexWrap="wrap" gap={2}>
|
||||
<Box sx={{ flex: 1 }}>
|
||||
<Typography variant="h6" sx={{ display: "flex", alignItems: "center", gap: 1, mb: 1 }}>
|
||||
<Box sx={{ flex: 1, minWidth: { xs: "100%", md: "60%" } }}>
|
||||
<Typography variant="h6" sx={{ display: "flex", alignItems: "center", gap: 1, mb: 1.5 }}>
|
||||
<InsightsIcon />
|
||||
Research Summary
|
||||
</Typography>
|
||||
<Typography variant="body2" color="text.secondary" sx={{ mb: 2, lineHeight: 1.7 }}>
|
||||
{research.summary}
|
||||
|
||||
{/* Key Insights */}
|
||||
{summaryParts.keyPoints.length > 0 && (
|
||||
<Box sx={{ mb: 2 }}>
|
||||
<Typography variant="subtitle2" sx={{ mb: 1, color: "#0f172a", fontWeight: 600, display: "flex", alignItems: "center", gap: 0.5 }}>
|
||||
<ArticleIcon fontSize="small" />
|
||||
Key Insights
|
||||
</Typography>
|
||||
<Stack spacing={1}>
|
||||
{summaryParts.keyPoints.map((point, idx) => (
|
||||
<Paper
|
||||
key={idx}
|
||||
sx={{
|
||||
p: 1.25,
|
||||
background: alpha("#667eea", 0.05),
|
||||
border: "1px solid rgba(102, 126, 234, 0.15)",
|
||||
borderRadius: 1.5,
|
||||
}}
|
||||
>
|
||||
<Typography variant="body2" sx={{ color: "#0f172a", lineHeight: 1.6, fontSize: "0.875rem" }}>
|
||||
{point}
|
||||
</Typography>
|
||||
</Paper>
|
||||
))}
|
||||
</Stack>
|
||||
</Box>
|
||||
)}
|
||||
|
||||
{/* Full Summary Text */}
|
||||
<Typography
|
||||
variant="body2"
|
||||
color="text.secondary"
|
||||
sx={{
|
||||
mb: 2,
|
||||
lineHeight: 1.7,
|
||||
fontSize: "0.875rem",
|
||||
color: "#475569",
|
||||
}}
|
||||
>
|
||||
{summaryParts.remainingText || research.summary}
|
||||
</Typography>
|
||||
|
||||
{/* Research Metadata */}
|
||||
@@ -126,15 +178,23 @@ export const ResearchSummary: React.FC<ResearchSummaryProps> = ({
|
||||
{research.factCards.length > 0 && (
|
||||
<>
|
||||
<Divider sx={{ borderColor: "rgba(0,0,0,0.08)" }} />
|
||||
<Stack direction="row" justifyContent="space-between" alignItems="center" sx={{ mb: 1.5 }}>
|
||||
<Stack direction="row" justifyContent="space-between" alignItems="center" sx={{ mb: 1.5, flexWrap: "wrap", gap: 1 }}>
|
||||
<Typography variant="subtitle2" sx={{ color: "#0f172a", fontWeight: 600 }}>
|
||||
Research Sources & Facts ({research.factCards.length})
|
||||
</Typography>
|
||||
<Typography variant="caption" sx={{ color: "#64748b" }}>
|
||||
Click any card to view source details
|
||||
<Typography variant="caption" sx={{ color: "#64748b", fontSize: "0.75rem" }}>
|
||||
Click to expand • Hover to see source
|
||||
</Typography>
|
||||
</Stack>
|
||||
<Box sx={{ display: "grid", gridTemplateColumns: { xs: "1fr", sm: "1fr 1fr", lg: "1fr 1fr 1fr" }, gap: 2 }}>
|
||||
<Box
|
||||
sx={{
|
||||
display: "grid",
|
||||
gridTemplateColumns: { xs: "1fr", sm: "repeat(2, 1fr)", md: "repeat(3, 1fr)", lg: "repeat(4, 1fr)" },
|
||||
gap: 1.5,
|
||||
width: "100%",
|
||||
overflow: "hidden",
|
||||
}}
|
||||
>
|
||||
{research.factCards.map((fact) => (
|
||||
<FactCard key={fact.id} fact={fact} />
|
||||
))}
|
||||
|
||||
@@ -94,17 +94,66 @@ export const usePodcastWorkflow = ({ projectState, onError }: UsePodcastWorkflow
|
||||
setShowRenderQueue(false);
|
||||
try {
|
||||
setIsAnalyzing(true);
|
||||
|
||||
// Upload avatar if provided, or generate presenters
|
||||
let avatarUrl: string | null = null;
|
||||
if (payload.files.avatarFile) {
|
||||
try {
|
||||
setAnnouncement("Uploading presenter avatar...");
|
||||
const uploadResponse = await podcastApi.uploadAvatar(payload.files.avatarFile);
|
||||
avatarUrl = uploadResponse.avatar_url;
|
||||
} catch (error) {
|
||||
console.error('Avatar upload failed:', error);
|
||||
// Continue without avatar - will generate one later
|
||||
}
|
||||
}
|
||||
|
||||
setAnnouncement("Analyzing your idea — AI suggestions incoming");
|
||||
const result = await podcastApi.createProject(payload);
|
||||
const result = await podcastApi.createProject({ ...payload, avatarUrl });
|
||||
await initializeProject(payload, result.projectId);
|
||||
setProject({ id: result.projectId, idea: payload.ideaOrUrl, duration: payload.duration, speakers: payload.speakers });
|
||||
setProject({ id: result.projectId, idea: payload.ideaOrUrl, duration: payload.duration, speakers: payload.speakers, avatarUrl });
|
||||
setAnalysis(result.analysis);
|
||||
setEstimate(result.estimate);
|
||||
setQueries(result.queries);
|
||||
setSelectedQueries(new Set(result.queries.map((q) => q.id)));
|
||||
setKnobs(payload.knobs);
|
||||
setBudgetCap(payload.budgetCap);
|
||||
setAnnouncement("Analysis complete");
|
||||
|
||||
// Generate presenters AFTER analysis completes (to use analysis insights)
|
||||
// This happens only if no avatar was uploaded
|
||||
if (!avatarUrl && payload.speakers > 0 && result.analysis) {
|
||||
try {
|
||||
setAnnouncement("Generating presenter avatars using AI insights...");
|
||||
const presentersResponse = await podcastApi.generatePresenters(
|
||||
payload.speakers,
|
||||
result.projectId,
|
||||
result.analysis.audience,
|
||||
result.analysis.contentType,
|
||||
result.analysis.topKeywords
|
||||
);
|
||||
if (presentersResponse.avatars && presentersResponse.avatars.length > 0) {
|
||||
// Store the first presenter avatar URL and prompt
|
||||
const firstAvatar = presentersResponse.avatars[0];
|
||||
const prompt = firstAvatar.prompt || null;
|
||||
setProject({
|
||||
id: result.projectId,
|
||||
idea: payload.ideaOrUrl,
|
||||
duration: payload.duration,
|
||||
speakers: payload.speakers,
|
||||
avatarUrl: firstAvatar.avatar_url,
|
||||
avatarPrompt: prompt,
|
||||
avatarPersonaId: firstAvatar.persona_id || presentersResponse.persona_id || null,
|
||||
});
|
||||
setAnnouncement("Analysis complete - Presenter avatars generated");
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Presenter generation failed:', error);
|
||||
setAnnouncement("Analysis complete - Avatar generation will happen later");
|
||||
// Continue without presenters - can generate later
|
||||
}
|
||||
} else {
|
||||
setAnnouncement("Analysis complete");
|
||||
}
|
||||
} catch (error: any) {
|
||||
if (error?.response?.status === 429 || error?.response?.data?.detail) {
|
||||
const errorDetail = error.response.data.detail;
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
import React, { useCallback } from "react";
|
||||
import { Box, Stack, Typography, Alert, Paper, alpha } from "@mui/material";
|
||||
import React, { useCallback, useState, useEffect } from "react";
|
||||
import { Box, Stack, Typography, Alert, Paper, alpha, Button, CircularProgress, LinearProgress } from "@mui/material";
|
||||
import {
|
||||
PlayArrow as PlayArrowIcon,
|
||||
ArrowBack as ArrowBackIcon,
|
||||
VideoLibrary as VideoLibraryIcon,
|
||||
Download as DownloadIcon,
|
||||
CheckCircle as CheckCircleIcon,
|
||||
} from "@mui/icons-material";
|
||||
import { Script, Knobs, Job } from "./types";
|
||||
import { SecondaryButton } from "./ui";
|
||||
@@ -10,6 +13,7 @@ import { SceneCard } from "./RenderQueue/SceneCard";
|
||||
import { SummaryStats } from "./RenderQueue/SummaryStats";
|
||||
import { GuidancePanel } from "./RenderQueue/GuidancePanel";
|
||||
import { useRenderQueue } from "./RenderQueue/useRenderQueue";
|
||||
import { fetchMediaBlobUrl } from "../../utils/fetchMediaBlobUrl";
|
||||
|
||||
interface RenderQueueProps {
|
||||
projectId: string;
|
||||
@@ -36,6 +40,7 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
|
||||
onBack,
|
||||
onError,
|
||||
}) => {
|
||||
const [localError, setLocalError] = useState<string>("");
|
||||
const {
|
||||
rendering,
|
||||
generatingImage,
|
||||
@@ -43,6 +48,10 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
|
||||
runRender,
|
||||
runImageGeneration,
|
||||
runVideoRender,
|
||||
combiningVideos,
|
||||
combiningProgress,
|
||||
finalVideoUrl,
|
||||
combineFinalVideo,
|
||||
} = useRenderQueue({
|
||||
script,
|
||||
jobs,
|
||||
@@ -52,7 +61,10 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
|
||||
avatarImageUrl,
|
||||
onUpdateJob,
|
||||
onUpdateScript,
|
||||
onError,
|
||||
onError: (msg) => {
|
||||
setLocalError(msg);
|
||||
onError(msg);
|
||||
},
|
||||
});
|
||||
|
||||
const handleDownloadAudio = useCallback((audioUrl: string, title: string) => {
|
||||
@@ -76,11 +88,11 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
|
||||
title,
|
||||
text: `Check out this podcast episode: ${title}`,
|
||||
url: audioUrl,
|
||||
});
|
||||
});
|
||||
} catch (err) {
|
||||
// User cancelled or error
|
||||
}
|
||||
} else {
|
||||
} else {
|
||||
// Fallback: copy to clipboard
|
||||
await navigator.clipboard.writeText(audioUrl);
|
||||
alert("Audio URL copied to clipboard!");
|
||||
@@ -91,6 +103,28 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
|
||||
(jobs.length > 0 && jobs.every((j) => j.status === "completed" && j.imageUrl)) ||
|
||||
(script.scenes.length > 0 && script.scenes.every((s) => s.audioUrl && s.imageUrl));
|
||||
|
||||
const allVideosReady = jobs.length > 0 && jobs.every((j) => j.videoUrl);
|
||||
|
||||
// State for final video blob URL
|
||||
const [finalVideoBlobUrl, setFinalVideoBlobUrl] = useState<string | null>(null);
|
||||
|
||||
// Load the final video as a blob URL whenever finalVideoUrl changes.
useEffect(() => {
  // Drop any blob URL that belongs to a previous finalVideoUrl so the player
  // and the download button never serve a stale video while the new one loads.
  setFinalVideoBlobUrl(null);
  if (!finalVideoUrl) {
    return;
  }

  // Guards against a slow fetch resolving after finalVideoUrl changed or the
  // component unmounted, which would otherwise overwrite newer state.
  let cancelled = false;
  fetchMediaBlobUrl(finalVideoUrl)
    .then((blobUrl) => {
      if (!cancelled && blobUrl) {
        setFinalVideoBlobUrl(blobUrl);
      }
    })
    .catch((err) => {
      console.error("Failed to load final video blob:", err);
    });

  // NOTE(review): if fetchMediaBlobUrl hands back an uncached
  // URL.createObjectURL() handle, it should also be revoked in this cleanup;
  // confirm against the helper before adding that.
  return () => {
    cancelled = true;
  };
}, [finalVideoUrl]);
|
||||
|
||||
return (
|
||||
<Box sx={{ mt: 3 }}>
|
||||
{/* Header */}
|
||||
@@ -115,6 +149,24 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
|
||||
</Typography>
|
||||
</Stack>
|
||||
|
||||
{/* Error Display */}
|
||||
{localError && (
|
||||
<Alert
|
||||
severity="error"
|
||||
onClose={() => setLocalError("")}
|
||||
sx={{
|
||||
mb: 3,
|
||||
background: alpha("#ef4444", 0.1),
|
||||
border: "1px solid",
|
||||
borderColor: alpha("#ef4444", 0.3),
|
||||
}}
|
||||
>
|
||||
<Typography variant="body2" sx={{ fontWeight: 600 }}>
|
||||
❌ {localError}
|
||||
</Typography>
|
||||
</Alert>
|
||||
)}
|
||||
|
||||
{/* Info Alert */}
|
||||
<Alert severity="info" sx={{ mb: 3, background: alpha("#3b82f6", 0.1), border: "1px solid rgba(59,130,246,0.3)" }}>
|
||||
<Typography variant="body2">
|
||||
@@ -127,21 +179,21 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
|
||||
|
||||
{/* Empty State */}
|
||||
{jobs.length === 0 && script.scenes.length === 0 && (
|
||||
<Paper
|
||||
sx={{
|
||||
<Paper
|
||||
sx={{
|
||||
p: 4,
|
||||
textAlign: "center",
|
||||
background: "linear-gradient(135deg, rgba(102, 126, 234, 0.05) 0%, rgba(118, 75, 162, 0.05) 100%)",
|
||||
border: "2px dashed rgba(102, 126, 234, 0.3)",
|
||||
borderRadius: 2,
|
||||
}}
|
||||
>
|
||||
}}
|
||||
>
|
||||
<Typography variant="h6" sx={{ color: "#0f172a", fontWeight: 600, mb: 1 }}>
|
||||
No scenes to render
|
||||
</Typography>
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ color: "#64748b", mb: 3 }}>
|
||||
Go back to the script editor to generate and approve scenes first.
|
||||
</Typography>
|
||||
</Typography>
|
||||
<SecondaryButton onClick={onBack} startIcon={<ArrowBackIcon />}>
|
||||
Back to Script Editor
|
||||
</SecondaryButton>
|
||||
@@ -166,7 +218,7 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
|
||||
avatarImageUrl={avatarImageUrl}
|
||||
onRender={runRender}
|
||||
onImageGenerate={runImageGeneration}
|
||||
onVideoRender={runVideoRender}
|
||||
onVideoGenerate={(sceneId, settings) => runVideoRender(sceneId, settings)}
|
||||
onDownloadAudio={handleDownloadAudio}
|
||||
onDownloadVideo={handleDownloadVideo}
|
||||
onShare={handleShare}
|
||||
@@ -176,6 +228,224 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
|
||||
})}
|
||||
</Stack>
|
||||
|
||||
{/* Final Export Section - Show when all scene videos are ready */}
|
||||
{allVideosReady && (
|
||||
<Paper
|
||||
elevation={3}
|
||||
sx={{
|
||||
mt: 4,
|
||||
p: 4,
|
||||
background: "linear-gradient(135deg, rgba(16, 185, 129, 0.05) 0%, rgba(6, 182, 212, 0.05) 100%)",
|
||||
border: "2px solid",
|
||||
borderColor: finalVideoUrl ? "success.main" : "info.light",
|
||||
borderRadius: 3,
|
||||
position: "relative",
|
||||
overflow: "hidden",
|
||||
"&::before": {
|
||||
content: '""',
|
||||
position: "absolute",
|
||||
top: 0,
|
||||
left: 0,
|
||||
right: 0,
|
||||
height: "4px",
|
||||
background: finalVideoUrl
|
||||
? "linear-gradient(90deg, #10b981 0%, #06b6d4 100%)"
|
||||
: "linear-gradient(90deg, #667eea 0%, #764ba2 100%)",
|
||||
},
|
||||
}}
|
||||
>
|
||||
<Stack spacing={3}>
|
||||
{/* Header */}
|
||||
<Stack direction="row" alignItems="center" spacing={2}>
|
||||
<Box
|
||||
sx={{
|
||||
p: 1.5,
|
||||
borderRadius: 2,
|
||||
background: finalVideoUrl
|
||||
? "linear-gradient(135deg, #10b981 0%, #059669 100%)"
|
||||
: "linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
|
||||
display: "flex",
|
||||
alignItems: "center",
|
||||
justifyContent: "center",
|
||||
boxShadow: "0 4px 12px rgba(102, 126, 234, 0.3)",
|
||||
}}
|
||||
>
|
||||
{finalVideoUrl ? (
|
||||
<CheckCircleIcon sx={{ color: "white", fontSize: 32 }} />
|
||||
) : (
|
||||
<VideoLibraryIcon sx={{ color: "white", fontSize: 32 }} />
|
||||
)}
|
||||
</Box>
|
||||
<Box>
|
||||
<Typography
|
||||
variant="h5"
|
||||
sx={{
|
||||
fontWeight: 700,
|
||||
color: "#0f172a",
|
||||
mb: 0.5,
|
||||
}}
|
||||
>
|
||||
{finalVideoUrl ? "🎉 Final Podcast Ready!" : "🎬 Final Podcast Export"}
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ color: "#64748b" }}>
|
||||
{finalVideoUrl
|
||||
? "Your complete podcast video is ready to download"
|
||||
: `Combine ${script.scenes.length} scene videos into one final podcast`}
|
||||
</Typography>
|
||||
</Box>
|
||||
</Stack>
|
||||
|
||||
{finalVideoUrl ? (
|
||||
<Stack spacing={3}>
|
||||
<Alert
|
||||
severity="success"
|
||||
icon={<CheckCircleIcon />}
|
||||
sx={{
|
||||
background: alpha("#10b981", 0.1),
|
||||
border: "1px solid",
|
||||
borderColor: alpha("#10b981", 0.3),
|
||||
}}
|
||||
>
|
||||
<Typography variant="body2" sx={{ fontWeight: 600 }}>
|
||||
✅ Your final podcast video has been created successfully!
|
||||
</Typography>
|
||||
</Alert>
|
||||
|
||||
{/* Video Preview */}
|
||||
<Box
|
||||
sx={{
|
||||
width: "100%",
|
||||
maxWidth: 900,
|
||||
mx: "auto",
|
||||
borderRadius: 2,
|
||||
overflow: "hidden",
|
||||
boxShadow: "0 8px 24px rgba(0, 0, 0, 0.12)",
|
||||
border: "1px solid",
|
||||
borderColor: alpha("#10b981", 0.2),
|
||||
}}
|
||||
>
|
||||
<video
|
||||
controls
|
||||
src={finalVideoBlobUrl || finalVideoUrl}
|
||||
style={{
|
||||
width: "100%",
|
||||
display: "block",
|
||||
backgroundColor: "#000",
|
||||
}}
|
||||
>
|
||||
Your browser does not support video playback.
|
||||
</video>
|
||||
</Box>
|
||||
|
||||
{/* Download Button */}
|
||||
<Stack direction="row" spacing={2} justifyContent="center" sx={{ pt: 2 }}>
|
||||
<Button
|
||||
variant="contained"
|
||||
size="large"
|
||||
startIcon={<DownloadIcon />}
|
||||
onClick={async () => {
|
||||
if (finalVideoBlobUrl) {
|
||||
const link = document.createElement("a");
|
||||
link.href = finalVideoBlobUrl;
|
||||
link.download = `podcast-final-${Date.now()}.mp4`;
|
||||
document.body.appendChild(link);
|
||||
link.click();
|
||||
document.body.removeChild(link);
|
||||
}
|
||||
}}
|
||||
sx={{
|
||||
px: 4,
|
||||
py: 1.5,
|
||||
background: "linear-gradient(135deg, #10b981 0%, #059669 100%)",
|
||||
boxShadow: "0 4px 12px rgba(16, 185, 129, 0.4)",
|
||||
"&:hover": {
|
||||
background: "linear-gradient(135deg, #059669 0%, #047857 100%)",
|
||||
boxShadow: "0 6px 16px rgba(16, 185, 129, 0.5)",
|
||||
},
|
||||
}}
|
||||
>
|
||||
Download Final Podcast
|
||||
</Button>
|
||||
</Stack>
|
||||
</Stack>
|
||||
) : (
|
||||
<Stack spacing={3}>
|
||||
<Alert
|
||||
severity="info"
|
||||
sx={{
|
||||
background: alpha("#3b82f6", 0.08),
|
||||
border: "1px solid",
|
||||
borderColor: alpha("#3b82f6", 0.2),
|
||||
}}
|
||||
>
|
||||
<Typography variant="body2">
|
||||
<strong>Ready to export!</strong> Click below to combine all {script.scenes.length} scene videos into your final podcast video.
|
||||
</Typography>
|
||||
</Alert>
|
||||
|
||||
{combiningVideos && (
|
||||
<Box sx={{ width: "100%" }}>
|
||||
<Stack direction="row" justifyContent="space-between" sx={{ mb: 1 }}>
|
||||
<Typography variant="body2" sx={{ fontWeight: 600, color: "#0f172a" }}>
|
||||
{combiningProgress?.message || "Combining videos..."}
|
||||
</Typography>
|
||||
{combiningProgress && (
|
||||
<Typography variant="body2" sx={{ color: "#64748b", fontWeight: 600 }}>
|
||||
{combiningProgress.progress.toFixed(0)}%
|
||||
</Typography>
|
||||
)}
|
||||
</Stack>
|
||||
<LinearProgress
|
||||
variant={combiningProgress ? "determinate" : "indeterminate"}
|
||||
value={combiningProgress?.progress || 0}
|
||||
sx={{
|
||||
height: 8,
|
||||
borderRadius: 4,
|
||||
background: alpha("#667eea", 0.1),
|
||||
"& .MuiLinearProgress-bar": {
|
||||
background: "linear-gradient(90deg, #667eea 0%, #764ba2 100%)",
|
||||
borderRadius: 4,
|
||||
},
|
||||
}}
|
||||
/>
|
||||
{combiningProgress && combiningProgress.progress < 100 && (
|
||||
<Typography variant="caption" sx={{ color: "#64748b", mt: 0.5, display: "block" }}>
|
||||
Video encoding in progress. This may take a few minutes...
|
||||
</Typography>
|
||||
)}
|
||||
</Box>
|
||||
)}
|
||||
|
||||
<Button
|
||||
variant="contained"
|
||||
size="large"
|
||||
fullWidth
|
||||
startIcon={combiningVideos ? <CircularProgress size={20} sx={{ color: "white" }} /> : <VideoLibraryIcon />}
|
||||
onClick={combineFinalVideo}
|
||||
disabled={combiningVideos}
|
||||
sx={{
|
||||
py: 2,
|
||||
fontSize: "1.1rem",
|
||||
fontWeight: 700,
|
||||
background: "linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
|
||||
boxShadow: "0 4px 12px rgba(102, 126, 234, 0.4)",
|
||||
"&:hover": {
|
||||
background: "linear-gradient(135deg, #5568d3 0%, #6a3f8f 100%)",
|
||||
boxShadow: "0 6px 16px rgba(102, 126, 234, 0.5)",
|
||||
},
|
||||
"&:disabled": {
|
||||
background: alpha("#667eea", 0.5),
|
||||
},
|
||||
}}
|
||||
>
|
||||
{combiningVideos ? "Combining Videos..." : "Combine Scenes into Final Video"}
|
||||
</Button>
|
||||
</Stack>
|
||||
)}
|
||||
</Stack>
|
||||
</Paper>
|
||||
)}
|
||||
|
||||
{/* Footer - Video Generation Focus */}
|
||||
<Paper
|
||||
sx={{
|
||||
@@ -191,13 +461,22 @@ export const RenderQueue: React.FC<RenderQueueProps> = ({
|
||||
<SecondaryButton onClick={onBack} startIcon={<ArrowBackIcon />}>
|
||||
Back to Script
|
||||
</SecondaryButton>
|
||||
{allScenesCompleted ? (
|
||||
{allVideosReady ? (
|
||||
<Stack spacing={1} alignItems="flex-end">
|
||||
<Typography variant="body1" sx={{ color: "#10b981", fontWeight: 700, fontSize: "1rem" }}>
|
||||
🎉 All scene videos ready!
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ color: "#64748b" }}>
|
||||
Scroll up to combine them into your final podcast video.
|
||||
</Typography>
|
||||
</Stack>
|
||||
) : allScenesCompleted ? (
|
||||
<Stack spacing={1} alignItems="flex-end">
|
||||
<Typography variant="body1" sx={{ color: "#10b981", fontWeight: 700, fontSize: "1rem" }}>
|
||||
🎉 All scenes ready for video generation!
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ color: "#64748b" }}>
|
||||
Generate videos for individual scenes or download them.
|
||||
Generate videos for individual scenes above.
|
||||
</Typography>
|
||||
</Stack>
|
||||
) : (
|
||||
|
||||
@@ -92,6 +92,9 @@ export const SceneActionButtons: React.FC<SceneActionButtonsProps> = ({
|
||||
}
|
||||
|
||||
// Has audio - show all action buttons
|
||||
const videoInProgress = rendering !== null;
|
||||
const isCurrentVideo = rendering === scene.id;
|
||||
|
||||
return (
|
||||
<Stack direction="row" spacing={1.5} justifyContent="flex-end" flexWrap="wrap" useFlexGap>
|
||||
{/* Generate Image */}
|
||||
@@ -114,21 +117,29 @@ export const SceneActionButtons: React.FC<SceneActionButtonsProps> = ({
|
||||
|
||||
{/* Generate Video */}
|
||||
<PrimaryButton
|
||||
onClick={() => onVideoRender(scene.id)}
|
||||
disabled={isBusy || !hasImage || hasVideo}
|
||||
onClick={() => {
|
||||
onVideoRender(scene.id);
|
||||
}}
|
||||
disabled={isBusy || videoInProgress || !hasImage || hasVideo}
|
||||
startIcon={<VideocamIcon />}
|
||||
tooltip={
|
||||
hasVideo
|
||||
? "Video already generated"
|
||||
: !hasImage
|
||||
? "Generate an image first to create video"
|
||||
: videoInProgress
|
||||
? "A video generation is already running. Please wait..."
|
||||
: isBusy
|
||||
? "Another operation in progress"
|
||||
: "Generate video for this scene"
|
||||
}
|
||||
sx={{ minWidth: 160 }}
|
||||
sx={{ minWidth: 180 }}
|
||||
>
|
||||
{hasVideo ? "Video Ready" : "Generate Video"}
|
||||
{videoInProgress && isCurrentVideo
|
||||
? "Generating Video..."
|
||||
: hasVideo
|
||||
? "Video Ready"
|
||||
: "Generate Video"}
|
||||
</PrimaryButton>
|
||||
|
||||
{/* Download Video */}
|
||||
|
||||
@@ -7,11 +7,13 @@ import {
|
||||
OpenInNew as OpenInNewIcon,
|
||||
Videocam as VideocamIcon,
|
||||
} from "@mui/icons-material";
|
||||
import { Scene, Job } from "../types";
|
||||
import { Scene, Job, VideoGenerationSettings } from "../types";
|
||||
import { GlassyCard, glassyCardSx } from "../ui";
|
||||
import { InlineAudioPlayer } from "../InlineAudioPlayer";
|
||||
import { SceneActionButtons } from "./SceneActionButtons";
|
||||
import { aiApiClient } from "../../../api/client";
|
||||
import { fetchMediaBlobUrl } from "../../../utils/fetchMediaBlobUrl";
|
||||
import { VideoRegenerateModal } from "./VideoRegenerateModal";
|
||||
|
||||
interface SceneCardProps {
|
||||
scene: Scene;
|
||||
@@ -22,7 +24,7 @@ interface SceneCardProps {
|
||||
avatarImageUrl?: string | null;
|
||||
onRender: (sceneId: string, mode: "preview" | "full") => void;
|
||||
onImageGenerate: (sceneId: string) => void;
|
||||
onVideoRender: (sceneId: string) => void;
|
||||
onVideoGenerate: (sceneId: string, settings: VideoGenerationSettings) => void;
|
||||
onDownloadAudio: (audioUrl: string, title: string) => void;
|
||||
onDownloadVideo: (videoUrl: string, title: string) => void;
|
||||
onShare: (audioUrl: string, title: string) => void;
|
||||
@@ -75,7 +77,7 @@ export const SceneCard: React.FC<SceneCardProps> = ({
|
||||
avatarImageUrl,
|
||||
onRender,
|
||||
onImageGenerate,
|
||||
onVideoRender,
|
||||
onVideoGenerate,
|
||||
onDownloadAudio,
|
||||
onDownloadVideo,
|
||||
onShare,
|
||||
@@ -89,8 +91,27 @@ export const SceneCard: React.FC<SceneCardProps> = ({
|
||||
const status = job?.status || (hasAudio ? "completed" : "idle");
|
||||
const initials = getInitials(scene.title);
|
||||
|
||||
|
||||
// Load image as blob if it's an authenticated endpoint
|
||||
const [imageBlobUrl, setImageBlobUrl] = useState<string | null>(null);
|
||||
const [videoBlobUrl, setVideoBlobUrl] = useState<string | null>(null);
|
||||
const [showVideoModal, setShowVideoModal] = useState(false);
|
||||
const [initialVideoPrompt, setInitialVideoPrompt] = useState<string>("");
|
||||
|
||||
// Prepare a simple default prompt based on the scene title/description
|
||||
// Derive the default video prompt shown in the settings modal whenever the scene changes.
// Preference order: trimmed scene title, then the text before the first "." of the
// description, then a generic fallback sentence.
useEffect(() => {
  const trimmedTitle = (scene.title || "").trim();
  // `description` is not part of the declared Scene type here, hence the cast.
  const rawDescription = (scene as any).description as string | undefined;
  const firstSentence = (rawDescription || "").split(".")[0]?.trim();

  setInitialVideoPrompt(
    trimmedTitle || firstSentence || "Professional podcast scene with subtle movement",
  );
}, [scene]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!imageUrl) {
|
||||
@@ -98,14 +119,11 @@ export const SceneCard: React.FC<SceneCardProps> = ({
|
||||
return;
|
||||
}
|
||||
|
||||
console.log('[SceneCard] Loading image:', { imageUrl, hasImage, sceneId: scene.id });
|
||||
|
||||
// Check if this is a podcast image endpoint that requires authentication
|
||||
const isPodcastImage = imageUrl.includes('/api/podcast/images/') || imageUrl.includes('/api/story/images/');
|
||||
|
||||
if (!isPodcastImage) {
|
||||
// Regular URL (external), use directly
|
||||
console.log('[SceneCard] Using external image URL directly');
|
||||
setImageBlobUrl(imageUrl);
|
||||
return;
|
||||
}
|
||||
@@ -134,22 +152,17 @@ export const SceneCard: React.FC<SceneCardProps> = ({
|
||||
// Remove query parameters if present
|
||||
imagePath = imagePath.split('?')[0];
|
||||
|
||||
console.log('[SceneCard] Fetching image blob from:', imagePath);
|
||||
|
||||
const response = await aiApiClient.get(imagePath, {
|
||||
responseType: 'blob',
|
||||
});
|
||||
|
||||
if (!isMounted || imageUrl !== currentImageUrl) {
|
||||
console.log('[SceneCard] Component unmounted or URL changed, skipping blob URL set');
|
||||
return;
|
||||
}
|
||||
|
||||
const blob = response.data;
|
||||
const newBlobUrl = URL.createObjectURL(blob);
|
||||
|
||||
console.log('[SceneCard] Image blob loaded successfully, created blob URL');
|
||||
|
||||
setImageBlobUrl((prevBlobUrl) => {
|
||||
// Clean up previous blob URL if exists
|
||||
if (prevBlobUrl && prevBlobUrl !== newBlobUrl && prevBlobUrl.startsWith('blob:')) {
|
||||
@@ -184,11 +197,9 @@ export const SceneCard: React.FC<SceneCardProps> = ({
|
||||
const token = localStorage.getItem('clerk_dashboard_token') || '';
|
||||
if (token) {
|
||||
const urlWithToken = `${fallbackPath}?token=${encodeURIComponent(token)}`;
|
||||
console.log('[SceneCard] Trying URL with query token');
|
||||
setImageBlobUrl(urlWithToken);
|
||||
} else {
|
||||
// Fallback to original URL
|
||||
console.log('[SceneCard] No token available, using original URL');
|
||||
setImageBlobUrl(imageUrl);
|
||||
}
|
||||
} catch (fallbackErr) {
|
||||
@@ -213,6 +224,39 @@ export const SceneCard: React.FC<SceneCardProps> = ({
|
||||
};
|
||||
}, [imageUrl, hasImage, scene.id]);
|
||||
|
||||
// Load video as blob when videoUrl changes (using Story Writer's utility)
|
||||
// Resolve job.videoUrl into a URL the <video> element can play, via fetchMediaBlobUrl.
// The effect re-runs whenever job.videoUrl changes and releases the URL it created on cleanup.
useEffect(() => {
  const sourceUrl = job?.videoUrl;
  if (!sourceUrl) {
    setVideoBlobUrl(null);
    return;
  }

  // Flipped by the cleanup so a late-resolving fetch can tell its result is stale.
  let cancelled = false;
  let currentBlobUrl: string | null = null;

  fetchMediaBlobUrl(sourceUrl)
    .then((blobUrl) => {
      if (cancelled) {
        // Cleanup already ran, so nobody else will revoke this URL; release it here
        // and do not overwrite state that may belong to a newer videoUrl.
        if (blobUrl) {
          URL.revokeObjectURL(blobUrl);
        }
        return;
      }

      if (blobUrl) {
        currentBlobUrl = blobUrl;
        setVideoBlobUrl(blobUrl);
      } else {
        // A null result is treated as "file not found" - clear the blob URL
        console.warn('[SceneCard] Video file not found (404):', sourceUrl);
        setVideoBlobUrl(null);
      }
    })
    .catch((err) => {
      console.error('[SceneCard] Failed to load video blob:', err);
      // Only reset state if this request is still the current one.
      if (!cancelled) {
        setVideoBlobUrl(null);
      }
    });

  return () => {
    // Runs on unmount and before the effect re-runs for a new URL.
    cancelled = true;
    if (currentBlobUrl) {
      URL.revokeObjectURL(currentBlobUrl);
    }
  };
}, [job?.videoUrl]);
|
||||
|
||||
return (
|
||||
<GlassyCard sx={glassyCardSx}>
|
||||
<Stack spacing={2}>
|
||||
@@ -279,13 +323,12 @@ export const SceneCard: React.FC<SceneCardProps> = ({
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
{hasVideo && job?.videoUrl && (
|
||||
{hasVideo && videoBlobUrl && (
|
||||
<Box sx={{ mt: 1 }}>
|
||||
<Box
|
||||
component="a"
|
||||
href={job.videoUrl}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
href={videoBlobUrl}
|
||||
download={`${scene.title.replace(/[^a-z0-9]/gi, '_')}_video.mp4`}
|
||||
sx={{ color: "#a78bfa", textDecoration: "none", display: "inline-flex", alignItems: "center", gap: 0.5 }}
|
||||
>
|
||||
<VideocamIcon sx={{ fontSize: 16 }} />
|
||||
@@ -350,8 +393,57 @@ export const SceneCard: React.FC<SceneCardProps> = ({
|
||||
<InlineAudioPlayer audioUrl={audioUrl} title={scene.title} />
|
||||
)}
|
||||
|
||||
{/* Image Preview */}
|
||||
{hasImage && (imageBlobUrl || imageUrl) && (
|
||||
{/* Video Preview - Show video if available, otherwise show image */}
|
||||
{hasVideo && videoBlobUrl ? (
|
||||
<Box
|
||||
sx={{
|
||||
width: "100%",
|
||||
borderRadius: 2,
|
||||
overflow: "hidden",
|
||||
border: "2px solid rgba(56,189,248,0.5)",
|
||||
background: alpha("#0f172a", 0.85),
|
||||
position: "relative",
|
||||
}}
|
||||
>
|
||||
<Box
|
||||
component="video"
|
||||
src={videoBlobUrl}
|
||||
controls
|
||||
preload="metadata"
|
||||
sx={{
|
||||
width: "100%",
|
||||
height: "auto",
|
||||
display: "block",
|
||||
maxHeight: 420,
|
||||
objectFit: "cover",
|
||||
backgroundColor: "black",
|
||||
}}
|
||||
onError={(e) => {
|
||||
const videoElement = e.currentTarget as HTMLVideoElement;
|
||||
console.error("[SceneCard] Video failed to load:", {
|
||||
originalUrl: job?.videoUrl,
|
||||
networkState: videoElement.networkState,
|
||||
});
|
||||
}}
|
||||
/>
|
||||
<Box
|
||||
sx={{
|
||||
position: "absolute",
|
||||
top: 8,
|
||||
right: 8,
|
||||
bgcolor: "rgba(56,189,248,0.9)",
|
||||
color: "white",
|
||||
px: 1,
|
||||
py: 0.5,
|
||||
borderRadius: 1,
|
||||
fontSize: "0.75rem",
|
||||
fontWeight: 600,
|
||||
}}
|
||||
>
|
||||
VIDEO
|
||||
</Box>
|
||||
</Box>
|
||||
) : hasImage && (imageBlobUrl || imageUrl) ? (
|
||||
<Box
|
||||
sx={{
|
||||
width: "100%",
|
||||
@@ -373,21 +465,14 @@ export const SceneCard: React.FC<SceneCardProps> = ({
|
||||
objectFit: "cover",
|
||||
}}
|
||||
onError={(e) => {
|
||||
console.error('[SceneCard] Image failed to load:', {
|
||||
console.error("[SceneCard] Image failed to load:", {
|
||||
src: e.currentTarget.src,
|
||||
imageUrl,
|
||||
imageBlobUrl,
|
||||
hasImage,
|
||||
});
|
||||
}}
|
||||
onLoad={() => {
|
||||
console.log('[SceneCard] Image loaded successfully:', {
|
||||
src: imageBlobUrl || imageUrl,
|
||||
});
|
||||
}}
|
||||
/>
|
||||
</Box>
|
||||
)}
|
||||
) : null}
|
||||
|
||||
{/* Action Buttons */}
|
||||
<SceneActionButtons
|
||||
@@ -402,12 +487,25 @@ export const SceneCard: React.FC<SceneCardProps> = ({
|
||||
isBusy={isBusy}
|
||||
onRender={onRender}
|
||||
onImageGenerate={onImageGenerate}
|
||||
onVideoRender={onVideoRender}
|
||||
onVideoRender={() => setShowVideoModal(true)}
|
||||
onDownloadAudio={onDownloadAudio}
|
||||
onDownloadVideo={onDownloadVideo}
|
||||
onShare={onShare}
|
||||
onError={onError}
|
||||
/>
|
||||
|
||||
{/* Video Generation Settings Modal */}
|
||||
<VideoRegenerateModal
|
||||
open={showVideoModal}
|
||||
onClose={() => setShowVideoModal(false)}
|
||||
onGenerate={(settings: VideoGenerationSettings) => {
|
||||
setShowVideoModal(false);
|
||||
onVideoGenerate(scene.id, settings);
|
||||
}}
|
||||
initialPrompt={initialVideoPrompt}
|
||||
initialResolution="480p"
|
||||
initialSeed={-1}
|
||||
/>
|
||||
</Stack>
|
||||
</GlassyCard>
|
||||
);
|
||||
|
||||
@@ -0,0 +1,228 @@
|
||||
import React, { useEffect, useState } from "react";
|
||||
import {
|
||||
Dialog,
|
||||
DialogTitle,
|
||||
DialogContent,
|
||||
DialogActions,
|
||||
Stack,
|
||||
Box,
|
||||
Typography,
|
||||
TextField,
|
||||
FormControl,
|
||||
FormLabel,
|
||||
RadioGroup,
|
||||
FormControlLabel,
|
||||
Radio,
|
||||
Tooltip,
|
||||
} from "@mui/material";
|
||||
import { Info as InfoIcon } from "@mui/icons-material";
|
||||
import { PrimaryButton, SecondaryButton } from "../ui";
|
||||
import type { VideoGenerationSettings } from "../types";
|
||||
|
||||
/** Props for {@link VideoRegenerateModal}. */
interface VideoRegenerateModalProps {
  /** Whether the dialog is shown. */
  open: boolean;
  /** Called when the dialog is dismissed (backdrop/escape via Dialog, or the Cancel button). */
  onClose: () => void;
  /** Called with the collected settings when the user presses "Generate Video". */
  onGenerate: (settings: VideoGenerationSettings) => void;
  /** Prompt text the field starts with; the field is re-synced when this value changes. */
  initialPrompt: string;
  /** Resolution preselected in the radio group. Defaults to "480p". */
  initialResolution?: "480p" | "720p";
  /** Seed to prefill; `-1` or `null` leaves the seed field empty. Defaults to `-1`. */
  initialSeed?: number | null;
}

// Shared dark-theme styling for the outlined text inputs in this dialog.
const outlinedInputSx = {
  bgcolor: "rgba(15,23,42,0.9)",
  color: "white",
  "& .MuiOutlinedInput-notchedOutline": {
    borderColor: "rgba(148,163,184,0.4)",
  },
  "&:hover .MuiOutlinedInput-notchedOutline": {
    borderColor: "rgba(125,211,252,0.8)",
  },
};

/**
 * Dialog that collects per-scene video generation settings (prompt, resolution,
 * optional seed, optional mask image URL) and hands them to `onGenerate`.
 *
 * The dialog does not close itself after generating; the caller decides that
 * inside `onGenerate`.
 */
export const VideoRegenerateModal: React.FC<VideoRegenerateModalProps> = ({
  open,
  onClose,
  onGenerate,
  initialPrompt,
  initialResolution = "480p",
  initialSeed = -1,
}) => {
  const [prompt, setPrompt] = useState(initialPrompt);
  const [resolution, setResolution] = useState<"480p" | "720p">(initialResolution);
  // The seed is kept as a string so an empty field can represent "no seed".
  const [seed, setSeed] = useState<string>(initialSeed != null && initialSeed !== -1 ? String(initialSeed) : "");
  const [maskImageUrl, setMaskImageUrl] = useState<string>("");

  // Keep the editable prompt/resolution in step with the values supplied by the parent.
  useEffect(() => {
    setPrompt(initialPrompt);
    setResolution(initialResolution);
  }, [initialResolution, initialPrompt]);

  // Build the settings object from the form state and pass it to the caller.
  const handleGenerate = () => {
    // Empty or non-numeric input means "no seed" (undefined).
    const parsedSeed = seed.trim() === "" ? undefined : Number.isNaN(Number(seed)) ? undefined : Number(seed);
    const settings: VideoGenerationSettings = {
      prompt: prompt.trim(),
      resolution,
      seed: parsedSeed,
      // An empty mask field is sent as undefined rather than "".
      maskImageUrl: maskImageUrl.trim() || undefined,
    };
    onGenerate(settings);
  };

  return (
    <Dialog
      open={open}
      onClose={onClose}
      maxWidth="md"
      fullWidth
      PaperProps={{
        sx: {
          background: "rgba(15, 23, 42, 0.96)",
          backdropFilter: "blur(18px)",
          borderRadius: 4,
          border: "1px solid rgba(148, 163, 184, 0.4)",
        },
      }}
    >
      <DialogTitle>
        <Stack direction="row" justifyContent="space-between" alignItems="center">
          <Typography variant="h6" sx={{ color: "white", fontWeight: 600 }}>
            Configure Video Generation
          </Typography>
          <Tooltip title="Adjust how your talking-head video is rendered. These settings control resolution, prompt, and animation seed.">
            <InfoIcon sx={{ color: "rgba(148,163,184,0.9)" }} />
          </Tooltip>
        </Stack>
        <Typography variant="body2" sx={{ color: "rgba(148,163,184,0.9)", mt: 1 }}>
          Fine-tune how this scene is animated. InfiniteTalk is audio-driven, so use the prompt to describe the visual
          look and feel you want while keeping it concise.
        </Typography>
      </DialogTitle>

      <DialogContent>
        <Stack spacing={3} sx={{ mt: 1 }}>
          {/* Prompt */}
          <Box>
            <FormLabel sx={{ color: "rgba(248,250,252,0.9)", mb: 0.5 }}>Visual prompt</FormLabel>
            <TextField
              multiline
              minRows={3}
              maxRows={6}
              fullWidth
              value={prompt}
              onChange={(e) => setPrompt(e.target.value)}
              placeholder="Short description of how the scene should look (lighting, mood, camera feel, etc.)"
              variant="outlined"
              InputProps={{
                sx: outlinedInputSx,
              }}
              InputLabelProps={{
                sx: { color: "rgba(148,163,184,0.9)" },
              }}
            />
            <Typography variant="caption" sx={{ color: "rgba(148,163,184,0.9)", mt: 0.5, display: "block" }}>
              Example: "Modern podcast studio with soft lighting, the host framed center, gentle camera movement."
            </Typography>
          </Box>

          {/* Resolution */}
          <Box>
            <FormLabel sx={{ color: "rgba(248,250,252,0.9)", mb: 1 }}>Resolution & quality</FormLabel>
            <RadioGroup
              row
              value={resolution}
              onChange={(e) => setResolution(e.target.value as "480p" | "720p")}
            >
              <FormControlLabel
                value="480p"
                control={<Radio color="primary" />}
                label={
                  <Box>
                    <Typography variant="body2">480p (Recommended)</Typography>
                    <Typography variant="caption" color="text.secondary">
                      Faster render, lower cost, great for previews & social
                    </Typography>
                  </Box>
                }
              />
              <FormControlLabel
                value="720p"
                control={<Radio color="primary" />}
                label={
                  <Box>
                    <Typography variant="body2">720p (Higher quality)</Typography>
                    <Typography variant="caption" color="text.secondary">
                      Sharper video, slightly higher cost and render time
                    </Typography>
                  </Box>
                }
              />
            </RadioGroup>
          </Box>

          {/* Seed & advanced options */}
          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <FormControl fullWidth>
              <FormLabel sx={{ color: "rgba(248,250,252,0.9)", mb: 0.5 }}>Seed (optional)</FormLabel>
              <TextField
                type="number"
                value={seed}
                onChange={(e) => setSeed(e.target.value)}
                placeholder="Random each time if left empty"
                InputProps={{
                  sx: outlinedInputSx,
                }}
              />
              <Typography variant="caption" sx={{ color: "rgba(148,163,184,0.9)", mt: 0.5 }}>
                Use the same seed to get a similar animation style across multiple scenes.
              </Typography>
            </FormControl>

            {/* `fullWidth` is the MUI prop; a hyphenated `full-width` attribute is not. */}
            <FormControl fullWidth>
              <FormLabel sx={{ color: "rgba(248,250,252,0.9)", mb: 0.5 }}>Mask image URL (optional)</FormLabel>
              <TextField
                value={maskImageUrl}
                onChange={(e) => setMaskImageUrl(e.target.value)}
                placeholder="e.g. /api/podcast/images/your_avatar_mask.png"
                InputProps={{
                  sx: outlinedInputSx,
                }}
              />
              <Typography variant="caption" sx={{ color: "rgba(148,163,184,0.9)", mt: 0.5 }}>
                Optional: limit animation to a specific region (e.g. face) by providing a mask image URL. Leave empty to
                animate the whole frame.
              </Typography>
            </FormControl>
          </Stack>
        </Stack>
      </DialogContent>

      <DialogActions sx={{ p: 2.5, pt: 0, justifyContent: "space-between" }}>
        <Typography variant="caption" sx={{ color: "rgba(148,163,184,0.9)" }}>
          Estimated cost at 480p is lower than 720p. You'll only be billed for successful renders.
        </Typography>
        <Stack direction="row" spacing={1}>
          <SecondaryButton onClick={onClose}>Cancel</SecondaryButton>
          <PrimaryButton onClick={handleGenerate}>Generate Video</PrimaryButton>
        </Stack>
      </DialogActions>
    </Dialog>
  );
};
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { useState, useEffect, useRef, useCallback } from "react";
|
||||
import { Script, Knobs, Job, RenderJobResult, TaskStatus } from "../types";
|
||||
import { Script, Knobs, Job, RenderJobResult, TaskStatus, VideoGenerationSettings } from "../types";
|
||||
import { podcastApi } from "../../../services/podcastApi";
|
||||
|
||||
interface UseRenderQueueProps {
|
||||
@@ -36,7 +36,11 @@ export const useRenderQueue = ({
|
||||
duration: number;
|
||||
sceneCount: number;
|
||||
} | null>(null);
|
||||
const [combiningVideos, setCombiningVideos] = useState(false);
|
||||
const [finalVideoUrl, setFinalVideoUrl] = useState<string | null>(null);
|
||||
const [combiningProgress, setCombiningProgress] = useState<{ progress: number; message: string } | null>(null);
|
||||
const pollingIntervals = useRef<Map<string, NodeJS.Timeout>>(new Map());
|
||||
const pollingErrorCounts = useRef<Map<string, number>>(new Map());
|
||||
|
||||
// Cleanup polling intervals on unmount
|
||||
useEffect(() => {
|
||||
@@ -44,10 +48,11 @@ export const useRenderQueue = ({
|
||||
return () => {
|
||||
intervals.forEach((interval) => clearInterval(interval));
|
||||
intervals.clear();
|
||||
pollingErrorCounts.current.clear();
|
||||
};
|
||||
}, []);
|
||||
|
||||
// Initialize jobs if empty
|
||||
// Initialize jobs if empty (audio/image only)
|
||||
useEffect(() => {
|
||||
if (jobs.length === 0 && script.scenes.length > 0) {
|
||||
const initialJobs: Job[] = script.scenes.map((s) => {
|
||||
@@ -59,7 +64,7 @@ export const useRenderQueue = ({
|
||||
progress: hasExistingAudio ? 100 : 0,
|
||||
previewUrl: null,
|
||||
finalUrl: hasExistingAudio ? s.audioUrl || null : null,
|
||||
imageUrl: s.imageUrl || null, // Include existing imageUrl from scene
|
||||
imageUrl: s.imageUrl || null,
|
||||
jobId: null,
|
||||
};
|
||||
});
|
||||
@@ -67,25 +72,201 @@ export const useRenderQueue = ({
|
||||
onUpdateJob(job.sceneId, job);
|
||||
});
|
||||
}
|
||||
}, [script.scenes.length, jobs.length, onUpdateJob, script.scenes]);
|
||||
}, [jobs.length, script.scenes.length, onUpdateJob, script.scenes]);
|
||||
|
||||
// Load final video URL from project on mount (for persistence across reloads)
|
||||
// Restore a previously stored final video URL for this project so it survives reloads.
// Runs whenever projectId changes; does nothing when there is no projectId.
useEffect(() => {
  if (!projectId) return;

  // Flipped by the cleanup so a response that arrives after unmount, or after
  // projectId changed, is ignored instead of writing another project's URL into state.
  let cancelled = false;

  podcastApi
    .loadProject(projectId)
    .then((project) => {
      if (cancelled) return;
      if (project.final_video_url) {
        console.log("[useRenderQueue] Loaded final video URL from project:", project.final_video_url);
        setFinalVideoUrl(project.final_video_url);
      }
    })
    .catch((error) => {
      console.error("[useRenderQueue] Failed to load project for final video URL:", error);
      // Don't show error to user - this is just for restoring state
    });

  return () => {
    cancelled = true;
  };
}, [projectId]);
|
||||
|
||||
// Always try to attach existing videos to scenes (even after reloads)
|
||||
// Ask the backend which scene videos already exist for this project and attach each
// one to the job of the matching scene. Scenes are matched by the first run of digits
// in scene.id against the video's scene_number.
useEffect(() => {
  if (script.scenes.length === 0) return;

  podcastApi
    .listVideos(projectId)
    .then(({ videos }) => {
      // First listed video per scene number wins (presumably the list is newest-first - verify
      // against the API). The raw URL is stored; SceneCard loads it as an authenticated blob.
      const urlBySceneNumber = new Map<number, string>();
      for (const video of videos) {
        if (urlBySceneNumber.has(video.scene_number)) continue;
        urlBySceneNumber.set(video.scene_number, video.video_url);
      }

      for (const scene of script.scenes) {
        const digits = scene.id.match(/\d+/);
        if (!digits) continue;

        const videoUrl = urlBySceneNumber.get(parseInt(digits[0], 10));
        if (!videoUrl) continue;

        // Skip scenes whose job already points at this exact URL.
        const existingJob = jobs.find((candidate) => candidate.sceneId === scene.id);
        if (existingJob?.videoUrl === videoUrl) continue;

        onUpdateJob(scene.id, {
          sceneId: scene.id,
          title: scene.title,
          videoUrl,
          status: "completed" as const,
          progress: 100,
        });
      }
    })
    .catch((error) => {
      console.error("[useRenderQueue] Failed to list existing videos:", error);
    });
}, [projectId, script.scenes, jobs, onUpdateJob]);
|
||||
|
||||
// Periodic check to rescue videos that were generated but not detected by polling
|
||||
// Latest jobs, readable from the interval callback below without making `jobs` an
// effect dependency. If `jobs` were a dependency, every job update (expected on each
// progress poll) would clear and restart the 2-minute timer, so it might never fire.
const rescueJobsRef = useRef<Job[]>(jobs);
useEffect(() => {
  rescueJobsRef.current = jobs;
}, [jobs]);

// While a scene is rendering, periodically list the project's videos and attach any
// that exist on the backend but are not yet recorded on the matching job.
useEffect(() => {
  if (!rendering || script.scenes.length === 0) return;

  const rescueInterval = setInterval(async () => {
    try {
      const videoList = await podcastApi.listVideos(projectId);

      // First listed video per scene number wins. The raw URL is stored;
      // SceneCard handles authentication via blob loading.
      const videoMap = new Map<number, string>();
      videoList.videos.forEach((video) => {
        if (!videoMap.has(video.scene_number)) {
          videoMap.set(video.scene_number, video.video_url);
        }
      });

      // Update jobs for scenes that have videos but no videoUrl set
      script.scenes.forEach((scene) => {
        const sceneNumberMatch = scene.id.match(/\d+/);
        const sceneNumber = sceneNumberMatch ? parseInt(sceneNumberMatch[0], 10) : null;
        if (sceneNumber === null) return;

        const videoUrl = videoMap.get(sceneNumber);
        if (!videoUrl) return;

        const job = rescueJobsRef.current.find((j) => j.sceneId === scene.id);

        if (!job) {
          onUpdateJob(scene.id, {
            sceneId: scene.id,
            title: scene.title,
            status: "completed" as const,
            progress: 100,
            videoUrl,
          });
        } else if (!job.videoUrl) {
          onUpdateJob(scene.id, { videoUrl, status: "completed" as const, progress: 100 });
          // If this was the rendering scene, stop rendering
          if (rendering === scene.id) {
            setRendering(null);
          }
        }
      });
    } catch (error) {
      console.error("[useRenderQueue] Failed to rescue videos:", error);
    }
  }, 120000); // Check every 2 minutes

  return () => clearInterval(rescueInterval);
}, [rendering, script.scenes, projectId, onUpdateJob]);
|
||||
|
||||
// Look up a scene of the current script by id; yields undefined when no scene matches.
const getScene = useCallback(
  (sceneId: string) => {
    return script.scenes.find((candidate) => candidate.id === sceneId);
  },
  [script.scenes],
);
|
||||
|
||||
const pollTaskStatus = useCallback(async (taskId: string, sceneId: string) => {
|
||||
try {
|
||||
const status: TaskStatus = await podcastApi.pollTaskStatus(taskId);
|
||||
const status: TaskStatus | null = await podcastApi.pollTaskStatus(taskId);
|
||||
|
||||
// Handle null response (task not found)
|
||||
if (!status) {
|
||||
const errorCount = (pollingErrorCounts.current.get(sceneId) || 0) + 1;
|
||||
pollingErrorCounts.current.set(sceneId, errorCount);
|
||||
|
||||
// Stop polling after 3 consecutive "task not found" errors
|
||||
if (errorCount >= 3) {
|
||||
onUpdateJob(sceneId, { status: "failed", progress: 0 });
|
||||
const interval = pollingIntervals.current.get(sceneId);
|
||||
if (interval) {
|
||||
clearInterval(interval);
|
||||
pollingIntervals.current.delete(sceneId);
|
||||
}
|
||||
pollingErrorCounts.current.delete(sceneId);
|
||||
setRendering(null);
|
||||
onError("Video generation task not found. The task may have expired or been cancelled.");
|
||||
return true; // Stop polling
|
||||
}
|
||||
return false; // Continue polling (might be transient)
|
||||
}
|
||||
|
||||
// Reset error count on successful poll
|
||||
pollingErrorCounts.current.delete(sceneId);
|
||||
|
||||
onUpdateJob(sceneId, {
|
||||
progress: status.progress ?? 0,
|
||||
status: status.status === "completed" ? "completed" : status.status === "failed" ? "failed" : "running",
|
||||
});
|
||||
|
||||
if (status.status === "completed" && status.result) {
|
||||
// Check for completion - handle both "completed" and "processing" with 100% progress
|
||||
const isCompleted = status.status === "completed" || (status.status === "processing" && status.progress === 100);
|
||||
|
||||
if (isCompleted && status.result) {
|
||||
const result = status.result;
|
||||
console.log("[useRenderQueue] Task completed, extracting video URL", {
|
||||
result,
|
||||
video_url: result.video_url,
|
||||
status: status.status,
|
||||
progress: status.progress,
|
||||
});
|
||||
|
||||
let videoUrl = result.video_url;
|
||||
if (!videoUrl) {
|
||||
console.error("[useRenderQueue] No video_url in result! Attempting to rescue from file system...", { result });
|
||||
// Try to rescue: check if video exists for this scene
|
||||
const sceneNumberMatch = getScene(sceneId)?.id.match(/\d+/);
|
||||
const sceneNumber = sceneNumberMatch ? parseInt(sceneNumberMatch[0], 10) : null;
|
||||
if (sceneNumber !== null) {
|
||||
podcastApi
|
||||
.listVideos(projectId)
|
||||
.then((videoList) => {
|
||||
const sceneVideo = videoList.videos.find((v) => v.scene_number === sceneNumber);
|
||||
if (sceneVideo) {
|
||||
// Store the raw video URL - SceneCard will handle authentication via blob loading
|
||||
onUpdateJob(sceneId, {
|
||||
status: "completed",
|
||||
progress: 100,
|
||||
videoUrl: sceneVideo.video_url,
|
||||
cost: result.cost || 0,
|
||||
});
|
||||
}
|
||||
})
|
||||
.catch((err) => console.error("[useRenderQueue] Failed to rescue video:", err));
|
||||
}
|
||||
return true; // Stop polling
|
||||
}
|
||||
|
||||
// Store the raw video URL - SceneCard will handle authentication via blob loading
|
||||
onUpdateJob(sceneId, {
|
||||
status: "completed",
|
||||
progress: 100,
|
||||
videoUrl: result.video_url,
|
||||
videoUrl,
|
||||
cost: result.cost,
|
||||
});
|
||||
|
||||
@@ -94,20 +275,62 @@ export const useRenderQueue = ({
|
||||
clearInterval(interval);
|
||||
pollingIntervals.current.delete(sceneId);
|
||||
}
|
||||
setRendering(null);
|
||||
return true; // Stop polling
|
||||
} else if (status.status === "failed") {
|
||||
// Extract user-friendly error message
|
||||
let errorMessage = "Video generation failed";
|
||||
if (status.error) {
|
||||
// Try to extract meaningful error from various formats
|
||||
const errorStr = status.error;
|
||||
if (errorStr.includes("Insufficient credits")) {
|
||||
errorMessage = "Video generation failed: Insufficient WaveSpeed credits. Please top up your account.";
|
||||
} else if (errorStr.includes("HTTPException") || errorStr.includes("502")) {
|
||||
// Extract the actual error message from HTTPException details
|
||||
const match = errorStr.match(/message[":\s]+"([^"]+)"/i) || errorStr.match(/detail[":\s]+"([^"]+)"/i);
|
||||
if (match && match[1]) {
|
||||
errorMessage = `Video generation failed: ${match[1]}`;
|
||||
} else {
|
||||
errorMessage = `Video generation failed: ${errorStr}`;
|
||||
}
|
||||
} else {
|
||||
errorMessage = `Video generation failed: ${errorStr}`;
|
||||
}
|
||||
}
|
||||
|
||||
onUpdateJob(sceneId, { status: "failed", progress: 0 });
|
||||
const interval = pollingIntervals.current.get(sceneId);
|
||||
if (interval) {
|
||||
clearInterval(interval);
|
||||
pollingIntervals.current.delete(sceneId);
|
||||
}
|
||||
onError(status.error || "Video generation failed");
|
||||
pollingErrorCounts.current.delete(sceneId);
|
||||
setRendering(null);
|
||||
onError(errorMessage);
|
||||
return true; // Stop polling
|
||||
}
|
||||
|
||||
return status.status === "completed" || status.status === "failed";
|
||||
return false; // Continue polling
|
||||
} catch (error) {
|
||||
console.error("Error polling task status:", error);
|
||||
return false;
|
||||
const errorCount = (pollingErrorCounts.current.get(sceneId) || 0) + 1;
|
||||
pollingErrorCounts.current.set(sceneId, errorCount);
|
||||
|
||||
// Stop polling after 5 consecutive network errors
|
||||
if (errorCount >= 5) {
|
||||
onUpdateJob(sceneId, { status: "failed", progress: 0 });
|
||||
const interval = pollingIntervals.current.get(sceneId);
|
||||
if (interval) {
|
||||
clearInterval(interval);
|
||||
pollingIntervals.current.delete(sceneId);
|
||||
}
|
||||
pollingErrorCounts.current.delete(sceneId);
|
||||
setRendering(null);
|
||||
const errorMsg = error instanceof Error ? error.message : String(error);
|
||||
onError(`Video generation failed: Unable to check status. ${errorMsg}`);
|
||||
return true; // Stop polling
|
||||
}
|
||||
return false; // Continue polling (might be transient network error)
|
||||
}
|
||||
}, [onUpdateJob, onError]);
|
||||
|
||||
@@ -217,6 +440,7 @@ export const useRenderQueue = ({
|
||||
sceneId: scene.id,
|
||||
sceneTitle: scene.title,
|
||||
sceneContent: sceneContent,
|
||||
baseAvatarUrl: avatarImageUrl || undefined, // Use base avatar if available
|
||||
width: 1024,
|
||||
height: 1024,
|
||||
});
|
||||
@@ -239,68 +463,112 @@ export const useRenderQueue = ({
|
||||
} finally {
|
||||
setGeneratingImage(null);
|
||||
}
|
||||
}, [generatingImage, getScene, onUpdateJob, onError]);
|
||||
}, [generatingImage, getScene, avatarImageUrl, onUpdateJob, onError, script]);
|
||||
|
||||
/**
 * Start video generation for one scene and begin polling its task.
 *
 * Nothing is sent to the backend unless:
 *  - no other scene is currently rendering,
 *  - the scene exists and has an image (its own or the base avatar),
 *  - the scene has a non-blob audio URL (job.finalUrl or scene.audioUrl),
 *  - every scene in the script has both audio and an image,
 *  - the estimated cost fits within the remaining budget cap (when set).
 *
 * @param sceneId  ID of the scene to render.
 * @param settings Optional per-call overrides (resolution, prompt, seed, mask image).
 */
const runVideoRender = useCallback(
  async (sceneId: string, settings?: VideoGenerationSettings) => {
    // Only one scene may render at a time.
    if (rendering && rendering !== sceneId) {
      return;
    }

    const scene = getScene(sceneId);
    if (!scene) {
      return;
    }

    // Guard: require image and audio before calling expensive video gen
    const sceneImageUrl = scene.imageUrl || avatarImageUrl;
    if (!sceneImageUrl) {
      onError("Scene image is required for video generation. Please generate images for scenes first.");
      return;
    }

    const job = jobs.find((j) => j.sceneId === sceneId);
    // Use job.finalUrl if available, otherwise fall back to scene.audioUrl (from Script Editor)
    const audioUrl = job?.finalUrl || scene.audioUrl;
    if (!audioUrl || audioUrl.startsWith("blob:")) {
      onError("Please generate audio first before creating video.");
      return;
    }

    // Guard: ensure every scene has audio and image before enabling render queue video
    const allScenesHaveAudio = script.scenes.every((s) => s.audioUrl && !s.audioUrl.startsWith("blob:"));
    const allScenesHaveImage = script.scenes.every((s) => s.imageUrl);
    if (!allScenesHaveAudio || !allScenesHaveImage) {
      onError("Please ensure all scenes have both audio and image before generating video.");
      return;
    }

    // Resolution & simple cost heuristic (default 480p for lower cost)
    const targetResolution: "480p" | "720p" =
      settings?.resolution || (knobs.resolution as "480p" | "720p") || "480p";
    const baseCost = 0.3; // 5s at 720p
    const estimatedCost = targetResolution === "480p" ? baseCost / 2 : baseCost;

    if (budgetCap && budgetCap > 0) {
      const currentSpent = jobs
        .filter((j) => j.status === "completed" && j.cost)
        .reduce((sum, j) => sum + (j.cost || 0), 0);

      if (currentSpent + estimatedCost > budgetCap) {
        onError(
          `Budget cap exceeded. Estimated cost: $${estimatedCost.toFixed(2)}, Budget remaining: $${(budgetCap - currentSpent).toFixed(2)}`
        );
        return;
      }
    }

    setRendering(sceneId);
    onUpdateJob(sceneId, {
      status: "running",
      progress: 5,
    });

    try {
      console.log("[useRenderQueue] Starting video generation", {
        sceneId,
        sceneTitle: scene.title,
        audioUrl,
        avatarImageUrl: sceneImageUrl,
        resolution: targetResolution,
        prompt: settings?.prompt,
        seed: settings?.seed,
        maskImageUrl: settings?.maskImageUrl,
      });

      const result = await podcastApi.generateVideo({
        projectId,
        sceneId,
        sceneTitle: scene.title,
        audioUrl,
        avatarImageUrl: sceneImageUrl,
        resolution: targetResolution,
        prompt: settings?.prompt || undefined,
        seed: settings?.seed ?? -1,
        maskImageUrl: settings?.maskImageUrl || undefined,
      });

      if (!result.taskId) {
        throw new Error("Backend did not return a task ID. Response: " + JSON.stringify(result));
      }

      onUpdateJob(sceneId, {
        taskId: result.taskId,
        status: "running",
        progress: 5,
      });

      // From here on the polling callback owns the `rendering` flag and
      // clears it when the task completes or fails.
      startPolling(result.taskId, sceneId);
    } catch (error) {
      onUpdateJob(sceneId, { status: "failed", progress: 0 });
      // Polling never started on this path, so nothing else will release the
      // flag. Without this reset the guard at the top of this function would
      // reject every other scene after one failed request.
      setRendering(null);
      const message = error instanceof Error ? error.message : "Video generation failed";
      onError(message);
    }
  },
  [rendering, getScene, avatarImageUrl, jobs, budgetCap, projectId, knobs, onUpdateJob, onError, script.scenes, startPolling]
);
|
||||
|
||||
const combineAudio = useCallback(async () => {
|
||||
try {
|
||||
@@ -361,16 +629,151 @@ export const useRenderQueue = ({
|
||||
}
|
||||
}, [script.scenes, jobs, projectId, onError]);
|
||||
|
||||
/**
 * Combine every scene's rendered video into one final video.
 *
 * Steps:
 *  1. Collect one API video URL per scene (query string stripped). A scene
 *     with no video, or with a `blob:` URL, aborts the operation.
 *  2. Start a combination task through `podcastApi.combineVideos`.
 *  3. Poll the task every 2 seconds, for at most `maxPolls` polls, publishing
 *     progress through `setCombiningProgress`.
 *  4. On completion store the final URL in state and try to save it on the
 *     project; a failed save is only logged.
 *
 * Every failure is reported through `onError`; nothing is thrown to the caller.
 */
const combineFinalVideo = useCallback(async () => {
  try {
    setCombiningVideos(true);
    onError("");

    // Collect all scene video URLs
    const sceneVideoUrls: string[] = [];
    for (const scene of script.scenes) {
      const job = jobs.find((j) => j.sceneId === scene.id);
      if (!job?.videoUrl) {
        throw new Error(`Scene "${scene.title}" is missing a video. Please generate videos for all scenes first.`);
      }
      // Remove blob URLs and query params - use the API path only
      let videoUrl = job.videoUrl;
      if (videoUrl.startsWith("blob:")) {
        throw new Error(`Scene "${scene.title}" has a blob URL. Cannot combine blob URLs. Please use API URLs.`);
      }
      videoUrl = videoUrl.split("?")[0]; // Remove query params
      sceneVideoUrls.push(videoUrl);
    }

    console.log("[combineFinalVideo] Starting combination with", sceneVideoUrls.length, "videos");

    // Start combination task
    const result = await podcastApi.combineVideos({
      projectId,
      sceneVideoUrls,
      podcastTitle: script.scenes[0]?.title || "Podcast",
    });

    console.log("[combineFinalVideo] Task created:", result.taskId);

    // Poll for completion
    const taskId = result.taskId;
    let done = false;
    let pollCount = 0;
    const maxPolls = 300; // 10 minutes max (300 * 2 seconds) - encoding can take time
    let lastProgress = 0;
    let lastMessage = "Starting video combination...";

    while (!done && pollCount < maxPolls) {
      await new Promise((r) => setTimeout(r, 2000)); // Poll every 2 seconds
      pollCount++;

      const status = await podcastApi.pollTaskStatus(taskId);

      if (!status) {
        console.log(`[combineFinalVideo] Poll ${pollCount}: No status yet...`);
        // Don't fail immediately - task might still be initializing
        if (pollCount < 10) {
          continue; // Wait up to 20 seconds for task to appear
        }
        console.error("[combineFinalVideo] Task not found after 10 polls");
        throw new Error("Task not found. Video combination may have failed on the server. Please try again.");
      }

      // Update progress and message for user feedback
      const currentProgress = status.progress ?? 0;
      const currentMessage = status.message || "Processing...";

      // Update UI with progress
      setCombiningProgress({
        progress: currentProgress,
        message: currentMessage,
      });

      // Only log if progress or message changed to reduce noise
      if (currentProgress !== lastProgress || currentMessage !== lastMessage) {
        console.log(
          `[combineFinalVideo] Poll ${pollCount}: ${status.status} | ` +
            `Progress: ${currentProgress.toFixed(1)}% | Message: ${currentMessage}`
        );
        lastProgress = currentProgress;
        lastMessage = currentMessage;
      }

      if (status.status === "completed") {
        done = true;
        const videoUrl = status.result?.video_url;
        if (!videoUrl) {
          console.error("[combineFinalVideo] No video URL in result:", status.result);
          throw new Error("Final video URL not found in result. Please contact support.");
        }
        console.log("[combineFinalVideo] Success! Video URL:", videoUrl);
        setFinalVideoUrl(videoUrl);

        // Save final video URL to project for persistence across reloads
        try {
          await podcastApi.saveProject(projectId, { final_video_url: videoUrl });
          console.log("[combineFinalVideo] Saved final video URL to project");
        } catch (error) {
          console.warn("[combineFinalVideo] Failed to save final video URL to project:", error);
          // Don't fail the operation if project save fails - video is still available
        }
      } else if (status.status === "failed") {
        const errorMsg = status.error || status.message || "Video combination failed";
        console.error("[combineFinalVideo] Task failed:", errorMsg);
        throw new Error(`Video combination failed: ${errorMsg}`);
      }
    }

    // Test `done`, not the poll counter: a task that completes on the very
    // last allowed poll leaves pollCount === maxPolls, and checking the
    // counter would report a timeout for a successful run.
    if (!done) {
      throw new Error("Video combination timed out after 10 minutes. The video may still be processing. Please check back in a few minutes or try again.");
    }
  } catch (error: any) {
    console.error("[combineFinalVideo] Error:", error);

    // Extract detailed error message
    let message = "Failed to combine videos";

    if (error?.response?.data?.detail) {
      // Backend error with detail
      message = error.response.data.detail;
    } else if (error?.message) {
      // Standard error message
      message = error.message;
    } else if (typeof error === "string") {
      message = error;
    }

    console.error("[combineFinalVideo] Displaying error to user:", message);
    onError(message);
  } finally {
    setCombiningVideos(false);
    setCombiningProgress(null);
  }
}, [script.scenes, jobs, projectId, onError]);
|
||||
|
||||
return {
|
||||
rendering,
|
||||
generatingImage,
|
||||
combiningAudio,
|
||||
combinedAudioResult,
|
||||
combiningVideos,
|
||||
combiningProgress,
|
||||
finalVideoUrl,
|
||||
isBusy: Boolean(rendering),
|
||||
runRender,
|
||||
runImageGeneration,
|
||||
runVideoRender,
|
||||
combineAudio,
|
||||
combineFinalVideo,
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@@ -0,0 +1,464 @@
|
||||
import React, { useEffect, useState } from "react";
|
||||
import {
|
||||
Dialog,
|
||||
DialogTitle,
|
||||
DialogContent,
|
||||
DialogActions,
|
||||
Stack,
|
||||
Box,
|
||||
Typography,
|
||||
Slider,
|
||||
Select,
|
||||
MenuItem,
|
||||
FormControl,
|
||||
InputLabel,
|
||||
FormControlLabel,
|
||||
Checkbox,
|
||||
Tooltip,
|
||||
IconButton,
|
||||
alpha,
|
||||
TextField,
|
||||
} from "@mui/material";
|
||||
import { HelpOutline as HelpOutlineIcon, Close as CloseIcon } from "@mui/icons-material";
|
||||
import { PrimaryButton, SecondaryButton } from "../ui";
|
||||
|
||||
/**
 * Per-scene audio generation settings edited in the regenerate-audio modal.
 *
 * The required fields are always present; the optional output fields may be
 * left unset, in which case the modal shows a fallback value for them.
 */
export type AudioGenerationSettings = {
  /** Identifier of the voice to synthesize with. */
  voiceId: string;
  /** Speaking rate; the modal's slider spans 0.5 to 2.0. */
  speed: number;
  /** Loudness; the modal's slider spans 0.1 to 10. */
  volume: number;
  /** Pitch shift; the modal's slider spans -12 to 12. */
  pitch: number;
  /** Vocal mood, e.g. "happy" or "neutral". */
  emotion: string;
  /** Whether English normalization (numbers/dates) is enabled. */
  englishNormalization: boolean;

  /** Output sample rate in Hz (modal shows 24000 when unset). */
  sampleRate?: number;
  /** Output bitrate in bits per second (modal shows 64000 when unset). */
  bitrate?: number;
  /** Channel count as a string: "1" mono, "2" stereo (modal shows "1" when unset). */
  channel?: "1" | "2";
  /** Output container/codec (modal shows "mp3" when unset). */
  format?: "mp3" | "wav" | "pcm" | "flac";
  /** Language hint (modal shows "auto" when unset). */
  languageBoost?: string;
};
|
||||
|
||||
/** Props accepted by the regenerate-audio modal. */
interface AudioRegenerateModalProps {
  /** Whether the dialog is shown. */
  open: boolean;
  /** Settings used to seed the form; the form is re-seeded when this value changes. */
  initialSettings: AudioGenerationSettings;
  /** While true, the action buttons are disabled and the primary button shows a loading label. */
  isGenerating?: boolean;
  /** Called when the dialog requests to close (close icon, Cancel, or the dialog's own close event). */
  onClose: () => void;
  /** Called with the current form values when the user confirms. */
  onRegenerate: (settings: AudioGenerationSettings) => void;
}
|
||||
|
||||
/** Voice identifiers listed in the modal's voice dropdown. */
const VOICE_OPTIONS = [
  "Wise_Woman", "Friendly_Person", "Inspirational_girl", "Deep_Voice_Man",
  "Calm_Woman", "Casual_Guy", "Lively_Girl", "Patient_Man",
  "Young_Knight", "Determined_Man", "Lovely_Girl", "Decent_Boy",
  "Imposing_Manner", "Elegant_Man", "Abbess", "Sweet_Girl_2",
  "Exuberant_Girl",
];

/** Emotion values listed in the modal's emotion dropdown. */
const EMOTION_OPTIONS = [
  "happy",
  "sad",
  "angry",
  "fearful",
  "disgusted",
  "surprised",
  "neutral",
];

/** Selectable output sample rates, in Hz. */
const SAMPLE_RATE_OPTIONS = [8000, 16000, 22050, 24000, 32000, 44100];

/** Selectable output bitrates, in bits per second (rendered as kbps). */
const BITRATE_OPTIONS = [32000, 64000, 128000, 256000];

/** Values listed in the "Language boost" dropdown; "auto" is the fallback shown when unset. */
const LANGUAGE_BOOST_OPTIONS = [
  "auto",
  "English", "Chinese", "Chinese,Yue", "Arabic", "Russian", "Spanish",
  "French", "Portuguese", "German", "Turkish", "Dutch", "Ukrainian",
  "Vietnamese", "Indonesian", "Japanese", "Italian", "Korean", "Thai",
  "Polish", "Romanian", "Greek", "Czech", "Finnish", "Hindi",
];
|
||||
|
||||
/** Dark-theme styling shared by the bare `Select` controls (voice, emotion). */
const AUDIO_MODAL_SELECT_SX = {
  backgroundColor: alpha("#ffffff", 0.05),
  color: "white",
  "& .MuiOutlinedInput-notchedOutline": { borderColor: "rgba(255,255,255,0.2)" },
  "&:hover .MuiOutlinedInput-notchedOutline": { borderColor: "rgba(255,255,255,0.3)" },
  "&.Mui-focused .MuiOutlinedInput-notchedOutline": { borderColor: "#667eea" },
  "& .MuiSvgIcon-root": { color: "rgba(255,255,255,0.7)" },
};

/** Dark-theme styling shared by every `TextField select` control in the modal. */
const AUDIO_MODAL_TEXT_FIELD_SX = {
  "& .MuiOutlinedInput-root": {
    backgroundColor: alpha("#ffffff", 0.05),
    color: "white",
    "& fieldset": { borderColor: "rgba(255,255,255,0.2)" },
    "&:hover fieldset": { borderColor: "rgba(255,255,255,0.3)" },
    "&.Mui-focused fieldset": { borderColor: "#667eea" },
    "& .MuiSvgIcon-root": { color: "rgba(255,255,255,0.7)" },
  },
};

/** Label styling shared by every `TextField select` control in the modal. */
const AUDIO_MODAL_LABEL_PROPS = { sx: { color: "rgba(255,255,255,0.7)" } };

/**
 * Modal dialog for regenerating one scene's audio with custom settings.
 *
 * The form keeps a local copy of `initialSettings`, re-seeded whenever that
 * prop changes. Nothing is reported to the parent until the user presses
 * "Apply & Regenerate", which calls `onRegenerate` with the edited values.
 *
 * Props:
 *  - open: whether the dialog is shown.
 *  - onClose: called when the dialog requests to close.
 *  - onRegenerate: called with the current form values on confirm.
 *  - initialSettings: values used to seed the form.
 *  - isGenerating: disables both action buttons and switches the primary label.
 */
export const AudioRegenerateModal: React.FC<AudioRegenerateModalProps> = ({
  open,
  onClose,
  onRegenerate,
  initialSettings,
  isGenerating = false,
}) => {
  const [settings, setSettings] = useState<AudioGenerationSettings>(initialSettings);

  useEffect(() => {
    setSettings(initialSettings);
  }, [initialSettings]);

  // Functional update so each change is applied to the latest state rather
  // than to the `settings` value captured when the handler was created.
  const updateSetting = <K extends keyof AudioGenerationSettings>(
    key: K,
    value: AudioGenerationSettings[K]
  ) => {
    setSettings((prev) => ({ ...prev, [key]: value }));
  };

  // The tooltip advertises custom trained voice IDs. Such an ID is not in
  // VOICE_OPTIONS, which would leave the Select with an out-of-range value
  // (blank display plus an MUI warning), so list it as an extra option.
  const voiceOptions =
    settings.voiceId && !VOICE_OPTIONS.includes(settings.voiceId)
      ? [settings.voiceId, ...VOICE_OPTIONS]
      : VOICE_OPTIONS;

  const handleRegenerate = () => {
    onRegenerate(settings);
  };

  return (
    <Dialog
      open={open}
      onClose={onClose}
      maxWidth="md"
      fullWidth
      PaperProps={{
        sx: {
          background: alpha("#0f172a", 0.95),
          backdropFilter: "blur(20px)",
          border: "1px solid rgba(255,255,255,0.1)",
          borderRadius: 4,
        },
      }}
    >
      <DialogTitle>
        <Stack direction="row" justifyContent="space-between" alignItems="center">
          <Typography variant="h6" sx={{ color: "white", fontWeight: 600 }}>
            Regenerate Audio with Custom Settings
          </Typography>
          <IconButton onClick={onClose} size="small" sx={{ color: "rgba(255,255,255,0.7)" }}>
            <CloseIcon />
          </IconButton>
        </Stack>
        <Typography variant="body2" sx={{ color: "rgba(255,255,255,0.6)", mt: 1 }}>
          Adjust voice, speed, tone, and quality. Changes apply only to this scene.
        </Typography>
      </DialogTitle>

      <DialogContent>
        <Stack spacing={3} sx={{ mt: 1 }}>
          {/* Voice */}
          <Box>
            <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1 }}>
              <Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
                Voice
              </Typography>
              <Tooltip title="Choose a system voice or your custom trained voice ID." arrow>
                <IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
                  <HelpOutlineIcon fontSize="small" />
                </IconButton>
              </Tooltip>
            </Stack>
            <FormControl fullWidth>
              <Select
                value={settings.voiceId}
                onChange={(e) => updateSetting("voiceId", e.target.value)}
                sx={AUDIO_MODAL_SELECT_SX}
              >
                {voiceOptions.map((v) => (
                  <MenuItem key={v} value={v}>
                    {v}
                  </MenuItem>
                ))}
              </Select>
            </FormControl>
          </Box>

          {/* Speed / Volume / Pitch */}
          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <Box sx={{ flex: 1 }}>
              <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 0.5 }}>
                <Typography variant="subtitle2" sx={{ color: "white", fontWeight: 600 }}>
                  Speed (0.5-2.0)
                </Typography>
                <Tooltip title="Control how fast the voice speaks. 1.0 is normal." arrow>
                  <HelpOutlineIcon fontSize="small" sx={{ color: "rgba(255,255,255,0.5)" }} />
                </Tooltip>
              </Stack>
              <Slider
                value={settings.speed}
                min={0.5}
                max={2.0}
                step={0.05}
                onChange={(_, v) => updateSetting("speed", v as number)}
                sx={{ color: "#6366f1" }}
              />
              <Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
                Slower (narrative) ↔ Faster (conversational). Impacts duration.
              </Typography>
            </Box>
            <Box sx={{ flex: 1 }}>
              <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 0.5 }}>
                <Typography variant="subtitle2" sx={{ color: "white", fontWeight: 600 }}>
                  Volume (0.1-10)
                </Typography>
                <Tooltip title="Loudness of the voice. 1.0 is normal loudness." arrow>
                  <HelpOutlineIcon fontSize="small" sx={{ color: "rgba(255,255,255,0.5)" }} />
                </Tooltip>
              </Stack>
              <Slider
                value={settings.volume}
                min={0.1}
                max={10}
                step={0.1}
                onChange={(_, v) => updateSetting("volume", v as number)}
                sx={{ color: "#10b981" }}
              />
              <Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
                Lower for soft tone; higher for punchier delivery.
              </Typography>
            </Box>
            <Box sx={{ flex: 1 }}>
              <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 0.5 }}>
                <Typography variant="subtitle2" sx={{ color: "white", fontWeight: 600 }}>
                  Pitch (-12 to 12)
                </Typography>
                <Tooltip title="Tone of the voice. 0 is neutral. Negative is deeper; positive is brighter." arrow>
                  <HelpOutlineIcon fontSize="small" sx={{ color: "rgba(255,255,255,0.5)" }} />
                </Tooltip>
              </Stack>
              <Slider
                value={settings.pitch}
                min={-12}
                max={12}
                step={0.5}
                onChange={(_, v) => updateSetting("pitch", v as number)}
                sx={{ color: "#f97316" }}
              />
              <Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
                Use small adjustments (±2) for natural results.
              </Typography>
            </Box>
          </Stack>

          {/* Emotion */}
          <Box>
            <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1 }}>
              <Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
                Emotion
              </Typography>
              <Tooltip title="Sets the vocal mood: happy, neutral, sad, etc." arrow>
                <IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
                  <HelpOutlineIcon fontSize="small" />
                </IconButton>
              </Tooltip>
            </Stack>
            <FormControl fullWidth>
              <Select
                value={settings.emotion}
                onChange={(e) => updateSetting("emotion", e.target.value)}
                sx={AUDIO_MODAL_SELECT_SX}
              >
                {EMOTION_OPTIONS.map((e) => (
                  <MenuItem key={e} value={e}>
                    {e}
                  </MenuItem>
                ))}
              </Select>
            </FormControl>
            <Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)", mt: 0.5, display: "block" }}>
              Tip: happy/neutral for most podcasts; sad/angry for dramatic or critical segments.
            </Typography>
          </Box>

          {/* Normalization & Language */}
          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <Box sx={{ flex: 1 }}>
              <FormControlLabel
                control={
                  <Checkbox
                    checked={settings.englishNormalization}
                    onChange={(e) => updateSetting("englishNormalization", e.target.checked)}
                    sx={{ color: "rgba(255,255,255,0.7)" }}
                  />
                }
                label={
                  <Typography variant="body2" sx={{ color: "white" }}>
                    English normalization (better numbers/dates)
                  </Typography>
                }
              />
              <Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
                Improves pronunciation of numbers/dates (recommended for stats-heavy scenes).
              </Typography>
            </Box>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Language boost"
                value={settings.languageBoost || "auto"}
                onChange={(e) => updateSetting("languageBoost", e.target.value)}
                SelectProps={{ native: false }}
                InputLabelProps={AUDIO_MODAL_LABEL_PROPS}
                sx={AUDIO_MODAL_TEXT_FIELD_SX}
              >
                {LANGUAGE_BOOST_OPTIONS.map((opt) => (
                  <MenuItem key={opt} value={opt}>
                    {opt}
                  </MenuItem>
                ))}
              </TextField>
              <Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)", mt: 0.5, display: "block" }}>
                Helps with language-specific pronunciation and accent.
              </Typography>
            </Box>
          </Stack>

          {/* Quality & Format */}
          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Sample rate"
                value={settings.sampleRate || 24000}
                onChange={(e) => updateSetting("sampleRate", Number(e.target.value))}
                InputLabelProps={AUDIO_MODAL_LABEL_PROPS}
                sx={AUDIO_MODAL_TEXT_FIELD_SX}
              >
                {SAMPLE_RATE_OPTIONS.map((opt) => (
                  <MenuItem key={opt} value={opt}>
                    {opt} Hz
                  </MenuItem>
                ))}
              </TextField>
              <Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)", mt: 0.5, display: "block" }}>
                Higher sample rate = higher fidelity (24k+ recommended for podcast voice).
              </Typography>
            </Box>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Bitrate"
                value={settings.bitrate || 64000}
                onChange={(e) => updateSetting("bitrate", Number(e.target.value))}
                InputLabelProps={AUDIO_MODAL_LABEL_PROPS}
                sx={AUDIO_MODAL_TEXT_FIELD_SX}
              >
                {BITRATE_OPTIONS.map((opt) => (
                  <MenuItem key={opt} value={opt}>
                    {opt / 1000} kbps
                  </MenuItem>
                ))}
              </TextField>
              <Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)", mt: 0.5, display: "block" }}>
                Higher bitrate = larger file but clearer audio. 64–128 kbps is great for voice.
              </Typography>
            </Box>
          </Stack>

          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Channel"
                value={settings.channel || "1"}
                onChange={(e) => updateSetting("channel", e.target.value as "1" | "2")}
                InputLabelProps={AUDIO_MODAL_LABEL_PROPS}
                sx={AUDIO_MODAL_TEXT_FIELD_SX}
              >
                <MenuItem value="1">Mono (smaller, voice-focused)</MenuItem>
                <MenuItem value="2">Stereo (wider, more presence)</MenuItem>
              </TextField>
            </Box>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Format"
                value={settings.format || "mp3"}
                onChange={(e) => updateSetting("format", e.target.value as "mp3" | "wav" | "pcm" | "flac")}
                InputLabelProps={AUDIO_MODAL_LABEL_PROPS}
                sx={AUDIO_MODAL_TEXT_FIELD_SX}
              >
                <MenuItem value="mp3">mp3 (small, universal)</MenuItem>
                <MenuItem value="wav">wav (uncompressed)</MenuItem>
                <MenuItem value="pcm">pcm (raw)</MenuItem>
                <MenuItem value="flac">flac (lossless)</MenuItem>
              </TextField>
            </Box>
          </Stack>
        </Stack>
      </DialogContent>

      <DialogActions sx={{ p: 3, pt: 2 }}>
        <SecondaryButton onClick={onClose} disabled={isGenerating}>
          Cancel
        </SecondaryButton>
        <PrimaryButton onClick={handleRegenerate} loading={isGenerating} disabled={isGenerating}>
          {isGenerating ? "Generating..." : "Apply & Regenerate"}
        </PrimaryButton>
      </DialogActions>
    </Dialog>
  );
};
|
||||
|
||||
@@ -0,0 +1,563 @@
|
||||
import React, { useState, useEffect } from "react";
|
||||
import {
|
||||
Dialog,
|
||||
DialogTitle,
|
||||
DialogContent,
|
||||
DialogActions,
|
||||
Stack,
|
||||
Box,
|
||||
Typography,
|
||||
TextField,
|
||||
Select,
|
||||
MenuItem,
|
||||
FormControl,
|
||||
InputLabel,
|
||||
Divider,
|
||||
alpha,
|
||||
Tooltip,
|
||||
IconButton,
|
||||
Paper,
|
||||
} from "@mui/material";
|
||||
import {
|
||||
Info as InfoIcon,
|
||||
HelpOutline as HelpOutlineIcon,
|
||||
Close as CloseIcon,
|
||||
} from "@mui/icons-material";
|
||||
import { PrimaryButton, SecondaryButton } from "../ui";
|
||||
|
||||
/** Identifiers of the built-in podcast looks offered in the modal. */
type PresetKey = "studioNeutral" | "warmBroadcast" | "techModern";

/** One selectable look: card copy plus the generation settings it applies. */
interface PodcastImagePreset {
  /** Card heading. */
  title: string;
  /** One-line description shown under the heading. */
  subtitle: string;
  /** Prompt fragment merged into the generation prompt when the preset is applied. */
  prompt: string;
  style: "Auto" | "Fiction" | "Realistic";
  renderingSpeed: "Default" | "Turbo" | "Quality";
  aspectRatio: "1:1" | "16:9" | "9:16" | "4:3" | "3:4";
}

// Key order is the order in which the preset cards are rendered.
const PRESETS: Record<PresetKey, PodcastImagePreset> = {
  studioNeutral: {
    title: "Studio Neutral",
    subtitle: "Clean, well-lit studio, neutral background",
    style: "Realistic",
    renderingSpeed: "Quality",
    aspectRatio: "16:9",
    prompt:
      "Professional podcast studio, neutral light grey backdrop, soft key + fill lighting, subtle depth of field, clear microphone framing",
  },
  warmBroadcast: {
    title: "Warm Broadcast",
    subtitle: "Warm tones, friendly and inviting broadcast desk",
    style: "Realistic",
    renderingSpeed: "Quality",
    aspectRatio: "16:9",
    prompt:
      "Warm broadcast desk, soft amber lighting, cozy ambience, gentle vignette, inviting expression, polished but approachable look",
  },
  techModern: {
    title: "Tech Modern",
    subtitle: "Crisp, modern look with cool accent lighting",
    style: "Auto",
    renderingSpeed: "Quality",
    aspectRatio: "16:9",
    prompt:
      "Modern tech podcast set, cool accent lights (teal/purple), minimal backdrop, crisp highlights, premium camera look, subtle bokeh",
  },
};
|
||||
|
||||
type ImageStyleOption = "Auto" | "Fiction" | "Realistic";
type ImageRenderingSpeedOption = "Default" | "Turbo" | "Quality";
type ImageAspectRatioOption = "1:1" | "16:9" | "9:16" | "4:3" | "3:4";

/** The values collected by the regenerate-image form and handed to `onRegenerate`. */
export interface ImageGenerationSettings {
  /** Free-text description of the desired image. */
  prompt: string;
  /** Character style selected in the "Character Style" dropdown. */
  style: ImageStyleOption;
  /** Speed/quality tier selected in the "Rendering Speed" dropdown. */
  renderingSpeed: ImageRenderingSpeedOption;
  /** Width-to-height ratio selected in the "Aspect Ratio" dropdown. */
  aspectRatio: ImageAspectRatioOption;
}
|
||||
|
||||
/** Props accepted by `ImageRegenerateModal`. */
interface ImageRegenerateModalProps {
  /** Whether the dialog is shown. */
  open: boolean;
  /** Invoked by the dialog's close paths (close icon, Cancel, dialog dismiss). */
  onClose: () => void;
  /** Invoked with the current form values when the user confirms. */
  onRegenerate: (settings: ImageGenerationSettings) => void;
  /** Seeds the prompt field; the form is re-seeded whenever this changes. */
  initialPrompt: string;
  /** Seeds the style dropdown; the component defaults it to "Realistic". */
  initialStyle?: ImageGenerationSettings["style"];
  /** Seeds the rendering-speed dropdown; the component defaults it to "Quality". */
  initialRenderingSpeed?: ImageGenerationSettings["renderingSpeed"];
  /** Seeds the aspect-ratio dropdown; the component defaults it to "16:9". */
  initialAspectRatio?: ImageGenerationSettings["aspectRatio"];
  /** Disables the action buttons and shows the loading label; defaults to false. */
  isGenerating?: boolean;
}
|
||||
|
||||
export const ImageRegenerateModal: React.FC<ImageRegenerateModalProps> = ({
|
||||
open,
|
||||
onClose,
|
||||
onRegenerate,
|
||||
initialPrompt,
|
||||
initialStyle = "Realistic",
|
||||
initialRenderingSpeed = "Quality",
|
||||
initialAspectRatio = "16:9",
|
||||
isGenerating = false,
|
||||
}) => {
|
||||
const [prompt, setPrompt] = useState(initialPrompt);
|
||||
const [style, setStyle] = useState<"Auto" | "Fiction" | "Realistic">(initialStyle);
|
||||
const [renderingSpeed, setRenderingSpeed] = useState<"Default" | "Turbo" | "Quality">(initialRenderingSpeed);
|
||||
const [aspectRatio, setAspectRatio] = useState<"1:1" | "16:9" | "9:16" | "4:3" | "3:4">(initialAspectRatio);
|
||||
|
||||
// Update state when initial values change
|
||||
useEffect(() => {
|
||||
setPrompt(initialPrompt);
|
||||
setStyle(initialStyle);
|
||||
setRenderingSpeed(initialRenderingSpeed);
|
||||
setAspectRatio(initialAspectRatio);
|
||||
}, [initialPrompt, initialStyle, initialRenderingSpeed, initialAspectRatio]);
|
||||
|
||||
// Snapshot the four form fields and hand them to the parent in one object.
const handleRegenerate = () => {
  const currentSettings: ImageGenerationSettings = { prompt, style, renderingSpeed, aspectRatio };
  onRegenerate(currentSettings);
};
|
||||
|
||||
/**
 * Apply a podcast-ready preset: merge its prompt fragment into the prompt
 * field and adopt its style, rendering speed and aspect ratio.
 *
 * Fragments left behind by previously applied presets are removed first, so
 * switching presets (or clicking the same one twice) swaps the look instead
 * of stacking conflicting lighting/backdrop descriptions in the prompt.
 */
const applyPreset = (presetKey: PresetKey) => {
  const preset = PRESETS[presetKey];

  setPrompt((current) => {
    // Strip every known preset fragment, together with the newline that
    // joined it to the rest of the prompt when there is one.
    const withoutPresets = Object.values(PRESETS)
      .reduce(
        (text, { prompt: fragment }) =>
          text.split(`\n${fragment}`).join("").split(fragment).join(""),
        current || ""
      )
      .trim();

    // If none of the user's own text remains, start again from the scene's
    // initial prompt; otherwise keep their text and append the preset.
    const base = withoutPresets === "" ? initialPrompt.trim() : withoutPresets;
    return `${base}\n${preset.prompt}`.trim();
  });

  setStyle(preset.style);
  setRenderingSpeed(preset.renderingSpeed);
  setAspectRatio(preset.aspectRatio);
};
|
||||
|
||||
return (
|
||||
<Dialog
|
||||
open={open}
|
||||
onClose={onClose}
|
||||
maxWidth="md"
|
||||
fullWidth
|
||||
PaperProps={{
|
||||
sx: {
|
||||
background: alpha("#0f172a", 0.95),
|
||||
backdropFilter: "blur(20px)",
|
||||
border: "1px solid rgba(255,255,255,0.1)",
|
||||
borderRadius: 4,
|
||||
},
|
||||
}}
|
||||
>
|
||||
<DialogTitle>
|
||||
<Stack direction="row" justifyContent="space-between" alignItems="center">
|
||||
<Typography variant="h6" sx={{ color: "white", fontWeight: 600 }}>
|
||||
Regenerate Image with Custom Settings
|
||||
</Typography>
|
||||
<IconButton
|
||||
onClick={onClose}
|
||||
size="small"
|
||||
sx={{ color: "rgba(255,255,255,0.7)" }}
|
||||
>
|
||||
<CloseIcon />
|
||||
</IconButton>
|
||||
</Stack>
|
||||
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.6)", mt: 1 }}>
|
||||
Customize the image generation parameters to get the perfect result for your scene
|
||||
</Typography>
|
||||
</DialogTitle>
|
||||
|
||||
<DialogContent>
|
||||
<Stack spacing={3} sx={{ mt: 1 }}>
|
||||
{/* Presets */}
|
||||
<Box>
|
||||
<Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1 }}>
|
||||
<Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
|
||||
Podcast-ready presets
|
||||
</Typography>
|
||||
<Tooltip
|
||||
title="Quickly apply a podcast-friendly look. Each preset adjusts lighting, background, and ratio while keeping your base avatar consistent."
|
||||
arrow
|
||||
>
|
||||
<IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
|
||||
<HelpOutlineIcon fontSize="small" />
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
</Stack>
|
||||
<Stack direction={{ xs: "column", sm: "row" }} spacing={1.5}>
|
||||
{(
|
||||
Object.entries(PRESETS) as Array<[PresetKey, (typeof PRESETS)[PresetKey]]>
|
||||
).map(([key, p]) => (
|
||||
<Paper
|
||||
key={key}
|
||||
onClick={() => applyPreset(key)}
|
||||
sx={{
|
||||
p: 1.5,
|
||||
flex: 1,
|
||||
cursor: "pointer",
|
||||
backgroundColor: alpha("#ffffff", 0.04),
|
||||
border: "1px solid rgba(255,255,255,0.1)",
|
||||
borderRadius: 2,
|
||||
transition: "all 0.2s ease",
|
||||
"&:hover": {
|
||||
borderColor: "rgba(102,126,234,0.7)",
|
||||
boxShadow: "0 8px 24px rgba(0,0,0,0.25)",
|
||||
backgroundColor: alpha("#667eea", 0.08),
|
||||
},
|
||||
}}
|
||||
>
|
||||
<Typography variant="subtitle2" sx={{ color: "white", fontWeight: 700 }}>
|
||||
{p.title}
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.7)", lineHeight: 1.5, mb: 0.75 }}>
|
||||
{p.subtitle}
|
||||
</Typography>
|
||||
<Stack direction="row" spacing={1} sx={{ color: "rgba(255,255,255,0.6)", fontSize: "0.8rem" }}>
|
||||
<Typography variant="caption">Style: {p.style}</Typography>
|
||||
<Typography variant="caption">Speed: {p.renderingSpeed}</Typography>
|
||||
<Typography variant="caption">AR: {p.aspectRatio}</Typography>
|
||||
</Stack>
|
||||
</Paper>
|
||||
))}
|
||||
</Stack>
|
||||
</Box>
|
||||
|
||||
{/* Prompt Section */}
|
||||
<Box>
|
||||
<Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1 }}>
|
||||
<Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
|
||||
Generation Prompt
|
||||
</Typography>
|
||||
<Tooltip
|
||||
title="The prompt describes what you want to see in the generated image. It should include scene context, visual elements, and style preferences. The AI will use this along with your base avatar to create a consistent character in the scene."
|
||||
arrow
|
||||
>
|
||||
<IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
|
||||
<HelpOutlineIcon fontSize="small" />
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
</Stack>
|
||||
<TextField
|
||||
fullWidth
|
||||
multiline
|
||||
rows={4}
|
||||
value={prompt}
|
||||
onChange={(e) => setPrompt(e.target.value)}
|
||||
placeholder="Describe the scene, visual elements, and style..."
|
||||
sx={{
|
||||
"& .MuiOutlinedInput-root": {
|
||||
backgroundColor: alpha("#ffffff", 0.05),
|
||||
color: "white",
|
||||
"& fieldset": {
|
||||
borderColor: "rgba(255,255,255,0.2)",
|
||||
},
|
||||
"&:hover fieldset": {
|
||||
borderColor: "rgba(255,255,255,0.3)",
|
||||
},
|
||||
"&.Mui-focused fieldset": {
|
||||
borderColor: "#667eea",
|
||||
},
|
||||
},
|
||||
"& .MuiInputBase-input": {
|
||||
color: "white",
|
||||
},
|
||||
}}
|
||||
/>
|
||||
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.5)", mt: 0.5, display: "block" }}>
|
||||
This prompt will be combined with scene context to generate your image. Be specific about visual elements, mood, and composition.
|
||||
</Typography>
|
||||
</Box>
|
||||
|
||||
<Divider sx={{ borderColor: "rgba(255,255,255,0.1)" }} />
|
||||
|
||||
{/* Style Selection */}
|
||||
<Box>
|
||||
<Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1.5 }}>
|
||||
<Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
|
||||
Character Style
|
||||
</Typography>
|
||||
<Tooltip
|
||||
title="Determines the artistic style of the character generation. Auto lets the AI choose, Fiction creates more stylized/artistic characters, and Realistic produces photorealistic results."
|
||||
arrow
|
||||
>
|
||||
<IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
|
||||
<HelpOutlineIcon fontSize="small" />
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
</Stack>
|
||||
<FormControl fullWidth>
|
||||
<Select
|
||||
value={style}
|
||||
onChange={(e) => setStyle(e.target.value as "Auto" | "Fiction" | "Realistic")}
|
||||
sx={{
|
||||
backgroundColor: alpha("#ffffff", 0.05),
|
||||
color: "white",
|
||||
"& .MuiOutlinedInput-notchedOutline": {
|
||||
borderColor: "rgba(255,255,255,0.2)",
|
||||
},
|
||||
"&:hover .MuiOutlinedInput-notchedOutline": {
|
||||
borderColor: "rgba(255,255,255,0.3)",
|
||||
},
|
||||
"&.Mui-focused .MuiOutlinedInput-notchedOutline": {
|
||||
borderColor: "#667eea",
|
||||
},
|
||||
"& .MuiSvgIcon-root": {
|
||||
color: "rgba(255,255,255,0.7)",
|
||||
},
|
||||
}}
|
||||
>
|
||||
<MenuItem value="Auto">
|
||||
<Stack>
|
||||
<Typography sx={{ color: "white" }}>Auto</Typography>
|
||||
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
|
||||
AI automatically selects the best style
|
||||
</Typography>
|
||||
</Stack>
|
||||
</MenuItem>
|
||||
<MenuItem value="Fiction">
|
||||
<Stack>
|
||||
<Typography sx={{ color: "white" }}>Fiction</Typography>
|
||||
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
|
||||
Stylized, artistic character appearance
|
||||
</Typography>
|
||||
</Stack>
|
||||
</MenuItem>
|
||||
<MenuItem value="Realistic">
|
||||
<Stack>
|
||||
<Typography sx={{ color: "white" }}>Realistic</Typography>
|
||||
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
|
||||
Photorealistic, professional appearance
|
||||
</Typography>
|
||||
</Stack>
|
||||
</MenuItem>
|
||||
</Select>
|
||||
</FormControl>
|
||||
<Paper
|
||||
sx={{
|
||||
mt: 1.5,
|
||||
p: 1.5,
|
||||
backgroundColor: alpha("#667eea", 0.1),
|
||||
border: "1px solid rgba(102,126,234,0.3)",
|
||||
borderRadius: 2,
|
||||
}}
|
||||
>
|
||||
<Stack direction="row" spacing={1}>
|
||||
<InfoIcon sx={{ color: "#667eea", fontSize: "1.2rem", mt: 0.1 }} />
|
||||
<Box>
|
||||
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.9)", fontWeight: 500, mb: 0.5 }}>
|
||||
Style Impact:
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.7)", lineHeight: 1.6 }}>
|
||||
<strong>Auto:</strong> Best for most cases, balances realism and style<br />
|
||||
<strong>Fiction:</strong> Great for creative, artistic podcasts with stylized visuals<br />
|
||||
<strong>Realistic:</strong> Ideal for professional, corporate, or news-style podcasts
|
||||
</Typography>
|
||||
</Box>
|
||||
</Stack>
|
||||
</Paper>
|
||||
</Box>
|
||||
|
||||
{/* Rendering Speed */}
|
||||
<Box>
|
||||
<Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1.5 }}>
|
||||
<Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
|
||||
Rendering Speed
|
||||
</Typography>
|
||||
<Tooltip
|
||||
title="Controls the balance between generation speed, cost, and quality. Turbo is fastest and cheapest but lower quality. Quality is slowest and most expensive but produces the best results. Default provides a balanced approach."
|
||||
arrow
|
||||
>
|
||||
<IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
|
||||
<HelpOutlineIcon fontSize="small" />
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
</Stack>
|
||||
<FormControl fullWidth>
|
||||
<Select
|
||||
value={renderingSpeed}
|
||||
onChange={(e) => setRenderingSpeed(e.target.value as "Default" | "Turbo" | "Quality")}
|
||||
sx={{
|
||||
backgroundColor: alpha("#ffffff", 0.05),
|
||||
color: "white",
|
||||
"& .MuiOutlinedInput-notchedOutline": {
|
||||
borderColor: "rgba(255,255,255,0.2)",
|
||||
},
|
||||
"&:hover .MuiOutlinedInput-notchedOutline": {
|
||||
borderColor: "rgba(255,255,255,0.3)",
|
||||
},
|
||||
"&.Mui-focused .MuiOutlinedInput-notchedOutline": {
|
||||
borderColor: "#667eea",
|
||||
},
|
||||
"& .MuiSvgIcon-root": {
|
||||
color: "rgba(255,255,255,0.7)",
|
||||
},
|
||||
}}
|
||||
>
|
||||
<MenuItem value="Turbo">
|
||||
<Stack>
|
||||
<Typography sx={{ color: "white" }}>Turbo ⚡</Typography>
|
||||
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
|
||||
Fastest (~10-20s) • Cheapest • Lower quality
|
||||
</Typography>
|
||||
</Stack>
|
||||
</MenuItem>
|
||||
<MenuItem value="Default">
|
||||
<Stack>
|
||||
<Typography sx={{ color: "white" }}>Default ⚖️</Typography>
|
||||
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
|
||||
Balanced (~30-60s) • Moderate cost • Good quality
|
||||
</Typography>
|
||||
</Stack>
|
||||
</MenuItem>
|
||||
<MenuItem value="Quality">
|
||||
<Stack>
|
||||
<Typography sx={{ color: "white" }}>Quality ✨</Typography>
|
||||
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
|
||||
Slowest (~60-120s) • Most expensive • Highest quality
|
||||
</Typography>
|
||||
</Stack>
|
||||
</MenuItem>
|
||||
</Select>
|
||||
</FormControl>
|
||||
<Paper
|
||||
sx={{
|
||||
mt: 1.5,
|
||||
p: 1.5,
|
||||
backgroundColor: alpha("#10b981", 0.1),
|
||||
border: "1px solid rgba(16,185,129,0.3)",
|
||||
borderRadius: 2,
|
||||
}}
|
||||
>
|
||||
<Stack direction="row" spacing={1}>
|
||||
<InfoIcon sx={{ color: "#10b981", fontSize: "1.2rem", mt: 0.1 }} />
|
||||
<Box>
|
||||
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.9)", fontWeight: 500, mb: 0.5 }}>
|
||||
Speed vs Quality Trade-off:
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.7)", lineHeight: 1.6 }}>
|
||||
<strong>Turbo:</strong> Use for quick iterations and testing (~$0.02/image)<br />
|
||||
<strong>Default:</strong> Best balance for most production use (~$0.04/image)<br />
|
||||
<strong>Quality:</strong> Use for final, high-quality outputs (~$0.08/image)
|
||||
</Typography>
|
||||
</Box>
|
||||
</Stack>
|
||||
</Paper>
|
||||
</Box>
|
||||
|
||||
{/* Aspect Ratio */}
|
||||
<Box>
|
||||
<Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1.5 }}>
|
||||
<Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
|
||||
Aspect Ratio
|
||||
</Typography>
|
||||
<Tooltip
|
||||
title="The width-to-height ratio of the generated image. Choose based on your video format: 16:9 for standard widescreen, 9:16 for vertical/social media, 1:1 for square formats, or 4:3 for traditional formats."
|
||||
arrow
|
||||
>
|
||||
<IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
|
||||
<HelpOutlineIcon fontSize="small" />
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
</Stack>
|
||||
<FormControl fullWidth>
|
||||
<Select
|
||||
value={aspectRatio}
|
||||
onChange={(e) => setAspectRatio(e.target.value as "1:1" | "16:9" | "9:16" | "4:3" | "3:4")}
|
||||
sx={{
|
||||
backgroundColor: alpha("#ffffff", 0.05),
|
||||
color: "white",
|
||||
"& .MuiOutlinedInput-notchedOutline": {
|
||||
borderColor: "rgba(255,255,255,0.2)",
|
||||
},
|
||||
"&:hover .MuiOutlinedInput-notchedOutline": {
|
||||
borderColor: "rgba(255,255,255,0.3)",
|
||||
},
|
||||
"&.Mui-focused .MuiOutlinedInput-notchedOutline": {
|
||||
borderColor: "#667eea",
|
||||
},
|
||||
"& .MuiSvgIcon-root": {
|
||||
color: "rgba(255,255,255,0.7)",
|
||||
},
|
||||
}}
|
||||
>
|
||||
<MenuItem value="16:9">
|
||||
<Stack>
|
||||
<Typography sx={{ color: "white" }}>16:9 (Widescreen)</Typography>
|
||||
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
|
||||
Standard video format, best for YouTube, web
|
||||
</Typography>
|
||||
</Stack>
|
||||
</MenuItem>
|
||||
<MenuItem value="9:16">
|
||||
<Stack>
|
||||
<Typography sx={{ color: "white" }}>9:16 (Vertical)</Typography>
|
||||
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
|
||||
Mobile/social media format (TikTok, Instagram Stories)
|
||||
</Typography>
|
||||
</Stack>
|
||||
</MenuItem>
|
||||
<MenuItem value="1:1">
|
||||
<Stack>
|
||||
<Typography sx={{ color: "white" }}>1:1 (Square)</Typography>
|
||||
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
|
||||
Instagram posts, profile images
|
||||
</Typography>
|
||||
</Stack>
|
||||
</MenuItem>
|
||||
<MenuItem value="4:3">
|
||||
<Stack>
|
||||
<Typography sx={{ color: "white" }}>4:3 (Traditional)</Typography>
|
||||
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
|
||||
Classic TV format, presentations
|
||||
</Typography>
|
||||
</Stack>
|
||||
</MenuItem>
|
||||
<MenuItem value="3:4">
|
||||
<Stack>
|
||||
<Typography sx={{ color: "white" }}>3:4 (Portrait)</Typography>
|
||||
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
|
||||
Portrait orientation, mobile apps
|
||||
</Typography>
|
||||
</Stack>
|
||||
</MenuItem>
|
||||
</Select>
|
||||
</FormControl>
|
||||
<Paper
|
||||
sx={{
|
||||
mt: 1.5,
|
||||
p: 1.5,
|
||||
backgroundColor: alpha("#f59e0b", 0.1),
|
||||
border: "1px solid rgba(245,158,11,0.3)",
|
||||
borderRadius: 2,
|
||||
}}
|
||||
>
|
||||
<Stack direction="row" spacing={1}>
|
||||
<InfoIcon sx={{ color: "#f59e0b", fontSize: "1.2rem", mt: 0.1 }} />
|
||||
<Box>
|
||||
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.9)", fontWeight: 500, mb: 0.5 }}>
|
||||
Format Recommendation:
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.7)", lineHeight: 1.6 }}>
|
||||
<strong>16:9</strong> is recommended for most podcast videos as it matches standard video player dimensions and provides optimal viewing experience.
|
||||
</Typography>
|
||||
</Box>
|
||||
</Stack>
|
||||
</Paper>
|
||||
</Box>
|
||||
</Stack>
|
||||
</DialogContent>
|
||||
|
||||
<DialogActions sx={{ p: 3, pt: 2 }}>
|
||||
<SecondaryButton onClick={onClose} disabled={isGenerating}>
|
||||
Cancel
|
||||
</SecondaryButton>
|
||||
<PrimaryButton
|
||||
onClick={handleRegenerate}
|
||||
loading={isGenerating}
|
||||
disabled={!prompt.trim() || isGenerating}
|
||||
>
|
||||
{isGenerating ? "Generating..." : "Regenerate Image"}
|
||||
</PrimaryButton>
|
||||
</DialogActions>
|
||||
</Dialog>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import React, { useState, useEffect } from "react";
|
||||
import { Stack, Box, Typography, Divider, Chip, alpha, CircularProgress } from "@mui/material";
|
||||
import { Stack, Box, Typography, Divider, Chip, alpha, CircularProgress, LinearProgress } from "@mui/material";
|
||||
import {
|
||||
EditNote as EditNoteIcon,
|
||||
CheckCircle as CheckCircleIcon,
|
||||
@@ -11,6 +11,8 @@ import {
|
||||
import { Scene, Line, Knobs } from "../types";
|
||||
import { GlassyCard, glassyCardSx, PrimaryButton } from "../ui";
|
||||
import { LineEditor } from "./LineEditor";
|
||||
import { ImageRegenerateModal, ImageGenerationSettings } from "./ImageRegenerateModal";
|
||||
import { AudioRegenerateModal, AudioGenerationSettings } from "./AudioRegenerateModal";
|
||||
import { podcastApi } from "../../../services/podcastApi";
|
||||
import { aiApiClient } from "../../../api/client";
|
||||
|
||||
@@ -24,6 +26,7 @@ interface SceneEditorProps {
|
||||
onAudioGenerationStart?: (sceneId: string) => void;
|
||||
onAudioGenerated?: (sceneId: string, audioUrl: string) => void;
|
||||
idea?: string; // Podcast idea for image generation context
|
||||
avatarUrl?: string | null; // Base avatar URL for consistent scene image generation
|
||||
}
|
||||
|
||||
export const SceneEditor: React.FC<SceneEditorProps> = ({
|
||||
@@ -36,10 +39,30 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
|
||||
onAudioGenerationStart,
|
||||
onAudioGenerated,
|
||||
idea,
|
||||
avatarUrl,
|
||||
}) => {
|
||||
const [localGenerating, setLocalGenerating] = useState(false);
|
||||
const [generatingImage, setGeneratingImage] = useState(false);
|
||||
const [imageGenerationStatus, setImageGenerationStatus] = useState<string>("");
|
||||
const [imageGenerationProgress, setImageGenerationProgress] = useState<number>(0);
|
||||
const [audioBlobUrl, setAudioBlobUrl] = useState<string | null>(null);
|
||||
const [imageBlobUrl, setImageBlobUrl] = useState<string | null>(null);
|
||||
const [imageLoading, setImageLoading] = useState(false);
|
||||
const [showRegenerateModal, setShowRegenerateModal] = useState(false);
|
||||
const [showAudioModal, setShowAudioModal] = useState(false);
|
||||
const [audioSettings, setAudioSettings] = useState<AudioGenerationSettings>({
|
||||
voiceId: "Wise_Woman",
|
||||
speed: 1.0,
|
||||
volume: 1.0,
|
||||
pitch: 0.0,
|
||||
emotion: scene.emotion || "neutral",
|
||||
englishNormalization: true,
|
||||
sampleRate: 24000,
|
||||
bitrate: 64000,
|
||||
channel: "1",
|
||||
format: "mp3",
|
||||
languageBoost: "auto",
|
||||
});
|
||||
|
||||
// Load audio as blob when audioUrl is available
|
||||
useEffect(() => {
|
||||
@@ -116,6 +139,99 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
|
||||
};
|
||||
}, [scene.audioUrl, scene.id]);
|
||||
|
||||
// Load the scene image as a blob whenever scene.imageUrl changes.
// The image is fetched through aiApiClient and exposed to the UI as an object
// URL; object URLs are revoked when they are replaced, when imageUrl is
// cleared, and when the effect is torn down.
useEffect(() => {
  // Only object URLs need revoking; the token fallback below stores a plain URL.
  const revokeIfBlob = (url: string | null) => {
    if (url && url.startsWith('blob:')) {
      URL.revokeObjectURL(url);
    }
  };

  if (!scene.imageUrl) {
    // Clean up blob URL if imageUrl is removed
    setImageBlobUrl((currentBlobUrl) => {
      revokeIfBlob(currentBlobUrl);
      return null;
    });
    return;
  }

  // Flipped by the cleanup function; every state update that follows an
  // await must check it so a superseded request cannot overwrite newer state.
  let isMounted = true;
  const currentImageUrl = scene.imageUrl; // Capture current value

  const loadImageBlob = async () => {
    try {
      setImageLoading(true);

      // Normalize path
      let imagePath = currentImageUrl.startsWith('/') ? currentImageUrl : `/${currentImageUrl}`;

      // Convert /api/story/images/ to /api/podcast/images/ if needed
      if (imagePath.includes('/api/story/images/')) {
        const filename = imagePath.split('/api/story/images/').pop() || '';
        imagePath = `/api/podcast/images/${filename}`;
      }

      // Ensure it's a podcast image endpoint
      if (!imagePath.includes('/api/podcast/images/')) {
        const filename = imagePath.split('/').pop() || currentImageUrl;
        imagePath = `/api/podcast/images/${filename}`;
      }

      // Remove query parameters if present
      imagePath = imagePath.split('?')[0];

      const response = await aiApiClient.get(imagePath, {
        responseType: 'blob',
      });

      // The cleanup runs on every imageUrl change, so this single flag also
      // covers "imageUrl changed while the request was in flight".
      if (!isMounted) {
        return;
      }

      const blobUrl = URL.createObjectURL(response.data);

      setImageBlobUrl((prevBlobUrl) => {
        // Clean up previous blob URL if exists
        if (prevBlobUrl !== blobUrl) {
          revokeIfBlob(prevBlobUrl);
        }
        return blobUrl;
      });
    } catch (error) {
      console.error('[SceneEditor] Failed to load image blob:', error);

      // A request that failed after teardown must not publish a URL for an
      // image that is no longer the current one.
      if (!isMounted) {
        return;
      }

      // Fallback: try with query token
      try {
        const token = localStorage.getItem('clerk_dashboard_token') || '';
        if (token) {
          // Append with '&' when the URL already carries a query string.
          const separator = currentImageUrl.includes('?') ? '&' : '?';
          const urlWithToken = `${currentImageUrl}${separator}token=${encodeURIComponent(token)}`;
          setImageBlobUrl(urlWithToken);
        }
      } catch (fallbackError) {
        console.error('[SceneEditor] Fallback image loading failed:', fallbackError);
      }
    } finally {
      if (isMounted) {
        setImageLoading(false);
      }
    }
  };

  loadImageBlob();

  return () => {
    isMounted = false;
    // Cleanup blob URL on unmount or when imageUrl changes
    setImageBlobUrl((prevBlobUrl) => {
      revokeIfBlob(prevBlobUrl);
      return null;
    });
  };
}, [scene.imageUrl]);
|
||||
|
||||
const updateLine = (updatedLine: Line) => {
|
||||
const updated = { ...scene, lines: scene.lines.map((l) => (l.id === updatedLine.id ? updatedLine : l)) };
|
||||
onUpdateScene(updated);
|
||||
@@ -126,7 +242,7 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
|
||||
const hasAudio = Boolean(scene.audioUrl && audioBlobUrl);
|
||||
const hasImage = Boolean(scene.imageUrl);
|
||||
|
||||
const handleApproveAndGenerate = async () => {
|
||||
const handleApproveAndGenerate = async (settings?: AudioGenerationSettings) => {
|
||||
const wasAlreadyApproved = scene.approved;
|
||||
const sceneId = scene.id;
|
||||
|
||||
@@ -152,11 +268,20 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
|
||||
const currentScene = { ...scene, approved: true };
|
||||
|
||||
// Generate audio
|
||||
const effectiveSettings = settings || audioSettings;
|
||||
const result = await podcastApi.renderSceneAudio({
|
||||
scene: currentScene,
|
||||
voiceId: "Wise_Woman",
|
||||
emotion: scene.emotion || knobs.voice_emotion || "neutral",
|
||||
speed: knobs.voice_speed || 1.0,
|
||||
voiceId: effectiveSettings.voiceId || "Wise_Woman",
|
||||
emotion: effectiveSettings.emotion || scene.emotion || knobs.voice_emotion || "neutral",
|
||||
speed: effectiveSettings.speed ?? knobs.voice_speed ?? 1.0,
|
||||
volume: effectiveSettings.volume ?? 1.0,
|
||||
pitch: effectiveSettings.pitch ?? 0.0,
|
||||
englishNormalization: effectiveSettings.englishNormalization ?? true,
|
||||
sampleRate: effectiveSettings.sampleRate,
|
||||
bitrate: effectiveSettings.bitrate,
|
||||
channel: effectiveSettings.channel,
|
||||
format: effectiveSettings.format,
|
||||
languageBoost: effectiveSettings.languageBoost,
|
||||
});
|
||||
|
||||
// Update scene with audio URL and ensure approved state
|
||||
@@ -179,35 +304,138 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
|
||||
}
|
||||
};
|
||||
|
||||
const handleGenerateImage = async () => {
|
||||
const handleGenerateImage = async (settings?: ImageGenerationSettings) => {
|
||||
const sceneId = scene.id;
|
||||
const startTime = Date.now();
|
||||
let progressInterval: NodeJS.Timeout | null = null;
|
||||
|
||||
try {
|
||||
setGeneratingImage(true);
|
||||
setShowRegenerateModal(false);
|
||||
setImageGenerationStatus("Submitting image generation request...");
|
||||
setImageGenerationProgress(10);
|
||||
|
||||
// Build scene content from lines for context
|
||||
const sceneContent = scene.lines.map((line) => line.text).join(" ");
|
||||
|
||||
// Log avatar URL for debugging
|
||||
console.log("[SceneEditor] Generating image with avatarUrl:", avatarUrl);
|
||||
console.log("[SceneEditor] Custom settings:", settings);
|
||||
|
||||
// Simulate progress updates during API call: the status text and bar are
// driven purely by elapsed time, once per second, until the caller clears
// the interval.
progressInterval = setInterval(() => {
  const seconds = Math.floor((Date.now() - startTime) / 1000);

  // Update status based on elapsed time
  if (seconds < 5) {
    setImageGenerationStatus("Submitting request to AI service...");
    setImageGenerationProgress(15);
  } else if (seconds < 15) {
    setImageGenerationStatus("AI is generating your image...");
    setImageGenerationProgress(30);
  } else if (seconds < 30) {
    setImageGenerationStatus("Creating character-consistent scene image...");
    setImageGenerationProgress(50);
  } else if (seconds < 60) {
    setImageGenerationStatus("Rendering image details...");
    setImageGenerationProgress(70);
  } else {
    setImageGenerationStatus(`Processing... (${seconds}s elapsed)`);
    // Continue from the 70% shown in the previous phase and creep toward the
    // 90% cap, so the bar never moves backwards when this branch takes over.
    setImageGenerationProgress(Math.min(90, 70 + (seconds - 60) / 2));
  }
}, 1000);
|
||||
|
||||
const result = await podcastApi.generateSceneImage({
|
||||
sceneId: scene.id,
|
||||
sceneTitle: scene.title,
|
||||
sceneContent: sceneContent,
|
||||
baseAvatarUrl: avatarUrl || undefined, // Pass base avatar URL for character consistency
|
||||
idea: idea,
|
||||
width: 1024,
|
||||
height: 1024,
|
||||
// Pass custom settings if provided
|
||||
customPrompt: settings?.prompt,
|
||||
style: settings?.style,
|
||||
renderingSpeed: settings?.renderingSpeed,
|
||||
aspectRatio: settings?.aspectRatio,
|
||||
});
|
||||
|
||||
if (progressInterval) {
|
||||
clearInterval(progressInterval);
|
||||
progressInterval = null;
|
||||
}
|
||||
|
||||
setImageGenerationStatus("Finalizing image...");
|
||||
setImageGenerationProgress(95);
|
||||
|
||||
// Update scene with image URL
|
||||
const updatedScene = { ...scene, imageUrl: result.image_url };
|
||||
onUpdateScene(updatedScene);
|
||||
} catch (error) {
|
||||
|
||||
const elapsed = Math.floor((Date.now() - startTime) / 1000);
|
||||
setImageGenerationStatus(`Image generated successfully in ${elapsed}s`);
|
||||
setImageGenerationProgress(100);
|
||||
|
||||
// Clear status after a moment
|
||||
setTimeout(() => {
|
||||
setImageGenerationStatus("");
|
||||
setImageGenerationProgress(0);
|
||||
}, 2000);
|
||||
} catch (error: any) {
|
||||
// Clear interval on error
|
||||
if (progressInterval) {
|
||||
clearInterval(progressInterval);
|
||||
progressInterval = null;
|
||||
}
|
||||
|
||||
console.error("Failed to generate image:", error);
|
||||
// Extract error message from response if available
|
||||
const errorMessage = error?.response?.data?.detail?.message
|
||||
|| error?.response?.data?.detail?.error
|
||||
|| error?.response?.data?.detail
|
||||
|| error?.message
|
||||
|| "Failed to generate image. Please try again.";
|
||||
console.error("Error details:", {
|
||||
status: error?.response?.status,
|
||||
statusText: error?.response?.statusText,
|
||||
data: error?.response?.data,
|
||||
message: errorMessage,
|
||||
});
|
||||
|
||||
setImageGenerationStatus(`Error: ${errorMessage}`);
|
||||
setImageGenerationProgress(0);
|
||||
|
||||
// Show user-friendly error message
|
||||
alert(`Image generation failed: ${errorMessage}`);
|
||||
throw error;
|
||||
} finally {
|
||||
// Ensure interval is cleared
|
||||
if (progressInterval) {
|
||||
clearInterval(progressInterval);
|
||||
}
|
||||
setGeneratingImage(false);
|
||||
}
|
||||
};
|
||||
|
||||
// Open the image settings dialog instead of regenerating immediately.
const handleRegenerateClick = (): void => {
  setShowRegenerateModal(true);
};
|
||||
|
||||
// Audio button handler: with no audio yet, generate straight away using the
// stored settings; once audio exists, open the settings dialog instead.
const handleAudioRegenerateClick = () => {
  if (!hasAudio) {
    handleApproveAndGenerate(audioSettings);
    return;
  }
  setShowAudioModal(true);
};
|
||||
|
||||
// Confirm handler for the audio settings dialog: remember the chosen
// settings, close the dialog, then regenerate with exactly those settings
// (passed explicitly rather than read back from state).
const handleAudioRegenerate = (settings: AudioGenerationSettings) => {
  setShowAudioModal(false);
  setAudioSettings(settings);
  handleApproveAndGenerate(settings);
};
|
||||
|
||||
return (
|
||||
<GlassyCard sx={glassyCardSx}>
|
||||
<Stack spacing={2.5}>
|
||||
@@ -256,7 +484,7 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
|
||||
</Box>
|
||||
<Stack direction="row" spacing={1.5} flexWrap="wrap" useFlexGap>
|
||||
<PrimaryButton
|
||||
onClick={handleApproveAndGenerate}
|
||||
onClick={handleAudioRegenerateClick}
|
||||
disabled={approving || generating}
|
||||
loading={approving || generating}
|
||||
startIcon={
|
||||
@@ -270,7 +498,7 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
|
||||
}
|
||||
tooltip={
|
||||
hasAudio && !generating
|
||||
? "Regenerate audio for this scene"
|
||||
? "Regenerate audio for this scene with custom settings"
|
||||
: generating
|
||||
? "Generating audio..."
|
||||
: scene.approved
|
||||
@@ -290,7 +518,7 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
|
||||
: "Approve & Generate Audio"}
|
||||
</PrimaryButton>
|
||||
<PrimaryButton
|
||||
onClick={handleGenerateImage}
|
||||
onClick={hasImage ? handleRegenerateClick : () => handleGenerateImage()}
|
||||
disabled={generatingImage}
|
||||
loading={generatingImage}
|
||||
startIcon={
|
||||
@@ -372,7 +600,157 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
|
||||
</Box>
|
||||
</>
|
||||
)}
|
||||
|
||||
{/* Image Generation Progress - Show when generating */}
|
||||
{generatingImage && (
|
||||
<>
|
||||
<Divider sx={{ borderColor: "rgba(15, 23, 42, 0.08)", borderWidth: 1, mt: 1 }} />
|
||||
<Box
|
||||
sx={{
|
||||
p: 2,
|
||||
background: "linear-gradient(135deg, rgba(102, 126, 234, 0.08) 0%, rgba(118, 75, 162, 0.08) 100%)",
|
||||
borderRadius: 2,
|
||||
border: "1px solid rgba(102, 126, 234, 0.2)",
|
||||
}}
|
||||
>
|
||||
<Stack direction="row" alignItems="center" spacing={1.5} sx={{ mb: 1.5 }}>
|
||||
<ImageIcon sx={{ color: "#667eea", fontSize: "1.25rem" }} />
|
||||
<Typography variant="subtitle2" sx={{ color: "#667eea", fontWeight: 600 }}>
|
||||
Generating Image...
|
||||
</Typography>
|
||||
</Stack>
|
||||
|
||||
{/* Progress Bar */}
|
||||
<Box sx={{ mb: 1.5 }}>
|
||||
<LinearProgress
|
||||
variant="determinate"
|
||||
value={imageGenerationProgress}
|
||||
sx={{
|
||||
height: 8,
|
||||
borderRadius: 4,
|
||||
backgroundColor: alpha("#667eea", 0.1),
|
||||
"& .MuiLinearProgress-bar": {
|
||||
backgroundColor: "#667eea",
|
||||
borderRadius: 4,
|
||||
}
|
||||
}}
|
||||
/>
|
||||
<Typography variant="caption" sx={{ color: "#667eea", mt: 0.5, display: "block", textAlign: "right" }}>
|
||||
{imageGenerationProgress}%
|
||||
</Typography>
|
||||
</Box>
|
||||
|
||||
{/* Status Message */}
|
||||
{imageGenerationStatus && (
|
||||
<Typography variant="body2" sx={{ color: "#667eea", fontSize: "0.875rem", lineHeight: 1.6, mb: 1 }}>
|
||||
{imageGenerationStatus}
|
||||
</Typography>
|
||||
)}
|
||||
|
||||
{/* Spinner */}
|
||||
<Box sx={{ display: "flex", alignItems: "center", justifyContent: "center", mt: 1 }}>
|
||||
<CircularProgress size={32} sx={{ color: "#667eea" }} />
|
||||
</Box>
|
||||
</Box>
|
||||
</>
|
||||
)}
|
||||
|
||||
{/* Generated Image Display - Show when image exists and not generating */}
|
||||
{scene.imageUrl && !generatingImage && (
|
||||
<>
|
||||
<Divider sx={{ borderColor: "rgba(15, 23, 42, 0.08)", borderWidth: 1, mt: 1 }} />
|
||||
<Box
|
||||
sx={{
|
||||
p: 2,
|
||||
background: imageBlobUrl && !imageLoading
|
||||
? "linear-gradient(135deg, rgba(102, 126, 234, 0.08) 0%, rgba(118, 75, 162, 0.08) 100%)"
|
||||
: "linear-gradient(135deg, rgba(245, 158, 11, 0.08) 0%, rgba(217, 119, 6, 0.08) 100%)",
|
||||
borderRadius: 2,
|
||||
border: imageBlobUrl && !imageLoading
|
||||
? "1px solid rgba(102, 126, 234, 0.2)"
|
||||
: "1px solid rgba(245, 158, 11, 0.2)",
|
||||
}}
|
||||
>
|
||||
<Stack direction="row" alignItems="center" spacing={1.5} sx={{ mb: 1.5 }}>
|
||||
<ImageIcon sx={{ color: imageBlobUrl && !imageLoading ? "#667eea" : "#d97706", fontSize: "1.25rem" }} />
|
||||
<Typography variant="subtitle2" sx={{ color: imageBlobUrl && !imageLoading ? "#667eea" : "#d97706", fontWeight: 600 }}>
|
||||
{imageBlobUrl && !imageLoading ? "Image Generated" : "Loading Image..."}
|
||||
</Typography>
|
||||
</Stack>
|
||||
{imageBlobUrl && !imageLoading ? (
|
||||
<Box
|
||||
sx={{
|
||||
width: "100%",
|
||||
borderRadius: 2,
|
||||
overflow: "hidden",
|
||||
border: "1px solid rgba(102,126,234,0.2)",
|
||||
background: alpha("#667eea", 0.05),
|
||||
}}
|
||||
>
|
||||
<Box
|
||||
component="img"
|
||||
src={imageBlobUrl}
|
||||
alt={scene.title}
|
||||
sx={{
|
||||
width: "100%",
|
||||
height: "auto",
|
||||
display: "block",
|
||||
maxHeight: 400,
|
||||
objectFit: "cover",
|
||||
}}
|
||||
onError={(e) => {
|
||||
console.error('[SceneEditor] Image failed to load:', {
|
||||
src: e.currentTarget.src,
|
||||
imageUrl: scene.imageUrl,
|
||||
imageBlobUrl,
|
||||
});
|
||||
}}
|
||||
onLoad={() => {
|
||||
console.log('[SceneEditor] Image loaded successfully');
|
||||
}}
|
||||
/>
|
||||
</Box>
|
||||
) : (
|
||||
<Box sx={{ display: "flex", alignItems: "center", justifyContent: "center", py: 2 }}>
|
||||
<CircularProgress size={24} sx={{ color: "#d97706" }} />
|
||||
</Box>
|
||||
)}
|
||||
</Box>
|
||||
</>
|
||||
)}
|
||||
</Stack>
|
||||
|
||||
{/* Image Regeneration Modal */}
|
||||
<ImageRegenerateModal
|
||||
open={showRegenerateModal}
|
||||
onClose={() => setShowRegenerateModal(false)}
|
||||
onRegenerate={handleGenerateImage}
|
||||
initialPrompt={(() => {
|
||||
const promptParts = [
|
||||
`Scene: ${scene.title}`,
|
||||
"Professional podcast recording studio",
|
||||
"Modern microphone setup",
|
||||
"Clean background, professional lighting",
|
||||
"16:9 aspect ratio, video-optimized composition"
|
||||
];
|
||||
if (idea) {
|
||||
promptParts.push(`Topic: ${idea.substring(0, 60)}`);
|
||||
}
|
||||
return promptParts.join(", ");
|
||||
})()}
|
||||
initialStyle="Realistic"
|
||||
initialRenderingSpeed="Quality"
|
||||
initialAspectRatio="16:9"
|
||||
isGenerating={generatingImage}
|
||||
/>
|
||||
|
||||
<AudioRegenerateModal
|
||||
open={showAudioModal}
|
||||
onClose={() => setShowAudioModal(false)}
|
||||
onRegenerate={handleAudioRegenerate}
|
||||
initialSettings={audioSettings}
|
||||
isGenerating={generating}
|
||||
/>
|
||||
</GlassyCard>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -22,6 +22,7 @@ interface ScriptEditorProps {
|
||||
onBackToResearch: () => void;
|
||||
onProceedToRendering: (script: Script) => void;
|
||||
onError: (message: string) => void;
|
||||
avatarUrl?: string | null; // Base avatar URL for consistent scene image generation
|
||||
}
|
||||
|
||||
export const ScriptEditor: React.FC<ScriptEditorProps> = ({
|
||||
@@ -37,6 +38,7 @@ export const ScriptEditor: React.FC<ScriptEditorProps> = ({
|
||||
onBackToResearch,
|
||||
onProceedToRendering,
|
||||
onError,
|
||||
avatarUrl,
|
||||
}) => {
|
||||
const [script, setScript] = useState<Script | null>(initialScript);
|
||||
const [loading, setLoading] = useState(false);
|
||||
@@ -52,6 +54,12 @@ export const ScriptEditor: React.FC<ScriptEditorProps> = ({
|
||||
sceneCount: number;
|
||||
} | null>(null);
|
||||
|
||||
// Report a script change to the parent on a later microtask rather than
// synchronously. Call sites sit inside setScript updater functions, and
// notifying the parent from there directly triggers React's
// "setState during render" warning.
const emitScriptChange = useCallback(
  (updatedScript: Script) => {
    const nextTick = Promise.resolve();
    return nextTick.then(() => onScriptChange(updatedScript));
  },
  [onScriptChange]
);
|
||||
|
||||
// Sync with parent state
|
||||
useEffect(() => {
|
||||
if (initialScript) {
|
||||
@@ -85,7 +93,7 @@ export const ScriptEditor: React.FC<ScriptEditorProps> = ({
|
||||
.then((res) => {
|
||||
if (mounted) {
|
||||
setScript(res);
|
||||
onScriptChange(res);
|
||||
emitScriptChange(res);
|
||||
setError(null);
|
||||
}
|
||||
})
|
||||
@@ -108,7 +116,7 @@ export const ScriptEditor: React.FC<ScriptEditorProps> = ({
|
||||
...currentScript,
|
||||
scenes: currentScript.scenes.map((s) => (s.id === updated.id ? { ...s, ...updated } : s))
|
||||
};
|
||||
onScriptChange(updatedScript);
|
||||
emitScriptChange(updatedScript);
|
||||
return updatedScript;
|
||||
});
|
||||
};
|
||||
@@ -124,7 +132,7 @@ export const ScriptEditor: React.FC<ScriptEditorProps> = ({
|
||||
...currentScript,
|
||||
scenes: currentScript.scenes.map((s) => (s.id === sceneId ? { ...s, approved: true } : s)),
|
||||
};
|
||||
onScriptChange(updatedScript);
|
||||
emitScriptChange(updatedScript);
|
||||
return updatedScript;
|
||||
});
|
||||
} catch (err) {
|
||||
@@ -570,11 +578,12 @@ export const ScriptEditor: React.FC<ScriptEditorProps> = ({
|
||||
s.id === sceneId ? { ...s, audioUrl, approved: true } : s
|
||||
);
|
||||
const updatedScript = { ...currentScript, scenes: updatedScenes };
|
||||
onScriptChange(updatedScript);
|
||||
emitScriptChange(updatedScript);
|
||||
return updatedScript;
|
||||
});
|
||||
}}
|
||||
idea={idea}
|
||||
avatarUrl={avatarUrl}
|
||||
/>
|
||||
</GlassyCard>
|
||||
))}
|
||||
|
||||
@@ -120,6 +120,7 @@ export type CreateProjectPayload = {
|
||||
knobs: Knobs;
|
||||
budgetCap: number;
|
||||
files: { voiceFile?: File | null; avatarFile?: File | null };
|
||||
avatarUrl?: string | null;
|
||||
};
|
||||
|
||||
export type CreateProjectResult = {
|
||||
@@ -141,6 +142,13 @@ export type RenderJobResult = {
|
||||
videoFilename?: string;
|
||||
};
|
||||
|
||||
/**
 * Options describing a video generation request.
 * NOTE(review): how each field is consumed is not visible in this hunk;
 * confirm against the code that builds the render request.
 */
export interface VideoGenerationSettings {
  /** Prompt text (required). */
  prompt: string;

  /** Output resolution; the type accepts only these two values. */
  resolution: "480p" | "720p";

  /** Optional seed; may be omitted or explicitly null. */
  seed?: number | null;

  /** Optional mask image URL; may be omitted or explicitly null. */
  maskImageUrl?: string | null;
}
|
||||
|
||||
export type TaskStatus = {
|
||||
task_id: string;
|
||||
status: "pending" | "processing" | "completed" | "failed";
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import React from "react";
|
||||
import { Button, Tooltip, CircularProgress, alpha } from "@mui/material";
|
||||
import { Button, Tooltip, CircularProgress, alpha, SxProps, Theme } from "@mui/material";
|
||||
|
||||
interface SecondaryButtonProps {
|
||||
children: React.ReactNode;
|
||||
@@ -9,6 +9,7 @@ interface SecondaryButtonProps {
|
||||
startIcon?: React.ReactNode;
|
||||
tooltip?: string;
|
||||
ariaLabel?: string;
|
||||
sx?: SxProps<Theme>;
|
||||
}
|
||||
|
||||
export const SecondaryButton: React.FC<SecondaryButtonProps> = ({
|
||||
@@ -19,6 +20,7 @@ export const SecondaryButton: React.FC<SecondaryButtonProps> = ({
|
||||
startIcon,
|
||||
tooltip,
|
||||
ariaLabel,
|
||||
sx,
|
||||
}) => {
|
||||
const button = (
|
||||
<Button
|
||||
@@ -27,17 +29,20 @@ export const SecondaryButton: React.FC<SecondaryButtonProps> = ({
|
||||
disabled={disabled || loading}
|
||||
startIcon={loading ? <CircularProgress size={16} /> : startIcon}
|
||||
aria-label={ariaLabel}
|
||||
sx={{
|
||||
borderColor: "rgba(255,255,255,0.2)",
|
||||
color: "rgba(255,255,255,0.9)",
|
||||
textTransform: "none",
|
||||
px: 2.5,
|
||||
py: 0.75,
|
||||
"&:hover": {
|
||||
borderColor: "rgba(255,255,255,0.4)",
|
||||
background: alpha("#fff", 0.05),
|
||||
sx={[
|
||||
{
|
||||
borderColor: "rgba(255,255,255,0.2)",
|
||||
color: "rgba(255,255,255,0.9)",
|
||||
textTransform: "none",
|
||||
px: 2.5,
|
||||
py: 0.75,
|
||||
"&:hover": {
|
||||
borderColor: "rgba(255,255,255,0.4)",
|
||||
background: alpha("#fff", 0.05),
|
||||
},
|
||||
},
|
||||
}}
|
||||
...(Array.isArray(sx) ? sx : sx ? [sx] : []),
|
||||
]}
|
||||
>
|
||||
{children}
|
||||
</Button>
|
||||
|
||||
@@ -14,7 +14,15 @@ import { podcastApi } from '../services/podcastApi';
|
||||
|
||||
export interface PodcastProjectState {
|
||||
// Project metadata
|
||||
project: { id: string; idea: string; duration: number; speakers: number } | null;
|
||||
project: {
|
||||
id: string;
|
||||
idea: string;
|
||||
duration: number;
|
||||
speakers: number;
|
||||
avatarUrl?: string | null;
|
||||
avatarPrompt?: string | null;
|
||||
avatarPersonaId?: string | null;
|
||||
} | null;
|
||||
|
||||
// Step results
|
||||
analysis: PodcastAnalysis | null;
|
||||
@@ -40,6 +48,9 @@ export interface PodcastProjectState {
|
||||
// Current step tracking
|
||||
currentStep: 'create' | 'analysis' | 'research' | 'script' | 'render' | null;
|
||||
|
||||
// Final combined video
|
||||
finalVideoUrl?: string | null;
|
||||
|
||||
// Timestamps
|
||||
createdAt?: string;
|
||||
updatedAt?: string;
|
||||
@@ -233,13 +244,44 @@ export const usePodcastProjectState = () => {
|
||||
}, []);
|
||||
|
||||
const updateRenderJob = useCallback((sceneId: string, updates: Partial<Job>) => {
|
||||
setState((prev) => ({
|
||||
...prev,
|
||||
renderJobs: prev.renderJobs.map((job) =>
|
||||
job.sceneId === sceneId ? { ...job, ...updates } : job
|
||||
),
|
||||
updatedAt: new Date().toISOString(),
|
||||
}));
|
||||
setState((prev) => {
|
||||
const existingJob = prev.renderJobs.find((job) => job.sceneId === sceneId);
|
||||
|
||||
if (existingJob) {
|
||||
// Update existing job
|
||||
return {
|
||||
...prev,
|
||||
renderJobs: prev.renderJobs.map((job) =>
|
||||
job.sceneId === sceneId ? { ...job, ...updates } : job
|
||||
),
|
||||
updatedAt: new Date().toISOString(),
|
||||
};
|
||||
} else {
|
||||
// Create new job if it doesn't exist
|
||||
const newJob: Job = {
|
||||
sceneId,
|
||||
title: updates.title || sceneId,
|
||||
status: updates.status || "idle",
|
||||
progress: updates.progress || 0,
|
||||
previewUrl: updates.previewUrl || null,
|
||||
finalUrl: updates.finalUrl || null,
|
||||
videoUrl: updates.videoUrl || null,
|
||||
imageUrl: updates.imageUrl || null,
|
||||
jobId: updates.jobId || null,
|
||||
taskId: updates.taskId || null,
|
||||
cost: updates.cost || null,
|
||||
provider: updates.provider || null,
|
||||
voiceId: updates.voiceId || null,
|
||||
fileSize: updates.fileSize || null,
|
||||
avatarImageUrl: updates.avatarImageUrl || null,
|
||||
};
|
||||
return {
|
||||
...prev,
|
||||
renderJobs: [...prev.renderJobs, newJob],
|
||||
updatedAt: new Date().toISOString(),
|
||||
};
|
||||
}
|
||||
});
|
||||
}, []);
|
||||
|
||||
const setKnobs = useCallback((knobs: Knobs) => {
|
||||
@@ -295,6 +337,9 @@ export const usePodcastProjectState = () => {
|
||||
idea: payload.ideaOrUrl,
|
||||
duration: payload.duration,
|
||||
speakers: payload.speakers,
|
||||
avatarUrl: payload.avatarUrl || null,
|
||||
avatarPrompt: null, // Will be set when avatar is generated
|
||||
avatarPersonaId: null,
|
||||
},
|
||||
knobs: payload.knobs,
|
||||
budgetCap: payload.budgetCap,
|
||||
@@ -317,6 +362,9 @@ export const usePodcastProjectState = () => {
|
||||
idea: dbProject.idea,
|
||||
duration: dbProject.duration,
|
||||
speakers: dbProject.speakers,
|
||||
avatarUrl: dbProject.avatar_url || null,
|
||||
avatarPrompt: dbProject.avatar_prompt || null,
|
||||
avatarPersonaId: dbProject.avatar_persona_id || null,
|
||||
},
|
||||
analysis: dbProject.analysis,
|
||||
queries: dbProject.queries || [],
|
||||
@@ -332,6 +380,7 @@ export const usePodcastProjectState = () => {
|
||||
showScriptEditor: dbProject.show_script_editor || false,
|
||||
showRenderQueue: dbProject.show_render_queue || false,
|
||||
currentStep: dbProject.current_step || null,
|
||||
finalVideoUrl: dbProject.final_video_url || null,
|
||||
createdAt: dbProject.created_at,
|
||||
updatedAt: dbProject.updated_at,
|
||||
}));
|
||||
|
||||
@@ -321,9 +321,9 @@ export const podcastApi = {
|
||||
});
|
||||
|
||||
const exaResult = response.data as ExaResearchResult;
|
||||
if (params.onProgress) {
|
||||
if (params.onProgress) {
|
||||
params.onProgress("Deep research completed with Exa.");
|
||||
}
|
||||
}
|
||||
const mapped = mapExaResearchResponse(exaResult);
|
||||
return { research: mapped, raw: exaResult };
|
||||
},
|
||||
@@ -411,6 +411,14 @@ export const podcastApi = {
|
||||
voiceId?: string;
|
||||
emotion?: string; // Fallback if scene doesn't have emotion
|
||||
speed?: number;
|
||||
volume?: number;
|
||||
pitch?: number;
|
||||
englishNormalization?: boolean;
|
||||
sampleRate?: number;
|
||||
bitrate?: number;
|
||||
channel?: "1" | "2";
|
||||
format?: "mp3" | "wav" | "pcm" | "flac";
|
||||
languageBoost?: string;
|
||||
}): Promise<RenderJobResult> {
|
||||
// Use scene-specific emotion if available, otherwise fallback to provided/default
|
||||
const sceneEmotion = params.scene.emotion || params.emotion || "neutral";
|
||||
@@ -493,9 +501,16 @@ export const podcastApi = {
|
||||
scene_title: params.scene.title,
|
||||
text: textToUse,
|
||||
voice_id: params.voiceId || "Wise_Woman",
|
||||
speed: params.speed || 1.0, // Normal speed (was 0.9, but too slow - causing duration issues)
|
||||
speed: params.speed ?? 1.0, // Normal speed (was 0.9, but too slow - causing duration issues)
|
||||
volume: params.volume ?? 1.0,
|
||||
pitch: params.pitch ?? 0.0,
|
||||
emotion: sceneEmotion,
|
||||
english_normalization: true, // Better number reading for statistics
|
||||
english_normalization: params.englishNormalization ?? true, // Better number reading for statistics
|
||||
sample_rate: params.sampleRate || null,
|
||||
bitrate: params.bitrate || null,
|
||||
channel: params.channel || null,
|
||||
format: params.format || null,
|
||||
language_boost: params.languageBoost || null,
|
||||
});
|
||||
|
||||
return {
|
||||
@@ -607,6 +622,8 @@ export const podcastApi = {
|
||||
avatarImageUrl?: string;
|
||||
resolution?: string;
|
||||
prompt?: string;
|
||||
seed?: number;
|
||||
maskImageUrl?: string;
|
||||
}): Promise<{ taskId: string; status: string; message: string }> {
|
||||
const response = await aiApiClient.post("/api/podcast/render/video", {
|
||||
project_id: params.projectId,
|
||||
@@ -616,22 +633,73 @@ export const podcastApi = {
|
||||
avatar_image_url: params.avatarImageUrl,
|
||||
resolution: params.resolution || "720p",
|
||||
prompt: params.prompt,
|
||||
seed: params.seed ?? -1,
|
||||
mask_image_url: params.maskImageUrl,
|
||||
});
|
||||
|
||||
// Backend returns snake_case (task_id); normalize to camelCase for callers
|
||||
const { task_id, status, message } = response.data || {};
|
||||
return {
|
||||
taskId: task_id,
|
||||
status,
|
||||
message,
|
||||
};
|
||||
},
|
||||
|
||||
/**
 * Fetch the status of a background task by id.
 *
 * Resolves to the response body, or to null when the body is falsy
 * (the backend returns null if the task is not found).
 */
async pollTaskStatus(taskId: string): Promise<TaskStatus | null> {
  const { data } = await aiApiClient.get(`/api/podcast/task/${taskId}/status`);
  return data ? data : null;
},
|
||||
|
||||
/**
 * List videos from GET /api/podcast/videos.
 *
 * When projectId is provided it is sent as the `project_id` query parameter;
 * otherwise no query parameters are sent. Resolves to the response body.
 */
async listVideos(projectId?: string): Promise<{
  videos: Array<{
    scene_number: number;
    filename: string;
    video_url: string;
    file_size: number;
  }>;
}> {
  const query = projectId ? { project_id: projectId } : {};
  const { data } = await aiApiClient.get("/api/podcast/videos", { params: query });
  return data;
},
|
||||
|
||||
async pollTaskStatus(taskId: string): Promise<TaskStatus> {
|
||||
const response = await aiApiClient.get(`/api/podcast/task/${taskId}/status`);
|
||||
return response.data;
|
||||
/**
 * Request that the given scene videos be combined
 * (POST /api/podcast/render/combine-videos).
 *
 * podcastTitle falls back to "Podcast" when missing or empty. The response's
 * snake_case `task_id` is exposed to callers as camelCase `taskId`; a missing
 * response body yields undefined fields rather than throwing.
 */
async combineVideos(params: {
  projectId: string;
  sceneVideoUrls: string[];
  podcastTitle?: string;
}): Promise<{
  taskId: string;
  status: string;
  message: string;
}> {
  const payload = {
    project_id: params.projectId,
    scene_video_urls: params.sceneVideoUrls,
    podcast_title: params.podcastTitle || "Podcast",
  };
  const response = await aiApiClient.post("/api/podcast/render/combine-videos", payload);

  const body = response.data || {};
  return {
    taskId: body.task_id,
    status: body.status,
    message: body.message,
  };
},
|
||||
|
||||
async generateSceneImage(params: {
|
||||
sceneId: string;
|
||||
sceneTitle: string;
|
||||
sceneContent?: string;
|
||||
baseAvatarUrl?: string;
|
||||
idea?: string;
|
||||
width?: number;
|
||||
height?: number;
|
||||
customPrompt?: string;
|
||||
style?: "Auto" | "Fiction" | "Realistic";
|
||||
renderingSpeed?: "Default" | "Turbo" | "Quality";
|
||||
aspectRatio?: "1:1" | "16:9" | "9:16" | "4:3" | "3:4";
|
||||
}): Promise<{
|
||||
scene_id: string;
|
||||
scene_title: string;
|
||||
@@ -647,9 +715,14 @@ export const podcastApi = {
|
||||
scene_id: params.sceneId,
|
||||
scene_title: params.sceneTitle,
|
||||
scene_content: params.sceneContent,
|
||||
idea: params.idea,
|
||||
base_avatar_url: params.baseAvatarUrl || null,
|
||||
idea: params.idea || null,
|
||||
width: params.width || 1024,
|
||||
height: params.height || 1024,
|
||||
custom_prompt: params.customPrompt || null,
|
||||
style: params.style || null,
|
||||
rendering_speed: params.renderingSpeed || null,
|
||||
aspect_ratio: params.aspectRatio || null,
|
||||
});
|
||||
return response.data;
|
||||
},
|
||||
@@ -679,6 +752,60 @@ export const podcastApi = {
|
||||
project_id: params.projectId,
|
||||
scene_ids: params.sceneIds,
|
||||
scene_audio_urls: params.sceneAudioUrls,
|
||||
});
|
||||
return response.data;
|
||||
},
|
||||
|
||||
/**
 * Upload an avatar file as multipart form data to
 * POST /api/podcast/avatar/upload.
 *
 * `project_id` is added to the form only when projectId is truthy.
 * Resolves to the response body.
 */
async uploadAvatar(file: File, projectId?: string): Promise<{ avatar_url: string; avatar_filename: string }> {
  const form = new FormData();
  form.append('file', file);
  if (projectId) {
    form.append('project_id', projectId);
  }

  const requestConfig = { headers: { 'Content-Type': 'multipart/form-data' } };
  const { data } = await aiApiClient.post('/api/podcast/avatar/upload', form, requestConfig);
  return data;
},
|
||||
|
||||
/**
 * Request presenter avatars via POST /api/podcast/avatar/generate
 * (multipart form data).
 *
 * `speakers` is always sent (as a string). `project_id`, `audience` and
 * `content_type` are sent only when truthy. `top_keywords` is sent as a JSON
 * string only when topKeywords is a non-empty array. Resolves to the
 * response body.
 */
async generatePresenters(
  speakers: number,
  projectId?: string,
  audience?: string,
  contentType?: string,
  topKeywords?: string[]
): Promise<{
  avatars: Array<{ avatar_url: string; speaker_number: number; prompt?: string; persona_id?: string; seed?: number }>;
  persona_id?: string;
}> {
  const form = new FormData();
  form.append('speakers', speakers.toString());

  // Optional text fields, listed in the order they are appended to the form.
  const optionalFields: Array<[string, string | undefined]> = [
    ['project_id', projectId],
    ['audience', audience],
    ['content_type', contentType],
  ];
  for (const [fieldName, fieldValue] of optionalFields) {
    if (fieldValue) {
      form.append(fieldName, fieldValue);
    }
  }

  if (Array.isArray(topKeywords) && topKeywords.length > 0) {
    form.append('top_keywords', JSON.stringify(topKeywords));
  }

  const requestConfig = { headers: { 'Content-Type': 'multipart/form-data' } };
  const { data } = await aiApiClient.post('/api/podcast/avatar/generate', form, requestConfig);
  return data;
},
|
||||
|
||||
/**
 * Send an existing avatar URL to POST /api/podcast/avatar/make-presentable
 * as multipart form data.
 *
 * `project_id` is added to the form only when projectId is truthy.
 * Resolves to the response body.
 */
async makeAvatarPresentable(avatarUrl: string, projectId?: string): Promise<{ avatar_url: string; avatar_filename: string }> {
  const form = new FormData();
  form.append('avatar_url', avatarUrl);
  if (projectId) {
    form.append('project_id', projectId);
  }

  const requestConfig = { headers: { 'Content-Type': 'multipart/form-data' } };
  const { data } = await aiApiClient.post('/api/podcast/avatar/make-presentable', form, requestConfig);
  return data;
},
|
||||
|
||||
Reference in New Issue
Block a user