feat(phase-4): UI/UX improvements for Podcast Maker Write phase

Frontend Changes:
- Add scene numbering badge (1/N) next to scene titles
- Add inline status chips (Complete, Audio, Image, Voice, Why Script)
- Apply professional, AI-like gradient styling with shadows to all chips
- Remove Script Editor header and 'Why This Script Format?' collapsible
- Move Voice and Why Script info to per-scene chips
- Make scene section mobile-responsive (responsive layout, button sizing)
- Rename 'B-Roll Charts' to 'Podcast Charts' with accordion (collapsed by default)
- Add sceneIndex prop to SceneEditor for scene numbering
- Enhanced accessibility with keyboard navigation and focus states

Backend Changes:
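- Audio handler: detect the voice sample's actual audio format from its bytes and pass the MIME type to the voice-clone call; include the exception type (and a 500-char-truncated message) in the 500 error detail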
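- B-roll handler: mount the router under /broll, require authentication (Authorization header or token query parameter) when serving chart previews, and accept additional visual_cue chart types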
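- Script handler: add a source field to generated bar_comparison chart_data, rename the fallback chart type from 'bullet' to 'bullet_points', and document the additional chart types and fields in the script prompt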
- B-roll composer and service improvements
- Remove temporary broll_temp files

Technical:
- Full mobile responsiveness for scene cards
- Gradient chip styling: vibrant colors with white text and shadows
- Non-breaking approval/generation flow preserved
- TypeScript compatibility maintained
This commit is contained in:
ajaysi
2026-04-24 15:44:09 +05:30
parent 8b79099b15
commit ba94ee30bc
16 changed files with 977 additions and 2126 deletions

View File

@@ -280,10 +280,11 @@ async def generate_podcast_audio(
if voice_sample_url:
from services.llm_providers.main_audio_generation import qwen3_voice_clone, cosyvoice_voice_clone
from utils.media_utils import detect_audio_format
engine = (request.voice_clone_engine or "qwen3").lower()
logger.warning(f"[Podcast] 🔊 Voice clone path: engine={engine}, scene='{request.scene_title}', voice_sample_url={voice_sample_url[:80]}...")
# Download voice sample from URL (with caching)
logger.warning(f"[Podcast] Fetching voice sample from: {voice_sample_url}")
try:
@@ -294,6 +295,11 @@ async def generate_podcast_audio(
logger.warning(f"[Podcast] Voice sample fetch result: {len(voice_sample_bytes) if voice_sample_bytes else 0} bytes")
if not voice_sample_bytes:
raise HTTPException(status_code=400, detail=f"Could not fetch voice sample from {voice_sample_url}")
# Detect actual audio format from bytes (may differ from file extension)
detected_fmt, detected_mime = detect_audio_format(voice_sample_bytes)
logger.warning(f"[Podcast] 🔊 Detected voice sample format: {detected_fmt} ({detected_mime}), {len(voice_sample_bytes)} bytes")
voice_mime_type = detected_mime or "audio/wav"
scene_text = request.text.strip()
if len(scene_text) > 4000:
@@ -329,6 +335,7 @@ async def generate_podcast_audio(
audio_bytes=voice_sample_bytes,
text=scene_text,
user_id=user_id,
audio_mime_type=voice_mime_type,
),
)
audio_bytes = result_obj.preview_audio_bytes
@@ -341,6 +348,7 @@ async def generate_podcast_audio(
audio_bytes=voice_sample_bytes,
text=scene_text,
user_id=user_id,
audio_mime_type=voice_mime_type,
),
)
audio_bytes = result_obj.preview_audio_bytes
@@ -419,9 +427,14 @@ async def generate_podcast_audio(
result["audio_url"] = f"/api/podcast/audio/{audio_filename}"
logger.warning(f"[Podcast] Audio generated - path: {result.get('audio_path')}, url: {result.get('audio_url')}")
except HTTPException:
raise
except Exception as exc:
logger.error(f"[Podcast] ❌ Audio generation failed: {exc}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Audio generation failed: {exc}")
exc_type = type(exc).__name__
exc_msg = str(exc)[:500]
logger.error(f"[Podcast] Audio generation failed ({exc_type}): {exc_msg}")
logger.error(f"[Podcast] Audio generation traceback:", exc_info=True)
raise HTTPException(status_code=500, detail=f"Audio generation failed ({exc_type}): {exc_msg}")
# Save to asset library (podcast module)
try:

View File

@@ -14,7 +14,7 @@ from pydantic import BaseModel, Field
from pathlib import Path
import uuid
from middleware.auth_middleware import get_current_user
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
from api.story_writer.utils.auth import require_authenticated_user
from api.story_writer.task_manager import task_manager
from api.podcast.utils import _resolve_podcast_media_file
@@ -23,7 +23,7 @@ from utils.media_utils import resolve_media_path
from loguru import logger
router = APIRouter()
router = APIRouter(prefix="/broll", tags=["B-Roll"])
def _resolve_broll_background_image_path(background_image_url: str) -> str:
@@ -148,7 +148,7 @@ class BrollSceneRequest(BaseModel):
key_insight: str
supporting_stat: str
chart_data: Optional[Dict[str, Any]] = None
visual_cue: str = Field(default="bar_chart_comparison", description="bar_chart_comparison | bullet_points")
visual_cue: str = Field(default="bar_comparison", description="bar_comparison | bar_horizontal | line_trend | pie | stacked_bar | bullet_points | full_avatar")
duration: float = Field(default=10.0, ge=3.0, le=60.0)
background_image_url: str
avatar_video_url: Optional[str] = None
@@ -216,7 +216,9 @@ async def generate_chart_preview(
)
preview_filename = Path(preview_path).name
preview_url = f"/api/podcast/preview/{chart_id}/{preview_filename}"
preview_url = f"/api/podcast/broll/preview/{chart_id}/{preview_filename}"
logger.warning(f"[Broll] Chart preview generated: chart_id={chart_id}, path={preview_path}, url={preview_url}")
return ChartPreviewResponse(
preview_url=preview_url,
@@ -249,7 +251,7 @@ async def generate_broll_scene(
try:
# Validate visual_cue
valid_cues = ["bar_chart_comparison", "bullet_points", "full_avatar"]
valid_cues = ["bar_comparison", "bar_chart_comparison", "bar_horizontal", "line_trend", "pie", "stacked_bar", "bullet_points", "full_avatar"]
if request.visual_cue not in valid_cues:
raise HTTPException(
status_code=400,
@@ -333,37 +335,39 @@ async def compose_broll_videos(
async def serve_chart_preview(
chart_id: str,
filename: str,
user_id: Optional[str] = None,
current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
):
"""
Serve chart preview PNG files.
- user_id passed as query param for multi-tenant workspace resolution
- endpoint is public (no auth) to allow direct image loading in browser
Uses authentication via Authorization header or token query parameter,
matching the pattern used by /api/podcast/images/ for browser <img> tags.
"""
from api.podcast.constants import get_podcast_media_dir
user_id = require_authenticated_user(current_user)
# Validate filename to prevent directory traversal
if ".." in filename or "/" in filename or "\\" in filename:
raise HTTPException(status_code=400, detail="Invalid filename")
logger.warning(f"[Broll] serve_chart_preview: chart_id={chart_id}, filename={filename}, user_id={user_id}")
broll_service = get_broll_service(user_id=user_id)
expected_filename = broll_service.get_chart_preview_filename(chart_id)
if filename != expected_filename:
raise HTTPException(status_code=404, detail="Chart preview not found")
# Use expected_filename to get the correct path
file_path = broll_service.get_output_path(expected_filename)
charts_dir = get_podcast_media_dir("chart", user_id)
file_path = charts_dir / filename
logger.warning(f"[Broll] serve_chart_preview: resolved path={file_path}, exists={file_path.exists()}")
if not file_path.exists():
raise HTTPException(status_code=404, detail="Chart preview not found")
# Security: ensure resolved path is within charts_dir
if not str(file_path.resolve()).startswith(str(charts_dir.resolve())):
raise HTTPException(status_code=403, detail="Access denied")
return FileResponse(
path=str(file_path),
media_type="image/png",
filename=expected_filename,
filename=filename,
)

View File

@@ -141,16 +141,18 @@ async def generate_podcast_script(
if numeric_pairs:
labels = [p[0] for p in numeric_pairs]
values = [p[1] for p in numeric_pairs]
sources = [f.get("url", f.get("source", "")) for f in research_fact_cards[:12] if f.get("url") or f.get("source")]
return {
"type": "bar_comparison",
"title": scene_title,
"labels": labels,
"values": values,
"takeaway": "Data points sourced from research facts used in this scene.",
"source": sources[0] if sources else "",
}
return {
"type": "bullet",
"type": "bullet_points",
"title": scene_title,
"bullet_points": ["Key point 1", "Key point 2", "Key point 3"],
"takeaway": "Narration summary for this scene.",
@@ -233,11 +235,15 @@ Return JSON with scenes array. Each scene:
- ttsHints: optional list from [pause_300ms, pause_700ms, smile, serious_tone, emphasize_data]
- Plain text only, no markdown
- chart_data: object for B-roll mapping (required in audio_only)
- type: bar_comparison|line_trend|bullet_points
- type: bar_comparison|bar_horizontal|line_trend|pie|stacked_bar|bullet_points
- title: short chart title
- labels: list
- values: list (same length as labels)
- values: list (same length as labels, required for bar/line/pie)
- before/after: parallel lists of numbers (for bar_comparison only)
- segments: list of {{name, values}} (for stacked_bar only)
- bullet_points: list of strings (for bullet_points only)
- takeaway: one sentence tying chart to narration
- source: URL or citation for the data (e.g. "Research fact #3" or a URL from the research context)
COST OPTIMIZATION:
- 5-6 scenes max for {request.duration_minutes} min episode