Podcast Maker: Fix progress modals, research JSON, header stepper, voice/podcastMode chips

2026-04-19 13:16:59 +05:30
parent ff61708e29
commit e704aa7d87
61 changed files with 7965 additions and 368 deletions
--- a/backend/api/podcast/handlers/analysis.py
+++ b/backend/api/podcast/handlers/analysis.py
@@ -51,6 +51,7 @@ async def enhance_podcast_idea(
    # In podcast-only mode, skip bible generation since onboarding is disabled
    bible_context = ""
    if not _is_podcast_only_mode():
+        logger.warning(f"[Podcast Enhance] Podcast mode=full — attempting Bible generation for user {user_id}")
        try:
            bible_service = PodcastBibleService()
            if request.bible:
@@ -65,6 +66,7 @@ async def enhance_podcast_idea(
            logger.warning(f"[Podcast Enhance] Failed to parse or generate bible context: {exc}")
    else:
        # In podcast mode, use the provided bible directly if available
+        logger.warning(f"[Podcast Enhance] Podcast mode=podcast_only — skipping Bible generation for user {user_id}")
        if request.bible:
            try:
                from models.podcast_bible_models import PodcastBible
@@ -209,7 +211,11 @@ async def analyze_podcast_idea(
    final_avatar_url = request.avatar_url
    final_avatar_prompt = None
    
-    if not final_avatar_url:
+    # Skip avatar generation for audio_only mode
+    podcast_mode = getattr(request, 'podcast_mode', None) or 'video_only'
+    should_generate_avatar = not final_avatar_url and podcast_mode != 'audio_only'
+    
+    if should_generate_avatar:
        logger.info(f"[Podcast Analyze] No avatar_url provided, generating one for user {user_id}")
        try:
            # 1. PRE-FLIGHT VALIDATION: Check subscription limits for image generation
@@ -240,8 +246,9 @@ async def analyze_podcast_idea(
            if image_result and image_result.image_bytes:
                img_id = str(uuid.uuid4())[:8]
                filename = f"presenter_podcast_{user_id}_{img_id}.png"
-                output_path = PODCAST_IMAGES_DIR / filename
-                PODCAST_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
+                avatars_dir = PODCAST_IMAGES_DIR / "avatars"
+                avatars_dir.mkdir(parents=True, exist_ok=True)
+                output_path = avatars_dir / filename
                
                with open(output_path, "wb") as f:
                    f.write(image_result.image_bytes)
@@ -253,13 +260,14 @@ async def analyze_podcast_idea(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
-                    file_url=final_avatar_url,
+                    source_module="podcast_analysis",
                    filename=filename,
+                    file_url=final_avatar_url,
                    title=f"Presenter Avatar - {request.idea[:40]}",
                    description=f"AI-generated podcast presenter for: {request.idea}",
                    provider=image_result.provider,
                    model=image_result.model,
-                    cost=image_result.cost
+                    cost=0.0  # Cost tracked in generate_image
                )
                logger.info(f"[Podcast Analyze] ✅ Generated and saved avatar to {final_avatar_url}")
        except Exception as e:
--- a/backend/api/podcast/handlers/broll.py
+++ b/backend/api/podcast/handlers/broll.py
@@ -0,0 +1,241 @@
+"""
+B-Roll Handlers
+
+API endpoints for B-roll chart preview and video generation.
+"""
+
+from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
+from fastapi.responses import FileResponse
+from typing import Dict, Any, Optional, List
+from pydantic import BaseModel, Field
+import uuid
+
+from middleware.auth_middleware import get_current_user
+from api.story_writer.utils.auth import require_authenticated_user
+from services.podcast.broll_service import get_broll_service
+from loguru import logger
+
+
+router = APIRouter()
+
+
+class ChartPreviewRequest(BaseModel):
+    """Request body for POST /preview/chart: the data and presentation options for one static chart image."""
+    chart_data: Dict[str, Any] = Field(..., description="Chart data (labels, before/after, etc.)")  # required; handed to the B-roll service unchanged
+    chart_type: str = Field(  # plain str: the values listed in the description are not validated by this model
+        default="bar_comparison", 
+        description="bar_comparison | bar_horizontal | line_trend | pie | stacked_bar | bullet"
+    )
+    title: str = Field(default="", description="Chart title")
+    subtitle: Optional[str] = Field(default="", description="Optional subtitle at bottom")  # None is accepted; the handler coerces it to ""
+
+
+class ChartPreviewResponse(BaseModel):
+    """Response body for POST /preview/chart."""
+    preview_url: str  # URL path of the form /api/podcast/broll/preview/{chart_id}/{filename}
+    chart_id: str  # 8-hex-character identifier minted by the handler for this preview
+
+
+class BrollSceneRequest(BaseModel):
+    """Request body for POST /render/broll-scene: everything needed to describe one B-roll scene."""
+    scene_id: str  # echoed back unchanged in the response
+    key_insight: str
+    supporting_stat: str
+    chart_data: Optional[Dict[str, Any]] = None  # optional chart overlay data
+    visual_cue: str = Field(default="bar_chart_comparison", description="bar_chart_comparison | bullet_points | full_avatar")
+    duration: float = Field(default=10.0, ge=3.0, le=60.0)  # pydantic enforces 3.0 <= duration <= 60.0
+    background_image_url: str
+    avatar_video_url: Optional[str] = None
+
+
+class BrollSceneResponse(BaseModel):
+    """Response body for POST /render/broll-scene."""
+    scene_id: str  # copied from the request
+    broll_video_url: str  # currently always "": the handler returns a placeholder
+    broll_video_path: str  # currently always "": the handler returns a placeholder
+
+
+class BrollComposeRequest(BaseModel):
+    """Request body for POST /render/broll-compose: scene videos to join plus output settings."""
+    scene_video_paths: List[str]  # NOTE(review): client-supplied paths are forwarded to the service unchecked - confirm the service restricts them
+    output_filename: str = "final_broll.mp4"
+    fade_dur: float = Field(default=0.5, ge=0.0, le=2.0)  # pydantic enforces 0.0 <= fade_dur <= 2.0
+    fps: int = Field(default=24, ge=12, le=60)  # pydantic enforces 12 <= fps <= 60
+
+
+class BrollComposeResponse(BaseModel):
+    """Response body for POST /render/broll-compose."""
+    final_video_url: str  # URL path of the form /api/podcast/broll/final/{filename}
+    final_video_path: str  # path string returned by the service's compose_final_video; NOTE(review): exposes a server-side path to the client
+
+
+@router.post("/preview/chart", response_model=ChartPreviewResponse)
+async def generate_chart_preview(
+    request: ChartPreviewRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+):
+    """
+    Generate a static chart PNG preview and return the URL it is served from.
+
+    Called from the Write phase so users can see a chart before committing
+    to B-roll video generation.
+
+    Raises:
+        HTTPException: 500 when the service returns no path or rendering fails.
+    """
+    from pathlib import Path
+
+    user_id = require_authenticated_user(current_user)
+    try:
+        preview_path = get_broll_service().generate_chart_preview(
+            chart_data=request.chart_data,
+            chart_type=request.chart_type,
+            title=request.title,
+            subtitle=request.subtitle or "",
+        )
+        if not preview_path:
+            logger.error("[Broll] Chart preview generation returned no path")
+            raise HTTPException(status_code=500, detail="Failed to generate chart preview")
+        chart_id = uuid.uuid4().hex[:8]
+        # Path(...).name uses the platform's separator rules, unlike split('/').
+        preview_url = f"/api/podcast/broll/preview/{chart_id}/{Path(preview_path).name}"
+        return ChartPreviewResponse(preview_url=preview_url, chart_id=chart_id)
+    except HTTPException:
+        # Pass our own HTTP errors through instead of re-wrapping them below.
+        raise
+    except Exception as e:
+        logger.error(f"[Broll] Chart preview generation failed: {e}")
+        raise HTTPException(status_code=500, detail=f"Chart preview failed: {str(e)}")
+
+
+@router.post("/render/broll-scene", response_model=BrollSceneResponse)
+async def generate_broll_scene(
+    request: BrollSceneRequest,
+    background_tasks: BackgroundTasks,  # accepted but not used by the current body
+    current_user: Dict[str, Any] = Depends(get_current_user),
+):
+    """
+    Validate a B-roll scene request and return a placeholder response.
+    
+    The intended output is a programmatic video with:
+    - Background image with Ken Burns effect
+    - Chart overlay (if chart_data provided)
+    - Avatar circle in corner (if avatar_video_url provided)
+    - Insight card at bottom
+    
+    Not implemented yet: no video is rendered and both URL fields come back empty.
+    """
+    user_id = require_authenticated_user(current_user)
+    
+    try:
+        # Validate visual_cue (400 on anything outside this list)
+        valid_cues = ["bar_chart_comparison", "bullet_points", "full_avatar"]
+        if request.visual_cue not in valid_cues:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Invalid visual_cue. Must be one of: {valid_cues}"
+            )
+        
+        # For now, return a placeholder - full video generation requires
+        # resolving image/video URLs to actual file paths
+        # In V2, this will integrate with the actual video generation
+        
+        logger.info(f"[Broll] B-roll scene request for scene: {request.scene_id}")
+        
+        return BrollSceneResponse(
+            scene_id=request.scene_id,
+            broll_video_url="",
+            broll_video_path="",
+        )
+        
+    except HTTPException:
+        raise  # let the 400 above through untouched
+    except Exception as e:
+        logger.error(f"[Broll] B-roll scene generation failed: {e}")
+        raise HTTPException(status_code=500, detail=f"B-roll generation failed: {str(e)}")
+
+
+@router.post("/render/broll-compose", response_model=BrollComposeResponse)
+async def compose_broll_videos(
+    request: BrollComposeRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+):
+    """
+    Compose multiple B-roll scene videos into one final video with crossfades.
+
+    Raises:
+        HTTPException: 400 if output_filename is not a bare file name; 500 if composition fails.
+    """
+    from pathlib import Path
+
+    user_id = require_authenticated_user(current_user)
+    out_name = request.output_filename
+    # output_filename is client-supplied and later served via /final/{filename}: allow a bare name only.
+    if out_name in ("", ".", "..") or "/" in out_name or "\\" in out_name:
+        raise HTTPException(status_code=400, detail="Invalid output_filename")
+
+    try:
+        final_path = get_broll_service().compose_final_video(
+            video_paths=request.scene_video_paths,
+            output_filename=out_name,
+            fade_dur=request.fade_dur,
+            fps=request.fps,
+        )
+        if not final_path:
+            raise RuntimeError("compose_final_video returned no output path")
+        final_url = f"/api/podcast/broll/final/{Path(final_path).name}"
+        return BrollComposeResponse(final_video_url=final_url, final_video_path=str(final_path))
+    except Exception as e:
+        logger.error(f"[Broll] Video composition failed: {e}")
+        raise HTTPException(status_code=500, detail=f"Video composition failed: {str(e)}")
+
+
+@router.get("/preview/{chart_id}/{filename}")
+async def serve_chart_preview(
+    chart_id: str,
+    filename: str,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+):
+    """
+    Serve a chart preview PNG from the B-roll service's output directory.
+
+    Tries chart_preview_{chart_id}.png, then the file named in the URL, since
+    POST /preview/chart mints chart_id independently of that file name.
+    """
+    from pathlib import Path
+
+    user_id = require_authenticated_user(current_user)
+    output_dir = get_broll_service().output_dir
+    # NOTE(review): assumes previews are written to output_dir; Path(...).name strips any directory part.
+    candidates = (output_dir / f"chart_preview_{chart_id}.png", output_dir / Path(filename).name)
+    file_path = next((p for p in candidates if p.is_file()), None)
+    if file_path is None:
+        raise HTTPException(status_code=404, detail="Chart preview not found")
+    return FileResponse(path=str(file_path), media_type="image/png", filename=filename)
+
+
+@router.get("/final/{filename}")
+async def serve_final_broll(
+    filename: str,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+):
+    """
+    Serve a final composed B-roll video from the service's output directory.
+
+    Raises HTTPException 404 when filename is not the bare name of an existing file.
+    """
+    from pathlib import Path
+
+    user_id = require_authenticated_user(current_user)
+    file_path = get_broll_service().output_dir / filename
+    # Reject names with a directory part (e.g. backslash paths on Windows) and
+    # directories such as "..", so only files directly in output_dir are served.
+    if Path(filename).name != filename or not file_path.is_file():
+        raise HTTPException(status_code=404, detail="Video not found")
+    return FileResponse(path=str(file_path), media_type="video/mp4", filename=filename)
+
+
+@router.get("/health")
+async def broll_health():
+    """Liveness probe for the B-roll router: returns a constant payload, with no auth dependency and no service call."""
+    return {"status": "ok", "service": "broll"}
--- a/backend/api/podcast/handlers/projects.py
+++ b/backend/api/podcast/handlers/projects.py
@@ -119,7 +119,7 @@ async def update_project(
        project = service.update_project(user_id, project_id, **updates)
        
        if not project:
-            raise HTTPException(status_code=404, detail="Project not found")
+            raise HTTPException(status_code=404, detail=f"Project {project_id} not found")
        
        return PodcastProjectResponse.model_validate(project)
    except HTTPException:
--- a/backend/api/podcast/handlers/research.py
+++ b/backend/api/podcast/handlers/research.py
@@ -22,6 +22,7 @@ from ..models import (
    PodcastExaSource,
    PodcastExaConfig,
    PodcastResearchInsight,
+    PodcastResearchOutput,
 )

 router = APIRouter()
@@ -159,43 +160,50 @@ As a podcast research expert, analyze this data and create content that will:
 4. Include a compelling call-to-action for listeners

 REQUIRED OUTPUT (JSON):
-=======================
+======================
 {{
-  "summary": "2-3 paragraph comprehensive summary in Markdown. Start with a hook that matches the episode intro. Include specific data points, expert quotes, and trends.",
+  "summary": "2-3 paragraph comprehensive summary in Markdown. Start with a hook that matches the episode intro.",
  "key_insights": [
    {{
-      "title": "Catchy, engaging title for this insight",
-      "content": "3-4 sentences with specific facts, quotes, or data. Write in a conversational tone suitable for a podcast host to discuss.",
-      "source_indices": [1, 2, 3],
-      "podcast_talking_points": ["Point 1 host can expand on", "Counter-point or follow-up", "Question to ask guest"]
+      "title": "Insight title",
+      "content": "3-4 sentences with specific facts, quotes, or data for podcast host.",
+      "source_indices": [1, 2],
+      "podcast_talking_points": ["Point host can expand on", "Counter-point"]
    }}
  ],
  "expert_quotes": [
    {{
-      "quote": "Direct quote from source",
+      "quote": "Direct quote from source text",
      "source_index": 1,
      "context": "Why this quote matters for the podcast"
    }}
  ],
-  "listener_cta_suggestions": ["Specific action listener can take", "Resource to share", "Next episode preview"]
+  "listener_cta_suggestions": ["Action listener can take", "Resource to share", "Next episode preview"],
+  "mapped_angles": [
+    {{
+      "title": "Content angle title",
+      "why": "Why compelling for audience",
+      "mapped_fact_ids": [1, 2]
+    }}
+  ]
 }}

+IMPORTANT: You must include ALL fields above with valid data. expert_quotes, listener_cta_suggestions, and mapped_angles must have content - do NOT leave them empty!
+
 QUALITY STANDARDS:
-==================
- INSIGHTS MUST BE DEEP, not superficial - avoid generic statements
- Include SPECIFIC DATA POINTS, percentages, statistics when available
- Extract EXPERT QUOTES that hosts can reference
- Identify GAPS in the research where more depth is needed
- Make content naturally flow into the planned episode hook and CTA
- Write in a CONVERSATIONAL tone - how a host would actually speak
- Flag any CONTROVERSIAL or debatable claims for host to address
+=================
+- Include at least 2 expert_quotes with source_index
+- Include at least 2 listener_cta_suggestions 
+- Include at least 2 mapped_angles
+- Include specific data points, percentages, statistics
+- Write in conversational tone
 """
        try:
-            logger.warning(f"[Podcast Research] Calling LLM for insight extraction...")
+            logger.warning(f"[Podcast Research] Calling LLM with json_struct...")
            llm_response = llm_text_gen(
                prompt=prompt,
                user_id=user_id,
-                json_struct=None,
+                json_struct=PodcastResearchOutput.model_json_schema(),
                preferred_provider=None,
                flow_type="premium_tool",
            )