feat: Improve podcast maker UX and fix bugs

Frontend:
- Add progress modals with educational content for analysis and voice cloning
- Improve tab navigation in AnalysisPanel (combine Titles, Hook, CTA into one tab)
- Fix tab styling to make inactive tabs visible
- Fix avatar 'Make Presentable' not updating preview (blob URL handling)
- Improve mobile responsiveness for avatar tabs
- Clean up verbose console logging (AnalysisPanel, demoMode, RobustCamera)
- Add sequential progress messages instead of cycling

Backend:
- Fix 'Depends object has no attribute get' error in auth and image editing
- Use get_session_for_user instead of get_db outside FastAPI DI context
- Reduce WARNING logs to DEBUG in audio handler
- Add proper emphasis boolean handling in script generation
- Add missing fields to PodcastScene and PodcastSceneLine models
- Fix voice cloning cost estimate display issue
2026-04-07 16:28:11 +05:30
parent 1a456b21b7
commit e59c77b221
17 changed files with 851 additions and 198 deletions
--- a/backend/api/podcast/handlers/audio.py
+++ b/backend/api/podcast/handlers/audio.py
@@ -391,9 +391,9 @@ async def serve_podcast_audio(
        raise HTTPException(status_code=400, detail="Invalid filename")
    
    user_id = require_authenticated_user(current_user)
-    logger.warning(f"[Podcast] serve_podcast_audio called: user_id={user_id}, filename={filename}")
+    logger.debug(f"[Podcast] serve_podcast_audio called: user_id={user_id}, filename={filename}")
    audio_path = _resolve_podcast_media_file(filename, "audio", user_id)
-    logger.warning(f"[Podcast] Resolved audio path: {audio_path}")
+    logger.debug(f"[Podcast] Resolved audio path: {audio_path}")
    
    return FileResponse(audio_path, media_type="audio/mpeg")

--- a/backend/api/podcast/handlers/avatar.py
+++ b/backend/api/podcast/handlers/avatar.py
@@ -114,6 +114,9 @@ async def make_avatar_presentable(
    Transform an uploaded avatar image into a podcast-appropriate presenter.
    Uses AI image editing to convert the uploaded photo into a professional podcast presenter.
    """
+    # CRITICAL: Log at the very start before any logic
+    logger.info(f"[Podcast] ===== MAKE PRESENTABLE ENDPOINT START =====")
+    
    user_id = require_authenticated_user(current_user)
    logger.info(f"[Podcast] Make presentable request received - user_id={user_id}, avatar_url={avatar_url}, project_id={project_id}")
    
--- a/backend/api/podcast/handlers/script.py
+++ b/backend/api/podcast/handlers/script.py
@@ -178,25 +178,83 @@ COST OPTIMIZATION:
    scenes_data = data.get("scenes") or []
    if not isinstance(scenes_data, list):
        raise HTTPException(status_code=500, detail="LLM response missing scenes array")
+    
+    if len(scenes_data) == 0:
+        logger.warning("[ScriptGen] LLM returned empty scenes array")
+        raise HTTPException(status_code=500, detail="LLM returned no scenes - please try again")
+
+    logger.warning(f"[ScriptGen] Processing {len(scenes_data)} scenes from LLM response")

    valid_emotions = {"neutral", "happy", "excited", "serious", "curious", "confident"}

    # Normalize scenes
    scenes: list[PodcastScene] = []
+    total_lines_input = 0
+    total_lines_output = 0
+    dropped_empty_lines = 0
+    
    for idx, scene in enumerate(scenes_data):
+        if not isinstance(scene, dict):
+            logger.warning(f"[ScriptGen] Scene {idx} is not a dict, skipping")
+            continue
+            
        title = scene.get("title") or f"Scene {idx + 1}"
        duration = int(scene.get("duration") or max(30, (request.duration_minutes * 60) // max(1, len(scenes_data))))
        emotion = scene.get("emotion") or "neutral"
        if emotion not in valid_emotions:
+            logger.warning(f"[ScriptGen] Invalid emotion '{emotion}' in scene {idx}, defaulting to 'neutral'")
            emotion = "neutral"
        lines_raw = scene.get("lines") or []
+        total_lines_input += len(lines_raw)
        lines: list[PodcastSceneLine] = []
-        for line in lines_raw:
+        
+        for line_idx, line in enumerate(lines_raw):
+            if not isinstance(line, dict):
+                logger.warning(f"[ScriptGen] Line {line_idx} in scene {idx} is not a dict, skipping")
+                continue
+                
            speaker = line.get("speaker") or ("Host" if len(lines) % request.speakers == 0 else "Guest")
            text = line.get("text") or ""
-            emphasis = line.get("emphasis", False)
+            
+            # Handle emphasis - convert various values to boolean
+            emphasis_raw = line.get("emphasis", False)
+            if isinstance(emphasis_raw, bool):
+                emphasis = emphasis_raw
+            elif isinstance(emphasis_raw, str):
+                emphasis = emphasis_raw.lower() in ("true", "yes", "1")
+                if emphasis_raw.lower() not in ("true", "false", "yes", "no", "1", "0"):
+                    logger.debug(f"[ScriptGen] Unusual emphasis value '{emphasis_raw}' converted to {emphasis}")
+            else:
+                emphasis = bool(emphasis_raw)
+            
+            # Generate line ID if not provided
+            line_id = line.get("id") or f"line-{idx + 1}-{line_idx + 1}"
+            
+            # Get used fact IDs if provided
+            used_fact_ids = line.get("usedFactIds") or line.get("used_fact_ids") or None
+            
            if text:
-                lines.append(PodcastSceneLine(speaker=speaker, text=text, emphasis=emphasis))
+                lines.append(PodcastSceneLine(
+                    speaker=speaker, 
+                    text=text, 
+                    emphasis=emphasis,
+                    id=line_id,
+                    usedFactIds=used_fact_ids
+                ))
+                total_lines_output += 1
+            else:
+                dropped_empty_lines += 1
+                logger.debug(f"[ScriptGen] Dropped empty line {line_idx} in scene {idx}")
+                
+        # Log scene status
+        if scenes_data and isinstance(scene, dict):
+            image_url_raw = scene.get("imageUrl") or scene.get("image_url")
+            audio_url_raw = scene.get("audioUrl") or scene.get("audio_url")
+            if image_url_raw:
+                logger.warning(f"[ScriptGen] Scene {idx} has imageUrl - will be reset to None")
+            if audio_url_raw:
+                logger.warning(f"[ScriptGen] Scene {idx} has audioUrl - will be reset to None")
+                
        scenes.append(
            PodcastScene(
                id=scene.get("id") or f"scene-{idx + 1}",
@@ -205,8 +263,16 @@ COST OPTIMIZATION:
                lines=lines,
                approved=False,
                emotion=emotion,
+                imageUrl=None,  # Will be generated later
+                audioUrl=None,  # Will be generated later
+                imagePrompt=None,  # Will be generated during image generation
            )
        )
+    
+    # Summary logging
+    logger.warning(f"[ScriptGen] Script generated: {len(scenes)} scenes, {total_lines_output}/{total_lines_input} lines")
+    if dropped_empty_lines > 0:
+        logger.warning(f"[ScriptGen] Dropped {dropped_empty_lines} empty lines")

    return PodcastScriptResponse(scenes=scenes)

--- a/backend/api/podcast/models.py
+++ b/backend/api/podcast/models.py
@@ -101,6 +101,8 @@ class PodcastSceneLine(BaseModel):
    speaker: str
    text: str
    emphasis: Optional[bool] = False
+    id: Optional[str] = None  # Optional line ID for frontend tracking
+    usedFactIds: Optional[List[str]] = None  # Facts referenced in this line


 class PodcastScene(BaseModel):
@@ -111,6 +113,8 @@ class PodcastScene(BaseModel):
    approved: bool = False
    emotion: Optional[str] = None
    imageUrl: Optional[str] = None  # Generated image URL for video generation
+    audioUrl: Optional[str] = None  # Generated audio URL for this scene
+    imagePrompt: Optional[str] = None  # Original image generation prompt for video context


 class PodcastExaConfig(BaseModel):
--- a/backend/api/story_writer/utils/auth.py
+++ b/backend/api/story_writer/utils/auth.py
@@ -8,9 +8,14 @@ def require_authenticated_user(current_user: Dict[str, Any] | None) -> str:
    Validates the current user dictionary provided by Clerk middleware and
    returns the normalized user_id. Raises HTTP 401 if authentication fails.
    """
-    if not current_user or not isinstance(current_user, dict):
+    # Guard against dependency injection issues where Depends object might be passed
+    if current_user is None or not isinstance(current_user, dict):
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Authentication required")
-
+    
+    # Additional check: ensure it's actually a dict and not a Depends object or other type
+    if not hasattr(current_user, 'get') or not callable(getattr(current_user, 'get')):
+        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid authentication context")
+    
    user_id = str(current_user.get("id", "")).strip()
    if not user_id:
        raise HTTPException(
--- a/backend/services/llm_providers/main_image_editing.py
+++ b/backend/services/llm_providers/main_image_editing.py
@@ -106,7 +106,7 @@ def edit_image(
    skip_validation = os.getenv("ALWRITY_SKIP_IMAGE_EDITING_VALIDATION", "false").lower() in ("true", "1", "yes")
    
    if user_id and not skip_validation:
-        from services.database import get_db
+        from services.database import get_session_for_user
        from services.subscription import PricingService
        from services.subscription.preflight_validator import validate_image_editing_operations
        from fastapi import HTTPException
@@ -115,17 +115,18 @@ def edit_image(
        
        db = None
        try:
-            # Properly handle the generator
-            db_gen = get_db()
-            db = next(db_gen)
-            
-            pricing_service = PricingService(db)
-            # Raises HTTPException immediately if validation fails - frontend gets immediate response
-            validate_image_editing_operations(
-                pricing_service=pricing_service,
-                user_id=user_id
-            )
-            logger.info(f"[Image Editing] ✅ Pre-flight validation passed for user_id={user_id} - proceeding with image editing")
+            # Use get_session_for_user instead of get_db() since we're outside FastAPI DI
+            db = get_session_for_user(user_id)
+            if not db:
+                logger.warning(f"[Image Editing] ⚠️ Could not get DB session for user {user_id} - skipping validation")
+            else:
+                pricing_service = PricingService(db)
+                # Raises HTTPException immediately if validation fails - frontend gets immediate response
+                validate_image_editing_operations(
+                    pricing_service=pricing_service,
+                    user_id=user_id
+                )
+                logger.info(f"[Image Editing] ✅ Pre-flight validation passed for user_id={user_id} - proceeding with image editing")
        except HTTPException as http_ex:
            # Re-raise immediately - don't proceed with API call
            logger.error(f"[Image Editing] ❌ Pre-flight validation failed for user_id={user_id} - blocking API call: {http_ex.detail}")