feat: podcast demo mode with ALWRITY_ENABLED_FEATURES support

- Add ALWRITY_ENABLED_FEATURES env var for feature gating
- Podcast-only mode: skip LLM bootstrap, scheduler, persona services
- Enhance video generation prompt with scene context, analysis, narration
- Add voice cloning support via custom_voice_id in WaveSpeed
- Add text-to-speech for research results (browser speechSynthesis)
- Fix render queue to sync images from script phase
- Add WaveSpeed LLM pricing (gpt-oss-120b)
- Fix podcast bible generation error handling
- Refactor RouterManager for feature-based router loading
2026-04-03 06:59:59 +05:30
parent c52b1eabc9
commit 63bb937796
58 changed files with 3568 additions and 1597 deletions
--- a/backend/api/podcast/handlers/images.py
+++ b/backend/api/podcast/handlers/images.py
@@ -104,6 +104,16 @@ async def generate_podcast_scene_image(
        # Otherwise, generate from scratch with podcast-optimized prompt
        image_prompt = ""  # Initialize prompt variable
        
+        # Emotion to lighting mapping for visual tone
+        emotion_lighting = {
+            "happy": "warm, bright lighting, cheerful atmosphere",
+            "excited": "dynamic, energetic lighting with highlights",
+            "serious": "professional, balanced lighting, authoritative feel",
+            "curious": "soft, inviting lighting, thoughtful atmosphere",
+            "confident": "strong, dramatic lighting, authoritative look",
+            "neutral": "professional, balanced lighting"
+        }
+        
        if base_avatar_bytes:
            # Use Ideogram Character API for consistent character generation
            # Use custom prompt if provided, otherwise build scene-specific prompt
@@ -127,6 +137,28 @@ async def generate_podcast_scene_image(
                    if bible_obj.host.look:
                        prompt_parts.append(f"Host Look: {bible_obj.host.look}")
                
+                # Scene emotion for visual tone
+                emotion_lighting = {
+                    "happy": "warm, bright lighting, cheerful atmosphere",
+                    "excited": "dynamic, energetic lighting with highlights",
+                    "serious": "professional, balanced lighting, authoritative feel",
+                    "curious": "soft, inviting lighting, thoughtful atmosphere",
+                    "confident": "strong, dramatic lighting, authoritative look",
+                    "neutral": "professional, balanced lighting"
+                }
+                scene_emotion = request.scene_emotion
+                if scene_emotion and scene_emotion in emotion_lighting:
+                    prompt_parts.append(emotion_lighting[scene_emotion])
+                
+                # AI Analysis context for visual relevance
+                if request.analysis:
+                    keywords = request.analysis.get("topKeywords", [])[:5]
+                    if keywords:
+                        prompt_parts.append(f"Keywords: {', '.join(keywords)}")
+                    audience = request.analysis.get("audience", "")
+                    if audience:
+                        prompt_parts.append(f"Target: {audience}")
+                
                # Scene content insights for visual context
                if request.scene_content:
                    content_preview = request.scene_content[:200].replace("\n", " ").strip()
@@ -139,6 +171,12 @@ async def generate_podcast_scene_image(
                        visual_keywords.append("modern tech studio setting")
                    if any(word in content_lower for word in ["business", "growth", "strategy", "market"]):
                        visual_keywords.append("professional business studio")
+                    if any(word in content_lower for word in ["nature", "outdoor", "environment", "green"]):
+                        visual_keywords.append("natural outdoor setting")
+                    if any(word in content_lower for word in ["medical", "health", "wellness"]):
+                        visual_keywords.append("clean medical studio")
+                    if any(word in content_lower for word in ["education", "learning", "students"]):
+                        visual_keywords.append("classroom or educational setting")
                    if visual_keywords:
                        prompt_parts.append(", ".join(visual_keywords))
                
@@ -265,6 +303,19 @@ async def generate_podcast_scene_image(
            if request.scene_title:
                prompt_parts.append(f"Scene theme: {request.scene_title}")
            
+            # Scene emotion for visual tone (no avatar branch)
+            if request.scene_emotion and request.scene_emotion in emotion_lighting:
+                prompt_parts.append(emotion_lighting[request.scene_emotion])
+            
+            # AI Analysis context (no avatar branch)
+            if request.analysis:
+                keywords = request.analysis.get("topKeywords", [])[:5]
+                if keywords:
+                    prompt_parts.append(f"Keywords: {', '.join(keywords)}")
+                audience = request.analysis.get("audience", "")
+                if audience:
+                    prompt_parts.append(f"Target: {audience}")
+            
            # Content context for visual relevance
            if request.scene_content:
                content_preview = request.scene_content[:150].replace("\n", " ").strip()
@@ -276,6 +327,12 @@ async def generate_podcast_scene_image(
                    visual_keywords.append("modern technology aesthetic")
                if any(word in content_lower for word in ["business", "growth", "strategy", "market"]):
                    visual_keywords.append("professional business environment")
+                if any(word in content_lower for word in ["nature", "outdoor", "environment"]):
+                    visual_keywords.append("natural outdoor setting")
+                if any(word in content_lower for word in ["medical", "health", "wellness"]):
+                    visual_keywords.append("clean medical studio")
+                if any(word in content_lower for word in ["education", "learning", "students"]):
+                    visual_keywords.append("classroom or educational setting")
                if visual_keywords:
                    prompt_parts.append(", ".join(visual_keywords))
            
@@ -379,6 +436,7 @@ async def generate_podcast_scene_image(
            provider=result.provider,
            model=result.model,
            cost=cost,
+            image_prompt=image_prompt,
        )

    except HTTPException: