feat: podcast demo mode with ALWRITY_ENABLED_FEATURES support

- Add ALWRITY_ENABLED_FEATURES env var for feature gating
- Podcast-only mode: skip LLM bootstrap, scheduler, persona services
- Enhance video generation prompt with scene context, analysis, narration
- Add voice cloning support via custom_voice_id in WaveSpeed
- Add text-to-speech for research results (browser speechSynthesis)
- Fix render queue to sync images from script phase
- Add WaveSpeed LLM pricing (gpt-oss-120b)
- Fix podcast bible generation error handling
- Refactor RouterManager for feature-based router loading
This commit is contained in:
ajaysi
2026-04-03 06:59:59 +05:30
parent c52b1eabc9
commit 63bb937796
58 changed files with 3568 additions and 1597 deletions

View File

@@ -6,6 +6,7 @@ Centralized constants and directory configuration for podcast module.
from pathlib import Path
from typing import Literal
from loguru import logger
from services.story_writer.audio_generation_service import StoryAudioGenerationService
# Directory paths
@@ -45,11 +46,14 @@ def get_podcast_media_dir(
}[media_type]
if user_id:
tenant_media_dir = ROOT_DIR / "workspace" / f"workspace_{_sanitize_user_id(user_id)}" / "media" / media_subdir
sanitized = _sanitize_user_id(user_id)
tenant_media_dir = ROOT_DIR / "workspace" / f"workspace_{sanitized}" / "media" / media_subdir
resolved_dir = tenant_media_dir.resolve()
else:
resolved_dir = (DATA_MEDIA_DIR / media_subdir).resolve()
logger.debug(f"[Podcast] get_podcast_media_dir: type={media_type}, user_id={user_id}, sanitized={user_id and _sanitize_user_id(user_id)}, resolved={resolved_dir}")
if ensure_exists:
resolved_dir.mkdir(parents=True, exist_ok=True)
@@ -61,7 +65,9 @@ def get_podcast_media_read_dirs(media_type: MediaType, user_id: str | None = Non
dirs: list[Path] = []
if user_id:
dirs.append(get_podcast_media_dir(media_type, user_id))
logger.debug(f"[Podcast] get_podcast_media_read_dirs: added user dir for {user_id}")
dirs.append(get_podcast_media_dir(media_type, None))
logger.debug(f"[Podcast] get_podcast_media_read_dirs: dirs={dirs}")
return dirs

View File

@@ -5,10 +5,11 @@ Analysis endpoint for podcast ideas.
"""
from fastapi import APIRouter, Depends, HTTPException
from typing import Dict, Any
from typing import Dict, Any, Optional, List
import json
import uuid
from sqlalchemy.orm import Session
from pydantic import BaseModel
from services.database import get_db
from middleware.auth_middleware import get_current_user
@@ -258,6 +259,10 @@ Return JSON with:
- top_keywords: 5 podcast-relevant keywords/phrases
- suggested_outlines: 2 items, each with title (<=60 chars) and 4-6 short segments (bullet-friendly, factual)
- title_suggestions: 3 concise episode titles
- episode_hook: one compelling 15-30 second opening hook/angle that grabs attention
- key_takeaways: 3-5 actionable insights listeners will learn
- guest_talking_points: (if guest included) 3-4 suggested questions/angles for guest interview
- listener_cta: one clear call-to-action for listeners
- research_queries: array of {{"query": "string", "rationale": "string"}}
- exa_suggested_config: suggested Exa search options with:
- exa_search_type: "auto" | "neural" | "keyword"
@@ -271,7 +276,10 @@ Return JSON with:
Requirements:
- Keep language factual, actionable, and suited for spoken audio.
- Avoid narrative fiction tone.
- Prefer 2024-2025 context.
- For research queries: Mix of time-sensitive and evergreen queries:
- 2-3 queries should focus on latest 2025-2026 developments, trends, and data (use year in query)
- 2-3 queries should be evergreen/fundamental (concepts, definitions, best practices, proven strategies) - do NOT include years in these
- Today's date is April 2026.
"""
try:
@@ -305,6 +313,10 @@ Requirements:
top_keywords = data.get("top_keywords") or []
suggested_outlines = data.get("suggested_outlines") or []
title_suggestions = data.get("title_suggestions") or []
episode_hook = data.get("episode_hook") or ""
key_takeaways = data.get("key_takeaways") or []
guest_talking_points = data.get("guest_talking_points") or []
listener_cta = data.get("listener_cta") or ""
research_queries = data.get("research_queries") or []
exa_suggested_config = data.get("exa_suggested_config") or None
@@ -314,6 +326,10 @@ Requirements:
top_keywords=top_keywords,
suggested_outlines=suggested_outlines,
title_suggestions=title_suggestions,
episode_hook=episode_hook,
key_takeaways=key_takeaways,
guest_talking_points=guest_talking_points,
listener_cta=listener_cta,
research_queries=research_queries,
exa_suggested_config=exa_suggested_config,
bible=bible_obj.model_dump() if bible_obj else None,
@@ -321,3 +337,106 @@ Requirements:
avatar_prompt=final_avatar_prompt,
)
# Request body for the /regenerate-queries route.
class RegenerateQueriesRequest(BaseModel):
# Podcast idea; quoted verbatim in the prompt and used as the fallback topic.
idea: str
# User feedback steering the new queries; added to the prompt only when non-empty.
feedback: str
# Earlier analysis output; the handler reads "title_suggestions", "top_keywords" and "audience" from it.
existing_analysis: Optional[Dict[str, Any]] = None
# Raw Podcast Bible payload; the handler builds a PodcastBible from it and serializes it into the prompt.
bible: Optional[Dict[str, Any]] = None
# Response body for the /regenerate-queries route.
class RegenerateQueriesResponse(BaseModel):
# Query objects; the prompt asks the LLM for "query" and "rationale" keys, and the handler returns at most 7.
research_queries: List[Dict[str, str]]
def _normalize_research_queries(raw: Any, limit: int = 7) -> List[Dict[str, str]]:
    """Coerce an LLM response into at most ``limit`` string-only query dicts.

    Accepts either an already-parsed dict or a JSON string. Anything that
    cannot be interpreted as ``{"research_queries": [ {...}, ... ]}`` yields an
    empty list instead of raising, so a malformed LLM reply degrades to "no
    queries" rather than a 500 from response-model validation.
    """
    parsed = raw
    if isinstance(raw, str):
        try:
            parsed = json.loads(raw)
        except ValueError as parse_err:  # json.JSONDecodeError subclasses ValueError
            logger.warning(f"[Regenerate Queries] Could not parse LLM response as JSON: {parse_err}")
            return []

    if not isinstance(parsed, dict):
        return []

    candidates = parsed.get("research_queries")
    if not isinstance(candidates, list):
        # Covers a missing key as well as an explicit null / wrong type.
        return []

    cleaned: List[Dict[str, str]] = []
    for item in candidates:
        if not isinstance(item, dict):
            continue
        # The response model is List[Dict[str, str]]; coerce so one stray
        # number or null does not invalidate the whole response.
        entry = {str(key): "" if value is None else str(value) for key, value in item.items()}
        if not entry.get("query", "").strip():
            continue
        entry.setdefault("rationale", "")
        cleaned.append(entry)
        if len(cleaned) >= limit:
            break
    return cleaned


@router.post("/regenerate-queries", response_model=RegenerateQueriesResponse)
async def regenerate_research_queries(
    request: RegenerateQueriesRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Regenerate research queries based on user feedback and existing analysis.

    Builds a prompt from the podcast idea, the optional user feedback, selected
    fields of the existing analysis (first title suggestion, up to five
    keywords, audience) and, when supplied, the serialized Podcast Bible, then
    asks the LLM for research queries and returns at most seven of them.

    Raises:
        HTTPException: re-raised unchanged when raised by the LLM call;
            any other failure of the LLM call is reported as status 500.
    """
    user_id = require_authenticated_user(current_user)

    idea = request.idea
    feedback = request.feedback

    # Defaults used when no prior analysis was supplied.
    topic = idea
    keywords = ""
    audience = ""
    analysis = request.existing_analysis
    if analysis:
        titles = analysis.get("title_suggestions")
        if isinstance(titles, list) and titles:
            topic = titles[0] or idea
        top_keywords = analysis.get("top_keywords")
        if isinstance(top_keywords, list):
            # str() guards against non-string keyword entries in client data.
            keywords = ", ".join(str(keyword) for keyword in top_keywords[:5])
        audience = analysis.get("audience") or ""

    # Serialize Bible context if provided; failure here is non-fatal.
    bible_context = ""
    if request.bible:
        try:
            bible_service = PodcastBibleService()
            from models.podcast_bible_models import PodcastBible
            bible_data = PodcastBible(**request.bible)
            bible_context = bible_service.serialize_bible(bible_data)
        except Exception as e:
            logger.warning(f"Failed to serialize bible for query regeneration: {e}")

    # Optional prompt sections are built up-front: escape sequences inside an
    # f-string replacement field only parse on Python 3.12+ (PEP 701).
    feedback_section = f"USER FEEDBACK: {feedback}" if feedback else ""
    analysis_section = (
        f"EXISTING ANALYSIS CONTEXT:\n- Topic: {topic}\n- Keywords: {keywords}\n- Audience: {audience}\n"
        if analysis
        else ""
    )
    bible_section = f"PODCAST BIBLE CONTEXT:\n{bible_context}\n" if bible_context else ""

    prompt = f"""
You are a research strategist for podcast content. Given a podcast idea, existing analysis, and user feedback,
generate 7 new research queries that address the user's specific needs.
{feedback_section}
{analysis_section}
{bible_section}
Podcast Idea: "{idea}"
TASK:
Generate exactly 7 research queries that:
1. Incorporate the user's feedback direction
2. Build on the existing analysis context
3. Mix of time-sensitive (2025-2026) and evergreen topics
4. Are highly specific to the podcast topic
Return JSON with:
- research_queries: array of {{"query": "string", "rationale": "string"}}
Requirements:
- At least 2-3 queries should focus on latest 2025-2026 developments (include year in query)
- At least 2-3 queries should be evergreen (concepts, definitions, best practices - NO year)
- Queries should be specific and actionable, not generic
"""
    try:
        from services.llm_providers.main_text_generation import llm_text_gen
        raw = llm_text_gen(
            prompt=prompt,
            user_id=user_id,
            json_struct={"research_queries": [{"query": "string", "rationale": "string"}]},
            preferred_provider=None,
            flow_type="premium_tool",
        )
        return RegenerateQueriesResponse(research_queries=_normalize_research_queries(raw))
    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"[Regenerate Queries] Failed for user {user_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Regenerate queries failed: {exc}") from exc

View File

@@ -126,12 +126,14 @@ async def generate_podcast_audio(
try:
audio_service = get_podcast_audio_service(user_id)
logger.warning(f"[Podcast] Generating audio with service dir: {audio_service.output_dir}")
result: StoryAudioResult = audio_service.generate_ai_audio(
scene_number=0,
scene_title=request.scene_title,
text=request.text.strip(),
user_id=user_id,
voice_id=request.voice_id or "Wise_Woman",
custom_voice_id=request.custom_voice_id,
speed=request.speed or 1.0, # Normal speed (was 0.9, but too slow - causing duration issues)
volume=request.volume or 1.0,
pitch=request.pitch or 0.0, # Normal pitch (0.0 = neutral)
@@ -149,6 +151,8 @@ async def generate_podcast_audio(
if result.get("audio_url") and "/api/story/audio/" in result.get("audio_url", ""):
audio_filename = result.get("audio_filename", "")
result["audio_url"] = f"/api/podcast/audio/{audio_filename}"
logger.warning(f"[Podcast] Audio generated - path: {result.get('audio_path')}, url: {result.get('audio_url')}")
except Exception as exc:
raise HTTPException(status_code=500, detail=f"Audio generation failed: {exc}")
@@ -387,7 +391,9 @@ async def serve_podcast_audio(
raise HTTPException(status_code=400, detail="Invalid filename")
user_id = require_authenticated_user(current_user)
logger.warning(f"[Podcast] serve_podcast_audio called: user_id={user_id}, filename={filename}")
audio_path = _resolve_podcast_media_file(filename, "audio", user_id)
logger.warning(f"[Podcast] Resolved audio path: {audio_path}")
return FileResponse(audio_path, media_type="audio/mpeg")

View File

@@ -104,6 +104,16 @@ async def generate_podcast_scene_image(
# Otherwise, generate from scratch with podcast-optimized prompt
image_prompt = "" # Initialize prompt variable
# Emotion to lighting mapping for visual tone
emotion_lighting = {
"happy": "warm, bright lighting, cheerful atmosphere",
"excited": "dynamic, energetic lighting with highlights",
"serious": "professional, balanced lighting, authoritative feel",
"curious": "soft, inviting lighting, thoughtful atmosphere",
"confident": "strong, dramatic lighting, authoritative look",
"neutral": "professional, balanced lighting"
}
if base_avatar_bytes:
# Use Ideogram Character API for consistent character generation
# Use custom prompt if provided, otherwise build scene-specific prompt
@@ -127,6 +137,28 @@ async def generate_podcast_scene_image(
if bible_obj.host.look:
prompt_parts.append(f"Host Look: {bible_obj.host.look}")
# Scene emotion for visual tone
emotion_lighting = {
"happy": "warm, bright lighting, cheerful atmosphere",
"excited": "dynamic, energetic lighting with highlights",
"serious": "professional, balanced lighting, authoritative feel",
"curious": "soft, inviting lighting, thoughtful atmosphere",
"confident": "strong, dramatic lighting, authoritative look",
"neutral": "professional, balanced lighting"
}
scene_emotion = request.scene_emotion
if scene_emotion and scene_emotion in emotion_lighting:
prompt_parts.append(emotion_lighting[scene_emotion])
# AI Analysis context for visual relevance
if request.analysis:
keywords = request.analysis.get("topKeywords", [])[:5]
if keywords:
prompt_parts.append(f"Keywords: {', '.join(keywords)}")
audience = request.analysis.get("audience", "")
if audience:
prompt_parts.append(f"Target: {audience}")
# Scene content insights for visual context
if request.scene_content:
content_preview = request.scene_content[:200].replace("\n", " ").strip()
@@ -139,6 +171,12 @@ async def generate_podcast_scene_image(
visual_keywords.append("modern tech studio setting")
if any(word in content_lower for word in ["business", "growth", "strategy", "market"]):
visual_keywords.append("professional business studio")
if any(word in content_lower for word in ["nature", "outdoor", "environment", "green"]):
visual_keywords.append("natural outdoor setting")
if any(word in content_lower for word in ["medical", "health", "wellness"]):
visual_keywords.append("clean medical studio")
if any(word in content_lower for word in ["education", "learning", "students"]):
visual_keywords.append("classroom or educational setting")
if visual_keywords:
prompt_parts.append(", ".join(visual_keywords))
@@ -265,6 +303,19 @@ async def generate_podcast_scene_image(
if request.scene_title:
prompt_parts.append(f"Scene theme: {request.scene_title}")
# Scene emotion for visual tone (no avatar branch)
if request.scene_emotion and request.scene_emotion in emotion_lighting:
prompt_parts.append(emotion_lighting[request.scene_emotion])
# AI Analysis context (no avatar branch)
if request.analysis:
keywords = request.analysis.get("topKeywords", [])[:5]
if keywords:
prompt_parts.append(f"Keywords: {', '.join(keywords)}")
audience = request.analysis.get("audience", "")
if audience:
prompt_parts.append(f"Target: {audience}")
# Content context for visual relevance
if request.scene_content:
content_preview = request.scene_content[:150].replace("\n", " ").strip()
@@ -276,6 +327,12 @@ async def generate_podcast_scene_image(
visual_keywords.append("modern technology aesthetic")
if any(word in content_lower for word in ["business", "growth", "strategy", "market"]):
visual_keywords.append("professional business environment")
if any(word in content_lower for word in ["nature", "outdoor", "environment"]):
visual_keywords.append("natural outdoor setting")
if any(word in content_lower for word in ["medical", "health", "wellness"]):
visual_keywords.append("clean medical studio")
if any(word in content_lower for word in ["education", "learning", "students"]):
visual_keywords.append("classroom or educational setting")
if visual_keywords:
prompt_parts.append(", ".join(visual_keywords))
@@ -379,6 +436,7 @@ async def generate_podcast_scene_image(
provider=result.provider,
model=result.model,
cost=cost,
image_prompt=image_prompt,
)
except HTTPException:

View File

@@ -27,7 +27,10 @@ async def create_project(
db: Session = Depends(get_db),
current_user: Dict[str, Any] = Depends(get_current_user),
):
"""Create a new podcast project."""
"""Create a new podcast project.
If a project with the same idea already exists, return 409 conflict with existing project info.
"""
try:
user_id = current_user.get("user_id") or current_user.get("id")
if not user_id:
@@ -40,6 +43,19 @@ async def create_project(
if existing:
raise HTTPException(status_code=400, detail="Project ID already exists")
# Check for duplicate idea (case-insensitive partial match)
existing_idea = service.get_project_by_idea(user_id, request.idea)
if existing_idea:
raise HTTPException(
status_code=409,
detail={
"message": "A project with similar idea already exists",
"existing_project_id": existing_idea.project_id,
"existing_idea": existing_idea.idea,
"existing_status": existing_idea.status,
}
)
project = service.create_project(
user_id=user_id,
project_id=request.project_id,

View File

@@ -8,6 +8,7 @@ from fastapi import APIRouter, Depends, HTTPException
from typing import Dict, Any, List
from types import SimpleNamespace
import json
import re
from middleware.auth_middleware import get_current_user
from api.story_writer.utils.auth import require_authenticated_user
@@ -36,10 +37,16 @@ async def podcast_research_exa(
Uses Podcast Bible and Analysis context for hyper-personalization.
"""
user_id = require_authenticated_user(current_user)
logger.warning(f"[Podcast Research] ========== REQUEST START ==========")
logger.warning(f"[Podcast Research] User: {user_id}, Topic: {request.topic[:80]}...")
logger.warning(f"[Podcast Research] Queries count: {len(request.queries) if request.queries else 0}")
queries = [q.strip() for q in request.queries if q and q.strip()]
if not queries:
raise HTTPException(status_code=400, detail="At least one query is required for research.")
logger.warning(f"[Podcast Research] EXACT queries being sent to Exa: {queries}")
exa_cfg = request.exa_config or PodcastExaConfig()
cfg = SimpleNamespace(
@@ -52,6 +59,7 @@ async def podcast_research_exa(
)
provider = ExaResearchProvider()
logger.warning(f"[Podcast Research] Provider initialized, starting Exa search...")
# --- Context Building ---
bible_service = PodcastBibleService()
@@ -68,9 +76,16 @@ async def podcast_research_exa(
if request.analysis:
analysis_context = f"""
PODCAST ANALYSIS CONTEXT:
Audience: {request.analysis.get('audience', 'General')}
========================
Topic: {request.topic}
Target Audience: {request.analysis.get('audience', 'General')}
Content Type: {request.analysis.get('content_type', 'Informative')}
Top Keywords: {', '.join(request.analysis.get('top_keywords', []))}
Episode Hook (Intro): {request.analysis.get('episode_hook', 'N/A')}
Key Takeaways: {', '.join(request.analysis.get('key_takeaways', [])) or 'N/A'}
Guest Talking Points: {', '.join(request.analysis.get('guest_talking_points', [])) or 'N/A'}
Listener CTA: {request.analysis.get('listener_cta', 'N/A')}
"""
# Exa search params
@@ -84,6 +99,7 @@ Top Keywords: {', '.join(request.analysis.get('top_keywords', []))}
try:
# 1. RUN EXA SEARCH
logger.warning(f"[Podcast Research] Calling Exa search with topic: {request.topic[:100]}...")
result = await provider.search(
prompt=request.topic,
topic=request.topic,
@@ -92,8 +108,9 @@ Top Keywords: {', '.join(request.analysis.get('top_keywords', []))}
config=cfg,
user_id=user_id,
)
logger.warning(f"[Podcast Research] Exa search completed, got {len(result.get('sources', []))} sources")
except Exception as exc:
logger.error(f"[Podcast Exa Research] Search failed for user {user_id}: {exc}")
logger.error(f"[Podcast Exa Research] Search failed for user {user_id}: {exc}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Exa research failed: {exc}")
# 2. EXTRACT INSIGHTS VIA LLM
@@ -104,46 +121,77 @@ Top Keywords: {', '.join(request.analysis.get('top_keywords', []))}
key_insights = []
if raw_content and sources:
logger.info(f"[Podcast Research] Extracting insights from {len(sources)} sources for user {user_id}")
logger.warning(f"[Podcast Research] Extracting insights from {len(sources)} sources for user {user_id}")
# Build list of research queries used for this search
queries_used = ", ".join([f"Query {i+1}: {q}" for i, q in enumerate(queries)]) if queries else "No specific queries"
prompt = f"""
You are an expert research analyst for a high-end podcast production team.
Your task is to analyze the following research data and extract deep, actionable insights for a podcast episode.
You are an expert research analyst and content strategist for a high-end podcast production team.
Your task is to analyze the research data and extract deep, podcast-ready insights.
PODCAST CONTEXT:
Topic: {request.topic}
================
Main Topic: {request.topic}
RESEARCH QUERIES USED:
=====================
{queries_used}
PODCAST BIBLE & BRAND CONTEXT:
==============================
{bible_context}
PODCAST ANALYSIS (from AI Analysis phase):
==========================================
{analysis_context}
RESEARCH DATA (from {len(sources)} sources):
============================================
{raw_content}
TASK:
1. Provide a comprehensive summary (2-3 paragraphs) of the most important findings. Use Markdown for formatting (bolding, lists).
2. Extract 3-5 "Key Insights". Each insight should have a title and a detailed explanation.
3. For each insight, identify which source indices (e.g. 1, 2) it was derived from.
YOUR TASK:
==========
As a podcast research expert, analyze this data and create content that will:
1. Engage the specific target audience identified above
2. Support the episode hook and key takeaways already planned
3. Provide talking points that complement the guest's expertise
4. Include a compelling call-to-action for listeners
NOTE: The research data includes "Key Highlights", "Summaries", and "Excerpts" from various sources.
Pay special attention to the "Key Highlights" sections as they contain the most relevant information extracted by the neural search engine.
Return JSON structure:
REQUIRED OUTPUT (JSON):
=======================
{{
"summary": "Detailed markdown summary...",
"summary": "2-3 paragraph comprehensive summary in Markdown. Start with a hook that matches the episode intro. Include specific data points, expert quotes, and trends.",
"key_insights": [
{{
"title": "Insight Title",
"content": "Detailed markdown content...",
"source_indices": [1, 2]
"title": "Catchy, engaging title for this insight",
"content": "3-4 sentences with specific facts, quotes, or data. Write in a conversational tone suitable for a podcast host to discuss.",
"source_indices": [1, 2, 3],
"podcast_talking_points": ["Point 1 host can expand on", "Counter-point or follow-up", "Question to ask guest"]
}}
]
],
"expert_quotes": [
{{
"quote": "Direct quote from source",
"source_index": 1,
"context": "Why this quote matters for the podcast"
}}
],
"listener_cta_suggestions": ["Specific action listener can take", "Resource to share", "Next episode preview"]
}}
Requirements:
- Ensure insights are deep, not just superficial facts. Look for trends, expert opinions, and specific data points.
- Tone should be professional, insightful, and ready for a podcast host to discuss.
- Avoid generic filler.
QUALITY STANDARDS:
==================
- INSIGHTS MUST BE DEEP, not superficial - avoid generic statements
- Include SPECIFIC DATA POINTS, percentages, statistics when available
- Extract EXPERT QUOTES that hosts can reference
- Identify GAPS in the research where more depth is needed
- Make content naturally flow into the planned episode hook and CTA
- Write in a CONVERSATIONAL tone - how a host would actually speak
- Flag any CONTROVERSIAL or debatable claims for host to address
"""
try:
logger.warning(f"[Podcast Research] Calling LLM for insight extraction...")
llm_response = llm_text_gen(
prompt=prompt,
user_id=user_id,
@@ -151,15 +199,45 @@ Requirements:
preferred_provider=None,
flow_type="premium_tool",
)
logger.warning(f"[Podcast Research] LLM response received, length: {len(llm_response) if llm_response else 0}")
# Normalize response
# Normalize response - handle both string and dict responses
data = None
if isinstance(llm_response, str):
data = json.loads(llm_response)
try:
# Try to fix common JSON issues
fixed_response = llm_response.strip()
# Remove markdown code blocks if present
if fixed_response.startswith("```"):
fixed_response = fixed_response.split("```")[1]
if fixed_response.startswith("json"):
fixed_response = fixed_response[4:]
fixed_response = fixed_response.strip()
data = json.loads(fixed_response)
except json.JSONDecodeError as json_err:
logger.warning(f"[Podcast Research] Failed to parse JSON: {json_err}. Response preview: {llm_response[:500]}...")
# Try to extract JSON from response using regex
json_match = re.search(r'\{.*\}', llm_response, re.DOTALL)
if json_match:
try:
data = json.loads(json_match.group())
logger.warning("[Podcast Research] Successfully extracted JSON via regex")
except:
pass
else:
data = llm_response
summary = data.get("summary", "")
key_insights = [PodcastResearchInsight(**insight) for insight in data.get("key_insights", [])]
if data:
try:
summary = data.get("summary", "")
key_insights = [PodcastResearchInsight(**insight) for insight in data.get("key_insights", [])]
except Exception as insight_err:
logger.warning(f"[Podcast Research] Failed to parse insights: {insight_err}. Data keys: {list(data.keys()) if isinstance(data, dict) else 'not a dict'}")
summary = data.get("summary", "") if isinstance(data, dict) else ""
key_insights = []
else:
summary = ""
key_insights = []
except HTTPException:
raise
except Exception as exc:
@@ -183,21 +261,32 @@ Requirements:
logger.warning(f"[Podcast Exa Research] Failed to track usage: {track_err}")
sources_payload = []
seen_urls = set()
for src in sources:
url = src.get("url", "")
# Skip duplicates
if url and url in seen_urls:
continue
if url:
seen_urls.add(url)
try:
sources_payload.append(PodcastExaSource(**src))
except Exception:
sources_payload.append(PodcastExaSource(**{
"title": src.get("title", ""),
"url": src.get("url", ""),
"excerpt": src.get("excerpt", ""),
"url": url,
"excerpt": src.get("excerpt") or (src.get("highlights")[0] if src.get("highlights") else "") or src.get("summary", ""),
"published_at": src.get("published_at"),
"publishedDate": src.get("publishedDate"),
"highlights": src.get("highlights"),
"summary": src.get("summary"),
"source_type": src.get("source_type"),
"index": src.get("index"),
"image": src.get("image"),
"author": src.get("author"),
"text": src.get("text"),
"credibility_score": src.get("credibility_score"),
}))
return PodcastExaResearchResponse(

View File

@@ -1,11 +1,12 @@
"""
Podcast Script Handlers
Script generation endpoint.
Script generation and approval endpoints.
"""
from fastapi import APIRouter, Depends, HTTPException
from typing import Dict, Any
from typing import Dict, Any, Optional
from pydantic import BaseModel, Field
import json
from middleware.auth_middleware import get_current_user
@@ -24,6 +25,29 @@ from ..models import (
router = APIRouter()
# Request body for the /script/approve route.
class SceneApprovalRequest(BaseModel):
# Required, non-empty (min_length=1) project identifier.
project_id: str = Field(..., min_length=1)
# Required, non-empty (min_length=1) scene identifier.
scene_id: str = Field(..., min_length=1)
# Approval decision; defaults to True when omitted.
approved: bool = True
# Optional free-form notes. NOTE(review): the approve handler visible here does not read this field.
notes: Optional[str] = None
@router.post("/script/approve")
async def approve_podcast_scene(
    request: SceneApprovalRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
) -> Dict[str, Any]:
    """Log a scene approval decision for the authenticated user and echo it back.

    The decision is written to the application log only; the response repeats
    the project id, scene id and approval flag with ``success`` set to True.

    NOTE(review): nothing is stored beyond the log line and ``request.notes``
    is not used here - confirm whether real persistence is intended.
    """
    user_id = require_authenticated_user(current_user)

    logger.warning(f"[Podcast] Scene approval recorded user={user_id} project={request.project_id} scene={request.scene_id} approved={request.approved}")

    acknowledgement: Dict[str, Any] = {"success": True}
    acknowledgement["project_id"] = request.project_id
    acknowledgement["scene_id"] = request.scene_id
    acknowledgement["approved"] = request.approved
    return acknowledgement
@router.post("/script", response_model=PodcastScriptResponse)
async def generate_podcast_script(
request: PodcastScriptRequest,
@@ -33,6 +57,10 @@ async def generate_podcast_script(
Generate a podcast script outline (scenes + lines) using podcast-oriented prompting.
"""
user_id = require_authenticated_user(current_user)
logger.warning(f"[ScriptGen] ========== SCRIPT GENERATION START ==========")
logger.warning(f"[ScriptGen] Topic: {request.idea[:60]}...")
logger.warning(f"[ScriptGen] Duration: {request.duration_minutes} min, Speakers: {request.speakers}")
logger.warning(f"[ScriptGen] Has research: {bool(request.research)}, Has bible: {bool(request.bible)}, Has analysis: {bool(request.analysis)}")
# Build comprehensive research context for higher-quality scripts
research_context = ""
@@ -77,55 +105,53 @@ async def generate_podcast_script(
# Extract Analysis and Outline context for grounding
analysis_context = ""
if request.analysis:
analysis_context = f"""
TARGET AUDIENCE: {request.analysis.get('audience', 'General')}
CONTENT TYPE: {request.analysis.get('contentType', 'Conversational')}
TOP KEYWORDS: {', '.join(request.analysis.get('topKeywords', []))}
"""
try:
audience = request.analysis.get('audience', '') or ''
content_type = request.analysis.get('contentType', '') or ''
keywords = request.analysis.get('topKeywords', []) or []
analysis_context = f"ANALYSIS: Audience={audience} | Type={content_type} | Keywords={', '.join(keywords[:8])}"
except:
pass
outline_context = ""
if request.outline:
outline_context = f"""
REFINED EPISODE OUTLINE (Follow this structure closely):
Title: {request.outline.get('title', 'N/A')}
Segments: {' | '.join(request.outline.get('segments', []))}
"""
try:
title = request.outline.get('title', '') or ''
segments = request.outline.get('segments', []) or []
outline_context = f"OUTLINE: {title} - {' | '.join(segments[:5])}"
except:
pass
prompt = f"""You are an expert podcast script planner. Create natural, conversational podcast scenes.
prompt = f"""Create a podcast script with scenes and dialogue.
{f"PODCAST BIBLE (Hyper-Personalization Context):\n{bible_context}\n" if bible_context else ""}
{f"ANALYSIS CONTEXT:\n{analysis_context}\n" if analysis_context else ""}
{f"REFINED OUTLINE:\n{outline_context}\n" if outline_context else ""}
{f"BIBLE: {bible_context[:1500]}" if bible_context else ""}
{f"{analysis_context}" if analysis_context else ""}
{f"{outline_context}" if outline_context else ""}
{f"RESEARCH: {research_context[:1200]}" if research_context else ""}
Podcast Idea: "{request.idea}"
Duration: ~{request.duration_minutes} minutes
Speakers: {request.speakers} (Host + optional Guest)
Topic: "{request.idea}"
Duration: {request.duration_minutes} min | Speakers: {request.speakers}
{f"RESEARCH CONTEXT:\n{research_context}\n" if research_context else ""}
Return JSON with scenes array. Each scene:
- id: string
- title: short title (<=50 chars)
- duration: seconds (total/5)
- emotion: neutral|happy|excited|serious|curious|confident
- lines: array of {{speaker, text, emphasis}}
- Use 2-4 LINES PER SCENE (shorter script = lower TTS costs)
- Each line: 1-3 sentences, conversational
- Plain text only, no markdown
Return JSON with:
- scenes: array of scenes. Each scene has:
- id: string
- title: short scene title (<= 60 chars)
- duration: duration in seconds (evenly split across total duration)
- emotion: string (one of: "neutral", "happy", "excited", "serious", "curious", "confident")
- lines: array of {{"speaker": "...", "text": "...", "emphasis": boolean}}
* Write natural, conversational dialogue
* Each line can be a sentence or a few sentences that flow together
* Use plain text only - no markdown formatting (no asterisks, underscores, etc.)
* Mark "emphasis": true for key statistics or important points
Guidelines:
- Write for spoken delivery: conversational, natural, with contractions.
- Follow the interaction tone specified in the Bible.
- Ensure the Host persona matches the background and personality traits from the Bible.
- Structure the intro and outro scenes according to the Bible's "Intro Format" and "Outro Format".
- Adhere to any constraints mentioned in the Bible.
- Use insights from the Research Context to ground the conversation in facts.
- IMPORTANT: Follow the REFINED OUTLINE segments as the primary structure for the episode.
COST OPTIMIZATION:
- 5-6 scenes max for {request.duration_minutes} min episode
- Concise, information-dense dialogue
- Skip filler words and redundant phrases
- Focus on unique insights from research
- Make every line count toward value delivery
"""
try:
logger.warning(f"[ScriptGen] Calling LLM to generate script (prompt length: {len(prompt)})...")
raw = llm_text_gen(
prompt=prompt,
user_id=user_id,
@@ -133,6 +159,7 @@ Guidelines:
preferred_provider=None,
flow_type="premium_tool",
)
logger.warning(f"[ScriptGen] LLM response received, length: {len(raw) if raw else 0}")
except HTTPException:
raise
except Exception as exc:

View File

@@ -140,17 +140,20 @@ def _execute_podcast_video_task(
except Exception as e:
logger.warning(f"[Podcast] Failed to fetch project context for video generation: {e}")
# Prepare scene data for animation
# Prepare scene data for animation - include all context for enhanced prompt
scene_data = {
"scene_number": scene_number,
"title": request.scene_title,
"scene_id": request.scene_id,
"image_prompt": request.scene_image_prompt,
"description": request.scene_narration,
"lines": [{"text": request.scene_narration}] if request.scene_narration else [],
}
story_context = {
"project_id": request.project_id,
"type": "podcast",
"bible": project_bible,
"analysis": project_analysis,
"analysis": request.analysis or project_analysis, # Use passed analysis or fallback to DB
}
animation_result = animate_scene_with_voiceover(

View File

@@ -63,6 +63,10 @@ class PodcastAnalyzeResponse(BaseModel):
top_keywords: list[str]
suggested_outlines: list[Dict[str, Any]]
title_suggestions: list[str]
episode_hook: Optional[str] = None
key_takeaways: Optional[list[str]] = None
guest_talking_points: Optional[list[str]] = None
listener_cta: Optional[str] = None
research_queries: Optional[List[Dict[str, str]]] = None
exa_suggested_config: Optional[Dict[str, Any]] = None
bible: Optional[Dict[str, Any]] = None
@@ -142,12 +146,15 @@ class PodcastExaSource(BaseModel):
url: str = ""
excerpt: str = ""
published_at: Optional[str] = None
publishedDate: Optional[str] = None # Exa format
highlights: Optional[List[str]] = None
summary: Optional[str] = None
source_type: Optional[str] = None
index: Optional[int] = None
image: Optional[str] = None
author: Optional[str] = None
text: Optional[str] = None # Exa full text
credibility_score: Optional[float] = None # Exa scores
class PodcastResearchInsight(BaseModel):
@@ -155,6 +162,9 @@ class PodcastResearchInsight(BaseModel):
title: str
content: str
source_indices: List[int] = []
podcast_talking_points: Optional[List[str]] = [] # Talking points for host to expand on
expert_quotes: Optional[List[Dict[str, str]]] = [] # Quotes from sources
listener_cta_suggestions: Optional[List[str]] = [] # CTA suggestions
class PodcastExaResearchResponse(BaseModel):
@@ -178,6 +188,7 @@ class PodcastAudioRequest(BaseModel):
scene_title: str
text: str
voice_id: Optional[str] = "Wise_Woman"
custom_voice_id: Optional[str] = None # Voice clone ID for custom voice
speed: Optional[float] = 1.0
volume: Optional[float] = 1.0
pitch: Optional[float] = 0.0
@@ -263,7 +274,9 @@ class PodcastImageRequest(BaseModel):
scene_id: str
scene_title: str
scene_content: Optional[str] = None # Optional: scene lines text for context
scene_emotion: Optional[str] = None # Optional: scene emotion for visual tone
idea: Optional[str] = None # Optional: podcast idea for context
analysis: Optional[Dict[str, Any]] = Field(None, description="AI analysis for visual context (keywords, audience)")
base_avatar_url: Optional[str] = None # Base avatar image URL for scene variations
bible: Optional[Dict[str, Any]] = Field(None, description="Podcast Bible for hyper-personalization")
width: int = 1024
@@ -285,6 +298,7 @@ class PodcastImageResponse(BaseModel):
provider: str
model: Optional[str] = None
cost: float
image_prompt: Optional[str] = None # Return the prompt used for generation
class PodcastVideoGenerationRequest(BaseModel):
@@ -295,6 +309,9 @@ class PodcastVideoGenerationRequest(BaseModel):
audio_url: str = Field(..., description="URL to the generated audio file")
avatar_image_url: Optional[str] = Field(None, description="URL to scene image (required for video generation)")
bible: Optional[Dict[str, Any]] = Field(None, description="Podcast Bible for hyper-personalization")
analysis: Optional[Dict[str, Any]] = Field(None, description="Podcast Analysis for context (content type, audience, takeaways, guest)")
scene_image_prompt: Optional[str] = Field(None, description="Original image generation prompt for visual context")
scene_narration: Optional[str] = Field(None, description="Scene narration/script lines for context")
resolution: str = Field("720p", description="Video resolution (480p or 720p)")
prompt: Optional[str] = Field(None, description="Optional animation prompt override")
seed: Optional[int] = Field(-1, description="Random seed; -1 for random")