feat: podcast demo mode with ALWRITY_ENABLED_FEATURES support

- Add ALWRITY_ENABLED_FEATURES env var for feature gating
- Podcast-only mode: skip LLM bootstrap, scheduler, persona services
- Enhance video generation prompt with scene context, analysis, narration
- Add voice cloning support via custom_voice_id in WaveSpeed
- Add text-to-speech for research results (browser speechSynthesis)
- Fix render queue to sync images from script phase
- Add WaveSpeed LLM pricing (gpt-oss-120b)
- Fix podcast bible generation error handling
- Refactor RouterManager for feature-based router loading
This commit is contained in:
ajaysi
2026-04-03 06:59:59 +05:30
parent c52b1eabc9
commit 63bb937796
58 changed files with 3568 additions and 1597 deletions

View File

@@ -116,6 +116,10 @@ class RouterManager:
if "all" in enabled_features:
return True
# Skip core routers in podcast-only mode (they require non-podcast features)
if enabled_features == {"podcast"}:
return False
# If no required features specified, include by default
if not required_features:
return True

View File

@@ -6,6 +6,7 @@ Centralized constants and directory configuration for podcast module.
from pathlib import Path
from typing import Literal
from loguru import logger
from services.story_writer.audio_generation_service import StoryAudioGenerationService
# Directory paths
@@ -45,11 +46,14 @@ def get_podcast_media_dir(
}[media_type]
if user_id:
tenant_media_dir = ROOT_DIR / "workspace" / f"workspace_{_sanitize_user_id(user_id)}" / "media" / media_subdir
sanitized = _sanitize_user_id(user_id)
tenant_media_dir = ROOT_DIR / "workspace" / f"workspace_{sanitized}" / "media" / media_subdir
resolved_dir = tenant_media_dir.resolve()
else:
resolved_dir = (DATA_MEDIA_DIR / media_subdir).resolve()
logger.debug(f"[Podcast] get_podcast_media_dir: type={media_type}, user_id={user_id}, sanitized={user_id and _sanitize_user_id(user_id)}, resolved={resolved_dir}")
if ensure_exists:
resolved_dir.mkdir(parents=True, exist_ok=True)
@@ -61,7 +65,9 @@ def get_podcast_media_read_dirs(media_type: MediaType, user_id: str | None = Non
dirs: list[Path] = []
if user_id:
dirs.append(get_podcast_media_dir(media_type, user_id))
logger.debug(f"[Podcast] get_podcast_media_read_dirs: added user dir for {user_id}")
dirs.append(get_podcast_media_dir(media_type, None))
logger.debug(f"[Podcast] get_podcast_media_read_dirs: dirs={dirs}")
return dirs

View File

@@ -5,10 +5,11 @@ Analysis endpoint for podcast ideas.
"""
from fastapi import APIRouter, Depends, HTTPException
from typing import Dict, Any
from typing import Dict, Any, Optional, List
import json
import uuid
from sqlalchemy.orm import Session
from pydantic import BaseModel
from services.database import get_db
from middleware.auth_middleware import get_current_user
@@ -258,6 +259,10 @@ Return JSON with:
- top_keywords: 5 podcast-relevant keywords/phrases
- suggested_outlines: 2 items, each with title (<=60 chars) and 4-6 short segments (bullet-friendly, factual)
- title_suggestions: 3 concise episode titles
- episode_hook: one compelling 15-30 second opening hook/angle that grabs attention
- key_takeaways: 3-5 actionable insights listeners will learn
- guest_talking_points: (if guest included) 3-4 suggested questions/angles for guest interview
- listener_cta: one clear call-to-action for listeners
- research_queries: array of {{"query": "string", "rationale": "string"}}
- exa_suggested_config: suggested Exa search options with:
- exa_search_type: "auto" | "neural" | "keyword"
@@ -271,7 +276,10 @@ Return JSON with:
Requirements:
- Keep language factual, actionable, and suited for spoken audio.
- Avoid narrative fiction tone.
- Prefer 2024-2025 context.
- For research queries: Mix of time-sensitive and evergreen queries:
- 2-3 queries should focus on latest 2025-2026 developments, trends, and data (use year in query)
- 2-3 queries should be evergreen/fundamental (concepts, definitions, best practices, proven strategies) - do NOT include years in these
- Today's date is April 2026.
"""
try:
@@ -305,6 +313,10 @@ Requirements:
top_keywords = data.get("top_keywords") or []
suggested_outlines = data.get("suggested_outlines") or []
title_suggestions = data.get("title_suggestions") or []
episode_hook = data.get("episode_hook") or ""
key_takeaways = data.get("key_takeaways") or []
guest_talking_points = data.get("guest_talking_points") or []
listener_cta = data.get("listener_cta") or ""
research_queries = data.get("research_queries") or []
exa_suggested_config = data.get("exa_suggested_config") or None
@@ -314,6 +326,10 @@ Requirements:
top_keywords=top_keywords,
suggested_outlines=suggested_outlines,
title_suggestions=title_suggestions,
episode_hook=episode_hook,
key_takeaways=key_takeaways,
guest_talking_points=guest_talking_points,
listener_cta=listener_cta,
research_queries=research_queries,
exa_suggested_config=exa_suggested_config,
bible=bible_obj.model_dump() if bible_obj else None,
@@ -321,3 +337,106 @@ Requirements:
avatar_prompt=final_avatar_prompt,
)
class RegenerateQueriesRequest(BaseModel):
    """Request body for the research-query regeneration endpoint."""

    # Podcast idea the new queries are generated for.
    idea: str
    # Free-form user feedback; included in the prompt only when non-empty.
    feedback: str
    # Optional earlier analysis payload. The handler reads the
    # "title_suggestions", "top_keywords" and "audience" keys from it.
    existing_analysis: Optional[Dict[str, Any]] = None
    # Optional podcast bible as a plain dict; the handler builds a
    # PodcastBible model from it to serialize extra prompt context.
    bible: Optional[Dict[str, Any]] = None
class RegenerateQueriesResponse(BaseModel):
    """Response body carrying the regenerated research queries."""

    # One dict of string fields per query; the generation prompt asks the
    # model for "query" and "rationale" entries.
    research_queries: List[Dict[str, str]]
def _extract_research_queries(raw: Any, limit: int = 7) -> List[Dict[str, str]]:
    """Normalize an LLM reply into at most ``limit`` research-query dicts.

    Args:
        raw: The value returned by the text-generation call. A dict is used
            directly; a string is parsed as JSON; anything else yields ``[]``.
        limit: Maximum number of queries to keep.

    Returns:
        A list of dicts containing only string keys and string values, each
        with a non-empty ``"query"`` entry. Malformed items are dropped so the
        ``List[Dict[str, str]]`` response model cannot fail validation on them.
    """
    if isinstance(raw, str):
        try:
            raw = json.loads(raw)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass. Catching only it
            # (instead of a bare ``except``) keeps KeyboardInterrupt/SystemExit
            # and genuine programming errors visible.
            return []
    if not isinstance(raw, dict):
        return []

    candidates = raw.get("research_queries")
    if not isinstance(candidates, list):
        return []

    cleaned: List[Dict[str, str]] = []
    for item in candidates:
        if not isinstance(item, dict):
            continue
        entry = {
            key: value
            for key, value in item.items()
            if isinstance(key, str) and isinstance(value, str)
        }
        if entry.get("query"):
            cleaned.append(entry)
    return cleaned[:limit]


@router.post("/regenerate-queries", response_model=RegenerateQueriesResponse)
async def regenerate_research_queries(
    request: RegenerateQueriesRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """
    Regenerate research queries based on user feedback and existing analysis.

    Builds a prompt from the podcast idea, the optional user feedback, selected
    fields of the existing analysis and the optional serialized podcast bible,
    sends it to ``llm_text_gen`` and returns up to 7 well-formed queries.

    Raises:
        HTTPException: 500 when the generation call or response construction
            fails. HTTPExceptions raised further down are propagated unchanged.
    """
    user_id = require_authenticated_user(current_user)

    # Must match the number of queries requested in the prompt text below.
    max_queries = 7

    idea = request.idea
    feedback = request.feedback

    # Defaults used when no prior analysis is supplied.
    topic = idea
    keywords = ""
    audience = ""
    analysis = request.existing_analysis
    if analysis:
        titles = analysis.get("title_suggestions")
        if isinstance(titles, list) and titles and titles[0]:
            topic = titles[0]
        top_keywords = analysis.get("top_keywords")
        if isinstance(top_keywords, list):
            # str() guards against non-string keyword entries, which would
            # make str.join raise TypeError before the try block below.
            keywords = ", ".join(str(keyword) for keyword in top_keywords[:5])
        audience = analysis.get("audience") or ""

    # Serialize Bible context if provided. This is deliberately best-effort:
    # a bible that fails to load is logged and the prompt is built without it.
    bible_context = ""
    if request.bible:
        try:
            bible_service = PodcastBibleService()
            from models.podcast_bible_models import PodcastBible
            bible_data = PodcastBible(**request.bible)
            bible_context = bible_service.serialize_bible(bible_data)
        except Exception as e:
            logger.warning(f"Failed to serialize bible for query regeneration: {e}")

    # Optional prompt sections are built up front rather than as nested
    # f-strings: backslashes inside f-string replacement fields are a
    # SyntaxError before Python 3.12.
    feedback_section = f"USER FEEDBACK: {feedback}" if feedback else ""
    analysis_section = (
        f"EXISTING ANALYSIS CONTEXT:\n- Topic: {topic}\n- Keywords: {keywords}\n- Audience: {audience}\n"
        if analysis
        else ""
    )
    bible_section = f"PODCAST BIBLE CONTEXT:\n{bible_context}\n" if bible_context else ""

    prompt = f"""
You are a research strategist for podcast content. Given a podcast idea, existing analysis, and user feedback,
generate 7 new research queries that address the user's specific needs.
{feedback_section}
{analysis_section}
{bible_section}
Podcast Idea: "{idea}"
TASK:
Generate exactly 7 research queries that:
1. Incorporate the user's feedback direction
2. Build on the existing analysis context
3. Mix of time-sensitive (2025-2026) and evergreen topics
4. Are highly specific to the podcast topic
Return JSON with:
- research_queries: array of {{"query": "string", "rationale": "string"}}
Requirements:
- At least 2-3 queries should focus on latest 2025-2026 developments (include year in query)
- At least 2-3 queries should be evergreen (concepts, definitions, best practices - NO year)
- Queries should be specific and actionable, not generic
"""
    try:
        from services.llm_providers.main_text_generation import llm_text_gen
        # NOTE(review): llm_text_gen is called without await, so it appears to
        # be synchronous and would block the event loop here - confirm.
        raw = llm_text_gen(
            prompt=prompt,
            user_id=user_id,
            json_struct={"research_queries": [{"query": "string", "rationale": "string"}]},
            preferred_provider=None,
            flow_type="premium_tool",
        )
        queries = _extract_research_queries(raw, limit=max_queries)
        return RegenerateQueriesResponse(research_queries=queries)
    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"[Regenerate Queries] Failed for user {user_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Regenerate queries failed: {exc}") from exc

View File

@@ -126,12 +126,14 @@ async def generate_podcast_audio(
try:
audio_service = get_podcast_audio_service(user_id)
logger.warning(f"[Podcast] Generating audio with service dir: {audio_service.output_dir}")
result: StoryAudioResult = audio_service.generate_ai_audio(
scene_number=0,
scene_title=request.scene_title,
text=request.text.strip(),
user_id=user_id,
voice_id=request.voice_id or "Wise_Woman",
custom_voice_id=request.custom_voice_id,
speed=request.speed or 1.0, # Normal speed (was 0.9, but too slow - causing duration issues)
volume=request.volume or 1.0,
pitch=request.pitch or 0.0, # Normal pitch (0.0 = neutral)
@@ -149,6 +151,8 @@ async def generate_podcast_audio(
if result.get("audio_url") and "/api/story/audio/" in result.get("audio_url", ""):
audio_filename = result.get("audio_filename", "")
result["audio_url"] = f"/api/podcast/audio/{audio_filename}"
logger.warning(f"[Podcast] Audio generated - path: {result.get('audio_path')}, url: {result.get('audio_url')}")
except Exception as exc:
raise HTTPException(status_code=500, detail=f"Audio generation failed: {exc}")
@@ -387,7 +391,9 @@ async def serve_podcast_audio(
raise HTTPException(status_code=400, detail="Invalid filename")
user_id = require_authenticated_user(current_user)
logger.warning(f"[Podcast] serve_podcast_audio called: user_id={user_id}, filename={filename}")
audio_path = _resolve_podcast_media_file(filename, "audio", user_id)
logger.warning(f"[Podcast] Resolved audio path: {audio_path}")
return FileResponse(audio_path, media_type="audio/mpeg")

View File

@@ -104,6 +104,16 @@ async def generate_podcast_scene_image(
# Otherwise, generate from scratch with podcast-optimized prompt
image_prompt = "" # Initialize prompt variable
# Emotion to lighting mapping for visual tone
emotion_lighting = {
"happy": "warm, bright lighting, cheerful atmosphere",
"excited": "dynamic, energetic lighting with highlights",
"serious": "professional, balanced lighting, authoritative feel",
"curious": "soft, inviting lighting, thoughtful atmosphere",
"confident": "strong, dramatic lighting, authoritative look",
"neutral": "professional, balanced lighting"
}
if base_avatar_bytes:
# Use Ideogram Character API for consistent character generation
# Use custom prompt if provided, otherwise build scene-specific prompt
@@ -127,6 +137,28 @@ async def generate_podcast_scene_image(
if bible_obj.host.look:
prompt_parts.append(f"Host Look: {bible_obj.host.look}")
# Scene emotion for visual tone
emotion_lighting = {
"happy": "warm, bright lighting, cheerful atmosphere",
"excited": "dynamic, energetic lighting with highlights",
"serious": "professional, balanced lighting, authoritative feel",
"curious": "soft, inviting lighting, thoughtful atmosphere",
"confident": "strong, dramatic lighting, authoritative look",
"neutral": "professional, balanced lighting"
}
scene_emotion = request.scene_emotion
if scene_emotion and scene_emotion in emotion_lighting:
prompt_parts.append(emotion_lighting[scene_emotion])
# AI Analysis context for visual relevance
if request.analysis:
keywords = request.analysis.get("topKeywords", [])[:5]
if keywords:
prompt_parts.append(f"Keywords: {', '.join(keywords)}")
audience = request.analysis.get("audience", "")
if audience:
prompt_parts.append(f"Target: {audience}")
# Scene content insights for visual context
if request.scene_content:
content_preview = request.scene_content[:200].replace("\n", " ").strip()
@@ -139,6 +171,12 @@ async def generate_podcast_scene_image(
visual_keywords.append("modern tech studio setting")
if any(word in content_lower for word in ["business", "growth", "strategy", "market"]):
visual_keywords.append("professional business studio")
if any(word in content_lower for word in ["nature", "outdoor", "environment", "green"]):
visual_keywords.append("natural outdoor setting")
if any(word in content_lower for word in ["medical", "health", "wellness"]):
visual_keywords.append("clean medical studio")
if any(word in content_lower for word in ["education", "learning", "students"]):
visual_keywords.append("classroom or educational setting")
if visual_keywords:
prompt_parts.append(", ".join(visual_keywords))
@@ -265,6 +303,19 @@ async def generate_podcast_scene_image(
if request.scene_title:
prompt_parts.append(f"Scene theme: {request.scene_title}")
# Scene emotion for visual tone (no avatar branch)
if request.scene_emotion and request.scene_emotion in emotion_lighting:
prompt_parts.append(emotion_lighting[request.scene_emotion])
# AI Analysis context (no avatar branch)
if request.analysis:
keywords = request.analysis.get("topKeywords", [])[:5]
if keywords:
prompt_parts.append(f"Keywords: {', '.join(keywords)}")
audience = request.analysis.get("audience", "")
if audience:
prompt_parts.append(f"Target: {audience}")
# Content context for visual relevance
if request.scene_content:
content_preview = request.scene_content[:150].replace("\n", " ").strip()
@@ -276,6 +327,12 @@ async def generate_podcast_scene_image(
visual_keywords.append("modern technology aesthetic")
if any(word in content_lower for word in ["business", "growth", "strategy", "market"]):
visual_keywords.append("professional business environment")
if any(word in content_lower for word in ["nature", "outdoor", "environment"]):
visual_keywords.append("natural outdoor setting")
if any(word in content_lower for word in ["medical", "health", "wellness"]):
visual_keywords.append("clean medical studio")
if any(word in content_lower for word in ["education", "learning", "students"]):
visual_keywords.append("classroom or educational setting")
if visual_keywords:
prompt_parts.append(", ".join(visual_keywords))
@@ -379,6 +436,7 @@ async def generate_podcast_scene_image(
provider=result.provider,
model=result.model,
cost=cost,
image_prompt=image_prompt,
)
except HTTPException:

View File

@@ -27,7 +27,10 @@ async def create_project(
db: Session = Depends(get_db),
current_user: Dict[str, Any] = Depends(get_current_user),
):
"""Create a new podcast project."""
"""Create a new podcast project.
If a project with the same idea already exists, return 409 conflict with existing project info.
"""
try:
user_id = current_user.get("user_id") or current_user.get("id")
if not user_id:
@@ -40,6 +43,19 @@ async def create_project(
if existing:
raise HTTPException(status_code=400, detail="Project ID already exists")
# Check for duplicate idea (case-insensitive partial match)
existing_idea = service.get_project_by_idea(user_id, request.idea)
if existing_idea:
raise HTTPException(
status_code=409,
detail={
"message": "A project with similar idea already exists",
"existing_project_id": existing_idea.project_id,
"existing_idea": existing_idea.idea,
"existing_status": existing_idea.status,
}
)
project = service.create_project(
user_id=user_id,
project_id=request.project_id,

View File

@@ -8,6 +8,7 @@ from fastapi import APIRouter, Depends, HTTPException
from typing import Dict, Any, List
from types import SimpleNamespace
import json
import re
from middleware.auth_middleware import get_current_user
from api.story_writer.utils.auth import require_authenticated_user
@@ -36,10 +37,16 @@ async def podcast_research_exa(
Uses Podcast Bible and Analysis context for hyper-personalization.
"""
user_id = require_authenticated_user(current_user)
logger.warning(f"[Podcast Research] ========== REQUEST START ==========")
logger.warning(f"[Podcast Research] User: {user_id}, Topic: {request.topic[:80]}...")
logger.warning(f"[Podcast Research] Queries count: {len(request.queries) if request.queries else 0}")
queries = [q.strip() for q in request.queries if q and q.strip()]
if not queries:
raise HTTPException(status_code=400, detail="At least one query is required for research.")
logger.warning(f"[Podcast Research] EXACT queries being sent to Exa: {queries}")
exa_cfg = request.exa_config or PodcastExaConfig()
cfg = SimpleNamespace(
@@ -52,6 +59,7 @@ async def podcast_research_exa(
)
provider = ExaResearchProvider()
logger.warning(f"[Podcast Research] Provider initialized, starting Exa search...")
# --- Context Building ---
bible_service = PodcastBibleService()
@@ -68,9 +76,16 @@ async def podcast_research_exa(
if request.analysis:
analysis_context = f"""
PODCAST ANALYSIS CONTEXT:
Audience: {request.analysis.get('audience', 'General')}
========================
Topic: {request.topic}
Target Audience: {request.analysis.get('audience', 'General')}
Content Type: {request.analysis.get('content_type', 'Informative')}
Top Keywords: {', '.join(request.analysis.get('top_keywords', []))}
Episode Hook (Intro): {request.analysis.get('episode_hook', 'N/A')}
Key Takeaways: {', '.join(request.analysis.get('key_takeaways', [])) or 'N/A'}
Guest Talking Points: {', '.join(request.analysis.get('guest_talking_points', [])) or 'N/A'}
Listener CTA: {request.analysis.get('listener_cta', 'N/A')}
"""
# Exa search params
@@ -84,6 +99,7 @@ Top Keywords: {', '.join(request.analysis.get('top_keywords', []))}
try:
# 1. RUN EXA SEARCH
logger.warning(f"[Podcast Research] Calling Exa search with topic: {request.topic[:100]}...")
result = await provider.search(
prompt=request.topic,
topic=request.topic,
@@ -92,8 +108,9 @@ Top Keywords: {', '.join(request.analysis.get('top_keywords', []))}
config=cfg,
user_id=user_id,
)
logger.warning(f"[Podcast Research] Exa search completed, got {len(result.get('sources', []))} sources")
except Exception as exc:
logger.error(f"[Podcast Exa Research] Search failed for user {user_id}: {exc}")
logger.error(f"[Podcast Exa Research] Search failed for user {user_id}: {exc}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Exa research failed: {exc}")
# 2. EXTRACT INSIGHTS VIA LLM
@@ -104,46 +121,77 @@ Top Keywords: {', '.join(request.analysis.get('top_keywords', []))}
key_insights = []
if raw_content and sources:
logger.info(f"[Podcast Research] Extracting insights from {len(sources)} sources for user {user_id}")
logger.warning(f"[Podcast Research] Extracting insights from {len(sources)} sources for user {user_id}")
# Build list of research queries used for this search
queries_used = ", ".join([f"Query {i+1}: {q}" for i, q in enumerate(queries)]) if queries else "No specific queries"
prompt = f"""
You are an expert research analyst for a high-end podcast production team.
Your task is to analyze the following research data and extract deep, actionable insights for a podcast episode.
You are an expert research analyst and content strategist for a high-end podcast production team.
Your task is to analyze the research data and extract deep, podcast-ready insights.
PODCAST CONTEXT:
Topic: {request.topic}
================
Main Topic: {request.topic}
RESEARCH QUERIES USED:
=====================
{queries_used}
PODCAST BIBLE & BRAND CONTEXT:
==============================
{bible_context}
PODCAST ANALYSIS (from AI Analysis phase):
==========================================
{analysis_context}
RESEARCH DATA (from {len(sources)} sources):
============================================
{raw_content}
TASK:
1. Provide a comprehensive summary (2-3 paragraphs) of the most important findings. Use Markdown for formatting (bolding, lists).
2. Extract 3-5 "Key Insights". Each insight should have a title and a detailed explanation.
3. For each insight, identify which source indices (e.g. 1, 2) it was derived from.
YOUR TASK:
==========
As a podcast research expert, analyze this data and create content that will:
1. Engage the specific target audience identified above
2. Support the episode hook and key takeaways already planned
3. Provide talking points that complement the guest's expertise
4. Include a compelling call-to-action for listeners
NOTE: The research data includes "Key Highlights", "Summaries", and "Excerpts" from various sources.
Pay special attention to the "Key Highlights" sections as they contain the most relevant information extracted by the neural search engine.
Return JSON structure:
REQUIRED OUTPUT (JSON):
=======================
{{
"summary": "Detailed markdown summary...",
"summary": "2-3 paragraph comprehensive summary in Markdown. Start with a hook that matches the episode intro. Include specific data points, expert quotes, and trends.",
"key_insights": [
{{
"title": "Insight Title",
"content": "Detailed markdown content...",
"source_indices": [1, 2]
"title": "Catchy, engaging title for this insight",
"content": "3-4 sentences with specific facts, quotes, or data. Write in a conversational tone suitable for a podcast host to discuss.",
"source_indices": [1, 2, 3],
"podcast_talking_points": ["Point 1 host can expand on", "Counter-point or follow-up", "Question to ask guest"]
}}
]
],
"expert_quotes": [
{{
"quote": "Direct quote from source",
"source_index": 1,
"context": "Why this quote matters for the podcast"
}}
],
"listener_cta_suggestions": ["Specific action listener can take", "Resource to share", "Next episode preview"]
}}
Requirements:
- Ensure insights are deep, not just superficial facts. Look for trends, expert opinions, and specific data points.
- Tone should be professional, insightful, and ready for a podcast host to discuss.
- Avoid generic filler.
QUALITY STANDARDS:
==================
- INSIGHTS MUST BE DEEP, not superficial - avoid generic statements
- Include SPECIFIC DATA POINTS, percentages, statistics when available
- Extract EXPERT QUOTES that hosts can reference
- Identify GAPS in the research where more depth is needed
- Make content naturally flow into the planned episode hook and CTA
- Write in a CONVERSATIONAL tone - how a host would actually speak
- Flag any CONTROVERSIAL or debatable claims for host to address
"""
try:
logger.warning(f"[Podcast Research] Calling LLM for insight extraction...")
llm_response = llm_text_gen(
prompt=prompt,
user_id=user_id,
@@ -151,15 +199,45 @@ Requirements:
preferred_provider=None,
flow_type="premium_tool",
)
logger.warning(f"[Podcast Research] LLM response received, length: {len(llm_response) if llm_response else 0}")
# Normalize response
# Normalize response - handle both string and dict responses
data = None
if isinstance(llm_response, str):
data = json.loads(llm_response)
try:
# Try to fix common JSON issues
fixed_response = llm_response.strip()
# Remove markdown code blocks if present
if fixed_response.startswith("```"):
fixed_response = fixed_response.split("```")[1]
if fixed_response.startswith("json"):
fixed_response = fixed_response[4:]
fixed_response = fixed_response.strip()
data = json.loads(fixed_response)
except json.JSONDecodeError as json_err:
logger.warning(f"[Podcast Research] Failed to parse JSON: {json_err}. Response preview: {llm_response[:500]}...")
# Try to extract JSON from response using regex
json_match = re.search(r'\{.*\}', llm_response, re.DOTALL)
if json_match:
try:
data = json.loads(json_match.group())
logger.warning("[Podcast Research] Successfully extracted JSON via regex")
except:
pass
else:
data = llm_response
summary = data.get("summary", "")
key_insights = [PodcastResearchInsight(**insight) for insight in data.get("key_insights", [])]
if data:
try:
summary = data.get("summary", "")
key_insights = [PodcastResearchInsight(**insight) for insight in data.get("key_insights", [])]
except Exception as insight_err:
logger.warning(f"[Podcast Research] Failed to parse insights: {insight_err}. Data keys: {list(data.keys()) if isinstance(data, dict) else 'not a dict'}")
summary = data.get("summary", "") if isinstance(data, dict) else ""
key_insights = []
else:
summary = ""
key_insights = []
except HTTPException:
raise
except Exception as exc:
@@ -183,21 +261,32 @@ Requirements:
logger.warning(f"[Podcast Exa Research] Failed to track usage: {track_err}")
sources_payload = []
seen_urls = set()
for src in sources:
url = src.get("url", "")
# Skip duplicates
if url and url in seen_urls:
continue
if url:
seen_urls.add(url)
try:
sources_payload.append(PodcastExaSource(**src))
except Exception:
sources_payload.append(PodcastExaSource(**{
"title": src.get("title", ""),
"url": src.get("url", ""),
"excerpt": src.get("excerpt", ""),
"url": url,
"excerpt": src.get("excerpt") or (src.get("highlights")[0] if src.get("highlights") else "") or src.get("summary", ""),
"published_at": src.get("published_at"),
"publishedDate": src.get("publishedDate"),
"highlights": src.get("highlights"),
"summary": src.get("summary"),
"source_type": src.get("source_type"),
"index": src.get("index"),
"image": src.get("image"),
"author": src.get("author"),
"text": src.get("text"),
"credibility_score": src.get("credibility_score"),
}))
return PodcastExaResearchResponse(

View File

@@ -1,11 +1,12 @@
"""
Podcast Script Handlers
Script generation endpoint.
Script generation and approval endpoints.
"""
from fastapi import APIRouter, Depends, HTTPException
from typing import Dict, Any
from typing import Dict, Any, Optional
from pydantic import BaseModel, Field
import json
from middleware.auth_middleware import get_current_user
@@ -24,6 +25,29 @@ from ..models import (
router = APIRouter()
class SceneApprovalRequest(BaseModel):
    """Request body for recording a scene approval decision."""

    # Both identifiers are required and must be non-empty strings.
    project_id: str = Field(..., min_length=1)
    scene_id: str = Field(..., min_length=1)
    # Approval decision; defaults to approved when omitted.
    approved: bool = True
    # Optional free-form reviewer notes.
    notes: Optional[str] = None
@router.post("/script/approve")
async def approve_podcast_scene(
    request: SceneApprovalRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
) -> Dict[str, Any]:
    """Record a podcast scene approval decision and echo it back.

    The decision is written to the application log together with the
    authenticated user id; this handler itself performs no database or file
    write, and ``request.notes`` is accepted but not used here.

    Returns:
        A dict with ``success`` set to True plus the ``project_id``,
        ``scene_id`` and ``approved`` values from the request.
    """
    user_id = require_authenticated_user(current_user)

    # The log line is the only record of the approval made by this handler.
    logger.warning(f"[Podcast] Scene approval recorded user={user_id} project={request.project_id} scene={request.scene_id} approved={request.approved}")

    acknowledgement: Dict[str, Any] = dict(
        success=True,
        project_id=request.project_id,
        scene_id=request.scene_id,
        approved=request.approved,
    )
    return acknowledgement
@router.post("/script", response_model=PodcastScriptResponse)
async def generate_podcast_script(
request: PodcastScriptRequest,
@@ -33,6 +57,10 @@ async def generate_podcast_script(
Generate a podcast script outline (scenes + lines) using podcast-oriented prompting.
"""
user_id = require_authenticated_user(current_user)
logger.warning(f"[ScriptGen] ========== SCRIPT GENERATION START ==========")
logger.warning(f"[ScriptGen] Topic: {request.idea[:60]}...")
logger.warning(f"[ScriptGen] Duration: {request.duration_minutes} min, Speakers: {request.speakers}")
logger.warning(f"[ScriptGen] Has research: {bool(request.research)}, Has bible: {bool(request.bible)}, Has analysis: {bool(request.analysis)}")
# Build comprehensive research context for higher-quality scripts
research_context = ""
@@ -77,55 +105,53 @@ async def generate_podcast_script(
# Extract Analysis and Outline context for grounding
analysis_context = ""
if request.analysis:
analysis_context = f"""
TARGET AUDIENCE: {request.analysis.get('audience', 'General')}
CONTENT TYPE: {request.analysis.get('contentType', 'Conversational')}
TOP KEYWORDS: {', '.join(request.analysis.get('topKeywords', []))}
"""
try:
audience = request.analysis.get('audience', '') or ''
content_type = request.analysis.get('contentType', '') or ''
keywords = request.analysis.get('topKeywords', []) or []
analysis_context = f"ANALYSIS: Audience={audience} | Type={content_type} | Keywords={', '.join(keywords[:8])}"
except:
pass
outline_context = ""
if request.outline:
outline_context = f"""
REFINED EPISODE OUTLINE (Follow this structure closely):
Title: {request.outline.get('title', 'N/A')}
Segments: {' | '.join(request.outline.get('segments', []))}
"""
try:
title = request.outline.get('title', '') or ''
segments = request.outline.get('segments', []) or []
outline_context = f"OUTLINE: {title} - {' | '.join(segments[:5])}"
except:
pass
prompt = f"""You are an expert podcast script planner. Create natural, conversational podcast scenes.
prompt = f"""Create a podcast script with scenes and dialogue.
{f"PODCAST BIBLE (Hyper-Personalization Context):\n{bible_context}\n" if bible_context else ""}
{f"ANALYSIS CONTEXT:\n{analysis_context}\n" if analysis_context else ""}
{f"REFINED OUTLINE:\n{outline_context}\n" if outline_context else ""}
{f"BIBLE: {bible_context[:1500]}" if bible_context else ""}
{f"{analysis_context}" if analysis_context else ""}
{f"{outline_context}" if outline_context else ""}
{f"RESEARCH: {research_context[:1200]}" if research_context else ""}
Podcast Idea: "{request.idea}"
Duration: ~{request.duration_minutes} minutes
Speakers: {request.speakers} (Host + optional Guest)
Topic: "{request.idea}"
Duration: {request.duration_minutes} min | Speakers: {request.speakers}
{f"RESEARCH CONTEXT:\n{research_context}\n" if research_context else ""}
Return JSON with scenes array. Each scene:
- id: string
- title: short title (<=50 chars)
- duration: seconds (total/5)
- emotion: neutral|happy|excited|serious|curious|confident
- lines: array of {{speaker, text, emphasis}}
- Use 2-4 LINES PER SCENE (shorter script = lower TTS costs)
- Each line: 1-3 sentences, conversational
- Plain text only, no markdown
Return JSON with:
- scenes: array of scenes. Each scene has:
- id: string
- title: short scene title (<= 60 chars)
- duration: duration in seconds (evenly split across total duration)
- emotion: string (one of: "neutral", "happy", "excited", "serious", "curious", "confident")
- lines: array of {{"speaker": "...", "text": "...", "emphasis": boolean}}
* Write natural, conversational dialogue
* Each line can be a sentence or a few sentences that flow together
* Use plain text only - no markdown formatting (no asterisks, underscores, etc.)
* Mark "emphasis": true for key statistics or important points
Guidelines:
- Write for spoken delivery: conversational, natural, with contractions.
- Follow the interaction tone specified in the Bible.
- Ensure the Host persona matches the background and personality traits from the Bible.
- Structure the intro and outro scenes according to the Bible's "Intro Format" and "Outro Format".
- Adhere to any constraints mentioned in the Bible.
- Use insights from the Research Context to ground the conversation in facts.
- IMPORTANT: Follow the REFINED OUTLINE segments as the primary structure for the episode.
COST OPTIMIZATION:
- 5-6 scenes max for {request.duration_minutes} min episode
- Concise, information-dense dialogue
- Skip filler words and redundant phrases
- Focus on unique insights from research
- Make every line count toward value delivery
"""
try:
logger.warning(f"[ScriptGen] Calling LLM to generate script (prompt length: {len(prompt)})...")
raw = llm_text_gen(
prompt=prompt,
user_id=user_id,
@@ -133,6 +159,7 @@ Guidelines:
preferred_provider=None,
flow_type="premium_tool",
)
logger.warning(f"[ScriptGen] LLM response received, length: {len(raw) if raw else 0}")
except HTTPException:
raise
except Exception as exc:

View File

@@ -140,17 +140,20 @@ def _execute_podcast_video_task(
except Exception as e:
logger.warning(f"[Podcast] Failed to fetch project context for video generation: {e}")
# Prepare scene data for animation
# Prepare scene data for animation - include all context for enhanced prompt
scene_data = {
"scene_number": scene_number,
"title": request.scene_title,
"scene_id": request.scene_id,
"image_prompt": request.scene_image_prompt,
"description": request.scene_narration,
"lines": [{"text": request.scene_narration}] if request.scene_narration else [],
}
story_context = {
"project_id": request.project_id,
"type": "podcast",
"bible": project_bible,
"analysis": project_analysis,
"analysis": request.analysis or project_analysis, # Use passed analysis or fallback to DB
}
animation_result = animate_scene_with_voiceover(

View File

@@ -63,6 +63,10 @@ class PodcastAnalyzeResponse(BaseModel):
top_keywords: list[str]
suggested_outlines: list[Dict[str, Any]]
title_suggestions: list[str]
episode_hook: Optional[str] = None
key_takeaways: Optional[list[str]] = None
guest_talking_points: Optional[list[str]] = None
listener_cta: Optional[str] = None
research_queries: Optional[List[Dict[str, str]]] = None
exa_suggested_config: Optional[Dict[str, Any]] = None
bible: Optional[Dict[str, Any]] = None
@@ -142,12 +146,15 @@ class PodcastExaSource(BaseModel):
url: str = ""
excerpt: str = ""
published_at: Optional[str] = None
publishedDate: Optional[str] = None # Exa format
highlights: Optional[List[str]] = None
summary: Optional[str] = None
source_type: Optional[str] = None
index: Optional[int] = None
image: Optional[str] = None
author: Optional[str] = None
text: Optional[str] = None # Exa full text
credibility_score: Optional[float] = None # Exa scores
class PodcastResearchInsight(BaseModel):
@@ -155,6 +162,9 @@ class PodcastResearchInsight(BaseModel):
title: str
content: str
source_indices: List[int] = []
podcast_talking_points: Optional[List[str]] = [] # Talking points for host to expand on
expert_quotes: Optional[List[Dict[str, str]]] = [] # Quotes from sources
listener_cta_suggestions: Optional[List[str]] = [] # CTA suggestions
class PodcastExaResearchResponse(BaseModel):
@@ -178,6 +188,7 @@ class PodcastAudioRequest(BaseModel):
scene_title: str
text: str
voice_id: Optional[str] = "Wise_Woman"
custom_voice_id: Optional[str] = None # Voice clone ID for custom voice
speed: Optional[float] = 1.0
volume: Optional[float] = 1.0
pitch: Optional[float] = 0.0
@@ -263,7 +274,9 @@ class PodcastImageRequest(BaseModel):
scene_id: str
scene_title: str
scene_content: Optional[str] = None # Optional: scene lines text for context
scene_emotion: Optional[str] = None # Optional: scene emotion for visual tone
idea: Optional[str] = None # Optional: podcast idea for context
analysis: Optional[Dict[str, Any]] = Field(None, description="AI analysis for visual context (keywords, audience)")
base_avatar_url: Optional[str] = None # Base avatar image URL for scene variations
bible: Optional[Dict[str, Any]] = Field(None, description="Podcast Bible for hyper-personalization")
width: int = 1024
@@ -285,6 +298,7 @@ class PodcastImageResponse(BaseModel):
provider: str
model: Optional[str] = None
cost: float
image_prompt: Optional[str] = None # Return the prompt used for generation
class PodcastVideoGenerationRequest(BaseModel):
@@ -295,6 +309,9 @@ class PodcastVideoGenerationRequest(BaseModel):
audio_url: str = Field(..., description="URL to the generated audio file")
avatar_image_url: Optional[str] = Field(None, description="URL to scene image (required for video generation)")
bible: Optional[Dict[str, Any]] = Field(None, description="Podcast Bible for hyper-personalization")
analysis: Optional[Dict[str, Any]] = Field(None, description="Podcast Analysis for context (content type, audience, takeaways, guest)")
scene_image_prompt: Optional[str] = Field(None, description="Original image generation prompt for visual context")
scene_narration: Optional[str] = Field(None, description="Scene narration/script lines for context")
resolution: str = Field("720p", description="Video resolution (480p or 720p)")
prompt: Optional[str] = Field(None, description="Optional animation prompt override")
seed: Optional[int] = Field(-1, description="Random seed; -1 for random")

View File

@@ -9,59 +9,26 @@ builtins.Dict = typing.Dict
builtins.Any = typing.Any
builtins.Union = typing.Union
# Import onboarding models VERY early to ensure they're available before any services
from models.onboarding import APIKey, WebsiteAnalysis, ResearchPreferences, PersonaData, CompetitorAnalysis
from fastapi import FastAPI, HTTPException, Depends, Request, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from pydantic import BaseModel
from typing import Dict, Any, Optional
import os
from loguru import logger
from dotenv import load_dotenv
import asyncio
from datetime import datetime
# Import OnboardingSession right after basic imports to ensure it's available
from models.onboarding import OnboardingSession
from services.subscription import monitoring_middleware
# Import remaining onboarding models
from models import APIKey, WebsiteAnalysis, ResearchPreferences, PersonaData, CompetitorAnalysis
# Import modular utilities
from alwrity_utils import HealthChecker, RateLimiter, FrontendServing, RouterManager
from alwrity_utils import OnboardingManager
# Load environment variables FIRST, before any other project imports, so that
# modules which read os.environ at import time see the configured values.
from pathlib import Path
from dotenv import load_dotenv

backend_dir = Path(__file__).parent
project_root = backend_dir.parent

# Each location is loaded exactly once. load_dotenv() leaves already-set
# variables untouched by default, so files loaded earlier take priority.
load_dotenv(backend_dir / '.env')   # backend/.env (highest priority)
load_dotenv(project_root / '.env')  # root .env (fallback)
load_dotenv()                       # CWD .env (fallback)
# Set LOG_LEVEL early to WARNING to suppress DEBUG persona logs in podcast mode.
# The env value is split on commas, stripped and lower-cased, so "podcast",
# " Podcast " and "podcast," are all recognised as podcast-only, while a mixed
# list such as "podcast,core" is not.
import os

_early_features = {
    part.strip()
    for part in os.getenv("ALWRITY_ENABLED_FEATURES", "").lower().split(",")
    if part.strip()
}
if _early_features == {"podcast"}:
    os.environ["LOG_LEVEL"] = "WARNING"
def get_enabled_features() -> set:
    """Return the feature names enabled via ``ALWRITY_ENABLED_FEATURES``.

    Accepted values of the environment variable:

    - ``"all"``, unset, or blank: every feature is enabled; returns ``{"all"}``.
    - a single feature, e.g. ``"podcast"``.
    - a comma-separated list, e.g. ``"podcast,core"``.

    The value is lower-cased, each entry is stripped of surrounding whitespace,
    and empty entries are dropped (so a value consisting only of separators
    yields an empty set).
    """
    raw_value = os.getenv("ALWRITY_ENABLED_FEATURES", "all").strip().lower()
    if not raw_value or raw_value == "all":
        return {"all"}
    return {name.strip() for name in raw_value.split(",") if name.strip()}
@@ -71,6 +38,32 @@ def is_podcast_only_demo_mode() -> bool:
return "podcast" in enabled and "all" not in enabled
# Import onboarding models (after env is loaded)
from models.onboarding import APIKey, WebsiteAnalysis, ResearchPreferences, PersonaData, CompetitorAnalysis
# Import FastAPI and related
from fastapi import FastAPI, HTTPException, Depends, Request, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from pydantic import BaseModel
from typing import Dict, Any, Optional
import os
import asyncio
from datetime import datetime
from loguru import logger
# Import modular utilities (skip OnboardingManager import in podcast-only mode)
from alwrity_utils import HealthChecker, RateLimiter, FrontendServing, RouterManager
if not is_podcast_only_demo_mode():
from alwrity_utils import OnboardingManager
# Import monitoring middleware
from services.subscription import monitoring_middleware
def should_include_non_podcast_features() -> bool:
"""Check if non-podcast features should be included."""
enabled = get_enabled_features()
@@ -94,8 +87,10 @@ from api.component_logic import router as component_logic_router
# Import subscription API endpoints
from api.subscription import router as subscription_router
# Import Step 3 onboarding routes
from api.onboarding_utils.step3_routes import router as step3_routes
# Import Step 3 onboarding routes (skip in podcast-only mode)
step3_routes = None
if not PODCAST_ONLY_DEMO_MODE:
from api.onboarding_utils.step3_routes import router as step3_routes
# Import SEO tools router
from routers.seo_tools import router as seo_tools_router
@@ -218,7 +213,9 @@ router_manager = RouterManager(app)
router_group_status: Dict[str, Dict[str, Any]] = {}
onboarding_manager = None
# Only create OnboardingManager if NOT in podcast-only mode
if not PODCAST_ONLY_DEMO_MODE:
from alwrity_utils import OnboardingManager
onboarding_manager = OnboardingManager(app)
# Middleware Order (FastAPI executes in REVERSE order of registration - LIFO):
@@ -575,9 +572,12 @@ async def startup_event():
if startup_report.get("status") != "healthy":
logger.error(f"Startup readiness finished with failures: {startup_report.get('errors', [])}")
# Start task scheduler
from services.scheduler import get_scheduler
await get_scheduler().start()
# Start task scheduler only if NOT in podcast-only mode
if not is_podcast_only_demo_mode():
from services.scheduler import get_scheduler
await get_scheduler().start()
else:
logger.info("[Podcast] Skipping scheduler startup (podcast-only mode)")
# Check Wix API key configuration
wix_api_key = os.getenv('WIX_API_KEY')

View File

@@ -62,6 +62,7 @@ class VoiceCloneResult:
def generate_audio(
text: str,
voice_id: str = "Wise_Woman",
custom_voice_id: Optional[str] = None,
speed: float = 1.0,
volume: float = 1.0,
pitch: float = 0.0,
@@ -173,6 +174,7 @@ def generate_audio(
audio_bytes = client.generate_speech(
text=text,
voice_id=voice_id,
custom_voice_id=custom_voice_id,
speed=speed,
volume=volume,
pitch=pitch,

View File

@@ -67,7 +67,7 @@ def llm_text_gen(
resolved_flow_type = flow_type or ("sif_agent" if preferred_hf_models else "premium_tool")
flow_tag = f"flow_type={resolved_flow_type}"
logger.info(f"[llm_text_gen][{flow_tag}] Starting text generation")
logger.warning(f"[llm_text_gen][{flow_tag}] Starting text generation")
logger.debug(f"[llm_text_gen] Prompt length: {len(prompt)} characters")
# Set default values for LLM parameters
@@ -94,7 +94,7 @@ def llm_text_gen(
primary_provider = provider_list[0]
if primary_provider in ['wavespeed', 'wave']:
gpt_provider = "wavespeed"
model = os.getenv('WAVESPEED_TEXT_MODEL', 'openai/gpt-oss-120b:cerebras')
model = os.getenv('WAVESPEED_TEXT_MODEL', 'openai/gpt-oss-120b')
elif primary_provider in ['gemini', 'google']:
gpt_provider = "google"
model = "gemini-2.0-flash-001"
@@ -111,7 +111,7 @@ def llm_text_gen(
elif preferred_provider:
if preferred_provider in ['wavespeed', 'wave']:
gpt_provider = "wavespeed"
model = os.getenv('WAVESPEED_TEXT_MODEL', 'openai/gpt-oss-120b:cerebras')
model = os.getenv('WAVESPEED_TEXT_MODEL', 'openai/gpt-oss-120b')
elif preferred_provider in ['openai', 'gpt']:
gpt_provider = "openai"
model = os.getenv('OPENAI_MODEL', 'gpt-4o-mini')
@@ -166,7 +166,7 @@ def llm_text_gen(
if api_key_manager.get_api_key("wavespeed"):
available_providers.append("wavespeed")
logger.info(
logger.warning(
f"[llm_text_gen][{flow_tag}] Provider preflight: env_provider='{env_provider or 'auto'}', "
f"provider_list={provider_list}, strict_provider_mode={strict_provider_mode}, "
f"available_providers={available_providers}, preferred_provider={preferred_provider or 'none'}, "
@@ -278,7 +278,12 @@ def llm_text_gen(
UsageSummary.billing_period == current_period
).first()
# No separate log here - we'll create unified log after API call and usage tracking
# Log subscription details before making the API call
if usage:
total_llm_calls = (usage.gemini_calls or 0) + (usage.openai_calls or 0) + (usage.anthropic_calls or 0) + (usage.mistral_calls or 0) + (usage.wavespeed_calls or 0)
logger.info(f"[llm_text_gen] Subscription check passed for user {user_id}: provider={actual_provider_name or gpt_provider}, tokens_requested={estimated_total_tokens}, current_usage=${usage.total_cost or 0:.4f}, calls_used={total_llm_calls}")
else:
logger.info(f"[llm_text_gen] Subscription check passed for user {user_id}: provider={actual_provider_name or gpt_provider}, tokens_requested={estimated_total_tokens}, new_user_no_usage_record")
finally:
db.close()
@@ -363,7 +368,7 @@ def llm_text_gen(
from services.llm_providers.wavespeed_provider import wavespeed_text_response
response_text = wavespeed_text_response(
prompt=prompt,
model=model or "openai/gpt-oss-120b:cerebras",
model=model or "openai/gpt-oss-120b",
temperature=temperature,
max_tokens=max_tokens,
top_p=top_p,

View File

@@ -15,14 +15,31 @@ class PodcastBibleService:
"""Service for generating and managing the Podcast Bible."""
def __init__(self):
    """Set up the optional personalization backend.

    ``PersonalizationService`` is imported and constructed entirely inside
    the guard, so a missing module or a failing constructor leaves
    ``self.personalization_service`` set to ``None`` instead of raising
    from this constructor.
    """
    try:
        # Imported here rather than at module level so an import failure is
        # caught by the same handler as a constructor failure.
        from services.product_marketing.personalization_service import PersonalizationService
        self.personalization_service = PersonalizationService()
    except Exception as e:
        # Deliberate best-effort: log and continue without personalization.
        logger.warning(f"Failed to initialize PersonalizationService: {e}")
        self.personalization_service = None
def generate_bible(self, user_id: str, project_id: str) -> PodcastBible:
"""Generate a Podcast Bible from onboarding data."""
logger.info(f"Generating Podcast Bible for user {user_id}")
try:
preferences = self.personalization_service.get_user_preferences(user_id) or {}
if not self.personalization_service:
logger.warning("PersonalizationService not available, using default bible")
return self._get_default_bible(project_id)
try:
preferences = self.personalization_service.get_user_preferences(user_id)
except Exception as pref_err:
logger.warning(f"Failed to get user preferences: {pref_err}, using defaults")
return self._get_default_bible(project_id)
if not preferences:
logger.info(f"No preferences found for user {user_id}, using defaults")
return self._get_default_bible(project_id)
if not isinstance(preferences, dict):
logger.warning(f"Podcast Bible preferences payload is non-dict for user {user_id}, using defaults")
preferences = {}
@@ -129,18 +146,23 @@ class PodcastBibleService:
name="AI Host",
background="Industry Professional",
expertise_level="Expert",
personality_traits=["Professional", "Informative"],
vocal_style="Authoritative",
vocal_characteristics=["Deep", "Steady"]
vocal_characteristics=["Deep", "Steady"],
look="A professional individual dressed in business-casual attire."
),
audience=AudienceDNA(
expertise_level="Intermediate",
interests=["Industry Trends", "Technology"],
pain_points=["Staying Competitive", "Operational Efficiency"]
pain_points=["Staying Competitive", "Operational Efficiency"],
demographics=None
),
brand=BrandDNA(
industry="General Business",
tone="Professional",
communication_style="Analytical"
communication_style="Analytical",
key_messages=[],
competitor_context=None
),
visual_style=VisualStyle(
environment="Professional modern office studio",

View File

@@ -156,6 +156,12 @@ def _check_production_api_key_loading(
if deploy_env == "local":
_record_check(checks, "production_api_key_loading", True, "skipped in local deploy mode")
return
# Also skip in podcast-only mode (no production API keys needed)
enabled_features = os.getenv("ALWRITY_ENABLED_FEATURES", "all").strip().lower()
if enabled_features == "podcast":
_record_check(checks, "production_api_key_loading", True, "skipped in podcast-only mode")
return
test_tenant_id = os.getenv("ALWRITY_STARTUP_TEST_TENANT_ID", "").strip()
if not test_tenant_id:

View File

@@ -46,6 +46,7 @@ class StoryAudioGenerationService:
return _get_story_media_write_dir("audio", user_id=user_id, db=db)
except Exception as e:
logger.warning(f"[StoryAudioGeneration] Failed to resolve user workspace path for {user_id}: {e}")
# Don't fall back to default - keep using the already-set output_dir for podcast
return self.output_dir
def _generate_audio_filename(self, scene_number: int, scene_title: str) -> str:
@@ -318,6 +319,7 @@ class StoryAudioGenerationService:
text: str,
user_id: str,
voice_id: str = "Wise_Woman",
custom_voice_id: Optional[str] = None,
speed: float = 1.0,
volume: float = 1.0,
pitch: float = 0.0,
@@ -364,6 +366,7 @@ class StoryAudioGenerationService:
result = generate_audio(
text=text.strip(),
voice_id=voice_id,
custom_voice_id=custom_voice_id,
speed=speed,
volume=volume,
pitch=pitch,
@@ -378,8 +381,8 @@ class StoryAudioGenerationService:
enable_sync_mode=enable_sync_mode,
)
# Determine output directory (user workspace or default)
output_dir = self._get_user_audio_dir(user_id, db)
# Use the output_dir that was set when service was created (already handles podcast vs story)
output_dir = self.output_dir
# Save audio to file
audio_filename = self._generate_audio_filename(scene_number, scene_title)

View File

@@ -442,9 +442,34 @@ class PricingService:
"description": "AI Audio Generation default pricing"
}
]
# WaveSpeed LLM Text Generation Pricing (via Cerebras)
wavespeed_llm_pricing = [
{
"provider": APIProvider.WAVESPEED,
"model_name": "openai/gpt-oss-120b",
"cost_per_input_token": 0.0000006, # $0.60 per 1M input tokens
"cost_per_output_token": 0.0000006, # $0.60 per 1M output tokens
"description": "WaveSpeed GPT-OSS 120B (Cerebras) - Fast text generation"
},
{
"provider": APIProvider.WAVESPEED,
"model_name": "openai/gpt-oss-120b:cerebras",
"cost_per_input_token": 0.0000006,
"cost_per_output_token": 0.0000006,
"description": "WaveSpeed GPT-OSS 120B (Cerebras) - Fast text generation"
},
{
"provider": APIProvider.WAVESPEED,
"model_name": "openai/gpt-oss-20b",
"cost_per_input_token": 0.0000002, # $0.20 per 1M input tokens
"cost_per_output_token": 0.0000002, # $0.20 per 1M output tokens
"description": "WaveSpeed GPT-OSS 20B (Cerebras) - Cost-effective text generation"
},
]
# Combine all pricing data (include video pricing in search_pricing list)
all_pricing = gemini_pricing + openai_pricing + anthropic_pricing + mistral_pricing + search_pricing
all_pricing = gemini_pricing + openai_pricing + anthropic_pricing + mistral_pricing + search_pricing + wavespeed_llm_pricing
# Insert or update pricing data
for pricing_data in all_pricing:

View File

@@ -241,6 +241,7 @@ class WaveSpeedClient:
self,
text: str,
voice_id: str,
custom_voice_id: Optional[str] = None,
speed: float = 1.0,
volume: float = 1.0,
pitch: float = 0.0,
@@ -255,6 +256,7 @@ class WaveSpeedClient:
Args:
text: Text to convert to speech (max 10000 characters)
voice_id: Voice ID (e.g., "Wise_Woman", "Friendly_Person", etc.)
custom_voice_id: Custom voice clone ID for using cloned voice
speed: Speech speed (0.5-2.0, default: 1.0)
volume: Speech volume (0.1-10.0, default: 1.0)
pitch: Speech pitch (-12 to 12, default: 0.0)
@@ -269,6 +271,7 @@ class WaveSpeedClient:
return self.speech.generate_speech(
text=text,
voice_id=voice_id,
custom_voice_id=custom_voice_id,
speed=speed,
volume=volume,
pitch=pitch,

View File

@@ -40,6 +40,7 @@ class SpeechGenerator:
self,
text: str,
voice_id: str,
custom_voice_id: Optional[str] = None,
speed: float = 1.0,
volume: float = 1.0,
pitch: float = 0.0,
@@ -54,6 +55,7 @@ class SpeechGenerator:
Args:
text: Text to convert to speech (max 10000 characters)
voice_id: Voice ID (e.g., "Wise_Woman", "Friendly_Person", etc.)
custom_voice_id: Custom voice clone ID for using cloned voice
speed: Speech speed (0.5-2.0, default: 1.0)
volume: Speech volume (0.1-10.0, default: 1.0)
pitch: Speech pitch (-12 to 12, default: 0.0)
@@ -77,6 +79,11 @@ class SpeechGenerator:
if not sanitized_voice_id:
raise ValueError("Voice ID cannot be empty after sanitization")
# Sanitize custom_voice_id if provided
sanitized_custom_voice_id = None
if custom_voice_id:
sanitized_custom_voice_id = str(custom_voice_id).strip() or None
# Ensure numeric parameters are proper floats and within valid ranges
sanitized_speed = max(0.5, min(2.0, float(speed))) if speed is not None else 1.0
sanitized_volume = max(0.1, min(10.0, float(volume))) if volume is not None else 1.0
@@ -112,6 +119,10 @@ class SpeechGenerator:
"enable_sync_mode": bool(enable_sync_mode),
}
# Add custom voice clone ID if provided
if sanitized_custom_voice_id:
payload["custom_voice_id"] = sanitized_custom_voice_id
# Add optional parameters with proper type validation
optional_params = [
"english_normalization",
@@ -179,6 +190,20 @@ class SpeechGenerator:
if response.status_code != 200:
logger.error(f"[WaveSpeed] Speech generation failed: {response.status_code} {response.text}")
# Check for custom voice ID specific errors
response_text = response.text.lower()
if "custom_voice" in response_text or "voice_id" in response_text:
raise HTTPException(
status_code=400,
detail={
"error": "Invalid voice clone ID",
"message": "The custom voice ID is invalid or expired. Please create a new voice clone or use a predefined voice.",
"status_code": response.status_code,
"response": response.text,
},
)
raise HTTPException(
status_code=502,
detail={

View File

@@ -26,20 +26,24 @@ def _generate_simple_infinitetalk_prompt(
story_context: Dict[str, Any],
) -> Optional[str]:
"""
Generate a balanced, concise prompt for InfiniteTalk.
InfiniteTalk is audio-driven, so the prompt should describe the scene and suggest
subtle motion, but avoid overly elaborate cinematic descriptions.
Generate an enhanced prompt for InfiniteTalk video generation.
Includes scene content, analysis, bible context, and visual elements.
Returns None if no meaningful prompt can be generated.
"""
title = (scene_data.get("title") or "").strip()
description = (scene_data.get("description") or "").strip()
image_prompt = (scene_data.get("image_prompt") or "").strip()
lines = scene_data.get("lines", [])
narration = ""
if lines:
# Combine first few lines for context
narration = " ".join([str(l.get("text", "")) for l in lines[:3]])[:150]
# Build a balanced prompt: scene description + simple motion hint
# Build enhanced prompt with multiple context sources
parts = []
# Add scene context
# Add main scene title
if title and len(title) > 5 and title.lower() not in ("scene", "podcast", "episode"):
parts.append(title)
@@ -48,60 +52,70 @@ def _generate_simple_infinitetalk_prompt(
if analysis:
content_type = analysis.get("content_type")
if content_type:
parts.append(f"Style: {content_type}")
parts.append(f"Content type: {content_type}")
# Audience helps define the formality/vibe
# Add key takeaways if available
key_takeaways = analysis.get("keyTakeaways", [])
if key_takeaways and isinstance(key_takeaways, list) and len(key_takeaways) > 0:
takeaway = str(key_takeaways[0])[:80]
if takeaway:
parts.append(f"Key insight: {takeaway}")
# Audience
audience = analysis.get("audience")
if audience:
# Just use first few words of audience to keep it short
short_audience = " ".join(audience.split()[:3])
parts.append(f"For: {short_audience}")
# Add bible context if available
short_audience = " ".join(audience.split()[:3])
parts.append(f"Target audience: {short_audience}")
# Guest info
guest_name = analysis.get("guestName")
guest_expertise = analysis.get("guestExpertise")
if guest_name:
parts.append(f"Guest: {guest_name}")
if guest_expertise:
parts.append(f"Expertise: {guest_expertise}")
# Add bible context
bible = story_context.get("bible", {})
if bible:
host_persona = bible.get("host_persona")
tone = bible.get("tone")
visual_style = bible.get("visual_style")
background = bible.get("background")
if host_persona:
parts.append(f"Host: {host_persona}")
parts.append(f"Host persona: {host_persona}")
if tone:
parts.append(f"Tone: {tone}")
elif description:
# Take first sentence or first 60 chars
desc_part = description.split('.')[0][:60].strip()
if desc_part:
parts.append(desc_part)
elif image_prompt:
# Take first sentence or first 60 chars
img_part = image_prompt.split('.')[0][:60].strip()
if visual_style:
parts.append(f"Visual style: {visual_style}")
if background:
parts.append(f"Background: {background}")
# Add original image prompt as fallback context
if image_prompt and len(parts) < 3:
img_part = image_prompt.split('.')[0][:100].strip()
if img_part:
parts.append(img_part)
parts.append(f"Visual context: {img_part}")
# Add narration snippet if available
if narration and len(parts) < 4:
parts.append(f"Discussing: {narration}")
if not parts:
return None
# Add a simple, subtle motion suggestion (not elaborate camera movements)
# Keep it natural and audio-driven
motion_hints = [
"with subtle movement",
"with gentle motion",
"with natural animation",
]
# Build prompt with visual quality keywords
quality_keywords = "Cinematic lighting, high detail, 4k quality, smooth motion"
# Combine scene description with subtle motion hint
if len(parts[0]) < 80:
# Room for a motion hint
prompt = f"{parts[0]}, {motion_hints[0]}"
else:
# Just use the description if it's already long enough
prompt = parts[0]
# Combine parts into final prompt
prompt = f"{'. '.join(parts)}. {quality_keywords}. With subtle natural movement."
# Keep it concise - max 120 characters (allows for scene + motion hint)
prompt = prompt[:120].strip()
# Allow more room for detailed prompts - max 350 characters
prompt = prompt[:350].strip()
# Clean up trailing commas or incomplete sentences
if prompt.endswith(','):
# Clean up trailing punctuation
if prompt.endswith(',') or prompt.endswith('.'):
prompt = prompt[:-1].strip()
return prompt if len(prompt) >= 15 else None

View File

@@ -50,6 +50,10 @@ def should_bootstrap_linguistic_models() -> bool:
if "all" in enabled_features:
return True
# Podcast-only mode doesn't need linguistic models
if enabled_features == {"podcast"}:
return False
# Map old profile names to features for backwards compatibility
feature_mapping = {
"podcast": "podcast",
@@ -64,14 +68,18 @@ def should_bootstrap_linguistic_models() -> bool:
def should_bootstrap_local_llm_models() -> bool:
    """Decide whether to bootstrap local LLM models based on enabled features.

    SIF/Story Writer requires the local LLM, so the models are bootstrapped
    only when every feature is enabled ("all"). Lean deployments
    (podcast-only, content-planning only, etc.) skip the bootstrap.
    """
    # Default to skip unless "all" is enabled.
    return "all" in get_enabled_features()
def bootstrap_linguistic_models() -> BootstrapResult:
@@ -209,6 +217,10 @@ def bootstrap_local_llm_models() -> BootstrapResult:
# Bootstrap linguistic models BEFORE any imports that might need them
# NOTE(review): presumably collects the BootstrapResult of each bootstrap step — confirm where it is filled
BOOTSTRAP_RESULTS = []
# Load the CWD .env file early so ALWRITY_ENABLED_FEATURES is present in the
# environment before the feature checks read it
from dotenv import load_dotenv
load_dotenv()
if __name__ == "__main__":
enabled_features = get_enabled_features()
features_str = ",".join(sorted(enabled_features))