feat: podcast demo mode with ALWRITY_ENABLED_FEATURES support
- Add ALWRITY_ENABLED_FEATURES env var for feature gating
- Podcast-only mode: skip LLM bootstrap, scheduler, persona services
- Enhance video generation prompt with scene context, analysis, narration
- Add voice cloning support via custom_voice_id in WaveSpeed
- Add text-to-speech for research results (browser speechSynthesis)
- Fix render queue to sync images from script phase
- Add WaveSpeed LLM pricing (gpt-oss-120b)
- Fix podcast bible generation error handling
- Refactor RouterManager for feature-based router loading
This commit is contained in:
@@ -62,6 +62,7 @@ class VoiceCloneResult:
|
||||
def generate_audio(
|
||||
text: str,
|
||||
voice_id: str = "Wise_Woman",
|
||||
custom_voice_id: Optional[str] = None,
|
||||
speed: float = 1.0,
|
||||
volume: float = 1.0,
|
||||
pitch: float = 0.0,
|
||||
@@ -173,6 +174,7 @@ def generate_audio(
|
||||
audio_bytes = client.generate_speech(
|
||||
text=text,
|
||||
voice_id=voice_id,
|
||||
custom_voice_id=custom_voice_id,
|
||||
speed=speed,
|
||||
volume=volume,
|
||||
pitch=pitch,
|
||||
|
||||
@@ -67,7 +67,7 @@ def llm_text_gen(
|
||||
resolved_flow_type = flow_type or ("sif_agent" if preferred_hf_models else "premium_tool")
|
||||
flow_tag = f"flow_type={resolved_flow_type}"
|
||||
|
||||
logger.info(f"[llm_text_gen][{flow_tag}] Starting text generation")
|
||||
logger.warning(f"[llm_text_gen][{flow_tag}] Starting text generation")
|
||||
logger.debug(f"[llm_text_gen] Prompt length: {len(prompt)} characters")
|
||||
|
||||
# Set default values for LLM parameters
|
||||
@@ -94,7 +94,7 @@ def llm_text_gen(
|
||||
primary_provider = provider_list[0]
|
||||
if primary_provider in ['wavespeed', 'wave']:
|
||||
gpt_provider = "wavespeed"
|
||||
model = os.getenv('WAVESPEED_TEXT_MODEL', 'openai/gpt-oss-120b:cerebras')
|
||||
model = os.getenv('WAVESPEED_TEXT_MODEL', 'openai/gpt-oss-120b')
|
||||
elif primary_provider in ['gemini', 'google']:
|
||||
gpt_provider = "google"
|
||||
model = "gemini-2.0-flash-001"
|
||||
@@ -111,7 +111,7 @@ def llm_text_gen(
|
||||
elif preferred_provider:
|
||||
if preferred_provider in ['wavespeed', 'wave']:
|
||||
gpt_provider = "wavespeed"
|
||||
model = os.getenv('WAVESPEED_TEXT_MODEL', 'openai/gpt-oss-120b:cerebras')
|
||||
model = os.getenv('WAVESPEED_TEXT_MODEL', 'openai/gpt-oss-120b')
|
||||
elif preferred_provider in ['openai', 'gpt']:
|
||||
gpt_provider = "openai"
|
||||
model = os.getenv('OPENAI_MODEL', 'gpt-4o-mini')
|
||||
@@ -166,7 +166,7 @@ def llm_text_gen(
|
||||
if api_key_manager.get_api_key("wavespeed"):
|
||||
available_providers.append("wavespeed")
|
||||
|
||||
logger.info(
|
||||
logger.warning(
|
||||
f"[llm_text_gen][{flow_tag}] Provider preflight: env_provider='{env_provider or 'auto'}', "
|
||||
f"provider_list={provider_list}, strict_provider_mode={strict_provider_mode}, "
|
||||
f"available_providers={available_providers}, preferred_provider={preferred_provider or 'none'}, "
|
||||
@@ -278,7 +278,12 @@ def llm_text_gen(
|
||||
UsageSummary.billing_period == current_period
|
||||
).first()
|
||||
|
||||
# No separate log here - we'll create unified log after API call and usage tracking
|
||||
# Log subscription details before making the API call
|
||||
if usage:
|
||||
total_llm_calls = (usage.gemini_calls or 0) + (usage.openai_calls or 0) + (usage.anthropic_calls or 0) + (usage.mistral_calls or 0) + (usage.wavespeed_calls or 0)
|
||||
logger.info(f"[llm_text_gen] Subscription check passed for user {user_id}: provider={actual_provider_name or gpt_provider}, tokens_requested={estimated_total_tokens}, current_usage=${usage.total_cost or 0:.4f}, calls_used={total_llm_calls}")
|
||||
else:
|
||||
logger.info(f"[llm_text_gen] Subscription check passed for user {user_id}: provider={actual_provider_name or gpt_provider}, tokens_requested={estimated_total_tokens}, new_user_no_usage_record")
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
@@ -363,7 +368,7 @@ def llm_text_gen(
|
||||
from services.llm_providers.wavespeed_provider import wavespeed_text_response
|
||||
response_text = wavespeed_text_response(
|
||||
prompt=prompt,
|
||||
model=model or "openai/gpt-oss-120b:cerebras",
|
||||
model=model or "openai/gpt-oss-120b",
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
top_p=top_p,
|
||||
|
||||
@@ -15,14 +15,31 @@ class PodcastBibleService:
|
||||
"""Service for generating and managing the Podcast Bible."""
|
||||
|
||||
def __init__(self):
|
||||
self.personalization_service = PersonalizationService()
|
||||
try:
|
||||
from services.product_marketing.personalization_service import PersonalizationService
|
||||
self.personalization_service = PersonalizationService()
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to initialize PersonalizationService: {e}")
|
||||
self.personalization_service = None
|
||||
|
||||
def generate_bible(self, user_id: str, project_id: str) -> PodcastBible:
|
||||
"""Generate a Podcast Bible from onboarding data."""
|
||||
logger.info(f"Generating Podcast Bible for user {user_id}")
|
||||
|
||||
try:
|
||||
preferences = self.personalization_service.get_user_preferences(user_id) or {}
|
||||
if not self.personalization_service:
|
||||
logger.warning("PersonalizationService not available, using default bible")
|
||||
return self._get_default_bible(project_id)
|
||||
|
||||
try:
|
||||
preferences = self.personalization_service.get_user_preferences(user_id)
|
||||
except Exception as pref_err:
|
||||
logger.warning(f"Failed to get user preferences: {pref_err}, using defaults")
|
||||
return self._get_default_bible(project_id)
|
||||
|
||||
if not preferences:
|
||||
logger.info(f"No preferences found for user {user_id}, using defaults")
|
||||
return self._get_default_bible(project_id)
|
||||
if not isinstance(preferences, dict):
|
||||
logger.warning(f"Podcast Bible preferences payload is non-dict for user {user_id}, using defaults")
|
||||
preferences = {}
|
||||
@@ -129,18 +146,23 @@ class PodcastBibleService:
|
||||
name="AI Host",
|
||||
background="Industry Professional",
|
||||
expertise_level="Expert",
|
||||
personality_traits=["Professional", "Informative"],
|
||||
vocal_style="Authoritative",
|
||||
vocal_characteristics=["Deep", "Steady"]
|
||||
vocal_characteristics=["Deep", "Steady"],
|
||||
look="A professional individual dressed in business-casual attire."
|
||||
),
|
||||
audience=AudienceDNA(
|
||||
expertise_level="Intermediate",
|
||||
interests=["Industry Trends", "Technology"],
|
||||
pain_points=["Staying Competitive", "Operational Efficiency"]
|
||||
pain_points=["Staying Competitive", "Operational Efficiency"],
|
||||
demographics=None
|
||||
),
|
||||
brand=BrandDNA(
|
||||
industry="General Business",
|
||||
tone="Professional",
|
||||
communication_style="Analytical"
|
||||
communication_style="Analytical",
|
||||
key_messages=[],
|
||||
competitor_context=None
|
||||
),
|
||||
visual_style=VisualStyle(
|
||||
environment="Professional modern office studio",
|
||||
|
||||
@@ -156,6 +156,12 @@ def _check_production_api_key_loading(
|
||||
if deploy_env == "local":
|
||||
_record_check(checks, "production_api_key_loading", True, "skipped in local deploy mode")
|
||||
return
|
||||
|
||||
# Also skip in podcast-only mode (no production API keys needed)
|
||||
enabled_features = os.getenv("ALWRITY_ENABLED_FEATURES", "all").strip().lower()
|
||||
if enabled_features == "podcast":
|
||||
_record_check(checks, "production_api_key_loading", True, "skipped in podcast-only mode")
|
||||
return
|
||||
|
||||
test_tenant_id = os.getenv("ALWRITY_STARTUP_TEST_TENANT_ID", "").strip()
|
||||
if not test_tenant_id:
|
||||
|
||||
@@ -46,6 +46,7 @@ class StoryAudioGenerationService:
|
||||
return _get_story_media_write_dir("audio", user_id=user_id, db=db)
|
||||
except Exception as e:
|
||||
logger.warning(f"[StoryAudioGeneration] Failed to resolve user workspace path for {user_id}: {e}")
|
||||
# Don't fall back to default - keep using the already-set output_dir for podcast
|
||||
return self.output_dir
|
||||
|
||||
def _generate_audio_filename(self, scene_number: int, scene_title: str) -> str:
|
||||
@@ -318,6 +319,7 @@ class StoryAudioGenerationService:
|
||||
text: str,
|
||||
user_id: str,
|
||||
voice_id: str = "Wise_Woman",
|
||||
custom_voice_id: Optional[str] = None,
|
||||
speed: float = 1.0,
|
||||
volume: float = 1.0,
|
||||
pitch: float = 0.0,
|
||||
@@ -364,6 +366,7 @@ class StoryAudioGenerationService:
|
||||
result = generate_audio(
|
||||
text=text.strip(),
|
||||
voice_id=voice_id,
|
||||
custom_voice_id=custom_voice_id,
|
||||
speed=speed,
|
||||
volume=volume,
|
||||
pitch=pitch,
|
||||
@@ -378,8 +381,8 @@ class StoryAudioGenerationService:
|
||||
enable_sync_mode=enable_sync_mode,
|
||||
)
|
||||
|
||||
# Determine output directory (user workspace or default)
|
||||
output_dir = self._get_user_audio_dir(user_id, db)
|
||||
# Use the output_dir that was set when service was created (already handles podcast vs story)
|
||||
output_dir = self.output_dir
|
||||
|
||||
# Save audio to file
|
||||
audio_filename = self._generate_audio_filename(scene_number, scene_title)
|
||||
|
||||
@@ -442,9 +442,34 @@ class PricingService:
|
||||
"description": "AI Audio Generation default pricing"
|
||||
}
|
||||
]
|
||||
|
||||
# WaveSpeed LLM Text Generation Pricing (via Cerebras)
|
||||
wavespeed_llm_pricing = [
|
||||
{
|
||||
"provider": APIProvider.WAVESPEED,
|
||||
"model_name": "openai/gpt-oss-120b",
|
||||
"cost_per_input_token": 0.0000006, # $0.60 per 1M input tokens
|
||||
"cost_per_output_token": 0.0000006, # $0.60 per 1M output tokens
|
||||
"description": "WaveSpeed GPT-OSS 120B (Cerebras) - Fast text generation"
|
||||
},
|
||||
{
|
||||
"provider": APIProvider.WAVESPEED,
|
||||
"model_name": "openai/gpt-oss-120b:cerebras",
|
||||
"cost_per_input_token": 0.0000006,
|
||||
"cost_per_output_token": 0.0000006,
|
||||
"description": "WaveSpeed GPT-OSS 120B (Cerebras) - Fast text generation"
|
||||
},
|
||||
{
|
||||
"provider": APIProvider.WAVESPEED,
|
||||
"model_name": "openai/gpt-oss-20b",
|
||||
"cost_per_input_token": 0.0000002, # $0.20 per 1M input tokens
|
||||
"cost_per_output_token": 0.0000002, # $0.20 per 1M output tokens
|
||||
"description": "WaveSpeed GPT-OSS 20B (Cerebras) - Cost-effective text generation"
|
||||
},
|
||||
]
|
||||
|
||||
# Combine all pricing data (include video pricing in search_pricing list)
|
||||
all_pricing = gemini_pricing + openai_pricing + anthropic_pricing + mistral_pricing + search_pricing
|
||||
all_pricing = gemini_pricing + openai_pricing + anthropic_pricing + mistral_pricing + search_pricing + wavespeed_llm_pricing
|
||||
|
||||
# Insert or update pricing data
|
||||
for pricing_data in all_pricing:
|
||||
|
||||
@@ -241,6 +241,7 @@ class WaveSpeedClient:
|
||||
self,
|
||||
text: str,
|
||||
voice_id: str,
|
||||
custom_voice_id: Optional[str] = None,
|
||||
speed: float = 1.0,
|
||||
volume: float = 1.0,
|
||||
pitch: float = 0.0,
|
||||
@@ -255,6 +256,7 @@ class WaveSpeedClient:
|
||||
Args:
|
||||
text: Text to convert to speech (max 10000 characters)
|
||||
voice_id: Voice ID (e.g., "Wise_Woman", "Friendly_Person", etc.)
|
||||
custom_voice_id: Custom voice clone ID for using cloned voice
|
||||
speed: Speech speed (0.5-2.0, default: 1.0)
|
||||
volume: Speech volume (0.1-10.0, default: 1.0)
|
||||
pitch: Speech pitch (-12 to 12, default: 0.0)
|
||||
@@ -269,6 +271,7 @@ class WaveSpeedClient:
|
||||
return self.speech.generate_speech(
|
||||
text=text,
|
||||
voice_id=voice_id,
|
||||
custom_voice_id=custom_voice_id,
|
||||
speed=speed,
|
||||
volume=volume,
|
||||
pitch=pitch,
|
||||
|
||||
@@ -40,6 +40,7 @@ class SpeechGenerator:
|
||||
self,
|
||||
text: str,
|
||||
voice_id: str,
|
||||
custom_voice_id: Optional[str] = None,
|
||||
speed: float = 1.0,
|
||||
volume: float = 1.0,
|
||||
pitch: float = 0.0,
|
||||
@@ -54,6 +55,7 @@ class SpeechGenerator:
|
||||
Args:
|
||||
text: Text to convert to speech (max 10000 characters)
|
||||
voice_id: Voice ID (e.g., "Wise_Woman", "Friendly_Person", etc.)
|
||||
custom_voice_id: Custom voice clone ID for using cloned voice
|
||||
speed: Speech speed (0.5-2.0, default: 1.0)
|
||||
volume: Speech volume (0.1-10.0, default: 1.0)
|
||||
pitch: Speech pitch (-12 to 12, default: 0.0)
|
||||
@@ -77,6 +79,11 @@ class SpeechGenerator:
|
||||
if not sanitized_voice_id:
|
||||
raise ValueError("Voice ID cannot be empty after sanitization")
|
||||
|
||||
# Sanitize custom_voice_id if provided
|
||||
sanitized_custom_voice_id = None
|
||||
if custom_voice_id:
|
||||
sanitized_custom_voice_id = str(custom_voice_id).strip() or None
|
||||
|
||||
# Ensure numeric parameters are proper floats and within valid ranges
|
||||
sanitized_speed = max(0.5, min(2.0, float(speed))) if speed is not None else 1.0
|
||||
sanitized_volume = max(0.1, min(10.0, float(volume))) if volume is not None else 1.0
|
||||
@@ -112,6 +119,10 @@ class SpeechGenerator:
|
||||
"enable_sync_mode": bool(enable_sync_mode),
|
||||
}
|
||||
|
||||
# Add custom voice clone ID if provided
|
||||
if sanitized_custom_voice_id:
|
||||
payload["custom_voice_id"] = sanitized_custom_voice_id
|
||||
|
||||
# Add optional parameters with proper type validation
|
||||
optional_params = [
|
||||
"english_normalization",
|
||||
@@ -179,6 +190,20 @@ class SpeechGenerator:
|
||||
|
||||
if response.status_code != 200:
|
||||
logger.error(f"[WaveSpeed] Speech generation failed: {response.status_code} {response.text}")
|
||||
|
||||
# Check for custom voice ID specific errors
|
||||
response_text = response.text.lower()
|
||||
if "custom_voice" in response_text or "voice_id" in response_text:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail={
|
||||
"error": "Invalid voice clone ID",
|
||||
"message": "The custom voice ID is invalid or expired. Please create a new voice clone or use a predefined voice.",
|
||||
"status_code": response.status_code,
|
||||
"response": response.text,
|
||||
},
|
||||
)
|
||||
|
||||
raise HTTPException(
|
||||
status_code=502,
|
||||
detail={
|
||||
|
||||
@@ -26,20 +26,24 @@ def _generate_simple_infinitetalk_prompt(
|
||||
story_context: Dict[str, Any],
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
Generate a balanced, concise prompt for InfiniteTalk.
|
||||
InfiniteTalk is audio-driven, so the prompt should describe the scene and suggest
|
||||
subtle motion, but avoid overly elaborate cinematic descriptions.
|
||||
Generate an enhanced prompt for InfiniteTalk video generation.
|
||||
Includes scene content, analysis, bible context, and visual elements.
|
||||
|
||||
Returns None if no meaningful prompt can be generated.
|
||||
"""
|
||||
title = (scene_data.get("title") or "").strip()
|
||||
description = (scene_data.get("description") or "").strip()
|
||||
image_prompt = (scene_data.get("image_prompt") or "").strip()
|
||||
lines = scene_data.get("lines", [])
|
||||
narration = ""
|
||||
if lines:
|
||||
# Combine first few lines for context
|
||||
narration = " ".join([str(l.get("text", "")) for l in lines[:3]])[:150]
|
||||
|
||||
# Build a balanced prompt: scene description + simple motion hint
|
||||
# Build enhanced prompt with multiple context sources
|
||||
parts = []
|
||||
|
||||
# Add scene context
|
||||
# Add main scene title
|
||||
if title and len(title) > 5 and title.lower() not in ("scene", "podcast", "episode"):
|
||||
parts.append(title)
|
||||
|
||||
@@ -48,60 +52,70 @@ def _generate_simple_infinitetalk_prompt(
|
||||
if analysis:
|
||||
content_type = analysis.get("content_type")
|
||||
if content_type:
|
||||
parts.append(f"Style: {content_type}")
|
||||
parts.append(f"Content type: {content_type}")
|
||||
|
||||
# Audience helps define the formality/vibe
|
||||
# Add key takeaways if available
|
||||
key_takeaways = analysis.get("keyTakeaways", [])
|
||||
if key_takeaways and isinstance(key_takeaways, list) and len(key_takeaways) > 0:
|
||||
takeaway = str(key_takeaways[0])[:80]
|
||||
if takeaway:
|
||||
parts.append(f"Key insight: {takeaway}")
|
||||
|
||||
# Audience
|
||||
audience = analysis.get("audience")
|
||||
if audience:
|
||||
# Just use first few words of audience to keep it short
|
||||
short_audience = " ".join(audience.split()[:3])
|
||||
parts.append(f"For: {short_audience}")
|
||||
|
||||
# Add bible context if available
|
||||
short_audience = " ".join(audience.split()[:3])
|
||||
parts.append(f"Target audience: {short_audience}")
|
||||
|
||||
# Guest info
|
||||
guest_name = analysis.get("guestName")
|
||||
guest_expertise = analysis.get("guestExpertise")
|
||||
if guest_name:
|
||||
parts.append(f"Guest: {guest_name}")
|
||||
if guest_expertise:
|
||||
parts.append(f"Expertise: {guest_expertise}")
|
||||
|
||||
# Add bible context
|
||||
bible = story_context.get("bible", {})
|
||||
if bible:
|
||||
host_persona = bible.get("host_persona")
|
||||
tone = bible.get("tone")
|
||||
visual_style = bible.get("visual_style")
|
||||
background = bible.get("background")
|
||||
|
||||
if host_persona:
|
||||
parts.append(f"Host: {host_persona}")
|
||||
parts.append(f"Host persona: {host_persona}")
|
||||
if tone:
|
||||
parts.append(f"Tone: {tone}")
|
||||
|
||||
elif description:
|
||||
# Take first sentence or first 60 chars
|
||||
desc_part = description.split('.')[0][:60].strip()
|
||||
if desc_part:
|
||||
parts.append(desc_part)
|
||||
elif image_prompt:
|
||||
# Take first sentence or first 60 chars
|
||||
img_part = image_prompt.split('.')[0][:60].strip()
|
||||
if visual_style:
|
||||
parts.append(f"Visual style: {visual_style}")
|
||||
if background:
|
||||
parts.append(f"Background: {background}")
|
||||
|
||||
# Add original image prompt as fallback context
|
||||
if image_prompt and len(parts) < 3:
|
||||
img_part = image_prompt.split('.')[0][:100].strip()
|
||||
if img_part:
|
||||
parts.append(img_part)
|
||||
parts.append(f"Visual context: {img_part}")
|
||||
|
||||
# Add narration snippet if available
|
||||
if narration and len(parts) < 4:
|
||||
parts.append(f"Discussing: {narration}")
|
||||
|
||||
if not parts:
|
||||
return None
|
||||
|
||||
# Add a simple, subtle motion suggestion (not elaborate camera movements)
|
||||
# Keep it natural and audio-driven
|
||||
motion_hints = [
|
||||
"with subtle movement",
|
||||
"with gentle motion",
|
||||
"with natural animation",
|
||||
]
|
||||
# Build prompt with visual quality keywords
|
||||
quality_keywords = "Cinematic lighting, high detail, 4k quality, smooth motion"
|
||||
|
||||
# Combine scene description with subtle motion hint
|
||||
if len(parts[0]) < 80:
|
||||
# Room for a motion hint
|
||||
prompt = f"{parts[0]}, {motion_hints[0]}"
|
||||
else:
|
||||
# Just use the description if it's already long enough
|
||||
prompt = parts[0]
|
||||
# Combine parts into final prompt
|
||||
prompt = f"{'. '.join(parts)}. {quality_keywords}. With subtle natural movement."
|
||||
|
||||
# Keep it concise - max 120 characters (allows for scene + motion hint)
|
||||
prompt = prompt[:120].strip()
|
||||
# Allow more room for detailed prompts - max 350 characters
|
||||
prompt = prompt[:350].strip()
|
||||
|
||||
# Clean up trailing commas or incomplete sentences
|
||||
if prompt.endswith(','):
|
||||
# Clean up trailing punctuation
|
||||
if prompt.endswith(',') or prompt.endswith('.'):
|
||||
prompt = prompt[:-1].strip()
|
||||
|
||||
return prompt if len(prompt) >= 15 else None
|
||||
|
||||
Reference in New Issue
Block a user