- Add POST /podcast/pre-estimate endpoint for cost estimation before analysis - Enhance cost_estimator.py with multi-model support (gemini, audio, voice clone, image, video) - Add detailed cost breakdown (llm, audio, media costs + per-phase breakdown) - Remove redundant pricing seeding from init_alpha_subscription_tiers.py - Add SSOT pricing via PricingService.initialize_default_pricing() - Update TopicUrlInput tooltip to show estimate details - Add debug logging for pricing seeding and pre-estimate - Clean up verbose podcast mode debug logs in app.py
216 lines
7.9 KiB
Python
216 lines
7.9 KiB
Python
"""
|
|
Podcast cost estimation helpers.
|
|
|
|
Builds user-facing podcast estimates from the subscription pricing catalog
|
|
instead of hard-coded frontend heuristics.
|
|
|
|
Supports multiple models for each component:
|
|
- Audio TTS: minimax/speech-02-hd (default), qwen3-tts, cosyvoice-tts
|
|
- Voice Clone: qwen3, cosyvoice, minimax
|
|
- Image: qwen-image (default), ideogram-v3-turbo
|
|
- Video: wan-2.5 (default), kling-v2.5, infinitetalk
|
|
- LLM: gemini-2.5-flash (default)
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Any, Dict, Optional
|
|
from sqlalchemy.orm import Session
|
|
|
|
from models.subscription_models import APIProvider
|
|
from services.subscription.pricing_service import PricingService
|
|
|
|
|
|
def _round_money(value: float) -> float:
|
|
return round(float(value), 4)
|
|
|
|
|
|
def _load_pricing(
|
|
pricing_service: PricingService,
|
|
provider: APIProvider,
|
|
preferred_model: str,
|
|
) -> Optional[Dict[str, Any]]:
|
|
"""Load pricing for a provider and model, with fallback to default."""
|
|
pricing = pricing_service.get_pricing_for_provider_model(provider, preferred_model)
|
|
if pricing:
|
|
return pricing
|
|
# Fallback to provider default model row (if configured).
|
|
return pricing_service.get_pricing_for_provider_model(provider, "default")
|
|
|
|
|
|
# Default models used in podcast generation
|
|
DEFAULT_MODELS = {
|
|
"gemini": "gemini-2.5-flash",
|
|
"exa": "exa-search",
|
|
"audio_tts": "minimax/speech-02-hd",
|
|
"voice_clone": "wavespeed-ai/qwen3-tts/voice-clone",
|
|
"image": "qwen-image",
|
|
"video": "wan-2.5",
|
|
}
|
|
|
|
|
|
def estimate_podcast_cost(
|
|
*,
|
|
db: Session,
|
|
duration_minutes: int,
|
|
speakers: int,
|
|
query_count: int,
|
|
include_avatar_phase: bool = True,
|
|
# Optional model overrides
|
|
gemini_model: str = "gemini-2.5-flash",
|
|
audio_tts_model: str = "minimax/speech-02-hd",
|
|
voice_clone_engine: str = "qwen3",
|
|
image_model: str = "qwen-image",
|
|
video_model: str = "wan-2.5",
|
|
) -> Optional[Dict[str, Any]]:
|
|
"""
|
|
Compute a backend estimate for podcast creation.
|
|
|
|
Supports customizable models for each component.
|
|
Uses pricing_catalog for accurate cost calculation.
|
|
"""
|
|
pricing_service = PricingService(db)
|
|
|
|
# Load pricing for each component and model
|
|
gemini_pricing = _load_pricing(pricing_service, APIProvider.GEMINI, gemini_model)
|
|
exa_pricing = _load_pricing(pricing_service, APIProvider.EXA, "exa-search")
|
|
|
|
# Audio TTS pricing (minimax/speech-02-hd)
|
|
audio_pricing = _load_pricing(pricing_service, APIProvider.AUDIO, audio_tts_model)
|
|
|
|
# Voice clone pricing (different engines)
|
|
voice_clone_model = f"wavespeed-ai/{voice_clone_engine}-tts/voice-clone"
|
|
voice_clone_pricing = _load_pricing(pricing_service, APIProvider.AUDIO, voice_clone_model)
|
|
if not voice_clone_pricing:
|
|
# Try alternate model names
|
|
voice_clone_pricing = _load_pricing(pricing_service, APIProvider.AUDIO, f"{voice_clone_engine}/voice-clone")
|
|
|
|
# Image pricing (qwen-image or ideogram)
|
|
image_pricing = _load_pricing(pricing_service, APIProvider.STABILITY, image_model)
|
|
|
|
# Video pricing (wan-2.5, kling, or infinitetalk)
|
|
video_pricing = _load_pricing(pricing_service, APIProvider.VIDEO, video_model)
|
|
|
|
# Return None if critical pricing unavailable (fail fast)
|
|
if not gemini_pricing:
|
|
return None
|
|
|
|
# Configuration
|
|
minutes = max(1, int(duration_minutes or 1))
|
|
speaker_count = max(1, int(speakers or 1))
|
|
research_queries = max(1, int(query_count or 1))
|
|
|
|
# Token usage assumptions per phase
|
|
analysis_input_tokens = 1800
|
|
analysis_output_tokens = 1000
|
|
research_synthesis_input_tokens = 2200
|
|
research_synthesis_output_tokens = 900
|
|
script_input_tokens = max(1800, minutes * 300)
|
|
script_output_tokens = max(2200, minutes * 700)
|
|
|
|
# TTS: ~900 chars per minute per speaker
|
|
estimated_tts_tokens = max(900, minutes * 900 * speaker_count)
|
|
|
|
# Voice clone: 1 clone operation per speaker
|
|
voice_clone_count = speaker_count
|
|
|
|
# ===== COST CALCULATIONS =====
|
|
|
|
# 1. Analysis phase (LLM)
|
|
analysis_cost = (
|
|
analysis_input_tokens * float(gemini_pricing.get("cost_per_input_token") or 0.0)
|
|
+ analysis_output_tokens * float(gemini_pricing.get("cost_per_output_token") or 0.0)
|
|
)
|
|
|
|
# 2. Research phase
|
|
# 2a. LLM for research synthesis
|
|
research_llm_cost = (
|
|
research_synthesis_input_tokens * float(gemini_pricing.get("cost_per_input_token") or 0.0)
|
|
+ research_synthesis_output_tokens * float(gemini_pricing.get("cost_per_output_token") or 0.0)
|
|
)
|
|
# 2b. Search API (Exa)
|
|
research_search_cost = 0.0
|
|
if exa_pricing:
|
|
research_search_cost = research_queries * float(exa_pricing.get("cost_per_request") or 0.0)
|
|
research_cost = research_search_cost + research_llm_cost
|
|
|
|
# 3. Script generation (LLM)
|
|
script_cost = (
|
|
script_input_tokens * float(gemini_pricing.get("cost_per_input_token") or 0.0)
|
|
+ script_output_tokens * float(gemini_pricing.get("cost_per_output_token") or 0.0)
|
|
)
|
|
|
|
# 4. Audio TTS
|
|
tts_cost = 0.0
|
|
if audio_pricing:
|
|
tts_cost = estimated_tts_tokens * float(audio_pricing.get("cost_per_input_token") or 0.0)
|
|
|
|
# 5. Voice cloning (if needed)
|
|
voice_clone_cost = 0.0
|
|
if voice_clone_pricing:
|
|
voice_clone_cost = voice_clone_count * (
|
|
float(voice_clone_pricing.get("cost_per_request") or 0.0)
|
|
+ estimated_tts_tokens * float(voice_clone_pricing.get("cost_per_input_token") or 0.0)
|
|
)
|
|
|
|
# 6. Avatar image generation
|
|
avatar_cost = 0.0
|
|
if include_avatar_phase and image_pricing:
|
|
image_unit = float(image_pricing.get("cost_per_image") or image_pricing.get("cost_per_request") or 0.0)
|
|
avatar_cost = speaker_count * image_unit
|
|
|
|
# 7. Video rendering
|
|
video_cost = 0.0
|
|
if video_pricing:
|
|
# Assume 1 video render per minute (upper bound)
|
|
video_cost = minutes * float(video_pricing.get("cost_per_request") or 0.0)
|
|
|
|
# ===== TOTALS =====
|
|
llm_total = analysis_cost + research_llm_cost + script_cost
|
|
audio_total = tts_cost + voice_clone_cost
|
|
media_total = avatar_cost + video_cost
|
|
total = llm_total + research_search_cost + audio_total + media_total
|
|
|
|
return {
|
|
# Cost breakdown
|
|
"analysisCost": _round_money(analysis_cost),
|
|
"researchCost": _round_money(research_cost),
|
|
"researchSearchCost": _round_money(research_search_cost),
|
|
"researchLlmCost": _round_money(research_llm_cost),
|
|
"scriptCost": _round_money(script_cost),
|
|
"ttsCost": _round_money(tts_cost),
|
|
"voiceCloneCost": _round_money(voice_clone_cost),
|
|
"avatarCost": _round_money(avatar_cost),
|
|
"videoCost": _round_money(video_cost),
|
|
"total": _round_money(total),
|
|
# Totals by category
|
|
"llmCost": _round_money(llm_total),
|
|
"audioCost": _round_money(audio_total),
|
|
"mediaCost": _round_money(media_total),
|
|
# Currency
|
|
"currency": "USD",
|
|
"source": "pricing_catalog",
|
|
# Models used for this estimate
|
|
"models": {
|
|
"llm": gemini_model,
|
|
"research": "exa-search",
|
|
"audio_tts": audio_tts_model,
|
|
"voice_clone": voice_clone_model,
|
|
"image": image_model,
|
|
"video": video_model,
|
|
},
|
|
# Assumptions used
|
|
"assumptions": {
|
|
"analysis_input_tokens": analysis_input_tokens,
|
|
"analysis_output_tokens": analysis_output_tokens,
|
|
"research_synthesis_input_tokens": research_synthesis_input_tokens,
|
|
"research_synthesis_output_tokens": research_synthesis_output_tokens,
|
|
"script_input_tokens": script_input_tokens,
|
|
"script_output_tokens": script_output_tokens,
|
|
"estimated_tts_tokens": estimated_tts_tokens,
|
|
"research_queries": research_queries,
|
|
"voice_clone_count": voice_clone_count,
|
|
"video_requests": minutes,
|
|
"avatar_requests": speaker_count if include_avatar_phase else 0,
|
|
},
|
|
} |