feat(podcast): add pre-estimate endpoint, enhance cost estimator with multi-model support, cleanup alpha pricing seeding
- Add POST /podcast/pre-estimate endpoint for cost estimation before analysis - Enhance cost_estimator.py with multi-model support (gemini, audio, voice clone, image, video) - Add detailed cost breakdown (llm, audio, media costs + per-phase breakdown) - Remove redundant pricing seeding from init_alpha_subscription_tiers.py - Add SSOT pricing via PricingService.initialize_default_pricing() - Update TopicUrlInput tooltip to show estimate details - Add debug logging for pricing seeding and pre-estimate - Clean up verbose podcast mode debug logs in app.py
This commit is contained in:
@@ -3,6 +3,13 @@ Podcast cost estimation helpers.
|
||||
|
||||
Builds user-facing podcast estimates from the subscription pricing catalog
|
||||
instead of hard-coded frontend heuristics.
|
||||
|
||||
Supports multiple models for each component:
|
||||
- Audio TTS: minimax/speech-02-hd (default), qwen3-tts, cosyvoice-tts
|
||||
- Voice Clone: qwen3, cosyvoice, minimax
|
||||
- Image: qwen-image (default), ideogram-v3-turbo
|
||||
- Video: wan-2.5 (default), kling-v2.5, infinitetalk
|
||||
- LLM: gemini-2.5-flash (default)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -23,6 +30,7 @@ def _load_pricing(
|
||||
provider: APIProvider,
|
||||
preferred_model: str,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Load pricing for a provider and model, with fallback to default."""
|
||||
pricing = pricing_service.get_pricing_for_provider_model(provider, preferred_model)
|
||||
if pricing:
|
||||
return pricing
|
||||
@@ -30,6 +38,17 @@ def _load_pricing(
|
||||
return pricing_service.get_pricing_for_provider_model(provider, "default")
|
||||
|
||||
|
||||
# Default models used in podcast generation
|
||||
DEFAULT_MODELS = {
|
||||
"gemini": "gemini-2.5-flash",
|
||||
"exa": "exa-search",
|
||||
"audio_tts": "minimax/speech-02-hd",
|
||||
"voice_clone": "wavespeed-ai/qwen3-tts/voice-clone",
|
||||
"image": "qwen-image",
|
||||
"video": "wan-2.5",
|
||||
}
|
||||
|
||||
|
||||
def estimate_podcast_cost(
|
||||
*,
|
||||
db: Session,
|
||||
@@ -37,88 +56,150 @@ def estimate_podcast_cost(
|
||||
speakers: int,
|
||||
query_count: int,
|
||||
include_avatar_phase: bool = True,
|
||||
# Optional model overrides
|
||||
gemini_model: str = "gemini-2.5-flash",
|
||||
audio_tts_model: str = "minimax/speech-02-hd",
|
||||
voice_clone_engine: str = "qwen3",
|
||||
image_model: str = "qwen-image",
|
||||
video_model: str = "wan-2.5",
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Compute a backend estimate for podcast creation.
|
||||
|
||||
Returns None when pricing rows are unavailable so UI can display "Unavailable".
|
||||
|
||||
Supports customizable models for each component.
|
||||
Uses pricing_catalog for accurate cost calculation.
|
||||
"""
|
||||
pricing_service = PricingService(db)
|
||||
|
||||
gemini_pricing = _load_pricing(pricing_service, APIProvider.GEMINI, "gemini-2.5-flash")
|
||||
# Load pricing for each component and model
|
||||
gemini_pricing = _load_pricing(pricing_service, APIProvider.GEMINI, gemini_model)
|
||||
exa_pricing = _load_pricing(pricing_service, APIProvider.EXA, "exa-search")
|
||||
audio_pricing = _load_pricing(pricing_service, APIProvider.AUDIO, "minimax/speech-02-hd")
|
||||
video_pricing = _load_pricing(pricing_service, APIProvider.VIDEO, "default")
|
||||
image_pricing = _load_pricing(pricing_service, APIProvider.STABILITY, "qwen-image")
|
||||
|
||||
# Audio TTS pricing (minimax/speech-02-hd)
|
||||
audio_pricing = _load_pricing(pricing_service, APIProvider.AUDIO, audio_tts_model)
|
||||
|
||||
# Voice clone pricing (different engines)
|
||||
voice_clone_model = f"wavespeed-ai/{voice_clone_engine}-tts/voice-clone"
|
||||
voice_clone_pricing = _load_pricing(pricing_service, APIProvider.AUDIO, voice_clone_model)
|
||||
if not voice_clone_pricing:
|
||||
# Try alternate model names
|
||||
voice_clone_pricing = _load_pricing(pricing_service, APIProvider.AUDIO, f"{voice_clone_engine}/voice-clone")
|
||||
|
||||
# Image pricing (qwen-image or ideogram)
|
||||
image_pricing = _load_pricing(pricing_service, APIProvider.STABILITY, image_model)
|
||||
|
||||
# Video pricing (wan-2.5, kling, or infinitetalk)
|
||||
video_pricing = _load_pricing(pricing_service, APIProvider.VIDEO, video_model)
|
||||
|
||||
# Return None if critical pricing unavailable (fail fast)
|
||||
if not gemini_pricing:
|
||||
return None
|
||||
|
||||
# Configuration
|
||||
minutes = max(1, int(duration_minutes or 1))
|
||||
speaker_count = max(1, int(speakers or 1))
|
||||
research_queries = max(1, int(query_count or 1))
|
||||
|
||||
# Phase-level usage assumptions (token/request proxies for pre-creation estimate).
|
||||
# Token usage assumptions per phase
|
||||
analysis_input_tokens = 1800
|
||||
analysis_output_tokens = 1000
|
||||
research_synthesis_input_tokens = 2200
|
||||
research_synthesis_output_tokens = 900
|
||||
script_input_tokens = max(1800, minutes * 300)
|
||||
script_output_tokens = max(2200, minutes * 700)
|
||||
|
||||
# TTS token proxy: ~900 chars per minute per speaker.
|
||||
|
||||
# TTS: ~900 chars per minute per speaker
|
||||
estimated_tts_tokens = max(900, minutes * 900 * speaker_count)
|
||||
|
||||
# Voice clone: 1 clone operation per speaker
|
||||
voice_clone_count = speaker_count
|
||||
|
||||
# ===== COST CALCULATIONS =====
|
||||
|
||||
# 1. Analysis phase (LLM)
|
||||
analysis_cost = (
|
||||
analysis_input_tokens * float(gemini_pricing.get("cost_per_input_token") or 0.0)
|
||||
+ analysis_output_tokens * float(gemini_pricing.get("cost_per_output_token") or 0.0)
|
||||
+ float(gemini_pricing.get("cost_per_request") or 0.0)
|
||||
)
|
||||
|
||||
# 2. Research phase
|
||||
# 2a. LLM for research synthesis
|
||||
research_llm_cost = (
|
||||
research_synthesis_input_tokens * float(gemini_pricing.get("cost_per_input_token") or 0.0)
|
||||
+ research_synthesis_output_tokens * float(gemini_pricing.get("cost_per_output_token") or 0.0)
|
||||
+ float(gemini_pricing.get("cost_per_request") or 0.0)
|
||||
)
|
||||
script_cost = (
|
||||
script_input_tokens * float(gemini_pricing.get("cost_per_input_token") or 0.0)
|
||||
+ script_output_tokens * float(gemini_pricing.get("cost_per_output_token") or 0.0)
|
||||
+ float(gemini_pricing.get("cost_per_request") or 0.0)
|
||||
)
|
||||
|
||||
# 2b. Search API (Exa)
|
||||
research_search_cost = 0.0
|
||||
if exa_pricing:
|
||||
research_search_cost = research_queries * float(exa_pricing.get("cost_per_request") or 0.0)
|
||||
research_cost = research_search_cost + research_llm_cost
|
||||
|
||||
# 3. Script generation (LLM)
|
||||
script_cost = (
|
||||
script_input_tokens * float(gemini_pricing.get("cost_per_input_token") or 0.0)
|
||||
+ script_output_tokens * float(gemini_pricing.get("cost_per_output_token") or 0.0)
|
||||
)
|
||||
|
||||
# 4. Audio TTS
|
||||
tts_cost = 0.0
|
||||
if audio_pricing:
|
||||
tts_cost = (
|
||||
estimated_tts_tokens * float(audio_pricing.get("cost_per_input_token") or 0.0)
|
||||
+ float(audio_pricing.get("cost_per_request") or 0.0)
|
||||
tts_cost = estimated_tts_tokens * float(audio_pricing.get("cost_per_input_token") or 0.0)
|
||||
|
||||
# 5. Voice cloning (if needed)
|
||||
voice_clone_cost = 0.0
|
||||
if voice_clone_pricing:
|
||||
voice_clone_cost = voice_clone_count * (
|
||||
float(voice_clone_pricing.get("cost_per_request") or 0.0)
|
||||
+ estimated_tts_tokens * float(voice_clone_pricing.get("cost_per_input_token") or 0.0)
|
||||
)
|
||||
|
||||
# Assume one video render request per minute (upper-bound planning estimate).
|
||||
video_cost = 0.0
|
||||
if video_pricing:
|
||||
video_cost = minutes * float(video_pricing.get("cost_per_request") or 0.0)
|
||||
|
||||
# 6. Avatar image generation
|
||||
avatar_cost = 0.0
|
||||
if include_avatar_phase and image_pricing:
|
||||
image_unit = float(image_pricing.get("cost_per_image") or image_pricing.get("cost_per_request") or 0.0)
|
||||
avatar_cost = speaker_count * image_unit
|
||||
|
||||
research_cost = research_search_cost + research_llm_cost
|
||||
total = analysis_cost + research_cost + script_cost + tts_cost + video_cost + avatar_cost
|
||||
# 7. Video rendering
|
||||
video_cost = 0.0
|
||||
if video_pricing:
|
||||
# Assume 1 video render per minute (upper bound)
|
||||
video_cost = minutes * float(video_pricing.get("cost_per_request") or 0.0)
|
||||
|
||||
# ===== TOTALS =====
|
||||
llm_total = analysis_cost + research_llm_cost + script_cost
|
||||
audio_total = tts_cost + voice_clone_cost
|
||||
media_total = avatar_cost + video_cost
|
||||
total = llm_total + research_search_cost + audio_total + media_total
|
||||
|
||||
return {
|
||||
# Cost breakdown
|
||||
"analysisCost": _round_money(analysis_cost),
|
||||
"researchCost": _round_money(research_cost),
|
||||
"researchSearchCost": _round_money(research_search_cost),
|
||||
"researchLlmCost": _round_money(research_llm_cost),
|
||||
"scriptCost": _round_money(script_cost),
|
||||
"ttsCost": _round_money(tts_cost),
|
||||
"voiceCloneCost": _round_money(voice_clone_cost),
|
||||
"avatarCost": _round_money(avatar_cost),
|
||||
"videoCost": _round_money(video_cost),
|
||||
"researchCost": _round_money(research_cost),
|
||||
"analysisCost": _round_money(analysis_cost),
|
||||
"scriptCost": _round_money(script_cost),
|
||||
"total": _round_money(total),
|
||||
# Totals by category
|
||||
"llmCost": _round_money(llm_total),
|
||||
"audioCost": _round_money(audio_total),
|
||||
"mediaCost": _round_money(media_total),
|
||||
# Currency
|
||||
"currency": "USD",
|
||||
"source": "pricing_catalog",
|
||||
# Models used for this estimate
|
||||
"models": {
|
||||
"llm": gemini_model,
|
||||
"research": "exa-search",
|
||||
"audio_tts": audio_tts_model,
|
||||
"voice_clone": voice_clone_model,
|
||||
"image": image_model,
|
||||
"video": video_model,
|
||||
},
|
||||
# Assumptions used
|
||||
"assumptions": {
|
||||
"analysis_input_tokens": analysis_input_tokens,
|
||||
"analysis_output_tokens": analysis_output_tokens,
|
||||
@@ -128,6 +209,8 @@ def estimate_podcast_cost(
|
||||
"script_output_tokens": script_output_tokens,
|
||||
"estimated_tts_tokens": estimated_tts_tokens,
|
||||
"research_queries": research_queries,
|
||||
"voice_clone_count": voice_clone_count,
|
||||
"video_requests": minutes,
|
||||
"avatar_requests": speaker_count if include_avatar_phase else 0,
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -4,8 +4,9 @@ Podcast Analysis Handlers
|
||||
Analysis endpoint for podcast ideas.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from typing import Dict, Any, Optional, List
|
||||
from datetime import datetime
|
||||
import json
|
||||
import uuid
|
||||
from sqlalchemy.orm import Session
|
||||
@@ -21,11 +22,18 @@ from utils.asset_tracker import save_asset_to_library
|
||||
from loguru import logger
|
||||
import os
|
||||
from ..constants import get_podcast_media_dir
|
||||
from ..prompts import get_enhance_topic_prompt, format_website_context
|
||||
from ..models import (
|
||||
PodcastAnalyzeRequest,
|
||||
PodcastAnalyzeResponse,
|
||||
PodcastEnhanceIdeaRequest,
|
||||
PodcastEnhanceIdeaResponse
|
||||
PodcastEnhanceIdeaResponse,
|
||||
ExtractUrlRequest,
|
||||
ExtractUrlResponse,
|
||||
WebsiteAnalysisRequest,
|
||||
WebsiteAnalysisResponse,
|
||||
PodcastPreEstimateRequest,
|
||||
PodcastPreEstimateResponse,
|
||||
)
|
||||
from ..cost_estimator import estimate_podcast_cost
|
||||
|
||||
@@ -37,6 +45,74 @@ def _is_podcast_only_mode() -> bool:
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/pre-estimate", response_model=PodcastPreEstimateResponse)
|
||||
async def pre_estimate_cost(
|
||||
request: PodcastPreEstimateRequest,
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""
|
||||
Lightweight endpoint to estimate podcast creation cost before analysis.
|
||||
|
||||
Takes user configuration (duration, speakers, query_count, podcast_mode) and returns
|
||||
a cost estimate WITHOUT running full analysis.
|
||||
|
||||
Optional model overrides can be specified to estimate with different models.
|
||||
"""
|
||||
try:
|
||||
include_avatar_phase = request.podcast_mode != "audio_only"
|
||||
|
||||
estimate = estimate_podcast_cost(
|
||||
db=db,
|
||||
duration_minutes=request.duration,
|
||||
speakers=request.speakers,
|
||||
query_count=request.query_count,
|
||||
include_avatar_phase=include_avatar_phase,
|
||||
# Model overrides if provided
|
||||
gemini_model=request.gemini_model or "gemini-2.5-flash",
|
||||
audio_tts_model=request.audio_tts_model or "minimax/speech-02-hd",
|
||||
voice_clone_engine=request.voice_clone_engine or "qwen3",
|
||||
image_model=request.image_model or "qwen-image",
|
||||
video_model=request.video_model or "wan-2.5",
|
||||
)
|
||||
|
||||
# Debug: get pricing row count and providers
|
||||
from models.subscription_models import APIProviderPricing
|
||||
pricing_count = db.query(APIProviderPricing).count()
|
||||
providers = db.query(APIProviderPricing.provider).distinct().all()
|
||||
provider_list = sorted([p[0].value for p in providers]) if providers else []
|
||||
|
||||
debug_info = {
|
||||
"pricing_rows": pricing_count,
|
||||
"providers": provider_list,
|
||||
}
|
||||
|
||||
# Log pricing debug info at warning level
|
||||
logger.warning(f"[PRE-ESTIMATE] Pricing debug: rows={pricing_count}, providers={provider_list}")
|
||||
logger.warning(f"[PRE-ESTIMATE] Models: llm={request.gemini_model}, tts={request.audio_tts_model}, video={request.video_model}")
|
||||
|
||||
if estimate is None:
|
||||
return PodcastPreEstimateResponse(
|
||||
estimate=None,
|
||||
error="Pricing data unavailable. Please try again later.",
|
||||
pricing_available=False,
|
||||
debug=debug_info,
|
||||
)
|
||||
|
||||
return PodcastPreEstimateResponse(
|
||||
estimate=estimate,
|
||||
error=None,
|
||||
pricing_available=True,
|
||||
debug=debug_info,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Pre-estimate error: {e}")
|
||||
return PodcastPreEstimateResponse(
|
||||
estimate=None,
|
||||
error=str(e),
|
||||
)
|
||||
|
||||
|
||||
@router.post("/idea/enhance", response_model=PodcastEnhanceIdeaResponse)
|
||||
async def enhance_podcast_idea(
|
||||
request: PodcastEnhanceIdeaRequest,
|
||||
@@ -77,39 +153,27 @@ async def enhance_podcast_idea(
|
||||
except Exception as exc:
|
||||
logger.debug(f"[Podcast Enhance] Bible parsing skipped in podcast mode: {exc}")
|
||||
|
||||
prompt = f"""
|
||||
You are a creative podcast producer. Generate 3 distinct, compelling podcast episode concepts from the raw idea.
|
||||
# Log what's being used for context
|
||||
context_used = []
|
||||
if bible_context:
|
||||
context_used.append("Podcast Bible")
|
||||
if request.website_data:
|
||||
context_used.append("Website Extraction")
|
||||
if request.topic_context:
|
||||
category = request.topic_context.get("category", "unknown")
|
||||
context_used.append(f"Category Research ({category})")
|
||||
|
||||
logger.warning(f"[Podcast Enhance] Generating with context: {', '.join(context_used) if context_used else 'basic idea only'}")
|
||||
|
||||
{f"USER PERSONALIZATION CONTEXT (Podcast Bible):\n{bible_context}\n" if bible_context else ""}
|
||||
|
||||
RAW IDEA/KEYWORDS: "{request.idea}"
|
||||
|
||||
TASK:
|
||||
Generate 3 different enhanced versions, each with a unique angle:
|
||||
1. Professional & Expert-led angle (focus on authority, insights, and expertise)
|
||||
2. Storytelling & Human interest angle (focus on narratives, emotions, and personal connections)
|
||||
3. Trendy & Contemporary angle (focus on current trends, modern perspectives, and relevance)
|
||||
|
||||
Each version should be 2-3 sentences, audience-focused, and align with host persona if provided.
|
||||
|
||||
Return JSON with:
|
||||
- enhanced_ideas: array of 3 strings, each string being a complete episode pitch (NOT objects, just plain strings)
|
||||
- rationales: array of 3 strings explaining the approach for each version
|
||||
|
||||
IMPORTANT: enhanced_ideas must be an array of plain strings, NOT objects. Example:
|
||||
{{
|
||||
"enhanced_ideas": [
|
||||
"Your expert guide to AI advancement: A practical look at how AI is transforming industries...",
|
||||
"The human stories behind AI innovation: From Silicon Valley to your daily life...",
|
||||
"AI in 2026: What's trending and what's next in artificial intelligence..."
|
||||
],
|
||||
"rationales": [
|
||||
"Professional approach focusing on expertise and authority",
|
||||
"Storytelling approach emphasizing human connection",
|
||||
"Contemporary approach highlighting current relevance"
|
||||
]
|
||||
}}
|
||||
"""
|
||||
# Use new context builder for prompt generation
|
||||
from services.podcast_context_builder import context_builder
|
||||
context_result = context_builder.build_enhance_context(
|
||||
idea=request.idea,
|
||||
bible_context=bible_context,
|
||||
website_data=request.website_data,
|
||||
topic_context=request.topic_context,
|
||||
)
|
||||
prompt = context_result["prompt"]
|
||||
|
||||
try:
|
||||
raw = llm_text_gen(
|
||||
@@ -502,3 +566,316 @@ Requirements:
|
||||
except Exception as exc:
|
||||
logger.error(f"[Regenerate Queries] Failed for user {user_id}: {exc}")
|
||||
raise HTTPException(status_code=500, detail=f"Regenerate queries failed: {exc}")
|
||||
|
||||
|
||||
@router.post("/extract-url", response_model=ExtractUrlResponse)
|
||||
async def extract_url_content(
|
||||
request: ExtractUrlRequest,
|
||||
current_user: Dict[str, Any] = Depends(get_current_user),
|
||||
):
|
||||
"""
|
||||
Extract content from a URL using Exa's get_contents API.
|
||||
|
||||
This allows users to paste a blog post or article URL as their podcast topic,
|
||||
and we'll extract the content to use as the podcast idea.
|
||||
"""
|
||||
user_id = require_authenticated_user(current_user)
|
||||
|
||||
from exa_py import Exa
|
||||
import os
|
||||
|
||||
api_key = os.getenv("EXA_API_KEY")
|
||||
if not api_key:
|
||||
raise HTTPException(status_code=500, detail="EXA_API_KEY not configured")
|
||||
|
||||
exa = Exa(api_key)
|
||||
|
||||
logger.warning(f"[ExtractUrl] Extracting content from: {request.url} for user {user_id}")
|
||||
|
||||
try:
|
||||
result = exa.get_contents(
|
||||
urls=[request.url],
|
||||
text=True,
|
||||
highlights=True,
|
||||
summary=True,
|
||||
subpages=2,
|
||||
)
|
||||
except Exception as exa_error:
|
||||
logger.error(f"[ExtractUrl] Exa call error: {exa_error}")
|
||||
return ExtractUrlResponse(
|
||||
success=False,
|
||||
url=request.url,
|
||||
error=f"Exa API error: {str(exa_error)}"
|
||||
)
|
||||
|
||||
# Check for errors using the correct attribute (statuses is array of status objects)
|
||||
if hasattr(result, 'statuses') and result.statuses:
|
||||
for status in result.statuses:
|
||||
if status.status == "error":
|
||||
logger.error(f"[ExtractUrl] Failed to extract {status.id}: {status.error.tag if hasattr(status.error, 'tag') else 'unknown'}")
|
||||
return ExtractUrlResponse(
|
||||
success=False,
|
||||
url=request.url,
|
||||
error=f"Failed to extract content: {status.error.tag if hasattr(status.error, 'tag') else 'unknown error'}"
|
||||
)
|
||||
|
||||
if not result.results:
|
||||
return ExtractUrlResponse(
|
||||
success=False,
|
||||
url=request.url,
|
||||
error="No content found at the provided URL"
|
||||
)
|
||||
|
||||
# Extract content - safe to access result now
|
||||
content = result.results[0]
|
||||
|
||||
# Extract all available fields from Exa response
|
||||
extracted_text = content.text or ""
|
||||
extracted_summary = getattr(content, 'summary', "") or ""
|
||||
extracted_title = content.title or ""
|
||||
|
||||
# Highlights - extract from content.highlights array if available
|
||||
highlights = []
|
||||
if hasattr(content, 'highlights') and content.highlights:
|
||||
highlights = [h for h in content.highlights if h]
|
||||
|
||||
# Additional fields from Exa response
|
||||
image = getattr(content, 'image', None)
|
||||
favicon = getattr(content, 'favicon', None)
|
||||
|
||||
# Subpages - extract with their own content
|
||||
subpages = []
|
||||
if hasattr(content, 'subpages') and content.subpages:
|
||||
for sp in content.subpages:
|
||||
subpages.append({
|
||||
'id': sp.get('id', ''),
|
||||
'title': sp.get('title', ''),
|
||||
'url': sp.get('url', ''),
|
||||
'summary': sp.get('summary', ''),
|
||||
'text': sp.get('text', '')[:500] if sp.get('text') else '', # First 500 chars
|
||||
})
|
||||
|
||||
logger.warning(f"[ExtractUrl] Successfully extracted {len(extracted_text)} chars from {request.url}")
|
||||
logger.warning(f"[ExtractUrl] title={extracted_title[:50]}, summary={extracted_summary[:50]}, highlights={len(highlights)}, subpages={len(subpages)}")
|
||||
|
||||
return ExtractUrlResponse(
|
||||
success=True,
|
||||
title=extracted_title,
|
||||
text=extracted_text,
|
||||
summary=extracted_summary,
|
||||
author=getattr(content, 'author', None),
|
||||
highlights=highlights,
|
||||
url=request.url,
|
||||
image=image,
|
||||
favicon=favicon,
|
||||
subpages=subpages,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/website-analysis", response_model=WebsiteAnalysisResponse)
|
||||
async def save_website_analysis(
|
||||
request: WebsiteAnalysisRequest,
|
||||
current_user: Dict[str, Any] = Depends(get_current_user),
|
||||
):
|
||||
"""Save the user's website analysis for reuse in future podcasts."""
|
||||
user_id = require_authenticated_user(current_user)
|
||||
|
||||
try:
|
||||
from services.user_data_service import user_data_service
|
||||
|
||||
website_data = {
|
||||
"website_url": request.website_url,
|
||||
"extracted_at": datetime.now().isoformat(),
|
||||
"exa_content": request.exa_content,
|
||||
"full_analysis": None,
|
||||
"analysis_status": "pending",
|
||||
}
|
||||
|
||||
success = user_data_service.save_user_data(
|
||||
user_id=user_id,
|
||||
data_key="website_analysis",
|
||||
data_value=website_data,
|
||||
)
|
||||
|
||||
if success:
|
||||
logger.warning(f"[WebsiteAnalysis] Saved analysis for user {user_id}: {request.website_url}")
|
||||
return WebsiteAnalysisResponse(
|
||||
success=True,
|
||||
website_url=request.website_url,
|
||||
message="Website analysis saved successfully",
|
||||
)
|
||||
else:
|
||||
return WebsiteAnalysisResponse(
|
||||
success=False,
|
||||
error="Failed to save website analysis",
|
||||
)
|
||||
|
||||
except Exception as exc:
|
||||
logger.error(f"[WebsiteAnalysis] Failed to save for user {user_id}: {exc}")
|
||||
return WebsiteAnalysisResponse(
|
||||
success=False,
|
||||
error=f"Failed to save: {str(exc)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/website-extraction")
|
||||
async def get_saved_website_extraction(request: Request = None):
|
||||
"""Get previously saved website extraction data for this user."""
|
||||
try:
|
||||
# Safely get current_user from Depends
|
||||
if request is None or not hasattr(request, 'state'):
|
||||
logger.warning("[WebsiteExtraction] No request or state - user not authenticated")
|
||||
return {"success": False, "data": None, "error": "Not authenticated"}
|
||||
|
||||
current_user = getattr(request.state, 'user', None)
|
||||
if not current_user:
|
||||
logger.warning("[WebsiteExtraction] No user in request state")
|
||||
return {"success": False, "data": None, "error": "Not authenticated"}
|
||||
|
||||
user_id = require_authenticated_user(current_user)
|
||||
|
||||
from services.user_data_service import UserDataService
|
||||
from services.database import get_db
|
||||
db = next(get_db())
|
||||
|
||||
user_service = UserDataService(db)
|
||||
extraction = user_service.get_website_extraction(user_id)
|
||||
|
||||
if extraction:
|
||||
logger.info(f"[WebsiteExtraction] Found saved data for user {user_id}")
|
||||
return {
|
||||
"success": True,
|
||||
"data": extraction
|
||||
}
|
||||
else:
|
||||
logger.info(f"[WebsiteExtraction] No saved data for user {user_id}")
|
||||
return {
|
||||
"success": False,
|
||||
"data": None
|
||||
}
|
||||
|
||||
except Exception as exc:
|
||||
logger.error(f"[WebsiteExtraction] Failed for user: {exc}", exc_info=True)
|
||||
return {
|
||||
"success": False,
|
||||
"error": str(exc)
|
||||
}
|
||||
|
||||
|
||||
@router.post("/website-extraction")
|
||||
async def save_website_extraction(
|
||||
extraction: Dict[str, Any],
|
||||
current_user: Dict[str, Any] = Depends(get_current_user),
|
||||
):
|
||||
"""Save website extraction data for future use."""
|
||||
user_id = require_authenticated_user(current_user)
|
||||
|
||||
try:
|
||||
from services.user_data_service import UserDataService
|
||||
from services.database import get_db
|
||||
db = next(get_db())
|
||||
|
||||
user_service = UserDataService(db)
|
||||
success = user_service.save_website_extraction(user_id, extraction)
|
||||
|
||||
if success:
|
||||
logger.info(f"[WebsiteExtraction] Saved for user {user_id}")
|
||||
return {
|
||||
"success": True,
|
||||
"message": "Website extraction saved"
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"success": False,
|
||||
"error": "Failed to save"
|
||||
}
|
||||
|
||||
except Exception as exc:
|
||||
logger.error(f"[WebsiteExtraction] Save failed: {exc}")
|
||||
return {
|
||||
"success": False,
|
||||
"error": str(exc)
|
||||
}
|
||||
|
||||
|
||||
@router.post("/project/{project_id}/topic-context")
|
||||
async def save_topic_context(
|
||||
project_id: str,
|
||||
topic_context: Dict[str, Any],
|
||||
current_user: Dict[str, Any] = Depends(get_current_user),
|
||||
):
|
||||
"""Save topic context (category research) to a podcast project."""
|
||||
user_id = require_authenticated_user(current_user)
|
||||
|
||||
try:
|
||||
from services.database import get_db
|
||||
from models.podcast_models import PodcastProject
|
||||
|
||||
db = next(get_db())
|
||||
|
||||
# Find the project
|
||||
project = db.query(PodcastProject).filter(
|
||||
PodcastProject.project_id == project_id,
|
||||
PodcastProject.user_id == user_id
|
||||
).first()
|
||||
|
||||
if not project:
|
||||
return {
|
||||
"success": False,
|
||||
"error": "Project not found"
|
||||
}
|
||||
|
||||
# Update topic context
|
||||
project.topic_context = topic_context
|
||||
db.commit()
|
||||
|
||||
logger.info(f"[TopicContext] Saved for project {project_id}")
|
||||
return {
|
||||
"success": True,
|
||||
"message": "Topic context saved"
|
||||
}
|
||||
|
||||
except Exception as exc:
|
||||
logger.error(f"[TopicContext] Save failed: {exc}")
|
||||
return {
|
||||
"success": False,
|
||||
"error": str(exc)
|
||||
}
|
||||
|
||||
|
||||
@router.get("/project/{project_id}/topic-context")
|
||||
async def get_topic_context(
|
||||
project_id: str,
|
||||
current_user: Dict[str, Any] = Depends(get_current_user),
|
||||
):
|
||||
"""Get topic context from a podcast project."""
|
||||
user_id = require_authenticated_user(current_user)
|
||||
|
||||
try:
|
||||
from services.database import get_db
|
||||
from models.podcast_models import PodcastProject
|
||||
|
||||
db = next(get_db())
|
||||
|
||||
project = db.query(PodcastProject).filter(
|
||||
PodcastProject.project_id == project_id,
|
||||
PodcastProject.user_id == user_id
|
||||
).first()
|
||||
|
||||
if not project:
|
||||
return {
|
||||
"success": False,
|
||||
"error": "Project not found"
|
||||
}
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"data": project.topic_context
|
||||
}
|
||||
|
||||
except Exception as exc:
|
||||
logger.error(f"[TopicContext] Get failed: {exc}")
|
||||
return {
|
||||
"success": False,
|
||||
"error": str(exc)
|
||||
}
|
||||
|
||||
251
backend/api/podcast/handlers/tavily_category_research.py
Normal file
251
backend/api/podcast/handlers/tavily_category_research.py
Normal file
@@ -0,0 +1,251 @@
|
||||
"""
|
||||
Category Research Handlers
|
||||
|
||||
Research endpoints using Tavily or Exa for category-based topic discovery.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from typing import Dict, Any, List, Optional
|
||||
from pydantic import BaseModel
|
||||
from loguru import logger
|
||||
from types import SimpleNamespace
|
||||
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from services.research.tavily_service import TavilyService
|
||||
from services.blog_writer.research.exa_provider import ExaResearchProvider
|
||||
|
||||
router = APIRouter(prefix="/research", tags=["Podcast Category Research"])
|
||||
|
||||
CATEGORY_PROVIDER_MAP = {
|
||||
"news": "tavily",
|
||||
"finance": "tavily",
|
||||
"research-paper": "exa",
|
||||
"personal-site": "exa",
|
||||
}
|
||||
|
||||
EXA_CATEGORY_MAP = {
|
||||
"research-paper": "research paper",
|
||||
"personal-site": "personal site",
|
||||
}
|
||||
|
||||
|
||||
class CategoryResearchRequest(BaseModel):
|
||||
category: str
|
||||
keyword: Optional[str] = None
|
||||
max_results: Optional[int] = 8
|
||||
website_url: Optional[str] = None
|
||||
|
||||
|
||||
class CategoryTopic(BaseModel):
|
||||
title: str
|
||||
url: str
|
||||
snippet: str
|
||||
score: float
|
||||
favicon: Optional[str] = None
|
||||
|
||||
|
||||
class CategoryResearchResponse(BaseModel):
|
||||
success: bool
|
||||
category: str
|
||||
provider: str
|
||||
topics: List[CategoryTopic]
|
||||
query: Optional[str] = None
|
||||
error: Optional[str] = None
|
||||
|
||||
|
||||
def _normalize_tavily_results(results: List[Dict]) -> List[CategoryTopic]:
|
||||
topics = []
|
||||
for item in results:
|
||||
topics.append(CategoryTopic(
|
||||
title=item.get("title", ""),
|
||||
url=item.get("url", ""),
|
||||
snippet=item.get("content", ""),
|
||||
score=item.get("score", 0.0),
|
||||
favicon=item.get("favicon"),
|
||||
))
|
||||
return topics
|
||||
|
||||
|
||||
def _normalize_exa_results(results: List[Dict], query: str) -> List[CategoryTopic]:
|
||||
topics = []
|
||||
for idx, item in enumerate(results):
|
||||
score = 1.0 - (idx * 0.1)
|
||||
topics.append(CategoryTopic(
|
||||
title=item.get("title", "") or f"Result {idx + 1}",
|
||||
url=item.get("url", ""),
|
||||
snippet=item.get("summary", "") or item.get("text", "") or "",
|
||||
score=max(0.5, score),
|
||||
favicon=None,
|
||||
))
|
||||
return topics
|
||||
|
||||
|
||||
async def _search_tavily(category: str, keyword: str, max_results: int) -> CategoryResearchResponse:
|
||||
logger.info(f"[CategoryResearch] Using Tavily for category={category}, keyword={keyword}")
|
||||
|
||||
try:
|
||||
tavily = TavilyService()
|
||||
result = await tavily.search(
|
||||
query=keyword,
|
||||
topic=category,
|
||||
search_depth="basic",
|
||||
max_results=max_results,
|
||||
include_favicon=True,
|
||||
)
|
||||
|
||||
if not result.get("success"):
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=result.get("error", "Tavily search failed")
|
||||
)
|
||||
|
||||
topics = _normalize_tavily_results(result.get("results", []))
|
||||
logger.info(f"[CategoryResearch] Tavily found {len(topics)} topics")
|
||||
|
||||
return CategoryResearchResponse(
|
||||
success=True,
|
||||
category=category,
|
||||
provider="tavily",
|
||||
topics=topics,
|
||||
query=keyword,
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"[CategoryResearch] Tavily error: {e}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
async def _search_exa(category: str, keyword: str, max_results: int, website_url: Optional[str] = None) -> CategoryResearchResponse:
|
||||
exa_category = EXA_CATEGORY_MAP.get(category, category)
|
||||
|
||||
logger.info(f"[CategoryResearch] Exa: category={category}, exa_category={exa_category}, keyword={keyword}, website_url={website_url}")
|
||||
|
||||
try:
|
||||
# Import exa directly for more control
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
exa_api_key = os.getenv("EXA_API_KEY")
|
||||
if not exa_api_key:
|
||||
raise HTTPException(status_code=500, detail="EXA_API_KEY not configured")
|
||||
|
||||
from exa_py import Exa
|
||||
exa = Exa(exa_api_key)
|
||||
logger.info(f"[CategoryResearch] Exa client initialized")
|
||||
|
||||
# Build search parameters
|
||||
search_params = {
|
||||
"num_results": max_results,
|
||||
"category": exa_category,
|
||||
}
|
||||
|
||||
# For personal-site, extract domain from URL if provided
|
||||
include_domains = None
|
||||
if category == "personal-site" and website_url:
|
||||
try:
|
||||
parsed = urlparse(website_url)
|
||||
if parsed.netloc:
|
||||
include_domains = [parsed.netloc]
|
||||
logger.info(f"[CategoryResearch] Personal site - limiting to domain: {parsed.netloc}")
|
||||
elif parsed.path and "." in parsed.path:
|
||||
# Could be domain without protocol
|
||||
include_domains = [parsed.path]
|
||||
logger.info(f"[CategoryResearch] Personal site - using as domain: {parsed.path}")
|
||||
except Exception as url_err:
|
||||
logger.warning(f"[CategoryResearch] Failed to parse website_url: {url_err}")
|
||||
|
||||
logger.info(f"[CategoryResearch] Calling Exa with params: {search_params}, include_domains={include_domains}")
|
||||
|
||||
# Make the search call
|
||||
results = exa.search_and_contents(
|
||||
query=keyword,
|
||||
type="auto" if category != "personal-site" else "neural",
|
||||
num_results=max_results,
|
||||
category=exa_category,
|
||||
text=True,
|
||||
summary=True,
|
||||
include_domains=include_domains,
|
||||
)
|
||||
|
||||
logger.info(f"[CategoryResearch] Exa search completed, got results")
|
||||
|
||||
# Transform results to our format
|
||||
topics = []
|
||||
if results and hasattr(results, 'results'):
|
||||
for item in results.results:
|
||||
title = getattr(item, 'title', 'Untitled')
|
||||
url = getattr(item, 'url', '')
|
||||
snippet = getattr(item, 'summary', '') or getattr(item, 'text', '') or ''
|
||||
score = 0.8 # Default score for Exa results
|
||||
|
||||
topics.append(CategoryTopic(
|
||||
title=title,
|
||||
url=url,
|
||||
snippet=snippet[:300] if snippet else '',
|
||||
score=score,
|
||||
favicon=None,
|
||||
))
|
||||
|
||||
logger.info(f"[CategoryResearch] Exa found {len(topics)} topics")
|
||||
|
||||
return CategoryResearchResponse(
|
||||
success=True,
|
||||
category=category,
|
||||
provider="exa",
|
||||
topics=topics,
|
||||
query=keyword,
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
import traceback
|
||||
logger.error(f"[CategoryResearch] Exa error: {type(e).__name__}: {e}")
|
||||
logger.error(f"[CategoryResearch] Stack: {traceback.format_exc()}")
|
||||
raise HTTPException(status_code=500, detail=f"Exa search failed: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/tavily-category", response_model=CategoryResearchResponse)
|
||||
async def research_by_category(
|
||||
request: CategoryResearchRequest,
|
||||
current_user: Dict[str, Any] = Depends(get_current_user),
|
||||
):
|
||||
"""
|
||||
Research topics by category using Tavily or Exa.
|
||||
|
||||
Categories:
|
||||
- news, finance: Uses Tavily
|
||||
- research-paper, personal-site: Uses Exa
|
||||
"""
|
||||
category = request.category.lower()
|
||||
valid_categories = list(CATEGORY_PROVIDER_MAP.keys())
|
||||
|
||||
logger.info(f"[CategoryResearch] Full request payload: category={request.category}, keyword={request.keyword}, website_url={request.website_url}")
|
||||
|
||||
if category not in valid_categories:
|
||||
logger.error(f"[CategoryResearch] Invalid category: {category}, valid: {valid_categories}")
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Category must be one of: {', '.join(valid_categories)}"
|
||||
)
|
||||
|
||||
keyword = request.keyword or category
|
||||
max_results = min(max(request.max_results or 8, 5), 10)
|
||||
website_url = request.website_url
|
||||
|
||||
logger.info(f"[CategoryResearch] Processing: category={category}, keyword={keyword}, max_results={max_results}, website_url={website_url}")
|
||||
|
||||
provider = CATEGORY_PROVIDER_MAP.get(category, "tavily")
|
||||
logger.info(f"[CategoryResearch] Selected provider: {provider} for category: {category}")
|
||||
|
||||
try:
|
||||
if provider == "tavily":
|
||||
return await _search_tavily(category, keyword, max_results)
|
||||
elif provider == "exa":
|
||||
return await _search_exa(category, keyword, max_results, website_url)
|
||||
else:
|
||||
raise HTTPException(status_code=500, detail="Unknown provider")
|
||||
except Exception as e:
|
||||
logger.error(f"[CategoryResearch] Outer error: {type(e).__name__}: {e}", exc_info=True)
|
||||
raise
|
||||
@@ -18,6 +18,7 @@ class PodcastTrendsRequest(BaseModel):
|
||||
keywords: List[str] = Field(..., min_length=1, max_length=5, description="1-5 keywords to analyze")
|
||||
timeframe: str = Field(default="today 12-m", description="Timeframe: 'today 3-m', 'today 12-m', 'today 5-y', 'all'")
|
||||
geo: str = Field(default="US", description="Country code: 'US', 'GB', 'IN', etc.")
|
||||
source: str = Field(default="web", description="Data source: 'web' (Google), 'podcast' (YouTube)")
|
||||
|
||||
|
||||
class PodcastTrendsResponse(BaseModel):
|
||||
@@ -47,12 +48,39 @@ async def get_podcast_trends(
|
||||
|
||||
try:
|
||||
service = GoogleTrendsService()
|
||||
# Map 'source' to 'gprop' - 'podcast' uses YouTube for video/podcast relevance
|
||||
gprop_map = {"": "", "web": "", "podcast": "youtube", "news": "news", "images": "images", "shopping": "froogle"}
|
||||
gprop = gprop_map.get(request.source, "")
|
||||
|
||||
result = await service.analyze_trends(
|
||||
keywords=request.keywords,
|
||||
timeframe=request.timeframe,
|
||||
geo=request.geo,
|
||||
gprop=gprop,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
has_error = result.get("error")
|
||||
has_data = (
|
||||
len(result.get("interest_over_time", [])) > 0
|
||||
or len(result.get("interest_by_region", [])) > 0
|
||||
or len(result.get("related_topics", {}).get("top", [])) > 0
|
||||
or len(result.get("related_topics", {}).get("rising", [])) > 0
|
||||
or len(result.get("related_queries", {}).get("top", [])) > 0
|
||||
or len(result.get("related_queries", {}).get("rising", [])) > 0
|
||||
)
|
||||
|
||||
# Return error if: has error OR no data (meaning blocked/empty)
|
||||
if has_error and not has_data:
|
||||
error_msg = result.get("error", "")
|
||||
logger.warning(f"[Trends] No data or error: {error_msg[:100]}")
|
||||
return PodcastTrendsResponse(success=False, data=result, error=error_msg or "No trends data available. Google may be blocking requests.")
|
||||
|
||||
# Even if no error but empty data - return error
|
||||
if not has_data:
|
||||
logger.warning("[Trends] Empty data returned")
|
||||
return PodcastTrendsResponse(success=False, data=result, error="No trends data available. Please try different keywords.")
|
||||
|
||||
return PodcastTrendsResponse(success=True, data=result)
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
|
||||
@@ -80,6 +80,14 @@ class PodcastEnhanceIdeaRequest(BaseModel):
|
||||
"""Request model for enhancing a podcast idea with AI."""
|
||||
idea: str = Field(..., description="The raw podcast idea or keywords")
|
||||
bible: Optional[Dict[str, Any]] = Field(None, description="Optional Podcast Bible for context")
|
||||
website_data: Optional[Dict[str, Any]] = Field(
|
||||
None,
|
||||
description="Optional website extraction data for enriched context (title, summary, highlights, subpages, url)"
|
||||
)
|
||||
topic_context: Optional[Dict[str, Any]] = Field(
|
||||
None,
|
||||
description="Optional category research context (category, topics, selected_topic)"
|
||||
)
|
||||
|
||||
|
||||
class PodcastEnhanceIdeaResponse(BaseModel):
|
||||
@@ -470,3 +478,59 @@ class VoiceCloneResult(BaseModel):
|
||||
file_size: int
|
||||
task_id: str
|
||||
status: str = "completed"
|
||||
|
||||
|
||||
class ExtractUrlRequest(BaseModel):
|
||||
"""Request to extract content from a URL using Exa."""
|
||||
url: str = Field(..., description="URL to extract content from")
|
||||
|
||||
|
||||
class ExtractUrlResponse(BaseModel):
|
||||
"""Response with extracted content from URL."""
|
||||
success: bool
|
||||
title: Optional[str] = None
|
||||
text: Optional[str] = None
|
||||
summary: Optional[str] = None
|
||||
author: Optional[str] = None
|
||||
highlights: Optional[List[str]] = Field(default_factory=list, description="Key highlights from the content")
|
||||
url: str
|
||||
image: Optional[str] = None
|
||||
favicon: Optional[str] = None
|
||||
subpages: Optional[List[Dict[str, Any]]] = Field(default_factory=list, description="Subpages with their own content")
|
||||
error: Optional[str] = None
|
||||
|
||||
|
||||
class WebsiteAnalysisRequest(BaseModel):
|
||||
"""Request to save user's website analysis."""
|
||||
website_url: str = Field(..., description="The website URL")
|
||||
exa_content: Dict[str, Any] = Field(default_factory=dict, description="Exa extracted content")
|
||||
|
||||
|
||||
class WebsiteAnalysisResponse(BaseModel):
|
||||
"""Response for website analysis."""
|
||||
success: bool
|
||||
website_url: Optional[str] = None
|
||||
message: Optional[str] = None
|
||||
error: Optional[str] = None
|
||||
|
||||
|
||||
class PodcastPreEstimateRequest(BaseModel):
|
||||
"""Request model for pre-analysis cost estimate."""
|
||||
duration: int = Field(default=10, description="Target duration in minutes")
|
||||
speakers: int = Field(default=1, description="Number of speakers")
|
||||
query_count: int = Field(default=3, description="Number of research queries")
|
||||
podcast_mode: str = Field(default="audio_video", description="Podcast mode: audio_only, video_only, or audio_video")
|
||||
# Optional model overrides for cost estimation
|
||||
gemini_model: Optional[str] = Field(default=None, description="LLM model: gemini-2.5-flash, gemini-1.5-flash, etc.")
|
||||
audio_tts_model: Optional[str] = Field(default=None, description="Audio TTS model: minimax/speech-02-hd")
|
||||
voice_clone_engine: Optional[str] = Field(default=None, description="Voice clone engine: qwen3, cosyvoice, minimax")
|
||||
image_model: Optional[str] = Field(default=None, description="Image model: qwen-image, ideogram-v3-turbo")
|
||||
video_model: Optional[str] = Field(default=None, description="Video model: wan-2.5, kling-v2.5-turbo-std-5s, wavespeed-ai/infinitetalk")
|
||||
|
||||
|
||||
class PodcastPreEstimateResponse(BaseModel):
|
||||
"""Response model for pre-analysis cost estimate."""
|
||||
estimate: Optional[Dict[str, Any]] = None
|
||||
error: Optional[str] = None
|
||||
pricing_available: bool = Field(default=False, description="Whether pricing data is available in DB")
|
||||
debug: Optional[Dict[str, Any]] = Field(default=None, description="Debug info: pricing rows count, providers")
|
||||
|
||||
24
backend/api/podcast/prompts/__init__.py
Normal file
24
backend/api/podcast/prompts/__init__.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""
|
||||
Prompts module for podcast topic enhancement.
|
||||
"""
|
||||
|
||||
from .website_enhance_prompts import (
|
||||
get_enhance_topic_prompt,
|
||||
format_website_context,
|
||||
STANDARD_ENHANCE_PROMPT,
|
||||
WEBSITE_AWARE_ENHANCE_PROMPT,
|
||||
)
|
||||
|
||||
from services.podcast_context_builder import (
|
||||
PodcastContextBuilder,
|
||||
context_builder,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"get_enhance_topic_prompt",
|
||||
"format_website_context",
|
||||
"STANDARD_ENHANCE_PROMPT",
|
||||
"WEBSITE_AWARE_ENHANCE_PROMPT",
|
||||
"PodcastContextBuilder",
|
||||
"context_builder",
|
||||
]
|
||||
187
backend/api/podcast/prompts/website_enhance_prompts.py
Normal file
187
backend/api/podcast/prompts/website_enhance_prompts.py
Normal file
@@ -0,0 +1,187 @@
|
||||
"""
|
||||
Website-aware prompts for podcast topic enhancement.
|
||||
|
||||
This module provides prompts for enhancing podcast topics with optional
|
||||
website extraction data for richer context.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, Optional
|
||||
from string import Template
|
||||
|
||||
|
||||
# Standard prompt for when no website data is available
|
||||
STANDARD_ENHANCE_PROMPT = Template("""">You are a creative podcast producer. Generate 3 distinct, compelling podcast episode concepts from the raw idea.
|
||||
|
||||
${bible_context}
|
||||
|
||||
RAW IDEA/KEYWORDS: "$idea"
|
||||
|
||||
TASK:
|
||||
Generate 3 different enhanced versions, each with a unique angle:
|
||||
1. Professional & Expert-led angle (focus on authority, insights, and expertise)
|
||||
2. Storytelling & Human interest angle (focus on narratives, emotions, and personal connections)
|
||||
3. Trendy & Contemporary angle (focus on current trends, modern perspectives, and relevance)
|
||||
|
||||
Each version should be 2-3 sentences, audience-focused, and align with host persona if provided.
|
||||
|
||||
Return JSON with:
|
||||
- enhanced_ideas: array of 3 strings, each string being a complete episode pitch (NOT objects, just plain strings)
|
||||
- rationales: array of 3 strings explaining the approach for each version
|
||||
|
||||
IMPORTANT: enhanced_ideas must be an array of plain strings, NOT objects. Example:
|
||||
{
|
||||
"enhanced_ideas": [
|
||||
"Your expert guide to AI advancement: A practical look at how AI is transforming industries...",
|
||||
"The human stories behind AI innovation: From Silicon Valley to your daily life...",
|
||||
"AI in 2026: What's trending and what's next in artificial intelligence..."
|
||||
],
|
||||
"rationales": [
|
||||
"Professional approach focusing on expertise and authority",
|
||||
"Storytelling approach emphasizing human connection",
|
||||
"Contemporary approach highlighting current relevance"
|
||||
]
|
||||
}
|
||||
""")
|
||||
|
||||
|
||||
# Website-aware prompt for when website data is available
|
||||
WEBSITE_AWARE_ENHANCE_PROMPT = Template("""">You are a creative podcast producer. Generate 3 distinct, compelling podcast episode concepts from the raw idea, enriched with website content analysis.
|
||||
|
||||
${bible_context}
|
||||
|
||||
WEBSITE CONTENT ANALYSIS:
|
||||
${website_context}
|
||||
|
||||
RAW IDEA/KEYWORDS: "$idea"
|
||||
|
||||
TASK:
|
||||
Generate 3 different enhanced versions, each with a unique angle, that INCORPORATE the website content context:
|
||||
1. Professional & Expert-led angle (focus on authority, insights, and expertise from the website)
|
||||
2. Storytelling & Human interest angle (focus on narratives, emotions, and personal connections tied to the brand)
|
||||
3. Trendy & Contemporary angle (focus on current trends, modern perspectives, and relevance leveraging the site's focus areas)
|
||||
|
||||
Each version should:
|
||||
- Be 2-3 sentences
|
||||
- Reference specific elements from the website content when relevant
|
||||
- Be audience-focused and align with host persona if provided
|
||||
- NOT just repeat the website summary - create fresh podcast angles
|
||||
|
||||
Return JSON with:
|
||||
- enhanced_ideas: array of 3 strings, each string being a complete episode pitch (NOT objects, just plain strings)
|
||||
- rationales: array of 3 strings explaining the approach for each version
|
||||
|
||||
IMPORTANT: enhanced_ideas must be an array of plain strings, NOT objects. Example:
|
||||
{
|
||||
"enhanced_ideas": [
|
||||
"Your expert guide to AI advancement: A practical look at how AI is transforming industries...",
|
||||
"The human stories behind AI innovation: From Silicon Valley to your daily life...",
|
||||
"AI in 2026: What's trending and what's next in artificial intelligence..."
|
||||
],
|
||||
"rationales": [
|
||||
"Professional approach focusing on expertise and authority",
|
||||
"Storytelling approach emphasizing human connection",
|
||||
"Contemporary approach highlighting current relevance"
|
||||
]
|
||||
}
|
||||
""")
|
||||
|
||||
|
||||
def get_enhance_topic_prompt(
|
||||
idea: str,
|
||||
bible_context: str = "",
|
||||
website_data: Optional[Dict[str, Any]] = None
|
||||
) -> str:
|
||||
"""
|
||||
Returns the appropriate prompt based on available context.
|
||||
|
||||
Args:
|
||||
idea: The raw podcast idea or keywords
|
||||
bible_context: Optional Podcast Bible context string
|
||||
website_data: Optional website extraction data
|
||||
|
||||
Returns:
|
||||
Formatted prompt string with appropriate context
|
||||
"""
|
||||
# Build bible context section
|
||||
bible_section = f"USER PERSONALIZATION CONTEXT (Podcast Bible):\n{bible_context}\n" if bible_context else ""
|
||||
|
||||
if website_data:
|
||||
# Build website context section
|
||||
website_context_parts = []
|
||||
if website_data.get('url'):
|
||||
website_context_parts.append(f"Source: {website_data.get('url')}")
|
||||
if website_data.get('title'):
|
||||
website_context_parts.append(f"Company/Organization: {website_data.get('title')}")
|
||||
if website_data.get('summary'):
|
||||
website_context_parts.append(f"About: {website_data.get('summary')}")
|
||||
if website_data.get('highlights'):
|
||||
highlights_str = ', '.join(website_data.get('highlights', [])[:3])
|
||||
website_context_parts.append(f"Key Highlights: {highlights_str}")
|
||||
if website_data.get('subpages'):
|
||||
subpages_str = ', '.join([
|
||||
sp.get('title', sp.get('url', ''))
|
||||
for sp in website_data.get('subpages', [])[:3]
|
||||
])
|
||||
website_context_parts.append(f"Subpages: {subpages_str}")
|
||||
|
||||
website_context_str = "\n".join(website_context_parts)
|
||||
|
||||
return WEBSITE_AWARE_ENHANCE_PROMPT.substitute(
|
||||
idea=idea,
|
||||
bible_context=bible_section,
|
||||
website_context=website_context_str
|
||||
)
|
||||
else:
|
||||
return STANDARD_ENHANCE_PROMPT.substitute(
|
||||
idea=idea,
|
||||
bible_context=bible_section
|
||||
)
|
||||
|
||||
|
||||
def format_website_context(website_data: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Format website data for inclusion in progress messages.
|
||||
|
||||
Args:
|
||||
website_data: Website extraction data
|
||||
|
||||
Returns:
|
||||
Formatted string describing what's being used
|
||||
"""
|
||||
parts = []
|
||||
|
||||
if website_data.get('title'):
|
||||
parts.append(f"• {website_data['title']}")
|
||||
|
||||
if website_data.get('summary'):
|
||||
summary_preview = website_data['summary'][:100]
|
||||
parts.append(f"• Summary: {summary_preview}...")
|
||||
|
||||
if website_data.get('highlights'):
|
||||
parts.append(f"• {len(website_data['highlights'])} key highlights")
|
||||
|
||||
if website_data.get('subpages'):
|
||||
parts.append(f"• {len(website_data['subpages'])} subpages analyzed")
|
||||
|
||||
if website_data.get('url'):
|
||||
parts.append(f"• Source: {website_data['url']}")
|
||||
|
||||
return "\n".join(parts) if parts else "Basic website analysis"
|
||||
|
||||
if website_data.get('title'):
|
||||
parts.append(f"• {website_data['title']}")
|
||||
|
||||
if website_data.get('summary'):
|
||||
summary_preview = website_data['summary'][:100]
|
||||
parts.append(f"• Summary: {summary_preview}...")
|
||||
|
||||
if website_data.get('highlights'):
|
||||
parts.append(f"• {len(website_data['highlights'])} key highlights")
|
||||
|
||||
if website_data.get('subpages'):
|
||||
parts.append(f"• {len(website_data['subpages'])} subpages analyzed")
|
||||
|
||||
if website_data.get('url'):
|
||||
parts.append(f"• Source: {website_data['url']}")
|
||||
|
||||
return "\n".join(parts) if parts else "Basic website analysis"
|
||||
@@ -12,7 +12,7 @@ from api.story_writer.utils.auth import require_authenticated_user
|
||||
from api.story_writer.task_manager import task_manager
|
||||
|
||||
# Import all handler routers
|
||||
from .handlers import projects, analysis, research, script, audio, images, video, avatar, dubbing, broll, trends
|
||||
from .handlers import projects, analysis, research, script, audio, images, video, avatar, dubbing, broll, trends, tavily_category_research
|
||||
|
||||
# Create main router
|
||||
router = APIRouter(prefix="/api/podcast", tags=["Podcast Maker"])
|
||||
@@ -29,6 +29,7 @@ router.include_router(avatar.router)
|
||||
router.include_router(dubbing.router)
|
||||
router.include_router(broll.router)
|
||||
router.include_router(trends.router)
|
||||
router.include_router(tavily_category_research.router)
|
||||
|
||||
|
||||
@router.get("/task/{task_id}/status")
|
||||
|
||||
Reference in New Issue
Block a user