- Add POST /podcast/pre-estimate endpoint for cost estimation before analysis - Enhance cost_estimator.py with multi-model support (gemini, audio, voice clone, image, video) - Add detailed cost breakdown (llm, audio, media costs + per-phase breakdown) - Remove redundant pricing seeding from init_alpha_subscription_tiers.py - Add SSOT pricing via PricingService.initialize_default_pricing() - Update TopicUrlInput tooltip to show estimate details - Add debug logging for pricing seeding and pre-estimate - Clean up verbose podcast mode debug logs in app.py
882 lines
34 KiB
Python
882 lines
34 KiB
Python
"""
|
|
Podcast Analysis Handlers
|
|
|
|
Analysis endpoint for podcast ideas.
|
|
"""
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException, Request
|
|
from typing import Dict, Any, Optional, List
|
|
from datetime import datetime
|
|
import json
|
|
import uuid
|
|
from sqlalchemy.orm import Session
|
|
from pydantic import BaseModel
|
|
|
|
from services.database import get_db
|
|
from middleware.auth_middleware import get_current_user
|
|
from api.story_writer.utils.auth import require_authenticated_user
|
|
from services.llm_providers.main_text_generation import llm_text_gen
|
|
from services.llm_providers.main_image_generation import generate_image
|
|
from services.podcast_bible_service import PodcastBibleService
|
|
from utils.asset_tracker import save_asset_to_library
|
|
from loguru import logger
|
|
import os
|
|
from ..constants import get_podcast_media_dir
|
|
from ..prompts import get_enhance_topic_prompt, format_website_context
|
|
from ..models import (
|
|
PodcastAnalyzeRequest,
|
|
PodcastAnalyzeResponse,
|
|
PodcastEnhanceIdeaRequest,
|
|
PodcastEnhanceIdeaResponse,
|
|
ExtractUrlRequest,
|
|
ExtractUrlResponse,
|
|
WebsiteAnalysisRequest,
|
|
WebsiteAnalysisResponse,
|
|
PodcastPreEstimateRequest,
|
|
PodcastPreEstimateResponse,
|
|
)
|
|
from ..cost_estimator import estimate_podcast_cost
|
|
|
|
# Check if running in podcast-only demo mode
|
|
def _is_podcast_only_mode() -> bool:
|
|
"""Check if podcast-only demo mode is enabled."""
|
|
return os.getenv("ALWRITY_ENABLED_FEATURES", "").strip().lower() == "podcast"
|
|
|
|
router = APIRouter()
|
|
|
|
|
|
@router.post("/pre-estimate", response_model=PodcastPreEstimateResponse)
|
|
async def pre_estimate_cost(
|
|
request: PodcastPreEstimateRequest,
|
|
db: Session = Depends(get_db),
|
|
):
|
|
"""
|
|
Lightweight endpoint to estimate podcast creation cost before analysis.
|
|
|
|
Takes user configuration (duration, speakers, query_count, podcast_mode) and returns
|
|
a cost estimate WITHOUT running full analysis.
|
|
|
|
Optional model overrides can be specified to estimate with different models.
|
|
"""
|
|
try:
|
|
include_avatar_phase = request.podcast_mode != "audio_only"
|
|
|
|
estimate = estimate_podcast_cost(
|
|
db=db,
|
|
duration_minutes=request.duration,
|
|
speakers=request.speakers,
|
|
query_count=request.query_count,
|
|
include_avatar_phase=include_avatar_phase,
|
|
# Model overrides if provided
|
|
gemini_model=request.gemini_model or "gemini-2.5-flash",
|
|
audio_tts_model=request.audio_tts_model or "minimax/speech-02-hd",
|
|
voice_clone_engine=request.voice_clone_engine or "qwen3",
|
|
image_model=request.image_model or "qwen-image",
|
|
video_model=request.video_model or "wan-2.5",
|
|
)
|
|
|
|
# Debug: get pricing row count and providers
|
|
from models.subscription_models import APIProviderPricing
|
|
pricing_count = db.query(APIProviderPricing).count()
|
|
providers = db.query(APIProviderPricing.provider).distinct().all()
|
|
provider_list = sorted([p[0].value for p in providers]) if providers else []
|
|
|
|
debug_info = {
|
|
"pricing_rows": pricing_count,
|
|
"providers": provider_list,
|
|
}
|
|
|
|
# Log pricing debug info at warning level
|
|
logger.warning(f"[PRE-ESTIMATE] Pricing debug: rows={pricing_count}, providers={provider_list}")
|
|
logger.warning(f"[PRE-ESTIMATE] Models: llm={request.gemini_model}, tts={request.audio_tts_model}, video={request.video_model}")
|
|
|
|
if estimate is None:
|
|
return PodcastPreEstimateResponse(
|
|
estimate=None,
|
|
error="Pricing data unavailable. Please try again later.",
|
|
pricing_available=False,
|
|
debug=debug_info,
|
|
)
|
|
|
|
return PodcastPreEstimateResponse(
|
|
estimate=estimate,
|
|
error=None,
|
|
pricing_available=True,
|
|
debug=debug_info,
|
|
)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Pre-estimate error: {e}")
|
|
return PodcastPreEstimateResponse(
|
|
estimate=None,
|
|
error=str(e),
|
|
)
|
|
|
|
|
|
@router.post("/idea/enhance", response_model=PodcastEnhanceIdeaResponse)
|
|
async def enhance_podcast_idea(
|
|
request: PodcastEnhanceIdeaRequest,
|
|
current_user: Dict[str, Any] = Depends(get_current_user),
|
|
):
|
|
"""
|
|
Take raw keywords/topic and use AI to craft a presentable, detailed podcast idea.
|
|
Uses the user's Podcast Bible for hyper-personalization if available.
|
|
"""
|
|
user_id = require_authenticated_user(current_user)
|
|
|
|
# Serialize Bible context if provided or generate from onboarding
|
|
# In podcast-only mode, skip bible generation since onboarding is disabled
|
|
bible_context = ""
|
|
if not _is_podcast_only_mode():
|
|
logger.warning(f"[Podcast Enhance] Podcast mode=full — attempting Bible generation for user {user_id}")
|
|
try:
|
|
bible_service = PodcastBibleService()
|
|
if request.bible:
|
|
from models.podcast_bible_models import PodcastBible
|
|
bible_data = PodcastBible(**request.bible)
|
|
bible_context = bible_service.serialize_bible(bible_data)
|
|
else:
|
|
# Generate from onboarding data directly
|
|
bible_obj = bible_service.generate_bible(user_id, "temp_enhance")
|
|
bible_context = bible_service.serialize_bible(bible_obj)
|
|
except Exception as exc:
|
|
logger.warning(f"[Podcast Enhance] Failed to parse or generate bible context: {exc}")
|
|
else:
|
|
# In podcast mode, use the provided bible directly if available
|
|
logger.warning(f"[Podcast Enhance] Podcast mode=podcast_only — skipping Bible generation for user {user_id}")
|
|
if request.bible:
|
|
try:
|
|
from models.podcast_bible_models import PodcastBible
|
|
bible_data = PodcastBible(**request.bible)
|
|
bible_service = PodcastBibleService()
|
|
bible_context = bible_service.serialize_bible(bible_data)
|
|
except Exception as exc:
|
|
logger.debug(f"[Podcast Enhance] Bible parsing skipped in podcast mode: {exc}")
|
|
|
|
# Log what's being used for context
|
|
context_used = []
|
|
if bible_context:
|
|
context_used.append("Podcast Bible")
|
|
if request.website_data:
|
|
context_used.append("Website Extraction")
|
|
if request.topic_context:
|
|
category = request.topic_context.get("category", "unknown")
|
|
context_used.append(f"Category Research ({category})")
|
|
|
|
logger.warning(f"[Podcast Enhance] Generating with context: {', '.join(context_used) if context_used else 'basic idea only'}")
|
|
|
|
# Use new context builder for prompt generation
|
|
from services.podcast_context_builder import context_builder
|
|
context_result = context_builder.build_enhance_context(
|
|
idea=request.idea,
|
|
bible_context=bible_context,
|
|
website_data=request.website_data,
|
|
topic_context=request.topic_context,
|
|
)
|
|
prompt = context_result["prompt"]
|
|
|
|
try:
|
|
raw = llm_text_gen(
|
|
prompt=prompt,
|
|
user_id=user_id,
|
|
json_struct=None,
|
|
preferred_provider=None,
|
|
flow_type="premium_tool",
|
|
)
|
|
|
|
# Normalize response
|
|
if isinstance(raw, str):
|
|
data = json.loads(raw)
|
|
else:
|
|
data = raw
|
|
|
|
# Extract enhanced ideas and rationales with fallbacks
|
|
enhanced_ideas = data.get("enhanced_ideas", [])
|
|
rationales = data.get("rationales", [])
|
|
|
|
# Handle case where LLM returns objects instead of strings
|
|
normalized_ideas = []
|
|
for idea in enhanced_ideas:
|
|
if isinstance(idea, dict):
|
|
# Extract title and description from object
|
|
title = idea.get("title", "")
|
|
description = idea.get("description", "") or idea.get("content", "")
|
|
normalized_ideas.append(f"{title}: {description}" if description else title)
|
|
elif isinstance(idea, str):
|
|
normalized_ideas.append(idea)
|
|
|
|
enhanced_ideas = normalized_ideas
|
|
|
|
# Ensure we have exactly 3 ideas, fallback to original if needed
|
|
if not isinstance(enhanced_ideas, list) or len(enhanced_ideas) != 3:
|
|
# Fallback: create 3 variations of the original idea
|
|
base_idea = request.idea
|
|
enhanced_ideas = [
|
|
f"Expert insights on {base_idea}: A deep dive into industry trends and best practices.",
|
|
f"The human side of {base_idea}: Personal stories and real-world experiences that resonate.",
|
|
f"Modern perspectives on {base_idea}: Current trends and forward-thinking approaches."
|
|
]
|
|
rationales = [
|
|
"Professional approach focusing on expertise and authority",
|
|
"Storytelling approach emphasizing human connection",
|
|
"Contemporary approach highlighting current relevance"
|
|
]
|
|
|
|
# Ensure rationales match the number of ideas
|
|
if not isinstance(rationales, list) or len(rationales) != 3:
|
|
rationales = [
|
|
"Professional angle with expert insights",
|
|
"Storytelling angle with human interest",
|
|
"Trendy angle with contemporary relevance"
|
|
]
|
|
|
|
return PodcastEnhanceIdeaResponse(
|
|
enhanced_ideas=enhanced_ideas[:3], # Ensure exactly 3
|
|
rationales=rationales[:3] # Ensure exactly 3
|
|
)
|
|
except HTTPException:
|
|
# Re-raise HTTPExceptions (e.g., 429 subscription limit) - preserve error details
|
|
raise
|
|
except Exception as exc:
|
|
logger.error(f"[Podcast Enhance] Failed for user {user_id}: {exc}")
|
|
raise HTTPException(status_code=500, detail=f"Enhance failed: {exc}")
|
|
|
|
|
|
@router.post("/analyze", response_model=PodcastAnalyzeResponse)
|
|
async def analyze_podcast_idea(
|
|
request: PodcastAnalyzeRequest,
|
|
current_user: Dict[str, Any] = Depends(get_current_user),
|
|
db: Session = Depends(get_db),
|
|
):
|
|
"""
|
|
Analyze a podcast idea and return podcast-oriented outlines, keywords, and titles.
|
|
If no avatar_url is provided, it generates one automatically based on the host's look.
|
|
"""
|
|
user_id = require_authenticated_user(current_user)
|
|
|
|
# Serialize Bible context if provided or generate from onboarding
|
|
bible_context = ""
|
|
bible_obj = None
|
|
try:
|
|
bible_service = PodcastBibleService()
|
|
if request.bible:
|
|
from models.podcast_bible_models import PodcastBible
|
|
bible_data = PodcastBible(**request.bible)
|
|
bible_context = bible_service.serialize_bible(bible_data)
|
|
bible_obj = bible_data
|
|
else:
|
|
# Generate from onboarding data directly
|
|
bible_obj = bible_service.generate_bible(user_id, "temp_analyze")
|
|
bible_context = bible_service.serialize_bible(bible_obj)
|
|
bible_obj = bible_obj
|
|
except Exception as exc:
|
|
logger.warning(f"[Podcast Analyze] Failed to parse or generate bible context: {exc}")
|
|
|
|
# --- NEW: Generate Presenter Avatar if missing ---
|
|
final_avatar_url = request.avatar_url
|
|
final_avatar_prompt = None
|
|
|
|
# Skip avatar generation for audio_only mode
|
|
podcast_mode = getattr(request, 'podcast_mode', None) or 'video_only'
|
|
should_generate_avatar = not final_avatar_url and podcast_mode != 'audio_only'
|
|
|
|
if should_generate_avatar:
|
|
logger.info(f"[Podcast Analyze] No avatar_url provided, generating one for user {user_id}")
|
|
try:
|
|
# 1. PRE-FLIGHT VALIDATION: Check subscription limits for image generation
|
|
from services.subscription import PricingService
|
|
from services.subscription.preflight_validator import validate_image_generation_operations
|
|
pricing_service = PricingService(db)
|
|
validate_image_generation_operations(
|
|
pricing_service=pricing_service,
|
|
user_id=user_id,
|
|
num_images=1
|
|
)
|
|
|
|
# 2. Build avatar prompt from Bible host look or fallback
|
|
host_look = bible_obj.host.look if bible_obj and bible_obj.host.look else "A professional podcast host"
|
|
visual_style = bible_obj.visual_style.style_preset if bible_obj else "Realistic Photography"
|
|
|
|
final_avatar_prompt = f"Professional headshot of a podcast host, {host_look}, {visual_style} style, clean background, soft studio lighting, center-focused, high resolution, sharp focus, professional photography quality, 16:9 aspect ratio."
|
|
|
|
# 3. Generate the image
|
|
logger.info(f"[Podcast Analyze] Generating avatar with prompt: {final_avatar_prompt}")
|
|
image_result = generate_image(
|
|
prompt=final_avatar_prompt,
|
|
user_id=user_id,
|
|
options={"width": 1024, "height": 1024}
|
|
)
|
|
|
|
# 4. Save to disk and library
|
|
if image_result and image_result.image_bytes:
|
|
img_id = str(uuid.uuid4())[:8]
|
|
filename = f"presenter_podcast_{user_id}_{img_id}.png"
|
|
images_dir = get_podcast_media_dir("image", user_id, ensure_exists=True)
|
|
avatars_dir = images_dir / "avatars"
|
|
avatars_dir.mkdir(parents=True, exist_ok=True)
|
|
output_path = avatars_dir / filename
|
|
|
|
with open(output_path, "wb") as f:
|
|
f.write(image_result.image_bytes)
|
|
|
|
final_avatar_url = f"/api/podcast/images/avatars/{filename}"
|
|
|
|
# Save to asset library for reuse
|
|
save_asset_to_library(
|
|
db=db,
|
|
user_id=user_id,
|
|
asset_type="image",
|
|
source_module="podcast_analysis",
|
|
filename=filename,
|
|
file_url=final_avatar_url,
|
|
title=f"Presenter Avatar - {request.idea[:40]}",
|
|
description=f"AI-generated podcast presenter for: {request.idea}",
|
|
provider=image_result.provider,
|
|
model=image_result.model,
|
|
cost=0.0 # Cost tracked in generate_image
|
|
)
|
|
logger.info(f"[Podcast Analyze] ✅ Generated and saved avatar to {final_avatar_url}")
|
|
except Exception as e:
|
|
logger.error(f"[Podcast Analyze] ❌ Failed to generate avatar: {e}")
|
|
# Non-fatal: continue analysis even if avatar generation fails
|
|
|
|
# --- END: Avatar Generation ---
|
|
|
|
# Incorporate user feedback if provided
|
|
feedback_context = ""
|
|
if request.feedback:
|
|
feedback_context = f"""
|
|
USER REGENERATION FEEDBACK:
|
|
The user was not satisfied with the previous analysis. They provided the following instructions for improvement:
|
|
"{request.feedback}"
|
|
Please prioritize this feedback and adjust the analysis accordingly.
|
|
"""
|
|
|
|
prompt = f"""
|
|
You are an expert podcast producer and research strategist. Given a podcast idea, craft concise podcast-ready assets
|
|
that sound like episode plans (not fiction stories).
|
|
|
|
{f"USER PERSONALIZATION CONTEXT (Podcast Bible):\n{bible_context}\n" if bible_context else ""}
|
|
{feedback_context}
|
|
|
|
Podcast Idea: "{request.idea}"
|
|
Duration: ~{request.duration} minutes
|
|
Speakers: {request.speakers} (host + optional guest)
|
|
|
|
TASK:
|
|
1. Define the target audience and content type aligned with the Bible's "Audience DNA" and "Brand DNA".
|
|
2. Identify 5 high-impact keywords.
|
|
3. Propose 2 episode outlines with factual segments.
|
|
4. Suggest 3 titles.
|
|
5. IMPORTANT: Generate 4-6 specific research queries for Exa. These queries MUST be highly targeted to the episode's topic, the host's expertise level, and the audience's interests as defined in the Bible.
|
|
* Do NOT use generic queries like "latest trends in X".
|
|
* DO use queries that look for case studies, specific data points, expert opinions, or contrasting viewpoints that would make for a deep, insightful podcast conversation.
|
|
|
|
Return JSON with:
|
|
- audience: short target audience description
|
|
- content_type: podcast style/format
|
|
- top_keywords: 5 podcast-relevant keywords/phrases
|
|
- suggested_outlines: 2 items, each with title (<=60 chars) and 4-6 short segments (bullet-friendly, factual)
|
|
- title_suggestions: 3 concise episode titles
|
|
- episode_hook: one compelling 15-30 second opening hook/angle that grabs attention
|
|
- key_takeaways: 3-5 actionable insights listeners will learn
|
|
- guest_talking_points: (if guest included) 3-4 suggested questions/angles for guest interview
|
|
- listener_cta: one clear call-to-action for listeners
|
|
- research_queries: array of {{"query": "string", "rationale": "string"}}
|
|
- exa_suggested_config: suggested Exa search options with:
|
|
- exa_search_type: "auto" | "neural" | "keyword"
|
|
- exa_category: one of ["research paper","news","company","github","tweet","personal site","pdf","financial report","linkedin profile"]
|
|
- exa_include_domains: up to 3 reputable domains
|
|
- exa_exclude_domains: up to 3 domains
|
|
- max_sources: 6-10
|
|
- include_statistics: boolean
|
|
- date_range: one of ["last_month","last_3_months","last_year","all_time"]
|
|
|
|
Requirements:
|
|
- Keep language factual, actionable, and suited for spoken audio.
|
|
- Avoid narrative fiction tone.
|
|
- For research queries: Mix of time-sensitive and evergreen queries:
|
|
- 2-3 queries should focus on latest 2025-2026 developments, trends, and data (use year in query)
|
|
- 2-3 queries should be evergreen/fundamental (concepts, definitions, best practices, proven strategies) - do NOT include years in these
|
|
- Today's date is April 2026.
|
|
"""
|
|
|
|
try:
|
|
raw = llm_text_gen(
|
|
prompt=prompt,
|
|
user_id=user_id,
|
|
json_struct=None,
|
|
preferred_provider=None,
|
|
flow_type="premium_tool",
|
|
)
|
|
except HTTPException:
|
|
# Re-raise HTTPExceptions (e.g., 429 subscription limit) - preserve error details
|
|
raise
|
|
except Exception as exc:
|
|
logger.error(f"[Podcast Analyze] Analysis failed for user {user_id}: {exc}")
|
|
raise HTTPException(status_code=500, detail=f"Analysis failed: {exc}")
|
|
|
|
# Normalize response (accept dict or JSON string)
|
|
if isinstance(raw, str):
|
|
try:
|
|
data = json.loads(raw)
|
|
except json.JSONDecodeError:
|
|
raise HTTPException(status_code=500, detail="LLM returned non-JSON output")
|
|
elif isinstance(raw, dict):
|
|
data = raw
|
|
else:
|
|
raise HTTPException(status_code=500, detail="Unexpected LLM response format")
|
|
|
|
audience = data.get("audience") or "Growth-focused professionals"
|
|
content_type = data.get("content_type") or "Interview + insights"
|
|
top_keywords = data.get("top_keywords") or []
|
|
suggested_outlines = data.get("suggested_outlines") or []
|
|
title_suggestions = data.get("title_suggestions") or []
|
|
episode_hook = data.get("episode_hook") or ""
|
|
key_takeaways = data.get("key_takeaways") or []
|
|
guest_talking_points = data.get("guest_talking_points") or []
|
|
listener_cta = data.get("listener_cta") or ""
|
|
research_queries = data.get("research_queries") or []
|
|
exa_suggested_config = data.get("exa_suggested_config") or None
|
|
estimate = estimate_podcast_cost(
|
|
db=db,
|
|
duration_minutes=request.duration,
|
|
speakers=request.speakers,
|
|
query_count=len(research_queries) if isinstance(research_queries, list) else 0,
|
|
include_avatar_phase=podcast_mode != "audio_only",
|
|
)
|
|
|
|
return PodcastAnalyzeResponse(
|
|
audience=audience,
|
|
content_type=content_type,
|
|
top_keywords=top_keywords,
|
|
suggested_outlines=suggested_outlines,
|
|
title_suggestions=title_suggestions,
|
|
episode_hook=episode_hook,
|
|
key_takeaways=key_takeaways,
|
|
guest_talking_points=guest_talking_points,
|
|
listener_cta=listener_cta,
|
|
research_queries=research_queries,
|
|
exa_suggested_config=exa_suggested_config,
|
|
bible=bible_obj.model_dump() if bible_obj else None,
|
|
avatar_url=final_avatar_url,
|
|
avatar_prompt=final_avatar_prompt,
|
|
estimate=estimate,
|
|
)
|
|
|
|
|
|
class RegenerateQueriesRequest(BaseModel):
|
|
idea: str
|
|
feedback: str
|
|
existing_analysis: Optional[Dict[str, Any]] = None
|
|
bible: Optional[Dict[str, Any]] = None
|
|
|
|
|
|
class RegenerateQueriesResponse(BaseModel):
|
|
research_queries: List[Dict[str, str]]
|
|
|
|
|
|
@router.post("/regenerate-queries", response_model=RegenerateQueriesResponse)
|
|
async def regenerate_research_queries(
|
|
request: RegenerateQueriesRequest,
|
|
current_user: Dict[str, Any] = Depends(get_current_user),
|
|
):
|
|
"""
|
|
Regenerate research queries based on user feedback and existing analysis.
|
|
"""
|
|
user_id = require_authenticated_user(current_user)
|
|
|
|
# Build context from existing analysis
|
|
idea = request.idea
|
|
feedback = request.feedback
|
|
|
|
# Get topic, keywords, audience from existing analysis if provided
|
|
topic = idea
|
|
keywords = ""
|
|
audience = ""
|
|
if request.existing_analysis:
|
|
topic = request.existing_analysis.get("title_suggestions", [idea])[0] if request.existing_analysis.get("title_suggestions") else idea
|
|
keywords = ", ".join(request.existing_analysis.get("top_keywords", [])[:5])
|
|
audience = request.existing_analysis.get("audience", "")
|
|
|
|
# Serialize Bible context if provided
|
|
bible_context = ""
|
|
if request.bible:
|
|
try:
|
|
bible_service = PodcastBibleService()
|
|
from models.podcast_bible_models import PodcastBible
|
|
bible_data = PodcastBible(**request.bible)
|
|
bible_context = bible_service.serialize_bible(bible_data)
|
|
except Exception as e:
|
|
logger.warning(f"Failed to serialize bible for query regeneration: {e}")
|
|
|
|
prompt = f"""
|
|
You are a research strategist for podcast content. Given a podcast idea, existing analysis, and user feedback,
|
|
generate 7 new research queries that address the user's specific needs.
|
|
|
|
{f"USER FEEDBACK: {feedback}" if feedback else ""}
|
|
|
|
{f"EXISTING ANALYSIS CONTEXT:\n- Topic: {topic}\n- Keywords: {keywords}\n- Audience: {audience}\n" if request.existing_analysis else ""}
|
|
{f"PODCAST BIBLE CONTEXT:\n{bible_context}\n" if bible_context else ""}
|
|
|
|
Podcast Idea: "{idea}"
|
|
|
|
TASK:
|
|
Generate exactly 7 research queries that:
|
|
1. Incorporate the user's feedback direction
|
|
2. Build on the existing analysis context
|
|
3. Mix of time-sensitive (2025-2026) and evergreen topics
|
|
4. Are highly specific to the podcast topic
|
|
|
|
Return JSON with:
|
|
- research_queries: array of {{"query": "string", "rationale": "string"}}
|
|
|
|
Requirements:
|
|
- At least 2-3 queries should focus on latest 2025-2026 developments (include year in query)
|
|
- At least 2-3 queries should be evergreen (concepts, definitions, best practices - NO year)
|
|
- Queries should be specific and actionable, not generic
|
|
"""
|
|
|
|
try:
|
|
from services.llm_providers.main_text_generation import llm_text_gen
|
|
|
|
raw = llm_text_gen(
|
|
prompt=prompt,
|
|
user_id=user_id,
|
|
json_struct={"research_queries": [{"query": "string", "rationale": "string"}]},
|
|
preferred_provider=None,
|
|
flow_type="premium_tool",
|
|
)
|
|
|
|
# Parse response
|
|
if isinstance(raw, dict):
|
|
queries = raw.get("research_queries", [])
|
|
else:
|
|
# Try to parse as JSON
|
|
try:
|
|
parsed = json.loads(raw) if isinstance(raw, str) else raw
|
|
queries = parsed.get("research_queries", []) if isinstance(parsed, dict) else []
|
|
except:
|
|
queries = []
|
|
|
|
return RegenerateQueriesResponse(research_queries=queries[:7])
|
|
|
|
except HTTPException:
|
|
raise
|
|
except Exception as exc:
|
|
logger.error(f"[Regenerate Queries] Failed for user {user_id}: {exc}")
|
|
raise HTTPException(status_code=500, detail=f"Regenerate queries failed: {exc}")
|
|
|
|
|
|
@router.post("/extract-url", response_model=ExtractUrlResponse)
|
|
async def extract_url_content(
|
|
request: ExtractUrlRequest,
|
|
current_user: Dict[str, Any] = Depends(get_current_user),
|
|
):
|
|
"""
|
|
Extract content from a URL using Exa's get_contents API.
|
|
|
|
This allows users to paste a blog post or article URL as their podcast topic,
|
|
and we'll extract the content to use as the podcast idea.
|
|
"""
|
|
user_id = require_authenticated_user(current_user)
|
|
|
|
from exa_py import Exa
|
|
import os
|
|
|
|
api_key = os.getenv("EXA_API_KEY")
|
|
if not api_key:
|
|
raise HTTPException(status_code=500, detail="EXA_API_KEY not configured")
|
|
|
|
exa = Exa(api_key)
|
|
|
|
logger.warning(f"[ExtractUrl] Extracting content from: {request.url} for user {user_id}")
|
|
|
|
try:
|
|
result = exa.get_contents(
|
|
urls=[request.url],
|
|
text=True,
|
|
highlights=True,
|
|
summary=True,
|
|
subpages=2,
|
|
)
|
|
except Exception as exa_error:
|
|
logger.error(f"[ExtractUrl] Exa call error: {exa_error}")
|
|
return ExtractUrlResponse(
|
|
success=False,
|
|
url=request.url,
|
|
error=f"Exa API error: {str(exa_error)}"
|
|
)
|
|
|
|
# Check for errors using the correct attribute (statuses is array of status objects)
|
|
if hasattr(result, 'statuses') and result.statuses:
|
|
for status in result.statuses:
|
|
if status.status == "error":
|
|
logger.error(f"[ExtractUrl] Failed to extract {status.id}: {status.error.tag if hasattr(status.error, 'tag') else 'unknown'}")
|
|
return ExtractUrlResponse(
|
|
success=False,
|
|
url=request.url,
|
|
error=f"Failed to extract content: {status.error.tag if hasattr(status.error, 'tag') else 'unknown error'}"
|
|
)
|
|
|
|
if not result.results:
|
|
return ExtractUrlResponse(
|
|
success=False,
|
|
url=request.url,
|
|
error="No content found at the provided URL"
|
|
)
|
|
|
|
# Extract content - safe to access result now
|
|
content = result.results[0]
|
|
|
|
# Extract all available fields from Exa response
|
|
extracted_text = content.text or ""
|
|
extracted_summary = getattr(content, 'summary', "") or ""
|
|
extracted_title = content.title or ""
|
|
|
|
# Highlights - extract from content.highlights array if available
|
|
highlights = []
|
|
if hasattr(content, 'highlights') and content.highlights:
|
|
highlights = [h for h in content.highlights if h]
|
|
|
|
# Additional fields from Exa response
|
|
image = getattr(content, 'image', None)
|
|
favicon = getattr(content, 'favicon', None)
|
|
|
|
# Subpages - extract with their own content
|
|
subpages = []
|
|
if hasattr(content, 'subpages') and content.subpages:
|
|
for sp in content.subpages:
|
|
subpages.append({
|
|
'id': sp.get('id', ''),
|
|
'title': sp.get('title', ''),
|
|
'url': sp.get('url', ''),
|
|
'summary': sp.get('summary', ''),
|
|
'text': sp.get('text', '')[:500] if sp.get('text') else '', # First 500 chars
|
|
})
|
|
|
|
logger.warning(f"[ExtractUrl] Successfully extracted {len(extracted_text)} chars from {request.url}")
|
|
logger.warning(f"[ExtractUrl] title={extracted_title[:50]}, summary={extracted_summary[:50]}, highlights={len(highlights)}, subpages={len(subpages)}")
|
|
|
|
return ExtractUrlResponse(
|
|
success=True,
|
|
title=extracted_title,
|
|
text=extracted_text,
|
|
summary=extracted_summary,
|
|
author=getattr(content, 'author', None),
|
|
highlights=highlights,
|
|
url=request.url,
|
|
image=image,
|
|
favicon=favicon,
|
|
subpages=subpages,
|
|
)
|
|
|
|
|
|
@router.post("/website-analysis", response_model=WebsiteAnalysisResponse)
|
|
async def save_website_analysis(
|
|
request: WebsiteAnalysisRequest,
|
|
current_user: Dict[str, Any] = Depends(get_current_user),
|
|
):
|
|
"""Save the user's website analysis for reuse in future podcasts."""
|
|
user_id = require_authenticated_user(current_user)
|
|
|
|
try:
|
|
from services.user_data_service import user_data_service
|
|
|
|
website_data = {
|
|
"website_url": request.website_url,
|
|
"extracted_at": datetime.now().isoformat(),
|
|
"exa_content": request.exa_content,
|
|
"full_analysis": None,
|
|
"analysis_status": "pending",
|
|
}
|
|
|
|
success = user_data_service.save_user_data(
|
|
user_id=user_id,
|
|
data_key="website_analysis",
|
|
data_value=website_data,
|
|
)
|
|
|
|
if success:
|
|
logger.warning(f"[WebsiteAnalysis] Saved analysis for user {user_id}: {request.website_url}")
|
|
return WebsiteAnalysisResponse(
|
|
success=True,
|
|
website_url=request.website_url,
|
|
message="Website analysis saved successfully",
|
|
)
|
|
else:
|
|
return WebsiteAnalysisResponse(
|
|
success=False,
|
|
error="Failed to save website analysis",
|
|
)
|
|
|
|
except Exception as exc:
|
|
logger.error(f"[WebsiteAnalysis] Failed to save for user {user_id}: {exc}")
|
|
return WebsiteAnalysisResponse(
|
|
success=False,
|
|
error=f"Failed to save: {str(exc)}"
|
|
)
|
|
|
|
|
|
@router.get("/website-extraction")
|
|
async def get_saved_website_extraction(request: Request = None):
|
|
"""Get previously saved website extraction data for this user."""
|
|
try:
|
|
# Safely get current_user from Depends
|
|
if request is None or not hasattr(request, 'state'):
|
|
logger.warning("[WebsiteExtraction] No request or state - user not authenticated")
|
|
return {"success": False, "data": None, "error": "Not authenticated"}
|
|
|
|
current_user = getattr(request.state, 'user', None)
|
|
if not current_user:
|
|
logger.warning("[WebsiteExtraction] No user in request state")
|
|
return {"success": False, "data": None, "error": "Not authenticated"}
|
|
|
|
user_id = require_authenticated_user(current_user)
|
|
|
|
from services.user_data_service import UserDataService
|
|
from services.database import get_db
|
|
db = next(get_db())
|
|
|
|
user_service = UserDataService(db)
|
|
extraction = user_service.get_website_extraction(user_id)
|
|
|
|
if extraction:
|
|
logger.info(f"[WebsiteExtraction] Found saved data for user {user_id}")
|
|
return {
|
|
"success": True,
|
|
"data": extraction
|
|
}
|
|
else:
|
|
logger.info(f"[WebsiteExtraction] No saved data for user {user_id}")
|
|
return {
|
|
"success": False,
|
|
"data": None
|
|
}
|
|
|
|
except Exception as exc:
|
|
logger.error(f"[WebsiteExtraction] Failed for user: {exc}", exc_info=True)
|
|
return {
|
|
"success": False,
|
|
"error": str(exc)
|
|
}
|
|
|
|
|
|
@router.post("/website-extraction")
|
|
async def save_website_extraction(
|
|
extraction: Dict[str, Any],
|
|
current_user: Dict[str, Any] = Depends(get_current_user),
|
|
):
|
|
"""Save website extraction data for future use."""
|
|
user_id = require_authenticated_user(current_user)
|
|
|
|
try:
|
|
from services.user_data_service import UserDataService
|
|
from services.database import get_db
|
|
db = next(get_db())
|
|
|
|
user_service = UserDataService(db)
|
|
success = user_service.save_website_extraction(user_id, extraction)
|
|
|
|
if success:
|
|
logger.info(f"[WebsiteExtraction] Saved for user {user_id}")
|
|
return {
|
|
"success": True,
|
|
"message": "Website extraction saved"
|
|
}
|
|
else:
|
|
return {
|
|
"success": False,
|
|
"error": "Failed to save"
|
|
}
|
|
|
|
except Exception as exc:
|
|
logger.error(f"[WebsiteExtraction] Save failed: {exc}")
|
|
return {
|
|
"success": False,
|
|
"error": str(exc)
|
|
}
|
|
|
|
|
|
@router.post("/project/{project_id}/topic-context")
|
|
async def save_topic_context(
|
|
project_id: str,
|
|
topic_context: Dict[str, Any],
|
|
current_user: Dict[str, Any] = Depends(get_current_user),
|
|
):
|
|
"""Save topic context (category research) to a podcast project."""
|
|
user_id = require_authenticated_user(current_user)
|
|
|
|
try:
|
|
from services.database import get_db
|
|
from models.podcast_models import PodcastProject
|
|
|
|
db = next(get_db())
|
|
|
|
# Find the project
|
|
project = db.query(PodcastProject).filter(
|
|
PodcastProject.project_id == project_id,
|
|
PodcastProject.user_id == user_id
|
|
).first()
|
|
|
|
if not project:
|
|
return {
|
|
"success": False,
|
|
"error": "Project not found"
|
|
}
|
|
|
|
# Update topic context
|
|
project.topic_context = topic_context
|
|
db.commit()
|
|
|
|
logger.info(f"[TopicContext] Saved for project {project_id}")
|
|
return {
|
|
"success": True,
|
|
"message": "Topic context saved"
|
|
}
|
|
|
|
except Exception as exc:
|
|
logger.error(f"[TopicContext] Save failed: {exc}")
|
|
return {
|
|
"success": False,
|
|
"error": str(exc)
|
|
}
|
|
|
|
|
|
@router.get("/project/{project_id}/topic-context")
|
|
async def get_topic_context(
|
|
project_id: str,
|
|
current_user: Dict[str, Any] = Depends(get_current_user),
|
|
):
|
|
"""Get topic context from a podcast project."""
|
|
user_id = require_authenticated_user(current_user)
|
|
|
|
try:
|
|
from services.database import get_db
|
|
from models.podcast_models import PodcastProject
|
|
|
|
db = next(get_db())
|
|
|
|
project = db.query(PodcastProject).filter(
|
|
PodcastProject.project_id == project_id,
|
|
PodcastProject.user_id == user_id
|
|
).first()
|
|
|
|
if not project:
|
|
return {
|
|
"success": False,
|
|
"error": "Project not found"
|
|
}
|
|
|
|
return {
|
|
"success": True,
|
|
"data": project.topic_context
|
|
}
|
|
|
|
except Exception as exc:
|
|
logger.error(f"[TopicContext] Get failed: {exc}")
|
|
return {
|
|
"success": False,
|
|
"error": str(exc)
|
|
}
|