""" Podcast Research Handlers Research endpoints using Exa provider and LLM summarization. """ from fastapi import APIRouter, Depends, HTTPException from typing import Dict, Any, List from types import SimpleNamespace import json from middleware.auth_middleware import get_current_user from api.story_writer.utils.auth import require_authenticated_user from services.blog_writer.research.exa_provider import ExaResearchProvider from services.llm_providers.main_text_generation import llm_text_gen from services.podcast_bible_service import PodcastBibleService from loguru import logger from ..models import ( PodcastExaResearchRequest, PodcastExaResearchResponse, PodcastExaSource, PodcastExaConfig, PodcastResearchInsight, ) router = APIRouter() @router.post("/research/exa", response_model=PodcastExaResearchResponse) async def podcast_research_exa( request: PodcastExaResearchRequest, current_user: Dict[str, Any] = Depends(get_current_user), ): """ Run podcast research via Exa and then use LLM to extract deep insights. Uses Podcast Bible and Analysis context for hyper-personalization. """ user_id = require_authenticated_user(current_user) queries = [q.strip() for q in request.queries if q and q.strip()] if not queries: raise HTTPException(status_code=400, detail="At least one query is required for research.") exa_cfg = request.exa_config or PodcastExaConfig() cfg = SimpleNamespace( exa_search_type=exa_cfg.exa_search_type or "auto", exa_category=exa_cfg.exa_category, exa_include_domains=exa_cfg.exa_include_domains or [], exa_exclude_domains=exa_cfg.exa_exclude_domains or [], max_sources=exa_cfg.max_sources or 8, source_types=[], ) provider = ExaResearchProvider() # --- Context Building --- bible_service = PodcastBibleService() bible_context = "" if request.bible: try: from models.podcast_bible_models import PodcastBible bible_data = PodcastBible(**request.bible) bible_context = bible_service.serialize_bible(bible_data) except Exception as exc: logger.warning(f"[Podcast Research] Failed to serialize bible: {exc}") analysis_context = "" if request.analysis: analysis_context = f""" PODCAST ANALYSIS CONTEXT: Audience: {request.analysis.get('audience', 'General')} Content Type: {request.analysis.get('content_type', 'Informative')} Top Keywords: {', '.join(request.analysis.get('top_keywords', []))} """ # Exa search params industry = request.bible.get("brand", {}).get("industry", "") if request.bible else "" target_audience = "" if request.bible: audience_dna = request.bible.get("audience", {}) if audience_dna: interests = ", ".join(audience_dna.get("interests", [])) target_audience = f"Expertise: {audience_dna.get('expertise_level', '')}. Interests: {interests}." try: # 1. RUN EXA SEARCH result = await provider.search( prompt=request.topic, topic=request.topic, industry=industry, target_audience=target_audience, config=cfg, user_id=user_id, ) except Exception as exc: logger.error(f"[Podcast Exa Research] Search failed for user {user_id}: {exc}") raise HTTPException(status_code=500, detail=f"Exa research failed: {exc}") # 2. EXTRACT INSIGHTS VIA LLM raw_content = result.get("content", "") sources = result.get("sources", []) summary = "" key_insights = [] if raw_content and sources: logger.info(f"[Podcast Research] Extracting insights from {len(sources)} sources for user {user_id}") prompt = f""" You are an expert research analyst for a high-end podcast production team. Your task is to analyze the following research data and extract deep, actionable insights for a podcast episode. PODCAST CONTEXT: Topic: {request.topic} {bible_context} {analysis_context} RESEARCH DATA (from {len(sources)} sources): {raw_content} TASK: 1. Provide a comprehensive summary (2-3 paragraphs) of the most important findings. Use Markdown for formatting (bolding, lists). 2. Extract 3-5 "Key Insights". Each insight should have a title and a detailed explanation. 3. For each insight, identify which source indices (e.g. 1, 2) it was derived from. NOTE: The research data includes "Key Highlights", "Summaries", and "Excerpts" from various sources. Pay special attention to the "Key Highlights" sections as they contain the most relevant information extracted by the neural search engine. Return JSON structure: {{ "summary": "Detailed markdown summary...", "key_insights": [ {{ "title": "Insight Title", "content": "Detailed markdown content...", "source_indices": [1, 2] }} ] }} Requirements: - Ensure insights are deep, not just superficial facts. Look for trends, expert opinions, and specific data points. - Tone should be professional, insightful, and ready for a podcast host to discuss. - Avoid generic filler. """ try: llm_response = llm_text_gen(prompt=prompt, user_id=user_id, json_struct=None) # Normalize response if isinstance(llm_response, str): data = json.loads(llm_response) else: data = llm_response summary = data.get("summary", "") key_insights = [PodcastResearchInsight(**insight) for insight in data.get("key_insights", [])] except Exception as exc: logger.error(f"[Podcast Research] LLM Insight extraction failed: {exc}") # Fallback to a basic summary if LLM fails summary = f"Research completed for '{request.topic}'. Found {len(sources)} sources." # Fallback: if summary is still empty (e.g. LLM returned empty string), use raw content first paragraph or basic text if not summary: if raw_content: summary = raw_content[:2000] # Use first 2000 chars of raw content as summary else: summary = f"Research completed for '{request.topic}'. Found {len(sources)} sources." # 3. TRACK USAGE try: cost_total = 0.0 if isinstance(result, dict): cost_total = result.get("cost", {}).get("total", 0.005) if result.get("cost") else 0.005 provider.track_exa_usage(user_id, cost_total) except Exception as track_err: logger.warning(f"[Podcast Exa Research] Failed to track usage: {track_err}") sources_payload = [] for src in sources: try: sources_payload.append(PodcastExaSource(**src)) except Exception: sources_payload.append(PodcastExaSource(**{ "title": src.get("title", ""), "url": src.get("url", ""), "excerpt": src.get("excerpt", ""), "published_at": src.get("published_at"), "highlights": src.get("highlights"), "summary": src.get("summary"), "source_type": src.get("source_type"), "index": src.get("index"), "image": src.get("image"), "author": src.get("author"), })) return PodcastExaResearchResponse( sources=sources_payload, search_queries=result.get("search_queries", queries) if isinstance(result, dict) else queries, summary=summary, key_insights=key_insights, cost=result.get("cost") if isinstance(result, dict) else None, search_type=result.get("search_type") if isinstance(result, dict) else None, provider=result.get("provider", "exa") if isinstance(result, dict) else "exa", content=raw_content, )