Analyzing your idea with AI...

2026-04-19 13:21:36 +05:30
parent e704aa7d87
commit 0732887c09
17 changed files with 6225 additions and 0 deletions
--- a/_session_backup/research.py
+++ b/_session_backup/research.py
@@ -0,0 +1,244 @@
+"""
+Podcast Research Handlers
+
+Research endpoints using Exa provider and LLM summarization.
+"""
+
+from fastapi import APIRouter, Depends, HTTPException
+from typing import Dict, Any, List
+from types import SimpleNamespace
+import json
+
+from middleware.auth_middleware import get_current_user
+from api.story_writer.utils.auth import require_authenticated_user
+from services.blog_writer.research.exa_provider import ExaResearchProvider
+from services.llm_providers.main_text_generation import llm_text_gen
+from services.podcast_bible_service import PodcastBibleService
+from loguru import logger
+from ..models import (
+    PodcastExaResearchRequest,
+    PodcastExaResearchResponse,
+    PodcastExaSource,
+    PodcastExaConfig,
+    PodcastResearchInsight,
+)
+
+router = APIRouter()
+
+
+@router.post("/research/exa", response_model=PodcastExaResearchResponse)
+async def podcast_research_exa(
+    request: PodcastExaResearchRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+):
+    """
+    Run podcast research via Exa and then use LLM to extract deep insights.
+    Uses Podcast Bible and Analysis context for hyper-personalization.
+    """
+    user_id = require_authenticated_user(current_user)
+
+    queries = [q.strip() for q in request.queries if q and q.strip()]
+    if not queries:
+        raise HTTPException(status_code=400, detail="At least one query is required for research.")
+
+    exa_cfg = request.exa_config or PodcastExaConfig()
+    cfg = SimpleNamespace(
+        exa_search_type=exa_cfg.exa_search_type or "auto",
+        exa_category=exa_cfg.exa_category,
+        exa_include_domains=exa_cfg.exa_include_domains or [],
+        exa_exclude_domains=exa_cfg.exa_exclude_domains or [],
+        max_sources=exa_cfg.max_sources or 8,
+        source_types=[],
+    )
+
+    provider = ExaResearchProvider()
+    
+    # --- Context Building ---
+    bible_service = PodcastBibleService()
+    bible_context = ""
+    if request.bible:
+        try:
+            from models.podcast_bible_models import PodcastBible
+            bible_data = PodcastBible(**request.bible)
+            bible_context = bible_service.serialize_bible(bible_data)
+        except Exception as exc:
+            logger.warning(f"[Podcast Research] Failed to serialize bible: {exc}")
+
+    analysis_context = ""
+    if request.analysis:
+        analysis_context = f"""
+PODCAST ANALYSIS CONTEXT:
+Audience: {request.analysis.get('audience', 'General')}
+Content Type: {request.analysis.get('content_type', 'Informative')}
+Top Keywords: {', '.join(request.analysis.get('top_keywords', []))}
+"""
+
+    # Exa search params
+    industry = request.bible.get("brand", {}).get("industry", "") if request.bible else ""
+    target_audience = ""
+    if request.bible:
+        audience_dna = request.bible.get("audience", {})
+        if audience_dna:
+            interests = ", ".join(audience_dna.get("interests", []))
+            target_audience = f"Expertise: {audience_dna.get('expertise_level', '')}. Interests: {interests}."
+
+    try:
+        # 1. RUN EXA SEARCH
+        result = await provider.search(
+            prompt=request.topic,
+            topic=request.topic,
+            industry=industry,
+            target_audience=target_audience,
+            config=cfg,
+            user_id=user_id,
+        )
+    except Exception as exc:
+        logger.error(f"[Podcast Exa Research] Search failed for user {user_id}: {exc}")
+        raise HTTPException(status_code=500, detail=f"Exa research failed: {exc}")
+
+    # 2. EXTRACT INSIGHTS VIA LLM
+    raw_content = result.get("content", "")
+    sources = result.get("sources", [])
+    
+    summary = ""
+    key_insights = []
+    expert_quotes = []
+    listener_cta = []
+    mapped_angles = []
+    
+    if raw_content and sources:
+        logger.info(f"[Podcast Research] Extracting insights from {len(sources)} sources for user {user_id}")
+        
+        prompt = f"""
+You are an expert research analyst for a high-end podcast production team. 
+Your task is to analyze the following research data and extract deep, actionable insights for a podcast episode.
+
+PODCAST CONTEXT:
+Topic: {request.topic}
+{bible_context}
+{analysis_context}
+
+RESEARCH DATA (from {len(sources)} sources):
+{raw_content}
+
+TASK:
+1. Provide a comprehensive summary (2-3 paragraphs) of the most important findings. Use Markdown for formatting (bolding, lists).
+2. Extract 3-5 "Key Insights". Each insight should have a title and a detailed explanation.
+3. For each insight, identify which source indices (e.g. 1, 2) it was derived from.
+4. Extract notable "Expert Quotes" - direct quotes from industry leaders, researchers, or authoritative voices found in the sources.
+5. Suggest 2-4 "Listener CTA" (call-to-action) ideas that the podcast host can use to engage the audience.
+6. Identify 3-5 "Mapped Angles" - unique content angles with rationale for why they matter for this topic.
+
+NOTE: The research data includes "Key Highlights", "Summaries", and "Excerpts" from various sources. 
+Pay special attention to the "Key Highlights" sections as they contain the most relevant information extracted by the neural search engine.
+
+Return JSON structure:
+{{
+  "summary": "Detailed markdown summary...",
+  "key_insights": [
+    {{
+      "title": "Insight Title",
+      "content": "Detailed markdown content...",
+      "source_indices": [1, 2]
+    }}
+  ],
+  "expert_quotes": [
+    {{
+      "quote": "Exact quote from source...",
+      "source_index": 1
+    }}
+  ],
+  "listener_cta": [
+    "Call-to-action suggestion 1",
+    "Call-to-action suggestion 2"
+  ],
+  "mapped_angles": [
+    {{
+      "title": "Angle Title",
+      "why": "Why this angle matters for the audience...",
+      "mapped_fact_ids": ["fact_1", "fact_2"]
+    }}
+  ]
+}}
+
+Requirements:
+- Ensure insights are deep, not just superficial facts. Look for trends, expert opinions, and specific data points.
+- Expert quotes should be exact or near-exact quotes from the sources, with attribution.
+- Listener CTAs should be practical and engaging (e.g., "Share your experience with X on social media").
+- Mapped angles should be unique perspectives that make the episode stand out.
+- Tone should be professional, insightful, and ready for a podcast host to discuss.
+- Avoid generic filler.
+"""
+        try:
+            llm_response = llm_text_gen(
+                prompt=prompt,
+                user_id=user_id,
+                json_struct=None,
+                preferred_provider="huggingface",
+                flow_type="premium_tool",
+            )
+            
+            # Normalize response
+            if isinstance(llm_response, str):
+                data = json.loads(llm_response)
+            else:
+                data = llm_response
+                
+            summary = data.get("summary", "")
+            key_insights = [PodcastResearchInsight(**insight) for insight in data.get("key_insights", [])]
+            expert_quotes = data.get("expert_quotes", [])
+            listener_cta = data.get("listener_cta", [])
+            mapped_angles = data.get("mapped_angles", [])
+        except Exception as exc:
+            logger.error(f"[Podcast Research] LLM Insight extraction failed: {exc}")
+            # Fallback to a basic summary if LLM fails
+            summary = f"Research completed for '{request.topic}'. Found {len(sources)} sources."
+            
+    # Fallback: if summary is still empty (e.g. LLM returned empty string), use raw content first paragraph or basic text
+    if not summary:
+        if raw_content:
+            summary = raw_content[:2000] # Use first 2000 chars of raw content as summary
+        else:
+            summary = f"Research completed for '{request.topic}'. Found {len(sources)} sources."
+
+    # 3. TRACK USAGE
+    try:
+        cost_total = 0.0
+        if isinstance(result, dict):
+            cost_total = result.get("cost", {}).get("total", 0.005) if result.get("cost") else 0.005
+        provider.track_exa_usage(user_id, cost_total)
+    except Exception as track_err:
+        logger.warning(f"[Podcast Exa Research] Failed to track usage: {track_err}")
+
+    sources_payload = []
+    for src in sources:
+        try:
+            sources_payload.append(PodcastExaSource(**src))
+        except Exception:
+            sources_payload.append(PodcastExaSource(**{
+                "title": src.get("title", ""),
+                "url": src.get("url", ""),
+                "excerpt": src.get("excerpt", ""),
+                "published_at": src.get("published_at"),
+                "highlights": src.get("highlights"),
+                "summary": src.get("summary"),
+                "source_type": src.get("source_type"),
+                "index": src.get("index"),
+                "image": src.get("image"),
+                "author": src.get("author"),
+            }))
+
+    return PodcastExaResearchResponse(
+        sources=sources_payload,
+        search_queries=result.get("search_queries", queries) if isinstance(result, dict) else queries,
+        summary=summary,
+        key_insights=key_insights,
+        expert_quotes=expert_quotes,
+        listener_cta=listener_cta,
+        mapped_angles=mapped_angles,
+        cost=result.get("cost") if isinstance(result, dict) else None,
+        search_type=result.get("search_type") if isinstance(result, dict) else None,
+        provider=result.get("provider", "exa") if isinstance(result, dict) else "exa",
+        content=raw_content,
+    )
+