Analyzing your idea with AI...
This commit is contained in:
244
_session_backup/research.py
Normal file
244
_session_backup/research.py
Normal file
@@ -0,0 +1,244 @@
|
||||
"""
|
||||
Podcast Research Handlers
|
||||
|
||||
Research endpoints using Exa provider and LLM summarization.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from typing import Dict, Any, List
|
||||
from types import SimpleNamespace
|
||||
import json
|
||||
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
from services.blog_writer.research.exa_provider import ExaResearchProvider
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from services.podcast_bible_service import PodcastBibleService
|
||||
from loguru import logger
|
||||
from ..models import (
|
||||
PodcastExaResearchRequest,
|
||||
PodcastExaResearchResponse,
|
||||
PodcastExaSource,
|
||||
PodcastExaConfig,
|
||||
PodcastResearchInsight,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/research/exa", response_model=PodcastExaResearchResponse)
|
||||
async def podcast_research_exa(
|
||||
request: PodcastExaResearchRequest,
|
||||
current_user: Dict[str, Any] = Depends(get_current_user),
|
||||
):
|
||||
"""
|
||||
Run podcast research via Exa and then use LLM to extract deep insights.
|
||||
Uses Podcast Bible and Analysis context for hyper-personalization.
|
||||
"""
|
||||
user_id = require_authenticated_user(current_user)
|
||||
|
||||
queries = [q.strip() for q in request.queries if q and q.strip()]
|
||||
if not queries:
|
||||
raise HTTPException(status_code=400, detail="At least one query is required for research.")
|
||||
|
||||
exa_cfg = request.exa_config or PodcastExaConfig()
|
||||
cfg = SimpleNamespace(
|
||||
exa_search_type=exa_cfg.exa_search_type or "auto",
|
||||
exa_category=exa_cfg.exa_category,
|
||||
exa_include_domains=exa_cfg.exa_include_domains or [],
|
||||
exa_exclude_domains=exa_cfg.exa_exclude_domains or [],
|
||||
max_sources=exa_cfg.max_sources or 8,
|
||||
source_types=[],
|
||||
)
|
||||
|
||||
provider = ExaResearchProvider()
|
||||
|
||||
# --- Context Building ---
|
||||
bible_service = PodcastBibleService()
|
||||
bible_context = ""
|
||||
if request.bible:
|
||||
try:
|
||||
from models.podcast_bible_models import PodcastBible
|
||||
bible_data = PodcastBible(**request.bible)
|
||||
bible_context = bible_service.serialize_bible(bible_data)
|
||||
except Exception as exc:
|
||||
logger.warning(f"[Podcast Research] Failed to serialize bible: {exc}")
|
||||
|
||||
analysis_context = ""
|
||||
if request.analysis:
|
||||
analysis_context = f"""
|
||||
PODCAST ANALYSIS CONTEXT:
|
||||
Audience: {request.analysis.get('audience', 'General')}
|
||||
Content Type: {request.analysis.get('content_type', 'Informative')}
|
||||
Top Keywords: {', '.join(request.analysis.get('top_keywords', []))}
|
||||
"""
|
||||
|
||||
# Exa search params
|
||||
industry = request.bible.get("brand", {}).get("industry", "") if request.bible else ""
|
||||
target_audience = ""
|
||||
if request.bible:
|
||||
audience_dna = request.bible.get("audience", {})
|
||||
if audience_dna:
|
||||
interests = ", ".join(audience_dna.get("interests", []))
|
||||
target_audience = f"Expertise: {audience_dna.get('expertise_level', '')}. Interests: {interests}."
|
||||
|
||||
try:
|
||||
# 1. RUN EXA SEARCH
|
||||
result = await provider.search(
|
||||
prompt=request.topic,
|
||||
topic=request.topic,
|
||||
industry=industry,
|
||||
target_audience=target_audience,
|
||||
config=cfg,
|
||||
user_id=user_id,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error(f"[Podcast Exa Research] Search failed for user {user_id}: {exc}")
|
||||
raise HTTPException(status_code=500, detail=f"Exa research failed: {exc}")
|
||||
|
||||
# 2. EXTRACT INSIGHTS VIA LLM
|
||||
raw_content = result.get("content", "")
|
||||
sources = result.get("sources", [])
|
||||
|
||||
summary = ""
|
||||
key_insights = []
|
||||
expert_quotes = []
|
||||
listener_cta = []
|
||||
mapped_angles = []
|
||||
|
||||
if raw_content and sources:
|
||||
logger.info(f"[Podcast Research] Extracting insights from {len(sources)} sources for user {user_id}")
|
||||
|
||||
prompt = f"""
|
||||
You are an expert research analyst for a high-end podcast production team.
|
||||
Your task is to analyze the following research data and extract deep, actionable insights for a podcast episode.
|
||||
|
||||
PODCAST CONTEXT:
|
||||
Topic: {request.topic}
|
||||
{bible_context}
|
||||
{analysis_context}
|
||||
|
||||
RESEARCH DATA (from {len(sources)} sources):
|
||||
{raw_content}
|
||||
|
||||
TASK:
|
||||
1. Provide a comprehensive summary (2-3 paragraphs) of the most important findings. Use Markdown for formatting (bolding, lists).
|
||||
2. Extract 3-5 "Key Insights". Each insight should have a title and a detailed explanation.
|
||||
3. For each insight, identify which source indices (e.g. 1, 2) it was derived from.
|
||||
4. Extract notable "Expert Quotes" - direct quotes from industry leaders, researchers, or authoritative voices found in the sources.
|
||||
5. Suggest 2-4 "Listener CTA" (call-to-action) ideas that the podcast host can use to engage the audience.
|
||||
6. Identify 3-5 "Mapped Angles" - unique content angles with rationale for why they matter for this topic.
|
||||
|
||||
NOTE: The research data includes "Key Highlights", "Summaries", and "Excerpts" from various sources.
|
||||
Pay special attention to the "Key Highlights" sections as they contain the most relevant information extracted by the neural search engine.
|
||||
|
||||
Return JSON structure:
|
||||
{{
|
||||
"summary": "Detailed markdown summary...",
|
||||
"key_insights": [
|
||||
{{
|
||||
"title": "Insight Title",
|
||||
"content": "Detailed markdown content...",
|
||||
"source_indices": [1, 2]
|
||||
}}
|
||||
],
|
||||
"expert_quotes": [
|
||||
{{
|
||||
"quote": "Exact quote from source...",
|
||||
"source_index": 1
|
||||
}}
|
||||
],
|
||||
"listener_cta": [
|
||||
"Call-to-action suggestion 1",
|
||||
"Call-to-action suggestion 2"
|
||||
],
|
||||
"mapped_angles": [
|
||||
{{
|
||||
"title": "Angle Title",
|
||||
"why": "Why this angle matters for the audience...",
|
||||
"mapped_fact_ids": ["fact_1", "fact_2"]
|
||||
}}
|
||||
]
|
||||
}}
|
||||
|
||||
Requirements:
|
||||
- Ensure insights are deep, not just superficial facts. Look for trends, expert opinions, and specific data points.
|
||||
- Expert quotes should be exact or near-exact quotes from the sources, with attribution.
|
||||
- Listener CTAs should be practical and engaging (e.g., "Share your experience with X on social media").
|
||||
- Mapped angles should be unique perspectives that make the episode stand out.
|
||||
- Tone should be professional, insightful, and ready for a podcast host to discuss.
|
||||
- Avoid generic filler.
|
||||
"""
|
||||
try:
|
||||
llm_response = llm_text_gen(
|
||||
prompt=prompt,
|
||||
user_id=user_id,
|
||||
json_struct=None,
|
||||
preferred_provider="huggingface",
|
||||
flow_type="premium_tool",
|
||||
)
|
||||
|
||||
# Normalize response
|
||||
if isinstance(llm_response, str):
|
||||
data = json.loads(llm_response)
|
||||
else:
|
||||
data = llm_response
|
||||
|
||||
summary = data.get("summary", "")
|
||||
key_insights = [PodcastResearchInsight(**insight) for insight in data.get("key_insights", [])]
|
||||
expert_quotes = data.get("expert_quotes", [])
|
||||
listener_cta = data.get("listener_cta", [])
|
||||
mapped_angles = data.get("mapped_angles", [])
|
||||
except Exception as exc:
|
||||
logger.error(f"[Podcast Research] LLM Insight extraction failed: {exc}")
|
||||
# Fallback to a basic summary if LLM fails
|
||||
summary = f"Research completed for '{request.topic}'. Found {len(sources)} sources."
|
||||
|
||||
# Fallback: if summary is still empty (e.g. LLM returned empty string), use raw content first paragraph or basic text
|
||||
if not summary:
|
||||
if raw_content:
|
||||
summary = raw_content[:2000] # Use first 2000 chars of raw content as summary
|
||||
else:
|
||||
summary = f"Research completed for '{request.topic}'. Found {len(sources)} sources."
|
||||
|
||||
# 3. TRACK USAGE
|
||||
try:
|
||||
cost_total = 0.0
|
||||
if isinstance(result, dict):
|
||||
cost_total = result.get("cost", {}).get("total", 0.005) if result.get("cost") else 0.005
|
||||
provider.track_exa_usage(user_id, cost_total)
|
||||
except Exception as track_err:
|
||||
logger.warning(f"[Podcast Exa Research] Failed to track usage: {track_err}")
|
||||
|
||||
sources_payload = []
|
||||
for src in sources:
|
||||
try:
|
||||
sources_payload.append(PodcastExaSource(**src))
|
||||
except Exception:
|
||||
sources_payload.append(PodcastExaSource(**{
|
||||
"title": src.get("title", ""),
|
||||
"url": src.get("url", ""),
|
||||
"excerpt": src.get("excerpt", ""),
|
||||
"published_at": src.get("published_at"),
|
||||
"highlights": src.get("highlights"),
|
||||
"summary": src.get("summary"),
|
||||
"source_type": src.get("source_type"),
|
||||
"index": src.get("index"),
|
||||
"image": src.get("image"),
|
||||
"author": src.get("author"),
|
||||
}))
|
||||
|
||||
return PodcastExaResearchResponse(
|
||||
sources=sources_payload,
|
||||
search_queries=result.get("search_queries", queries) if isinstance(result, dict) else queries,
|
||||
summary=summary,
|
||||
key_insights=key_insights,
|
||||
expert_quotes=expert_quotes,
|
||||
listener_cta=listener_cta,
|
||||
mapped_angles=mapped_angles,
|
||||
cost=result.get("cost") if isinstance(result, dict) else None,
|
||||
search_type=result.get("search_type") if isinstance(result, dict) else None,
|
||||
provider=result.get("provider", "exa") if isinstance(result, dict) else "exa",
|
||||
content=raw_content,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user