Added video studio router and endpoints. Added research router and endpoints. Added youtube router and endpoints. Added onboarding utils router and endpoints. Added onboarding utils service. Added onboarding utils models. Added onboarding utils routes. Added onboarding utils utils.
This commit is contained in:
51
backend/services/research/core/__init__.py
Normal file
51
backend/services/research/core/__init__.py
Normal file
@@ -0,0 +1,51 @@
|
||||
"""
|
||||
Research Engine Core Module
|
||||
|
||||
This is the standalone AI Research Engine that can be imported by
|
||||
Blog Writer, Podcast Maker, YouTube Creator, and other ALwrity tools.
|
||||
|
||||
Design Goals:
|
||||
- Tool-agnostic: Any content tool can import and use this
|
||||
- AI-driven parameter optimization: Users don't need to understand Exa/Tavily internals
|
||||
- Provider priority: Exa → Tavily → Google (fallback)
|
||||
- Personalization-aware: Accepts context from calling tools
|
||||
- Advanced by default: Prioritizes quality over speed
|
||||
|
||||
Usage:
    from services.research.core import (
        ResearchEngine,
        ResearchContext,
        ResearchPersonalizationContext,
        ContentType,
    )

    engine = ResearchEngine()
    result = await engine.research(ResearchContext(
        query="AI trends in healthcare 2025",
        personalization=ResearchPersonalizationContext(
            content_type=ContentType.BLOG,
            industry="Healthcare",
            target_audience="Medical professionals",
        ),
    ))
|
||||
|
||||
Author: ALwrity Team
|
||||
Version: 2.0
|
||||
Last Updated: December 2025
|
||||
"""
|
||||
|
||||
from .research_context import (
    ResearchContext,
    ResearchPersonalizationContext,
    ResearchResult,
    ContentType,
    ResearchGoal,
    ResearchDepth,
    ProviderPreference,
)
from .parameter_optimizer import ParameterOptimizer
from .research_engine import ResearchEngine

# Public API of the research core package. ResearchResult is re-exported so
# that calling tools can type the engine's output without reaching into the
# research_context submodule.
__all__ = [
    # Context schemas
    "ResearchContext",
    "ResearchPersonalizationContext",
    "ContentType",
    "ResearchGoal",
    "ResearchDepth",
    "ProviderPreference",
    # Result schema
    "ResearchResult",
    # Core classes
    "ParameterOptimizer",
    "ResearchEngine",
]
|
||||
384
backend/services/research/core/parameter_optimizer.py
Normal file
384
backend/services/research/core/parameter_optimizer.py
Normal file
@@ -0,0 +1,384 @@
|
||||
"""
|
||||
AI Parameter Optimizer for Research Engine
|
||||
|
||||
Analyzes the research query and context with rule-based heuristics
(regex and keyword matching) to select optimal parameters for the
Exa and Tavily APIs. This hides provider complexity from
non-technical users.
|
||||
|
||||
Key Decisions:
|
||||
- Provider selection (Exa vs Tavily vs Google)
|
||||
- Search type (neural vs keyword)
|
||||
- Category/topic selection
|
||||
- Depth and result limits
|
||||
- Domain filtering
|
||||
|
||||
Author: ALwrity Team
|
||||
Version: 2.0
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
from typing import Dict, Any, Optional, Tuple
|
||||
from loguru import logger
|
||||
|
||||
from .research_context import (
|
||||
ResearchContext,
|
||||
ResearchGoal,
|
||||
ResearchDepth,
|
||||
ProviderPreference,
|
||||
ContentType,
|
||||
)
|
||||
from models.blog_models import ResearchConfig, ResearchProvider, ResearchMode
|
||||
|
||||
|
||||
class ParameterOptimizer:
    """
    Rule-based parameter optimization for research providers.

    Inspects a ResearchContext (query text, depth, provider preference,
    recency and domain filters) and returns a provider choice together with
    a ResearchConfig for Exa, Tavily, or Google, so callers do not need to
    know provider-specific parameters. Query intent is detected with the
    regex and keyword heuristics declared below.
    """

    # Query patterns for intelligent routing (matched case-insensitively)
    TRENDING_PATTERNS = [
        r'\b(latest|recent|new|2024|2025|current|trending|news)\b',
        r'\b(update|announcement|launch|release)\b',
    ]

    TECHNICAL_PATTERNS = [
        r'\b(api|sdk|framework|library|implementation|architecture)\b',
        r'\b(code|programming|developer|technical|engineering)\b',
    ]

    COMPETITIVE_PATTERNS = [
        r'\b(competitor|alternative|vs|versus|compare|comparison)\b',
        r'\b(market|industry|landscape|players)\b',
    ]

    FACTUAL_PATTERNS = [
        r'\b(statistics|data|research|study|report|survey)\b',
        r'\b(percent|percentage|number|figure|metric)\b',
    ]

    # Exa category mapping based on query analysis
    EXA_CATEGORY_MAP = {
        'research': 'research paper',
        'news': 'news',
        'company': 'company',
        'personal': 'personal site',
        'github': 'github',
        'linkedin': 'linkedin profile',
        'finance': 'financial report',
    }

    # Tavily topic mapping (declared for callers; not referenced by the
    # methods of this class, which derive the topic from the query text)
    TAVILY_TOPIC_MAP = {
        ResearchGoal.TRENDING: 'news',
        ResearchGoal.FACTUAL: 'general',
        ResearchGoal.COMPETITIVE: 'general',
        ResearchGoal.TECHNICAL: 'general',
        ResearchGoal.EDUCATIONAL: 'general',
        ResearchGoal.INSPIRATIONAL: 'general',
    }

    # Single source of truth for ResearchDepth -> ResearchMode, shared by the
    # optimized and the advanced code paths.
    _DEPTH_TO_MODE = {
        ResearchDepth.QUICK: ResearchMode.BASIC,
        ResearchDepth.STANDARD: ResearchMode.BASIC,
        ResearchDepth.COMPREHENSIVE: ResearchMode.COMPREHENSIVE,
        ResearchDepth.EXPERT: ResearchMode.COMPREHENSIVE,
    }

    # Depths that switch on the heavier provider options.
    _DEEP_DEPTHS = (ResearchDepth.COMPREHENSIVE, ResearchDepth.EXPERT)

    # Caps applied to the domain lists handed to Tavily.
    _TAVILY_MAX_INCLUDE_DOMAINS = 300
    _TAVILY_MAX_EXCLUDE_DOMAINS = 150

    # ResearchContext.recency values -> Tavily time_range codes.
    _TAVILY_TIME_RANGES = {
        'day': 'd',
        'week': 'w',
        'month': 'm',
        'year': 'y',
    }

    def __init__(self):
        """Record which providers are usable, based on their API-key env vars."""
        self.exa_available = bool(os.getenv("EXA_API_KEY"))
        self.tavily_available = bool(os.getenv("TAVILY_API_KEY"))
        logger.info(f"ParameterOptimizer initialized: exa={self.exa_available}, tavily={self.tavily_available}")

    def optimize(self, context: ResearchContext) -> Tuple[ResearchProvider, ResearchConfig]:
        """
        Analyze research context and return optimized provider and config.

        Args:
            context: The research context from the calling tool

        Returns:
            Tuple of (selected_provider, optimized_config)
        """
        # Advanced mode bypasses the heuristics and uses the raw parameters.
        if context.advanced_mode:
            return self._build_advanced_config(context)

        query_analysis = self._analyze_query(context.query)
        provider = self._select_provider(context, query_analysis)
        config = self._build_config(context, provider, query_analysis)

        # config.mode is normally an enum member; fall back to the raw value
        # in case the config model stores enum values as plain strings.
        mode_label = getattr(config.mode, 'value', config.mode)
        logger.info(f"Optimized research: provider={provider.value}, mode={mode_label}")

        return provider, config

    def _analyze_query(self, query: str) -> Dict[str, Any]:
        """
        Analyze the query to understand intent and optimal approach.

        Returns dict with:
        - is_trending: Query is about recent/current events
        - is_technical: Query is technical in nature
        - is_competitive: Query is about competition/comparison
        - is_factual: Query needs data/statistics
        - suggested_category: Exa category if applicable (else None)
        - suggested_topic: Tavily topic ('news', 'finance' or 'general')
        - suggested_search_type: Exa search type ('neural', 'keyword' or 'auto')
        """
        query_lower = query.lower()

        def mentions(*words: str) -> bool:
            # Plain substring test, so 'study' also matches 'studying'.
            return any(word in query_lower for word in words)

        is_trending = self._matches_patterns(query_lower, self.TRENDING_PATTERNS)
        is_technical = self._matches_patterns(query_lower, self.TECHNICAL_PATTERNS)
        is_competitive = self._matches_patterns(query_lower, self.COMPETITIVE_PATTERNS)
        is_factual = self._matches_patterns(query_lower, self.FACTUAL_PATTERNS)

        # Exa category: the first matching rule wins, in this order.
        categories = self.EXA_CATEGORY_MAP
        if mentions('research', 'study', 'paper'):
            suggested_category = categories['research']
        elif mentions('github', 'repository'):
            suggested_category = categories['github']
        elif mentions('linkedin', 'professional'):
            suggested_category = categories['linkedin']
        elif is_trending:
            suggested_category = categories['news']
        elif mentions('company', 'startup'):
            suggested_category = categories['company']
        else:
            suggested_category = None

        # Tavily topic
        if is_trending:
            suggested_topic = 'news'
        elif mentions('finance', 'stock', 'investment'):
            suggested_topic = 'finance'
        else:
            suggested_topic = 'general'

        # Exa search type
        if is_technical or is_factual:
            suggested_search_type = 'neural'  # Better for semantic understanding
        elif is_trending:
            suggested_search_type = 'keyword'  # Better for current events
        else:
            suggested_search_type = 'auto'

        return {
            'is_trending': is_trending,
            'is_technical': is_technical,
            'is_competitive': is_competitive,
            'is_factual': is_factual,
            'suggested_category': suggested_category,
            'suggested_topic': suggested_topic,
            'suggested_search_type': suggested_search_type,
        }

    def _matches_patterns(self, text: str, patterns: list) -> bool:
        """Return True if any regex in `patterns` matches `text` (case-insensitive)."""
        return any(re.search(pattern, text, re.IGNORECASE) for pattern in patterns)

    def _select_provider(self, context: ResearchContext, analysis: Dict[str, Any]) -> ResearchProvider:
        """
        Select the provider based on the caller's preference and availability.

        An explicit EXA/TAVILY preference is honoured when that provider's
        API key is present; GOOGLE is always honoured. Every other case
        (AUTO, HYBRID, or a requested provider that is unavailable) uses the
        priority chain Exa → Tavily → Google. The query analysis is only
        used for log output here.
        """
        preference = context.provider_preference

        # If user explicitly requested a provider, respect that
        if preference == ProviderPreference.EXA:
            if self.exa_available:
                return ResearchProvider.EXA
            logger.warning("Exa requested but not available, falling back")

        if preference == ProviderPreference.TAVILY:
            if self.tavily_available:
                return ResearchProvider.TAVILY
            logger.warning("Tavily requested but not available, falling back")

        if preference == ProviderPreference.GOOGLE:
            return ResearchProvider.GOOGLE

        # AUTO mode (and HYBRID, which has no dedicated handling here):
        # always prefer Exa → Tavily → Google
        is_trending = analysis.get('is_trending', False)

        if self.exa_available:
            logger.info(
                f"Selected Exa (primary provider): query analysis shows "
                f"technical={analysis.get('is_technical', False)}, "
                f"trending={is_trending}"
            )
            return ResearchProvider.EXA

        if self.tavily_available:
            logger.info(
                f"Selected Tavily (secondary): Exa unavailable, "
                f"trending={is_trending}"
            )
            return ResearchProvider.TAVILY

        logger.info("Selected Google (fallback): Exa and Tavily unavailable")
        return ResearchProvider.GOOGLE

    def _resolve_mode(self, context: ResearchContext) -> ResearchMode:
        """Map the context's depth to a ResearchMode, defaulting to BASIC."""
        return self._DEPTH_TO_MODE.get(context.depth, ResearchMode.BASIC)

    def _build_config(
        self,
        context: ResearchContext,
        provider: ResearchProvider,
        analysis: Dict[str, Any]
    ) -> ResearchConfig:
        """Build optimized ResearchConfig for the selected provider."""
        personalization = context.personalization

        # Base config; statistics and expert quotes default to on when the
        # caller supplied no personalization.
        config = ResearchConfig(
            mode=self._resolve_mode(context),
            provider=provider,
            max_sources=context.max_sources,
            include_statistics=personalization.include_statistics if personalization else True,
            include_expert_quotes=personalization.include_expert_quotes if personalization else True,
            include_competitors=analysis['is_competitive'],
            include_trends=analysis['is_trending'],
        )

        # Provider-specific optimizations (Google needs none)
        if provider == ResearchProvider.EXA:
            config = self._optimize_exa_config(config, context, analysis)
        elif provider == ResearchProvider.TAVILY:
            config = self._optimize_tavily_config(config, context, analysis)

        # Apply domain filters to the selected provider only
        if context.include_domains:
            if provider == ResearchProvider.EXA:
                config.exa_include_domains = context.include_domains
            elif provider == ResearchProvider.TAVILY:
                config.tavily_include_domains = context.include_domains[:self._TAVILY_MAX_INCLUDE_DOMAINS]

        if context.exclude_domains:
            if provider == ResearchProvider.EXA:
                config.exa_exclude_domains = context.exclude_domains
            elif provider == ResearchProvider.TAVILY:
                config.tavily_exclude_domains = context.exclude_domains[:self._TAVILY_MAX_EXCLUDE_DOMAINS]

        return config

    def _optimize_exa_config(
        self,
        config: ResearchConfig,
        context: ResearchContext,
        analysis: Dict[str, Any]
    ) -> ResearchConfig:
        """Set the Exa category and search type on `config` and return it."""
        # Only override the category when the analysis suggested one
        if analysis['suggested_category']:
            config.exa_category = analysis['suggested_category']

        # Deep research always uses neural search; otherwise follow the analysis
        if context.depth in self._DEEP_DEPTHS:
            config.exa_search_type = 'neural'
        else:
            config.exa_search_type = analysis.get('suggested_search_type', 'auto')

        return config

    def _optimize_tavily_config(
        self,
        config: ResearchConfig,
        context: ResearchContext,
        analysis: Dict[str, Any]
    ) -> ResearchConfig:
        """Set Tavily topic, depth, time range and content options on `config`."""
        is_deep = context.depth in self._DEEP_DEPTHS

        # Set topic based on analysis
        config.tavily_topic = analysis.get('suggested_topic', 'general')

        # Set search depth based on research depth
        if is_deep:
            config.tavily_search_depth = 'advanced'  # 2 credits, but better results
            config.tavily_chunks_per_source = 3
        else:
            config.tavily_search_depth = 'basic'  # 1 credit

        # Explicit recency wins; unknown values are passed through unchanged
        if context.recency:
            config.tavily_time_range = self._TAVILY_TIME_RANGES.get(context.recency, context.recency)
        elif analysis['is_trending']:
            config.tavily_time_range = 'w'  # Last week for trending topics

        # Include answer for comprehensive research
        if is_deep:
            config.tavily_include_answer = 'advanced'

        # Include raw content for expert depth
        if context.depth == ResearchDepth.EXPERT:
            config.tavily_include_raw_content = 'markdown'

        return config

    def _build_advanced_config(self, context: ResearchContext) -> Tuple[ResearchProvider, ResearchConfig]:
        """
        Build config from raw advanced parameters.

        Used when advanced_mode=True and user wants full control. The
        provider is inferred from which raw parameters are set (Exa first,
        then Tavily, else Google), and an explicit provider preference
        overrides that inference when the preferred provider is available.
        """
        # Determine provider from explicit parameters
        provider = ResearchProvider.GOOGLE
        if context.exa_category or context.exa_search_type:
            provider = ResearchProvider.EXA if self.exa_available else ResearchProvider.GOOGLE
        elif context.tavily_topic or context.tavily_search_depth:
            provider = ResearchProvider.TAVILY if self.tavily_available else ResearchProvider.GOOGLE

        # Check preference override
        preference = context.provider_preference
        if preference == ProviderPreference.EXA and self.exa_available:
            provider = ResearchProvider.EXA
        elif preference == ProviderPreference.TAVILY and self.tavily_available:
            provider = ResearchProvider.TAVILY
        elif preference == ProviderPreference.GOOGLE:
            provider = ResearchProvider.GOOGLE

        mode = self._resolve_mode(context)

        # Build config with raw parameters; both providers' fields are
        # populated regardless of which provider was selected.
        include_domains = context.include_domains
        exclude_domains = context.exclude_domains
        config = ResearchConfig(
            mode=mode,
            provider=provider,
            max_sources=context.max_sources,
            # Exa
            exa_category=context.exa_category,
            exa_search_type=context.exa_search_type,
            exa_include_domains=include_domains,
            exa_exclude_domains=exclude_domains,
            # Tavily
            tavily_topic=context.tavily_topic,
            tavily_search_depth=context.tavily_search_depth,
            tavily_include_domains=include_domains[:self._TAVILY_MAX_INCLUDE_DOMAINS] if include_domains else [],
            tavily_exclude_domains=exclude_domains[:self._TAVILY_MAX_EXCLUDE_DOMAINS] if exclude_domains else [],
            tavily_include_answer=context.tavily_include_answer,
            tavily_include_raw_content=context.tavily_include_raw_content,
            tavily_time_range=context.tavily_time_range,
            tavily_country=context.tavily_country,
        )

        logger.info(f"Advanced config: provider={provider.value}, mode={mode.value}")

        return provider, config
|
||||
|
||||
198
backend/services/research/core/research_context.py
Normal file
198
backend/services/research/core/research_context.py
Normal file
@@ -0,0 +1,198 @@
|
||||
"""
|
||||
Research Context Schema
|
||||
|
||||
Defines the unified input schema for the Research Engine.
|
||||
Any tool (Blog Writer, Podcast Maker, YouTube Creator) can create a ResearchContext
|
||||
and pass it to the Research Engine.
|
||||
|
||||
Author: ALwrity Team
|
||||
Version: 2.0
|
||||
"""
|
||||
|
||||
from enum import Enum
|
||||
from typing import Optional, List, Dict, Any
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ContentType(str, Enum):
    """Kind of content the research is for; members compare equal to their string value."""

    # Long-form written content
    BLOG = "blog"
    # Audio and video formats
    PODCAST = "podcast"
    VIDEO = "video"
    # Short-form and direct channels
    SOCIAL = "social"
    EMAIL = "email"
    NEWSLETTER = "newsletter"
    # In-depth documents
    WHITEPAPER = "whitepaper"
    # Fallback when no specific type applies
    GENERAL = "general"
|
||||
|
||||
|
||||
class ResearchGoal(str, Enum):
    """Primary purpose of a research request; members compare equal to their string value."""

    # Stats, data, citations
    FACTUAL = "factual"
    # Current trends, news
    TRENDING = "trending"
    # Competitor analysis
    COMPETITIVE = "competitive"
    # How-to, explanations
    EDUCATIONAL = "educational"
    # Stories, quotes
    INSPIRATIONAL = "inspirational"
    # Deep technical content
    TECHNICAL = "technical"
|
||||
|
||||
|
||||
class ResearchDepth(str, Enum):
    """How deep the research should go; translated to a ResearchMode by the optimizer."""

    # Fast, surface-level (maps to BASIC)
    QUICK = "quick"
    # Balanced depth (maps to BASIC)
    STANDARD = "standard"
    # Deep research (maps to COMPREHENSIVE)
    COMPREHENSIVE = "comprehensive"
    # Maximum depth with expert sources (maps to COMPREHENSIVE)
    EXPERT = "expert"
|
||||
|
||||
|
||||
class ProviderPreference(str, Enum):
    """Which search provider the caller wants; AUTO leaves the choice to the engine."""

    # Engine decides (default)
    AUTO = "auto"
    # Force Exa neural search
    EXA = "exa"
    # Force Tavily AI search
    TAVILY = "tavily"
    # Force Google grounding
    GOOGLE = "google"
    # Use multiple providers
    # NOTE(review): ParameterOptimizer._select_provider has no HYBRID branch,
    # so this currently behaves like AUTO there - confirm intended handling.
    HYBRID = "hybrid"
|
||||
|
||||
|
||||
class ResearchPersonalizationContext(BaseModel):
    """
    Personalization data supplied by the calling tool.

    Lets a tool (Blog Writer, Podcast Maker, etc.) describe who the content
    is for and what it should contain, without the Research Engine needing
    to know anything about the tool itself. Every field is optional or has
    a default.
    """

    # --- Creator -----------------------------------------------------------
    creator_id: Optional[str] = None  # Clerk user ID

    # --- Content context ---------------------------------------------------
    content_type: ContentType = ContentType.GENERAL
    industry: Optional[str] = None
    target_audience: Optional[str] = None
    tone: Optional[str] = None  # professional, casual, technical, etc.

    # --- Persona data (from onboarding) ------------------------------------
    persona_id: Optional[str] = None
    brand_voice: Optional[str] = None
    competitor_urls: List[str] = Field(default_factory=list)

    # --- Content requirements ----------------------------------------------
    word_count_target: Optional[int] = None
    include_statistics: bool = True
    include_expert_quotes: bool = True
    include_case_studies: bool = False
    include_visuals: bool = False

    # --- Platform-specific hints -------------------------------------------
    platform: Optional[str] = None  # medium, wordpress, youtube, spotify, etc.

    class Config:
        # Store enum fields as their plain values rather than enum members.
        use_enum_values = True
|
||||
|
||||
|
||||
class ResearchContext(BaseModel):
    """
    Unified request schema for the Research Engine.

    A calling tool fills in the query plus whatever goal, depth, provider
    preference, personalization and filters it has, and hands the instance
    to the engine. When `advanced_mode` is set, the raw Exa/Tavily fields at
    the bottom are used directly instead of being derived from the context.
    """

    # --- Primary research input --------------------------------------------
    query: str = Field(..., description="Main research query or topic")
    keywords: List[str] = Field(default_factory=list, description="Additional keywords")

    # --- Research configuration --------------------------------------------
    goal: ResearchGoal = ResearchGoal.FACTUAL
    depth: ResearchDepth = ResearchDepth.STANDARD
    provider_preference: ProviderPreference = ProviderPreference.AUTO

    # --- Personalization from calling tool ---------------------------------
    personalization: Optional[ResearchPersonalizationContext] = None

    # --- Constraints -------------------------------------------------------
    max_sources: int = Field(default=10, ge=1, le=25)
    recency: Optional[str] = None  # "day", "week", "month", "year", None for all-time

    # --- Domain filtering --------------------------------------------------
    include_domains: List[str] = Field(default_factory=list)
    exclude_domains: List[str] = Field(default_factory=list)

    # --- Advanced mode (exposes raw provider parameters) -------------------
    advanced_mode: bool = False

    # Raw provider parameters (only used if advanced_mode=True)
    # Exa-specific
    exa_category: Optional[str] = None
    exa_search_type: Optional[str] = None  # auto, keyword, neural

    # Tavily-specific
    tavily_topic: Optional[str] = None  # general, news, finance
    tavily_search_depth: Optional[str] = None  # basic, advanced
    tavily_include_answer: bool = False
    tavily_include_raw_content: bool = False
    tavily_time_range: Optional[str] = None
    tavily_country: Optional[str] = None

    class Config:
        # Store enum fields as their plain values rather than enum members.
        use_enum_values = True

    def get_effective_query(self) -> str:
        """Return the query with any extra keywords appended, space-separated."""
        if not self.keywords:
            return self.query
        keyword_text = ' '.join(self.keywords)
        return f"{self.query} {keyword_text}"

    def get_industry(self) -> str:
        """Return the personalization industry, or "General" when none is set."""
        profile = self.personalization
        industry = profile.industry if profile else None
        return industry if industry else "General"

    def get_audience(self) -> str:
        """Return the personalization target audience, or "General" when none is set."""
        profile = self.personalization
        audience = profile.target_audience if profile else None
        return audience if audience else "General"

    def get_user_id(self) -> Optional[str]:
        """Return the creator ID from personalization, or None without personalization."""
        profile = self.personalization
        return profile.creator_id if profile else None
|
||||
|
||||
|
||||
class ResearchResult(BaseModel):
    """
    Standardized output of the Research Engine.

    Every field has a default, so a failure result can be built from just
    ``success=False`` plus the error fields.
    """

    success: bool = True

    # ---- Content ----
    summary: Optional[str] = None  # AI-generated summary of findings
    raw_content: Optional[str] = None  # Raw aggregated content for LLM processing

    # ---- Sources (one plain dict per source) ----
    sources: List[Dict[str, Any]] = Field(default_factory=list)

    # ---- Analysis (reuses existing blog writer analysis) ----
    keyword_analysis: Dict[str, Any] = Field(default_factory=dict)
    competitor_analysis: Dict[str, Any] = Field(default_factory=dict)
    suggested_angles: List[str] = Field(default_factory=list)

    # ---- Metadata ----
    provider_used: str = "google"  # Which provider was actually used
    search_queries: List[str] = Field(default_factory=list)
    grounding_metadata: Optional[Dict[str, Any]] = None

    # ---- Cost tracking ----
    estimated_cost: float = 0.0

    # ---- Error handling ----
    error_message: Optional[str] = None
    error_code: Optional[str] = None
    retry_suggested: bool = False

    # ---- Original context for reference ----
    original_query: Optional[str] = None

    class Config:
        use_enum_values = True
|
||||
|
||||
558
backend/services/research/core/research_engine.py
Normal file
558
backend/services/research/core/research_engine.py
Normal file
@@ -0,0 +1,558 @@
|
||||
"""
|
||||
Research Engine - Core Orchestrator
|
||||
|
||||
The main entry point for AI research across all ALwrity tools.
|
||||
This engine wraps existing providers (Exa, Tavily, Google) and provides
|
||||
a unified interface for any content generation tool.
|
||||
|
||||
Usage:
|
||||
from services.research.core import ResearchEngine, ResearchContext, ContentType
|
||||
|
||||
engine = ResearchEngine()
|
||||
result = await engine.research(ResearchContext(
|
||||
query="AI trends in healthcare 2025",
|
||||
content_type=ContentType.PODCAST,
|
||||
personalization=ResearchPersonalizationContext(
|
||||
industry="Healthcare",
|
||||
target_audience="Medical professionals"
|
||||
)
|
||||
))
|
||||
|
||||
Author: ALwrity Team
|
||||
Version: 2.0
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
from typing import Dict, Any, Optional, Callable
|
||||
from loguru import logger
|
||||
|
||||
from .research_context import (
|
||||
ResearchContext,
|
||||
ResearchResult,
|
||||
ResearchDepth,
|
||||
ContentType,
|
||||
ResearchPersonalizationContext,
|
||||
)
|
||||
from .parameter_optimizer import ParameterOptimizer
|
||||
|
||||
# Reuse existing blog writer models and services
|
||||
from models.blog_models import (
|
||||
BlogResearchRequest,
|
||||
BlogResearchResponse,
|
||||
ResearchConfig,
|
||||
ResearchProvider,
|
||||
ResearchMode,
|
||||
PersonaInfo,
|
||||
ResearchSource,
|
||||
)
|
||||
|
||||
# Research persona for personalization
|
||||
from models.research_persona_models import ResearchPersona
|
||||
|
||||
|
||||
class ResearchEngine:
|
||||
"""
|
||||
AI Research Engine - Standalone module for content research.
|
||||
|
||||
This engine:
|
||||
1. Accepts a ResearchContext from any tool
|
||||
2. Uses AI to optimize parameters for Exa/Tavily
|
||||
3. Integrates research persona for personalization
|
||||
4. Executes research using existing providers
|
||||
5. Returns standardized ResearchResult
|
||||
|
||||
Can be imported by Blog Writer, Podcast Maker, YouTube Creator, etc.
|
||||
"""
|
||||
|
||||
def __init__(self, db_session=None):
    """
    Set up the engine.

    Args:
        db_session: Optional database session to reuse for persona lookups;
            when omitted, a session is obtained on demand.
    """
    self.optimizer = ParameterOptimizer()

    # Provider slots; nothing in this constructor populates them.
    self._exa_provider = None
    self._tavily_provider = None
    self._google_provider = None
    self._providers_initialized = False

    self._db_session = db_session

    # A provider counts as available when its API key env var is non-empty.
    exa_key = os.getenv("EXA_API_KEY")
    tavily_key = os.getenv("TAVILY_API_KEY")
    self.exa_available = bool(exa_key)
    self.tavily_available = bool(tavily_key)

    logger.info(f"ResearchEngine initialized: exa={self.exa_available}, tavily={self.tavily_available}")
|
||||
|
||||
def _get_research_persona(self, user_id: str, generate_if_missing: bool = True) -> Optional[ResearchPersona]:
    """
    Fetch the research persona for a user, optionally generating it.

    Uses the session injected at construction time when there is one;
    otherwise opens a session via ``get_db_session()`` and closes it again
    before returning, so repeated calls do not leak sessions.

    Args:
        user_id: User ID (Clerk string). An empty value short-circuits to None.
        generate_if_missing: If True, call ``get_or_generate()`` (which may
            trigger generation); if False, only consult the cache via
            ``get_cached_only()``.

    Returns:
        The persona returned by the persona service, or None when the user ID
        is empty, the service returns nothing, or any exception occurs
        (failures are logged as warnings, never raised).
    """
    if not user_id:
        return None

    db = self._db_session
    owns_session = False  # True only when this call opened the session itself

    try:
        from services.research.research_persona_service import ResearchPersonaService

        if not db:
            from services.database import get_db_session
            db = get_db_session()
            owns_session = db is not None

        persona_service = ResearchPersonaService(db_session=db)

        if generate_if_missing:
            # Phase 2: get_or_generate() creates the persona on first visit.
            logger.info(f"🔄 Getting/generating research persona for user {user_id}...")
            persona = persona_service.get_or_generate(user_id, force_refresh=False)

            if persona:
                logger.info(f"✅ Research persona ready for user {user_id}: industry={persona.default_industry}")
            else:
                logger.warning(f"⚠️ Could not get/generate research persona for user {user_id} - using core persona fallback")
        else:
            # Fast path: only return cached (for config endpoints)
            persona = persona_service.get_cached_only(user_id)
            if persona:
                logger.debug(f"Research persona loaded from cache for user {user_id}")

        return persona

    except Exception as e:
        logger.warning(f"Failed to load research persona for user {user_id}: {e}")
        return None

    finally:
        # Only close sessions created here; an injected session belongs to the caller.
        # NOTE(review): assumes the returned persona does not need a live session
        # after this point (i.e. it is not a lazily-loaded ORM row) - confirm.
        if owns_session:
            close = getattr(db, "close", None)
            if callable(close):
                try:
                    close()
                except Exception as close_error:
                    logger.debug(f"Failed to close research persona DB session: {close_error}")
|
||||
|
||||
def _enrich_context_with_persona(
    self,
    context: ResearchContext,
    persona: ResearchPersona
) -> ResearchContext:
    """
    Fill gaps in the research context from the persona's defaults.

    The context is mutated in place and also returned. A persona value is
    applied only where the context has no value of its own (for industry and
    audience, "General" counts as unset), so caller-provided values win.
    """
    # Make sure there is a personalization object to write into.
    if not context.personalization:
        context.personalization = ResearchPersonalizationContext()
    profile = context.personalization

    industry_unset = not profile.industry or profile.industry == "General"
    if industry_unset and persona.default_industry:
        profile.industry = persona.default_industry
        logger.debug(f"Applied persona industry: {persona.default_industry}")

    audience_unset = not profile.target_audience or profile.target_audience == "General"
    if audience_unset and persona.default_target_audience:
        profile.target_audience = persona.default_target_audience
        logger.debug(f"Applied persona target_audience: {persona.default_target_audience}")

    # Suggested Exa domains, capped at the first six.
    if not context.include_domains and persona.suggested_exa_domains:
        context.include_domains = persona.suggested_exa_domains[:6]
        logger.debug(f"Applied persona domains: {context.include_domains}")

    # Suggested Exa category.
    if not context.exa_category and persona.suggested_exa_category:
        context.exa_category = persona.suggested_exa_category
        logger.debug(f"Applied persona exa_category: {persona.suggested_exa_category}")

    return context
|
||||
|
||||
async def research(
    self,
    context: ResearchContext,
    progress_callback: Optional[Callable[[str], None]] = None
) -> ResearchResult:
    """
    Execute research based on the given context.

    Steps: enrich the context from the user's research persona (when a user
    ID is present), let the optimizer choose provider and config, run the
    matching provider, then convert the response into a ResearchResult.

    Args:
        context: Research context with query, goals, and personalization.
            Persona enrichment mutates this object in place.
        progress_callback: Optional callback for progress updates.

    Returns:
        ResearchResult with sources, analysis, and content. Any exception is
        caught and reported as a result with ``success=False``,
        ``error_code="RESEARCH_FAILED"`` and ``retry_suggested=True``.
    """
    start_time = time.time()

    try:
        self._progress(progress_callback, "🔍 Analyzing research query...")

        # Enrich context with research persona (Phase 2: generate if missing)
        user_id = context.get_user_id()
        if user_id:
            self._progress(progress_callback, "👤 Loading personalized research profile...")
            persona = self._get_research_persona(user_id, generate_if_missing=True)
            if persona:
                self._progress(progress_callback, "✨ Applying hyper-personalized settings...")
                context = self._enrich_context_with_persona(context, persona)
            else:
                logger.warning(f"No research persona available for user {user_id} - proceeding with provided context")

        # Optimize parameters based on enriched context
        provider, config = self.optimizer.optimize(context)
        self._progress(progress_callback, f"🤖 Selected {provider.value.upper()} for research")

        # Build the request using existing blog models
        request = self._build_request(context, config)
        user_id = context.get_user_id() or ""

        self._progress(progress_callback, f"🌐 Connecting to {provider.value} search...")

        # The provider runners rewrite config.provider when they fall back
        # (Exa -> Tavily -> Google). Snapshot it so a fallback can be detected.
        provider_before = getattr(config, "provider", None)

        if provider == ResearchProvider.EXA:
            response = await self._execute_exa_research(request, config, user_id, progress_callback)
        elif provider == ResearchProvider.TAVILY:
            response = await self._execute_tavily_research(request, config, user_id, progress_callback)
        else:
            response = await self._execute_google_research(request, config, user_id, progress_callback)

        # Report the provider that actually produced the response rather than
        # the one initially selected, so provider_used stays truthful.
        provider_after = getattr(config, "provider", None)
        if provider_after is not None and provider_after != provider_before:
            try:
                provider = ResearchProvider(provider_after)
            except ValueError:
                logger.debug(f"Unrecognized fallback provider on config: {provider_after!r}")

        # Transform response to ResearchResult
        self._progress(progress_callback, "📊 Processing results...")
        result = self._transform_response(response, provider, context)

        duration_ms = (time.time() - start_time) * 1000
        logger.info(f"Research completed in {duration_ms:.0f}ms: {len(result.sources)} sources")

        self._progress(progress_callback, f"✅ Research complete: {len(result.sources)} sources found")

        return result

    except Exception as e:
        # logger.exception keeps the traceback, which the plain error log dropped.
        logger.exception(f"Research failed: {e}")
        return ResearchResult(
            success=False,
            error_message=str(e),
            error_code="RESEARCH_FAILED",
            retry_suggested=True,
            original_query=context.query
        )
|
||||
|
||||
def _progress(self, callback: Optional[Callable[[str], None]], message: str):
    """Forward a progress message to the callback (when one is given) and log it at info level."""
    log_line = f"[Research] {message}"
    if callback:
        callback(message)
    logger.info(log_line)
|
||||
|
||||
def _build_request(self, context: ResearchContext, config: ResearchConfig) -> BlogResearchRequest:
    """
    Translate a ResearchContext into the blog writer's BlogResearchRequest.

    Without personalization, the request carries no persona and no tone, and
    the word count target is 1500. Without keywords, the query itself is
    used as the single keyword.
    """
    profile = context.personalization

    # Fall back to the query as the only keyword.
    keywords = context.keywords or [context.query]

    if profile:
        persona = PersonaInfo(
            persona_id=profile.persona_id,
            tone=profile.tone,
            audience=profile.target_audience,
            industry=profile.industry,
        )
        tone = profile.tone
        word_count_target = profile.word_count_target
    else:
        persona = None
        tone = None
        word_count_target = 1500

    return BlogResearchRequest(
        keywords=keywords,
        topic=context.query,
        industry=context.get_industry(),
        target_audience=context.get_audience(),
        tone=tone,
        word_count_target=word_count_target,
        persona=persona,
        research_mode=config.mode,
        config=config,
    )
|
||||
|
||||
async def _execute_exa_research(
    self,
    request: BlogResearchRequest,
    config: ResearchConfig,
    user_id: str,
    progress_callback: Optional[Callable[[str], None]] = None
) -> BlogResearchResponse:
    """
    Run the research through the Exa provider and the shared analysis step.

    If the provider raises a RuntimeError whose message contains
    "EXA_API_KEY not configured", ``config.provider`` is switched to Tavily
    and the Tavily runner is used instead; any other RuntimeError propagates.
    """
    from services.blog_writer.research.exa_provider import ExaResearchProvider
    from services.blog_writer.research.research_strategies import get_strategy_for_mode

    self._progress(progress_callback, "🔍 Executing Exa neural search...")

    # The mode's strategy builds the research prompt.
    strategy = get_strategy_for_mode(config.mode)
    topic = request.topic or ", ".join(request.keywords)
    industry = request.industry or "General"
    audience = request.target_audience or "General"
    prompt = strategy.build_research_prompt(topic, industry, audience, config)

    try:
        provider = ExaResearchProvider()
        raw = await provider.search(prompt, topic, industry, audience, config, user_id)

        # Usage tracking: take the reported total, defaulting to 0.005.
        cost_info = raw.get('cost')
        cost = cost_info.get('total', 0.005) if isinstance(cost_info, dict) else 0.005
        provider.track_exa_usage(user_id, cost)

        found = len(raw.get('sources', []))
        self._progress(progress_callback, f"📝 Found {found} sources")

        return await self._run_analysis(request, raw, config, user_id, progress_callback)

    except RuntimeError as e:
        if "EXA_API_KEY not configured" not in str(e):
            raise
        logger.warning("Exa not configured, falling back to Tavily")
        self._progress(progress_callback, "⚠️ Exa unavailable, trying Tavily...")
        config.provider = ResearchProvider.TAVILY
        return await self._execute_tavily_research(request, config, user_id, progress_callback)
|
||||
|
||||
async def _execute_tavily_research(
    self,
    request: BlogResearchRequest,
    config: ResearchConfig,
    user_id: str,
    progress_callback: Optional[Callable[[str], None]] = None
) -> BlogResearchResponse:
    """
    Run the research through the Tavily provider and the shared analysis step.

    If the provider raises a RuntimeError whose message contains
    "TAVILY_API_KEY not configured", ``config.provider`` is switched to Google
    and the Google runner is used instead; any other RuntimeError propagates.
    """
    from services.blog_writer.research.tavily_provider import TavilyResearchProvider
    from services.blog_writer.research.research_strategies import get_strategy_for_mode

    self._progress(progress_callback, "🔍 Executing Tavily AI search...")

    # The mode's strategy builds the research prompt.
    strategy = get_strategy_for_mode(config.mode)
    topic = request.topic or ", ".join(request.keywords)
    industry = request.industry or "General"
    audience = request.target_audience or "General"
    prompt = strategy.build_research_prompt(topic, industry, audience, config)

    try:
        provider = TavilyResearchProvider()
        raw = await provider.search(prompt, topic, industry, audience, config, user_id)

        # Usage tracking: take the reported total, defaulting to 0.001.
        cost_info = raw.get('cost')
        cost = cost_info.get('total', 0.001) if isinstance(cost_info, dict) else 0.001
        depth = config.tavily_search_depth or "basic"
        provider.track_tavily_usage(user_id, cost, depth)

        found = len(raw.get('sources', []))
        self._progress(progress_callback, f"📝 Found {found} sources")

        return await self._run_analysis(request, raw, config, user_id, progress_callback)

    except RuntimeError as e:
        if "TAVILY_API_KEY not configured" not in str(e):
            raise
        logger.warning("Tavily not configured, falling back to Google")
        self._progress(progress_callback, "⚠️ Tavily unavailable, using Google Search...")
        config.provider = ResearchProvider.GOOGLE
        return await self._execute_google_research(request, config, user_id, progress_callback)
|
||||
|
||||
async def _execute_google_research(
    self,
    request: BlogResearchRequest,
    config: ResearchConfig,
    user_id: str,
    progress_callback: Optional[Callable[[str], None]] = None
) -> BlogResearchResponse:
    """
    Run the research through the Google provider and the shared analysis step.

    This is the last runner in the fallback chain: it has no fallback of its
    own, so provider errors propagate to the caller.
    """
    from services.blog_writer.research.google_provider import GoogleResearchProvider
    from services.blog_writer.research.research_strategies import get_strategy_for_mode

    self._progress(progress_callback, "🔍 Executing Google Search grounding...")

    # The mode's strategy builds the research prompt.
    strategy = get_strategy_for_mode(config.mode)
    topic = request.topic or ", ".join(request.keywords)
    industry = request.industry or "General"
    audience = request.target_audience or "General"
    prompt = strategy.build_research_prompt(topic, industry, audience, config)

    provider = GoogleResearchProvider()
    raw = await provider.search(prompt, topic, industry, audience, config, user_id)

    self._progress(progress_callback, "📝 Processing grounded results...")

    # is_google=True makes the analysis step read grounding-style results.
    return await self._run_analysis(
        request, raw, config, user_id, progress_callback, is_google=True
    )
|
||||
|
||||
async def _run_analysis(
    self,
    request: BlogResearchRequest,
    raw_result: Dict[str, Any],
    config: ResearchConfig,
    user_id: str,
    progress_callback: Optional[Callable[[str], None]] = None,
    is_google: bool = False
) -> BlogResearchResponse:
    """
    Run the shared keyword/competitor/angle analysis on raw provider output.

    Args:
        request: The research request (topic, keywords, industry).
        raw_result: Provider output; the keys read here are "content",
            "sources" and "search_queries" (plus grounding data for Google).
        config: Research configuration (accepted for signature parity; not
            read by this method).
        user_id: Passed through to the analyzers.
        progress_callback: Optional callback for progress updates.
        is_google: When True, sources and grounding metadata are extracted
            with the Gemini-grounding helpers.

    Returns:
        The BlogResearchResponse after ``ResearchDataFilter`` has processed it.
    """
    from services.blog_writer.research.keyword_analyzer import KeywordAnalyzer
    from services.blog_writer.research.competitor_analyzer import CompetitorAnalyzer
    from services.blog_writer.research.content_angle_generator import ContentAngleGenerator
    from services.blog_writer.research.data_filter import ResearchDataFilter

    self._progress(progress_callback, "🔍 Analyzing keywords and content angles...")

    # A key that is present but None must behave like a missing key; both
    # provider branches now apply the same null guards.
    content = raw_result.get("content", "") or ""
    search_queries = raw_result.get("search_queries", []) or []

    if is_google:
        sources = self._extract_sources_from_grounding(raw_result)
        grounding_metadata = self._extract_grounding_metadata(raw_result)
    else:
        raw_sources = raw_result.get("sources", []) or []
        sources = [
            ResearchSource(**s) if isinstance(s, dict) else s
            for s in raw_sources
        ]
        grounding_metadata = None

    topic = request.topic or ", ".join(request.keywords)
    industry = request.industry or "General"

    # Run analyzers
    keyword_analyzer = KeywordAnalyzer()
    competitor_analyzer = CompetitorAnalyzer()
    content_angle_generator = ContentAngleGenerator()
    data_filter = ResearchDataFilter()

    keyword_analysis = keyword_analyzer.analyze(content, request.keywords, user_id=user_id)
    competitor_analysis = competitor_analyzer.analyze(content, user_id=user_id)
    suggested_angles = content_angle_generator.generate(content, topic, industry, user_id=user_id)

    # Build response
    response = BlogResearchResponse(
        success=True,
        sources=sources,
        keyword_analysis=keyword_analysis,
        competitor_analysis=competitor_analysis,
        suggested_angles=suggested_angles,
        search_widget="",
        search_queries=search_queries,
        grounding_metadata=grounding_metadata,
        original_keywords=request.keywords,
    )

    # Filter and clean research data
    self._progress(progress_callback, "✨ Filtering and optimizing results...")
    return data_filter.filter_research_data(response)
|
||||
|
||||
def _extract_sources_from_grounding(self, gemini_result: Dict[str, Any]) -> list:
|
||||
"""Extract sources from Gemini grounding metadata."""
|
||||
from models.blog_models import ResearchSource
|
||||
|
||||
sources = []
|
||||
if not gemini_result or not isinstance(gemini_result, dict):
|
||||
return sources
|
||||
|
||||
raw_sources = gemini_result.get("sources", []) or []
|
||||
|
||||
for src in raw_sources:
|
||||
source = ResearchSource(
|
||||
title=src.get("title", "Untitled"),
|
||||
url=src.get("url", ""),
|
||||
excerpt=src.get("content", "")[:500] if src.get("content") else f"Source from {src.get('title', 'web')}",
|
||||
credibility_score=float(src.get("credibility_score", 0.8)),
|
||||
published_at=str(src.get("publication_date", "2024-01-01")),
|
||||
index=src.get("index"),
|
||||
source_type=src.get("type", "web")
|
||||
)
|
||||
sources.append(source)
|
||||
|
||||
return sources
|
||||
|
||||
def _extract_grounding_metadata(self, gemini_result: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
"""Extract grounding metadata from Gemini result."""
|
||||
if not gemini_result or not isinstance(gemini_result, dict):
|
||||
return None
|
||||
|
||||
return gemini_result.get("grounding_metadata")
|
||||
|
||||
def _transform_response(
    self,
    response: BlogResearchResponse,
    provider: ResearchProvider,
    context: ResearchContext
) -> ResearchResult:
    """
    Convert a BlogResearchResponse into the engine's ResearchResult.

    Sources and grounding metadata are flattened to plain dicts; the
    provider's value and the original query are recorded on the result.
    """

    def as_plain_dict(item):
        # Objects exposing .dict() serialize themselves; dicts pass through;
        # anything else is reduced to title/url/excerpt.
        if hasattr(item, 'dict'):
            return item.dict()
        if isinstance(item, dict):
            return item
        return {
            'title': getattr(item, 'title', ''),
            'url': getattr(item, 'url', ''),
            'excerpt': getattr(item, 'excerpt', ''),
        }

    sources = [as_plain_dict(s) for s in response.sources]

    grounding = None
    metadata = response.grounding_metadata
    if metadata:
        grounding = metadata.dict() if hasattr(metadata, 'dict') else metadata

    return ResearchResult(
        success=response.success,
        sources=sources,
        keyword_analysis=response.keyword_analysis,
        competitor_analysis=response.competitor_analysis,
        suggested_angles=response.suggested_angles,
        provider_used=provider.value,
        search_queries=response.search_queries,
        grounding_metadata=grounding,
        original_query=context.query,
        error_message=response.error_message,
        # These two attributes may be absent on the response model.
        error_code=getattr(response, 'error_code', None),
        retry_suggested=getattr(response, 'retry_suggested', False),
    )
|
||||
|
||||
def get_provider_status(self) -> Dict[str, Any]:
    """
    Describe each provider: availability flag, priority and a short description.

    Exa and Tavily report the flags stored on the engine; Google is always
    reported as available.
    """
    exa_status = {
        "available": self.exa_available,
        "priority": 1,
        "description": "Neural search for semantic understanding",
    }
    tavily_status = {
        "available": self.tavily_available,
        "priority": 2,
        "description": "AI-powered web search",
    }
    google_status = {
        "available": True,  # Always available via Gemini
        "priority": 3,
        "description": "Google Search grounding",
    }
    return {"exa": exa_status, "tavily": tavily_status, "google": google_status}
|
||||
|
||||
Reference in New Issue
Block a user