""" AI Parameter Optimizer for Research Engine Uses AI to analyze the research query and context to select optimal parameters for Exa and Tavily APIs. This abstracts the complexity from non-technical users. Key Decisions: - Provider selection (Exa vs Tavily vs Google) - Search type (neural vs keyword) - Category/topic selection - Depth and result limits - Domain filtering Author: ALwrity Team Version: 2.0 """ import os import re from typing import Dict, Any, Optional, Tuple from loguru import logger from .research_context import ( ResearchContext, ResearchGoal, ResearchDepth, ProviderPreference, ContentType, ) from models.blog_models import ResearchConfig, ResearchProvider, ResearchMode class ParameterOptimizer: """ AI-driven parameter optimization for research providers. Analyzes the research context and selects optimal parameters for Exa, Tavily, or Google without requiring user expertise. """ # Query patterns for intelligent routing TRENDING_PATTERNS = [ r'\b(latest|recent|new|2024|2025|current|trending|news)\b', r'\b(update|announcement|launch|release)\b', ] TECHNICAL_PATTERNS = [ r'\b(api|sdk|framework|library|implementation|architecture)\b', r'\b(code|programming|developer|technical|engineering)\b', ] COMPETITIVE_PATTERNS = [ r'\b(competitor|alternative|vs|versus|compare|comparison)\b', r'\b(market|industry|landscape|players)\b', ] FACTUAL_PATTERNS = [ r'\b(statistics|data|research|study|report|survey)\b', r'\b(percent|percentage|number|figure|metric)\b', ] # Exa category mapping based on query analysis EXA_CATEGORY_MAP = { 'research': 'research paper', 'news': 'news', 'company': 'company', 'personal': 'personal site', 'github': 'github', 'linkedin': 'linkedin profile', 'finance': 'financial report', } # Tavily topic mapping TAVILY_TOPIC_MAP = { ResearchGoal.TRENDING: 'news', ResearchGoal.FACTUAL: 'general', ResearchGoal.COMPETITIVE: 'general', ResearchGoal.TECHNICAL: 'general', ResearchGoal.EDUCATIONAL: 'general', ResearchGoal.INSPIRATIONAL: 'general', } def __init__(self): """Initialize the optimizer.""" self.exa_available = bool(os.getenv("EXA_API_KEY")) self.tavily_available = bool(os.getenv("TAVILY_API_KEY")) logger.info(f"ParameterOptimizer initialized: exa={self.exa_available}, tavily={self.tavily_available}") def optimize(self, context: ResearchContext) -> Tuple[ResearchProvider, ResearchConfig]: """ Analyze research context and return optimized provider and config. Args: context: The research context from the calling tool Returns: Tuple of (selected_provider, optimized_config) """ # If advanced mode, use raw parameters if context.advanced_mode: return self._build_advanced_config(context) # Analyze query to determine optimal approach query_analysis = self._analyze_query(context.query) # Select provider based on analysis and preferences provider = self._select_provider(context, query_analysis) # Build optimized config for selected provider config = self._build_config(context, provider, query_analysis) logger.info(f"Optimized research: provider={provider.value}, mode={config.mode.value}") return provider, config def _analyze_query(self, query: str) -> Dict[str, Any]: """ Analyze the query to understand intent and optimal approach. Returns dict with: - is_trending: Query is about recent/current events - is_technical: Query is technical in nature - is_competitive: Query is about competition/comparison - is_factual: Query needs data/statistics - suggested_category: Exa category if applicable - suggested_topic: Tavily topic """ query_lower = query.lower() analysis = { 'is_trending': self._matches_patterns(query_lower, self.TRENDING_PATTERNS), 'is_technical': self._matches_patterns(query_lower, self.TECHNICAL_PATTERNS), 'is_competitive': self._matches_patterns(query_lower, self.COMPETITIVE_PATTERNS), 'is_factual': self._matches_patterns(query_lower, self.FACTUAL_PATTERNS), 'suggested_category': None, 'suggested_topic': 'general', 'suggested_search_type': 'auto', } # Determine Exa category if 'research' in query_lower or 'study' in query_lower or 'paper' in query_lower: analysis['suggested_category'] = 'research paper' elif 'github' in query_lower or 'repository' in query_lower: analysis['suggested_category'] = 'github' elif 'linkedin' in query_lower or 'professional' in query_lower: analysis['suggested_category'] = 'linkedin profile' elif analysis['is_trending']: analysis['suggested_category'] = 'news' elif 'company' in query_lower or 'startup' in query_lower: analysis['suggested_category'] = 'company' # Determine Tavily topic if analysis['is_trending']: analysis['suggested_topic'] = 'news' elif 'finance' in query_lower or 'stock' in query_lower or 'investment' in query_lower: analysis['suggested_topic'] = 'finance' else: analysis['suggested_topic'] = 'general' # Determine search type if analysis['is_technical'] or analysis['is_factual']: analysis['suggested_search_type'] = 'neural' # Better for semantic understanding elif analysis['is_trending']: analysis['suggested_search_type'] = 'keyword' # Better for current events return analysis def _matches_patterns(self, text: str, patterns: list) -> bool: """Check if text matches any of the patterns.""" for pattern in patterns: if re.search(pattern, text, re.IGNORECASE): return True return False def _select_provider(self, context: ResearchContext, analysis: Dict[str, Any]) -> ResearchProvider: """ Select the optimal provider based on context and query analysis. Priority: Exa → Tavily → Google for ALL modes (including basic). This provides better semantic search results for content creators. Exa's neural search excels at understanding context and meaning, which is valuable for all research types, not just technical queries. """ preference = context.provider_preference # If user explicitly requested a provider, respect that if preference == ProviderPreference.EXA: if self.exa_available: return ResearchProvider.EXA logger.warning("Exa requested but not available, falling back") if preference == ProviderPreference.TAVILY: if self.tavily_available: return ResearchProvider.TAVILY logger.warning("Tavily requested but not available, falling back") if preference == ProviderPreference.GOOGLE: return ResearchProvider.GOOGLE # AUTO mode: Always prefer Exa → Tavily → Google # Exa provides superior semantic search for all content types if self.exa_available: logger.info(f"Selected Exa (primary provider): query analysis shows " + f"technical={analysis.get('is_technical', False)}, " + f"trending={analysis.get('is_trending', False)}") return ResearchProvider.EXA # Tavily as secondary option - good for real-time and news if self.tavily_available: logger.info(f"Selected Tavily (secondary): Exa unavailable, " + f"trending={analysis.get('is_trending', False)}") return ResearchProvider.TAVILY # Google grounding as fallback logger.info("Selected Google (fallback): Exa and Tavily unavailable") return ResearchProvider.GOOGLE def _build_config( self, context: ResearchContext, provider: ResearchProvider, analysis: Dict[str, Any] ) -> ResearchConfig: """Build optimized ResearchConfig for the selected provider.""" # Map ResearchDepth to ResearchMode mode_map = { ResearchDepth.QUICK: ResearchMode.BASIC, ResearchDepth.STANDARD: ResearchMode.BASIC, ResearchDepth.COMPREHENSIVE: ResearchMode.COMPREHENSIVE, ResearchDepth.EXPERT: ResearchMode.COMPREHENSIVE, } mode = mode_map.get(context.depth, ResearchMode.BASIC) # Base config config = ResearchConfig( mode=mode, provider=provider, max_sources=context.max_sources, include_statistics=context.personalization.include_statistics if context.personalization else True, include_expert_quotes=context.personalization.include_expert_quotes if context.personalization else True, include_competitors=analysis['is_competitive'], include_trends=analysis['is_trending'], ) # Provider-specific optimizations if provider == ResearchProvider.EXA: config = self._optimize_exa_config(config, context, analysis) elif provider == ResearchProvider.TAVILY: config = self._optimize_tavily_config(config, context, analysis) # Apply domain filters if context.include_domains: if provider == ResearchProvider.EXA: config.exa_include_domains = context.include_domains elif provider == ResearchProvider.TAVILY: config.tavily_include_domains = context.include_domains[:300] # Tavily limit if context.exclude_domains: if provider == ResearchProvider.EXA: config.exa_exclude_domains = context.exclude_domains elif provider == ResearchProvider.TAVILY: config.tavily_exclude_domains = context.exclude_domains[:150] # Tavily limit return config def _optimize_exa_config( self, config: ResearchConfig, context: ResearchContext, analysis: Dict[str, Any] ) -> ResearchConfig: """Add Exa-specific optimizations.""" # Set category based on analysis if analysis['suggested_category']: config.exa_category = analysis['suggested_category'] # Set search type config.exa_search_type = analysis.get('suggested_search_type', 'auto') # For comprehensive research, use neural search if context.depth in [ResearchDepth.COMPREHENSIVE, ResearchDepth.EXPERT]: config.exa_search_type = 'neural' return config def _optimize_tavily_config( self, config: ResearchConfig, context: ResearchContext, analysis: Dict[str, Any] ) -> ResearchConfig: """Add Tavily-specific optimizations.""" # Set topic based on analysis config.tavily_topic = analysis.get('suggested_topic', 'general') # Set search depth based on research depth if context.depth in [ResearchDepth.COMPREHENSIVE, ResearchDepth.EXPERT]: config.tavily_search_depth = 'advanced' # 2 credits, but better results config.tavily_chunks_per_source = 3 else: config.tavily_search_depth = 'basic' # 1 credit # Set time range based on recency if context.recency: recency_map = { 'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y', } config.tavily_time_range = recency_map.get(context.recency, context.recency) elif analysis['is_trending']: config.tavily_time_range = 'w' # Last week for trending topics # Include answer for comprehensive research if context.depth in [ResearchDepth.COMPREHENSIVE, ResearchDepth.EXPERT]: config.tavily_include_answer = 'advanced' # Include raw content for expert depth if context.depth == ResearchDepth.EXPERT: config.tavily_include_raw_content = 'markdown' return config def _build_advanced_config(self, context: ResearchContext) -> Tuple[ResearchProvider, ResearchConfig]: """ Build config from raw advanced parameters. Used when advanced_mode=True and user wants full control. """ # Determine provider from explicit parameters provider = ResearchProvider.GOOGLE if context.exa_category or context.exa_search_type: provider = ResearchProvider.EXA if self.exa_available else ResearchProvider.GOOGLE elif context.tavily_topic or context.tavily_search_depth: provider = ResearchProvider.TAVILY if self.tavily_available else ResearchProvider.GOOGLE # Check preference override if context.provider_preference == ProviderPreference.EXA and self.exa_available: provider = ResearchProvider.EXA elif context.provider_preference == ProviderPreference.TAVILY and self.tavily_available: provider = ResearchProvider.TAVILY elif context.provider_preference == ProviderPreference.GOOGLE: provider = ResearchProvider.GOOGLE # Map depth to mode mode_map = { ResearchDepth.QUICK: ResearchMode.BASIC, ResearchDepth.STANDARD: ResearchMode.BASIC, ResearchDepth.COMPREHENSIVE: ResearchMode.COMPREHENSIVE, ResearchDepth.EXPERT: ResearchMode.COMPREHENSIVE, } mode = mode_map.get(context.depth, ResearchMode.BASIC) # Build config with raw parameters config = ResearchConfig( mode=mode, provider=provider, max_sources=context.max_sources, # Exa exa_category=context.exa_category, exa_search_type=context.exa_search_type, exa_include_domains=context.include_domains, exa_exclude_domains=context.exclude_domains, # Tavily tavily_topic=context.tavily_topic, tavily_search_depth=context.tavily_search_depth, tavily_include_domains=context.include_domains[:300] if context.include_domains else [], tavily_exclude_domains=context.exclude_domains[:150] if context.exclude_domains else [], tavily_include_answer=context.tavily_include_answer, tavily_include_raw_content=context.tavily_include_raw_content, tavily_time_range=context.tavily_time_range, tavily_country=context.tavily_country, ) logger.info(f"Advanced config: provider={provider.value}, mode={mode.value}") return provider, config