Files
ALwrity/backend/services/research/core/parameter_optimizer.py
ajaysi b134e9dc7e Added video studio router and endpoints. Added research router and endpoints. Added youtube router and endpoints. Added onboarding utils router and endpoints. Added onboarding utils service. Added onboarding utils models. Added onboarding utils routes. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. 
Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. 
Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. 
Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. 
Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. 
Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils.
2026-01-01 17:56:25 +05:30

385 lines
15 KiB
Python

"""
AI Parameter Optimizer for Research Engine
Uses AI to analyze the research query and context to select optimal
parameters for Exa and Tavily APIs. This abstracts the complexity
from non-technical users.
Key Decisions:
- Provider selection (Exa vs Tavily vs Google)
- Search type (neural vs keyword)
- Category/topic selection
- Depth and result limits
- Domain filtering
Author: ALwrity Team
Version: 2.0
"""
import os
import re
from typing import Dict, Any, Optional, Tuple
from loguru import logger
from .research_context import (
ResearchContext,
ResearchGoal,
ResearchDepth,
ProviderPreference,
ContentType,
)
from models.blog_models import ResearchConfig, ResearchProvider, ResearchMode
class ParameterOptimizer:
    """
    Parameter optimization for research providers.

    Analyzes the research context and selects optimal parameters
    for Exa, Tavily, or Google without requiring user expertise.

    Despite the "AI" in the module title, the selection logic in this class
    is rule-based: regular expressions and keyword checks over the query,
    combined with the caller's explicit preferences.

    Provider availability is read once, at construction time, from the
    ``EXA_API_KEY`` and ``TAVILY_API_KEY`` environment variables.
    """

    # Query patterns for intelligent routing
    TRENDING_PATTERNS = [
        r'\b(latest|recent|new|2024|2025|current|trending|news)\b',
        r'\b(update|announcement|launch|release)\b',
    ]
    TECHNICAL_PATTERNS = [
        r'\b(api|sdk|framework|library|implementation|architecture)\b',
        r'\b(code|programming|developer|technical|engineering)\b',
    ]
    COMPETITIVE_PATTERNS = [
        r'\b(competitor|alternative|vs|versus|compare|comparison)\b',
        r'\b(market|industry|landscape|players)\b',
    ]
    FACTUAL_PATTERNS = [
        r'\b(statistics|data|research|study|report|survey)\b',
        r'\b(percent|percentage|number|figure|metric)\b',
    ]

    # Exa category mapping based on query analysis
    EXA_CATEGORY_MAP = {
        'research': 'research paper',
        'news': 'news',
        'company': 'company',
        'personal': 'personal site',
        'github': 'github',
        'linkedin': 'linkedin profile',
        'finance': 'financial report',
    }

    # Tavily topic mapping
    TAVILY_TOPIC_MAP = {
        ResearchGoal.TRENDING: 'news',
        ResearchGoal.FACTUAL: 'general',
        ResearchGoal.COMPETITIVE: 'general',
        ResearchGoal.TECHNICAL: 'general',
        ResearchGoal.EDUCATIONAL: 'general',
        ResearchGoal.INSPIRATIONAL: 'general',
    }

    # ResearchDepth -> ResearchMode; shared by the normal and advanced paths
    # so the two cannot drift apart.
    DEPTH_MODE_MAP = {
        ResearchDepth.QUICK: ResearchMode.BASIC,
        ResearchDepth.STANDARD: ResearchMode.BASIC,
        ResearchDepth.COMPREHENSIVE: ResearchMode.COMPREHENSIVE,
        ResearchDepth.EXPERT: ResearchMode.COMPREHENSIVE,
    }

    # Human-readable recency -> Tavily time_range code
    TAVILY_TIME_RANGE_MAP = {
        'day': 'd',
        'week': 'w',
        'month': 'm',
        'year': 'y',
    }

    # Domain-list caps applied before handing lists to Tavily (Tavily limit)
    TAVILY_MAX_INCLUDE_DOMAINS = 300
    TAVILY_MAX_EXCLUDE_DOMAINS = 150

    def __init__(self):
        """Initialize the optimizer and record which provider API keys are set."""
        self.exa_available = bool(os.getenv("EXA_API_KEY"))
        self.tavily_available = bool(os.getenv("TAVILY_API_KEY"))
        logger.info(f"ParameterOptimizer initialized: exa={self.exa_available}, tavily={self.tavily_available}")

    def optimize(self, context: ResearchContext) -> Tuple[ResearchProvider, ResearchConfig]:
        """
        Analyze research context and return optimized provider and config.

        Args:
            context: The research context from the calling tool

        Returns:
            Tuple of (selected_provider, optimized_config)
        """
        # If advanced mode, use raw parameters
        if context.advanced_mode:
            return self._build_advanced_config(context)

        # Analyze query to determine optimal approach
        query_analysis = self._analyze_query(context.query)

        # Select provider based on analysis and preferences
        provider = self._select_provider(context, query_analysis)

        # Build optimized config for selected provider
        config = self._build_config(context, provider, query_analysis)

        logger.info(f"Optimized research: provider={provider.value}, mode={config.mode.value}")
        return provider, config

    def _analyze_query(self, query: str) -> Dict[str, Any]:
        """
        Analyze the query to understand intent and optimal approach.

        A missing query (``None`` or empty) is treated as an empty string, so
        every flag comes back False and the suggestions keep their defaults.

        Returns dict with:
            - is_trending: Query is about recent/current events
            - is_technical: Query is technical in nature
            - is_competitive: Query is about competition/comparison
            - is_factual: Query needs data/statistics
            - suggested_category: Exa category if applicable (else None)
            - suggested_topic: Tavily topic ('news', 'finance' or 'general')
            - suggested_search_type: Exa search type ('neural', 'keyword' or 'auto')
        """
        query_lower = (query or "").lower()

        def mentions(*keywords: str) -> bool:
            # Plain substring test, so 'paper' also matches 'papers'.
            return any(keyword in query_lower for keyword in keywords)

        # The static patterns only name 2024/2025; the dynamic year check
        # keeps "<current year>" queries classified as trending over time.
        is_trending = (
            self._matches_patterns(query_lower, self.TRENDING_PATTERNS)
            or self._mentions_recent_year(query_lower)
        )
        is_technical = self._matches_patterns(query_lower, self.TECHNICAL_PATTERNS)
        is_factual = self._matches_patterns(query_lower, self.FACTUAL_PATTERNS)

        analysis: Dict[str, Any] = {
            'is_trending': is_trending,
            'is_technical': is_technical,
            'is_competitive': self._matches_patterns(query_lower, self.COMPETITIVE_PATTERNS),
            'is_factual': is_factual,
            'suggested_category': None,
            'suggested_topic': 'general',
            'suggested_search_type': 'auto',
        }

        # Determine Exa category (first match wins; order is significant)
        if mentions('research', 'study', 'paper'):
            analysis['suggested_category'] = 'research paper'
        elif mentions('github', 'repository'):
            analysis['suggested_category'] = 'github'
        elif mentions('linkedin', 'professional'):
            analysis['suggested_category'] = 'linkedin profile'
        elif is_trending:
            analysis['suggested_category'] = 'news'
        elif mentions('company', 'startup'):
            analysis['suggested_category'] = 'company'

        # Determine Tavily topic ('general' is already the default)
        if is_trending:
            analysis['suggested_topic'] = 'news'
        elif mentions('finance', 'stock', 'investment'):
            analysis['suggested_topic'] = 'finance'

        # Determine search type
        if is_technical or is_factual:
            analysis['suggested_search_type'] = 'neural'  # Better for semantic understanding
        elif is_trending:
            analysis['suggested_search_type'] = 'keyword'  # Better for current events

        return analysis

    @staticmethod
    def _mentions_recent_year(text: str) -> bool:
        """Return True if *text* names the current or the previous calendar year."""
        # Function-scope import keeps the module's import block untouched.
        from datetime import date

        this_year = date.today().year
        return re.search(rf'\b({this_year - 1}|{this_year})\b', text) is not None

    def _matches_patterns(self, text: str, patterns: list) -> bool:
        """Check if text matches any of the patterns (case-insensitive)."""
        return any(re.search(pattern, text, re.IGNORECASE) for pattern in patterns)

    def _resolve_mode(self, depth: ResearchDepth) -> ResearchMode:
        """Map a research depth to a research mode, defaulting to BASIC."""
        return self.DEPTH_MODE_MAP.get(depth, ResearchMode.BASIC)

    def _select_provider(self, context: ResearchContext, analysis: Dict[str, Any]) -> ResearchProvider:
        """
        Select the optimal provider based on context and query analysis.

        An explicit provider preference is honoured when that provider is
        available; otherwise (or with no explicit preference) the priority
        is Exa → Tavily → Google for ALL modes (including basic).

        The query analysis is only used for log output here; it does not
        influence which provider is chosen.
        """
        preference = context.provider_preference

        # If user explicitly requested a provider, respect that
        if preference == ProviderPreference.EXA:
            if self.exa_available:
                return ResearchProvider.EXA
            logger.warning("Exa requested but not available, falling back")
        elif preference == ProviderPreference.TAVILY:
            if self.tavily_available:
                return ResearchProvider.TAVILY
            logger.warning("Tavily requested but not available, falling back")
        elif preference == ProviderPreference.GOOGLE:
            return ResearchProvider.GOOGLE

        # AUTO mode: Always prefer Exa → Tavily → Google
        if self.exa_available:
            logger.info(
                "Selected Exa (primary provider): query analysis shows "
                f"technical={analysis.get('is_technical', False)}, "
                f"trending={analysis.get('is_trending', False)}"
            )
            return ResearchProvider.EXA

        # Tavily as secondary option
        if self.tavily_available:
            logger.info(
                "Selected Tavily (secondary): Exa unavailable, "
                f"trending={analysis.get('is_trending', False)}"
            )
            return ResearchProvider.TAVILY

        # Google grounding as fallback
        logger.info("Selected Google (fallback): Exa and Tavily unavailable")
        return ResearchProvider.GOOGLE

    def _build_config(
        self,
        context: ResearchContext,
        provider: ResearchProvider,
        analysis: Dict[str, Any]
    ) -> ResearchConfig:
        """
        Build optimized ResearchConfig for the selected provider.

        Args:
            context: The research context from the calling tool
            provider: Provider chosen by ``_select_provider``
            analysis: Result of ``_analyze_query``

        Returns:
            A ResearchConfig with base settings, provider-specific tuning and
            any domain filters from the context applied.
        """
        personalization = context.personalization

        # Base config; statistics and expert quotes default to True when the
        # context carries no personalization.
        config = ResearchConfig(
            mode=self._resolve_mode(context.depth),
            provider=provider,
            max_sources=context.max_sources,
            include_statistics=personalization.include_statistics if personalization else True,
            include_expert_quotes=personalization.include_expert_quotes if personalization else True,
            include_competitors=analysis['is_competitive'],
            include_trends=analysis['is_trending'],
        )

        # Provider-specific optimizations (Google needs none here)
        if provider == ResearchProvider.EXA:
            config = self._optimize_exa_config(config, context, analysis)
        elif provider == ResearchProvider.TAVILY:
            config = self._optimize_tavily_config(config, context, analysis)

        self._apply_domain_filters(config, context, provider)
        return config

    def _apply_domain_filters(
        self,
        config: ResearchConfig,
        context: ResearchContext,
        provider: ResearchProvider
    ) -> None:
        """Copy non-empty include/exclude domain lists onto the provider's config fields."""
        if provider == ResearchProvider.EXA:
            if context.include_domains:
                config.exa_include_domains = context.include_domains
            if context.exclude_domains:
                config.exa_exclude_domains = context.exclude_domains
        elif provider == ResearchProvider.TAVILY:
            if context.include_domains:
                config.tavily_include_domains = context.include_domains[:self.TAVILY_MAX_INCLUDE_DOMAINS]
            if context.exclude_domains:
                config.tavily_exclude_domains = context.exclude_domains[:self.TAVILY_MAX_EXCLUDE_DOMAINS]

    def _optimize_exa_config(
        self,
        config: ResearchConfig,
        context: ResearchContext,
        analysis: Dict[str, Any]
    ) -> ResearchConfig:
        """Add Exa-specific optimizations (category and search type) to *config*."""
        # Set category based on analysis
        if analysis['suggested_category']:
            config.exa_category = analysis['suggested_category']

        # For comprehensive research, neural search overrides the suggestion
        if context.depth in [ResearchDepth.COMPREHENSIVE, ResearchDepth.EXPERT]:
            config.exa_search_type = 'neural'
        else:
            config.exa_search_type = analysis.get('suggested_search_type', 'auto')

        return config

    def _optimize_tavily_config(
        self,
        config: ResearchConfig,
        context: ResearchContext,
        analysis: Dict[str, Any]
    ) -> ResearchConfig:
        """Add Tavily-specific optimizations (topic, depth, time range, answer/raw content)."""
        # Set topic based on analysis
        config.tavily_topic = analysis.get('suggested_topic', 'general')

        # Depth-dependent settings
        if context.depth in [ResearchDepth.COMPREHENSIVE, ResearchDepth.EXPERT]:
            config.tavily_search_depth = 'advanced'  # 2 credits, but better results
            config.tavily_chunks_per_source = 3
            config.tavily_include_answer = 'advanced'
        else:
            config.tavily_search_depth = 'basic'  # 1 credit

        # Explicit recency wins; unknown values are passed through unchanged
        if context.recency:
            config.tavily_time_range = self.TAVILY_TIME_RANGE_MAP.get(context.recency, context.recency)
        elif analysis['is_trending']:
            config.tavily_time_range = 'w'  # Last week for trending topics

        # Include raw content for expert depth
        if context.depth == ResearchDepth.EXPERT:
            config.tavily_include_raw_content = 'markdown'

        return config

    def _build_advanced_config(self, context: ResearchContext) -> Tuple[ResearchProvider, ResearchConfig]:
        """
        Build config from raw advanced parameters.

        Used when advanced_mode=True and user wants full control.

        The provider is inferred from which provider-specific parameters are
        set (Exa parameters are checked before Tavily ones) and then
        overridden by an explicit, available provider preference. Whenever a
        wanted provider has no API key configured, a warning is logged.

        Returns:
            Tuple of (selected_provider, config)
        """
        # Determine provider from explicit parameters
        provider = ResearchProvider.GOOGLE
        if context.exa_category or context.exa_search_type:
            if self.exa_available:
                provider = ResearchProvider.EXA
            else:
                logger.warning("Exa parameters supplied but Exa is not available, falling back to Google")
        elif context.tavily_topic or context.tavily_search_depth:
            if self.tavily_available:
                provider = ResearchProvider.TAVILY
            else:
                logger.warning("Tavily parameters supplied but Tavily is not available, falling back to Google")

        # Check preference override
        preference = context.provider_preference
        if preference == ProviderPreference.EXA:
            if self.exa_available:
                provider = ResearchProvider.EXA
            else:
                logger.warning(f"Exa requested but not available, using {provider.value}")
        elif preference == ProviderPreference.TAVILY:
            if self.tavily_available:
                provider = ResearchProvider.TAVILY
            else:
                logger.warning(f"Tavily requested but not available, using {provider.value}")
        elif preference == ProviderPreference.GOOGLE:
            provider = ResearchProvider.GOOGLE

        mode = self._resolve_mode(context.depth)

        # NOTE(review): Exa domain lists are normalized to [] exactly like the
        # Tavily ones; confirm ResearchConfig does not distinguish None from [].
        include_domains = context.include_domains or []
        exclude_domains = context.exclude_domains or []

        # Build config with raw parameters
        config = ResearchConfig(
            mode=mode,
            provider=provider,
            max_sources=context.max_sources,
            # Exa
            exa_category=context.exa_category,
            exa_search_type=context.exa_search_type,
            exa_include_domains=include_domains,
            exa_exclude_domains=exclude_domains,
            # Tavily
            tavily_topic=context.tavily_topic,
            tavily_search_depth=context.tavily_search_depth,
            tavily_include_domains=include_domains[:self.TAVILY_MAX_INCLUDE_DOMAINS],
            tavily_exclude_domains=exclude_domains[:self.TAVILY_MAX_EXCLUDE_DOMAINS],
            tavily_include_answer=context.tavily_include_answer,
            tavily_include_raw_content=context.tavily_include_raw_content,
            tavily_time_range=context.tavily_time_range,
            tavily_country=context.tavily_country,
        )

        logger.info(f"Advanced config: provider={provider.value}, mode={mode.value}")
        return provider, config